├── .github ├── dependabot.yml └── workflows │ ├── external.ci.yml │ ├── gfx-windows-public-bmg.env │ ├── gfx-windows-public.env │ └── internal.ci.yml ├── .gitignore ├── .gitmodules ├── CHANGELOG.md ├── CMakeLists.txt ├── LICENSE.txt ├── README.md ├── SECURITY.md ├── api ├── CMakeLists.txt └── api.cpp ├── apps ├── CMakeLists.txt ├── oidnBenchmark.cpp ├── oidnDenoise.cpp ├── oidnTest.cpp └── utils │ ├── CMakeLists.txt │ ├── arg_parser.cpp │ ├── arg_parser.h │ ├── device_info.h │ ├── dummy.c │ ├── image_buffer.cpp │ ├── image_buffer.h │ ├── image_io.cpp │ ├── image_io.h │ └── random.h ├── cmake ├── Config.cmake.in ├── FindLevelZero.cmake ├── FindOpenImageIO.cmake ├── FindTBB.cmake ├── oidn_bnns.cmake ├── oidn_common.cmake ├── oidn_common_external.cmake ├── oidn_dnnl.cmake ├── oidn_ispc.cmake ├── oidn_macros.cmake ├── oidn_metal.cmake ├── oidn_package.cmake ├── oidn_platform.cmake └── oidn_version.cmake ├── common ├── CMakeLists.txt ├── common.cpp ├── common.h ├── export.linux.map.in ├── export.macos.map.in ├── half.cpp ├── half.h ├── oidn.rc ├── oidn_utils.cpp ├── oidn_utils.h ├── platform.cpp ├── platform.h └── timer.h ├── core ├── CMakeLists.txt ├── arena.cpp ├── arena.h ├── arena_planner.cpp ├── arena_planner.h ├── autoexposure.h ├── buffer.cpp ├── buffer.h ├── color.cpp ├── color.h ├── concat_conv.cpp ├── concat_conv.h ├── concat_conv_chw.cpp ├── concat_conv_chw.h ├── concat_conv_hwc.cpp ├── concat_conv_hwc.h ├── context.cpp ├── context.h ├── conv.cpp ├── conv.h ├── data.h ├── device.cpp ├── device.h ├── device_factory.h ├── engine.cpp ├── engine.h ├── exception.cpp ├── exception.h ├── filter.cpp ├── filter.h ├── graph.cpp ├── graph.h ├── heap.cpp ├── heap.h ├── image.cpp ├── image.h ├── image_accessor.h ├── image_copy.h ├── input_process.cpp ├── input_process.h ├── kernel.h ├── math.h ├── module.cpp ├── module.h ├── op.cpp ├── op.h ├── output_process.cpp ├── output_process.h ├── pool.cpp ├── pool.h ├── progress.cpp ├── progress.h ├── record.h ├── ref.h ├── 
rt_filter.cpp ├── rt_filter.h ├── rtlightmap_filter.cpp ├── rtlightmap_filter.h ├── subdevice.cpp ├── subdevice.h ├── tensor.cpp ├── tensor.h ├── tensor_accessor.h ├── tensor_layout.h ├── tensor_reorder.cpp ├── tensor_reorder.h ├── thread.cpp ├── thread.h ├── tile.h ├── tza.cpp ├── tza.h ├── unet_filter.cpp ├── unet_filter.h ├── upsample.cpp ├── upsample.h ├── vec.h └── verbose.h ├── devices ├── CMakeLists.txt ├── cpu │ ├── CMakeLists.txt │ ├── bnns │ │ ├── bnns_common.cpp │ │ ├── bnns_common.h │ │ ├── bnns_conv.cpp │ │ ├── bnns_conv.h │ │ ├── bnns_engine.cpp │ │ ├── bnns_engine.h │ │ ├── bnns_pool.cpp │ │ └── bnns_pool.h │ ├── color.ispc │ ├── color.isph │ ├── cpu_autoexposure.cpp │ ├── cpu_autoexposure.h │ ├── cpu_autoexposure.ispc │ ├── cpu_common.cpp │ ├── cpu_common.h │ ├── cpu_conv.cpp │ ├── cpu_conv.h │ ├── cpu_conv.ispc │ ├── cpu_conv_compute.isph │ ├── cpu_conv_compute_block.isph │ ├── cpu_device.cpp │ ├── cpu_device.h │ ├── cpu_engine.cpp │ ├── cpu_engine.h │ ├── cpu_image_copy.cpp │ ├── cpu_image_copy.h │ ├── cpu_image_copy.ispc │ ├── cpu_input_process.cpp │ ├── cpu_input_process.h │ ├── cpu_input_process.ispc │ ├── cpu_module.cpp │ ├── cpu_output_process.cpp │ ├── cpu_output_process.h │ ├── cpu_output_process.ispc │ ├── cpu_pool.cpp │ ├── cpu_pool.h │ ├── cpu_pool.ispc │ ├── cpu_upsample.cpp │ ├── cpu_upsample.h │ ├── cpu_upsample.ispc │ ├── dnnl │ │ ├── dnnl_common.cpp │ │ ├── dnnl_common.h │ │ ├── dnnl_conv.cpp │ │ ├── dnnl_conv.h │ │ ├── dnnl_engine.cpp │ │ ├── dnnl_engine.h │ │ ├── dnnl_tensor.cpp │ │ └── dnnl_tensor.h │ ├── image_accessor.isph │ ├── math.isph │ ├── platform.ispc │ ├── platform.isph │ ├── tasking.cpp │ ├── tasking.h │ ├── tensor_accessor.isph │ ├── tile.isph │ └── vec.isph ├── cuda │ ├── CMakeLists.txt │ ├── cuda_conv.cu │ ├── cuda_conv.h │ ├── cuda_device.cpp │ ├── cuda_device.h │ ├── cuda_engine.cu │ ├── cuda_engine.h │ ├── cuda_external_buffer.cpp │ ├── cuda_external_buffer.h │ ├── cuda_module.cpp │ ├── curtn.cpp │ ├── curtn.h │ 
├── cutlass_conv.h │ ├── cutlass_conv_sm70.cu │ ├── cutlass_conv_sm75.cu │ └── cutlass_conv_sm80.cu ├── gpu │ ├── gpu_autoexposure.h │ ├── gpu_image_copy.h │ ├── gpu_input_process.h │ ├── gpu_output_process.h │ ├── gpu_pool.h │ └── gpu_upsample.h ├── hip │ ├── CMakeLists.txt │ ├── ck_conv.h │ ├── ck_conv_dl.cpp │ ├── ck_conv_wmma.cpp │ ├── hip_conv.cpp │ ├── hip_conv.h │ ├── hip_device.cpp │ ├── hip_device.h │ ├── hip_engine.cpp │ ├── hip_engine.h │ ├── hip_external_buffer.cpp │ ├── hip_external_buffer.h │ └── hip_module.cpp ├── metal │ ├── CMakeLists.txt │ ├── metal_buffer.h │ ├── metal_buffer.mm │ ├── metal_common.h │ ├── metal_common.mm │ ├── metal_conv.h │ ├── metal_conv.mm │ ├── metal_device.h │ ├── metal_device.mm │ ├── metal_engine.h │ ├── metal_engine.mm │ ├── metal_heap.h │ ├── metal_heap.mm │ ├── metal_kernels.metal │ └── metal_module.mm └── sycl │ ├── CMakeLists.txt │ ├── sycl_common.h │ ├── sycl_conv.h │ ├── sycl_conv_xe.h │ ├── sycl_conv_xe2.cpp │ ├── sycl_conv_xehpc.cpp │ ├── sycl_conv_xehpg.cpp │ ├── sycl_conv_xelp.cpp │ ├── sycl_device.cpp │ ├── sycl_device.h │ ├── sycl_device_table.h │ ├── sycl_engine.cpp │ ├── sycl_engine.h │ ├── sycl_external_buffer.cpp │ ├── sycl_external_buffer.h │ └── sycl_module.cpp ├── doc ├── .gitignore ├── BINARY-LICENSE.txt ├── Makefile ├── api.md ├── compilation.md ├── documentation.md ├── downloads.md ├── examples.md ├── filter-latex.py ├── filter-sectionnumbers.py ├── gallery.md ├── images.md ├── legal.md ├── links.md ├── overview.md ├── preamble.tex ├── readme.tex ├── readme_head.md ├── related_projects.md ├── stylesheet.css ├── teaser.html ├── training.md └── webtemplate.html ├── external └── catch.hpp ├── include └── OpenImageDenoise │ ├── config.h.in │ ├── oidn.h │ └── oidn.hpp ├── readme.pdf ├── requirements.txt ├── scripts ├── blob_to_cpp.py ├── build.py ├── build_src.py ├── build_weights.py ├── common.py ├── csan.supp.xml ├── protex_scan.sh ├── store-files.sh ├── test.py └── valgrind.supp ├── 
third-party-programs-DPCPP.txt ├── third-party-programs-oneDNN.txt ├── third-party-programs-oneTBB.txt ├── third-party-programs.txt └── training ├── .gitignore ├── color.py ├── compare_image.py ├── config.py ├── convert_image.py ├── dataset.py ├── export.py ├── find_lr.py ├── image.py ├── infer.py ├── loss.py ├── model.py ├── preprocess.py ├── result.py ├── split_exr.py ├── ssim.py ├── train.py ├── tza.py ├── util.py └── visualize.py /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "monthly" 12 | -------------------------------------------------------------------------------- /.github/workflows/gfx-windows-public-bmg.env: -------------------------------------------------------------------------------- 1 | GFX_DRIVER_VERSION=windows-101.6256 -------------------------------------------------------------------------------- /.github/workflows/gfx-windows-public.env: -------------------------------------------------------------------------------- 1 | GFX_DRIVER_VERSION=windows-101.6449 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .* 2 | !.gitignore 3 | !.github 4 | venv/ 5 | 6 | *~ 7 | *.autosave 8 | *.a 9 | *.core 10 | *.moc 11 | *.o 12 | *.obj 13 | *.orig 14 | *.rej 15 | *.so 16 | *.so.* 17 | *_pch.h.cpp 18 | *.qm 19 | .#* 20 | *.*# 21 | core 22 | !core/ 23 | tags 24 | 
.DS_Store 25 | .directory 26 | *.debug 27 | *.prl 28 | *.app 29 | moc_*.cpp 30 | ui_*.h 31 | qrc_*.cpp 32 | Thumbs.db 33 | *.res 34 | /.qmake.cache 35 | /.qmake.stash 36 | .bash_history 37 | .python_history 38 | 39 | # Qt Creator generated files 40 | *.txt.user* 41 | *.pro.user* 42 | 43 | # xemacs temporary files 44 | *.flc 45 | 46 | # Vim temporary files 47 | .*.swp 48 | 49 | # Visual Studio generated files 50 | *.ib_pdb_index 51 | *.idb 52 | *.ilk 53 | *.pdb 54 | *.sln 55 | *.suo 56 | *.vcproj 57 | *vcproj.*.*.user 58 | *.ncb 59 | *.sdf 60 | *.opensdf 61 | *.vcxproj 62 | *vcxproj.* 63 | *.log 64 | 65 | # Visual Studio Code generated files 66 | .vscode 67 | 68 | # MinGW generated files 69 | *.Debug 70 | *.Release 71 | 72 | # Python byte code 73 | *.pyc 74 | 75 | # Binaries 76 | *.dll 77 | *.exe 78 | 79 | # Images 80 | /images* 81 | *.exr 82 | *.pfm 83 | *.phm 84 | *.png 85 | *.bmp 86 | *.ppm 87 | 88 | # Build directories 89 | /build* 90 | 91 | # Dependencies 92 | /deps 93 | 94 | # Generated files 95 | include/OpenImageDenoise/config.h 96 | common/export.linux.map 97 | common/export.macos.map 98 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "mkl-dnn"] 2 | path = external/mkl-dnn 3 | url = ../mkl-dnn.git 4 | [submodule "weights"] 5 | path = weights 6 | url = ../oidn-weights.git 7 | [submodule "cutlass"] 8 | path = external/cutlass 9 | url = https://github.com/NVIDIA/cutlass.git 10 | [submodule "external/composable_kernel"] 11 | path = external/composable_kernel 12 | url = https://github.com/ROCmSoftwarePlatform/composable_kernel.git 13 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | Intel is committed to rapidly addressing security vulnerabilities affecting our 
customers and providing clear guidance on the solution, impact, severity and mitigation. 3 | 4 | ## Reporting a Vulnerability 5 | Please report any security vulnerabilities in this project utilizing the guidelines [here](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html). 6 | 7 | 8 | ## Security Considerations 9 | When integrating this library into your application, you are responsible for ensuring overall application security. If you are building and using the **Open Image Denoise (OIDN) library** from source, be aware of potential security risks, including **DLL planting attacks** and other dynamic library loading vulnerabilities. 10 | 11 | To help mitigate such risks, we provide the `OIDN_DEPENDENTLOADFLAG` CMake option. This allows you to specify the appropriate **Windows linker flag** based on your security requirements. By default, this option is not set, and you should configure it according to your deployment needs. See the official Microsoft documentation for more details: [DEPENDENTLOADFLAG Linker Option](https://learn.microsoft.com/en-us/cpp/build/reference/dependentloadflag?view=msvc-170). 12 | 13 | To enable and set this flag, configure your build with: 14 | 15 | cmake -DOIDN_DEPENDENTLOADFLAG=<value> .. 
16 | 17 | For more information on securing dynamic library loading, refer to Microsoft's official documentation on: 18 | 19 | - [Safe DLL Search Mode](https://learn.microsoft.com/en-us/windows/win32/dlls/dynamic-link-library-search-order) 20 | - [Dynamic-Link Library Security](https://learn.microsoft.com/en-us/windows/win32/dlls/dynamic-link-library-security) 21 | -------------------------------------------------------------------------------- /api/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ## Copyright 2018 Intel Corporation 2 | ## SPDX-License-Identifier: Apache-2.0 3 | 4 | set(API_SOURCES 5 | api.cpp 6 | ) 7 | 8 | add_library(OpenImageDenoise ${OIDN_LIB_TYPE} ${API_SOURCES} ${OIDN_RESOURCE_FILE}) 9 | 10 | set_target_properties(OpenImageDenoise PROPERTIES 11 | OUTPUT_NAME ${OIDN_LIBRARY_NAME} 12 | ) 13 | if(OIDN_LIBRARY_VERSIONED) 14 | set_target_properties(OpenImageDenoise PROPERTIES 15 | VERSION ${PROJECT_VERSION} 16 | SOVERSION ${PROJECT_VERSION_MAJOR} 17 | ) 18 | endif() 19 | 20 | target_include_directories(OpenImageDenoise 21 | PUBLIC 22 | $ 23 | $ 24 | ) 25 | 26 | target_link_libraries(OpenImageDenoise PRIVATE OpenImageDenoise_core) 27 | 28 | if(NOT OIDN_STATIC_LIB) 29 | oidn_strip_symbols(OpenImageDenoise) 30 | endif() 31 | 32 | install(TARGETS OpenImageDenoise 33 | EXPORT 34 | OpenImageDenoise_Exports 35 | ARCHIVE 36 | DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT devel 37 | LIBRARY 38 | DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT devel 39 | RUNTIME 40 | DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT lib 41 | ) -------------------------------------------------------------------------------- /apps/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ## Copyright 2020 Intel Corporation 2 | ## SPDX-License-Identifier: Apache-2.0 3 | 4 | add_subdirectory(utils) 5 | 6 | macro(oidn_add_app APP_NAME) 7 | add_executable(${APP_NAME} ${ARGN} 
${OIDN_RESOURCE_FILE}) 8 | target_link_libraries(${APP_NAME} PRIVATE OpenImageDenoise_common OpenImageDenoise_utils OpenImageDenoise) 9 | install(TARGETS ${APP_NAME} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT apps) 10 | endmacro() 11 | 12 | oidn_add_app(oidnDenoise oidnDenoise.cpp) 13 | oidn_add_app(oidnBenchmark oidnBenchmark.cpp) 14 | oidn_add_app(oidnTest oidnTest.cpp "${PROJECT_SOURCE_DIR}/external/catch.hpp") -------------------------------------------------------------------------------- /apps/utils/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ## Copyright 2020 Intel Corporation 2 | ## SPDX-License-Identifier: Apache-2.0 3 | 4 | include(CMakeDependentOption) 5 | 6 | set(OIDN_UTILS_SOURCES 7 | arg_parser.h 8 | arg_parser.cpp 9 | device_info.h 10 | image_buffer.h 11 | image_buffer.cpp 12 | image_io.h 13 | image_io.cpp 14 | random.h 15 | ) 16 | 17 | if(NOT OIDN_API_NAMESPACE) 18 | list(APPEND OIDN_UTILS_SOURCES dummy.c) 19 | endif() 20 | 21 | add_library(OpenImageDenoise_utils STATIC ${OIDN_UTILS_SOURCES}) 22 | set_target_properties(OpenImageDenoise_utils PROPERTIES OUTPUT_NAME ${OIDN_LIBRARY_NAME}_utils) 23 | target_link_libraries(OpenImageDenoise_utils PUBLIC OpenImageDenoise_common OpenImageDenoise) 24 | 25 | # OpenImageIO 26 | option(OIDN_APPS_OPENIMAGEIO "Enable OpenImageIO support in example and test apps." 
OFF) 27 | if(OIDN_APPS_OPENIMAGEIO) 28 | find_package(OpenImageIO REQUIRED) 29 | if(OPENIMAGEIO_VERSION VERSION_GREATER_EQUAL "3") 30 | # OpenImageIO 3.x requires C++17 31 | set_target_properties(OpenImageDenoise_utils PROPERTIES CXX_STANDARD 17) 32 | elseif(OPENIMAGEIO_VERSION VERSION_GREATER_EQUAL "2.3") 33 | # OpenImageIO 2.3+ requires C++14 34 | set_target_properties(OpenImageDenoise_utils PROPERTIES CXX_STANDARD 14) 35 | endif() 36 | target_compile_definitions(OpenImageDenoise_utils PRIVATE OIDN_USE_OPENIMAGEIO) 37 | target_link_libraries(OpenImageDenoise_utils PRIVATE OpenImageIO::OpenImageIO) 38 | endif() 39 | -------------------------------------------------------------------------------- /apps/utils/arg_parser.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "arg_parser.h" 5 | 6 | OIDN_NAMESPACE_BEGIN 7 | 8 | ArgParser::ArgParser(int argc, char* argv[]) 9 | : argc(argc), argv(argv), 10 | pos(1) {} 11 | 12 | bool ArgParser::hasNext() const 13 | { 14 | return pos < argc; 15 | } 16 | 17 | std::string ArgParser::getNext() 18 | { 19 | if (pos < argc) 20 | return argv[pos++]; 21 | else 22 | throw std::invalid_argument("argument expected"); 23 | } 24 | 25 | std::string ArgParser::getNextOpt() 26 | { 27 | std::string str = getNext(); 28 | size_t pos = str.find_first_not_of("-"); 29 | if (pos == 0 || pos == std::string::npos) 30 | throw std::invalid_argument("option expected"); 31 | return str.substr(pos); 32 | } 33 | 34 | template<> 35 | std::string ArgParser::getNextValue() 36 | { 37 | std::string str = getNext(); 38 | if (!str.empty() && str[0] == '-') 39 | throw std::invalid_argument("value expected"); 40 | return str; 41 | } 42 | 43 | OIDN_NAMESPACE_END 44 | 45 | -------------------------------------------------------------------------------- /apps/utils/arg_parser.h: 
-------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "common/platform.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | 10 | // Command-line argument parser 11 | class ArgParser 12 | { 13 | public: 14 | ArgParser(int argc, char* argv[]); 15 | 16 | bool hasNext() const; 17 | std::string getNext(); 18 | std::string getNextOpt(); 19 | 20 | template 21 | T getNextValue() 22 | { 23 | return fromString(getNextValue()); 24 | } 25 | 26 | private: 27 | int argc; 28 | char** argv; 29 | int pos; 30 | }; 31 | 32 | template<> 33 | std::string ArgParser::getNextValue(); 34 | 35 | OIDN_NAMESPACE_END 36 | 37 | -------------------------------------------------------------------------------- /apps/utils/device_info.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "common/common.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | 10 | inline int printPhysicalDevices() 11 | { 12 | const int numDevices = getNumPhysicalDevices(); 13 | if (numDevices == 0) 14 | { 15 | std::cout << "No supported devices found" << std::endl; 16 | return 1; 17 | } 18 | 19 | for (int i = 0; i < numDevices; ++i) 20 | { 21 | PhysicalDeviceRef physicalDevice(i); 22 | std::cout << "Device " << i << std::endl; 23 | std::cout << " Name: " << physicalDevice.get("name") << std::endl; 24 | std::cout << " Type: " << physicalDevice.get("type") << std::endl; 25 | if (physicalDevice.get("uuidSupported")) 26 | std::cout << " UUID: " << physicalDevice.get("uuid") << std::endl; 27 | if (physicalDevice.get("luidSupported")) 28 | { 29 | std::cout << " LUID: " << physicalDevice.get("luid") << std::endl; 30 | std::cout << " Node: " << physicalDevice.get("nodeMask") << std::endl; 31 | } 32 | if (physicalDevice.get("pciAddressSupported")) 33 | { 34 | auto 
flags = std::cout.flags(); 35 | std::cout << " PCI : " 36 | << std::hex << std::setfill('0') 37 | << std::setw(4) << physicalDevice.get("pciDomain") << ":" 38 | << std::setw(2) << physicalDevice.get("pciBus") << ":" 39 | << std::setw(2) << physicalDevice.get("pciDevice") << "." 40 | << std::setw(1) << physicalDevice.get("pciFunction") 41 | << std::endl; 42 | std::cout.flags(flags); 43 | } 44 | if (i < numDevices-1) 45 | std::cout << std::endl; 46 | } 47 | 48 | return 0; 49 | } 50 | 51 | OIDN_NAMESPACE_END 52 | -------------------------------------------------------------------------------- /apps/utils/dummy.c: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | // This dummy file is used for checking whether the headers are valid C code 5 | #include "include/OpenImageDenoise/oidn.h" -------------------------------------------------------------------------------- /apps/utils/image_io.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "image_buffer.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | 10 | // Loads an image with optionally specified number of channels and data type 11 | std::shared_ptr loadImage(const DeviceRef& device, 12 | const std::string& filename, 13 | DataType dataType = DataType::Void, 14 | Storage storage = Storage::Undefined); 15 | 16 | // Loads an image with/without sRGB to linear conversion 17 | std::shared_ptr loadImage(const DeviceRef& device, 18 | const std::string& filename, 19 | bool srgb, 20 | DataType dataType = DataType::Void, 21 | Storage storage = Storage::Undefined); 22 | 23 | // Saves an image 24 | void saveImage(const std::string& filename, const ImageBuffer& image); 25 | 26 | // Saves an image with/without linear to sRGB conversion 27 | void saveImage(const std::string& 
filename, const ImageBuffer& image, bool srgb); 28 | 29 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /apps/utils/random.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "common/platform.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | 10 | // Simple and very fast LCG random number generator 11 | class Random 12 | { 13 | private: 14 | uint32_t state; 15 | 16 | public: 17 | oidn_inline Random(uint32_t seed = 1) : state(seed) {} 18 | 19 | oidn_inline void reset(uint32_t seed = 1) 20 | { 21 | state = (seed * 8191) ^ 140167; 22 | } 23 | 24 | oidn_inline void next() 25 | { 26 | const uint32_t multiplier = 1664525; 27 | const uint32_t increment = 1013904223; 28 | state = multiplier * state + increment; 29 | } 30 | 31 | oidn_inline uint32_t getUInt() 32 | { 33 | next(); 34 | return state; 35 | } 36 | 37 | oidn_inline int getInt() 38 | { 39 | next(); 40 | return state; 41 | } 42 | 43 | oidn_inline float getFloat() 44 | { 45 | next(); 46 | return float(state) * 2.3283064365386962890625e-10f; // x / 2^32 47 | } 48 | }; 49 | 50 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /cmake/Config.cmake.in: -------------------------------------------------------------------------------- 1 | ## Copyright 2023 Intel Corporation 2 | ## SPDX-License-Identifier: Apache-2.0 3 | 4 | @PACKAGE_INIT@ 5 | 6 | set(OIDN_DEVICE_CPU @OIDN_DEVICE_CPU@) 7 | set(OIDN_DEVICE_SYCL @OIDN_DEVICE_SYCL@) 8 | set(OIDN_DEVICE_CUDA @OIDN_DEVICE_CUDA@) 9 | set(OIDN_DEVICE_HIP @OIDN_DEVICE_HIP@) 10 | 11 | set(OIDN_FILTER_RT @OIDN_FILTER_RT@) 12 | set(OIDN_FILTER_RTLIGHTMAP @OIDN_FILTER_RTLIGHTMAP@) 13 | 14 | set(OIDN_STATIC_LIB @OIDN_STATIC_LIB@) 15 | 16 | if(OIDN_STATIC_LIB AND OIDN_DEVICE_CPU) 17 | include(CMakeFindDependencyMacro) 18 | find_dependency(TBB) 19 | 
endif() 20 | 21 | include("${CMAKE_CURRENT_LIST_DIR}/OpenImageDenoiseTargets.cmake") 22 | 23 | check_required_components(OpenImageDenoise) -------------------------------------------------------------------------------- /cmake/oidn_bnns.cmake: -------------------------------------------------------------------------------- 1 | ## Copyright 2021 Intel Corporation 2 | ## SPDX-License-Identifier: Apache-2.0 3 | 4 | if(NOT IOS) 5 | set(OIDN_APPLE_SDK_VERSION_MIN 11.0) 6 | set(OIDN_APPLE_SDK_VERSION_MAX 11.0) 7 | else() 8 | set(OIDN_APPLE_SDK_VERSION_MIN 14.0) 9 | set(OIDN_APPLE_SDK_VERSION_MAX 14.0) 10 | endif() 11 | 12 | if(OIDN_APPLE_SDK_VERSION VERSION_LESS OIDN_APPLE_SDK_VERSION_MAX) 13 | message(FATAL_ERROR "Building with BNNS support requires Apple SDK version ${OIDN_APPLE_SDK_VERSION_MAX} or newer") 14 | endif() -------------------------------------------------------------------------------- /cmake/oidn_common.cmake: -------------------------------------------------------------------------------- 1 | ## Copyright 2018 Intel Corporation 2 | ## SPDX-License-Identifier: Apache-2.0 3 | 4 | # Policy: find_package() uses _ROOT variables 5 | if(POLICY CMP0074) 6 | cmake_policy(SET CMP0074 NEW) 7 | endif() 8 | 9 | # Set build output directories 10 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${OIDN_ROOT_BINARY_DIR}) 11 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${OIDN_ROOT_BINARY_DIR}) 12 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${OIDN_ROOT_BINARY_DIR}) 13 | 14 | # CMake macros 15 | include(oidn_macros) 16 | 17 | # Configuration types 18 | set(CONFIGURATION_TYPES "Debug;Release;RelWithDebInfo") 19 | if(WIN32) 20 | if(NOT OIDN_DEFAULT_CMAKE_CONFIGURATION_TYPES_SET) 21 | set(CMAKE_CONFIGURATION_TYPES "${CONFIGURATION_TYPES}" 22 | CACHE STRING "List of generated configurations." 
FORCE) 23 | set(OIDN_DEFAULT_CMAKE_CONFIGURATION_TYPES_SET ON 24 | CACHE INTERNAL "Default CMake configuration types set.") 25 | endif() 26 | else() 27 | if(NOT CMAKE_BUILD_TYPE) 28 | set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the build type." FORCE) 29 | endif() 30 | set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS ${CONFIGURATION_TYPES}) 31 | endif() 32 | 33 | # Build as shared or static library 34 | option(OIDN_STATIC_LIB "Build Open Image Denoise as a static or hybrid static/shared library.") 35 | mark_as_advanced(CLEAR OIDN_STATIC_LIB) 36 | if(OIDN_STATIC_LIB) 37 | set(OIDN_LIB_TYPE STATIC) 38 | else() 39 | set(OIDN_LIB_TYPE SHARED) 40 | endif() 41 | 42 | # Library name 43 | set(OIDN_LIBRARY_NAME "OpenImageDenoise" CACHE STRING "Base name of the Open Image Denoise library files.") 44 | option(OIDN_LIBRARY_VERSIONED "Build versioned Open Image Denoise library files." ON) 45 | mark_as_advanced(OIDN_LIBRARY_VERSIONED) 46 | 47 | # API namespace 48 | set(OIDN_API_NAMESPACE "" CACHE STRING "C++ namespace to put API symbols into.") 49 | if(OIDN_API_NAMESPACE) 50 | set(OIDN_NAMESPACE ${OIDN_API_NAMESPACE}::oidn) 51 | else() 52 | set(OIDN_NAMESPACE oidn) 53 | endif() 54 | 55 | # File containing targets exported from the build tree for external projects 56 | set(OIDN_BUILD_TREE_EXPORT_FILE ${OIDN_ROOT_BINARY_DIR}/cmake/oidn_targets.cmake) 57 | 58 | # Common resource file 59 | set(OIDN_RESOURCE_FILE ${OIDN_ROOT_SOURCE_DIR}/common/oidn.rc) 60 | 61 | # Platform-specific settings 62 | include(oidn_platform) 63 | 64 | # Packaging 65 | include(oidn_package) -------------------------------------------------------------------------------- /cmake/oidn_common_external.cmake: -------------------------------------------------------------------------------- 1 | ## Copyright 2023 Intel Corporation 2 | ## SPDX-License-Identifier: Apache-2.0 3 | 4 | # Define cache variables for common paths which will be set by the main library build 5 | set(OIDN_ROOT_BINARY_DIR "" CACHE 
PATH "Location of the main library build directory.") 6 | set(OIDN_INSTALL_RPATH_PREFIX "" CACHE PATH "Prefix for the RPATH of installed binaries.") 7 | 8 | if(NOT OIDN_ROOT_BINARY_DIR) 9 | message(FATAL_ERROR "OIDN_ROOT_BINARY_DIR is not set. The cache may have been deleted, please try building again.") 10 | endif() 11 | 12 | # Common 13 | include(oidn_common) 14 | 15 | # Import targets from the main library build directory 16 | include(${OIDN_BUILD_TREE_EXPORT_FILE}) -------------------------------------------------------------------------------- /cmake/oidn_version.cmake: -------------------------------------------------------------------------------- 1 | ## Copyright 2018 Intel Corporation 2 | ## SPDX-License-Identifier: Apache-2.0 3 | 4 | set(OIDN_VERSION_MAJOR 2) 5 | set(OIDN_VERSION_MINOR 3) 6 | set(OIDN_VERSION_PATCH 3) 7 | set(OIDN_VERSION_NOTE "") 8 | 9 | set(OIDN_VERSION ${OIDN_VERSION_MAJOR}.${OIDN_VERSION_MINOR}.${OIDN_VERSION_PATCH}) 10 | math(EXPR OIDN_VERSION_NUMBER "10000*${OIDN_VERSION_MAJOR} + 100*${OIDN_VERSION_MINOR} + ${OIDN_VERSION_PATCH}") -------------------------------------------------------------------------------- /common/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ## Copyright 2018 Intel Corporation 2 | ## SPDX-License-Identifier: Apache-2.0 3 | 4 | # Generate config.h 5 | configure_file( 6 | "${PROJECT_SOURCE_DIR}/include/OpenImageDenoise/config.h.in" 7 | "${PROJECT_SOURCE_DIR}/include/OpenImageDenoise/config.h" 8 | ) 9 | 10 | # Generate list of symbols to export 11 | configure_file( 12 | "${PROJECT_SOURCE_DIR}/common/export.linux.map.in" 13 | "${PROJECT_SOURCE_DIR}/common/export.linux.map" 14 | ) 15 | configure_file( 16 | "${PROJECT_SOURCE_DIR}/common/export.macos.map.in" 17 | "${PROJECT_SOURCE_DIR}/common/export.macos.map" 18 | ) 19 | 20 | add_library(OpenImageDenoise_common OBJECT 21 | ${PROJECT_SOURCE_DIR}/include/OpenImageDenoise/config.h 22 | 
${PROJECT_SOURCE_DIR}/include/OpenImageDenoise/oidn.h 23 | ${PROJECT_SOURCE_DIR}/include/OpenImageDenoise/oidn.hpp 24 | common.h 25 | common.cpp 26 | half.h 27 | half.cpp 28 | oidn_utils.h 29 | oidn_utils.cpp 30 | platform.h 31 | platform.cpp 32 | timer.h 33 | ) 34 | 35 | set_target_properties(OpenImageDenoise_common PROPERTIES OUTPUT_NAME ${OIDN_LIBRARY_NAME}_common) 36 | 37 | target_include_directories(OpenImageDenoise_common 38 | PUBLIC 39 | $ 40 | $ 41 | $ 42 | ) 43 | 44 | install(TARGETS OpenImageDenoise_common EXPORT OpenImageDenoise_Exports) -------------------------------------------------------------------------------- /common/common.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "common.h" 5 | 6 | OIDN_NAMESPACE_BEGIN 7 | 8 | size_t getDataTypeSize(DataType dataType) 9 | { 10 | switch (dataType) 11 | { 12 | case DataType::UInt8: return 1; 13 | case DataType::Float16: return sizeof(int16_t); 14 | case DataType::Float32: return sizeof(float); 15 | default: 16 | throw std::invalid_argument("invalid data type"); 17 | } 18 | } 19 | 20 | DataType getFormatDataType(Format format) 21 | { 22 | switch (format) 23 | { 24 | case Format::Undefined: 25 | return DataType::Void; 26 | case Format::Float: 27 | case Format::Float2: 28 | case Format::Float3: 29 | case Format::Float4: 30 | return DataType::Float32; 31 | case Format::Half: 32 | case Format::Half2: 33 | case Format::Half3: 34 | case Format::Half4: 35 | return DataType::Float16; 36 | default: 37 | throw std::invalid_argument("invalid format"); 38 | } 39 | } 40 | 41 | Format makeFormat(DataType dataType, int numChannels) 42 | { 43 | if (dataType == DataType::Void) 44 | return Format::Undefined; 45 | 46 | Format baseFormat; 47 | switch (dataType) 48 | { 49 | case DataType::Float16: 50 | baseFormat = Format::Half; 51 | break; 52 | case DataType::Float32: 53 | baseFormat = 
Format::Float; 54 | break; 55 | default: 56 | throw std::invalid_argument("unsupported format data type"); 57 | } 58 | 59 | if (numChannels < 1 || numChannels > 4) 60 | throw std::invalid_argument("invalid number of channels"); 61 | 62 | return Format(int(baseFormat) + numChannels - 1); 63 | } 64 | 65 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /common/common.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "oidn_utils.h" // must be included before platform.h 7 | #include "platform.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | // Synchronization mode for operations 12 | enum class SyncMode 13 | { 14 | Blocking, 15 | Async 16 | }; 17 | 18 | template 19 | struct DataTypeOf; 20 | 21 | template<> struct DataTypeOf { static constexpr DataType value = DataType::Void; }; 22 | template<> struct DataTypeOf { static constexpr DataType value = DataType::UInt8; }; 23 | template<> struct DataTypeOf { static constexpr DataType value = DataType::Float16; }; 24 | template<> struct DataTypeOf { static constexpr DataType value = DataType::Float32; }; 25 | 26 | // Returns the size of a data type in bytes 27 | size_t getDataTypeSize(DataType dataType); 28 | 29 | // Returns the data type of a format 30 | DataType getFormatDataType(Format format); 31 | 32 | // Makes a format from a data type and number of channels 33 | Format makeFormat(DataType dataType, int numChannels); 34 | 35 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /common/export.linux.map.in: -------------------------------------------------------------------------------- 1 | { 2 | global: 3 | oidn[A-Z]*; 4 | oidn_*; 5 | @OIDN_API_NAMESPACE@_oidn_*; 6 | _ZN[0-9]@OIDN_API_NAMESPACE@[0-9]oidn[A-Z]*; 7 | _ZN[0-9]@OIDN_API_NAMESPACE@[0-9][0-9]oidn[A-Z]*; 8 | 
_ZN[0-9][0-9]@OIDN_API_NAMESPACE@[0-9]oidn[A-Z]*; 9 | _ZN[0-9][0-9]@OIDN_API_NAMESPACE@[0-9][0-9]oidn[A-Z]*; 10 | local: 11 | *; 12 | }; -------------------------------------------------------------------------------- /common/export.macos.map.in: -------------------------------------------------------------------------------- 1 | _oidn[A-Z]* 2 | _oidn_* 3 | _@OIDN_API_NAMESPACE@_oidn_* 4 | __ZN[0-9]@OIDN_API_NAMESPACE@[0-9]oidn[A-Z]* 5 | __ZN[0-9]@OIDN_API_NAMESPACE@[0-9][0-9]oidn[A-Z]* 6 | __ZN[0-9][0-9]@OIDN_API_NAMESPACE@[0-9]oidn[A-Z]* 7 | __ZN[0-9][0-9]@OIDN_API_NAMESPACE@[0-9][0-9]oidn[A-Z]* -------------------------------------------------------------------------------- /common/half.h: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "include/OpenImageDenoise/config.h" 7 | #include 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | float half_to_float(int16_t x); 12 | int16_t float_to_half(float x); 13 | 14 | // Minimal half data type 15 | class half 16 | { 17 | public: 18 | half() = default; 19 | half(const half& h) : x(h.x) {} 20 | half(float f) : x(float_to_half(f)) {} 21 | 22 | half& operator =(const half& h) { x = h.x; return *this; } 23 | half& operator =(float f) { x = float_to_half(f); return *this; } 24 | 25 | operator float() const { return half_to_float(x); } 26 | 27 | private: 28 | int16_t x; 29 | }; 30 | 31 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /common/oidn.rc: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "include/OpenImageDenoise/config.h" 5 | 6 | 1 VERSIONINFO 7 | FILEVERSION OIDN_VERSION_MAJOR,OIDN_VERSION_MINOR,OIDN_VERSION_PATCH,0 8 | PRODUCTVERSION 
OIDN_VERSION_MAJOR,OIDN_VERSION_MINOR,OIDN_VERSION_PATCH,0 9 | FILEFLAGSMASK 0x3fL 10 | #ifdef _DEBUG 11 | FILEFLAGS 0x1L 12 | #else 13 | FILEFLAGS 0x0L 14 | #endif 15 | FILEOS 0x40004L 16 | FILETYPE 0x2L 17 | FILESUBTYPE 0x0L 18 | { 19 | BLOCK "StringFileInfo" 20 | { 21 | BLOCK "040904b0" 22 | { 23 | VALUE "CompanyName", "Intel" 24 | VALUE "FileDescription", "Intel Open Image Denoise Library" 25 | VALUE "FileVersion", OIDN_VERSION_STRING 26 | VALUE "ProductVersion", OIDN_VERSION_STRING 27 | VALUE "LegalCopyright", "Copyright 2009-2025 Intel Corporation" 28 | VALUE "InternalName", "Intel Open Image Denoise" 29 | VALUE "ProductName", "Intel(R) Open Image Denoise" 30 | } 31 | } 32 | BLOCK "VarFileInfo" 33 | { 34 | VALUE "Translation", 0x409, 1200 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /common/oidn_utils.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "include/OpenImageDenoise/oidn.hpp" 7 | #include 8 | #include 9 | 10 | OIDN_NAMESPACE_BEGIN 11 | 12 | // Returns the size of a format in bytes 13 | size_t getFormatSize(Format format); 14 | 15 | std::ostream& operator <<(std::ostream& sm, Format format); 16 | 17 | std::ostream& operator <<(std::ostream& sm, DeviceType deviceType); 18 | std::istream& operator >>(std::istream& sm, DeviceType& deviceType); 19 | 20 | std::ostream& operator <<(std::ostream& sm, Quality quality); 21 | 22 | std::ostream& operator <<(std::ostream& sm, const UUID& uuid); 23 | std::ostream& operator <<(std::ostream& sm, const LUID& luid); 24 | 25 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /common/timer.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | 
#pragma once 5 | 6 | #include "common/platform.h" 7 | #include 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | class Timer 12 | { 13 | public: 14 | Timer() 15 | { 16 | reset(); 17 | } 18 | 19 | void reset() 20 | { 21 | start = clock::now(); 22 | } 23 | 24 | double query() const 25 | { 26 | auto end = clock::now(); 27 | return std::chrono::duration_cast>(end - start).count(); 28 | } 29 | 30 | private: 31 | using clock = std::chrono::steady_clock; 32 | 33 | std::chrono::time_point start; 34 | }; 35 | 36 | OIDN_NAMESPACE_END 37 | -------------------------------------------------------------------------------- /core/autoexposure.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #if !defined(OIDN_COMPILE_METAL_DEVICE) 7 | #include "op.h" 8 | #include "image.h" 9 | #include "record.h" 10 | #endif 11 | 12 | OIDN_NAMESPACE_BEGIN 13 | 14 | struct AutoexposureParams 15 | { 16 | static constexpr oidn_constant int maxBinSize = 16; 17 | static constexpr oidn_constant float key = 0.18f; 18 | static constexpr oidn_constant float eps = 1e-8f; 19 | }; 20 | 21 | #if !defined(OIDN_COMPILE_METAL_DEVICE) 22 | 23 | class Autoexposure : public BaseOp, public AutoexposureParams 24 | { 25 | public: 26 | explicit Autoexposure(const ImageDesc& srcDesc) 27 | : srcDesc(srcDesc) 28 | { 29 | numBinsH = ceil_div(srcDesc.getH(), maxBinSize); 30 | numBinsW = ceil_div(srcDesc.getW(), maxBinSize); 31 | numBins = numBinsH * numBinsW; 32 | } 33 | 34 | void setSrc(const Ref& src) 35 | { 36 | if (!src || src->getW() != srcDesc.getW() || src->getH() != srcDesc.getH()) 37 | throw std::invalid_argument("invalid autoexposure source"); 38 | this->src = src; 39 | } 40 | 41 | void setDst(const Ref>& dst) { this->dst = dst; } 42 | float* getDstPtr() const { return dst->getPtr(); } 43 | 44 | protected: 45 | ImageDesc srcDesc; 46 | Ref src; 47 | Ref> dst; 48 | 49 | int numBinsH; 
50 | int numBinsW; 51 | int numBins; 52 | }; 53 | 54 | #endif // !defined(OIDN_COMPILE_METAL_DEVICE) 55 | 56 | OIDN_NAMESPACE_END 57 | -------------------------------------------------------------------------------- /core/color.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "color.h" 5 | 6 | OIDN_NAMESPACE_BEGIN 7 | 8 | constexpr float TransferFunction::yMax; 9 | 10 | TransferFunction::TransferFunction(Type type) 11 | : type(type) 12 | { 13 | const float xMax = math::reduce_max(forward(yMax)); 14 | normScale = 1./xMax; 15 | rcpNormScale = xMax; 16 | } 17 | 18 | OIDN_NAMESPACE_END 19 | -------------------------------------------------------------------------------- /core/concat_conv.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "concat_conv.h" 5 | 6 | OIDN_NAMESPACE_BEGIN 7 | 8 | ConcatConv::ConcatConv(const ConcatConvDesc& desc) 9 | : ConcatConvDesc(desc) 10 | { 11 | if (src1Desc.getRank() != 3 || 12 | src2Desc.getRank() != 3 || 13 | src1Desc.getH() != src2Desc.getH() || 14 | src1Desc.getW() != src2Desc.getW() || 15 | src1Desc.layout != src2Desc.layout || 16 | src1Desc.dataType != src2Desc.dataType) 17 | throw std::invalid_argument("invalid concat+conv source descriptor"); 18 | if (weightDesc.getRank() != 4 || weightDesc.getI() != (src1Desc.getC() + src2Desc.getC()) || 19 | weightDesc.getPaddedI() != (src1Desc.getPaddedC() + src2Desc.getPaddedC())) 20 | throw std::invalid_argument("invalid concat+conv weight shape"); 21 | 22 | TensorDims dstDims{weightDesc.getO(), src1Desc.getH(), src1Desc.getW()}; 23 | TensorDims dstPaddedDims{weightDesc.getPaddedO(), src1Desc.getH(), src1Desc.getW()}; 24 | dstDesc = {dstDims, dstPaddedDims, src1Desc.layout, src1Desc.dataType}; 25 | } 26 | 27 | void 
ConcatConv::setSrc(const Ref& src1, const Ref& src2) 28 | { 29 | if (!src1 || src1->getDesc() != src1Desc || !src2 || src2->getDesc() != src2Desc) 30 | throw std::invalid_argument("invalid concat+conv source"); 31 | 32 | this->src1 = src1; 33 | this->src2 = src2; 34 | updateSrc(); 35 | } 36 | 37 | void ConcatConv::setBias(const Ref& bias) 38 | { 39 | if (!bias || bias->getDesc() != biasDesc) 40 | throw std::invalid_argument("invalid concat+conv bias"); 41 | 42 | this->bias = bias; 43 | updateBias(); 44 | } 45 | 46 | void ConcatConv::setDst(const Ref& dst) 47 | { 48 | if (!dst || dst->getDesc() != dstDesc) 49 | throw std::invalid_argument("invalid concat+conv destination"); 50 | 51 | this->dst = dst; 52 | updateDst(); 53 | } 54 | 55 | OIDN_NAMESPACE_END 56 | -------------------------------------------------------------------------------- /core/concat_conv.h: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "conv.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | 10 | // Concatenation + convolution descriptor 11 | struct ConcatConvDesc 12 | { 13 | TensorDesc src1Desc; 14 | TensorDesc src2Desc; 15 | TensorDesc weightDesc; 16 | TensorDesc biasDesc; 17 | Activation activation; 18 | bool fastMath; // prefer performance over accuracy 19 | }; 20 | 21 | class ConcatConv : public BaseOp, protected ConcatConvDesc 22 | { 23 | public: 24 | ConcatConv(const ConcatConvDesc& desc); 25 | 26 | TensorDesc getDstDesc() const { return dstDesc; } 27 | Ref getDst() const { return dst; } 28 | 29 | void setSrc(const Ref& src1, const Ref& src2); 30 | void setBias(const Ref& bias); 31 | void setDst(const Ref& dst); 32 | 33 | protected: 34 | virtual void updateSrc() {} 35 | virtual void updateBias() {} 36 | virtual void updateDst() {} 37 | 38 | TensorDesc dstDesc; 39 | 40 | Ref src1; 41 | Ref src2; 42 | Ref bias; 43 | Ref dst; 44 | }; 45 | 46 | 
OIDN_NAMESPACE_END 47 | -------------------------------------------------------------------------------- /core/concat_conv_chw.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "concat_conv_chw.h" 5 | #include "engine.h" 6 | 7 | OIDN_NAMESPACE_BEGIN 8 | 9 | ConcatConvCHW::ConcatConvCHW(Engine* engine, const ConcatConvDesc& desc) 10 | : ConcatConv(desc) 11 | { 12 | if (src1Desc.layout == TensorLayout::hwc) 13 | throw std::invalid_argument("unsupported concat+conv source layout"); 14 | 15 | TensorDims srcDims{src1Desc.getC() + src2Desc.getC(), src1Desc.getH(), src1Desc.getW()}; 16 | TensorDims srcPaddedDims{src1Desc.getPaddedC() + src2Desc.getPaddedC(), src1Desc.getH(), src1Desc.getW()}; 17 | srcDesc = {srcDims, srcPaddedDims, src1Desc.layout, src1Desc.dataType}; 18 | 19 | conv = engine->newConv({srcDesc, weightDesc, biasDesc, activation, PostOp::None, fastMath}); 20 | } 21 | 22 | void ConcatConvCHW::updateSrc() 23 | { 24 | if (!src1->getBuffer() || !src2->getBuffer()) 25 | throw std::invalid_argument("concat+conv sources must be backed by buffers"); 26 | if (src1->getBuffer() != src2->getBuffer() || 27 | (static_cast(src1->getPtr()) + src1->getByteSize()) != static_cast(src2->getPtr())) 28 | throw std::invalid_argument("concat+conv sources are not pre-concatenated in memory"); 29 | 30 | auto src = src1->getBuffer()->newTensor(srcDesc, src1->getByteOffset()); 31 | conv->setSrc(src); 32 | } 33 | 34 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /core/concat_conv_chw.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "concat_conv.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | 10 | // Concatenation + convolution for CHW tensors (including 
blocked) stored consecutively in memory 11 | // Since the tensors are pre-concatenated in memory, only the convolution needs to be executed 12 | class ConcatConvCHW final : public ConcatConv 13 | { 14 | public: 15 | ConcatConvCHW(Engine* engine, const ConcatConvDesc& desc); 16 | 17 | Engine* getEngine() const override { return conv->getEngine(); } 18 | 19 | size_t getScratchByteSize() override { return conv->getScratchByteSize(); } 20 | void setScratch(const Ref& scratch) override { conv->setScratch(scratch); } 21 | 22 | void setWeight(const Ref& weight) { conv->setWeight(weight); } 23 | 24 | void finalize() override { conv->finalize(); } 25 | void submitKernels(const Ref& ct) override { conv->submitKernels(ct); } 26 | 27 | private: 28 | void updateSrc() override; 29 | void updateBias() override { conv->setBias(bias); } 30 | void updateDst() override { conv->setDst(dst); } 31 | 32 | TensorDesc srcDesc; // pre-concatenated source 33 | Ref conv; 34 | }; 35 | 36 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /core/concat_conv_hwc.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "concat_conv_hwc.h" 5 | #include "engine.h" 6 | 7 | OIDN_NAMESPACE_BEGIN 8 | 9 | ConcatConvHWC::ConcatConvHWC(Engine* engine, const ConcatConvDesc& desc) 10 | : ConcatConv(desc) 11 | { 12 | if (src1Desc.layout != TensorLayout::hwc) 13 | throw std::logic_error("unsupported concat+conv source layout"); 14 | 15 | // Split the convolution into two smaller convolutions 16 | weight1Desc = {{dstDesc.getC(), src1Desc.getC(), weightDesc.getH(), weightDesc.getW()}, 17 | {dstDesc.getPaddedC(), src1Desc.getPaddedC(), weightDesc.getH(), weightDesc.getW()}, 18 | weightDesc.layout, 19 | weightDesc.dataType}; 20 | 21 | weight2Desc = {{dstDesc.getC(), src2Desc.getC(), weightDesc.getH(), weightDesc.getW()}, 22 | 
{dstDesc.getPaddedC(), src2Desc.getPaddedC(), weightDesc.getH(), weightDesc.getW()}, 23 | weightDesc.layout, 24 | weightDesc.dataType}; 25 | 26 | // Convolution 1: dst = conv(src1, weight1) + bias 27 | conv1 = engine->newConv({src1Desc, weight1Desc, biasDesc, Activation::None, PostOp::None, fastMath}); 28 | 29 | // Convolution 2: dst = activation(conv(src2, weight2) + dst) 30 | // We use dst as bias 31 | conv2 = engine->newConv({src2Desc, weight2Desc, dstDesc, activation, PostOp::None, fastMath}); 32 | } 33 | 34 | bool ConcatConvHWC::isSupported() const 35 | { 36 | return conv1->isSupported() && conv2->isSupported(); 37 | } 38 | 39 | size_t ConcatConvHWC::getScratchByteSize() 40 | { 41 | return max(conv1->getScratchByteSize(), conv2->getScratchByteSize()); 42 | } 43 | 44 | void ConcatConvHWC::setScratch(const Ref& scratch) 45 | { 46 | conv1->setScratch(scratch); 47 | conv2->setScratch(scratch); 48 | } 49 | 50 | void ConcatConvHWC::setWeight(const Ref& weight1, const Ref& weight2) 51 | { 52 | conv1->setWeight(weight1); 53 | conv2->setWeight(weight2); 54 | } 55 | 56 | void ConcatConvHWC::updateSrc() 57 | { 58 | conv1->setSrc(src1); 59 | conv2->setSrc(src2); 60 | } 61 | 62 | void ConcatConvHWC::updateDst() 63 | { 64 | conv1->setDst(dst); 65 | 66 | conv2->setBias(dst); 67 | conv2->setDst(dst); 68 | } 69 | 70 | void ConcatConvHWC::finalize() 71 | { 72 | conv1->finalize(); 73 | conv2->finalize(); 74 | } 75 | 76 | void ConcatConvHWC::submitKernels(const Ref& ct) 77 | { 78 | conv1->submitKernels(ct); 79 | conv2->submitKernels(ct); 80 | } 81 | 82 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /core/concat_conv_hwc.h: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "concat_conv.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | 10 | // Concatenation + convolution for HWC tensors 11 | 
// The convolution is split into two smaller convolutions, one for each input tensor 12 | // The weights for each convolution must be set separately 13 | class ConcatConvHWC final : public ConcatConv 14 | { 15 | public: 16 | ConcatConvHWC(Engine* engine, const ConcatConvDesc& desc); 17 | 18 | Engine* getEngine() const override { return conv1->getEngine(); } 19 | bool isSupported() const override; 20 | 21 | size_t getScratchByteSize() override; 22 | void setScratch(const Ref& scratch) override; 23 | 24 | TensorDesc getWeight1Desc() const { return weight1Desc; } 25 | TensorDesc getWeight2Desc() const { return weight2Desc; } 26 | void setWeight(const Ref& weight1, const Ref& weight2); 27 | 28 | void finalize() override; 29 | void submitKernels(const Ref& ct) override; 30 | 31 | private: 32 | void updateSrc() override; 33 | void updateBias() override { conv1->setBias(bias); } 34 | void updateDst() override; 35 | 36 | TensorDesc weight1Desc; 37 | TensorDesc weight2Desc; 38 | 39 | Ref conv1; 40 | Ref conv2; 41 | }; 42 | 43 | OIDN_NAMESPACE_END 44 | -------------------------------------------------------------------------------- /core/context.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "context.h" 5 | 6 | OIDN_NAMESPACE_BEGIN 7 | 8 | Context& Context::get() 9 | { 10 | static Context instance; 11 | return instance; 12 | } 13 | 14 | bool Context::isDeviceSupported(DeviceType type) const 15 | { 16 | return deviceFactories.find(type) != deviceFactories.end(); 17 | } 18 | 19 | DeviceFactory* Context::getDeviceFactory(DeviceType type) const 20 | { 21 | auto it = deviceFactories.find(type); 22 | if (it == deviceFactories.end()) 23 | throw Exception(Error::UnsupportedHardware, "unsupported device type: " + toString(type)); 24 | return it->second.get(); 25 | } 26 | 27 | const Ref& Context::getPhysicalDevice(int id) const 28 | { 29 | if (id < 
0 || static_cast(id) >= physicalDevices.size()) 30 | throw Exception(Error::InvalidArgument, "invalid physical device ID: " + toString(id)); 31 | return physicalDevices[id]; 32 | } 33 | 34 | Ref Context::newDevice(int physicalDeviceID) 35 | { 36 | const auto& physicalDevice = getPhysicalDevice(physicalDeviceID); 37 | const DeviceType type = physicalDevice->type; 38 | return getDeviceFactory(type)->newDevice(physicalDevice); 39 | } 40 | 41 | Ref Context::newDevice(DeviceType type) 42 | { 43 | if (type == DeviceType::Default) 44 | return newDevice(0); 45 | 46 | // Find the first physical device of the specified type 47 | for (const auto& physicalDevice : physicalDevices) 48 | { 49 | if (physicalDevice->type == type) 50 | return getDeviceFactory(type)->newDevice(physicalDevice); 51 | } 52 | 53 | throw Exception(Error::UnsupportedHardware, "unsupported device type: " + toString(type)); 54 | } 55 | 56 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /core/conv.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "op.h" 7 | #include "tensor.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | // Activation function 12 | enum class Activation 13 | { 14 | None, // identity 15 | ReLU 16 | }; 17 | 18 | enum class PostOp 19 | { 20 | None, 21 | Pool, 22 | Upsample 23 | }; 24 | 25 | // Convolution descriptor 26 | struct ConvDesc 27 | { 28 | TensorDesc srcDesc; 29 | TensorDesc weightDesc; 30 | TensorDesc biasDesc; 31 | Activation activation; 32 | PostOp postOp; 33 | bool fastMath; // prefer performance over accuracy 34 | }; 35 | 36 | // Convolution 37 | class Conv : public BaseOp, protected ConvDesc 38 | { 39 | public: 40 | Conv(const ConvDesc& desc); 41 | 42 | TensorDesc getDstDesc() const { return dstDesc; } 43 | Ref getDst() const { return dst; } 44 | 45 | void setSrc(const Ref& src); 
46 | void setWeight(const Ref& weight); 47 | void setBias(const Ref& bias); 48 | void setDst(const Ref& dst); 49 | 50 | protected: 51 | virtual void updateSrc() {} 52 | virtual void updateWeight() {} 53 | virtual void updateBias() {} 54 | virtual void updateDst() {} 55 | 56 | TensorDesc dstDesc; 57 | Ref src; 58 | Ref weight; 59 | Ref bias; 60 | Ref dst; 61 | }; 62 | 63 | OIDN_NAMESPACE_END 64 | -------------------------------------------------------------------------------- /core/data.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "common/platform.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | 10 | // Opaque read-only data 11 | struct Data 12 | { 13 | const void* ptr; 14 | size_t size; 15 | 16 | Data() : ptr(nullptr), size(0) {} 17 | Data(std::nullptr_t) : ptr(nullptr), size(0) {} 18 | 19 | template 20 | Data(T* ptr, size_t size) 21 | : ptr(ptr), 22 | size(size) 23 | { 24 | if (ptr == nullptr && size > 0) 25 | throw Exception(Error::InvalidArgument, "data pointer is null"); 26 | } 27 | 28 | template 29 | Data(T (&array)[N]) : ptr(array), size(sizeof(array)) {} 30 | 31 | template 32 | Data& operator =(T (&array)[N]) 33 | { 34 | ptr = array; 35 | size = sizeof(array); 36 | return *this; 37 | } 38 | 39 | oidn_inline operator bool() const 40 | { 41 | return ptr != nullptr; 42 | } 43 | }; 44 | 45 | OIDN_NAMESPACE_END 46 | -------------------------------------------------------------------------------- /core/device_factory.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "device.h" 7 | #include 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | class DeviceFactory : public RefCount 12 | { 13 | public: 14 | virtual Ref newDevice(const Ref& physicalDevice) = 0; 15 | }; 16 | 17 | class 
SYCLDeviceFactoryBase : public DeviceFactory 18 | { 19 | public: 20 | using DeviceFactory::newDevice; 21 | 22 | virtual bool isDeviceSupported(const sycl::device* device) = 0; 23 | virtual Ref newDevice(const sycl::queue* queues, int numQueues) = 0; 24 | }; 25 | 26 | class CUDADeviceFactoryBase : public DeviceFactory 27 | { 28 | public: 29 | using DeviceFactory::newDevice; 30 | 31 | virtual bool isDeviceSupported(int deviceID) = 0; 32 | virtual Ref newDevice(const int* deviceIDs, const cudaStream_t* streams, int numPairs) = 0; 33 | }; 34 | 35 | class HIPDeviceFactoryBase : public DeviceFactory 36 | { 37 | public: 38 | using DeviceFactory::newDevice; 39 | 40 | virtual bool isDeviceSupported(int deviceID) = 0; 41 | virtual Ref newDevice(const int* deviceIDs, const hipStream_t* streams, int numPairs) = 0; 42 | }; 43 | 44 | class MetalDeviceFactoryBase : public DeviceFactory 45 | { 46 | public: 47 | using DeviceFactory::newDevice; 48 | 49 | virtual bool isDeviceSupported(MTLDevice_id device) = 0; 50 | virtual Ref newDevice(const MTLCommandQueue_id* commandQueues, int numQueues) = 0; 51 | }; 52 | 53 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /core/exception.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "exception.h" 5 | 6 | OIDN_NAMESPACE_BEGIN 7 | 8 | // We *must* define this function here because Exception must have a key function, which is the 9 | // first non-pure out-of-line virtual function of a type. Otherwise, the type_info would be 10 | // emitted as a weak symbol and its address may be different in dynamically loaded modules, 11 | // which would cause exception handling and dynamic_cast to fail. 
12 | const char* Exception::what() const noexcept 13 | { 14 | return message->c_str(); 15 | } 16 | 17 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /core/exception.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "common/common.h" 7 | #include 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | class Exception : public std::exception 12 | { 13 | public: 14 | Exception(Error error, const char* message) 15 | : error(error), 16 | message(std::make_shared(message)) {} 17 | 18 | Exception(Error error, const std::string& message) 19 | : error(error), 20 | message(std::make_shared(message)) {} 21 | 22 | Error code() const noexcept 23 | { 24 | return error; 25 | } 26 | 27 | const char* what() const noexcept override; 28 | 29 | private: 30 | Error error; 31 | 32 | // Exceptions must have noexcept copy constructors, so we cannot use std::string directly 33 | std::shared_ptr message; 34 | }; 35 | 36 | OIDN_NAMESPACE_END 37 | -------------------------------------------------------------------------------- /core/filter.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "filter.h" 5 | 6 | OIDN_NAMESPACE_BEGIN 7 | 8 | Filter::Filter(const Ref& device) 9 | : device(device) {} 10 | 11 | Filter::~Filter() 12 | { 13 | // We trim the scratch heaps only here to make filter resolution changes more efficient 14 | device->trimScratch(); 15 | } 16 | 17 | void Filter::setProgressMonitorFunction(ProgressMonitorFunction func, void* userPtr) 18 | { 19 | progressFunc = func; 20 | progressUserPtr = userPtr; 21 | } 22 | 23 | void Filter::setParam(int& dst, int src) 24 | { 25 | dirtyParam |= dst != src; 26 | dst = src; 27 | } 28 | 29 | void Filter::setParam(bool& dst, 
int src) 30 | { 31 | dirtyParam |= dst != bool(src); 32 | dst = src; 33 | } 34 | 35 | void Filter::setParam(Quality& dst, Quality src) 36 | { 37 | dirtyParam |= dst != src; 38 | dst = src; 39 | } 40 | 41 | void Filter::setParam(Ref& dst, const Ref& src) 42 | { 43 | // Check whether the image is accessible by the device 44 | if (src && *src && !device->isSystemMemorySupported()) 45 | { 46 | const Storage storage = src->getBuffer() ? src->getBuffer()->getStorage() 47 | : device->getPtrStorage(src->getPtr()); 48 | if (storage == Storage::Undefined) 49 | throw Exception(Error::InvalidArgument, "image data not accessible by the device, please use OIDNBuffer or device allocator for storage"); 50 | } 51 | 52 | // The image parameter is *not* dirty if only the pointer and/or strides change (except to/from nullptr) 53 | dirtyParam |= (!dst && src && *src) || (dst && (!src || !(*src))) || 54 | (dst && src && *src && 55 | ((dst->getW() != src->getW()) || (dst->getH() != src->getH()) || 56 | (dst->getFormat() != src->getFormat()))); 57 | 58 | if (src && *src) 59 | dst = src; 60 | else 61 | dst = nullptr; 62 | } 63 | 64 | void Filter::removeParam(Ref& dst) 65 | { 66 | dirtyParam |= bool(dst); 67 | dst = nullptr; 68 | } 69 | 70 | void Filter::setParam(Data& dst, const Data& src) 71 | { 72 | // Check whether the data is accessible to the host 73 | if (src && device->getPtrStorage(src.ptr) == Storage::Device) 74 | throw Exception(Error::InvalidArgument, "the specified data is not accessible to the host, please use host malloc"); 75 | 76 | dirtyParam = dst || src; 77 | dst = src; 78 | } 79 | 80 | void Filter::removeParam(Data& dst) 81 | { 82 | dirtyParam |= dst; 83 | dst = Data(); 84 | } 85 | 86 | OIDN_NAMESPACE_END 87 | -------------------------------------------------------------------------------- /core/filter.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 
4 | #pragma once 5 | 6 | #include "device.h" 7 | #include "image.h" 8 | #include "data.h" 9 | 10 | OIDN_NAMESPACE_BEGIN 11 | 12 | class Filter : public RefCount 13 | { 14 | public: 15 | explicit Filter(const Ref& device); 16 | ~Filter(); 17 | 18 | Device* getDevice() const { return device.get(); } 19 | 20 | virtual void setImage(const std::string& name, const Ref& image) = 0; 21 | virtual void unsetImage(const std::string& name) = 0; 22 | virtual void setData(const std::string& name, const Data& data) = 0; 23 | virtual void updateData(const std::string& name) = 0; 24 | virtual void unsetData(const std::string& name) = 0; 25 | virtual void setInt(const std::string& name, int value) = 0; 26 | virtual int getInt(const std::string& name) = 0; 27 | virtual void setFloat(const std::string& name, float value) = 0; 28 | virtual float getFloat(const std::string& name) = 0; 29 | 30 | void setProgressMonitorFunction(ProgressMonitorFunction func, void* userPtr); 31 | 32 | virtual void commit() = 0; 33 | virtual void execute(SyncMode sync = SyncMode::Blocking) = 0; 34 | 35 | protected: 36 | void setParam(int& dst, int src); 37 | void setParam(bool& dst, int src); 38 | void setParam(Quality& dst, Quality src); 39 | void setParam(Ref& dst, const Ref& src); 40 | void removeParam(Ref& dst); 41 | void setParam(Data& dst, const Data& src); 42 | void removeParam(Data& dst); 43 | 44 | Ref device; 45 | 46 | ProgressMonitorFunction progressFunc = nullptr; 47 | void* progressUserPtr = nullptr; 48 | 49 | bool dirty = true; 50 | bool dirtyParam = true; 51 | }; 52 | 53 | OIDN_NAMESPACE_END 54 | -------------------------------------------------------------------------------- /core/heap.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "heap.h" 5 | #include "buffer.h" 6 | #include "engine.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | 10 | // 
----------------------------------------------------------------------------------------------- 11 | // Heap 12 | // ----------------------------------------------------------------------------------------------- 13 | 14 | void Heap::attach(Buffer* buffer) 15 | { 16 | buffers.insert(buffer); 17 | } 18 | 19 | void Heap::detach(Buffer* buffer) 20 | { 21 | buffers.erase(buffer); 22 | } 23 | 24 | void Heap::preRealloc() 25 | { 26 | for (auto buffer : buffers) 27 | buffer->preRealloc(); 28 | } 29 | 30 | void Heap::postRealloc() 31 | { 32 | for (auto buffer : buffers) 33 | buffer->postRealloc(); 34 | } 35 | 36 | // ----------------------------------------------------------------------------------------------- 37 | // USMHeap 38 | // ----------------------------------------------------------------------------------------------- 39 | 40 | USMHeap::USMHeap(Engine* engine, size_t byteSize, Storage storage) 41 | : engine(engine), 42 | ptr(nullptr), 43 | byteSize(byteSize), 44 | storage(storage) 45 | { 46 | if (storage == Storage::Undefined) 47 | this->storage = Storage::Device; 48 | 49 | ptr = static_cast(engine->usmAlloc(byteSize, this->storage)); 50 | } 51 | 52 | USMHeap::~USMHeap() 53 | { 54 | try 55 | { 56 | engine->usmFree(ptr, storage); 57 | } 58 | catch (...) 
{} 59 | } 60 | 61 | void USMHeap::realloc(size_t newByteSize) 62 | { 63 | if (newByteSize == byteSize) 64 | return; 65 | 66 | preRealloc(); 67 | 68 | engine->usmFree(ptr, storage); 69 | ptr = static_cast(engine->usmAlloc(newByteSize, storage)); 70 | byteSize = newByteSize; 71 | 72 | postRealloc(); 73 | } 74 | 75 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /core/heap.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "common/common.h" 7 | #include "ref.h" 8 | #include 9 | 10 | OIDN_NAMESPACE_BEGIN 11 | 12 | class Engine; 13 | class Buffer; 14 | 15 | // ----------------------------------------------------------------------------------------------- 16 | // Heap 17 | // ----------------------------------------------------------------------------------------------- 18 | 19 | class Heap : public RefCount 20 | { 21 | friend class Buffer; 22 | 23 | public: 24 | virtual Engine* getEngine() const = 0; 25 | virtual size_t getByteSize() const = 0; 26 | virtual Storage getStorage() const = 0; 27 | 28 | virtual void realloc(size_t newByteSize) = 0; 29 | 30 | protected: 31 | void preRealloc(); 32 | void postRealloc(); 33 | 34 | private: 35 | void attach(Buffer* buffer); 36 | void detach(Buffer* buffer); 37 | 38 | std::unordered_set buffers; 39 | }; 40 | 41 | // ----------------------------------------------------------------------------------------------- 42 | // USMHeap 43 | // ----------------------------------------------------------------------------------------------- 44 | 45 | class USMBuffer; 46 | 47 | // Unified shared memory (USM) based heap 48 | class USMHeap : public Heap 49 | { 50 | friend class USMBuffer; 51 | 52 | public: 53 | USMHeap(Engine* engine, size_t byteSize, Storage storage); 54 | ~USMHeap(); 55 | 56 | Engine* getEngine() const override { return 
engine; } 57 | size_t getByteSize() const override { return byteSize; } 58 | Storage getStorage() const override { return storage; } 59 | 60 | void realloc(size_t newByteSize) override; 61 | 62 | private: 63 | Engine* engine; 64 | char* ptr; 65 | size_t byteSize; 66 | Storage storage; 67 | }; 68 | 69 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /core/image_copy.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "op.h" 7 | #include "image.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | class ImageCopy : public BaseOp 12 | { 13 | public: 14 | void setSrc(const Ref& src) { this->src = src; } 15 | void setDst(const Ref& dst) { this->dst = dst; } 16 | 17 | protected: 18 | void check() 19 | { 20 | if (!src || !dst) 21 | throw std::logic_error("image copy source/destination not set"); 22 | if (dst->getH() < src->getH() || dst->getW() < src->getW()) 23 | throw std::out_of_range("image copy destination smaller than the source"); 24 | } 25 | 26 | Ref src; 27 | Ref dst; 28 | }; 29 | 30 | OIDN_NAMESPACE_END 31 | -------------------------------------------------------------------------------- /core/input_process.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "input_process.h" 5 | #include "engine.h" 6 | 7 | OIDN_NAMESPACE_BEGIN 8 | 9 | InputProcess::InputProcess(Engine* engine, const InputProcessDesc& desc) 10 | : InputProcessDesc(desc) 11 | { 12 | if (srcDims.size() != 3) 13 | throw std::invalid_argument("invalid input processing source shape"); 14 | 15 | TensorDims dstDims = srcDims; 16 | 17 | TensorDims dstPaddedDims { 18 | round_up(srcDims[0], engine->getDevice()->getTensorBlockC()), // round up C 19 | dstDims[1], 20 | dstDims[2] 21 | 
}; 22 | 23 | dstDesc = {dstDims, dstPaddedDims, engine->getDevice()->getTensorLayout(), engine->getDevice()->getTensorDataType()}; 24 | 25 | setTile(0, 0, 0, 0, 0, 0); 26 | } 27 | 28 | void InputProcess::setSrc(const Ref& color, 29 | const Ref& albedo, 30 | const Ref& normal) 31 | { 32 | int C = 0; 33 | if (color) C += 3; // always broadcast to 3 channels 34 | if (albedo) C += 3; 35 | if (normal) C += 3; 36 | if (C != srcDims[0]) 37 | throw std::invalid_argument("invalid input processing source"); 38 | 39 | this->color = color; 40 | this->albedo = albedo; 41 | this->normal = normal; 42 | updateSrc(); 43 | } 44 | 45 | void InputProcess::setDst(const Ref& dst) 46 | { 47 | if (!dst || dst->getDesc() != dstDesc) 48 | throw std::invalid_argument("invalid input processing destination"); 49 | 50 | this->dst = dst; 51 | } 52 | 53 | void InputProcess::setTile(int hSrc, int wSrc, int hDst, int wDst, int H, int W) 54 | { 55 | tile.hSrcBegin = hSrc; 56 | tile.wSrcBegin = wSrc; 57 | tile.hDstBegin = hDst; 58 | tile.wDstBegin = wDst; 59 | tile.H = H; 60 | tile.W = W; 61 | } 62 | 63 | void InputProcess::check() 64 | { 65 | if (!getMainSrc() || !dst) 66 | throw std::logic_error("input processing source/destination not set"); 67 | if (tile.hSrcBegin + tile.H > getMainSrc()->getH() || 68 | tile.wSrcBegin + tile.W > getMainSrc()->getW() || 69 | tile.hDstBegin + tile.H > dst->getH() || 70 | tile.wDstBegin + tile.W > dst->getW()) 71 | throw std::out_of_range("input processing source/destination out of bounds"); 72 | } 73 | 74 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /core/input_process.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "op.h" 7 | #include "image.h" 8 | #include "tensor.h" 9 | #include "color.h" 10 | #include "tile.h" 11 | 12 | OIDN_NAMESPACE_BEGIN 13 | 14 | 
struct InputProcessDesc 15 | { 16 | TensorDims srcDims; 17 | std::shared_ptr transferFunc; 18 | bool hdr; 19 | bool snorm; 20 | }; 21 | 22 | class InputProcess : public BaseOp, protected InputProcessDesc 23 | { 24 | public: 25 | InputProcess(Engine* engine, const InputProcessDesc& desc); 26 | 27 | TensorDesc getDstDesc() const { return dstDesc; } 28 | Ref getDst() const { return dst; } 29 | 30 | void setSrc(const Ref& color, 31 | const Ref& albedo, 32 | const Ref& normal); 33 | void setDst(const Ref& dst); 34 | void setTile(int hSrc, int wSrc, int hDst, int wDst, int H, int W); 35 | 36 | protected: 37 | virtual void updateSrc() {} 38 | void check(); 39 | 40 | Image* getMainSrc() 41 | { 42 | return color ? color.get() : (albedo ? albedo.get() : normal.get()); 43 | } 44 | 45 | TensorDesc dstDesc; 46 | Ref color; 47 | Ref albedo; 48 | Ref normal; 49 | Ref dst; 50 | Tile tile; 51 | }; 52 | 53 | OIDN_NAMESPACE_END 54 | -------------------------------------------------------------------------------- /core/math.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "common/platform.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | namespace math { 10 | 11 | #if defined(OIDN_COMPILE_SYCL_DEVICE) 12 | // Use the SYCL math functions 13 | using sycl::min; 14 | using sycl::max; 15 | using sycl::isfinite; 16 | using sycl::isnan; 17 | using sycl::pow; 18 | using sycl::log; 19 | using sycl::log2; 20 | using sycl::exp; 21 | using sycl::exp2; 22 | #elif defined(OIDN_COMPILE_CUDA_DEVICE) || defined(OIDN_COMPILE_HIP_DEVICE) 23 | // Use the CUDA/HIP math functions 24 | template oidn_host_device_inline T min(T a, T b) { return ::min(a, b); } 25 | template oidn_host_device_inline T max(T a, T b) { return ::max(a, b); } 26 | using ::isfinite; 27 | using ::isnan; 28 | using ::pow; 29 | using ::log; 30 | using ::log2; 31 | using ::exp; 32 | using ::exp2; 
33 | #elif defined(OIDN_COMPILE_METAL_DEVICE) 34 | // Use the Metal math functions 35 | using metal::min; 36 | using metal::max; 37 | using metal::isfinite; 38 | using metal::isnan; 39 | using metal::pow; 40 | using metal::log; 41 | using metal::log2; 42 | using metal::exp; 43 | using metal::exp2; 44 | #else 45 | using OIDN_NAMESPACE::min; 46 | using OIDN_NAMESPACE::max; 47 | using std::isfinite; 48 | using std::isnan; 49 | using std::pow; 50 | using std::log; 51 | using std::log2; 52 | using std::exp; 53 | using std::exp2; 54 | #endif 55 | 56 | // CUDA and HIP do not provide min/max overloads for half 57 | #if defined(OIDN_COMPILE_CUDA_DEVICE) && (__CUDA_ARCH__ >= 800) 58 | oidn_device_inline half min(half a, half b) { return __hmin(a, b); } 59 | oidn_device_inline half max(half a, half b) { return __hmax(a, b); } 60 | #elif (defined(OIDN_COMPILE_CUDA_DEVICE) && (__CUDA_ARCH__ >= 530)) || defined(OIDN_COMPILE_HIP_DEVICE) 61 | oidn_device_inline half min(half a, half b) { return (b < a) ? b : a; } 62 | oidn_device_inline half max(half a, half b) { return (a < b) ? b : a; } 63 | #endif 64 | 65 | template 66 | oidn_host_device_inline T clamp(T x, T minVal, T maxVal) 67 | { 68 | return min(max(x, minVal), maxVal); 69 | } 70 | 71 | oidn_host_device_inline float to_float_unorm(uint32_t x) 72 | { 73 | return float(x) * 2.3283064365386962890625e-10f; // x / 2^32 74 | } 75 | 76 | // Maps nan to zero 77 | oidn_host_device_inline float nan_to_zero(float x) 78 | { 79 | return isnan(x) ? 
0.f : x; 80 | } 81 | 82 | } // namespace math 83 | OIDN_NAMESPACE_END 84 | -------------------------------------------------------------------------------- /core/module.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "common/common.h" 7 | #include 8 | 9 | #if defined(_WIN32) 10 | #define OIDN_MODULE_EXPORT extern "C" __declspec(dllexport) 11 | #else 12 | #define OIDN_MODULE_EXPORT extern "C" __attribute__ ((visibility ("default"))) 13 | #endif 14 | 15 | #define OIDN_DECLARE_INIT_MODULE(name) \ 16 | OIDN_MODULE_EXPORT void OIDN_CONCAT(OIDN_NAMESPACE_C, OIDN_CONCAT(_init_module_##name##_v, OIDN_VERSION))() 17 | 18 | #if defined(OIDN_STATIC_LIB) 19 | #define OIDN_DECLARE_INIT_STATIC_MODULE(name) void init_##name() 20 | #else 21 | #define OIDN_DECLARE_INIT_STATIC_MODULE(name) OIDN_DECLARE_INIT_MODULE(name) 22 | #endif 23 | 24 | OIDN_NAMESPACE_BEGIN 25 | 26 | class ModuleLoader 27 | { 28 | public: 29 | ModuleLoader(); 30 | 31 | bool load(const std::string& name); 32 | 33 | private: 34 | #if defined(_WIN32) 35 | using Path = std::wstring; 36 | static constexpr const wchar_t* pathSeps = L"/\\"; 37 | #else 38 | using Path = std::string; 39 | static constexpr const char* pathSeps = "/\\"; 40 | #endif 41 | 42 | static void* getSymbolAddress(void* module, const std::string& name); 43 | static void closeModule(void* module); 44 | 45 | // Returns the absolute path of the module that contains the given address 46 | // If address is nullptr, returns the path of this module 47 | static Path getModulePath(void* address = nullptr); 48 | 49 | Path modulePathPrefix; // absolute path of the module directory with trailing path separator 50 | std::unordered_set modules; // loaded module names 51 | }; 52 | 53 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /core/op.cpp: 
-------------------------------------------------------------------------------- 1 | // Copyright 2024 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "op.h" 5 | 6 | OIDN_NAMESPACE_BEGIN 7 | 8 | void BaseOp::submit(const Ref& progress) 9 | { 10 | Engine* engine = nullptr; 11 | 12 | if (progress) 13 | { 14 | engine = getEngine(); 15 | Progress::submitUpdate(engine, progress); 16 | } 17 | 18 | submitKernels(progress); 19 | 20 | if (progress) 21 | Progress::submitUpdate(engine, progress, getWorkAmount()); 22 | } 23 | 24 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /core/op.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "engine.h" 7 | #include "buffer.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | // Abstract operation class 12 | class Op : public RefCount 13 | { 14 | public: 15 | virtual ~Op() = default; 16 | 17 | virtual Engine* getEngine() const = 0; 18 | 19 | // Support must be checked before getting the scratch size or submission 20 | virtual bool isSupported() const { return true; } 21 | 22 | // Scratch memory 23 | virtual size_t getScratchByteSize() { return 0; } 24 | virtual void setScratch(const Ref& scratch) {} 25 | 26 | // Finalization is required before submission 27 | virtual void finalize() {} 28 | 29 | // Enqueues the operation to the engine, optionally updating the progress as well 30 | virtual void submit(const Ref& progress = nullptr) = 0; 31 | 32 | // Returns the estimated amount of work for progress monitoring 33 | virtual size_t getWorkAmount() const { return 1; } 34 | 35 | // Name for debugging purposes 36 | std::string getName() const { return name; } 37 | void setName(const std::string& name) { this->name = name; } 38 | 39 | private: 40 | std::string name; 41 | }; 42 | 43 | // Base class for most operations 
(except compound operations, e.g. Graph) 44 | class BaseOp : public Op 45 | { 46 | public: 47 | void submit(const Ref& progress) final; 48 | 49 | // Enqueues the kernel(s) of the operation to the engine, which may be cancelled if supported 50 | virtual void submitKernels(const Ref& ct = nullptr) = 0; 51 | }; 52 | 53 | OIDN_NAMESPACE_END 54 | -------------------------------------------------------------------------------- /core/output_process.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "output_process.h" 5 | 6 | OIDN_NAMESPACE_BEGIN 7 | 8 | OutputProcess::OutputProcess(const OutputProcessDesc& desc) 9 | : OutputProcessDesc(desc) 10 | { 11 | if (srcDesc.getRank() != 3) 12 | throw std::invalid_argument("invalid output processing source shape"); 13 | 14 | setTile(0, 0, 0, 0, 0, 0); 15 | } 16 | 17 | void OutputProcess::setSrc(const Ref& src) 18 | { 19 | if (!src || src->getDesc() != srcDesc) 20 | throw std::invalid_argument("invalid output processing source"); 21 | 22 | this->src = src; 23 | } 24 | 25 | void OutputProcess::setDst(const Ref& dst) 26 | { 27 | if (!dst || dst->getC() > srcDesc.getC()) 28 | throw std::invalid_argument("invalid output processing destination"); 29 | 30 | this->dst = dst; 31 | } 32 | 33 | void OutputProcess::setTile(int hSrc, int wSrc, int hDst, int wDst, int H, int W) 34 | { 35 | tile.hSrcBegin = hSrc; 36 | tile.wSrcBegin = wSrc; 37 | tile.hDstBegin = hDst; 38 | tile.wDstBegin = wDst; 39 | tile.H = H; 40 | tile.W = W; 41 | } 42 | 43 | void OutputProcess::check() 44 | { 45 | if (!src || !dst) 46 | throw std::logic_error("output processing source/destination not set"); 47 | if (tile.hSrcBegin + tile.H > src->getH() || 48 | tile.wSrcBegin + tile.W > src->getW() || 49 | tile.hDstBegin + tile.H > dst->getH() || 50 | tile.wDstBegin + tile.W > dst->getW()) 51 | throw std::out_of_range("output processing 
source/destination out of bounds"); 52 | } 53 | 54 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /core/output_process.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "op.h" 7 | #include "image.h" 8 | #include "tensor.h" 9 | #include "color.h" 10 | #include "tile.h" 11 | 12 | OIDN_NAMESPACE_BEGIN 13 | 14 | struct OutputProcessDesc 15 | { 16 | TensorDesc srcDesc; 17 | std::shared_ptr transferFunc; 18 | bool hdr; 19 | bool snorm; 20 | }; 21 | 22 | class OutputProcess : public BaseOp, protected OutputProcessDesc 23 | { 24 | public: 25 | OutputProcess(const OutputProcessDesc& desc); 26 | 27 | TensorDesc getSrcDesc() const { return srcDesc; } 28 | Ref getSrc() const { return src; } 29 | 30 | void setSrc(const Ref& src); 31 | void setDst(const Ref& dst); 32 | void setTile(int hSrc, int wSrc, int hDst, int wDst, int H, int W); 33 | 34 | protected: 35 | void check(); 36 | 37 | Ref src; 38 | Ref dst; 39 | Tile tile; 40 | }; 41 | 42 | OIDN_NAMESPACE_END 43 | -------------------------------------------------------------------------------- /core/pool.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "pool.h" 5 | 6 | OIDN_NAMESPACE_BEGIN 7 | 8 | Pool::Pool(const PoolDesc& desc) 9 | : PoolDesc(desc) 10 | { 11 | if (srcDesc.getRank() != 3 || srcDesc.getH() % 2 != 0 || srcDesc.getW() % 2 != 0) 12 | throw std::invalid_argument("invalid pooling source shape"); 13 | 14 | TensorDims dstDims{srcDesc.getC(), srcDesc.getH() / 2, srcDesc.getW() / 2}; 15 | TensorDims dstPaddedDims{srcDesc.getPaddedC(), dstDims[1], dstDims[2]}; 16 | dstDesc = {dstDims, dstPaddedDims, srcDesc.layout, srcDesc.dataType}; 17 | } 18 | 19 | void Pool::setSrc(const Ref& 
src) 20 | { 21 | if (!src || src->getDesc() != srcDesc) 22 | throw std::invalid_argument("invalid pooling source"); 23 | 24 | this->src = src; 25 | updateSrc(); 26 | } 27 | 28 | void Pool::setDst(const Ref& dst) 29 | { 30 | if (!dst || dst->getDesc() != dstDesc) 31 | throw std::invalid_argument("invalid pooling destination"); 32 | 33 | this->dst = dst; 34 | updateDst(); 35 | } 36 | 37 | OIDN_NAMESPACE_END 38 | -------------------------------------------------------------------------------- /core/pool.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "op.h" 7 | #include "tensor.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | // 2x2 max pooling descriptor 12 | struct PoolDesc 13 | { 14 | TensorDesc srcDesc; 15 | }; 16 | 17 | // 2x2 max pooling 18 | class Pool : public BaseOp, protected PoolDesc 19 | { 20 | public: 21 | Pool(const PoolDesc& desc); 22 | 23 | TensorDesc getDstDesc() const { return dstDesc; } 24 | Ref getDst() const { return dst; } 25 | 26 | void setSrc(const Ref& src); 27 | void setDst(const Ref& dst); 28 | 29 | protected: 30 | virtual void updateSrc() {} 31 | virtual void updateDst() {} 32 | 33 | TensorDesc dstDesc; 34 | Ref src; 35 | Ref dst; 36 | }; 37 | 38 | OIDN_NAMESPACE_END 39 | -------------------------------------------------------------------------------- /core/progress.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "progress.h" 5 | #include "engine.h" 6 | 7 | OIDN_NAMESPACE_BEGIN 8 | 9 | Progress::Progress(ProgressMonitorFunction func, void* userPtr, size_t total) 10 | : func(func), 11 | userPtr(userPtr), 12 | total(total), 13 | current(0), 14 | started(false) 15 | { 16 | if (!func) 17 | throw std::invalid_argument("progress monitor function is null"); 18 | } 
19 | 20 | void Progress::update(size_t delta) 21 | { 22 | std::lock_guard lock(mutex); 23 | current = std::min(current + delta, total); 24 | if (!func(userPtr, double(current) / double(total))) 25 | cancel(); 26 | } 27 | 28 | void Progress::submitUpdate(Engine* engine, const Ref& progress, size_t delta) 29 | { 30 | if (progress->isCancelled()) 31 | throw Exception(Error::Cancelled, "execution was cancelled"); 32 | 33 | if (!progress->started || delta != 0) // always submit the first update 34 | { 35 | engine->submitHostFunc([progress, delta]() { progress->update(delta); }, progress); 36 | progress->started = true; 37 | } 38 | } 39 | 40 | OIDN_NAMESPACE_END 41 | -------------------------------------------------------------------------------- /core/progress.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "common/common.h" 7 | #include "ref.h" 8 | #include 9 | 10 | OIDN_NAMESPACE_BEGIN 11 | 12 | class Engine; 13 | 14 | // Cancellation request state for asynchronous operations 15 | class CancellationToken : public RefCount 16 | { 17 | public: 18 | CancellationToken() : cancelled(false) {} 19 | 20 | bool isCancelled() const { return cancelled; } 21 | void cancel() { cancelled = true; } 22 | 23 | protected: 24 | std::atomic cancelled; 25 | }; 26 | 27 | // Progress monitoring for asynchronous operations 28 | class Progress : public CancellationToken 29 | { 30 | public: 31 | Progress(ProgressMonitorFunction func, void* userPtr, size_t total); 32 | 33 | // Enqueues a progress update, advancing the progress with the specified amount, and calling 34 | // the progress monitor function 35 | static void submitUpdate(Engine* engine, const Ref& progress, size_t delta = 0); 36 | 37 | private: 38 | ProgressMonitorFunction func; 39 | void* userPtr; 40 | size_t total; // maximum progress value 41 | size_t current; // current 
progress value 42 | bool started; // whether any progress updates have been submitted yet 43 | std::mutex mutex; 44 | 45 | void update(size_t delta); 46 | }; 47 | 48 | OIDN_NAMESPACE_END 49 | -------------------------------------------------------------------------------- /core/record.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "buffer.h" 7 | #include "exception.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | // Plain value or structure stored in a buffer 12 | template 13 | class Record final : public Memory 14 | { 15 | static_assert(std::is_trivial::value, "record can be used only for trivial types"); 16 | 17 | public: 18 | Record(const Ref& buffer, size_t byteOffset = 0) 19 | : Memory(buffer, byteOffset) 20 | { 21 | if (byteOffset + sizeof(T) > buffer->getByteSize()) 22 | throw Exception(Error::InvalidArgument, "buffer region is out of bounds"); 23 | } 24 | 25 | T* getPtr() const 26 | { 27 | return (T*)((char*)buffer->getPtr() + byteOffset); 28 | } 29 | }; 30 | 31 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /core/rt_filter.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "unet_filter.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | 10 | // RT: Generic ray tracing denoiser 11 | class RTFilter final : public UNetFilter 12 | { 13 | public: 14 | explicit RTFilter(const Ref& device); 15 | 16 | void setImage(const std::string& name, const Ref& image) override; 17 | void unsetImage(const std::string& name) override; 18 | void setInt(const std::string& name, int value) override; 19 | int getInt(const std::string& name) override; 20 | 21 | protected: 22 | std::shared_ptr newTransferFunc() override; 23 | }; 24 | 25 | 
OIDN_NAMESPACE_END 26 | -------------------------------------------------------------------------------- /core/rtlightmap_filter.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "rtlightmap_filter.h" 5 | 6 | #if defined(OIDN_FILTER_RTLIGHTMAP) 7 | #include "weights/rtlightmap_hdr.h" 8 | #include "weights/rtlightmap_dir.h" 9 | #endif 10 | 11 | OIDN_NAMESPACE_BEGIN 12 | 13 | RTLightmapFilter::RTLightmapFilter(const Ref& device) 14 | : UNetFilter(device) 15 | { 16 | hdr = true; 17 | 18 | #if defined(OIDN_FILTER_RTLIGHTMAP) 19 | models.hdr = {blobs::weights::rtlightmap_hdr}; 20 | models.dir = {blobs::weights::rtlightmap_dir}; 21 | #endif 22 | } 23 | 24 | std::shared_ptr RTLightmapFilter::newTransferFunc() 25 | { 26 | if (hdr) 27 | return std::make_shared(TransferFunction::Type::Log); 28 | else 29 | return std::make_shared(TransferFunction::Type::Linear); 30 | } 31 | 32 | void RTLightmapFilter::setImage(const std::string& name, const Ref& image) 33 | { 34 | if (name == "color") 35 | setParam(color, image); 36 | else if (name == "output") 37 | setParam(output, image); 38 | else 39 | device->printWarning("unknown filter parameter or type mismatch: '" + name + "'"); 40 | 41 | dirty = true; 42 | } 43 | 44 | void RTLightmapFilter::unsetImage(const std::string& name) 45 | { 46 | if (name == "color") 47 | removeParam(color); 48 | else if (name == "output") 49 | removeParam(output); 50 | else 51 | device->printWarning("unknown filter parameter or type mismatch: '" + name + "'"); 52 | 53 | dirty = true; 54 | } 55 | 56 | void RTLightmapFilter::setInt(const std::string& name, int value) 57 | { 58 | if (name == "directional") 59 | { 60 | setParam(directional, value); 61 | hdr = !directional; 62 | } 63 | else 64 | UNetFilter::setInt(name, value); 65 | 66 | dirty = true; 67 | } 68 | 69 | int RTLightmapFilter::getInt(const std::string& name) 70 | { 
71 | if (name == "directional") 72 | return directional; 73 | else 74 | return UNetFilter::getInt(name); 75 | } 76 | 77 | OIDN_NAMESPACE_END 78 | -------------------------------------------------------------------------------- /core/rtlightmap_filter.h: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "unet_filter.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | 10 | // RTLightmap: Ray traced lightmap denoiser 11 | class RTLightmapFilter final : public UNetFilter 12 | { 13 | public: 14 | explicit RTLightmapFilter(const Ref& device); 15 | 16 | void setImage(const std::string& name, const Ref& image) override; 17 | void unsetImage(const std::string& name) override; 18 | void setInt(const std::string& name, int value) override; 19 | int getInt(const std::string& name) override; 20 | 21 | protected: 22 | std::shared_ptr newTransferFunc() override; 23 | }; 24 | 25 | OIDN_NAMESPACE_END 26 | -------------------------------------------------------------------------------- /core/subdevice.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "subdevice.h" 5 | #include "engine.h" 6 | 7 | OIDN_NAMESPACE_BEGIN 8 | 9 | Subdevice::Subdevice(std::unique_ptr&& engine) 10 | : engine(std::move(engine)) 11 | { 12 | this->engine->setSubdevice(this); 13 | } 14 | 15 | Ref Subdevice::newScratchArena(size_t byteSize, const std::string& name) 16 | { 17 | if (!scratchArenaManager) 18 | scratchArenaManager.reset(new ScratchArenaManager(engine.get())); 19 | return makeRef(scratchArenaManager.get(), byteSize, name); 20 | } 21 | 22 | void Subdevice::trimScratch() 23 | { 24 | if (scratchArenaManager) 25 | scratchArenaManager->trim(); 26 | } 27 | 28 | std::shared_ptr Subdevice::getCachedTensors(const void* key) 29 | { 30 | 
std::shared_ptr& tensorMap = cachedTensors[key]; 31 | if (!tensorMap) 32 | tensorMap = std::make_shared(); 33 | return tensorMap; 34 | } 35 | 36 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /core/subdevice.h: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "device.h" 7 | #include "arena.h" 8 | #include "tensor.h" 9 | 10 | OIDN_NAMESPACE_BEGIN 11 | 12 | // Subdevice consisting of an engine and some shared resources 13 | class Subdevice final 14 | { 15 | public: 16 | explicit Subdevice(std::unique_ptr&& engine); 17 | 18 | Engine* getEngine() const { return engine.get(); } 19 | 20 | // Scratch 21 | Ref newScratchArena(size_t byteSize, const std::string& name = ""); 22 | void trimScratch(); 23 | 24 | // Tensor cache 25 | std::shared_ptr getCachedTensors(const void* key); 26 | 27 | private: 28 | // Disable copying 29 | Subdevice(const Subdevice&) = delete; 30 | Subdevice& operator =(const Subdevice&) = delete; 31 | 32 | std::unique_ptr engine; // must be declared first / destroyed last 33 | 34 | // Resources 35 | std::unique_ptr scratchArenaManager; 36 | std::unordered_map> cachedTensors; // cached weights 37 | }; 38 | 39 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /core/tensor_accessor.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "tensor_layout.h" 7 | #include "vec.h" 8 | 9 | // ISPC forward declarations 10 | namespace ispc 11 | { 12 | struct TensorAccessor1D; 13 | struct TensorAccessor3D; 14 | struct TensorAccessor4D; 15 | }; 16 | 17 | OIDN_NAMESPACE_BEGIN 18 | 19 | template 20 | struct TensorAccessor1D 21 | { 22 | TensorByteOffset getByteOffset; 
23 | oidn_global char* ptr; 24 | int X; // padded dimensions 25 | 26 | TensorAccessor1D() = default; 27 | 28 | oidn_host_device_inline TensorAccessor1D(oidn_global void* data, int X) 29 | : ptr(static_cast(data)), 30 | X(X) {} 31 | 32 | oidn_host_device_inline oidn_global T& operator ()(int x) const 33 | { 34 | return *reinterpret_cast(ptr + getByteOffset(x)); 35 | } 36 | }; 37 | 38 | template 39 | struct TensorAccessor3D 40 | { 41 | TensorByteOffset getByteOffset; 42 | oidn_global char* ptr; 43 | int C, H, W; // padded dimensions 44 | 45 | TensorAccessor3D() = default; 46 | 47 | oidn_host_device_inline TensorAccessor3D(oidn_global void* data, int C, int H, int W) 48 | : getByteOffset(C, H, W), 49 | ptr(static_cast(data)), C(C), H(H), W(W) {} 50 | 51 | oidn_host_device_inline oidn_global T& operator ()(int c, int h, int w) const 52 | { 53 | return *reinterpret_cast(ptr + getByteOffset(c, h, w)); 54 | } 55 | 56 | oidn_host_device_inline vec3 get3(int c, int h, int w) const 57 | { 58 | return vec3((*this)(c, h, w), 59 | (*this)(c+1, h, w), 60 | (*this)(c+2, h, w)); 61 | } 62 | 63 | oidn_host_device_inline void set3(int c, int h, int w, vec3 value) const 64 | { 65 | (*this)(c, h, w) = value.x; 66 | (*this)(c+1, h, w) = value.y; 67 | (*this)(c+2, h, w) = value.z; 68 | } 69 | }; 70 | 71 | template 72 | struct TensorAccessor4D 73 | { 74 | TensorByteOffset getByteOffset; 75 | oidn_global char* ptr; 76 | int O, I, H, W; // padded dimensions 77 | 78 | TensorAccessor4D() = default; 79 | 80 | oidn_host_device_inline TensorAccessor4D(oidn_global void* data, int O, int I, int H, int W) 81 | : getByteOffset(O, I, H, W), 82 | ptr(static_cast(data)), O(O), I(I), H(H), W(W) {} 83 | 84 | oidn_host_device_inline oidn_global T& operator ()(int o, int i, int h, int w) const 85 | { 86 | return *reinterpret_cast(ptr + getByteOffset(o, i, h, w)); 87 | } 88 | }; 89 | 90 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- 
/core/tensor_reorder.h: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "tensor.h" 5 | 6 | OIDN_NAMESPACE_BEGIN 7 | 8 | void reorderWeight(Tensor& src, int srcBeginI, int srcI, Tensor& dst, int dstBeginI, int dstI); 9 | void reorderWeight(Tensor& src, Tensor& dst); 10 | void reorderBias(Tensor& src, Tensor& dst); 11 | 12 | OIDN_NAMESPACE_END 13 | -------------------------------------------------------------------------------- /core/tile.h: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "common/platform.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | 10 | struct Tile 11 | { 12 | int hSrcBegin; 13 | int wSrcBegin; 14 | int hDstBegin; 15 | int wDstBegin; 16 | int H; 17 | int W; 18 | }; 19 | 20 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /core/tza.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "tensor.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | 10 | // Parses tensors from a Tensor Archive (TZA) 11 | std::shared_ptr parseTZA(const void* buffer, size_t size); 12 | 13 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /core/upsample.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "upsample.h" 5 | 6 | OIDN_NAMESPACE_BEGIN 7 | 8 | Upsample::Upsample(const UpsampleDesc& desc) 9 | : UpsampleDesc(desc) 10 | { 11 | if (srcDesc.getRank() != 3) 12 | throw std::invalid_argument("invalid upsampling source shape"); 13 | 14 
| TensorDims dstDims{srcDesc.getC(), srcDesc.getH() * 2, srcDesc.getW() * 2}; 15 | TensorDims dstPaddedDims{srcDesc.getPaddedC(), dstDims[1], dstDims[2]}; 16 | dstDesc = {dstDims, dstPaddedDims, srcDesc.layout, srcDesc.dataType}; 17 | } 18 | 19 | void Upsample::setSrc(const Ref& src) 20 | { 21 | if (!src || src->getDesc() != srcDesc) 22 | throw std::invalid_argument("invalid upsampling source"); 23 | 24 | this->src = src; 25 | updateSrc(); 26 | } 27 | 28 | void Upsample::setDst(const Ref& dst) 29 | { 30 | if (!dst || dst->getDesc() != dstDesc) 31 | throw std::invalid_argument("invalid upsampling destination"); 32 | 33 | this->dst = dst; 34 | updateDst(); 35 | } 36 | 37 | OIDN_NAMESPACE_END 38 | -------------------------------------------------------------------------------- /core/upsample.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "op.h" 7 | #include "tensor.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | struct UpsampleDesc 12 | { 13 | TensorDesc srcDesc; 14 | }; 15 | 16 | // 2x2 nearest-neighbor upsampling 17 | class Upsample : public BaseOp, protected UpsampleDesc 18 | { 19 | public: 20 | Upsample(const UpsampleDesc& desc); 21 | 22 | TensorDesc getDstDesc() const { return dstDesc; } 23 | Ref getDst() const { return dst; } 24 | 25 | void setSrc(const Ref& src); 26 | void setDst(const Ref& dst); 27 | 28 | protected: 29 | virtual void updateSrc() {} 30 | virtual void updateDst() {} 31 | 32 | TensorDesc dstDesc; 33 | Ref src; 34 | Ref dst; 35 | }; 36 | 37 | OIDN_NAMESPACE_END 38 | -------------------------------------------------------------------------------- /core/verbose.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "common/platform.h" 7 | 8 | 
OIDN_NAMESPACE_BEGIN 9 | 10 | // Base class for verbose classes 11 | class Verbose 12 | { 13 | public: 14 | Verbose(int v = 0) : verbose(v) {} 15 | 16 | void setVerbose(int v) { verbose = v; } 17 | bool isVerbose(int v = 1) const { return v <= verbose; } 18 | 19 | void print(const std::string& message) 20 | { 21 | if (isVerbose()) 22 | std::cout << message << std::endl; 23 | } 24 | 25 | void printWarning(const std::string& message) 26 | { 27 | if (isVerbose()) 28 | std::cerr << "Warning: " << message << std::endl; 29 | } 30 | 31 | void printError(const std::string& message) 32 | { 33 | if (isVerbose()) 34 | std::cerr << "Error: " << message << std::endl; 35 | } 36 | 37 | void printDebug(const std::string& message) 38 | { 39 | if (isVerbose(2)) 40 | std::cout << message << std::endl; 41 | } 42 | 43 | protected: 44 | int verbose; 45 | }; 46 | 47 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/cpu/bnns/bnns_common.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "bnns_common.h" 5 | 6 | OIDN_NAMESPACE_BEGIN 7 | 8 | BNNSNDArrayDescriptor toBNNS(const TensorDesc& td) 9 | { 10 | BNNSNDArrayDescriptor res; 11 | 12 | switch (td.layout) 13 | { 14 | case TensorLayout::x: 15 | assert(td.getRank() == 1); 16 | res = BNNSNDArrayDescriptor({ 17 | .layout = BNNSDataLayoutVector, 18 | .size = {size_t(td.dims[0])} 19 | }); 20 | break; 21 | case TensorLayout::chw: 22 | assert(td.getRank() == 3); 23 | res = BNNSNDArrayDescriptor({ 24 | .layout = BNNSDataLayoutImageCHW, 25 | .size = {size_t(td.dims[2]), size_t(td.dims[1]), size_t(td.dims[0])} 26 | }); 27 | break; 28 | case TensorLayout::oihw: 29 | assert(td.getRank() == 4); 30 | res = BNNSNDArrayDescriptor({ 31 | .layout = BNNSDataLayoutConvolutionWeightsOIHW, 32 | .size = {size_t(td.dims[3]), size_t(td.dims[2]), size_t(td.dims[1]), 
size_t(td.dims[0])} 33 | }); 34 | break; 35 | default: 36 | throw std::invalid_argument("unsupported tensor layout"); 37 | } 38 | 39 | switch (td.dataType) 40 | { 41 | case DataType::Float32: 42 | res.data_type = BNNSDataTypeFloat32; 43 | break; 44 | case DataType::Float16: 45 | res.data_type = BNNSDataTypeFloat16; 46 | break; 47 | case DataType::UInt8: 48 | res.data_type = BNNSDataTypeUInt8; 49 | break; 50 | default: 51 | throw std::invalid_argument("unsupported data type"); 52 | } 53 | 54 | res.data = nullptr; 55 | return res; 56 | } 57 | 58 | BNNSNDArrayDescriptor toBNNS(const Ref& t) 59 | { 60 | BNNSNDArrayDescriptor res = toBNNS(t->getDesc()); 61 | res.data = t->getPtr(); 62 | return res; 63 | } 64 | 65 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/cpu/bnns/bnns_common.h: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include 7 | #include "core/tensor.h" 8 | #include "bnns_engine.h" 9 | 10 | OIDN_NAMESPACE_BEGIN 11 | 12 | BNNSNDArrayDescriptor toBNNS(const TensorDesc& td); 13 | BNNSNDArrayDescriptor toBNNS(const Ref& t); 14 | 15 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/cpu/bnns/bnns_conv.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "bnns_conv.h" 5 | 6 | OIDN_NAMESPACE_BEGIN 7 | 8 | BNNSConv::BNNSConv(BNNSEngine* engine, const ConvDesc& desc) 9 | : Conv(desc), 10 | engine(engine) 11 | {} 12 | 13 | BNNSConv::~BNNSConv() 14 | { 15 | if (filter) 16 | BNNSFilterDestroy(filter); 17 | } 18 | 19 | void BNNSConv::updateWeight() 20 | { 21 | if (filter) 22 | throw std::logic_error("convolution weight cannot be set after finalization"); 23 | } 24 | 25 | 
void BNNSConv::updateBias() 26 | { 27 | if (filter) 28 | throw std::logic_error("convolution bias cannot be set after finalization"); 29 | } 30 | 31 | void BNNSConv::finalize() 32 | { 33 | if (filter) 34 | throw std::logic_error("convolution already finalized"); 35 | if (!weight || !bias) 36 | throw std::logic_error("convolution weight/bias not set before finalization"); 37 | 38 | BNNSLayerParametersConvolution params = { 39 | .i_desc = toBNNS(srcDesc), 40 | .w_desc = toBNNS(weight), 41 | .o_desc = toBNNS(dstDesc), 42 | .bias = toBNNS(bias), 43 | .x_stride = 1, 44 | .y_stride = 1, 45 | .x_dilation_stride = 1, 46 | .y_dilation_stride = 1, 47 | .x_padding = 1, 48 | .y_padding = 1, 49 | }; 50 | 51 | if (activation == Activation::ReLU) 52 | params.activation.function = BNNSActivationFunctionRectifiedLinear; 53 | else 54 | params.activation.function = BNNSActivationFunctionIdentity; 55 | 56 | filter = BNNSFilterCreateLayerConvolution(¶ms, nullptr); 57 | if (!filter) 58 | throw std::runtime_error("BNNSFilterCreateLayerConvolution failed"); 59 | } 60 | 61 | void BNNSConv::submitKernels(const Ref& ct) 62 | { 63 | if (!filter) 64 | throw std::logic_error("convolution not finalized"); 65 | if (!src || !dst) 66 | throw std::logic_error("convolution source/destination not set"); 67 | 68 | void* srcPtr = src->getPtr(); 69 | void* dstPtr = dst->getPtr(); 70 | engine->submitFunc([=] { BNNSFilterApply(filter, srcPtr, dstPtr); }, ct); 71 | } 72 | 73 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/cpu/bnns/bnns_conv.h: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/conv.h" 7 | #include "bnns_common.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | class BNNSConv : public Conv 12 | { 13 | public: 14 | BNNSConv(BNNSEngine* engine, const ConvDesc& desc); 15 | ~BNNSConv(); 
16 | 17 | Engine* getEngine() const override { return engine; } 18 | void finalize() override; 19 | void submitKernels(const Ref& ct) override; 20 | 21 | private: 22 | void updateWeight() override; 23 | void updateBias() override; 24 | 25 | BNNSEngine* engine; 26 | BNNSFilter filter = nullptr; 27 | }; 28 | 29 | OIDN_NAMESPACE_END 30 | -------------------------------------------------------------------------------- /devices/cpu/bnns/bnns_engine.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "bnns_engine.h" 5 | #include "bnns_conv.h" 6 | #include "bnns_pool.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | 10 | BNNSEngine::BNNSEngine(CPUDevice* device, int numThreads) 11 | : CPUEngine(device, numThreads) 12 | {} 13 | 14 | Ref BNNSEngine::newConv(const ConvDesc& desc) 15 | { 16 | return makeRef(this, desc); 17 | } 18 | 19 | Ref BNNSEngine::newPool(const PoolDesc& desc) 20 | { 21 | return makeRef(this, desc); 22 | } 23 | 24 | OIDN_NAMESPACE_END 25 | -------------------------------------------------------------------------------- /devices/cpu/bnns/bnns_engine.h: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "../cpu_engine.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | 10 | class BNNSEngine final : public CPUEngine 11 | { 12 | public: 13 | BNNSEngine(CPUDevice* device, int numThreads); 14 | 15 | // Ops 16 | Ref newConv(const ConvDesc& desc) override; 17 | Ref newPool(const PoolDesc& desc) override; 18 | }; 19 | 20 | OIDN_NAMESPACE_END 21 | -------------------------------------------------------------------------------- /devices/cpu/bnns/bnns_pool.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Intel Corporation 2 | // SPDX-License-Identifier: 
Apache-2.0 3 | 4 | #include "bnns_pool.h" 5 | 6 | OIDN_NAMESPACE_BEGIN 7 | 8 | BNNSPool::BNNSPool(BNNSEngine* engine, const PoolDesc& desc) 9 | : Pool(desc), 10 | engine(engine) 11 | {} 12 | 13 | BNNSPool::~BNNSPool() 14 | { 15 | if (filter) 16 | BNNSFilterDestroy(filter); 17 | } 18 | 19 | void BNNSPool::finalize() 20 | { 21 | if (filter) 22 | throw std::logic_error("pooling already finalized"); 23 | 24 | BNNSLayerParametersPooling params = { 25 | .i_desc = toBNNS(srcDesc), 26 | .o_desc = toBNNS(dstDesc), 27 | .pooling_function = BNNSPoolingFunctionMax, 28 | .k_width = 2, 29 | .k_height = 2, 30 | .x_stride = 2, 31 | .y_stride = 2 32 | }; 33 | 34 | filter = BNNSFilterCreateLayerPooling(¶ms, nullptr); 35 | if (!filter) 36 | throw std::runtime_error("BNNSFilterCreateLayerPooling failed"); 37 | } 38 | 39 | void BNNSPool::submitKernels(const Ref& ct) 40 | { 41 | if (!filter) 42 | throw std::logic_error("pooling not finalized"); 43 | if (!src || !dst) 44 | throw std::logic_error("pooling source/destination not set"); 45 | 46 | void* srcPtr = src->getPtr(); 47 | void* dstPtr = dst->getPtr(); 48 | engine->submitFunc([=] { BNNSFilterApply(filter, srcPtr, dstPtr); }, ct); 49 | } 50 | 51 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/cpu/bnns/bnns_pool.h: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/pool.h" 7 | #include "bnns_common.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | class BNNSPool : public Pool 12 | { 13 | public: 14 | BNNSPool(BNNSEngine* engine, const PoolDesc& desc); 15 | ~BNNSPool(); 16 | 17 | Engine* getEngine() const override { return engine; } 18 | void finalize() override; 19 | void submitKernels(const Ref& ct) override; 20 | 21 | private: 22 | BNNSEngine* engine; 23 | BNNSFilter filter = nullptr; 24 | }; 25 | 26 | OIDN_NAMESPACE_END 27 
| -------------------------------------------------------------------------------- /devices/cpu/color.isph: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "vec.isph" 5 | 6 | struct TransferFunction 7 | { 8 | // Forward and inverse functions 9 | vec3f (*uniform forward)(const uniform TransferFunction* uniform self, vec3f y); 10 | vec3f (*uniform inverse)(const uniform TransferFunction* uniform self, vec3f x); 11 | 12 | // Input and output scales (*not* applied by the forward/inverse functions!) 13 | uniform const float* uniform inputScalePtr; 14 | uniform float inputScale; 15 | uniform float outputScale; 16 | 17 | // Normalization scale (HDR only) 18 | uniform float normScale; 19 | uniform float rcpNormScale; 20 | }; 21 | 22 | inline uniform float TransferFunction_getInputScale(const uniform TransferFunction* uniform self) 23 | { 24 | return self->inputScalePtr ? *self->inputScalePtr : self->inputScale; 25 | } 26 | 27 | inline uniform float TransferFunction_getOutputScale(const uniform TransferFunction* uniform self) 28 | { 29 | if (self->inputScalePtr) 30 | { 31 | const uniform float inputScale = *self->inputScalePtr; 32 | return (inputScale != 0.f) ? 
(1.f / inputScale) : 0.f; 33 | } 34 | return self->outputScale; 35 | } 36 | 37 | // Computes the luminance of an RGB color 38 | inline float luminance(vec3f c) 39 | { 40 | return 0.212671f * c.x + 0.715160f * c.y + 0.072169f * c.z; 41 | } -------------------------------------------------------------------------------- /devices/cpu/cpu_autoexposure.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "cpu_autoexposure.h" 5 | #include "cpu_autoexposure_ispc.h" 6 | #include "cpu_common.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | 10 | CPUAutoexposure::CPUAutoexposure(CPUEngine* engine, const ImageDesc& srcDesc) 11 | : Autoexposure(srcDesc), 12 | engine(engine) 13 | {} 14 | 15 | void CPUAutoexposure::submitKernels(const Ref& ct) 16 | { 17 | if (!src) 18 | throw std::logic_error("autoexposure source not set"); 19 | if (!dst) 20 | throw std::logic_error("autoexposure destination not set"); 21 | 22 | // Downsample the image to minimize sensitivity to noise 23 | ispc::ImageAccessor srcAcc = *src; 24 | float* dstPtr = getDstPtr(); 25 | 26 | engine->submitFunc([=]() 27 | { 28 | // Compute the average log luminance of the downsampled image 29 | using Sum = std::pair; 30 | 31 | Sum sum = 32 | tbb::parallel_deterministic_reduce( 33 | tbb::blocked_range2d(0, numBinsH, 0, numBinsW), 34 | Sum(0.f, 0), 35 | [&](const tbb::blocked_range2d& r, Sum sum) -> Sum 36 | { 37 | // Iterate over bins 38 | for (int i = r.rows().begin(); i != r.rows().end(); ++i) 39 | { 40 | for (int j = r.cols().begin(); j != r.cols().end(); ++j) 41 | { 42 | // Compute the average luminance in the current bin 43 | const int beginH = int(ptrdiff_t(i) * srcAcc.H / numBinsH); 44 | const int beginW = int(ptrdiff_t(j) * srcAcc.W / numBinsW); 45 | const int endH = int(ptrdiff_t(i+1) * srcAcc.H / numBinsH); 46 | const int endW = int(ptrdiff_t(j+1) * srcAcc.W / numBinsW); 47 | 48 | const float L 
= ispc::autoexposureDownsample(srcAcc, beginH, endH, beginW, endW); 49 | 50 | // Accumulate the log luminance 51 | if (L > eps) 52 | { 53 | sum.first += math::log2(L); 54 | sum.second++; 55 | } 56 | } 57 | } 58 | 59 | return sum; 60 | }, 61 | [](Sum a, Sum b) -> Sum { return Sum(a.first+b.first, a.second+b.second); } 62 | ); 63 | 64 | *dstPtr = (sum.second > 0) ? (key / math::exp2(sum.first / float(sum.second))) : 1.f; 65 | }, ct); 66 | } 67 | 68 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/cpu/cpu_autoexposure.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/autoexposure.h" 7 | #include "cpu_engine.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | class CPUAutoexposure final : public Autoexposure 12 | { 13 | public: 14 | CPUAutoexposure(CPUEngine* engine, const ImageDesc& srcDesc); 15 | 16 | Engine* getEngine() const override { return engine; } 17 | void submitKernels(const Ref& ct) override; 18 | 19 | private: 20 | CPUEngine* engine; 21 | }; 22 | 23 | OIDN_NAMESPACE_END 24 | -------------------------------------------------------------------------------- /devices/cpu/cpu_autoexposure.ispc: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "image_accessor.isph" 5 | #include "color.isph" 6 | 7 | // Returns the average luminance of the specified image bin 8 | export uniform float autoexposureDownsample(const uniform ImageAccessor& color, 9 | uniform int beginH, uniform int endH, 10 | uniform int beginW, uniform int endW) 11 | { 12 | float L = 0.f; 13 | 14 | for (uniform int h = beginH; h < endH; ++h) 15 | { 16 | foreach (w = beginW ... 
endW) 17 | { 18 | vec3f c = Image_get3(color, h, w); 19 | c = clamp(nan_to_zero(c), 0.f, pos_max); // sanitize 20 | L += luminance(c); 21 | } 22 | } 23 | 24 | return reduce_add(L) / ((endH - beginH) * (endW - beginW)); 25 | } 26 | -------------------------------------------------------------------------------- /devices/cpu/cpu_common.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/image.h" 7 | #include "core/tensor.h" 8 | #include "core/tile.h" 9 | #include "core/color.h" 10 | #include "cpu_input_process_ispc.h" 11 | #include "color_ispc.h" 12 | 13 | OIDN_NAMESPACE_BEGIN 14 | 15 | ispc::Tile toISPC(const Tile& tile); 16 | ispc::TransferFunction toISPC(const TransferFunction& tf); 17 | 18 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/cpu/cpu_conv.h: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/conv.h" 7 | #include "cpu_engine.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | class CPUConv final : public Conv 12 | { 13 | public: 14 | CPUConv(CPUEngine* engine, const ConvDesc& desc); 15 | 16 | Engine* getEngine() const override { return engine; } 17 | void submitKernels(const Ref& ct) override; 18 | 19 | private: 20 | CPUEngine* engine; 21 | int blockOCB; // block of output channel blocks 22 | int blockOW; // block of output width 23 | int OCBB; // number of output channel block blocks 24 | int OWT; // number of output width tiles 25 | }; 26 | 27 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/cpu/cpu_conv_compute.isph: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Intel 
Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | unmasked void CPUConvKernel_compute(T, blockOCB)(const uniform CPUConvKernel* uniform self, 5 | uniform int ocb, uniform int oh, 6 | uniform int owBegin, uniform int owEnd) 7 | { 8 | const uniform int oc = ocb * blockC; 9 | 10 | #if KH == 3 && PH == 1 11 | const uniform int khBegin = oh > 0 ? 0 : 1; 12 | const uniform int khEnd = oh < self->dst.H-1 ? 3 : 2; 13 | #else 14 | const uniform int khBegin = max(PH - oh, 0); 15 | const uniform int khEnd = KH - max(PH + oh - (self->dst.H-1), 0); 16 | #endif 17 | 18 | for (uniform int ic = 0; ic < self->src.C; ic += blockC) 19 | { 20 | const uniform uint8* uniform srcPtr = Tensor_getPtr(self->src, ic, oh + khBegin - PH, owBegin); 21 | const uniform uint8* uniform weightPtr = Tensor_getPtr(self->weight, oc, ic, khBegin, 0); 22 | const uniform uint8* uniform biasPtr = (ic == 0) ? Tensor_getPtr(self->bias, oc) : NULL; 23 | uniform uint8* uniform dstPtr = Tensor_getPtr(self->dst, oc, oh, owBegin); 24 | const uniform bool relu = self->relu && ic == (self->src.C - blockC); 25 | 26 | uniform int ow = owBegin; // owBegin/owEnd *must* be aligned to block boundaries 27 | while (ow < owEnd) 28 | { 29 | if (ow > PW - 1 && ow + blockOW + PW - 1 < self->dst.W) 30 | { 31 | // Fast path (no padding, width blocking) 32 | CPUConvKernel_computeBlock(T, blockOCB, blockOW)( 33 | srcPtr, self->src.hByteStride, 34 | weightPtr, biasPtr, 35 | dstPtr, self->dst.CByteStride, 36 | khEnd - khBegin, 37 | 0, KW, 38 | relu); 39 | 40 | srcPtr += blockOW * blockC * sizeof(uniform T); 41 | dstPtr += blockOW * blockC * sizeof(uniform T); 42 | ow += blockOW; 43 | } 44 | else 45 | { 46 | // Slow path (padding, no width blocking) 47 | CPUConvKernel_computeBlock(T, blockOCB, 1)( 48 | srcPtr, self->src.hByteStride, 49 | weightPtr, biasPtr, 50 | dstPtr, self->dst.CByteStride, 51 | khEnd - khBegin, 52 | #if KW == 3 && PW == 1 53 | ow > 0 ? 0 : 1, 54 | ow < self->dst.W-1 ? 
3 : 2, 55 | #else 56 | max(PW - ow, 0), 57 | KW - max(PW + ow - (self->dst.W-1), 0), 58 | #endif 59 | relu); 60 | 61 | srcPtr += blockC * sizeof(uniform T); 62 | dstPtr += blockC * sizeof(uniform T); 63 | ow++; 64 | } 65 | } 66 | } 67 | } -------------------------------------------------------------------------------- /devices/cpu/cpu_device.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/device.h" 7 | #include "tasking.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | class CPUEngine; 12 | 13 | // CPU instruction set 14 | enum class CPUArch 15 | { 16 | Unknown, 17 | SSE2, 18 | SSE41, 19 | AVX2, 20 | AVX512, 21 | NEON 22 | }; 23 | 24 | class CPUPhysicalDevice final : public PhysicalDevice 25 | { 26 | public: 27 | explicit CPUPhysicalDevice(int score); 28 | }; 29 | 30 | class CPUDevice final : public Device 31 | { 32 | friend class CPUEngine; 33 | friend class DNNLEngine; 34 | 35 | public: 36 | static std::vector> getPhysicalDevices(); 37 | static std::string getName(); 38 | static CPUArch getArch(); 39 | 40 | CPUDevice(); 41 | 42 | DeviceType getType() const override { return DeviceType::CPU; } 43 | 44 | #if !defined(OIDN_DNNL) 45 | bool needWeightAndBiasOnDevice() const override { return false; } // no need to copy 46 | #endif 47 | Storage getPtrStorage(const void* ptr) override; 48 | 49 | int getInt(const std::string& name) override; 50 | void setInt(const std::string& name, int value) override; 51 | 52 | void wait() override; 53 | 54 | protected: 55 | void init() override; 56 | 57 | private: 58 | CPUArch arch = CPUArch::Unknown; 59 | 60 | int numThreads = 0; // autodetect by default 61 | bool setAffinity = true; 62 | }; 63 | 64 | OIDN_NAMESPACE_END 65 | -------------------------------------------------------------------------------- /devices/cpu/cpu_image_copy.cpp: 
-------------------------------------------------------------------------------- 1 | // Copyright 2020 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "cpu_image_copy.h" 5 | #include "cpu_image_copy_ispc.h" 6 | #include "cpu_common.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | 10 | CPUImageCopy::CPUImageCopy(CPUEngine* engine) 11 | : engine(engine) 12 | {} 13 | 14 | void CPUImageCopy::submitKernels(const Ref& ct) 15 | { 16 | check(); 17 | 18 | ispc::CPUImageCopyKernel kernel; 19 | kernel.src = *src; 20 | kernel.dst = *dst; 21 | 22 | engine->submitFunc([=] 23 | { 24 | parallel_for(kernel.dst.H, [&](int h) 25 | { 26 | ispc::CPUImageCopyKernel_run(&kernel, h); 27 | }); 28 | }, ct); 29 | } 30 | 31 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/cpu/cpu_image_copy.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/image_copy.h" 7 | #include "cpu_engine.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | class CPUImageCopy final : public ImageCopy 12 | { 13 | public: 14 | explicit CPUImageCopy(CPUEngine* engine); 15 | 16 | Engine* getEngine() const override { return engine; } 17 | void submitKernels(const Ref& ct) override; 18 | 19 | private: 20 | CPUEngine* engine; 21 | }; 22 | 23 | OIDN_NAMESPACE_END 24 | -------------------------------------------------------------------------------- /devices/cpu/cpu_image_copy.ispc: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "image_accessor.isph" 5 | 6 | struct CPUImageCopyKernel 7 | { 8 | uniform ImageAccessor src; 9 | uniform ImageAccessor dst; 10 | }; 11 | 12 | export void CPUImageCopyKernel_run(const uniform CPUImageCopyKernel* uniform self, uniform int h) 13 | { 14 
| foreach (w = 0 ... self->dst.W) 15 | { 16 | vec3f value = Image_get3(self->src, h, w); 17 | Image_set3(self->dst, h, w, value); 18 | } 19 | } -------------------------------------------------------------------------------- /devices/cpu/cpu_input_process.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "cpu_input_process.h" 5 | #include "cpu_input_process_ispc.h" 6 | #include "cpu_common.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | 10 | CPUInputProcess::CPUInputProcess(CPUEngine* engine, const InputProcessDesc& desc) 11 | : InputProcess(engine, desc), 12 | engine(engine) 13 | {} 14 | 15 | void CPUInputProcess::submitKernels(const Ref& ct) 16 | { 17 | check(); 18 | 19 | ispc::CPUInputProcessKernel kernel; 20 | Image nullImage; 21 | 22 | kernel.input = color ? *color : (albedo ? *albedo : *normal); 23 | kernel.albedo = (color && albedo) ? *albedo : nullImage; 24 | kernel.normal = (color && normal) ? 
*normal : nullImage; 25 | kernel.dst = *dst; 26 | kernel.tile = toISPC(tile); 27 | kernel.transferFunc = toISPC(*transferFunc); 28 | kernel.hdr = hdr; 29 | kernel.snorm = snorm; 30 | 31 | engine->submitFunc([=] 32 | { 33 | parallel_for(kernel.dst.H, [&](int hDst) 34 | { 35 | ispc::CPUInputProcessKernel_run(&kernel, hDst); 36 | }); 37 | }, ct); 38 | } 39 | 40 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/cpu/cpu_input_process.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/input_process.h" 7 | #include "cpu_engine.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | class CPUInputProcess final : public InputProcess 12 | { 13 | public: 14 | CPUInputProcess(CPUEngine* engine, const InputProcessDesc& desc); 15 | 16 | Engine* getEngine() const override { return engine; } 17 | void submitKernels(const Ref& ct) override; 18 | 19 | private: 20 | CPUEngine* engine; 21 | }; 22 | 23 | OIDN_NAMESPACE_END 24 | -------------------------------------------------------------------------------- /devices/cpu/cpu_module.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "core/context.h" 5 | #include "cpu_device.h" 6 | 7 | OIDN_NAMESPACE_BEGIN 8 | 9 | class CPUDeviceFactory : public DeviceFactory 10 | { 11 | public: 12 | Ref newDevice(const Ref& physicalDevice) override 13 | { 14 | assert(physicalDevice->type == DeviceType::CPU); 15 | return makeRef(); 16 | } 17 | }; 18 | 19 | OIDN_DECLARE_INIT_STATIC_MODULE(device_cpu) 20 | { 21 | Context::registerDeviceType(DeviceType::CPU, CPUDevice::getPhysicalDevices()); 22 | } 23 | 24 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- 
/devices/cpu/cpu_output_process.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "cpu_output_process.h" 5 | #include "cpu_output_process_ispc.h" 6 | #include "cpu_common.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | 10 | CPUOutputProcess::CPUOutputProcess(CPUEngine* engine, const OutputProcessDesc& desc) 11 | : OutputProcess(desc), 12 | engine(engine) 13 | {} 14 | 15 | void CPUOutputProcess::submitKernels(const Ref& ct) 16 | { 17 | check(); 18 | 19 | ispc::CPUOutputProcessKernel kernel; 20 | 21 | kernel.src = *src; 22 | kernel.dst = *dst; 23 | kernel.tile = toISPC(tile); 24 | kernel.transferFunc = toISPC(*transferFunc); 25 | kernel.hdr = hdr; 26 | kernel.snorm = snorm; 27 | 28 | engine->submitFunc([=] 29 | { 30 | parallel_for(kernel.tile.H, [&](int h) 31 | { 32 | ispc::CPUOutputProcessKernel_run(&kernel, h); 33 | }); 34 | }, ct); 35 | } 36 | 37 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/cpu/cpu_output_process.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/output_process.h" 7 | #include "cpu_engine.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | class CPUOutputProcess final : public OutputProcess 12 | { 13 | public: 14 | CPUOutputProcess(CPUEngine* engine, const OutputProcessDesc& desc); 15 | 16 | Engine* getEngine() const override { return engine; } 17 | void submitKernels(const Ref& ct) override; 18 | 19 | private: 20 | CPUEngine* engine; 21 | }; 22 | 23 | OIDN_NAMESPACE_END 24 | -------------------------------------------------------------------------------- /devices/cpu/cpu_output_process.ispc: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 
| // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "tensor_accessor.isph" 5 | #include "image_accessor.isph" 6 | #include "color.isph" 7 | #include "tile.isph" 8 | 9 | struct CPUOutputProcessKernel 10 | { 11 | // Source 12 | uniform TensorAccessor3D src; 13 | 14 | // Destination 15 | uniform ImageAccessor dst; 16 | 17 | // Tile 18 | uniform Tile tile; 19 | 20 | // Transfer function 21 | uniform TransferFunction transferFunc; 22 | uniform bool hdr; 23 | uniform bool snorm; // signed normalized ([-1..1]) 24 | }; 25 | 26 | export void CPUOutputProcessKernel_run(const uniform CPUOutputProcessKernel* uniform self, 27 | uniform int h) 28 | { 29 | const uniform int hSrc = h + self->tile.hSrcBegin; 30 | const uniform int hDst = h + self->tile.hDstBegin; 31 | 32 | const uniform float outputScale = TransferFunction_getOutputScale(&self->transferFunc); 33 | 34 | foreach (w = 0 ... self->tile.W) 35 | { 36 | const int wSrc = w + self->tile.wSrcBegin; 37 | const int wDst = w + self->tile.wDstBegin; 38 | 39 | // Load 40 | vec3f value = Tensor_get3(self->src, 0, hSrc, wSrc); 41 | 42 | // The CNN output may contain negative values or even NaNs, so it must be sanitized 43 | value = clamp(nan_to_zero(value), 0.f, pos_max); 44 | 45 | // Apply the inverse transfer function 46 | value = self->transferFunc.inverse(&self->transferFunc, value); 47 | 48 | // Average the channels if there is only one output channel 49 | if (self->dst.C == 1) 50 | value = make_vec3f((value.x + value.y + value.z) * (1.f / 3.f)); 51 | 52 | // Sanitize 53 | if (self->snorm) 54 | { 55 | // Transform to [-1..1] 56 | value = value * 2.f - 1.f; 57 | value = max(value, -1.f); 58 | } 59 | if (!self->hdr) 60 | value = min(value, 1.f); 61 | 62 | // Scale 63 | value = value * outputScale; 64 | 65 | // Store 66 | Image_set3(self->dst, hDst, wDst, value); 67 | } 68 | } -------------------------------------------------------------------------------- /devices/cpu/cpu_pool.cpp: 
-------------------------------------------------------------------------------- 1 | // Copyright 2022 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "cpu_pool.h" 5 | #include "cpu_pool_ispc.h" 6 | #include "cpu_common.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | 10 | CPUPool::CPUPool(CPUEngine* engine, const PoolDesc& desc) 11 | : Pool(desc), 12 | engine(engine) 13 | { 14 | if (srcDesc.layout != TensorLayout::Chw8c && 15 | srcDesc.layout != TensorLayout::Chw16c) 16 | throw std::invalid_argument("unsupported pooling source layout"); 17 | } 18 | 19 | void CPUPool::submitKernels(const Ref& ct) 20 | { 21 | if (!src || !dst) 22 | throw std::logic_error("pooling source/destination not set"); 23 | 24 | const int blockC = getTensorLayoutInfo(dstDesc.layout).blockC; 25 | 26 | ispc::CPUPoolKernel kernel; 27 | kernel.src = *src; 28 | kernel.dst = *dst; 29 | 30 | engine->submitFunc([=] 31 | { 32 | parallel_for(kernel.dst.C / blockC, kernel.dst.H, [&](int cb, int h) 33 | { 34 | ispc::CPUPoolKernel_run(&kernel, cb, h); 35 | }); 36 | }, ct); 37 | } 38 | 39 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/cpu/cpu_pool.h: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/pool.h" 7 | #include "cpu_engine.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | class CPUPool final : public Pool 12 | { 13 | public: 14 | CPUPool(CPUEngine* engine, const PoolDesc& desc); 15 | 16 | Engine* getEngine() const override { return engine; } 17 | void submitKernels(const Ref& ct) override; 18 | 19 | private: 20 | CPUEngine* engine; 21 | }; 22 | 23 | OIDN_NAMESPACE_END 24 | -------------------------------------------------------------------------------- /devices/cpu/cpu_pool.ispc: -------------------------------------------------------------------------------- 1 | // 
Copyright 2022 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #include "tensor_accessor.isph"
5 |
6 | // Parameters of the CPU pooling kernel
7 | struct CPUPoolKernel
8 | {
9 |   uniform TensorAccessor3D src; // source tensor
10 |   uniform TensorAccessor3D dst; // destination tensor
11 | };
12 |
13 | // Computes destination row h of channel block cb: each destination element is the maximum of
14 | // a 2x2 neighborhood read from two consecutive source rows.
15 | // NOTE(review): B is defined outside this file, presumably the channel block size - confirm
16 | export void CPUPoolKernel_run(const uniform CPUPoolKernel* uniform self,
17 |                               uniform int cb, uniform int h)
18 | {
19 |   const uniform size_t dstH = (uniform size_t)self->dst.H;
20 |   const uniform size_t dstW = (uniform size_t)self->dst.W;
21 |
22 |   // Offset of the destination row; the matching pair of source rows starts at 4x this offset
23 |   const uniform size_t dstRowOffset = (cb*dstH + h) * (dstW*B);
24 |   uniform float* const uniform dstRow  = (uniform float* uniform)self->dst.ptr + dstRowOffset;
25 |   uniform float* const uniform srcRow0 = (uniform float* uniform)self->src.ptr + dstRowOffset * 4;
26 |   uniform float* const uniform srcRow1 = srcRow0 + dstW*2*B; // next source row
27 |
28 |   for (uniform size_t w = 0; w < dstW; ++w)
29 |   {
30 |     // Left element of the 2x2 neighborhood in each source row; the right one is B floats further
31 |     uniform float* const uniform srcTop    = &srcRow0[w*2*B];
32 |     uniform float* const uniform srcBottom = &srcRow1[w*2*B];
33 |
34 |     const float topMax    = max(*((varying float* uniform)srcTop),
35 |                                 *((varying float* uniform)(srcTop + B)));
36 |     const float bottomMax = max(*((varying float* uniform)srcBottom),
37 |                                 *((varying float* uniform)(srcBottom + B)));
38 |
39 |     streaming_store(&dstRow[w*B], max(topMax, bottomMax));
40 |   }
41 | }
--------------------------------------------------------------------------------
/devices/cpu/cpu_upsample.cpp:
--------------------------------------------------------------------------------
1 | // Copyright 2018 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #include "cpu_upsample.h"
5 | #include "cpu_upsample_ispc.h"
6 | #include "cpu_common.h"
7 |
8 | OIDN_NAMESPACE_BEGIN
9 |
10 | CPUUpsample::CPUUpsample(CPUEngine* engine, const UpsampleDesc& desc)
11 |   : Upsample(desc),
12 |     engine(engine)
13 | {
14 |   if (srcDesc.layout != TensorLayout::chw &&
15 |       srcDesc.layout != TensorLayout::Chw8c &&
16 |       srcDesc.layout != TensorLayout::Chw16c)
17 |     throw
std::invalid_argument("unsupported upsampling source layout");
18 | }
19 | // Submits the upsampling work to the engine; throws std::logic_error if src or dst has not been set
20 | void CPUUpsample::submitKernels(const Ref& ct)
21 | {
22 |   if (!src || !dst)
23 |     throw std::logic_error("upsampling source/destination not set");
24 |   // Any layout other than plain chw is handled by the ISPC kernel, one call per (channel block, source row)
25 |   if (srcDesc.layout != TensorLayout::chw)
26 |   {
27 |     const int blockC = getTensorLayoutInfo(srcDesc.layout).blockC;
28 |
29 |     ispc::CPUUpsampleKernel kernel;
30 |     kernel.src = *src;
31 |     kernel.dst = *dst;
32 |
33 |     engine->submitFunc([=]
34 |     {
35 |       parallel_for(kernel.src.C / blockC, kernel.src.H, [&](int cb, int h)
36 |       {
37 |         ispc::CPUUpsampleKernel_run(&kernel, cb, h);
38 |       });
39 |     }, ct);
40 |   }
41 |   else
42 |   {
43 |     const int C = src->getPaddedC();
44 |     const size_t H = src->getH();
45 |     const size_t W = src->getW();
46 |     const float* srcPtr = (float*)src->getPtr();
47 |     float* dstPtr = (float*)dst->getPtr();
48 |     // Plain chw layout: every source value is replicated into a 2x2 block of the destination
49 |     engine->submitFunc([=]
50 |     {
51 |       parallel_for(C, H, [&](int c, size_t h)
52 |       {
53 |         const size_t offset = (c*H + h) * W; // start of source row h in channel c
54 |         const float* srcPtr_line = srcPtr + offset;
55 |         float* dstPtr_line0 = dstPtr + offset * 4; // first of the two destination rows written for this source row
56 |         float* dstPtr_line1 = dstPtr_line0 + W*2; // next line
57 |
58 |         #pragma unroll(16)
59 |         for (size_t w = 0; w < W; ++w)
60 |         {
61 |           // Load value
62 |           const float value = srcPtr_line[w];
63 |
64 |           // Store value 2x2
65 |           dstPtr_line0[w*2  ] = value;
66 |           dstPtr_line0[w*2+1] = value;
67 |           dstPtr_line1[w*2  ] = value;
68 |           dstPtr_line1[w*2+1] = value;
69 |         }
70 |       });
71 |     }, ct);
72 |   }
73 | }
74 | OIDN_NAMESPACE_END
--------------------------------------------------------------------------------
/devices/cpu/cpu_upsample.h:
--------------------------------------------------------------------------------
1 | // Copyright 2018 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include "core/upsample.h"
7 | #include "cpu_engine.h"
8 |
9 | OIDN_NAMESPACE_BEGIN
10 |
11 | class CPUUpsample final : public Upsample
12 | {
13 | public:
14 |
CPUUpsample(CPUEngine* engine, const UpsampleDesc& desc);
15 |
16 |   Engine* getEngine() const override { return engine; }
17 |   void submitKernels(const Ref& ct) override;
18 |
19 | private:
20 |   CPUEngine* engine;
21 | };
22 |
23 | OIDN_NAMESPACE_END
24 |
--------------------------------------------------------------------------------
/devices/cpu/cpu_upsample.ispc:
--------------------------------------------------------------------------------
1 | // Copyright 2018 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #include "tensor_accessor.isph"
5 |
6 | // Parameters of the CPU upsampling kernel
7 | struct CPUUpsampleKernel
8 | {
9 |   uniform TensorAccessor3D src; // source tensor
10 |   uniform TensorAccessor3D dst; // destination tensor
11 | };
12 |
13 | // Upsamples source row h of channel block cb: each source element is written to a 2x2
14 | // neighborhood spanning two consecutive destination rows.
15 | // NOTE(review): B is defined outside this file, presumably the channel block size - confirm
16 | export void CPUUpsampleKernel_run(const uniform CPUUpsampleKernel* uniform self,
17 |                                   uniform int cb, uniform int h)
18 | {
19 |   const uniform size_t srcH = (uniform size_t)self->src.H;
20 |   const uniform size_t srcW = (uniform size_t)self->src.W;
21 |
22 |   // Offset of the source row; the matching pair of destination rows starts at 4x this offset
23 |   const uniform size_t srcRowOffset = (cb*srcH + h) * (srcW*B);
24 |   uniform float* const uniform srcRow  = (uniform float* uniform)self->src.ptr + srcRowOffset;
25 |   uniform float* const uniform dstRow0 = (uniform float* uniform)self->dst.ptr + srcRowOffset * 4;
26 |   uniform float* const uniform dstRow1 = dstRow0 + srcW*2*B; // next destination row
27 |
28 |   for (uniform size_t w = 0; w < srcW; ++w)
29 |   {
30 |     const float value = *((varying float* uniform)&srcRow[w*B]);
31 |
32 |     // Left element of the 2x2 neighborhood in each destination row; the right one is B floats further
33 |     uniform float* const uniform dstTop    = &dstRow0[w*2*B];
34 |     uniform float* const uniform dstBottom = &dstRow1[w*2*B];
35 |
36 |     streaming_store(dstTop,        value);
37 |     streaming_store(dstTop + B,    value);
38 |     streaming_store(dstBottom,     value);
39 |     streaming_store(dstBottom + B, value);
40 |   }
41 | }
--------------------------------------------------------------------------------
/devices/cpu/dnnl/dnnl_common.h:
--------------------------------------------------------------------------------
1 | // Copyright 2018 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include "core/tensor.h"
7 | #include "dnnl_engine.h"
8
| 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | dnnl::memory::data_type toDNNL(DataType dataType); 12 | dnnl::memory::desc toDNNL(const TensorDesc& td); 13 | 14 | // Creates a DNNL memory structure for a buffer 15 | dnnl::memory toDNNL(const Ref& buffer); 16 | 17 | // Returns the internal DNNL memory structure of a DNNLTensor 18 | const dnnl::memory& getDNNL(const Ref& tensor); 19 | 20 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/cpu/dnnl/dnnl_conv.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "dnnl_conv.h" 5 | #include "dnnl_tensor.h" 6 | 7 | OIDN_NAMESPACE_BEGIN 8 | 9 | DNNLConv::DNNLConv(DNNLEngine* engine, const ConvDesc& desc) 10 | : Conv(desc), 11 | engine(engine) 12 | { 13 | const dnnl::memory::dims strides = {1, 1}; 14 | const dnnl::memory::dims padding = {1, 1}; 15 | 16 | // Incorporate activation 17 | dnnl::primitive_attr attr; 18 | if (activation == Activation::ReLU) 19 | { 20 | dnnl::post_ops ops; 21 | ops.append_eltwise( 22 | dnnl::algorithm::eltwise_relu, 23 | 0.f, // alpha 24 | 0.f // beta 25 | ); 26 | attr.set_post_ops(ops); 27 | } 28 | attr.set_scratchpad_mode(dnnl::scratchpad_mode::user); 29 | 30 | primDesc = dnnl::convolution_forward::primitive_desc( 31 | engine->getDNNLEngine(), 32 | dnnl::prop_kind::forward_inference, dnnl::algorithm::convolution_direct, 33 | toDNNL(srcDesc), 34 | toDNNL(weightDesc), 35 | toDNNL(biasDesc), 36 | toDNNL(dstDesc), 37 | strides, padding, padding, 38 | attr); 39 | } 40 | 41 | size_t DNNLConv::getScratchByteSize() 42 | { 43 | return primDesc.scratchpad_desc().get_size(); 44 | } 45 | 46 | void DNNLConv::setScratch(const Ref& scratch) 47 | { 48 | this->scratch = scratch; 49 | args[DNNL_ARG_SCRATCHPAD] = toDNNL(scratch); 50 | } 51 | 52 | void DNNLConv::updateSrc() 53 | { 54 | args[DNNL_ARG_SRC] = getDNNL(src); 55 | } 56 | 57 | 
void DNNLConv::updateWeight() 58 | { 59 | args[DNNL_ARG_WEIGHTS] = getDNNL(weight); 60 | } 61 | 62 | void DNNLConv::updateBias() 63 | { 64 | args[DNNL_ARG_BIAS] = getDNNL(bias); 65 | } 66 | 67 | void DNNLConv::updateDst() 68 | { 69 | args[DNNL_ARG_DST] = getDNNL(dst); 70 | } 71 | 72 | void DNNLConv::finalize() 73 | { 74 | prim = dnnl::convolution_forward(primDesc); 75 | } 76 | 77 | void DNNLConv::submitKernels(const Ref& ct) 78 | { 79 | if (!prim) 80 | throw std::logic_error("convolution not finalized"); 81 | if (!src || !dst || !weight || !bias) 82 | throw std::logic_error("convolution source/weight/bias/destination not set"); 83 | 84 | engine->submitFunc([=] { prim.execute(engine->getDNNLStream(), args); }, ct); 85 | } 86 | 87 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/cpu/dnnl/dnnl_conv.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/conv.h" 7 | #include "dnnl_common.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | class DNNLConv final : public Conv 12 | { 13 | public: 14 | DNNLConv(DNNLEngine* engine, const ConvDesc& desc); 15 | 16 | Engine* getEngine() const override { return engine; } 17 | 18 | size_t getScratchByteSize() override; 19 | void setScratch(const Ref& scratch) override; 20 | 21 | void finalize() override; 22 | void submitKernels(const Ref& ct) override; 23 | 24 | private: 25 | void updateSrc() override; 26 | void updateWeight() override; 27 | void updateBias() override; 28 | void updateDst() override; 29 | 30 | DNNLEngine* engine; 31 | dnnl::convolution_forward::primitive_desc primDesc; 32 | dnnl::convolution_forward prim; 33 | std::unordered_map args; 34 | Ref scratch; 35 | }; 36 | 37 | OIDN_NAMESPACE_END 38 | -------------------------------------------------------------------------------- /devices/cpu/dnnl/dnnl_engine.cpp: 
-------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "dnnl_engine.h" 5 | #include "dnnl_tensor.h" 6 | #include "dnnl_conv.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | 10 | DNNLEngine::DNNLEngine(CPUDevice* device, int numThreads) 11 | : CPUEngine(device, numThreads) 12 | { 13 | dnnl_set_verbose(clamp(device->verbose - 2, 0, 2)); // unfortunately this is not per-device but global 14 | dnnlEngine = dnnl::engine(dnnl::engine::kind::cpu, 0); 15 | dnnlStream = dnnl::stream(dnnlEngine); 16 | } 17 | 18 | Ref DNNLEngine::newTensor(const TensorDesc& desc, Storage storage) 19 | { 20 | if (!isSupported(desc)) 21 | throw std::invalid_argument("unsupported tensor descriptor"); 22 | 23 | return makeRef(this, desc, storage); 24 | } 25 | 26 | Ref DNNLEngine::newTensor(const Ref& buffer, const TensorDesc& desc, size_t byteOffset) 27 | { 28 | if (!isSupported(desc)) 29 | throw std::invalid_argument("unsupported tensor descriptor"); 30 | if (buffer->getEngine() != this) 31 | throw std::invalid_argument("buffer was created by a different engine"); 32 | 33 | return makeRef(buffer, desc, byteOffset); 34 | } 35 | 36 | Ref DNNLEngine::newConv(const ConvDesc& desc) 37 | { 38 | return makeRef(this, desc); 39 | } 40 | 41 | OIDN_NAMESPACE_END 42 | -------------------------------------------------------------------------------- /devices/cpu/dnnl/dnnl_engine.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "../cpu_engine.h" 7 | #include "mkl-dnn/include/dnnl.hpp" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | class DNNLEngine final : public CPUEngine 12 | { 13 | public: 14 | DNNLEngine(CPUDevice* device, int numThreads); 15 | 16 | oidn_inline dnnl::engine& getDNNLEngine() { return dnnlEngine; } 17 | oidn_inline dnnl::stream& 
getDNNLStream() { return dnnlStream; } 18 | 19 | Ref newTensor(const TensorDesc& desc, Storage storage) override; 20 | Ref newTensor(const Ref& buffer, const TensorDesc& desc, size_t byteOffset) override; 21 | 22 | // Ops 23 | Ref newConv(const ConvDesc& desc) override; 24 | 25 | private: 26 | dnnl::engine dnnlEngine; 27 | dnnl::stream dnnlStream; 28 | }; 29 | 30 | OIDN_NAMESPACE_END 31 | -------------------------------------------------------------------------------- /devices/cpu/dnnl/dnnl_tensor.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "dnnl_tensor.h" 5 | 6 | OIDN_NAMESPACE_BEGIN 7 | 8 | DNNLTensor::DNNLTensor(DNNLEngine* engine, const TensorDesc& desc, Storage storage) 9 | : Tensor(engine->newBuffer(desc.getByteSize(), storage), desc) 10 | { 11 | mem = dnnl::memory(toDNNL(getDesc()), engine->getDNNLEngine(), buffer->getPtr()); 12 | } 13 | 14 | DNNLTensor::DNNLTensor(const Ref& buffer, const TensorDesc& desc, size_t byteOffset) 15 | : Tensor(buffer, desc, byteOffset) 16 | { 17 | if (byteOffset + getByteSize() > buffer->getByteSize()) 18 | throw Exception(Error::InvalidArgument, "buffer region is out of bounds"); 19 | 20 | mem = dnnl::memory(toDNNL(getDesc()), 21 | static_cast(buffer->getEngine())->getDNNLEngine(), 22 | static_cast(buffer->getPtr()) + byteOffset); 23 | } 24 | 25 | void DNNLTensor::postRealloc() 26 | { 27 | if (buffer) 28 | mem.set_data_handle(static_cast(buffer->getPtr()) + byteOffset); 29 | } 30 | 31 | OIDN_NAMESPACE_END 32 | -------------------------------------------------------------------------------- /devices/cpu/dnnl/dnnl_tensor.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/tensor.h" 7 | #include "dnnl_common.h" 8 | 9 | 
OIDN_NAMESPACE_BEGIN
10 |
11 | // Native DNNL tensor
12 | class DNNLTensor final : public Tensor
13 | {
14 | public:
15 |   DNNLTensor(DNNLEngine* engine, const TensorDesc& desc, Storage storage);
16 |   DNNLTensor(const Ref& buffer, const TensorDesc& desc, size_t byteOffset);
17 |
18 |   void* getPtr() const override { return mem.get_data_handle(); }
19 |   const dnnl::memory& getDNNLMemory() const { return mem; }
20 |
21 | private:
22 |   void postRealloc() override;
23 |
24 |   dnnl::memory mem;
25 | };
26 |
27 | OIDN_NAMESPACE_END
28 |
--------------------------------------------------------------------------------
/devices/cpu/platform.ispc:
--------------------------------------------------------------------------------
1 | // Copyright 2024 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #include "platform.isph"
5 |
6 | // -----------------------------------------------------------------------------------------------
7 | // System information
8 | // -----------------------------------------------------------------------------------------------
9 |
10 | // CPU architectures that getCPUArch() can report
11 | enum CPUArch
12 | {
13 |   CPUArch_Unknown,
14 |   CPUArch_SSE2,
15 |   CPUArch_SSE4,
16 |   CPUArch_AVX2,
17 |   CPUArch_AVX512,
18 |   CPUArch_NEON
19 | };
20 |
21 | // Returns the architecture matching the ISPC_TARGET_* macro defined at compile time,
22 | // or CPUArch_Unknown if none of the checked macros is defined
23 | export uniform CPUArch getCPUArch()
24 | {
25 | #if defined(ISPC_TARGET_SSE2)
26 |   const uniform CPUArch arch = CPUArch_SSE2;
27 | #elif defined(ISPC_TARGET_SSE4)
28 |   const uniform CPUArch arch = CPUArch_SSE4;
29 | #elif defined(ISPC_TARGET_AVX2)
30 |   const uniform CPUArch arch = CPUArch_AVX2;
31 | #elif defined(ISPC_TARGET_AVX512SKX) || defined(ISPC_TARGET_AVX512SPR)
32 |   const uniform CPUArch arch = CPUArch_AVX512;
33 | #elif defined(ISPC_TARGET_NEON)
34 |   const uniform CPUArch arch = CPUArch_NEON;
35 | #else
36 |   const uniform CPUArch arch = CPUArch_Unknown;
37 | #endif
38 |   return arch;
39 | }
--------------------------------------------------------------------------------
/devices/cpu/platform.isph:
--------------------------------------------------------------------------------
1 | // Copyright 2024 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | //
------------------------------------------------------------------------------------------------- 7 | // Macros 8 | // ------------------------------------------------------------------------------------------------- 9 | 10 | #define _OIDN_CONCAT(a, b) a##b 11 | #define OIDN_CONCAT(a, b) _OIDN_CONCAT(a, b) 12 | 13 | // ------------------------------------------------------------------------------------------------- 14 | // Types 15 | // ------------------------------------------------------------------------------------------------- 16 | 17 | #ifndef ISPC_UINT_IS_DEFINED 18 | typedef unsigned int8 uint8; 19 | typedef unsigned int16 uint16; 20 | typedef unsigned int32 uint32; 21 | typedef unsigned int64 uint64; 22 | #endif -------------------------------------------------------------------------------- /devices/cpu/tasking.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "tasking.h" 5 | 6 | OIDN_NAMESPACE_BEGIN 7 | 8 | // ----------------------------------------------------------------------------------------------- 9 | // PinningObserver 10 | // ----------------------------------------------------------------------------------------------- 11 | 12 | PinningObserver::PinningObserver(const std::shared_ptr& affinity) 13 | : affinity(affinity) 14 | { 15 | observe(true); 16 | } 17 | 18 | PinningObserver::PinningObserver(const std::shared_ptr& affinity, tbb::task_arena& arena) 19 | : tbb::task_scheduler_observer(arena), 20 | affinity(affinity) 21 | { 22 | observe(true); 23 | } 24 | 25 | PinningObserver::~PinningObserver() 26 | { 27 | observe(false); 28 | } 29 | 30 | void PinningObserver::on_scheduler_entry(bool isWorker) 31 | { 32 | const int threadIndex = tbb::this_task_arena::current_thread_index(); 33 | if (threadIndex >= 0) 34 | affinity->set(threadIndex); 35 | } 36 | 37 | void PinningObserver::on_scheduler_exit(bool isWorker) 38 | 
{ 39 | const int threadIndex = tbb::this_task_arena::current_thread_index(); 40 | if (threadIndex >= 0) 41 | affinity->restore(threadIndex); 42 | } 43 | 44 | OIDN_NAMESPACE_END 45 | -------------------------------------------------------------------------------- /devices/cpu/tile.isph: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | struct Tile 5 | { 6 | uniform int hSrcBegin; 7 | uniform int wSrcBegin; 8 | uniform int hDstBegin; 9 | uniform int wDstBegin; 10 | uniform int H; 11 | uniform int W; 12 | }; -------------------------------------------------------------------------------- /devices/cuda/cuda_conv.cu: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "cuda_conv.h" 5 | #include "cutlass_conv.h" 6 | 7 | OIDN_NAMESPACE_BEGIN 8 | 9 | Ref newCUDAConv(CUDAEngine* engine, const ConvDesc& desc) 10 | { 11 | // Get the list of kernels supported by the engine 12 | std::vector kernels; 13 | const int smArch = engine->getSMArch(); 14 | if (smArch >= 80) 15 | kernels = getCutlassConvInstances<80>(); 16 | else if (smArch >= 75) 17 | kernels = getCutlassConvInstances<75>(); 18 | else if (smArch >= 70) 19 | kernels = getCutlassConvInstances<70>(); 20 | else 21 | throw std::runtime_error("unsupported convolution"); 22 | 23 | // Select the likely fastest compatible kernel 24 | const auto problemSize = toCutlassProblemSize(desc); 25 | const auto gemmSize = cutlass::conv::implicit_gemm_problem_size(cutlass::conv::Operator::kFprop, problemSize); 26 | const size_t M = gemmSize.m(); 27 | const size_t N = gemmSize.n(); 28 | const size_t K = gemmSize.k(); 29 | 30 | const DataType accumType = desc.fastMath ? 
desc.srcDesc.dataType : DataType::Float32; 31 | 32 | const CutlassConvFactory* bestKernel = nullptr; 33 | int bestBlockSize = 0; 34 | size_t bestCost = std::numeric_limits::max(); 35 | 36 | for (const auto& kernel : kernels) 37 | { 38 | if (kernel.dataType != desc.srcDesc.dataType || kernel.accumType < accumType) 39 | continue; 40 | 41 | const int blockSize = kernel.blockM * kernel.blockN * kernel.blockK; 42 | const size_t cost = round_up(M, kernel.blockM) * round_up(N, kernel.blockN) * round_up(K, kernel.blockK); 43 | 44 | if ((cost < bestCost) || 45 | (cost == bestCost && blockSize > bestBlockSize) || 46 | (cost == bestCost && blockSize == bestBlockSize && kernel.accumType == accumType)) 47 | { 48 | bestKernel = &kernel; 49 | bestBlockSize = blockSize; 50 | bestCost = cost; 51 | } 52 | } 53 | 54 | if (!bestKernel) 55 | throw std::runtime_error("unsupported convolution"); 56 | 57 | return bestKernel->make(engine, desc); 58 | } 59 | 60 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/cuda/cuda_conv.h: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/conv.h" 7 | #include "cuda_engine.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | Ref newCUDAConv(CUDAEngine* engine, const ConvDesc& desc); 12 | 13 | OIDN_NAMESPACE_END 14 | -------------------------------------------------------------------------------- /devices/cuda/cuda_device.h: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/device.h" 7 | #if defined(OIDN_DEVICE_CUDA_API_DRIVER) 8 | #include "curtn.h" 9 | #endif 10 | #include 11 | 12 | OIDN_NAMESPACE_BEGIN 13 | 14 | void checkError(cudaError_t error); 15 | 16 | class CUDAEngine; 17 | 18 | class 
CUDAPhysicalDevice : public PhysicalDevice 19 | { 20 | public: 21 | int deviceID; 22 | 23 | CUDAPhysicalDevice(int deviceID, const cudaDeviceProp& prop, int score); 24 | }; 25 | 26 | class CUDADevice final : public Device 27 | { 28 | friend class CUDAEngine; 29 | 30 | public: 31 | static std::vector> getPhysicalDevices(); 32 | static bool isSupported(const cudaDeviceProp& prop); 33 | static bool isSupported(int deviceID); 34 | 35 | CUDADevice(int deviceID, cudaStream_t stream); 36 | explicit CUDADevice(const Ref& physicalDevice); 37 | ~CUDADevice(); 38 | 39 | void enter() override; 40 | void leave() override; 41 | 42 | DeviceType getType() const override { return DeviceType::CUDA; } 43 | 44 | Storage getPtrStorage(const void* ptr) override; 45 | 46 | void wait() override; 47 | 48 | private: 49 | void init() override; 50 | 51 | int deviceID = 0; 52 | #if defined(OIDN_DEVICE_CUDA_API_DRIVER) 53 | CUdevice deviceHandle = -1; 54 | CUcontext context = nullptr; 55 | #else 56 | int prevDeviceID = -1; 57 | #endif 58 | cudaStream_t stream = nullptr; 59 | 60 | int maxWorkGroupSize = 0; 61 | int subgroupSize = 0; 62 | int smArch = 0; // compute capability 63 | }; 64 | 65 | OIDN_NAMESPACE_END 66 | -------------------------------------------------------------------------------- /devices/cuda/cuda_external_buffer.h: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/buffer.h" 7 | #include "cuda_engine.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | class CUDAExternalBuffer : public USMBuffer 12 | { 13 | public: 14 | CUDAExternalBuffer(Engine* engine, 15 | ExternalMemoryTypeFlag fdType, 16 | int fd, size_t byteSize); 17 | 18 | CUDAExternalBuffer(Engine* engine, 19 | ExternalMemoryTypeFlag handleType, 20 | void* handle, const void* name, size_t byteSize); 21 | 22 | ~CUDAExternalBuffer(); 23 | 24 | private: 25 | cudaExternalMemory_t 
extMem; 26 | 27 | void init(const cudaExternalMemoryHandleDesc& handleDesc); 28 | }; 29 | 30 | OIDN_NAMESPACE_END 31 | -------------------------------------------------------------------------------- /devices/cuda/cuda_module.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "core/context.h" 5 | #include "cuda_device.h" 6 | 7 | OIDN_NAMESPACE_BEGIN 8 | 9 | class CUDADeviceFactory : public CUDADeviceFactoryBase 10 | { 11 | public: 12 | bool isDeviceSupported(int deviceID) override 13 | { 14 | return CUDADevice::isSupported(deviceID); 15 | } 16 | 17 | Ref newDevice(const int* deviceIDs, const cudaStream_t* streams, int numPairs) override 18 | { 19 | if (numPairs != 1) 20 | throw Exception(Error::InvalidArgument, "invalid number of CUDA devices/streams"); 21 | if (deviceIDs == nullptr) 22 | throw Exception(Error::InvalidArgument, "array of CUDA devices is null"); 23 | if (streams == nullptr) 24 | throw Exception(Error::InvalidArgument, "array of CUDA streams is null"); 25 | 26 | return makeRef(deviceIDs[0], streams[0]); 27 | } 28 | 29 | Ref newDevice(const Ref& physicalDevice) override 30 | { 31 | assert(physicalDevice->type == DeviceType::CUDA); 32 | return makeRef(staticRefCast(physicalDevice)); 33 | } 34 | }; 35 | 36 | OIDN_DECLARE_INIT_MODULE(device_cuda) 37 | { 38 | #if defined(OIDN_DEVICE_CUDA_API_DRIVER) 39 | if (curtn::init() != cudaSuccess) 40 | return; 41 | #endif 42 | 43 | Context::registerDeviceType(DeviceType::CUDA, CUDADevice::getPhysicalDevices()); 44 | } 45 | 46 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/cuda/curtn.h: -------------------------------------------------------------------------------- 1 | // CURTN: a nano implementation of the CUDA Runtime API on top of the Driver API 2 | // Copyright 2024 Intel Corporation 3 | // SPDX-License-Identifier: 
Apache-2.0 4 | 5 | #include 6 | #include 7 | 8 | namespace curtn 9 | { 10 | // Unlike CUDA Runtime, CURTN requires explicit initialization before the first API call 11 | cudaError_t init(); 12 | 13 | // Unlike CUDA Runtime, CURTN requires explicit initialization and cleanup of the current context 14 | cudaError_t initContext(); 15 | cudaError_t cleanupContext(); 16 | } -------------------------------------------------------------------------------- /devices/cuda/cutlass_conv_sm70.cu: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "cutlass_conv.h" 5 | 6 | OIDN_NAMESPACE_BEGIN 7 | 8 | // Volta (SM 7.0) 9 | template<> 10 | std::vector getCutlassConvInstances<70>() 11 | { 12 | using namespace cutlass::arch; 13 | using cutlass::gemm::GemmShape; 14 | 15 | return 16 | { 17 | CutlassConvInstance, GemmShape<64, 32, 32>, 2>::get(), 18 | CutlassConvInstance, GemmShape<64, 64, 32>, 2>::get(), 19 | 20 | CutlassConvInstance, GemmShape<64, 32, 32>, 2>::get(), 21 | CutlassConvInstance, GemmShape<64, 64, 32>, 2>::get(), 22 | }; 23 | } 24 | 25 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/cuda/cutlass_conv_sm75.cu: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "cutlass_conv.h" 5 | 6 | OIDN_NAMESPACE_BEGIN 7 | 8 | // Turing (SM 7.5) 9 | template<> 10 | std::vector getCutlassConvInstances<75>() 11 | { 12 | using namespace cutlass::arch; 13 | using cutlass::gemm::GemmShape; 14 | 15 | return 16 | { 17 | CutlassConvInstance, GemmShape<64, 32, 32>, 2>::get(), 18 | CutlassConvInstance, GemmShape<64, 64, 32>, 2>::get(), 19 | 20 | CutlassConvInstance, GemmShape<64, 32, 32>, 2>::get(), 21 | CutlassConvInstance, GemmShape<64, 64, 32>, 2>::get(), 22 | }; 23 | } 24 | 
25 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/cuda/cutlass_conv_sm80.cu: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "cutlass_conv.h" 5 | 6 | OIDN_NAMESPACE_BEGIN 7 | 8 | // Ampere (SM 8.0), Ada Lovelace (SM 8.9), Hopper (SM 9.0), Blackwell (SM 10.0, 12.0) 9 | template<> 10 | std::vector getCutlassConvInstances<80>() 11 | { 12 | using namespace cutlass::arch; 13 | using cutlass::gemm::GemmShape; 14 | 15 | return 16 | { 17 | CutlassConvInstance, GemmShape<64, 32, 32>, 3 /*4*/>::get(), 18 | CutlassConvInstance, GemmShape<64, 64, 32>, 3>::get(), 19 | 20 | CutlassConvInstance, GemmShape<64, 32, 32>, 3 /*4*/>::get(), 21 | CutlassConvInstance, GemmShape<64, 64, 32>, 3>::get(), 22 | }; 23 | } 24 | 25 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/gpu/gpu_image_copy.h: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/kernel.h" 7 | #include "core/image_accessor.h" 8 | 9 | #if !defined(OIDN_COMPILE_METAL_DEVICE) 10 | #include "core/image_copy.h" 11 | #endif 12 | 13 | OIDN_NAMESPACE_BEGIN 14 | 15 | struct GPUImageCopyKernel 16 | { 17 | ImageAccessor src; 18 | ImageAccessor dst; 19 | 20 | oidn_device_inline void operator ()(const oidn_private WorkItem<2>& it) const 21 | { 22 | const int h = it.getGlobalID<0>(); 23 | const int w = it.getGlobalID<1>(); 24 | const vec3f value = src.get3(h, w); 25 | dst.set3(h, w, value); 26 | } 27 | }; 28 | 29 | #if !defined(OIDN_COMPILE_METAL_DEVICE) 30 | 31 | template 32 | class GPUImageCopy final : public ImageCopy 33 | { 34 | public: 35 | explicit GPUImageCopy(EngineT* engine) 36 | : engine(engine) {} 37 | 38 | Engine* 
getEngine() const override { return engine; } 39 | 40 | #if defined(OIDN_COMPILE_METAL) 41 | void finalize() override 42 | { 43 | pipeline = engine->newPipeline("imageCopy"); 44 | } 45 | #endif 46 | 47 | void submitKernels(const Ref& ct) override 48 | { 49 | check(); 50 | 51 | GPUImageCopyKernel kernel; 52 | kernel.src = *src; 53 | kernel.dst = *dst; 54 | 55 | #if defined(OIDN_COMPILE_METAL) 56 | engine->submitKernel(WorkDim<2>(dst->getH(), dst->getW()), kernel, 57 | pipeline, {src->getBuffer(), dst->getBuffer()}); 58 | #else 59 | engine->submitKernel(WorkDim<2>(dst->getH(), dst->getW()), kernel); 60 | #endif 61 | } 62 | 63 | private: 64 | EngineT* engine; 65 | 66 | #if defined(OIDN_COMPILE_METAL) 67 | Ref pipeline; 68 | #endif 69 | }; 70 | 71 | #endif // !defined(OIDN_COMPILE_METAL_DEVICE) 72 | 73 | OIDN_NAMESPACE_END 74 | -------------------------------------------------------------------------------- /devices/gpu/gpu_pool.h: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/pool.h" 7 | #include "core/tensor_accessor.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | template 12 | struct GPUPoolKernel 13 | { 14 | TensorAccessor3D src; 15 | TensorAccessor3D dst; 16 | 17 | oidn_device_inline void operator ()(const WorkItem<3>& it) const 18 | { 19 | const int c = it.getGlobalID<0>(); 20 | const int h = it.getGlobalID<1>(); 21 | const int w = it.getGlobalID<2>(); 22 | 23 | const T x0 = src(c, h*2, w*2); 24 | const T x1 = src(c, h*2, w*2+1); 25 | const T x2 = src(c, h*2+1, w*2); 26 | const T x3 = src(c, h*2+1, w*2+1); 27 | 28 | dst(c, h, w) = math::max(math::max(x0, x1), math::max(x2, x3)); 29 | } 30 | }; 31 | 32 | // Optimized for HWC layout (memory coalescing) 33 | template 34 | struct GPUPoolKernel 35 | { 36 | TensorAccessor3D src; 37 | TensorAccessor3D dst; 38 | 39 | oidn_device_inline void operator ()(const 
WorkItem<3>& it) const 40 | { 41 | const int h = it.getGlobalID<0>(); 42 | const int w = it.getGlobalID<1>(); 43 | const int c = it.getGlobalID<2>(); 44 | 45 | const T x0 = src(c, h*2, w*2); 46 | const T x1 = src(c, h*2, w*2+1); 47 | const T x2 = src(c, h*2+1, w*2); 48 | const T x3 = src(c, h*2+1, w*2+1); 49 | 50 | dst(c, h, w) = math::max(math::max(x0, x1), math::max(x2, x3)); 51 | } 52 | }; 53 | 54 | template 55 | class GPUPool : public Pool 56 | { 57 | public: 58 | GPUPool(EngineT* engine, 59 | const PoolDesc& desc) 60 | : Pool(desc), 61 | engine(engine) {} 62 | 63 | Engine* getEngine() const override { return engine; } 64 | 65 | void submitKernels(const Ref& ct) override 66 | { 67 | if (!src || !dst) 68 | throw std::logic_error("pooling source/destination not set"); 69 | 70 | GPUPoolKernel kernel; 71 | kernel.src = *src; 72 | kernel.dst = *dst; 73 | 74 | if (srcDstLayout == TensorLayout::hwc) 75 | engine->submitKernel(WorkDim<3>(dst->getH(), dst->getW(), dst->getPaddedC()), kernel); 76 | else 77 | engine->submitKernel(WorkDim<3>(dst->getPaddedC(), dst->getH(), dst->getW()), kernel); 78 | } 79 | 80 | private: 81 | EngineT* engine; 82 | }; 83 | 84 | OIDN_NAMESPACE_END 85 | -------------------------------------------------------------------------------- /devices/gpu/gpu_upsample.h: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/upsample.h" 7 | #include "core/tensor_accessor.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | template 12 | struct GPUUpsampleKernel 13 | { 14 | TensorAccessor3D src; 15 | TensorAccessor3D dst; 16 | 17 | oidn_device_inline void operator ()(const WorkItem<3>& it) const 18 | { 19 | const int c = it.getGlobalID<0>(); 20 | const int h = it.getGlobalID<1>(); 21 | const int w = it.getGlobalID<2>(); 22 | 23 | const T x = src(c, h, w); 24 | 25 | dst(c, h*2, w*2) = x; 26 | dst(c, h*2, w*2+1) = x; 
27 | dst(c, h*2+1, w*2) = x; 28 | dst(c, h*2+1, w*2+1) = x; 29 | } 30 | }; 31 | 32 | // Optimized for HWC layout (memory coalescing) 33 | template 34 | struct GPUUpsampleKernel 35 | { 36 | TensorAccessor3D src; 37 | TensorAccessor3D dst; 38 | 39 | oidn_device_inline void operator ()(const WorkItem<3>& it) const 40 | { 41 | const int h = it.getGlobalID<0>(); 42 | const int w = it.getGlobalID<1>(); 43 | const int c = it.getGlobalID<2>(); 44 | 45 | const T x = src(c, h, w); 46 | 47 | dst(c, h*2, w*2) = x; 48 | dst(c, h*2, w*2+1) = x; 49 | dst(c, h*2+1, w*2) = x; 50 | dst(c, h*2+1, w*2+1) = x; 51 | } 52 | }; 53 | 54 | template 55 | class GPUUpsample : public Upsample 56 | { 57 | public: 58 | GPUUpsample(EngineT* engine, 59 | const UpsampleDesc& desc) 60 | : Upsample(desc), 61 | engine(engine) {} 62 | 63 | Engine* getEngine() const override { return engine; } 64 | 65 | void submitKernels(const Ref& ct) override 66 | { 67 | if (!src || !dst) 68 | throw std::logic_error("upsampling source/destination not set"); 69 | 70 | GPUUpsampleKernel kernel; 71 | kernel.src = *src; 72 | kernel.dst = *dst; 73 | 74 | if (srcDstLayout == TensorLayout::hwc) 75 | engine->submitKernel(WorkDim<3>(src->getH(), src->getW(), src->getPaddedC()), kernel); 76 | else 77 | engine->submitKernel(WorkDim<3>(src->getPaddedC(), src->getH(), src->getW()), kernel); 78 | } 79 | 80 | private: 81 | EngineT* engine; 82 | }; 83 | 84 | OIDN_NAMESPACE_END 85 | -------------------------------------------------------------------------------- /devices/hip/ck_conv.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | // FIXME: workaround for compile error when building for target unsupported by CK 7 | #include "ck/ck.hpp" 8 | #if !defined(CK_BUFFER_RESOURCE_3RD_DWORD) 9 | #define CK_BUFFER_RESOURCE_3RD_DWORD -1 10 | #endif 11 | 12 | #include "core/conv.h" 13 | #include 
"hip_engine.h" 14 | #include "ck/utility/data_type.hpp" 15 | 16 | OIDN_NAMESPACE_BEGIN 17 | 18 | template 19 | struct CKDataType { using Type = T; }; 20 | 21 | template<> 22 | struct CKDataType { using Type = ck::half_t; }; 23 | 24 | template 25 | using S = ck::Sequence; 26 | 27 | inline std::array getCKTensorLengths(const TensorDesc& td) 28 | { 29 | switch (td.layout) 30 | { 31 | case TensorLayout::x: 32 | return {1, 1, td.getPaddedX(), 1, 1}; // GNCHW 33 | case TensorLayout::hwc: 34 | return {1, 1, td.getPaddedC(), td.getH(), td.getW()}; // GNCHW 35 | case TensorLayout::ohwi: 36 | return {1, td.getPaddedO(), td.getPaddedI(), td.getH(), td.getW()}; // GKCYX 37 | default: 38 | throw std::invalid_argument("unsupported tensor layout"); 39 | } 40 | } 41 | 42 | inline std::array getCKTensorStrides(const TensorDesc& td) 43 | { 44 | switch (td.layout) 45 | { 46 | case TensorLayout::x: 47 | return {0, 0, 1, 0, 0}; // GNCHW 48 | case TensorLayout::hwc: 49 | return {0, 0, 1, td.getW() * td.getPaddedC(), td.getPaddedC()}; // GNCHW 50 | case TensorLayout::ohwi: 51 | return {0, td.getH() * td.getW() * td.getPaddedI(), 52 | 1, td.getW() * td.getPaddedI(), td.getPaddedI()}; // GKCYX 53 | default: 54 | throw std::invalid_argument("unsupported tensor layout"); 55 | } 56 | } 57 | 58 | struct CKConvFactory 59 | { 60 | Ref (*make)(HIPEngine*, const ConvDesc&); 61 | 62 | DataType dataType; 63 | DataType accumType; 64 | Activation activation; 65 | int blockM, blockN, blockK; // threadblock size 66 | }; 67 | 68 | template 69 | std::vector getCKConvInstances(Activation activation); 70 | 71 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/hip/hip_conv.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2025 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "hip_conv.h" 5 | #include "ck_conv.h" 6 | 7 | OIDN_NAMESPACE_BEGIN 8 | 9 | Ref 
newHIPConv(HIPEngine* engine, const ConvDesc& desc) 10 | { 11 | // Get the list of kernels supported by the engine 12 | std::vector kernels; 13 | switch (engine->getArch()) 14 | { 15 | case HIPArch::DL: 16 | kernels = getCKConvInstances(desc.activation); 17 | break; 18 | case HIPArch::WMMA: 19 | kernels = getCKConvInstances(desc.activation); 20 | break; 21 | default: 22 | throw std::runtime_error("unsupported architecture"); 23 | } 24 | 25 | // Select the likely fastest compatible kernel based on the GEMM dimensions 26 | const size_t M = desc.srcDesc.getH() * desc.srcDesc.getW(); // == destination dims 27 | const size_t N = desc.weightDesc.getPaddedO(); 28 | const size_t K = desc.weightDesc.getPaddedI() * desc.weightDesc.getH() * desc.weightDesc.getW(); 29 | 30 | const DataType accumType = DataType::Float32; 31 | 32 | const CKConvFactory* bestKernel = nullptr; 33 | int bestBlockSize = 0; 34 | size_t bestCost = std::numeric_limits::max(); 35 | 36 | for (const auto& kernel : kernels) 37 | { 38 | if (kernel.dataType != desc.srcDesc.dataType || kernel.accumType < accumType) 39 | continue; 40 | 41 | const int blockSize = kernel.blockM * kernel.blockN * kernel.blockK; 42 | const size_t cost = round_up(M, kernel.blockM) * round_up(N, kernel.blockN) * round_up(K, kernel.blockK); 43 | 44 | if ((cost < bestCost) || 45 | (cost == bestCost && blockSize > bestBlockSize) || 46 | (cost == bestCost && blockSize == bestBlockSize && kernel.accumType == accumType)) 47 | { 48 | bestKernel = &kernel; 49 | bestBlockSize = blockSize; 50 | bestCost = cost; 51 | } 52 | } 53 | 54 | if (!bestKernel) 55 | throw std::runtime_error("unsupported convolution"); 56 | 57 | return bestKernel->make(engine, desc); 58 | } 59 | 60 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/hip/hip_conv.h: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Intel Corporation 2 | // 
SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/conv.h" 7 | #include "hip_engine.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | Ref newHIPConv(HIPEngine* engine, const ConvDesc& desc); 12 | 13 | OIDN_NAMESPACE_END 14 | -------------------------------------------------------------------------------- /devices/hip/hip_device.h: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/device.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | 10 | void checkError(hipError_t error); 11 | 12 | class HIPEngine; 13 | 14 | // GPU matrix architecture 15 | enum class HIPArch 16 | { 17 | Unknown, 18 | DL, // RDNA 2 19 | WMMA, // RDNA 3 20 | }; 21 | 22 | class HIPPhysicalDevice : public PhysicalDevice 23 | { 24 | public: 25 | int deviceID; 26 | 27 | HIPPhysicalDevice(int deviceID, const hipDeviceProp_t& prop, int score); 28 | }; 29 | 30 | class HIPDevice final : public Device 31 | { 32 | friend class HIPEngine; 33 | 34 | public: 35 | static std::vector> getPhysicalDevices(); 36 | static std::string getName(const hipDeviceProp_t& prop); 37 | static std::string getArchName(const hipDeviceProp_t& prop); 38 | static HIPArch getArch(const hipDeviceProp_t& prop); 39 | static bool isSupported(int deviceID); 40 | 41 | HIPDevice(int deviceID, hipStream_t stream); 42 | explicit HIPDevice(const Ref& physicalDevice); 43 | ~HIPDevice(); 44 | 45 | void enter() override; 46 | void leave() override; 47 | 48 | DeviceType getType() const override { return DeviceType::HIP; } 49 | 50 | Storage getPtrStorage(const void* ptr) override; 51 | 52 | void wait() override; 53 | 54 | private: 55 | void init() override; 56 | 57 | int deviceID = 0; 58 | int prevDeviceID = -1; 59 | hipStream_t stream = nullptr; 60 | 61 | HIPArch arch = HIPArch::Unknown; 62 | int maxWorkGroupSize = 0; 63 | int subgroupSize = 0; 64 | }; 65 | 66 | OIDN_NAMESPACE_END 67 
| -------------------------------------------------------------------------------- /devices/hip/hip_external_buffer.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/buffer.h" 7 | #include "hip_engine.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | class HIPExternalBuffer : public USMBuffer 12 | { 13 | public: 14 | HIPExternalBuffer(Engine* engine, 15 | ExternalMemoryTypeFlag fdType, 16 | int fd, size_t byteSize); 17 | 18 | HIPExternalBuffer(Engine* engine, 19 | ExternalMemoryTypeFlag handleType, 20 | void* handle, const void* name, size_t byteSize); 21 | 22 | ~HIPExternalBuffer(); 23 | 24 | private: 25 | hipExternalMemory_t extMem; 26 | 27 | void init(const hipExternalMemoryHandleDesc& handleDesc); 28 | }; 29 | 30 | OIDN_NAMESPACE_END 31 | -------------------------------------------------------------------------------- /devices/hip/hip_module.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "core/context.h" 5 | #include "hip_device.h" 6 | 7 | OIDN_NAMESPACE_BEGIN 8 | 9 | class HIPDeviceFactory : public HIPDeviceFactoryBase 10 | { 11 | public: 12 | bool isDeviceSupported(int deviceID) override 13 | { 14 | return HIPDevice::isSupported(deviceID); 15 | } 16 | 17 | Ref newDevice(const int* deviceIDs, const hipStream_t* streams, int numPairs) override 18 | { 19 | if (numPairs != 1) 20 | throw Exception(Error::InvalidArgument, "invalid number of HIP devices/streams"); 21 | if (deviceIDs == nullptr) 22 | throw Exception(Error::InvalidArgument, "array of HIP devices is null"); 23 | if (streams == nullptr) 24 | throw Exception(Error::InvalidArgument, "array of HIP streams is null"); 25 | 26 | return makeRef(deviceIDs[0], streams[0]); 27 | } 28 | 29 | Ref newDevice(const Ref& 
physicalDevice) override 30 | { 31 | assert(physicalDevice->type == DeviceType::HIP); 32 | return makeRef(staticRefCast(physicalDevice)); 33 | } 34 | }; 35 | 36 | OIDN_DECLARE_INIT_MODULE(device_hip) 37 | { 38 | Context::registerDeviceType(DeviceType::HIP, HIPDevice::getPhysicalDevices()); 39 | } 40 | 41 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/metal/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ## Copyright 2023 Apple Inc. 2 | ## Copyright 2023 Intel Corporation 3 | ## SPDX-License-Identifier: Apache-2.0 4 | 5 | cmake_minimum_required(VERSION 3.21) 6 | 7 | include(oidn_metal) 8 | 9 | set(OIDN_METAL_SOURCES 10 | metal_buffer.mm 11 | metal_buffer.h 12 | metal_common.h 13 | metal_common.mm 14 | metal_device.mm 15 | metal_device.h 16 | metal_conv.mm 17 | metal_conv.h 18 | metal_engine.mm 19 | metal_engine.h 20 | metal_heap.h 21 | metal_heap.mm 22 | metal_module.mm 23 | ) 24 | 25 | add_library(OpenImageDenoise_device_metal ${OIDN_LIB_TYPE} ${OIDN_METAL_SOURCES} ${OIDN_RESOURCE_FILE}) 26 | 27 | metallib_target_add_sources(OpenImageDenoise_device_metal metal_kernels 28 | metal_kernels.metal 29 | INCLUDE_DIRECTORIES 30 | ${PROJECT_SOURCE_DIR} 31 | COMPILE_OPTIONS 32 | -std=metal3.0 -fno-fast-math -m${OIDN_APPLE_SDK}-version-min=${OIDN_APPLE_SDK_VERSION_MIN} 33 | ) 34 | 35 | set_target_properties(OpenImageDenoise_device_metal PROPERTIES 36 | OUTPUT_NAME ${OIDN_LIBRARY_NAME}_device_metal 37 | CXX_STANDARD 17 38 | ) 39 | if(OIDN_LIBRARY_VERSIONED) 40 | set_target_properties(OpenImageDenoise_device_metal PROPERTIES VERSION ${PROJECT_VERSION}) 41 | endif() 42 | 43 | target_compile_definitions(OpenImageDenoise_device_metal PRIVATE OIDN_COMPILE_METAL_HOST) 44 | 45 | target_link_libraries(OpenImageDenoise_device_metal PRIVATE "-framework Foundation") 46 | target_link_libraries(OpenImageDenoise_device_metal PRIVATE "-framework Metal") 47 | 
target_link_libraries(OpenImageDenoise_device_metal PRIVATE "-framework MetalPerformanceShadersGraph") 48 | target_link_libraries(OpenImageDenoise_device_metal PRIVATE "-framework MetalPerformanceShaders") 49 | target_link_libraries(OpenImageDenoise_device_metal PRIVATE OpenImageDenoise_core) 50 | 51 | if(OIDN_STATIC_LIB) 52 | oidn_install_static_module(OpenImageDenoise_device_metal) 53 | target_link_libraries(OpenImageDenoise PRIVATE OpenImageDenoise_device_metal) 54 | else() 55 | oidn_strip_symbols(OpenImageDenoise_device_metal) 56 | oidn_install_module(OpenImageDenoise_device_metal) 57 | endif() 58 | -------------------------------------------------------------------------------- /devices/metal/metal_buffer.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Apple Inc. 2 | // Copyright 2023 Intel Corporation 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | #pragma once 6 | 7 | #include "core/buffer.h" 8 | #include "metal_engine.h" 9 | 10 | OIDN_NAMESPACE_BEGIN 11 | 12 | class MetalBuffer : public Buffer 13 | { 14 | public: 15 | MetalBuffer(MetalEngine* engine, size_t byteSize, Storage storage); 16 | MetalBuffer(const Ref& arena, size_t byteSize, size_t byteOffset); 17 | MetalBuffer(MetalEngine* engine, void* data, size_t byteSize); 18 | MetalBuffer(MetalEngine* engine, id buffer); 19 | ~MetalBuffer(); 20 | 21 | Engine* getEngine() const override { return engine; } 22 | id getMTLBuffer() const { return buffer; } 23 | void* getPtr() const override; 24 | void* getHostPtr() const override; 25 | size_t getByteSize() const override { return byteSize; } 26 | bool isShared() const override { return shared; } 27 | Storage getStorage() const override { return storage; } 28 | 29 | void read(size_t byteOffset, size_t byteSize, void* dstHostPtr, 30 | SyncMode sync = SyncMode::Blocking) override; 31 | 32 | void write(size_t byteOffset, size_t byteSize, const void* srcHostPtr, 33 | SyncMode sync = SyncMode::Blocking) 
override; 34 | 35 | protected: 36 | void preRealloc() override; 37 | void postRealloc() override; 38 | 39 | private: 40 | void init(); 41 | void free(); 42 | 43 | MetalEngine* engine; 44 | id buffer; 45 | size_t byteSize; 46 | bool shared; 47 | Storage storage; 48 | }; 49 | 50 | OIDN_NAMESPACE_END 51 | -------------------------------------------------------------------------------- /devices/metal/metal_common.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Apple Inc. 2 | // Copyright 2023 Intel Corporation 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | #pragma once 6 | 7 | #include "core/tensor.h" 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | OIDN_NAMESPACE_BEGIN 15 | 16 | class MetalDevice; 17 | struct PoolDesc; 18 | struct ConvDesc; 19 | 20 | MTLResourceOptions toMTLResourceOptions(Storage storage); 21 | 22 | MPSDataType toMPSDataType(DataType dataType); 23 | MPSShape* toMPSShape(const TensorDesc& td); 24 | 25 | MPSGraphTensor* toMPSGraphConst(MPSGraph* graph, const Ref& t); 26 | MPSGraphTensor* toMPSGraphPlaceholder(MPSGraph* graph, TensorDesc td); 27 | MPSGraphTensor* toMPSGraphPlaceholder(MPSGraph* graph, ImageDesc imd); 28 | MPSGraphTensorData* newMPSGraphTensorData(const Ref& tensor); 29 | 30 | id getMTLBuffer(Ref buffer); 31 | 32 | OIDN_NAMESPACE_END 33 | -------------------------------------------------------------------------------- /devices/metal/metal_conv.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/conv.h" 7 | #include "metal_engine.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | class MetalConv final : public Conv 12 | { 13 | public: 14 | MetalConv(MetalEngine* engine, const ConvDesc& desc); 15 | ~MetalConv(); 16 | 17 | Engine* getEngine() const override { return engine; } 18 | void finalize() override; 19 | 
void submitKernels(const Ref& ct) override; 20 | 21 | private: 22 | void updateWeight() override; 23 | void updateBias() override; 24 | 25 | MetalEngine* engine; 26 | MPSGraph* mpsGraph = nullptr; 27 | MPSGraphTensor* mpsSrc = nullptr; 28 | MPSGraphTensor* mpsWeight = nullptr; 29 | MPSGraphTensor* mpsBias = nullptr; 30 | MPSGraphTensor* mpsDst = nullptr; 31 | }; 32 | 33 | OIDN_NAMESPACE_END 34 | -------------------------------------------------------------------------------- /devices/metal/metal_device.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Apple Inc. 2 | // Copyright 2023 Intel Corporation 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | #pragma once 6 | 7 | #include "core/device.h" 8 | #include "metal_common.h" 9 | 10 | OIDN_NAMESPACE_BEGIN 11 | 12 | class MetalEngine; 13 | 14 | class MetalPhysicalDevice : public PhysicalDevice 15 | { 16 | public: 17 | id device; 18 | 19 | MetalPhysicalDevice(id device, int score); 20 | }; 21 | 22 | class MetalDevice final : public Device 23 | { 24 | friend class MetalEngine; 25 | 26 | public: 27 | static std::vector> getPhysicalDevices(); 28 | static bool isSupported(id device); 29 | 30 | explicit MetalDevice(const Ref& physicalDevice); 31 | explicit MetalDevice(id commandQueue); 32 | ~MetalDevice(); 33 | 34 | DeviceType getType() const override { return DeviceType::Metal; } 35 | id getMTLDevice() const { return device; } 36 | 37 | bool needWeightAndBiasOnDevice() const override { return false; } // due to MPSGraph 38 | Storage getPtrStorage(const void* ptr) override; 39 | 40 | void execute(std::function&& f, SyncMode sync) override; 41 | void flush() override; 42 | void wait() override; 43 | 44 | protected: 45 | void init() override; 46 | 47 | private: 48 | id device; 49 | id userCommandQueue = nil; // user-provided command queue (optional) 50 | }; 51 | 52 | OIDN_NAMESPACE_END 53 | 
// Creates a heap of the given size; undefined storage falls back to device storage
MetalHeap::MetalHeap(MetalEngine* engine, size_t byteSize, Storage storage)
  : engine(engine),
    heap(nullptr),
    byteSize(byteSize),
    storage((storage != Storage::Undefined) ? storage : Storage::Device)
{
  init();
}

MetalHeap::~MetalHeap()
{
  free();
}

// Allocates the underlying Metal heap; does nothing for a zero-sized heap.
// Throws Exception(Error::OutOfMemory) if the heap could not be created.
void MetalHeap::init()
{
  if (byteSize == 0)
    return;

  MTLHeapDescriptor* heapDesc = [MTLHeapDescriptor new];
  [heapDesc setType: MTLHeapTypePlacement];
  [heapDesc setResourceOptions: toMTLResourceOptions(storage) | MTLResourceHazardTrackingModeTracked];
  [heapDesc setSize: engine->getBufferByteSizeAndAlignment(byteSize, storage).size];

  heap = [engine->getMTLDevice() newHeapWithDescriptor: heapDesc];
  [heapDesc release]; // the descriptor is no longer needed once the heap has been requested

  if (heap == nullptr)
    throw Exception(Error::OutOfMemory, "failed to create heap");
}

// Releases the underlying Metal heap, if there is one
void MetalHeap::free()
{
  if (heap != nullptr)
    [heap release];
  heap = nullptr;
}

// Replaces the heap with a new one of the requested size; a no-op if the size is unchanged.
// NOTE(review): if init() throws, byteSize already holds the new size while heap is null
// and postRealloc() is skipped -- confirm that callers can cope with this state.
void MetalHeap::realloc(size_t newByteSize)
{
  if (newByteSize != byteSize)
  {
    preRealloc();
    free();
    byteSize = newByteSize;
    init();
    postRealloc();
  }
}
// Device factory of the Metal module: creates MetalDevice objects either from an
// enumerated physical device or from a user-provided command queue.
// NOTE(review): the template argument lists were missing from the reviewed copy of this
// file and have been reconstructed here -- confirm against the original file.
class MetalDeviceFactory : public MetalDeviceFactoryBase
{
public:
  bool isDeviceSupported(MTLDevice_id device) override
  {
    return MetalDevice::isSupported(device);
  }

  // Creates a device from a physical device, which must be of the Metal device type
  Ref<Device> newDevice(const Ref<PhysicalDevice>& physicalDevice) override
  {
    assert(physicalDevice->type == DeviceType::Metal);
    return makeRef<MetalDevice>(staticRefCast<MetalPhysicalDevice>(physicalDevice));
  }

  // Creates a device from exactly one user-provided command queue.
  // Throws Exception(Error::InvalidArgument) if the count is not 1 or the array is null.
  Ref<Device> newDevice(const MTLCommandQueue_id* commandQueues, int numQueues) override
  {
    if (numQueues != 1)
      throw Exception(Error::InvalidArgument, "invalid number of Metal command queues");
    // Reject a null array instead of dereferencing it, as the HIP and SYCL factories do
    if (commandQueues == nullptr)
      throw Exception(Error::InvalidArgument, "array of Metal command queues is null");

    return makeRef<MetalDevice>(commandQueues[0]);
  }
};
-------------------------------------------------------------------------------- 1 | // Copyright 2025 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #define OIDN_ARCH_XE2 5 | 6 | #include "sycl_conv_xe.h" -------------------------------------------------------------------------------- /devices/sycl/sycl_conv_xehpc.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #define OIDN_ARCH_XEHPC 5 | 6 | #include "sycl_conv_xe.h" -------------------------------------------------------------------------------- /devices/sycl/sycl_conv_xehpg.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #define OIDN_ARCH_XEHPG 5 | 6 | #include "sycl_conv_xe.h" -------------------------------------------------------------------------------- /devices/sycl/sycl_conv_xelp.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #define OIDN_ARCH_XELP 5 | 6 | #include "sycl_conv_xe.h" -------------------------------------------------------------------------------- /devices/sycl/sycl_device.h: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/device.h" 7 | #include 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | class SYCLEngine; 12 | 13 | // GPU architecture 14 | enum class SYCLArch 15 | { 16 | Unknown, 17 | XeLP, 18 | XeLPG, 19 | XeLPGplus, 20 | XeHPG, 21 | XeHPC, 22 | XeHPC_NoDPAS, 23 | Xe2LPG, 24 | Xe2HPG, 25 | Xe3LPG, 26 | }; 27 | 28 | class SYCLPhysicalDevice : public PhysicalDevice 29 | { 30 | public: 31 | sycl::device syclDevice; 32 | 33 | 
SYCLPhysicalDevice(const sycl::device& syclDevice, int score); 34 | }; 35 | 36 | class SYCLDevice : public SYCLDeviceBase 37 | { 38 | public: 39 | static std::vector> getPhysicalDevices(); 40 | static bool isSupported(const sycl::device& syclDevice); 41 | static SYCLArch getArch(const sycl::device& syclDevice); 42 | static int getScore(const sycl::device& syclDevice); 43 | 44 | SYCLDevice(const std::vector& syclQueues); 45 | explicit SYCLDevice(const Ref& physicalDevice); 46 | 47 | DeviceType getType() const override { return DeviceType::SYCL; } 48 | ze_context_handle_t getZeContext() const { return zeContext; } 49 | 50 | int getInt(const std::string& name) override; 51 | void setInt(const std::string& name, int value) override; 52 | 53 | Storage getPtrStorage(const void* ptr) override; 54 | 55 | void submitBarrier() override; 56 | void wait() override; 57 | 58 | // Manually sets the dependent events for the next command on all engines 59 | void setDepEvents(const std::vector& events); 60 | void setDepEvents(const sycl::event* events, int numEvents) override; 61 | 62 | // Gets the list of events corresponding to the completion of all commands 63 | std::vector getDoneEvents(); 64 | void getDoneEvent(sycl::event& event) override; 65 | 66 | SYCLArch getArch() const { return arch; } 67 | 68 | private: 69 | void preinit(); 70 | void init() override; 71 | 72 | SYCLEngine* getSYCLEngine(int i) const; 73 | 74 | sycl::context syclContext; 75 | ze_context_handle_t zeContext = nullptr; // Level Zero context 76 | SYCLArch arch = SYCLArch::Unknown; 77 | int numSubdevices = 0; // autodetect by default 78 | 79 | // Used only for initialization 80 | Ref physicalDevice; 81 | std::vector syclQueues; 82 | }; 83 | 84 | OIDN_NAMESPACE_END 85 | -------------------------------------------------------------------------------- /devices/sycl/sycl_device_table.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Intel Corporation 2 | // 
SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "sycl_device.h" 7 | 8 | OIDN_NAMESPACE_BEGIN 9 | 10 | // Table of supported architectures and corresponding IP versions with revisions masked out 11 | // These should match the AOT targets defined in CMakeLists.txt 12 | // https://github.com/intel/compute-runtime/blob/14251c3d96e71e97e397b0c4fcb01557fca47f0e/shared/source/helpers/hw_ip_version.h 13 | // https://github.com/intel/compute-runtime/blob/master/third_party/aot_config_headers/platforms.h 14 | struct SYCLDeviceTableEntry 15 | { 16 | SYCLArch arch; 17 | std::vector ipVersions; 18 | }; 19 | 20 | constexpr uint32_t syclDeviceIPVersionMask = 0xffffffc0; 21 | 22 | inline const std::vector syclDeviceTable = 23 | { 24 | { 25 | SYCLArch::XeLP, 26 | { 27 | 0x03000000, // tgllp 28 | 0x03004000, // rkl 29 | 0x03008000, // adl-s 30 | 0x0300c000, // adl-p 31 | 0x03010000, // adl-n 32 | 0x03028000, // dg1 33 | } 34 | }, 35 | { 36 | SYCLArch::XeLPG, 37 | { 38 | 0x03118000, // mtl-u 39 | 0x0311c000, // mtl-h 40 | } 41 | }, 42 | { 43 | SYCLArch::XeLPGplus, 44 | { 45 | 0x03128000, // arl-h 46 | } 47 | }, 48 | { 49 | SYCLArch::XeHPG, 50 | { 51 | 0x030dc000, // acm-g10 52 | 0x030e0000, // acm-g11 53 | 0x030e4000, // acm-g12 54 | } 55 | }, 56 | #if defined(__linux__) 57 | { 58 | SYCLArch::XeHPC, 59 | { 60 | 0x030f0000, // pvc-sdv, pvc 61 | } 62 | }, 63 | { 64 | SYCLArch::XeHPC_NoDPAS, 65 | { 66 | 0x030f4000, // pvc-vg 67 | } 68 | }, 69 | #endif 70 | { 71 | SYCLArch::Xe2LPG, 72 | { 73 | 0x05010000, // lnl-m 74 | } 75 | }, 76 | { 77 | SYCLArch::Xe2HPG, 78 | { 79 | 0x05004000, // bmg-g21 80 | } 81 | }, 82 | { 83 | SYCLArch::Xe3LPG, 84 | { 85 | 0x07800000, // ptl-h 86 | 0x07804000, // ptl-u 87 | } 88 | }, 89 | }; 90 | 91 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/sycl/sycl_external_buffer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 
2022 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "sycl_external_buffer.h" 5 | 6 | OIDN_NAMESPACE_BEGIN 7 | 8 | SYCLExternalBuffer::SYCLExternalBuffer(SYCLEngine* engine, 9 | ExternalMemoryTypeFlag fdType, 10 | int fd, size_t byteSize) 11 | : USMBuffer(engine) 12 | { 13 | if (fdType != ExternalMemoryTypeFlag::DMABuf) 14 | throw Exception(Error::InvalidArgument, "external memory type not supported by the device"); 15 | 16 | ze_external_memory_import_fd_t importDesc = 17 | { 18 | ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_FD, 19 | nullptr, // pNext 20 | ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF, 21 | fd 22 | }; 23 | 24 | init(engine, &importDesc, byteSize); 25 | } 26 | 27 | SYCLExternalBuffer::SYCLExternalBuffer(SYCLEngine* engine, 28 | ExternalMemoryTypeFlag handleType, 29 | void* handle, const void* name, size_t byteSize) 30 | : USMBuffer(engine) 31 | { 32 | if (handleType != ExternalMemoryTypeFlag::OpaqueWin32) 33 | throw Exception(Error::InvalidArgument, "external memory type not supported by the device"); 34 | 35 | ze_external_memory_import_win32_handle_t importDesc = 36 | { 37 | ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_WIN32, 38 | nullptr, // pNext 39 | ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32, 40 | handle, 41 | name 42 | }; 43 | 44 | init(engine, &importDesc, byteSize); 45 | } 46 | 47 | void SYCLExternalBuffer::init(SYCLEngine* engine, const void* importDesc, size_t byteSize) 48 | { 49 | void* ptr = nullptr; 50 | 51 | ze_device_mem_alloc_desc_t allocDesc{}; 52 | allocDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; 53 | allocDesc.pNext = importDesc; 54 | 55 | auto result = zeMemAllocDevice(static_cast(engine->getDevice())->getZeContext(), 56 | &allocDesc, 57 | byteSize, 58 | 0, 59 | engine->getZeDevice(), 60 | &ptr); 61 | 62 | if (result != ZE_RESULT_SUCCESS) 63 | throw Exception(Error::InvalidOperation, "failed to import external memory"); 64 | 65 | this->ptr = (char*)ptr; 66 | this->byteSize = byteSize; 67 | this->shared = 
true; 68 | this->storage = Storage::Device; 69 | } 70 | 71 | SYCLExternalBuffer::~SYCLExternalBuffer() 72 | { 73 | zeMemFree(static_cast(getDevice())->getZeContext(), ptr); 74 | } 75 | 76 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /devices/sycl/sycl_external_buffer.h: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "core/buffer.h" 7 | #include "sycl_engine.h" 8 | 9 | OIDN_NAMESPACE_BEGIN 10 | 11 | class SYCLExternalBuffer : public USMBuffer 12 | { 13 | public: 14 | SYCLExternalBuffer(SYCLEngine* engine, 15 | ExternalMemoryTypeFlag fdType, 16 | int fd, size_t byteSize); 17 | 18 | SYCLExternalBuffer(SYCLEngine* engine, 19 | ExternalMemoryTypeFlag handleType, 20 | void* handle, const void* name, size_t byteSize); 21 | 22 | ~SYCLExternalBuffer(); 23 | 24 | private: 25 | void init(SYCLEngine* engine, const void* importDesc, size_t byteSize); 26 | }; 27 | 28 | OIDN_NAMESPACE_END 29 | -------------------------------------------------------------------------------- /devices/sycl/sycl_module.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Intel Corporation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "core/context.h" 5 | #include "sycl_device.h" 6 | 7 | OIDN_NAMESPACE_BEGIN 8 | 9 | class SYCLDeviceFactory : public SYCLDeviceFactoryBase 10 | { 11 | public: 12 | bool isDeviceSupported(const sycl::device* device) override 13 | { 14 | if (device == nullptr) 15 | throw Exception(Error::InvalidArgument, "SYCL device is null"); 16 | return SYCLDevice::isSupported(*device); 17 | } 18 | 19 | Ref newDevice(const sycl::queue* queues, int numQueues) override 20 | { 21 | if (numQueues < 1) 22 | throw Exception(Error::InvalidArgument, "invalid number of SYCL queues"); 23 | if (queues == nullptr) 24 | 
throw Exception(Error::InvalidArgument, "array of SYCL queues is null"); 25 | 26 | return makeRef(std::vector{queues, queues + numQueues}); 27 | } 28 | 29 | Ref newDevice(const Ref& physicalDevice) override 30 | { 31 | assert(physicalDevice->type == DeviceType::SYCL); 32 | return makeRef(staticRefCast(physicalDevice)); 33 | } 34 | }; 35 | 36 | OIDN_DECLARE_INIT_MODULE(device_sycl) 37 | { 38 | #if defined(OIDN_DEVICE_SYCL_JIT_CACHE) 39 | // Enable persistent JIT cache if not disabled explicitly 40 | setEnvVar("SYCL_CACHE_PERSISTENT", 1, false); 41 | setEnvVar("NEO_CACHE_PERSISTENT", 1, false); 42 | #else 43 | // Disable persistent JIT cache if not enabled explicitly 44 | setEnvVar("SYCL_CACHE_PERSISTENT", 0, false); 45 | setEnvVar("NEO_CACHE_PERSISTENT", 0, false); 46 | #endif 47 | 48 | Context::registerDeviceType(DeviceType::SYCL, SYCLDevice::getPhysicalDevices()); 49 | } 50 | 51 | OIDN_NAMESPACE_END -------------------------------------------------------------------------------- /doc/.gitignore: -------------------------------------------------------------------------------- 1 | # Generated files and folders 2 | changelog.md 3 | www 4 | tmp 5 | __pycache__ 6 | images 7 | -------------------------------------------------------------------------------- /doc/documentation.md: -------------------------------------------------------------------------------- 1 | Documentation 2 | ============= 3 | 4 | The following [documentation][OIDNReadme] of Intel Open Image Denoise can 5 | also be found as a [pdf document][OIDNReadme]. 
6 | 7 | 8 | -------------------------------------------------------------------------------- /doc/downloads.md: -------------------------------------------------------------------------------- 1 | Download Precompiled Intel Open Image Denoise Binary Packages 2 | ============================================================= 3 | 4 | For Linux we provide Intel Open Image Denoise precompiled for 64-bit as a TGZ 5 | archive: 6 | 7 | [oidn-<OIDN_VERSION>.x86_64.linux.tar.gz](https://github.com/OpenImageDenoise/oidn/releases/download/v<OIDN_VERSION>/oidn-<OIDN_VERSION>.x86_64.linux.tar.gz) 8 | 9 | For macOS we provide Intel Open Image Denoise precompiled for Intel processors and Apple silicon as separate TGZ archives: 10 | 11 | [oidn-<OIDN_VERSION>.x86_64.macos.tar.gz](https://github.com/OpenImageDenoise/oidn/releases/download/v<OIDN_VERSION>/oidn-<OIDN_VERSION>.x86_64.macos.tar.gz) 12 | 13 | [oidn-<OIDN_VERSION>.arm64.macos.tar.gz](https://github.com/OpenImageDenoise/oidn/releases/download/v<OIDN_VERSION>/oidn-<OIDN_VERSION>.arm64.macos.tar.gz) 14 | 15 | For Windows we provide Intel Open Image Denoise binaries precompiled for 64-bit as a ZIP archive: 16 | 17 | [oidn-<OIDN_VERSION>.x64.windows.zip](https://github.com/OpenImageDenoise/oidn/releases/download/v<OIDN_VERSION>/oidn-<OIDN_VERSION>.x64.windows.zip) 18 | 19 | The source code of the latest Intel Open Image Denoise version can be downloaded here: 20 | 21 | [oidn-<OIDN_VERSION>.src.zip](https://github.com/OpenImageDenoise/oidn/releases/download/v<OIDN_VERSION>/oidn-<OIDN_VERSION>.src.zip) 22 | 23 | [oidn-<OIDN_VERSION>.src.tar.gz](https://github.com/OpenImageDenoise/oidn/releases/download/v<OIDN_VERSION>/oidn-<OIDN_VERSION>.src.tar.gz) 24 | 25 | The TGZ/ZIP packages contain most needed 3rd party dependencies. 26 | 27 | You can also access [old Intel Open Image Denoise releases](https://github.com/OpenImageDenoise/oidn/releases). 28 | -------------------------------------------------------------------------------- /doc/examples.md: -------------------------------------------------------------------------------- 1 | Examples 2 | ======== 3 | 4 | Intel Open Image Denoise ships with a couple of simple example applications.
5 | 6 | oidnDenoise 7 | ----------- 8 | 9 | `oidnDenoise` is a minimal working example demonstrating how to use Intel Open 10 | Image Denoise, which can be found at `apps/oidnDenoise.cpp`. It uses the C++11 11 | convenience wrappers of the C99 API. 12 | 13 | This example is a simple command-line application that denoises the provided 14 | image, which can optionally have auxiliary feature images as well (e.g. albedo 15 | and normal). By default the images must be stored in the [Portable 16 | FloatMap](http://www.pauldebevec.com/Research/HDR/PFM/) (PFM) format, and the 17 | color values must be encoded in little-endian format. To enable other image 18 | formats (e.g. OpenEXR, PNG) as well, the project has to be rebuilt with 19 | OpenImageIO support enabled. 20 | 21 | Running `oidnDenoise` without any arguments or the `-h` argument will bring up 22 | a list of command-line options. 23 | 24 | oidnBenchmark 25 | ------------- 26 | 27 | `oidnBenchmark` is a basic command-line benchmarking application for measuring 28 | denoising speed, which can be found at `apps/oidnBenchmark.cpp`. 29 | 30 | Running `oidnBenchmark` with the `-h` argument will bring up a list of 31 | command-line options. 32 | -------------------------------------------------------------------------------- /doc/filter-latex.py: -------------------------------------------------------------------------------- 1 | # 1. convert tables to use 'tabu' 2 | # 2. 
#    always add hypertargets, before headings, to workaround issue #2719
# Based on Wagner Macedo's filter.py posted at
# https://groups.google.com/forum/#!msg/pandoc-discuss/RUC-tuu_qf0/h-H3RRVt1coJ
import pandocfilters as pf


def latex(s):
    """Wrap the string *s* in a raw LaTeX block element."""
    return pf.RawBlock('latex', s)


def inlatex(s):
    """Wrap the string *s* in a raw LaTeX inline element."""
    return pf.RawInline('latex', s)


def tbl_caption(s):
    """Return a paragraph holding the caption inlines *s* inside a LaTeX caption command."""
    return pf.Para([inlatex(r'\caption{')] + s + [inlatex(r'}')])


def tbl_alignment(a, w):
    """Build the tabu column specification for a table.

    *a* is the sequence of alignment elements (each a dict whose ``'t'`` entry
    names the alignment) and *w* the matching sequence of numeric column
    widths. One ``X[-width,letter]`` entry is emitted per column; the width is
    negated exactly as before.
    """
    aligns = {
        "AlignDefault": 'l',
        "AlignLeft": 'l',
        "AlignCenter": 'c',
        "AlignRight": 'r',
    }
    # Join once instead of growing a string with += inside an index loop.
    return ''.join(
        'X[%.3f,%s]' % (-width, aligns[align['t']])
        for align, width in zip(a, w)
    )


def tbl_headers(s):
    """Return the header row as one paragraph: cells joined by ' & ', ended by a row break.

    *s* is the sequence of header cells. Only the first block of each cell is
    used (its ``'c'`` content), as in tbl_contents. Empty cells contribute no
    content instead of raising IndexError, which mirrors the ``if col:`` guard
    in tbl_contents.
    NOTE(review): presumably pandoc emits empty header cells for tables
    without a header row -- confirm against the pandoc version in use.
    """
    result = []
    for index, cell in enumerate(s):
        if index:
            result.append(inlatex(' & '))
        if cell:
            result.extend(cell[0]['c'])
    result.append(inlatex(r'\\' '\n'))
    return pf.Para(result)


def tbl_contents(s):
    """Return all body rows as one paragraph.

    Within a row the cells are separated by ' & '; every row is terminated by
    a LaTeX row break. Empty cells are kept as empty columns. A row without
    any cells is skipped, where the old code failed on ``result[-1]`` if it
    was the first row.
    """
    result = []
    for row in s:
        para = []
        for col in row:
            if col:
                para.extend(col[0]['c'])
            para.append(inlatex(' & '))
        if para:
            # Every cell appended a separator; the last one becomes the row break.
            para[-1] = inlatex(r'\\' '\n')
            result.extend(para)
    return pf.Para(result)


def do_filter(k, v, f, m):
    """Filter action handed to ``pf.toJSONFilter``.

    *k* is the element type name and *v* its content. *f* and *m* are not
    used here (presumably the output format and document metadata supplied by
    pandocfilters).

    - "Table": replaced by raw LaTeX that renders the table with 'tabu'.
      v[0] is used as caption, v[1] as column alignments, v[2] as column
      widths, v[3] as header cells and v[4] as body rows.
    - "Header": kept, but preceded by a hypertarget named after v[1][0].
    - anything else: returns None.
    """
    if k == "Table":
        w = v[2]
        if sum(w) == 0:
            # No explicit widths: give every column the same weight and
            # centre the table in a plain 'table' environment.
            w = [1] * len(w)
            wd = ''
            ha = r'\centering'
        else:
            # Explicit widths: use the starred 'table*' environment.
            wd = '*'
            ha = r'\raggedright'
        return [latex(r'\begin{table'+wd+'}[!h]'),
                tbl_caption(v[0]),
                latex(ha),
                latex(r'\begin{tabu} spread 0pt {' + tbl_alignment(v[1], w) + '}'),
                latex(r'\toprule'),
                tbl_headers(v[3]),
                latex(r'\midrule'),
                tbl_contents(v[4]),
                latex(r'\bottomrule' '\n' r'\end{tabu}'),
                latex(r'\end{table'+wd+'}')]
    if k == "Header":
        return [latex(r'\hypertarget{' + v[1][0] + r'}{}'),
                pf.Header(v[0], v[1], v[2])]
    # Per the pandocfilters documentation, None leaves the element unchanged.
    return None


if __name__ == "__main__":
    pf.toJSONFilter(do_filter)
-------------------------------------------------------------------------------- /doc/filter-sectionnumbers.py: -------------------------------------------------------------------------------- 1 | # remove section numbers for subheadings 2 | # Based on Wagner Macedo's filter.py posted at 3 | # https://groups.google.com/forum/#!msg/pandoc-discuss/RUC-tuu_qf0/h-H3RRVt1coJ 4 | import pandocfilters as pf 5 | 6 | def do_filter(k, v, f, m): 7 | if k == "Header" and v[0] > 2: 8 | v[1][1].append('unnumbered') 9 | return pf.Header(v[0], v[1], v[2]) 10 | 11 | if __name__ == "__main__": 12 | pf.toJSONFilter(do_filter) 13 | -------------------------------------------------------------------------------- /doc/images.md: -------------------------------------------------------------------------------- 1 | [imgMazdaColor]: mazda_4spp_input.jpg { width=90% } 2 | [imgMazdaDenoised]: mazda_4spp_oidn.jpg { width=90% } 3 | [imgMazdaAlbedoFirstHit]: mazda_firsthit_512spp_albedo.jpg { width=90% } 4 | [imgMazdaAlbedoNonDeltaHit]: mazda_nondeltahit_512spp_albedo.jpg { width=90% } 5 | [imgMazdaNormalFirstHit]: mazda_firsthit_512spp_normal.jpg { width=90% } 6 | [imgMazdaNormalNonDeltaHit]: mazda_nondeltahit_512spp_normal.jpg { width=90% } 7 | -------------------------------------------------------------------------------- /doc/legal.md: -------------------------------------------------------------------------------- 1 | Disclaimer and Legal Information 2 | ================================ 3 | 4 | © 2018-2025 Intel Corporation 5 | 6 | [Privacy Notice](https://www.intel.com/privacy) 7 | 8 | Intel, the Intel logo, Xeon, Intel Xeon Phi, and Intel Core are 9 | trademarks of Intel Corporation in the U.S. and/or other countries. 10 | *Other names and brands may be claimed as the property of others. 11 | 12 | 13 | Optimization Notice: Intel's compilers may or may not optimize to the 14 | same degree for non-Intel microprocessors for optimizations that are not 15 | unique to Intel microprocessors. 
These optimizations include SSE2, SSE3, 16 | and SSSE3 instruction sets and other optimizations. Intel does not 17 | guarantee the availability, functionality, or effectiveness of any 18 | optimization on microprocessors not manufactured by Intel. 19 | Microprocessor-dependent optimizations in this product are intended for 20 | use with Intel microprocessors. Certain optimizations not specific to 21 | Intel microarchitecture are reserved for Intel microprocessors. Please 22 | refer to the applicable product User and Reference Guides for more 23 | information regarding the specific instruction sets covered by this 24 | notice. 25 | Notice Revision #20110804 26 | -------------------------------------------------------------------------------- /doc/links.md: -------------------------------------------------------------------------------- 1 | 2 | [OIDNReadme]: https://github.com/OpenImageDenoise/oidn/blob/master/readme.pdf "Intel Open Image Denoise Documentation" 3 | [Training]: #training 4 | -------------------------------------------------------------------------------- /doc/readme.tex: -------------------------------------------------------------------------------- 1 | \IfFileExists{oidn-doc/intel-spec.cls} 2 | { 3 | \documentclass[oneside]{oidn-doc/intel-spec} 4 | }{ 5 | \documentclass[oneside]{report} 6 | \newcommand{\copyrightyears}[1] {} 7 | \newcommand{\trademarkacknowledgment}[1] {} 8 | \newcommand{\performancedisclaimer}{} 9 | \newcommand{\optimizationdisclaimer}{} 10 | \newcommand{\makedisclaimers}{} 11 | \newcommand{\version}[1] { \author{Version ##1} } 12 | } 13 | 14 | \include{preamble} 15 | 16 | \begin{document} 17 | \title{Intel® Open Image Denoise\vskip0.3\baselineskip\IntelFontOneRegular\LARGE 18 | \noindent High-Performance Denoising Library\\for Ray Tracing} 19 | \version{\oidnversion} 20 | 21 | \maketitle 22 | \tableofcontents 23 | 24 | \input{tmp/overview} 25 | \input{tmp/changelog} 26 | \input{tmp/compilation} 27 | 
\addtocontents{toc}{\protect\setcounter{tocdepth}{2}} 28 | \hypersetup{bookmarksdepth=2} 29 | \input{tmp/api} 30 | \addtocontents{toc}{\protect\setcounter{tocdepth}{1}} 31 | \hypersetup{bookmarksdepth=1} 32 | \input{tmp/examples} 33 | \input{tmp/training} 34 | 35 | \makedisclaimers 36 | 37 | \end{document} 38 | -------------------------------------------------------------------------------- /doc/readme_head.md: -------------------------------------------------------------------------------- 1 | Intel® Open Image Denoise 2 | ========================= 3 | 4 | This is release v<OIDN_VERSION> of Intel Open Image Denoise. For changes and new 5 | features see the [changelog](CHANGELOG.md). Visit 6 | https://www.openimagedenoise.org for more information. 7 | 8 | -------------------------------------------------------------------------------- /doc/related_projects.md: -------------------------------------------------------------------------------- 1 | Projects that make use of Intel Open Image Denoise 2 | ================================================== 3 | 4 | This page gives a brief (and incomplete) list of other projects that 5 | make use of Intel Open Image Denoise, as well as a set of related links to other 6 | projects and related information. 7 | 8 | If you have a project that makes use of Intel Open Image Denoise and would like 9 | this to be listed here, please let us know. 10 | 11 | - [Intel® OSPRay](https://www.ospray.org), a ray tracing based rendering engine for high-fidelity visualization 12 | 13 | 14 | Projects that are closely related to Intel Open Image Denoise 15 | ============================================================= 16 | 17 | - The [Intel® Embree](https://www.embree.org) Ray Tracing Kernel Framework 18 | 19 | 20 | -------------------------------------------------------------------------------- /doc/teaser.html: -------------------------------------------------------------------------------- 1 |
2 |

Intel® Open Image Denoise

3 |

High-Performance Denoising Library for Ray Tracing

4 |
5 |
6 |
7 | Denoised 8 |
Original
9 |
10 |

Evermotion [15th Anniversary Collection](https://evermotion.org/shop/show_product/15th-anniversary-collection/16094) scene rendered with [Chaos Corona](https://corona-renderer.com/) and denoised with Intel® Open Image Denoise using prefiltered albedo and normal buffers. Hover over the image (or tap on it) to move the slider between the original and denoised versions.

11 |
12 | 13 | -------------------------------------------------------------------------------- /doc/webtemplate.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Intel® Open Image Denoise 6 | 7 | $if(highlighting-css)$ 8 | 12 | $endif$ 13 | 14 | 15 | 16 | 36 | 37 |
38 |
39 | 40 | $body$ 41 | 42 |
43 |
44 | 45 | $if(select_legal)$ 46 | $else$ 47 | 51 | $endif$ 52 | 53 | 54 | -------------------------------------------------------------------------------- /readme.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RenderKit/oidn/7d23b193ee0cf3bc3ad03a3ac1886b34f496cc5c/readme.pdf -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch==2.7.0 2 | tensorboard==2.19.0 3 | -------------------------------------------------------------------------------- /scripts/build_src.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ## Copyright 2022 Intel Corporation 4 | ## SPDX-License-Identifier: Apache-2.0 5 | 6 | import re 7 | import shutil 8 | 9 | from common import * 10 | 11 | # Detect the version 12 | print('Detecting the version') 13 | version_file = os.path.join(root_dir, 'README.md') 14 | with open(version_file, 'rb') as f: 15 | version_text = str(f.read()) 16 | version = re.findall('v[0-9a-z.-]+', version_text)[0][1:] 17 | 18 | # Copy the source into a temporary directory 19 | print('Copying the source code') 20 | src_name = f'oidn-{version}' 21 | src_dir = os.path.join(root_dir, src_name) 22 | shutil.copytree(root_dir, src_dir, ignore=shutil.ignore_patterns('.git', '.gitmodules', '__pycache__')) 23 | 24 | # Create the package 25 | build_dir = os.path.join(root_dir, 'build') 26 | if not os.path.isdir(build_dir): 27 | os.mkdir(build_dir) 28 | package_filename = os.path.join(build_dir, src_name + '.src' + ('.zip' if OS == 'windows' else '.tar.gz')) 29 | create_package(package_filename, src_dir) 30 | 31 | # Remove the temporary directory 32 | shutil.rmtree(src_dir) -------------------------------------------------------------------------------- /scripts/build_weights.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ## Copyright 2020 Intel Corporation 4 | ## SPDX-License-Identifier: Apache-2.0 5 | 6 | import argparse 7 | from common import * 8 | 9 | MODELS = [ 10 | 'rt_hdr_alb_nrm', 11 | 'rt_hdr_alb', 12 | 'rt_hdr', 13 | 'rt_ldr_alb_nrm', 14 | 'rt_ldr_alb', 15 | 'rt_ldr', 16 | 'rtlightmap_hdr' 17 | ] 18 | 19 | # Parse the command-line arguments 20 | parser = argparse.ArgumentParser(description='Builds the weights blobs from the training results.') 21 | parser.usage = '\rIntel(R) Open Image Denoise - Build Weights\n' + parser.format_usage() 22 | parser.add_argument('--results_dir', '-R', type=str, default=os.path.join(root_dir, 'training', 'results'), help='directory of training results') 23 | cfg = parser.parse_args() 24 | 25 | weights_dir = os.path.join(root_dir, 'weights') 26 | export_cmd = os.path.join(root_dir, 'training', 'export.py') 27 | 28 | # Export the weights blobs 29 | for model in MODELS: 30 | tza_filename = os.path.join(weights_dir, model + '.tza') 31 | run(export_cmd + f' -R {cfg.results_dir} -r {model} -o {tza_filename}') 32 | print() -------------------------------------------------------------------------------- /scripts/common.py: -------------------------------------------------------------------------------- 1 | ## Copyright 2020 Intel Corporation 2 | ## SPDX-License-Identifier: Apache-2.0 3 | 4 | import os 5 | import platform 6 | import subprocess 7 | import re 8 | import shutil 9 | import tarfile 10 | from zipfile import ZipFile 11 | from urllib.request import urlretrieve 12 | 13 | # Runs a command and checks the return value for success 14 | def run(command): 15 | status = os.system(command) 16 | if status != 0: 17 | print('Error: non-zero return value') 18 | exit(1) 19 | 20 | def download_file(url, output_dir): 21 | print('Downloading file:', url) 22 | filename = os.path.join(output_dir, os.path.basename(url)) 23 | urlretrieve(url, 
filename=filename) 24 | return filename 25 | 26 | def extract_package(filename, output_dir): 27 | print('Extracting package:', filename) 28 | # Detect the package format and open the package 29 | if re.search(r'(\.tar(\..+)?|tgz)$', filename): 30 | package = tarfile.open(filename) 31 | members = package.getnames() 32 | elif filename.endswith('.zip'): 33 | package = ZipFile(filename) 34 | members = package.namelist() 35 | else: 36 | raise Exception('unsupported package format') 37 | # Avoid nesting two top-level directories with the same name 38 | if os.path.commonpath(members) == os.path.basename(output_dir): 39 | output_dir = os.path.dirname(output_dir) 40 | # Create the output directory if it doesn't exist 41 | if not os.path.isdir(output_dir): 42 | os.makedirs(output_dir) 43 | # Extract the package 44 | package.extractall(output_dir) 45 | package.close() 46 | 47 | def create_package(filename, input_dir): 48 | print('Creating package:', filename) 49 | if filename.endswith('.tar.gz'): 50 | with tarfile.open(filename, "w:gz") as package: 51 | package.add(input_dir, arcname=os.path.basename(input_dir)) 52 | elif filename.endswith('.zip'): 53 | shutil.make_archive(filename[:-4], 'zip', os.path.dirname(input_dir), os.path.basename(input_dir)) 54 | else: 55 | raise Exception('unsupported package format') 56 | 57 | # Detect the OS and architecture 58 | OS = {'Windows' : 'windows', 'Linux' : 'linux', 'Darwin' : 'macos'}[platform.system()] 59 | 60 | ARCH = platform.machine().lower() 61 | if ARCH == 'amd64': 62 | ARCH = 'x86_64' 63 | elif ARCH == 'aarch64': 64 | ARCH = 'arm64' 65 | 66 | # Get the root directory 67 | root_dir = os.environ.get('OIDN_ROOT_DIR') 68 | if root_dir is None: 69 | root_dir = os.getcwd() -------------------------------------------------------------------------------- /scripts/csan.supp.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Api 5 | 6 | CUDA_ERROR_OUT_OF_MEMORY 7 | 2 8 | 9 | 10 | 11 | 
-------------------------------------------------------------------------------- /scripts/protex_scan.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## Copyright 2018 Intel Corporation 4 | ## SPDX-License-Identifier: Apache-2.0 5 | 6 | # Fail when individual commands fail (-e), also in intermediate steps in 7 | # pipelines (-o pipefail). 8 | set -euo pipefail 9 | 10 | # Debug only: print commands before executing them (-x). 11 | # set -x 12 | 13 | if [ -z "${OIDN_PROTEX_USER_HOME:-}" ]; then 14 | echo "Error: you must set OIDN_PROTEX_USER_HOME" 15 | exit 1 16 | fi 17 | 18 | if [ -z "${OIDN_PROTEX_PROJECT_NAME:-}" ]; then 19 | echo "Error: you must set OIDN_PROTEX_PROJECT_NAME" 20 | exit 1 21 | fi 22 | 23 | if [ -z "${OIDN_PROTEX_BDS:-}" ]; then 24 | echo "Error: you must set OIDN_PROTEX_BDS" 25 | exit 1 26 | fi 27 | 28 | if [ -z "${OIDN_PROTEX_SERVER_URL:-}" ]; then 29 | echo "Error: you must set OIDN_PROTEX_SERVER_URL" 30 | exit 1 31 | fi 32 | 33 | # Root dir defaults to $PWD 34 | ROOT_DIR=${OIDN_ROOT_DIR:-$PWD} 35 | 36 | export _JAVA_OPTIONS="-Duser.home=${OIDN_PROTEX_USER_HOME}" 37 | 38 | cd ${ROOT_DIR} 39 | 40 | ${OIDN_PROTEX_BDS} new-project --server ${OIDN_PROTEX_SERVER_URL} ${OIDN_PROTEX_PROJECT_NAME} |& tee ip_protex.log 41 | if grep -q "command failed" ip_protex.log; then 42 | exit 1 43 | fi 44 | 45 | ${OIDN_PROTEX_BDS} analyze --server ${OIDN_PROTEX_SERVER_URL} |& tee -a ip_protex.log 46 | if grep -q "command failed" ip_protex.log; then 47 | exit 1 48 | fi 49 | 50 | if grep -E "^Files pending identification: [0-9]+$" ip_protex.log; then 51 | echo "Protex scan FAILED!" 52 | exit 1 53 | fi 54 | 55 | echo "Protex scan PASSED!" 
56 | exit 0 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /scripts/store-files.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -xe 2 | ## Copyright 2019 Intel Corporation 3 | ## SPDX-License-Identifier: Apache-2.0 4 | 5 | project_name=$1 6 | build_id=$2 7 | group_name=$3 8 | files=$4 9 | STORAGE_DIR=$STORAGE_PATH/$project_name/$build_id/$group_name/ 10 | mkdir -p $STORAGE_DIR 11 | cp $files $STORAGE_DIR/ 12 | -------------------------------------------------------------------------------- /scripts/valgrind.supp: -------------------------------------------------------------------------------- 1 | { 2 | intel_sse2_strrchr_cond 3 | Memcheck:Cond 4 | fun:__intel_sse2_strrchr 5 | } 6 | 7 | { 8 | dlopen_leak 9 | Memcheck:Leak 10 | match-leak-kinds: definite,possible 11 | ... 12 | fun:dl_open_worker 13 | ... 14 | } 15 | 16 | { 17 | dlopen_addr 18 | Memcheck:Addr8 19 | fun:strncmp 20 | ... 21 | fun:dl_open_worker 22 | } 23 | 24 | { 25 | libsycl_leak 26 | Memcheck:Leak 27 | match-leak-kinds: definite,possible 28 | ... 29 | fun:_ZN4sycl3_* 30 | ... 31 | } 32 | 33 | { 34 | libcuda_leak 35 | Memcheck:Leak 36 | match-leak-kinds: definite,possible 37 | ... 38 | obj:*libcuda.so* 39 | ... 40 | } 41 | 42 | { 43 | libamdhip_leak 44 | Memcheck:Leak 45 | match-leak-kinds: definite,possible 46 | ... 47 | obj:*libamdhip64.so* 48 | ... 49 | } 50 | 51 | { 52 | libamd_comgr_cond 53 | Memcheck:Cond 54 | ... 55 | obj:*libamd_comgr.so* 56 | ... 57 | } 58 | 59 | { 60 | libamd_comgr_param 61 | Memcheck:Param 62 | write(buf) 63 | ... 64 | obj:*libamd_comgr.so* 65 | ... 66 | } 67 | 68 | { 69 | libhsa_leak 70 | Memcheck:Leak 71 | match-leak-kinds: definite,possible 72 | ... 73 | obj:*libhsa-runtime64.so* 74 | ... 75 | } 76 | 77 | { 78 | libze_leak 79 | Memcheck:Leak 80 | match-leak-kinds: definite,possible 81 | ... 82 | obj:*libze_*.so* 83 | ... 
84 | } 85 | 86 | { 87 | libigd_leak 88 | Memcheck:Leak 89 | match-leak-kinds: definite,possible 90 | ... 91 | obj:*libigd?cl.so* 92 | ... 93 | } 94 | 95 | { 96 | ocl_leak 97 | Memcheck:Leak 98 | match-leak-kinds: definite,possible 99 | ... 100 | fun:clGetPlatformIDs 101 | ... 102 | } 103 | 104 | { 105 | ur_leak 106 | Memcheck:Leak 107 | match-leak-kinds: definite,possible 108 | ... 109 | fun:urInit 110 | ... 111 | } 112 | 113 | { 114 | oidn_sycl_leak 115 | Memcheck:Leak 116 | match-leak-kinds: definite,possible 117 | ... 118 | fun:oidn_init_module_device_sycl_v* 119 | ... 120 | } -------------------------------------------------------------------------------- /third-party-programs-oneTBB.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RenderKit/oidn/7d23b193ee0cf3bc3ad03a3ac1886b34f496cc5c/third-party-programs-oneTBB.txt -------------------------------------------------------------------------------- /training/.gitignore: -------------------------------------------------------------------------------- 1 | *.exr 2 | *.pfm 3 | *.png 4 | *.done 5 | *.pt 6 | *.pth 7 | *.tza 8 | *.json 9 | *.csv 10 | *.zip 11 | events.out.* 12 | latest 13 | -------------------------------------------------------------------------------- /training/compare_image.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ## Copyright 2018 Intel Corporation 4 | ## SPDX-License-Identifier: Apache-2.0 5 | 6 | import os 7 | 8 | from config import * 9 | from util import * 10 | from dataset import * 11 | from image import * 12 | from color import * 13 | 14 | def main(): 15 | # Parse the command line arguments 16 | cfg = parse_args(description='Compares two feature images using the specified quality metrics.') 17 | 18 | # Load the images 19 | image1, _ = load_image(cfg.input[0]) 20 | image2, _ = load_image(cfg.input[1]) 21 | 22 | feature1 = 
get_image_feature(cfg.input[0]) 23 | feature2 = get_image_feature(cfg.input[1]) 24 | if feature1 != feature2: 25 | error('cannot compare different features') 26 | 27 | # Load metadata for the images if it exists 28 | tonemap_exposure = cfg.exposure 29 | if os.path.dirname(cfg.input[0]) == os.path.dirname(cfg.input[1]): 30 | metadata = load_image_metadata(os.path.commonprefix(cfg.input)) 31 | if metadata: 32 | tonemap_exposure = metadata['exposure'] 33 | 34 | # Convert the images to tensors 35 | image1 = image_to_tensor(image1, batch=True) 36 | image2 = image_to_tensor(image2, batch=True) 37 | 38 | # Transform the images to sRGB 39 | image1 = transform_feature(image1, feature1, 'srgb', tonemap_exposure) 40 | image2 = transform_feature(image2, feature2, 'srgb', tonemap_exposure) 41 | 42 | # Compute the metrics 43 | metric_str = '' 44 | for metric in cfg.metric: 45 | value = compare_images(image1, image2, metric) 46 | if metric_str: 47 | metric_str += ', ' 48 | metric_str += '%s = %.4f' % (metric, value) 49 | if metric_str: 50 | print(metric_str) 51 | 52 | if __name__ == '__main__': 53 | main() -------------------------------------------------------------------------------- /training/convert_image.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ## Copyright 2018 Intel Corporation 4 | ## SPDX-License-Identifier: Apache-2.0 5 | 6 | import os 7 | 8 | from config import * 9 | from util import * 10 | from dataset import * 11 | from image import * 12 | 13 | def main(): 14 | # Parse the command line arguments 15 | cfg = parse_args(description='Converts a feature image to a different image format.') 16 | 17 | # Load the input image 18 | image, _ = load_image(cfg.input) 19 | 20 | # Load metadata for the image if it exists 21 | tonemap_exposure = cfg.exposure 22 | metadata = load_image_metadata(cfg.input) 23 | if metadata: 24 | tonemap_exposure = metadata['exposure'] 25 | 26 | # Convert the image to tensor 
27 | image = image_to_tensor(image, batch=True) 28 | 29 | # Transform the image 30 | input_feature = get_image_feature(cfg.input) 31 | output_feature = get_image_feature(cfg.output) 32 | image = transform_feature(image, input_feature, output_feature, tonemap_exposure) 33 | 34 | # Save the image 35 | save_image(cfg.output, tensor_to_image(image)) 36 | 37 | if __name__ == '__main__': 38 | main() -------------------------------------------------------------------------------- /training/visualize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ## Copyright 2018 Intel Corporation 4 | ## SPDX-License-Identifier: Apache-2.0 5 | 6 | import os 7 | 8 | from config import * 9 | from util import * 10 | from result import * 11 | 12 | def main(): 13 | # Parse the command line arguments 14 | cfg = parse_args(description='Invokes TensorBoard for visualizing statistics of a training result.') 15 | 16 | result_dir = get_result_dir(cfg) 17 | if not os.path.isdir(result_dir): 18 | error('result does not exist') 19 | 20 | # Run TensorBoard 21 | log_dir = os.path.join(result_dir, 'log') 22 | os.system('tensorboard --logdir=' + log_dir) 23 | 24 | if __name__ == '__main__': 25 | main() 26 | --------------------------------------------------------------------------------