├── CLUtils ├── CLUtils.cpp └── CLUtils.hpp ├── Makefile ├── README.md ├── denoised.png ├── nondenoised.png ├── svgf.cl ├── svgf.cpp ├── svgf.hpp ├── utils.cpp └── utils.hpp /CLUtils/CLUtils.cpp: -------------------------------------------------------------------------------- 1 | /*! \file CLUtils.cpp 2 | * \brief Definitions of functions and methods for the CLUtils library 3 | * \details CLUtils offers utilities that help 4 | * setup and manage an OpenCL environment. 5 | * \author Nick Lamprianidis 6 | * \version 0.2.2 7 | * \date 2014-2015 8 | * \copyright The MIT License (MIT) 9 | * \par 10 | * Copyright (c) 2014 Nick Lamprianidis 11 | * \par 12 | * Permission is hereby granted, free of charge, to any person obtaining a copy 13 | * of this software and associated documentation files (the "Software"), to deal 14 | * in the Software without restriction, including without limitation the rights 15 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 16 | * copies of the Software, and to permit persons to whom the Software is 17 | * furnished to do so, subject to the following conditions: 18 | * \par 19 | * The above copyright notice and this permission notice shall be included in 20 | * all copies or substantial portions of the Software. 21 | * \par 22 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 24 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 25 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 26 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 27 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 28 | * THE SOFTWARE. 
29 | */ 30 | 31 | #include "CLUtils.hpp" 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | 39 | #if defined(_WIN32) 40 | #include 41 | #elif defined(__linux__) 42 | #include 43 | #elif defined(__APPLE__) || defined(__MACOSX) 44 | #include 45 | #endif 46 | 47 | 48 | namespace clutils 49 | { 50 | 51 | /*! Gets an error code and returns its name. 52 | * 53 | * \param[in] errorCode an error code. 54 | * \return Its name as a char array. 55 | */ 56 | const char* getOpenCLErrorCodeString (int errorCode) 57 | { 58 | switch (errorCode) 59 | { 60 | case CL_SUCCESS: 61 | return "CL_SUCCESS"; 62 | case CL_DEVICE_NOT_FOUND: 63 | return "CL_DEVICE_NOT_FOUND"; 64 | case CL_DEVICE_NOT_AVAILABLE: 65 | return "CL_DEVICE_NOT_AVAILABLE"; 66 | case CL_COMPILER_NOT_AVAILABLE: 67 | return "CL_COMPILER_NOT_AVAILABLE"; 68 | case CL_MEM_OBJECT_ALLOCATION_FAILURE: 69 | return "CL_MEM_OBJECT_ALLOCATION_FAILURE"; 70 | case CL_OUT_OF_RESOURCES: 71 | return "CL_OUT_OF_RESOURCES"; 72 | case CL_OUT_OF_HOST_MEMORY: 73 | return "CL_OUT_OF_HOST_MEMORY"; 74 | case CL_PROFILING_INFO_NOT_AVAILABLE: 75 | return "CL_PROFILING_INFO_NOT_AVAILABLE"; 76 | case CL_MEM_COPY_OVERLAP: 77 | return "CL_MEM_COPY_OVERLAP"; 78 | case CL_IMAGE_FORMAT_MISMATCH: 79 | return "CL_IMAGE_FORMAT_MISMATCH"; 80 | case CL_IMAGE_FORMAT_NOT_SUPPORTED: 81 | return "CL_IMAGE_FORMAT_NOT_SUPPORTED"; 82 | case CL_BUILD_PROGRAM_FAILURE: 83 | return "CL_BUILD_PROGRAM_FAILURE"; 84 | case CL_MAP_FAILURE: 85 | return "CL_MAP_FAILURE"; 86 | case CL_MISALIGNED_SUB_BUFFER_OFFSET: 87 | return "CL_MISALIGNED_SUB_BUFFER_OFFSET"; 88 | case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: 89 | return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; 90 | case CL_COMPILE_PROGRAM_FAILURE: 91 | return "CL_COMPILE_PROGRAM_FAILURE"; 92 | case CL_LINKER_NOT_AVAILABLE: 93 | return "CL_LINKER_NOT_AVAILABLE"; 94 | case CL_LINK_PROGRAM_FAILURE: 95 | return "CL_LINK_PROGRAM_FAILURE"; 96 | case CL_DEVICE_PARTITION_FAILED: 97 
| return "CL_DEVICE_PARTITION_FAILED"; 98 | case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: 99 | return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"; 100 | case CL_INVALID_VALUE: 101 | return "CL_INVALID_VALUE"; 102 | case CL_INVALID_DEVICE_TYPE: 103 | return "CL_INVALID_DEVICE_TYPE"; 104 | case CL_INVALID_PLATFORM: 105 | return "CL_INVALID_PLATFORM"; 106 | case CL_INVALID_DEVICE: 107 | return "CL_INVALID_DEVICE"; 108 | case CL_INVALID_CONTEXT: 109 | return "CL_INVALID_CONTEXT"; 110 | case CL_INVALID_QUEUE_PROPERTIES: 111 | return "CL_INVALID_QUEUE_PROPERTIES"; 112 | case CL_INVALID_COMMAND_QUEUE: 113 | return "CL_INVALID_COMMAND_QUEUE"; 114 | case CL_INVALID_HOST_PTR: 115 | return "CL_INVALID_HOST_PTR"; 116 | case CL_INVALID_MEM_OBJECT: 117 | return "CL_INVALID_MEM_OBJECT"; 118 | case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: 119 | return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; 120 | case CL_INVALID_IMAGE_SIZE: 121 | return "CL_INVALID_IMAGE_SIZE"; 122 | case CL_INVALID_SAMPLER: 123 | return "CL_INVALID_SAMPLER"; 124 | case CL_INVALID_BINARY: 125 | return "CL_INVALID_BINARY"; 126 | case CL_INVALID_BUILD_OPTIONS: 127 | return "CL_INVALID_BUILD_OPTIONS"; 128 | case CL_INVALID_PROGRAM: 129 | return "CL_INVALID_PROGRAM"; 130 | case CL_INVALID_PROGRAM_EXECUTABLE: 131 | return "CL_INVALID_PROGRAM_EXECUTABLE"; 132 | case CL_INVALID_KERNEL_NAME: 133 | return "CL_INVALID_KERNEL_NAME"; 134 | case CL_INVALID_KERNEL_DEFINITION: 135 | return "CL_INVALID_KERNEL_DEFINITION"; 136 | case CL_INVALID_KERNEL: 137 | return "CL_INVALID_KERNEL"; 138 | case CL_INVALID_ARG_INDEX: 139 | return "CL_INVALID_ARG_INDEX"; 140 | case CL_INVALID_ARG_VALUE: 141 | return "CL_INVALID_ARG_VALUE"; 142 | case CL_INVALID_ARG_SIZE: 143 | return "CL_INVALID_ARG_SIZE"; 144 | case CL_INVALID_KERNEL_ARGS: 145 | return "CL_INVALID_KERNEL_ARGS"; 146 | case CL_INVALID_WORK_DIMENSION: 147 | return "CL_INVALID_WORK_DIMENSION"; 148 | case CL_INVALID_WORK_GROUP_SIZE: 149 | return "CL_INVALID_WORK_GROUP_SIZE"; 150 | case 
CL_INVALID_WORK_ITEM_SIZE: 151 | return "CL_INVALID_WORK_ITEM_SIZE"; 152 | case CL_INVALID_GLOBAL_OFFSET: 153 | return "CL_INVALID_GLOBAL_OFFSET"; 154 | case CL_INVALID_EVENT_WAIT_LIST: 155 | return "CL_INVALID_EVENT_WAIT_LIST"; 156 | case CL_INVALID_EVENT: 157 | return "CL_INVALID_EVENT"; 158 | case CL_INVALID_OPERATION: 159 | return "CL_INVALID_OPERATION"; 160 | case CL_INVALID_GL_OBJECT: 161 | return "CL_INVALID_GL_OBJECT"; 162 | case CL_INVALID_BUFFER_SIZE: 163 | return "CL_INVALID_BUFFER_SIZE"; 164 | case CL_INVALID_MIP_LEVEL: 165 | return "CL_INVALID_MIP_LEVEL"; 166 | case CL_INVALID_GLOBAL_WORK_SIZE: 167 | return "CL_INVALID_GLOBAL_WORK_SIZE"; 168 | case CL_INVALID_PROPERTY: 169 | return "CL_INVALID_PROPERTY"; 170 | case CL_INVALID_IMAGE_DESCRIPTOR: 171 | return "CL_INVALID_IMAGE_DESCRIPTOR"; 172 | case CL_INVALID_COMPILER_OPTIONS: 173 | return "CL_INVALID_COMPILER_OPTIONS"; 174 | case CL_INVALID_LINKER_OPTIONS: 175 | return "CL_INVALID_LINKER_OPTIONS"; 176 | case CL_INVALID_DEVICE_PARTITION_COUNT: 177 | return "CL_INVALID_DEVICE_PARTITION_COUNT"; 178 | default: 179 | return "UNKNOWN_ERROR_CODE"; 180 | } 181 | } 182 | 183 | 184 | /*! \param[in] device a device for which to check the "GL Sharing" capability. 185 | * \return Returns true if "GL Sharing" is available, false otherwise. 186 | */ 187 | bool checkCLGLInterop (cl::Device &device) 188 | { 189 | std::string exts = device.getInfo (); 190 | 191 | #if defined(__APPLE__) || defined(__MACOSX) 192 | std::string glShare("cl_apple_gl_sharing"); 193 | #else 194 | std::string glShare("cl_khr_gl_sharing"); 195 | #endif 196 | 197 | return exts.find (glShare) != std::string::npos; 198 | } 199 | 200 | 201 | /*! \param[in] kernel_filenames a vector of strings with 202 | * the names of the kernel files (.cl). 203 | * \param[out] sourceCodes a vector of strings with the contents of the files. 
/*! \brief Reads in the contents of the requested kernel files.
 *  \details Every file is read in full and appended to `sourceCodes` as one
 *           string, in the order the names are given. Strings already in
 *           `sourceCodes` are left untouched.
 *
 *  \param[in] kernel_filenames a vector of strings with
 *                              the names of the kernel files (.cl).
 *  \param[out] sourceCodes a vector of strings with the contents of the files.
 *  \note If a file cannot be opened or read, a diagnostic naming that file is
 *        written to `std::cerr` and the process is terminated with `EXIT_FAILURE`.
 */
void readSource (const std::vector<std::string> &kernel_filenames,
                 std::vector<std::string> &sourceCodes)
{
    for (const auto &fName : kernel_filenames)
    {
        try
        {
            // One stream per file: no state (error flags, position) carries
            // over from the previous file, and the destructor closes it.
            std::ifstream programSource;
            programSource.exceptions (std::ifstream::failbit | std::ifstream::badbit);
            programSource.open (fName);
            sourceCodes.emplace_back (std::istreambuf_iterator<char> (programSource),
                                      std::istreambuf_iterator<char> ());
        }
        catch (const std::ifstream::failure &error)
        {
            // The stream's own message is generic, so report which file failed.
            std::cerr << "Error when accessing kernel file \"" << fName << "\": " << error.what ()
                      << " (" << __FILE__ << ":" << __LINE__ << ")" << std::endl;
            exit (EXIT_FAILURE);
        }
    }
}


/*! \brief Splits a string on the requested delimiter.
 *  \details Tokens are appended to `names`; existing elements are kept.
 *           Consecutive delimiters yield an empty token, while a single
 *           trailing delimiter does not (this is `std::getline` behaviour).
 *
 *  \param[in] str string containing the tokens.
 *  \param[in] delim delimiter on which to split the string.
 *  \param[out] names a vector of all the tokens.
 */
void split (const std::string &str, char delim,
            std::vector<std::string> &names)
{
    std::istringstream tokens (str);
    for (std::string item; std::getline (tokens, item, delim); )
        names.push_back (item);
}

/*! It initializes the OpenCL environment. If a `kernel_filenames` argument
 *  is provided, it creates a context for all the devices in the first
 *  platform, and a command queue for the first device in that platform.
 *  It also builds a program object from all the requested kernel files,
 *  and extracts all kernels in that program.
 *
 *  \param[in] kernel_filenames a vector of strings with
 *                              the names of the kernel files (.cl).
 *  \param[in] build_options options that are forwarded to the OpenCL compiler.
 */
252 | */ 253 | CLEnv::CLEnv (const std::vector &kernel_filenames, 254 | const char *build_options) 255 | { 256 | // Get the list of platforms 257 | cl::Platform::get (&platforms); 258 | 259 | if (!kernel_filenames.empty ()) 260 | { 261 | // Get the list of devices in platform 0 262 | devices.emplace_back (); 263 | platforms[0].getDevices (CL_DEVICE_TYPE_ALL, &devices[0]); 264 | 265 | // Create a context for those devices (in platform 0) 266 | contexts.emplace_back (devices[0]); 267 | 268 | // Create a command queue for device 0 in platform 0 269 | queues.emplace_back (); 270 | queues[0].emplace_back (contexts[0], devices[0][0]); 271 | 272 | // Create a sources object with all kernel files 273 | cl::Program::Sources sources; 274 | readSource (kernel_filenames, sources); 275 | 276 | // Create a program object from the source codes, targeting context 0 277 | programs.emplace_back (contexts[0], sources); 278 | 279 | try 280 | { 281 | // Build the program for all devices in context 0 (platform 0) 282 | programs[0].build (devices[0], build_options); 283 | } 284 | catch (const cl::Error &error) 285 | { 286 | std::cerr << error.what () 287 | << " (" << clutils::getOpenCLErrorCodeString (error.err ()) 288 | << ")" << std::endl << std::endl; 289 | 290 | std::string log = programs[0].getBuildInfo (devices[0][0]); 291 | std::cout << log << std::endl; 292 | 293 | exit (EXIT_FAILURE); 294 | } 295 | 296 | // Get the kernel names 297 | // Note: getInfo returns a ';' delimited string. 298 | std::string namesString = programs[0].getInfo (); 299 | std::vector kernel_names; 300 | clutils::split (namesString, ';', kernel_names); 301 | 302 | // Retrieve the kernels from program 0 303 | kernels.emplace_back (); 304 | kernelIdx.emplace_back (); 305 | for (unsigned int idx = 0; idx < kernel_names.size (); ++idx) 306 | { 307 | kernels[0].emplace_back (programs[0], kernel_names[idx].c_str ()); 308 | kernelIdx[0][kernel_names[idx]] = idx; 309 | } 310 | } 311 | } 312 | 313 | 314 | /*! 
\brief Delegating constructor 315 | * 316 | * \param[in] kernel_filename a string with the name of the kernel file (.cl). 317 | * \param[in] build_options options that are forwarded to the OpenCL compiler. 318 | */ 319 | CLEnv::CLEnv (const std::string &kernel_filename, const char *build_options) 320 | : CLEnv (std::vector { kernel_filename }, build_options) 321 | { 322 | } 323 | 324 | 325 | /*! \param[in] pIdx an index for the context. 326 | * Indices follow the order the contexts were created in. 327 | * \return The requested context. 328 | */ 329 | cl::Context& CLEnv::getContext (unsigned int pIdx) 330 | { 331 | try 332 | { 333 | return contexts.at (pIdx); 334 | } 335 | catch (const std::out_of_range &error) 336 | { 337 | std::cerr << "Out of Range error: " << error.what () 338 | << " (" << __FILE__ << ":" << __LINE__ << ")" << std::endl; 339 | exit (EXIT_FAILURE); 340 | } 341 | } 342 | 343 | 344 | /*! \param[in] ctxIdx the index for the context the requested queue is in. 345 | * Indices follow the order the contexts were created in. 346 | * \param[in] qIdx an index for the command queue. 347 | * Indices follow the order the queues were created in. 348 | * \return The requested command queue. 349 | */ 350 | cl::CommandQueue& CLEnv::getQueue (unsigned int ctxIdx, unsigned int qIdx) 351 | { 352 | try 353 | { 354 | return queues.at (ctxIdx).at (qIdx); 355 | } 356 | catch (const std::out_of_range &error) 357 | { 358 | std::cerr << "Out of Range error: " << error.what () 359 | << " (" << __FILE__ << ":" << __LINE__ << ")" << std::endl; 360 | exit (EXIT_FAILURE); 361 | } 362 | } 363 | 364 | 365 | /*! \param[in] pgIdx the index of the requested program object. 366 | * Indices follow the order the programs were created in. 367 | * \return The requested program. 
368 | */ 369 | cl::Program& CLEnv::getProgram (unsigned int pgIdx) 370 | { 371 | try 372 | { 373 | return programs.at (pgIdx); 374 | } 375 | catch (const std::out_of_range &error) 376 | { 377 | std::cerr << "Out of Range error: " << error.what () 378 | << " (" << __FILE__ << ":" << __LINE__ << ")" << std::endl; 379 | exit (EXIT_FAILURE); 380 | } 381 | } 382 | 383 | 384 | /*! \param[in] kernel_name the name of the kernel. 385 | * \param[in] pgIdx the index of the program the kernel belongs to. 386 | * Indices follow the order the programs were created in. 387 | * \return The requested kernel. 388 | */ 389 | cl::Kernel& CLEnv::getKernel (const char *kernel_name, size_t pgIdx) 390 | { 391 | try 392 | { 393 | /*! \sa kernelIdx */ 394 | unsigned int kIdx = kernelIdx.at (pgIdx).at (std::string (kernel_name)); 395 | return kernels[pgIdx][kIdx]; 396 | } 397 | catch (const std::out_of_range &error) 398 | { 399 | std::cerr << "Out of Range error: " << error.what () 400 | << " (" << __FILE__ << ":" << __LINE__ << ")" << std::endl; 401 | exit (EXIT_FAILURE); 402 | } 403 | } 404 | 405 | 406 | /*! \details It allows to create a GL-shared context. If GL-Sharing is not 407 | * supported for the associated device, an exception is thrown. 408 | * It also calls `initGLMemObjects` to initialize the GL buffers. 409 | * \param[in] pIdx an index for the platform for which to create the context. 410 | * Indices follow the order the platforms got returned in 411 | * by the OpenCL runtime. 412 | * \param[in] gl_shared a flag for whether or not to create a GL-shared CL context. 413 | * \return A reference to the created context. 
414 | */ 415 | cl::Context& CLEnv::addContext (unsigned int pIdx, const bool gl_shared) 416 | { 417 | try 418 | { 419 | size_t idx = devices.size (); 420 | devices.emplace_back (); 421 | platforms.at (pIdx).getDevices (CL_DEVICE_TYPE_ALL, &devices[idx]); 422 | cl_context_properties *props = nullptr; 423 | 424 | if (gl_shared) 425 | { 426 | // Set context properties (OS specific) 427 | #if defined(_WIN32) 428 | cl_context_properties _props[] = 429 | { 430 | CL_GL_CONTEXT_KHR, (cl_context_properties) wglGetCurrentContext (), 431 | CL_WGL_HDC_KHR, (cl_context_properties) wglGetCurrentDC (), 432 | CL_CONTEXT_PLATFORM, (cl_context_properties) (platforms.at (pIdx)) (), 433 | 0 434 | }; 435 | #elif defined(__APPLE__) || defined(__MACOSX) 436 | CGLContextObj kCGLContext = CGLGetCurrentContext (); 437 | CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup (kCGLContext); 438 | cl_context_properties _props[] = 439 | { 440 | CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties) kCGLShareGroup, 441 | 0 442 | }; 443 | #else 444 | cl_context_properties _props[] = 445 | { 446 | CL_GL_CONTEXT_KHR, (cl_context_properties) glXGetCurrentContext (), 447 | CL_GLX_DISPLAY_KHR, (cl_context_properties) glXGetCurrentDisplay (), 448 | CL_CONTEXT_PLATFORM, (cl_context_properties) (platforms.at (pIdx)) (), 449 | 0 450 | }; 451 | #endif 452 | 453 | // Get the CL device associated with the GL context 454 | #if defined(__APPLE__) || defined(__MACOSX) 455 | cl::Device device = devices[idx][0]; 456 | #else 457 | cl_device_id devID; 458 | clGetGLContextInfoKHR_fn clGetGLContextInfo = (clGetGLContextInfoKHR_fn) 459 | clGetExtensionFunctionAddressForPlatform ((platforms.at (pIdx)) (), "clGetGLContextInfoKHR"); 460 | clGetGLContextInfo (_props, CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR, sizeof (cl_device_id), &devID, nullptr); 461 | cl::Device device (devID); 462 | #endif 463 | 464 | if (!checkCLGLInterop (device)) { 465 | std::cout << "Invalid device, exiting" << std::endl; // 466 | exit(-1); 
// 467 | // throw cl::Error (CL_INVALID_DEVICE, "CLEnv::addContext"); 468 | } 469 | props = _props; 470 | } 471 | 472 | contexts.emplace_back (devices[idx], props); 473 | // Initialize the vector for the queues 474 | // that will be handled by this context 475 | queues.emplace_back (); 476 | 477 | initGLMemObjects (); 478 | 479 | return contexts[idx]; 480 | } 481 | catch (const std::out_of_range &error) 482 | { 483 | std::cerr << "Out of Range error: " << error.what () 484 | << " (" << __FILE__ << ":" << __LINE__ << ")" << std::endl; 485 | exit (EXIT_FAILURE); 486 | } 487 | } 488 | 489 | 490 | /*! \param[in] ctxIdx the index of the context the device is handled by. 491 | * Indices follow the order the contexts were created in. 492 | * \param[in] dIdx the index of the device among those handled by the 493 | * specified context. Indices follow the order the devices 494 | * got returned in by the call to getDevices 495 | * on the proper platform. 496 | * \param[in] props bitfield to enable command queue properties. 497 | * \return A reference to the created queue. 498 | */ 499 | cl::CommandQueue& CLEnv::addQueue (unsigned int ctxIdx, unsigned int dIdx, 500 | cl_command_queue_properties props) 501 | { 502 | try 503 | { 504 | if (ctxIdx >= contexts.size ()) 505 | throw std::out_of_range ("vector::_M_range_check"); 506 | 507 | std::vector devs = contexts[ctxIdx].getInfo (); 508 | 509 | size_t qIdx = queues[ctxIdx].size (); 510 | queues[ctxIdx].emplace_back (contexts[ctxIdx], devs.at (dIdx), props); 511 | 512 | return queues[ctxIdx][qIdx]; 513 | } 514 | catch (const std::out_of_range &error) 515 | { 516 | std::cerr << "Out of Range error: " << error.what () 517 | << " (" << __FILE__ << ":" << __LINE__ << ")" << std::endl; 518 | exit (EXIT_FAILURE); 519 | } 520 | } 521 | 522 | 523 | /*! \param[in] ctxIdx the index of the context the GL-shared device is handled by. 524 | * Indices follow the order the contexts were created in. 
525 | * \param[in] props bitfield to enable command queue properties. 526 | * \return A reference to the created queue. 527 | */ 528 | cl::CommandQueue& CLEnv::addQueueGL (unsigned int ctxIdx, cl_command_queue_properties props) 529 | { 530 | try 531 | { 532 | if (ctxIdx >= contexts.size ()) 533 | throw std::out_of_range ("vector::_M_range_check"); 534 | 535 | #if defined(__APPLE__) || defined(__MACOSX) 536 | std::vector devs = contexts[ctxIdx].getInfo (); 537 | cl::Device device = devs[0]; 538 | #else 539 | cl::Device device; 540 | std::vector ctxProps = contexts[ctxIdx].getInfo (); 541 | clGetGLContextInfoKHR_fn clGetGLContextInfo = (clGetGLContextInfoKHR_fn) 542 | clGetExtensionFunctionAddressForPlatform ((cl_platform_id) ctxProps[5], "clGetGLContextInfoKHR"); 543 | clGetGLContextInfo (ctxProps.data (), CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR, 544 | sizeof (cl_device_id), &(device ()), nullptr); 545 | #endif 546 | 547 | size_t qIdx = queues[ctxIdx].size (); 548 | queues[ctxIdx].emplace_back (contexts[ctxIdx], device, props); 549 | 550 | return queues[ctxIdx][qIdx]; 551 | } 552 | catch (const std::out_of_range &error) 553 | { 554 | std::cerr << "Out of Range error: " << error.what () 555 | << " (" << __FILE__ << ":" << __LINE__ << ")" << std::endl; 556 | exit (EXIT_FAILURE); 557 | } 558 | } 559 | 560 | 561 | /*! \param[in] ctxIdx the index of the context the program is associated with. 562 | * Indices follow the order the contexts were created in. 563 | * \param[in] kernel_filenames a vector of strings with 564 | * the names of the kernel files (.cl). 565 | * \param[in] kernel_name the name of a requested kernel. 566 | * \param[in] build_options options that are forwarded to the OpenCL compiler. 567 | * \return The requested kernel. If kernel_name is NULL, the first kernel 568 | * of the program gets returned. 
569 | */ 570 | cl::Kernel& CLEnv::addProgram (unsigned int ctxIdx, 571 | const std::vector &kernel_filenames, 572 | const char *kernel_name, const char *build_options) 573 | { 574 | try 575 | { 576 | // Create a sources object with all kernel files 577 | cl::Program::Sources sources; 578 | readSource (kernel_filenames, sources); 579 | 580 | // Create a program object from the source codes, 581 | // targeting the requested context 582 | programs.emplace_back (contexts.at (ctxIdx), sources); 583 | 584 | // Build the program for all devices in the requested context 585 | std::vector devs = contexts[ctxIdx].getInfo (); 586 | size_t pgIdx = programs.size () - 1; 587 | 588 | try 589 | { 590 | cl_int res = programs[pgIdx].build (devs, build_options); 591 | if(res != CL_SUCCESS) { 592 | std::cout << "Error adding program" << std::endl; 593 | exit(-1); 594 | } 595 | } 596 | catch (const cl::Error &error) 597 | { 598 | std::cerr << error.what () 599 | << " (" << clutils::getOpenCLErrorCodeString (error.err ()) 600 | << ")" << std::endl << std::endl; 601 | 602 | std::string log = programs[pgIdx].getBuildInfo (devs[0]); 603 | std::cout << log << std::endl; 604 | 605 | exit (EXIT_FAILURE); 606 | } 607 | 608 | // Get the kernel names 609 | // Note: getInfo returns a ';' delimited string. 
610 | std::string namesString = programs[pgIdx].getInfo (); 611 | std::vector kernel_names; 612 | clutils::split (namesString, ';', kernel_names); 613 | 614 | // Retrieve the kernels from the newly created program 615 | kernels.emplace_back (); 616 | kernelIdx.emplace_back (); 617 | for (unsigned int idx = 0; idx < kernel_names.size (); ++idx) 618 | { 619 | kernels[pgIdx].emplace_back (programs[pgIdx], kernel_names[idx].c_str ()); 620 | kernelIdx[pgIdx][kernel_names[idx]] = idx; 621 | } 622 | 623 | if (kernel_name == nullptr) 624 | return getKernel (kernel_names.at (0).c_str (), pgIdx); 625 | else 626 | return getKernel (kernel_name, pgIdx); 627 | } 628 | catch (const std::out_of_range &error) 629 | { 630 | std::cerr << "Out of Range error: " << error.what () 631 | << " (" << __FILE__ << ":" << __LINE__ << ")" << std::endl; 632 | exit (EXIT_FAILURE); 633 | } 634 | } 635 | 636 | 637 | /*! \param[in] ctxIdx the index of the context the program is associated with. 638 | * Indices follow the order the contexts were created in. 639 | * \param[in] kernel_filename a string with the name of the kernel file (.cl). 640 | * \param[in] kernel_name the name of a requested kernel. 641 | * \param[in] build_options options that are forwarded to the OpenCL compiler. 642 | * \return The requested kernel. If kernel_name is NULL, the first kernel 643 | * of the program gets returned. 644 | * \sa addProgram 645 | */ 646 | cl::Kernel& CLEnv::addProgram (unsigned int ctxIdx, 647 | const std::string &kernel_filename, 648 | const char *kernel_name, const char *build_options) 649 | { 650 | return addProgram (ctxIdx, std::vector { kernel_filename }, 651 | kernel_name, build_options); 652 | } 653 | 654 | } 655 | -------------------------------------------------------------------------------- /CLUtils/CLUtils.hpp: -------------------------------------------------------------------------------- 1 | /*! 
\file CLUtils.hpp 2 | * \brief Declarations of objects, 3 | * functions and classes for the CLUtils library. 4 | * \details CLUtils offers utilities that help 5 | setup and manage an OpenCL environment. 6 | * \author Nick Lamprianidis 7 | * \version 0.2.2 8 | * \date 2014-2015 9 | * \copyright The MIT License (MIT) 10 | * \par 11 | * Copyright (c) 2014 Nick Lamprianidis 12 | * \par 13 | * Permission is hereby granted, free of charge, to any person obtaining a copy 14 | * of this software and associated documentation files (the "Software"), to deal 15 | * in the Software without restriction, including without limitation the rights 16 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | * copies of the Software, and to permit persons to whom the Software is 18 | * furnished to do so, subject to the following conditions: 19 | * \par 20 | * The above copyright notice and this permission notice shall be included in 21 | * all copies or substantial portions of the Software. 22 | * \par 23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 | * THE SOFTWARE. 30 | */ 31 | 32 | #ifndef CLUTILS_HPP 33 | #define CLUTILS_HPP 34 | 35 | #define CL_HPP_ENABLE_EXCEPTIONS 36 | #define CL_HPP_TARGET_OPENCL_VERSION 120 37 | #define CL_HPP_MINIMUM_OPENCL_VERSION 120 38 | 39 | #if defined(__APPLE__) || defined(__MACOSX) 40 | #include 41 | #else 42 | #include 43 | #endif 44 | 45 | #include 46 | #include 47 | #include 48 | #include 49 | #include 50 | #include 51 | #include 52 | #include 53 | #include 54 | 55 | /*! 
\brief It brings together functionality common to all OpenCL projects. 56 | * 57 | * It offers structures that aim to ease the process of setting up and 58 | * maintaining an OpenCL environment. 59 | */ 60 | namespace clutils 61 | { 62 | /*! \brief Returns the name of an error code. */ 63 | const char* getOpenCLErrorCodeString (int errorCode); 64 | 65 | /*! \brief Checks the availability of the "GL Sharing" capability. */ 66 | bool checkCLGLInterop (cl::Device &device); 67 | 68 | /*! \brief Reads in the contents from the requested files. */ 69 | void readSource (const std::vector &kernel_filenames, 70 | std::vector &sourceCodes); 71 | 72 | /*! \brief Splits a string on the requested delimiter. */ 73 | void split (const std::string &str, char delim, 74 | std::vector &names); 75 | 76 | /*! \brief Sets up an OpenCL environment. 77 | * \details Prepares the essential OpenCL objects for the execution of 78 | * kernels. This class aims to allow rapid prototyping by hiding 79 | * away all the boilerplate code necessary for establishing 80 | * an OpenCL environment. 81 | */ 82 | class CLEnv 83 | { 84 | public: 85 | CLEnv (const std::vector &kernel_filenames = std::vector (), 86 | const char *build_options = nullptr); 87 | CLEnv (const std::string &kernel_filename, 88 | const char *build_options = nullptr); 89 | virtual ~CLEnv () {}; 90 | /*! \brief Gets back one of the existing contexts. */ 91 | cl::Context& getContext (unsigned int pIdx = 0); 92 | /*! \brief Gets back one of the existing command queues 93 | * in the specified context. */ 94 | cl::CommandQueue& getQueue (unsigned int ctxIdx = 0, unsigned int qIdx = 0); 95 | /*! \brief Gets back one of the existing programs. */ 96 | cl::Program& getProgram (unsigned int pgIdx = 0); 97 | /*! \brief Gets back one of the existing kernels in some program. */ 98 | cl::Kernel& getKernel (const char *kernelName, size_t pgIdx = 0); 99 | /*! \brief Creates a context for all devices in the requested platform. 
*/ 100 | cl::Context& addContext (unsigned int pIdx, const bool gl_shared = false); 101 | /*! \brief Creates a queue for the specified device in the specified context. */ 102 | cl::CommandQueue& addQueue (unsigned int ctxIdx, unsigned int dIdx, cl_command_queue_properties props = 0); 103 | /*! \brief Creates a queue for the GL-shared device in the specified context. */ 104 | cl::CommandQueue& addQueueGL (unsigned int ctxIdx, cl_command_queue_properties props = 0); 105 | /*! \brief Creates a program for the specified context. */ 106 | cl::Kernel& addProgram (unsigned int ctxIdx, 107 | const std::vector &kernel_filenames, 108 | const char *kernel_name = nullptr, 109 | const char *build_options = nullptr); 110 | cl::Kernel& addProgram (unsigned int ctxIdx, 111 | const std::string &kernel_filename, 112 | const char *kernel_name = nullptr, 113 | const char *build_options = nullptr); 114 | 115 | // Objects associated with an OpenCL environment. 116 | // For each of a number of objects, there is a vector that 117 | // can hold all instances of that object. 118 | 119 | std::vector platforms; /*!< List of platforms. */ 120 | /*! \brief List of devices per platform. 121 | * \details Holds a vector of devices per platform. */ 122 | std::vector< std::vector > devices; 123 | 124 | private: 125 | std::vector contexts; /*!< List of contexts. */ 126 | /*! \brief List of queues per context. 127 | * \details Holds a vector of queues per context. */ 128 | std::vector< std::vector > queues; 129 | std::vector programs; /*!< List of programs. */ 130 | /*! \brief List of kernels per program. 131 | * \details Holds a vector of kernels per program. */ 132 | std::vector< std::vector > kernels; 133 | 134 | protected: 135 | /*! \brief Initializes the OpenGL memory buffers. 136 | * \details If CL-GL interop is desirable, CLEnv has to be derived and 137 | * `initGLMemObjects` be implemented. `initGLMemObjects` will 138 | * have to create all necessary OpenGL memory buffers. 
139 | * \note Setting up CL-GL interop requires the following procedure: 140 | * (i) Initialize OpenGL context, (ii) Initilize OpenCL context, 141 | * (iii) Create OpenGL buffers, (iv) Create OpenCL buffers. 142 | * \note Do not call `initGLMemObjects` directly. `initGLMemObjects` 143 | * will be called by `addContext` when it is asked for a 144 | * GL-shared CL context to be created. 145 | */ 146 | virtual void initGLMemObjects () {}; 147 | 148 | private: 149 | /*! \brief Maps kernel names to kernel indices. 150 | * There is one unordered_map for every program. 151 | * 152 | * For every program in programs, there is an element in kernelIdx. 153 | * For every kernel in program i, there is a mapping from the kernel 154 | * name to the kernel index in kernels[i]. 155 | */ 156 | std::vector< std::unordered_map > kernelIdx; 157 | }; 158 | 159 | 160 | /*! \brief Facilitates the conveyance of `CLEnv` arguments. 161 | * \details `CLEnv` creates an OpenCL environment. A `CLEnv` object 162 | * potentially contains many platforms, contexts, queues, etc, 163 | * that are to be used by different (independent) subsystems. 164 | * Those subsystems will have to know where to look inside CLEnv 165 | * for their associated CL objects. `CLEnvInfo` organizes this 166 | * process of information transfer between OpenCL systems. 167 | * 168 | * \tparam nQueues the number of command queue indices to be held by `CLEnvInfo`. 169 | */ 170 | template 171 | class CLEnvInfo 172 | { 173 | public: 174 | /*! \brief Initializes a `CLEnvInfo` object. 175 | * \details All provided indices are supposed to follow the order the 176 | * associated objects were created in the associated `CLEnv` instance. 177 | * 178 | * \param[in] _pIdx platform index. 179 | * \param[in] _dIdx device index. 180 | * \param[in] _ctxIdx context index. 181 | * \param[in] _qIdx vector with command queue indices. 182 | * \param[in] _pgIdx program index. 
183 | */ 184 | CLEnvInfo (unsigned int _pIdx = 0, unsigned int _dIdx = 0, unsigned int _ctxIdx = 0, 185 | const std::vector _qIdx = { 0 }, unsigned int _pgIdx = 0) : 186 | pIdx (_pIdx), dIdx (_dIdx), ctxIdx (_ctxIdx), pgIdx (_pgIdx) 187 | { 188 | try 189 | { 190 | if (_qIdx.size () != nQueues) 191 | throw "The provided vector of command queue indices has the wrong size"; 192 | 193 | qIdx = _qIdx; 194 | } 195 | catch (const char *error) 196 | { 197 | std::cerr << "Error[CLEnvInfo]: " << error << std::endl; 198 | exit (EXIT_FAILURE); 199 | } 200 | } 201 | 202 | 203 | /*! \brief Creates a new `CLEnvInfo` object with the specified command queue. 204 | * \details Maintains the same OpenCL configuration, but chooses only one 205 | * of the available command queues to include. 206 | * 207 | * \param[in] idx an index for the `qIdx` vector. 208 | */ 209 | CLEnvInfo<1> getCLEnvInfo (unsigned int idx) 210 | { 211 | try 212 | { 213 | return CLEnvInfo<1> (pIdx, dIdx, ctxIdx, { qIdx.at (idx) }, pgIdx); 214 | } 215 | catch (const std::out_of_range &error) 216 | { 217 | std::cerr << "Out of Range error: " << error.what () 218 | << " (" << __FILE__ << ":" << __LINE__ << ")" << std::endl; 219 | exit (EXIT_FAILURE); 220 | } 221 | } 222 | 223 | 224 | unsigned int pIdx; /*!< Platform index. */ 225 | unsigned int dIdx; /*!< Device index. */ 226 | unsigned int ctxIdx; /*!< Context index. */ 227 | std::vector qIdx; /*!< Vector of queue indices. */ 228 | unsigned int pgIdx; /*!< Program index. */ 229 | }; 230 | 231 | 232 | /*! \brief A class that collects and manipulates timing information 233 | * about a test. 234 | * \details It stores the execution times of a test in a vector, 235 | * and then offers summarizing results. 236 | * 237 | * \tparam nSize the number of test repetitions. 238 | * \tparam rep the type of the values the class stores and returns. 239 | */ 240 | template 241 | class ProfilingInfo 242 | { 243 | public: 244 | /*! \param[in] pLabel a label characterizing the test. 
245 | * \param[in] pUnit a name for the time unit to be printed 246 | * when displaying the results. 247 | */ 248 | ProfilingInfo (std::string pLabel = std::string (), std::string pUnit = std::string ("ms")) 249 | : label (pLabel), tExec (nSize), tWidth (4 + log10 (nSize)), tUnit (pUnit) 250 | { 251 | } 252 | 253 | /*! \param[in] idx subscript index. */ 254 | rep& operator[] (const unsigned int idx) 255 | { 256 | assert (idx >= 0 && idx < nSize); 257 | return tExec[idx]; 258 | } 259 | 260 | /*! \brief Returns the sum of the \#nSize executon times. 261 | * 262 | * \param[in] initVal an initial value from which to start counting. 263 | * \return The sum of the vector elements. 264 | */ 265 | rep total (rep initVal = 0.0) 266 | { 267 | return std::accumulate (tExec.begin (), tExec.end (), initVal); 268 | } 269 | 270 | /*! \brief Returns the mean time of the \#nSize executon times. 271 | * 272 | * \return The mean of the vector elements. 273 | */ 274 | rep mean () 275 | { 276 | return total() / (rep) tExec.size (); 277 | } 278 | 279 | /*! \brief Returns the min time of the \#nSize executon times. 280 | * 281 | * \return The min of the vector elements. 282 | */ 283 | rep min () 284 | { 285 | return *std::min_element (tExec.begin (), tExec.end ()); 286 | } 287 | 288 | /*! \brief Returns the max time of the \#nSize executon times. 289 | * 290 | * \return The max of the vector elements. 291 | */ 292 | rep max () 293 | { 294 | return *std::max_element (tExec.begin (), tExec.end ()); 295 | } 296 | 297 | /*! \brief Returns the relative performance speedup wrt `refProf`. 298 | * 299 | * \param[in] refProf a reference test. 300 | * \return The factor of execution time decrease. 301 | */ 302 | rep speedup (ProfilingInfo &refProf) 303 | { 304 | return refProf.mean () / mean (); 305 | } 306 | 307 | /*! \brief Displays summarizing results on the test. 308 | * 309 | * \param[in] title a title for the table of results. 
310 | * \param[in] bLine a flag for whether or not to print a newline 311 | * at the end of the table. 312 | */ 313 | void print (std::ostream& stream, const char *title = nullptr, bool bLine = true) 314 | { 315 | std::ios::fmtflags f (stream.flags ()); 316 | stream << std::fixed << std::setprecision (3); 317 | 318 | if (title) 319 | stream << std::endl << title << std::endl << std::endl; 320 | else 321 | stream << std::endl; 322 | 323 | stream << " " << label << std::endl; 324 | stream << " " << std::string (label.size (), '-') << std::endl; 325 | stream << " Mean : " << std::setw (tWidth) << mean () << " " << tUnit << std::endl; 326 | stream << " Min : " << std::setw (tWidth) << min () << " " << tUnit << std::endl; 327 | stream << " Max : " << std::setw (tWidth) << max () << " " << tUnit << std::endl; 328 | stream << " Total : " << std::setw (tWidth) << total () << " " << tUnit << std::endl; 329 | if (bLine) stream << std::endl; 330 | 331 | stream.flags (f); 332 | } 333 | 334 | /*! \brief Displays summarizing results on two tests. 335 | * \details Compares the two tests by calculating the speedup 336 | * on the mean execution times. 337 | * \note I didn't bother handling the units. It's your responsibility 338 | * to enforce the same unit of time on the two objects. 339 | * 340 | * \param[in] refProf a reference test. 341 | * \param[in] title a title for the table of results. 342 | */ 343 | void print (ProfilingInfo &refProf, const char *title = nullptr, std::ostream& stream = std::cout) 344 | { 345 | if (title) 346 | stream << std::endl << title << std::endl; 347 | 348 | refProf.print (stream, nullptr, false); 349 | print (stream, nullptr, false); 350 | 351 | stream << std::endl << " Benchmark" << std::endl << " ---------" << std::endl; 352 | 353 | stream << " Speedup: " << std::setw (tWidth) << speedup (refProf) << std::endl << std::endl; 354 | } 355 | 356 | private: 357 | std::string label; /*!< A label characterizing the test. 
*/ 358 | std::vector tExec; /*!< Execution times. */ 359 | uint8_t tWidth; /*!< Width of the results when printing. */ 360 | std::string tUnit; /*!< Time unit to display when printing the results. */ 361 | }; 362 | 363 | 364 | /*! \brief A class for measuring execution times. 365 | * \details CPUTimer is an interface for `std::chrono::duration`. 366 | * 367 | * \tparam rep the type of the value returned by `duration`. 368 | * \tparam period the unit of time for the value returned by `duration`. 369 | * It is declared as an `std::ratio`. 370 | */ 371 | template 372 | class CPUTimer 373 | { 374 | public: 375 | /*! \brief Constructs a timer. 376 | * \details The timer doesn't start automatically. 377 | * 378 | * \param[in] initVal a value to initialize the timer with. 379 | */ 380 | CPUTimer (int initVal = 0) : tDuration (initVal) 381 | { 382 | } 383 | 384 | /*! \brief Starts the timer. 385 | * 386 | * \param[in] tReset a flag for resetting the timer before the timer starts. 387 | * If `false`, the timer starts counting from 388 | * the point it reached the last time it stopped. 389 | */ 390 | void start (bool tReset = true) 391 | { 392 | if (tReset) 393 | reset (); 394 | 395 | tReference = std::chrono::high_resolution_clock::now (); 396 | } 397 | 398 | /*! \brief Stops the timer. 399 | * 400 | * \return The time measured up to this point in `period` units. 401 | */ 402 | rep stop () 403 | { 404 | tDuration += std::chrono::duration_cast< std::chrono::duration > 405 | (std::chrono::high_resolution_clock::now () - tReference); 406 | 407 | return duration (); 408 | } 409 | 410 | /*! \brief Returns the time measured by the timer. 411 | * \details This time is measured up to the point the timer last time stopped. 412 | * 413 | * \return The time in `period` units. 414 | */ 415 | rep duration () 416 | { 417 | return tDuration.count (); 418 | } 419 | 420 | /*! \brief Resets the timer. 
*/ 421 | void reset () 422 | { 423 | tDuration = std::chrono::duration::zero (); 424 | } 425 | 426 | private: 427 | /*! A reference point for when the timer started. */ 428 | std::chrono::time_point tReference; 429 | /*! The time measured by the timer. */ 430 | std::chrono::duration tDuration; 431 | }; 432 | 433 | 434 | /*! \brief A class for profiling CL devices. 435 | * 436 | * \tparam period the unit of time for the value returned by `duration`. 437 | * It is declared as an `std::ratio`. 438 | */ 439 | template 440 | class GPUTimer 441 | { 442 | public: 443 | /*! \param[in] device the targeted for profiling CL device. 444 | */ 445 | GPUTimer (cl::Device &device) 446 | { 447 | period tPeriod; 448 | // Converts nanoseconds to seconds and then to the requested scale 449 | tUnit = (double) tPeriod.den / (double) tPeriod.num / 1000000000.0; 450 | } 451 | 452 | /*! \brief Returns a new unpopulated event. 453 | * \details The last populated event gets dismissed. 454 | * 455 | * \return An event for the profiling process. 456 | */ 457 | cl::Event& event () 458 | { 459 | return pEvent; 460 | } 461 | 462 | /*! \brief This is an interface for `cl::Event::wait`. 463 | */ 464 | void wait () 465 | { 466 | pEvent.wait (); 467 | } 468 | 469 | /*! \brief Returns the time measured by the timer. 470 | * \note It's important that it's called after a call to `wait`. 471 | * 472 | * \return The time in `period` units. 473 | */ 474 | double duration () 475 | { 476 | cl_ulong start = pEvent.getProfilingInfo (); 477 | cl_ulong end = pEvent.getProfilingInfo (); 478 | 479 | return (end - start) * tUnit; 480 | } 481 | 482 | /*! \brief Returns the unit of the timer. 483 | * 484 | * \return The time unit in seconds. 485 | */ 486 | double getUnit () 487 | { 488 | return tUnit; 489 | } 490 | 491 | private: 492 | cl::Event pEvent; /*!< The profiling event. */ 493 | double tUnit; /*!< A factor to set the scale for the measured time. 
*/ 494 | }; 495 | 496 | } 497 | 498 | #endif // CLUTILS_HPP 499 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | CFLAGS= -std=c++11 -Wall 3 | LIBS = -lOpenCL -lOpenImageIO -lGL -fopenmp 4 | 5 | svgf: svgf.cpp CLUtils/CLUtils.hpp CLUtils/CLUtils.cpp utils.cpp svgf.hpp 6 | g++ -o $@ $^ $(LIBS) -I . -Wno-ignored-attributes -g -m64 -DDEBUG -DUNIX 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Spatiotemporal Variance-Guided Filtering implementation in OpenCL 2 | 3 | This is an implementation of the SVGF algorithm ([Schied et al. 2017][SVGF]) for denoising ray traced images with low sample count. 4 | 5 | ## Modifications 6 | 7 | A few simplifications are made from the original algorithm: 8 | 9 | - This implementation does not separate direct and indirect lighting 10 | - It is also only made to tackle static scenes, i.e. there are no motion vectors 11 | - There is also no use of mesh IDs to accumulate samples correctly, which is less of a problem when the scene is assumed static. 12 | 13 | ## Sample Results 14 | 15 | The following shows a ray traced scene with one ray sample per pixel, without denoising 16 | 17 | ![Original, non-denoised rendering](nondenoised.png) 18 | 19 | The following shows the same image, after denoising has been applied: 20 | 21 | ![Denoised rendering](denoised.png) 22 | 23 | The algorithm uses buffers containing world-positions, surface normals and albedo for each pixel, as well as samples from previous frames to generate the result. 
24 | 25 | These performance results are obtained on a laptop with GeForce GTX 960M: 26 | 27 | 28 | ``` 29 | Reprojecting samples 30 | -------------------- 31 | Mean : 2.022 ms 32 | Min : 1.974 ms 33 | Max : 2.106 ms 34 | Total : 119.273 ms 35 | 36 | 37 | Computing variance 38 | ------------------ 39 | Mean : 10.180 ms 40 | Min : 8.363 ms 41 | Max : 21.723 ms 42 | Total : 600.639 ms 43 | 44 | 45 | Running atrous multiple iterations 46 | ---------------------------------- 47 | Mean : 39.488 ms 48 | Min : 39.071 ms 49 | Max : 40.125 ms 50 | Total : 2329.813 ms 51 | 52 | 53 | Total 54 | ----- 55 | Mean : 51.696 ms 56 | Min : 49.515 ms 57 | Max : 63.598 ms 58 | Total : 3050.049 ms 59 | ``` 60 | 61 | The code is not thoroughly optimized due to time constraints. 62 | 63 | When run on desktop hardware, the algorithm is fit for more than 60 frames per second. 64 | 65 | ## Acknowledgements 66 | 67 | Much of the utility code and the structure of the OpenCL code is adapted from [Koskela et al.'s Blockwise Multi-Order Feature Regression implementation][BMFR], although the algorithm itself is completely different. 
68 | 69 | 70 | [SVGF]: https://research.nvidia.com/publication/2017-07_Spatiotemporal-Variance-Guided-Filtering%3A 71 | 72 | [BMFR]: https://github.com/maZZZu/bmfr -------------------------------------------------------------------------------- /denoised.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheVaffel/spatiotemporal-variance-guided-filtering/e845ff7b6ce7a7126eab50c91297b19e960061a0/denoised.png -------------------------------------------------------------------------------- /nondenoised.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheVaffel/spatiotemporal-variance-guided-filtering/e845ff7b6ce7a7126eab50c91297b19e960061a0/nondenoised.png -------------------------------------------------------------------------------- /svgf.cl: -------------------------------------------------------------------------------- 1 | 2 | static inline float3 linear_to_srgb(float3 x) { 3 | float3 r; 4 | 5 | // Adapted from cuda_utils.h in TwinkleBear's ChameleonRT 6 | if(x.x <= 0.0031308f) { 7 | r.x = 12.92f * x.x; 8 | } else { 9 | r.x = 1.055f * powr(x.x, 1.f/2.4f) - 0.055f; 10 | } 11 | 12 | if(x.y <= 0.0031308f) { 13 | r.y = 12.92f * x.y; 14 | } else { 15 | r.y = 1.055f * powr(x.y, 1.f/2.4f) - 0.055f; 16 | } 17 | 18 | if(x.z <= 0.0031308f) { 19 | r.z = 12.92f * x.z; 20 | } else { 21 | r.z = 1.055f * powr(x.z, 1.f/2.4f) - 0.055f; 22 | } 23 | 24 | return r; 25 | } 26 | 27 | 28 | 29 | #define load_float3(buffer, index) ((float3) \ 30 | {buffer[(index) * 3], buffer[(index) * 3 + 1], buffer[(index) * 3 + 2]}) 31 | 32 | #define load_float4(buffer, index) ((float4) \ 33 | {buffer[(index) * 4], buffer[(index) * 4 + 1], buffer[(index) * 4 + 2], buffer[(index) * 4 + 3]}) 34 | 35 | static inline void store_float3( 36 | __global float* restrict buffer, 37 | const int index, 38 | const float3 value){ 39 | 40 | buffer[index * 3 + 0] = value.x; 41 | 
buffer[index * 3 + 1] = value.y; 42 | buffer[index * 3 + 2] = value.z; 43 | } 44 | 45 | static inline void store_float4( 46 | __global float* restrict buffer, 47 | const int index, 48 | const float4 value) { 49 | buffer[index * 4 + 0] = value.x; 50 | buffer[index * 4 + 1] = value.y; 51 | buffer[index * 4 + 2] = value.z; 52 | buffer[index * 4 + 3] = value.w; 53 | } 54 | 55 | static inline float luminance(float3 c) { 56 | return c.x * 0.2126 + c.y * 0.7152 + c.z * 0.0722; 57 | } 58 | 59 | static inline int linear(int2 p) { 60 | return p.y * IMAGE_WIDTH + p.x; 61 | } 62 | 63 | __kernel void test(const __global float* restrict noisy_input, 64 | __global float* restrict output) { 65 | 66 | const int2 gid = {get_global_id(0), get_global_id(1)}; 67 | const int linear_pixel = gid.y * IMAGE_WIDTH + gid.x; 68 | 69 | const float3 in_color = load_float3(noisy_input, linear_pixel); 70 | 71 | const float3 vp = (float3){(float)gid.x / (float)IMAGE_WIDTH, 0, (float)gid.y / (float)IMAGE_HEIGHT}; 72 | 73 | float3 res = in_color * vp; 74 | 75 | store_float3(output, linear_pixel, res); 76 | } 77 | 78 | 79 | __kernel void reproject(const __global float* restrict input_noise, 80 | const __global float* restrict curr_normals, 81 | const __global float* restrict prev_normals, 82 | const __global float* restrict curr_positions, 83 | const __global float* restrict prev_positions, 84 | __global float* restrict curr_accumulated, 85 | const __global float* restrict prev_accumulated, 86 | __global float* restrict curr_util, 87 | const __global float* restrict prev_util, 88 | const float4 curr_view_matrix_z_row, 89 | const float16 prev_cam, 90 | const int frame_number) { 91 | 92 | 93 | const float NORMAL_TOLERANCE = 5.0e-2; 94 | const float POSITION_TOLERANCE = 1e-2; 95 | 96 | const int2 gid = {get_global_id(0), get_global_id(1)}; 97 | const int linear_pixel = linear(gid); 98 | if(gid.x >= IMAGE_WIDTH || gid.y >= IMAGE_HEIGHT) { 99 | return; 100 | } 101 | 102 | float3 noise_load = 
load_float3(input_noise, linear_pixel); 103 | float3 pos_load = load_float3(curr_positions, linear_pixel); 104 | float4 world_position = (float4){pos_load.x, pos_load.y, pos_load.z, 1.0}; 105 | 106 | float z_coord = dot(curr_view_matrix_z_row, world_position); 107 | float linear_z = fabs(z_coord); 108 | 109 | float3 util_val; 110 | float3 acc_out = noise_load; 111 | 112 | if(frame_number == 0) { 113 | // store_float3(curr_accumulated, linear_pixel, noise_load); 114 | 115 | float lum = luminance(noise_load); 116 | util_val.x = 1.0; 117 | util_val.y = lum * lum; 118 | util_val.z = linear_z; 119 | // store_float3(curr_util, linear_pixel, util_val); 120 | // return; 121 | } else { 122 | 123 | float3 util_load = load_float3(prev_util, linear_pixel); 124 | float3 normal_load = load_float3(curr_normals, linear_pixel); 125 | 126 | 127 | float2 position_in_prev_frame = (float2){ dot(prev_cam.s048c, world_position), 128 | dot(prev_cam.s159d, world_position)}; 129 | float pos_w = dot(prev_cam.s37bf, world_position); 130 | position_in_prev_frame /= pos_w; 131 | position_in_prev_frame += 1.f; 132 | position_in_prev_frame /= 2.f; 133 | 134 | position_in_prev_frame *= (float2){IMAGE_WIDTH, IMAGE_HEIGHT}; 135 | position_in_prev_frame -= (float2){0.5, 0.5}; 136 | 137 | int2 prev_frame_pixel = convert_int2_rtn(position_in_prev_frame); 138 | 139 | int2 offsets[4]; 140 | offsets[0] = (int2){0, 0}; 141 | offsets[1] = (int2){1, 0}; 142 | offsets[2] = (int2){0, 1}; 143 | offsets[3] = (int2){1, 1}; 144 | 145 | float2 pix_fract = position_in_prev_frame - convert_float2(prev_frame_pixel); 146 | float2 inv_pix_fract = 1.0f - pix_fract; 147 | float weights[4]; 148 | weights[0] = inv_pix_fract.x * inv_pix_fract.y; 149 | weights[1] = pix_fract.x * inv_pix_fract.y; 150 | weights[2] = inv_pix_fract.x * pix_fract.y; 151 | weights[3] = pix_fract.x * pix_fract.y; 152 | 153 | float sum_weight = 0; 154 | float3 sum_val = 0.0; 155 | float sum_spp = 0.0; 156 | float sum_moment = 0.0; 157 | 158 | for(int 
i = 0; i < 4; i++) { 159 | int2 p = prev_frame_pixel + offsets[i]; 160 | 161 | int linear_p = linear(p); 162 | 163 | if(p.x < 0 || p.y < 0 || 164 | p.x >= IMAGE_WIDTH || p.y >= IMAGE_HEIGHT) { 165 | continue; 166 | } 167 | 168 | 169 | float3 prev_wp = load_float3(prev_positions, linear_p); 170 | float3 pos_diff = prev_wp - pos_load; 171 | float ps_diff_squared = dot(pos_diff, pos_diff); 172 | 173 | if(ps_diff_squared >= POSITION_TOLERANCE) { 174 | continue; 175 | } 176 | 177 | float3 prev_normal_load = load_float3(prev_normals, linear_p); 178 | float3 n_dist = normal_load - prev_normal_load; 179 | 180 | if(dot(n_dist, n_dist) >= NORMAL_TOLERANCE) { 181 | continue; 182 | } 183 | 184 | float3 val = load_float3(prev_accumulated, linear_p); 185 | float3 prev_ut_val = load_float3(prev_util, linear_p); 186 | 187 | sum_val += weights[i] * val; 188 | sum_spp += weights[i] * prev_ut_val.x; 189 | sum_moment += weights[i] * prev_ut_val.y; 190 | sum_weight += weights[i]; 191 | } 192 | 193 | if(sum_weight > 0.0) { 194 | sum_spp /= sum_weight; 195 | sum_val /= sum_weight; 196 | sum_moment /= sum_weight; 197 | } 198 | 199 | float blend_a = max(1.0 / (sum_spp + 1.0), 0.15); 200 | float moment_a = max(1.0 / (sum_spp + 1.0), 0.2); 201 | 202 | float new_spp = sum_spp + 1.0; 203 | float new_moment = (1 - moment_a) * sum_moment + moment_a * pow(luminance(noise_load), 2.0f); 204 | 205 | acc_out = (1 - blend_a) * sum_val + blend_a * noise_load; 206 | 207 | util_val = (float3){new_spp, new_moment, linear_z}; 208 | } 209 | 210 | store_float3(curr_accumulated, linear_pixel, acc_out); 211 | store_float3(curr_util, linear_pixel, util_val); 212 | } 213 | 214 | __kernel void compute_variance(const __global float* restrict curr_normals, 215 | const __global float* restrict curr_accumulated, 216 | const __global float* restrict curr_util, 217 | __global float* restrict output_image) { 218 | 219 | const float NORMAL_PHI = 1e-2; 220 | // const float POSITION_PHI = 1e0; // Should depend on depth, 
but oh well 221 | const float COLOR_PHI = 1.0e1; 222 | 223 | const int2 gid = {get_global_id(0), get_global_id(1)}; 224 | 225 | const int linear_pixel = linear(gid); 226 | 227 | float3 normal_load = load_float3(curr_normals, linear_pixel); 228 | float3 acc_load = load_float3(curr_accumulated, linear_pixel); 229 | float3 util_load = load_float3(curr_util, linear_pixel); 230 | 231 | float curr_lum = luminance(acc_load); 232 | 233 | float spp = util_load.x; 234 | float moment = util_load.y; 235 | float linear_z = util_load.z; 236 | 237 | float phiDepth = max(5e-3, 1e-8) * 3.0; 238 | 239 | float sum_weights = 0.0; 240 | float sum_moment = 0.0; 241 | float3 sum_accum = 0.0; 242 | 243 | float variance; 244 | 245 | if(spp < 4.0) { 246 | const int radius = 3; 247 | 248 | for(int yy = -radius; yy <= radius; yy++) { 249 | for(int xx = -radius; xx <= radius; xx++) { 250 | const int2 p = gid + (int2){xx, yy}; 251 | 252 | if(p.x < 0 || p.y < 0 || 253 | p.x >= IMAGE_WIDTH || p.y >= IMAGE_HEIGHT) { 254 | continue; 255 | } 256 | 257 | int lp = linear(p); 258 | 259 | float3 local_util = load_float3(curr_util, lp); 260 | 261 | float local_moment = local_util.y; 262 | float local_z = local_util.z; 263 | 264 | float3 local_normal = load_float3(curr_normals, lp); 265 | float3 local_accum = load_float3(curr_accumulated, lp); 266 | 267 | float wnorm = pow(max(dot(local_normal, normal_load), 0.0f), NORMAL_PHI); 268 | float wpos = (xx == 0 && yy == 0) ? 
0.0 : fabs(local_z - linear_z) / (phiDepth * length(convert_float2(((int2){xx, yy}))));
                float wcolor = fabs(curr_lum - luminance(local_accum)) / COLOR_PHI;

                float weight = exp(- wcolor - wpos - wnorm);
                sum_weights += weight;
                // Accumulate the *neighbour's* second moment. The centre pixel's
                // `moment` was summed here before, which made the weighted sum
                // collapse back to the centre value and left `local_moment` unused.
                sum_moment += weight * local_moment;
                sum_accum += weight * local_accum;
            }
        }

        // Guard against a zero weight sum before normalising.
        sum_weights = fmax(sum_weights, 1e-5f);
        sum_moment /= sum_weights;
        sum_accum /= sum_weights;

        // Spatial variance estimate: E[l^2] - E[l]^2, boosted while the
        // sample count is still low.
        variance = sum_moment - pown(luminance(sum_accum), 2);
        variance *= 4.0 / spp;
    } else {
        // Enough history: use the temporally accumulated moment directly.
        variance = moment - curr_lum * curr_lum;
        sum_accum = acc_load;
    }

    // Output layout: xyz = (possibly filtered) colour, w = variance.
    store_float4(output_image, linear_pixel, (float4){sum_accum.x, sum_accum.y, sum_accum.z, variance});
}


// util for atrous
/*
 * Returns the variance around `pixel_coords`, smoothed with a 3x3 kernel.
 *
 * pixel_coords     - 2D pixel position (x, y) of the centre tap.
 * curr_accumulated - buffer read with load_float4; the w component of each
 *                    entry is taken as that pixel's variance.
 *
 * Taps are weighted 1/4 (centre), 1/8 (edge neighbours) and 1/16 (corners).
 * Taps that fall outside the image are skipped; the remaining weights are
 * not renormalised.
 */
float variance_center(int2 pixel_coords,
                      const __global float* restrict curr_accumulated) {
    // Indexed by |xx| + |yy|: 0 = centre, 1 = edge, 2 = corner.
    const float kern[3] =
        { 1.0 / 4.0, 1.0 / 8.0, 1.0 / 16.0 };

    const int radius = 1;
    float sum = 0.0;
    for(int yy = -1; yy <= radius; yy++) {
        for(int xx = -1; xx <= radius; xx++) {
            int2 p = pixel_coords + (int2){xx, yy};

            // Skip taps outside the image. The lower y bound used to be
            // written as a bare `p.y`, which rejected every row except row 0.
            if(p.x < 0 || p.y < 0 || p.x >= IMAGE_WIDTH || p.y >= IMAGE_HEIGHT) {
                continue;
            }

            int lp = linear(p);
            float k = kern[abs(xx) + abs(yy)];

            float4 acc_load = load_float4(curr_accumulated, lp);
            sum += acc_load.w * k;
        }
    }

    return sum;
}

/*
 * One a-trous filter iteration.
 *
 * curr_normals     - per-pixel normals (3 floats per pixel).
 * albedo           - per-pixel albedo (3 floats per pixel); only read when
 *                    last_time == 1.
 * curr_util        - per-pixel utility values (3 floats per pixel); z is the
 *                    linear depth used for the depth weight.
 * curr_accumulated - input of this iteration (4 floats per pixel:
 *                    xyz = colour, w = variance).
 * output_image     - result; 3 floats per pixel (albedo-modulated sRGB) when
 *                    last_time == 1, otherwise 4 floats per pixel
 *                    (colour + variance).
 * step_size        - spacing between filter taps, in pixels.
 * last_time        - 1 on the final iteration, 0 otherwise.
 */
__kernel void atrous(const __global float* restrict curr_normals,
                     const __global float* restrict albedo,
                     const __global float* restrict curr_util,
                     const __global float* restrict curr_accumulated,
                     __global float* restrict output_image,
                     int step_size,
                     int last_time) {

    /* const float NORMAL_PHI = 3e-2;
       const float POSITION_PHI = 1.0; // Should depend on depth, but oh well
       const float COLOR_PHI = 10.0; */

    const float
NORMAL_PHI = 1e-2;
    // const float POSITION_PHI = 3e0; // Should depend on depth, but oh well
    const float COLOR_PHI = 1.0e1;

    // Separable tap weights, indexed by |offset| in each axis (0, 1, 2).
    const float kernelWeights[3] = { 1.0, 2.0 / 3.0, 1.0 / 6.0 };

    const int2 gid = {get_global_id(0), get_global_id(1)};

    const int linear_pixel = linear(gid);

    // variance_center() takes 2D pixel coordinates. The scalar linear index
    // was passed here before; a scalar argument is widened to
    // (linear_pixel, linear_pixel), so the wrong neighbourhood was sampled.
    float var = variance_center(gid, curr_accumulated);

    float3 util_load = load_float3(curr_util, linear_pixel);
    float3 normal_load = load_float3(curr_normals, linear_pixel);
    float4 acc_load = load_float4(curr_accumulated, linear_pixel);

    // xyz = colour, w = variance of the centre pixel.
    float3 curr_acc = acc_load.xyz;
    float variance = acc_load.w;

    float curr_luminance = luminance(curr_acc);

    float linear_z = util_load.z;

    // NOTE(review): phi_color (and therefore `var`) is computed but not used
    // by the colour weight below, which divides by the constant COLOR_PHI.
    // Presumably the variance-scaled value was intended - confirm before
    // switching, since it changes the filter response.
    float phi_color = COLOR_PHI * sqrt(max(0.0f, 1e-10f + var));
    float phiDepth = max(5e-3, 1e-8) * step_size;

    // Store center pixel with weight 1:
    float sum_weights = 1.0;
    float3 sum_accum = curr_acc;
    float sum_variance = variance;

    // 5x5 tap pattern, spread out by step_size.
    for(int yy = -2; yy <= 2; yy++) {
        for(int xx = -2; xx <= 2; xx++) {

            int2 p = gid + (int2){xx, yy} * step_size;

            // Skip taps outside the image and the centre tap
            // (already accounted for above).
            if(p.x < 0 || p.y < 0 ||
               p.x >= IMAGE_WIDTH || p.y >= IMAGE_HEIGHT ||
               (xx == 0 && yy == 0)) {
                continue;
            }

            int lp = linear(p);

            float4 local_acc = load_float4(curr_accumulated, lp);
            float3 local_util = load_float3(curr_util, lp);
            float3 local_normal = load_float3(curr_normals, lp);

            float local_z = local_util.z;
            float3 local_acc_color = (float3){local_acc.x, local_acc.y, local_acc.z};
            float local_luminance = luminance(local_acc_color);

            // Edge-stopping terms based on normal, depth and luminance.
            float wnorm = pow(max(dot(local_normal, normal_load), 0.0f), NORMAL_PHI);
            float wpos = fabs(local_z - linear_z) / (phiDepth * length(convert_float2(((int2){xx, yy}))));
            float wcolor = fabs(local_luminance - curr_luminance) / COLOR_PHI;
388 | float weight = exp(-wcolor - wpos - wnorm); 389 | 390 | float kx = kernelWeights[abs(xx)]; 391 | float ky = kernelWeights[abs(yy)]; 392 | 393 | weight *= kx * ky; 394 | 395 | float local_var = local_acc.w; 396 | 397 | sum_weights += weight; 398 | sum_accum += weight * local_acc_color; 399 | sum_variance += weight * weight * local_var; 400 | } 401 | } 402 | 403 | // Yes, this is right, accourding to Schied et al. 2017 404 | float total_var = sum_variance / (sum_weights * sum_weights); 405 | float3 total_accum = sum_accum / sum_weights; 406 | 407 | if(last_time == 1) { 408 | float3 alb = load_float3(albedo, linear_pixel); 409 | 410 | float3 modulated = clamp(linear_to_srgb(alb * total_accum), 0.0f, 1.0f); 411 | 412 | store_float3(output_image, linear_pixel, modulated); 413 | } else { 414 | float4 res = (float4){total_accum.x, total_accum.y, total_accum.z, total_var}; 415 | store_float4(output_image, linear_pixel, res); 416 | } 417 | } 418 | -------------------------------------------------------------------------------- /svgf.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "svgf.hpp" 3 | #include "utils.hpp" 4 | 5 | #include "OpenImageIO/imageio.h" 6 | 7 | #include 8 | #include 9 | 10 | #include "CLUtils/CLUtils.hpp" 11 | 12 | #include 13 | #include 14 | 15 | namespace json = nlohmann; 16 | 17 | json::json getProfileArray(clutils::ProfilingInfo& profile) { 18 | json::json arr = json::json::array(); 19 | 20 | 21 | for(int i = 0; i < NUM_FRAMES - 1; i++) { 22 | arr.push_back(profile[i]); 23 | } 24 | 25 | return arr; 26 | } 27 | 28 | 29 | int main() { 30 | 31 | std::cout << "Initialize" << std::endl; 32 | 33 | 34 | ImageData image_data = initializeData(); 35 | 36 | 37 | clutils::CLEnv clEnv; 38 | cl::Context &context(clEnv.addContext(PLATFORM_INDEX)); 39 | 40 | // Find name of the used device 41 | std::string deviceName; 42 | clEnv.devices[0][DEVICE_INDEX].getInfo(CL_DEVICE_NAME, &deviceName); 43 | 
printf("Using device named: %s\n", deviceName.c_str()); 44 | 45 | cl::CommandQueue &queue(clEnv.addQueue(0, DEVICE_INDEX, CL_QUEUE_PROFILING_ENABLE)); 46 | 47 | 48 | std::stringstream build_options; 49 | build_options << " -D IMAGE_WIDTH=" << IMAGE_WIDTH 50 | << " -D IMAGE_HEIGHT=" << IMAGE_HEIGHT 51 | << " -D NUM_FRAMES=" << NUM_FRAMES; 52 | 53 | 54 | cl::Kernel reproject_kernel(clEnv.addProgram(0, "svgf.cl", "reproject", 55 | build_options.str().c_str())); 56 | cl::Kernel variance_kernel(clEnv.addProgram(0, "svgf.cl", "compute_variance", 57 | build_options.str().c_str())); 58 | cl::Kernel atrous_kernel(clEnv.addProgram(0, "svgf.cl", "atrous", 59 | build_options.str().c_str())); 60 | 61 | cl::NDRange global_range(IMAGE_WIDTH, IMAGE_HEIGHT); 62 | cl::NDRange local_range(LOCAL_SIZE_X, LOCAL_SIZE_Y); 63 | 64 | 65 | Double_buffer accumulated_buffer(context, 66 | CL_MEM_READ_WRITE, IMAGE_SIZE * 3 * sizeof(cl_float)); 67 | Double_buffer normal_buffer(context, 68 | CL_MEM_READ_WRITE, IMAGE_SIZE * 3 * sizeof(cl_float)); 69 | Double_buffer position_buffer(context, 70 | CL_MEM_READ_WRITE, IMAGE_SIZE * 3 * sizeof(cl_float)); 71 | 72 | // Util buffer: x = spp, y = moment, z = linear_z 73 | Double_buffer util_buffer(context, 74 | CL_MEM_READ_WRITE, IMAGE_SIZE * 3 * sizeof(cl_float)); 75 | Double_buffer temp_atrous_buffer(context, 76 | CL_MEM_READ_WRITE, IMAGE_SIZE * 4 * sizeof(cl_float)); 77 | 78 | Double_buffer in_buffer(context, 79 | CL_MEM_READ_WRITE, IMAGE_SIZE * 3 * sizeof(cl_float)); 80 | cl::Buffer albedo_buffer(context, 81 | CL_MEM_READ_WRITE, IMAGE_SIZE * 3 * sizeof(cl_float)); 82 | 83 | cl::Buffer output_buffer(context, 84 | CL_MEM_READ_WRITE, IMAGE_SIZE * 3 * sizeof(cl_float)); 85 | 86 | 87 | std::vector> reproject_timer; 88 | reproject_timer.assign(NUM_FRAMES - 1, clutils::GPUTimer(clEnv.devices[0][0])); 89 | 90 | std::vector> variance_timer; 91 | variance_timer.assign(NUM_FRAMES - 1, clutils::GPUTimer(clEnv.devices[0][0])); 92 | 93 | std::vector> 
atrous_timer[NUM_ATROUS_ITERATIONS]; 94 | for(int i = 0; i < NUM_ATROUS_ITERATIONS; i++) { 95 | atrous_timer[i].assign(NUM_FRAMES - 1, clutils::GPUTimer(clEnv.devices[0][0])); 96 | } 97 | 98 | clutils::ProfilingInfo profile_info_reproject("Reprojecting samples"); 99 | clutils::ProfilingInfo profile_info_variance("Computing variance"); 100 | clutils::ProfilingInfo profile_info_atrous("Running atrous multiple iterations"); 101 | clutils::ProfilingInfo profile_info_total("Total"); 102 | 103 | for(int frame = 0; frame < NUM_FRAMES; frame++) { 104 | queue.enqueueWriteBuffer(*in_buffer.current(), true, 0, IMAGE_SIZE * 3 * sizeof(cl_float), 105 | image_data.noisy_input[frame].data()); 106 | queue.enqueueWriteBuffer(*normal_buffer.current(), true, 0, IMAGE_SIZE * 3 * sizeof(cl_float), 107 | image_data.normals[frame].data()); 108 | queue.enqueueWriteBuffer(*position_buffer.current(), true, 0, IMAGE_SIZE * 3 * sizeof(cl_float), 109 | image_data.positions[frame].data()); 110 | queue.enqueueWriteBuffer(albedo_buffer, true, 0, IMAGE_SIZE * 3 * sizeof(cl_float), 111 | image_data.albedos[frame].data()); 112 | 113 | 114 | glm::mat4 curr_view_matrix = (*(glm::mat4*)inverse_perspective_matrix) * (*(glm::mat4*)(&camera_matrices[frame][0][0])); 115 | glm::vec4 cur_mat_row = glm::vec4(curr_view_matrix[0][2], 116 | curr_view_matrix[1][2], 117 | curr_view_matrix[2][2], 118 | curr_view_matrix[3][2]); 119 | 120 | int safe_prev = frame == 0 ? 
0 : frame - 1; 121 | int arg_index = 0; 122 | reproject_kernel.setArg(arg_index++, *in_buffer.current()); 123 | reproject_kernel.setArg(arg_index++, *normal_buffer.current()); 124 | reproject_kernel.setArg(arg_index++, *normal_buffer.previous()); 125 | reproject_kernel.setArg(arg_index++, *position_buffer.current()); 126 | reproject_kernel.setArg(arg_index++, *position_buffer.previous()); 127 | reproject_kernel.setArg(arg_index++, *accumulated_buffer.current()); 128 | reproject_kernel.setArg(arg_index++, *accumulated_buffer.previous()); 129 | reproject_kernel.setArg(arg_index++, *util_buffer.current()); 130 | reproject_kernel.setArg(arg_index++, *util_buffer.previous()); 131 | reproject_kernel.setArg(arg_index++, sizeof(cl_float4), &cur_mat_row[0]); 132 | reproject_kernel.setArg(arg_index++, sizeof(cl_float16), &camera_matrices[safe_prev][0][0]); 133 | reproject_kernel.setArg(arg_index++, sizeof(cl_int), &frame); 134 | 135 | 136 | int res = queue.enqueueNDRangeKernel(reproject_kernel, cl::NullRange, global_range, 137 | local_range, nullptr, &reproject_timer[safe_prev].event()); 138 | 139 | 140 | arg_index = 0; 141 | variance_kernel.setArg(arg_index++, *normal_buffer.current()); 142 | variance_kernel.setArg(arg_index++, *accumulated_buffer.current()); 143 | variance_kernel.setArg(arg_index++, *util_buffer.current()); 144 | variance_kernel.setArg(arg_index++, *temp_atrous_buffer.current()); 145 | 146 | queue.enqueueNDRangeKernel(variance_kernel, cl::NullRange, global_range, 147 | local_range, nullptr, &variance_timer[safe_prev].event()); 148 | 149 | 150 | arg_index = 0; 151 | atrous_kernel.setArg(arg_index++, *normal_buffer.current()); 152 | atrous_kernel.setArg(arg_index++, albedo_buffer); 153 | atrous_kernel.setArg(arg_index++, *util_buffer.current()); 154 | 155 | for(int ai = 0; ai < NUM_ATROUS_ITERATIONS; ai++) { 156 | int step_size = 1 << ai; 157 | int last = ai == NUM_ATROUS_ITERATIONS - 1; 158 | 159 | arg_index = 3; 160 | atrous_kernel.setArg(arg_index++, 
*temp_atrous_buffer.current()); 161 | 162 | if(last) { 163 | atrous_kernel.setArg(arg_index++, output_buffer); 164 | } else { 165 | atrous_kernel.setArg(arg_index++, *temp_atrous_buffer.previous()); // Previous is now output 166 | } 167 | 168 | atrous_kernel.setArg(arg_index++, sizeof(cl_int), &step_size); 169 | atrous_kernel.setArg(arg_index++, sizeof(cl_int), &last); 170 | 171 | queue.enqueueNDRangeKernel(atrous_kernel, cl::NullRange, global_range, 172 | local_range, nullptr, &atrous_timer[ai][safe_prev].event()); 173 | 174 | temp_atrous_buffer.swap(); 175 | } 176 | 177 | res = queue.enqueueReadBuffer(output_buffer, false, 0, 178 | IMAGE_SIZE * 3 * sizeof(cl_float), image_data.out_data[frame].data()); 179 | 180 | normal_buffer.swap(); 181 | position_buffer.swap(); 182 | accumulated_buffer.swap(); 183 | util_buffer.swap(); 184 | in_buffer.swap(); 185 | 186 | } 187 | 188 | queue.finish(); 189 | 190 | for(int i = 0 ; i < NUM_FRAMES - 1; i++) { 191 | 192 | // profile_info_test[i] = test_timer[i].duration(); 193 | profile_info_reproject[i] = reproject_timer[i].duration(); 194 | profile_info_variance[i] = variance_timer[i].duration(); 195 | 196 | cl_ulong atrous_start = 197 | atrous_timer[0][i].event().getProfilingInfo(); 198 | cl_ulong atrous_end = 199 | atrous_timer[NUM_ATROUS_ITERATIONS - 1][i].event().getProfilingInfo(); 200 | profile_info_atrous[i] = 201 | (atrous_end - atrous_start) * atrous_timer[0][i].getUnit(); 202 | 203 | cl_ulong total_start = 204 | reproject_timer[i].event().getProfilingInfo(); 205 | cl_ulong total_end = atrous_end; 206 | 207 | profile_info_total[i] = (total_end - total_start) * atrous_timer[0][i].getUnit(); 208 | } 209 | 210 | // profile_info_test.print(std::cout); 211 | profile_info_reproject.print(std::cout); 212 | profile_info_variance.print(std::cout); 213 | profile_info_atrous.print(std::cout); 214 | profile_info_total.print(std::cout); 215 | 216 | writeOutputImages(image_data, std::string(OUTPUT_FILE_NAME)); 217 | 218 | json::json 
time_obj = {}; 219 | time_obj["reproject"] = getProfileArray(profile_info_reproject); 220 | time_obj["variance"] = getProfileArray(profile_info_variance); 221 | time_obj["atrous"] = getProfileArray(profile_info_atrous); 222 | time_obj["total"] = getProfileArray(profile_info_total); 223 | 224 | std::ofstream time_file(std::string(OUTPUT_FILE_NAME) + "_performance_results.txt"); 225 | time_file << std::setw(4) << time_obj << std::endl; 226 | time_file.close(); 227 | 228 | return 0; 229 | 230 | } 231 | -------------------------------------------------------------------------------- /svgf.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "CLUtils/CLUtils.hpp" 4 | 5 | 6 | 7 | #define _CRT_SECURE_NO_WARNINGS 8 | #define STR_HELPER(x) #x 9 | #define STR(x) STR_HELPER(x) 10 | 11 | // ### Choose your OpenCL device and platform with these defines ### 12 | #define PLATFORM_INDEX 0 13 | #define DEVICE_INDEX 0 14 | 15 | // Location where input frames and feature buffers are located 16 | // #define INPUT_DATA_PATH /home/haakon/data/bmfr_data/sponza-(static-camera)/inputs 17 | #define INPUT_DATA_PATH /home/haakon/Documents/NTNU/TDT4900/dataconstruction/output 18 | 19 | #define INPUT_DATA_PATH_STR STR(INPUT_DATA_PATH) 20 | // camera_matrices.h is expected to be in the same folder 21 | #include STR(INPUT_DATA_PATH/camera_matrices.h) 22 | // These names are appended with NN.exr, where NN is the frame number 23 | #define NOISY_FILE_NAME INPUT_DATA_PATH_STR"/color" 24 | #define NORMAL_FILE_NAME INPUT_DATA_PATH_STR"/shading_normal" 25 | #define POSITION_FILE_NAME INPUT_DATA_PATH_STR"/world_position" 26 | #define ALBEDO_FILE_NAME INPUT_DATA_PATH_STR"/albedo" 27 | #define OUTPUT_FILE_NAME "outputs/output" 28 | 29 | const int IMAGE_WIDTH = 1280; 30 | const int IMAGE_HEIGHT = 720; 31 | const int IMAGE_SIZE = IMAGE_WIDTH * IMAGE_HEIGHT; 32 | 33 | const int LOCAL_SIZE_X = 32; 34 | const int LOCAL_SIZE_Y = 1; 35 | 36 | 
const int NUM_FRAMES = 60; 37 | 38 | const int NUM_ATROUS_ITERATIONS = 4; 39 | -------------------------------------------------------------------------------- /utils.cpp: -------------------------------------------------------------------------------- 1 | #include "utils.hpp" 2 | 3 | #include 4 | 5 | 6 | ImageData initializeData() { 7 | ImageData image_data; 8 | // printf("Loading input data.\n"); 9 | std::cout << "Loading input data" << std::endl; 10 | bool error = false; 11 | 12 | #pragma omp parallel for 13 | for (int frame = 0; frame < NUM_FRAMES; ++frame) 14 | { 15 | if (error) 16 | continue; 17 | 18 | image_data.out_data[frame].resize(3 * IMAGE_SIZE); 19 | 20 | image_data.albedos[frame].resize(3 * IMAGE_WIDTH * IMAGE_HEIGHT); 21 | Operation_result result = load_image(image_data.albedos[frame].data(), ALBEDO_FILE_NAME, 22 | frame); 23 | if (!result.success) 24 | { 25 | error = true; 26 | printf("Albedo buffer loading failed, reason: %s\n", 27 | result.error_message.c_str()); 28 | continue; 29 | } 30 | 31 | image_data.normals[frame].resize(3 * IMAGE_WIDTH * IMAGE_HEIGHT); 32 | result = load_image(image_data.normals[frame].data(), NORMAL_FILE_NAME, frame); 33 | if (!result.success) 34 | { 35 | error = true; 36 | printf("Normal buffer loading failed, reason: %s\n", 37 | result.error_message.c_str()); 38 | continue; 39 | } 40 | 41 | image_data.positions[frame].resize(3 * IMAGE_WIDTH * IMAGE_HEIGHT); 42 | result = load_image(image_data.positions[frame].data(), POSITION_FILE_NAME, frame); 43 | if (!result.success) 44 | { 45 | error = true; 46 | printf("Position buffer loading failed, reason: %s\n", 47 | result.error_message.c_str()); 48 | continue; 49 | } 50 | 51 | image_data.noisy_input[frame].resize(3 * IMAGE_WIDTH * IMAGE_HEIGHT); 52 | result = load_image(image_data.noisy_input[frame].data(), NOISY_FILE_NAME, frame); 53 | if (!result.success) 54 | { 55 | error = true; 56 | printf("Noisy buffer loading failed, reason: %s\n", 57 | result.error_message.c_str()); 58 
| continue; 59 | } 60 | 61 | std::cout << "Read buffers for frame " << frame << std::endl; 62 | } 63 | 64 | if (error) 65 | { 66 | printf("One or more errors occurred during buffer loading\n"); 67 | exit(-1); 68 | } 69 | 70 | return image_data; 71 | } 72 | 73 | 74 | 75 | Operation_result read_image_file( 76 | const std::string &file_name, const int frame, float *buffer) 77 | { 78 | std::unique_ptr in = OIIO::ImageInput::open( 79 | file_name + std::to_string(frame) + ".exr"); 80 | if (!in || in->spec().width != IMAGE_WIDTH || 81 | in->spec().height != IMAGE_HEIGHT || in->spec().nchannels != 3) 82 | { 83 | if(!in) { 84 | std::cerr << "Could not open " << (file_name + std::to_string(frame) + ".exr") << std::endl; 85 | } 86 | // std::cout << "in = " << in << std::endl; 87 | // std::cout << "width = " << in->spec().width << ", height = " << in->spec().height << ", nchannels = " << in->spec().nchannels << std::endl; 88 | return {false, "Can't open image file or it has wrong type: " + file_name}; 89 | } 90 | 91 | // NOTE: this converts .exr files that might be in halfs to single precision floats 92 | // In the dataset distributed with the BMFR paper all exr files are in single precision 93 | in->read_image(OIIO::TypeDesc::FLOAT, buffer); 94 | in->close(); 95 | 96 | return {true}; 97 | } 98 | 99 | Operation_result load_image(cl_float *image, const std::string file_name, const int frame) 100 | { 101 | Operation_result result = read_image_file(file_name, frame, image); 102 | if (!result.success) 103 | return result; 104 | 105 | return {true}; 106 | } 107 | 108 | void writeOutputImages(const ImageData& image_data, const std::string& output_file) { 109 | 110 | // Store results 111 | bool error = false; 112 | #pragma omp parallel for 113 | for (int frame = 0; frame < NUM_FRAMES; ++frame) 114 | { 115 | if (error) 116 | continue; 117 | 118 | // Output image 119 | std::string output_file_name = output_file + std::to_string(frame) + ".png"; 120 | // Crops back from WORKSET_SIZE to 
IMAGE_SIZE 121 | OIIO::ImageSpec spec(IMAGE_WIDTH, IMAGE_HEIGHT, 3, 122 | OIIO::TypeDesc::FLOAT); 123 | std::unique_ptr 124 | out(OIIO::ImageOutput::create(output_file_name)); 125 | if (out && out->open(output_file_name, spec)) 126 | { 127 | out->write_image(OIIO::TypeDesc::FLOAT, image_data.out_data[frame].data(), 128 | 3 * sizeof(cl_float), IMAGE_WIDTH * 3 * sizeof(cl_float), 0); 129 | out->close(); 130 | } 131 | else 132 | { 133 | printf("Can't create image file on disk to location %s\n", 134 | output_file_name.c_str()); 135 | error = true; 136 | continue; 137 | } 138 | } 139 | 140 | if (error) 141 | { 142 | printf("One or more errors occurred during image saving\n"); 143 | exit(-1); 144 | } 145 | 146 | 147 | printf("Wrote images with format %s\n", output_file.c_str()); 148 | 149 | } 150 | 151 | 152 | 153 | // Copied from https://stackoverflow.com/questions/24326432/convenient-way-to-show-opencl-error-codes 154 | 155 | const char *getErrorString(cl_int error) 156 | { 157 | switch(error){ 158 | // run-time and JIT compiler errors 159 | case 0: return "CL_SUCCESS"; 160 | case -1: return "CL_DEVICE_NOT_FOUND"; 161 | case -2: return "CL_DEVICE_NOT_AVAILABLE"; 162 | case -3: return "CL_COMPILER_NOT_AVAILABLE"; 163 | case -4: return "CL_MEM_OBJECT_ALLOCATION_FAILURE"; 164 | case -5: return "CL_OUT_OF_RESOURCES"; 165 | case -6: return "CL_OUT_OF_HOST_MEMORY"; 166 | case -7: return "CL_PROFILING_INFO_NOT_AVAILABLE"; 167 | case -8: return "CL_MEM_COPY_OVERLAP"; 168 | case -9: return "CL_IMAGE_FORMAT_MISMATCH"; 169 | case -10: return "CL_IMAGE_FORMAT_NOT_SUPPORTED"; 170 | case -11: return "CL_BUILD_PROGRAM_FAILURE"; 171 | case -12: return "CL_MAP_FAILURE"; 172 | case -13: return "CL_MISALIGNED_SUB_BUFFER_OFFSET"; 173 | case -14: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; 174 | case -15: return "CL_COMPILE_PROGRAM_FAILURE"; 175 | case -16: return "CL_LINKER_NOT_AVAILABLE"; 176 | case -17: return "CL_LINK_PROGRAM_FAILURE"; 177 | case -18: return 
"CL_DEVICE_PARTITION_FAILED"; 178 | case -19: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"; 179 | 180 | // compile-time errors 181 | case -30: return "CL_INVALID_VALUE"; 182 | case -31: return "CL_INVALID_DEVICE_TYPE"; 183 | case -32: return "CL_INVALID_PLATFORM"; 184 | case -33: return "CL_INVALID_DEVICE"; 185 | case -34: return "CL_INVALID_CONTEXT"; 186 | case -35: return "CL_INVALID_QUEUE_PROPERTIES"; 187 | case -36: return "CL_INVALID_COMMAND_QUEUE"; 188 | case -37: return "CL_INVALID_HOST_PTR"; 189 | case -38: return "CL_INVALID_MEM_OBJECT"; 190 | case -39: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; 191 | case -40: return "CL_INVALID_IMAGE_SIZE"; 192 | case -41: return "CL_INVALID_SAMPLER"; 193 | case -42: return "CL_INVALID_BINARY"; 194 | case -43: return "CL_INVALID_BUILD_OPTIONS"; 195 | case -44: return "CL_INVALID_PROGRAM"; 196 | case -45: return "CL_INVALID_PROGRAM_EXECUTABLE"; 197 | case -46: return "CL_INVALID_KERNEL_NAME"; 198 | case -47: return "CL_INVALID_KERNEL_DEFINITION"; 199 | case -48: return "CL_INVALID_KERNEL"; 200 | case -49: return "CL_INVALID_ARG_INDEX"; 201 | case -50: return "CL_INVALID_ARG_VALUE"; 202 | case -51: return "CL_INVALID_ARG_SIZE"; 203 | case -52: return "CL_INVALID_KERNEL_ARGS"; 204 | case -53: return "CL_INVALID_WORK_DIMENSION"; 205 | case -54: return "CL_INVALID_WORK_GROUP_SIZE"; 206 | case -55: return "CL_INVALID_WORK_ITEM_SIZE"; 207 | case -56: return "CL_INVALID_GLOBAL_OFFSET"; 208 | case -57: return "CL_INVALID_EVENT_WAIT_LIST"; 209 | case -58: return "CL_INVALID_EVENT"; 210 | case -59: return "CL_INVALID_OPERATION"; 211 | case -60: return "CL_INVALID_GL_OBJECT"; 212 | case -61: return "CL_INVALID_BUFFER_SIZE"; 213 | case -62: return "CL_INVALID_MIP_LEVEL"; 214 | case -63: return "CL_INVALID_GLOBAL_WORK_SIZE"; 215 | case -64: return "CL_INVALID_PROPERTY"; 216 | case -65: return "CL_INVALID_IMAGE_DESCRIPTOR"; 217 | case -66: return "CL_INVALID_COMPILER_OPTIONS"; 218 | case -67: return "CL_INVALID_LINKER_OPTIONS"; 219 | 
case -68: return "CL_INVALID_DEVICE_PARTITION_COUNT"; 220 | 221 | // extension errors 222 | case -1000: return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR"; 223 | case -1001: return "CL_PLATFORM_NOT_FOUND_KHR"; 224 | case -1002: return "CL_INVALID_D3D10_DEVICE_KHR"; 225 | case -1003: return "CL_INVALID_D3D10_RESOURCE_KHR"; 226 | case -1004: return "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR"; 227 | case -1005: return "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR"; 228 | default: return "Unknown OpenCL error"; 229 | } 230 | } 231 | -------------------------------------------------------------------------------- /utils.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "svgf.hpp" 4 | 5 | #include 6 | 7 | struct ImageData { 8 | std::vector out_data[NUM_FRAMES]; 9 | std::vector albedos[NUM_FRAMES]; 10 | std::vector normals[NUM_FRAMES]; 11 | std::vector positions[NUM_FRAMES]; 12 | std::vector noisy_input[NUM_FRAMES]; 13 | }; 14 | 15 | ImageData initializeData(); 16 | 17 | 18 | // (Copied from BMFR) 19 | 20 | // Creates two same buffers and swap() call can be used to change which one is considered 21 | // current and which one previous 22 | template 23 | class Double_buffer 24 | { 25 | private: 26 | T a, b; 27 | bool swapped; 28 | 29 | public: 30 | template 31 | Double_buffer(Args... args) : a(args...), b(args...), swapped(false){}; 32 | T *current() { return swapped ? &a : &b; } 33 | T *previous() { return swapped ? 
&b : &a; } 34 | void swap() { swapped = !swapped; } 35 | }; 36 | 37 | struct Operation_result 38 | { 39 | bool success; 40 | std::string error_message; 41 | Operation_result(bool success, const std::string &error_message = "") : 42 | success(success), error_message(error_message) {} 43 | }; 44 | 45 | Operation_result load_image(cl_float *image, const std::string file_name, const int frame); 46 | Operation_result read_image_file(const std::string &file_name, const int frame, float *buffer); 47 | 48 | void writeOutputImages(const ImageData& image_data, const std::string& output_file); 49 | 50 | const char *getErrorString(cl_int error); 51 | --------------------------------------------------------------------------------