├── .gitignore ├── dog.jpg ├── dog_gradient.jpg ├── CMakeLists.txt ├── README.md ├── sobel_cpu.cc ├── sobel_gpu_1_naive.cc ├── sobel_gpu_2_single_alloc.cc ├── sobel_gpu_4_shared_mem.cc ├── sobel_gpu_3_pinned_mem.cc ├── sobel_gpu_5_shared_mem_streams.cc └── sobel_gpu_5_pinned_mem_streams.cc /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | 3 | -------------------------------------------------------------------------------- /dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RidgeRun/getting-started-with-cuda-opencv/HEAD/dog.jpg -------------------------------------------------------------------------------- /dog_gradient.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RidgeRun/getting-started-with-cuda-opencv/HEAD/dog_gradient.jpg -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 2.8) 2 | cmake_policy(SET CMP0048 NEW) 3 | 4 | # Initialize project 5 | project (GettingStartedWithOpenCVCuda LANGUAGES CXX VERSION 0.1.0) 6 | 7 | # Dependencies 8 | find_package (OpenCV REQUIRED) 9 | 10 | # Overlay include search path 11 | include_directories (${OpenCV_INCLUDE_DIRS}) 12 | 13 | # Programs 14 | add_executable (sobel_cpu sobel_cpu.cc) 15 | target_link_libraries (sobel_cpu ${OpenCV_LIBS}) 16 | 17 | add_executable (sobel_gpu_1_naive sobel_gpu_1_naive.cc) 18 | target_link_libraries (sobel_gpu_1_naive ${OpenCV_LIBS}) 19 | 20 | add_executable (sobel_gpu_2_single_alloc sobel_gpu_2_single_alloc.cc) 21 | target_link_libraries (sobel_gpu_2_single_alloc ${OpenCV_LIBS}) 22 | 23 | add_executable (sobel_gpu_3_pinned_mem sobel_gpu_3_pinned_mem.cc) 24 | target_link_libraries (sobel_gpu_3_pinned_mem ${OpenCV_LIBS}) 25 | 26 | 
add_executable (sobel_gpu_4_shared_mem sobel_gpu_4_shared_mem.cc) 27 | target_link_libraries (sobel_gpu_4_shared_mem ${OpenCV_LIBS}) 28 | 29 | add_executable (sobel_gpu_5_shared_mem_streams sobel_gpu_5_shared_mem_streams.cc) 30 | target_link_libraries (sobel_gpu_5_shared_mem_streams ${OpenCV_LIBS}) 31 | 32 | add_executable (sobel_gpu_5_pinned_mem_streams sobel_gpu_5_pinned_mem_streams.cc) 33 | target_link_libraries (sobel_gpu_5_pinned_mem_streams ${OpenCV_LIBS}) 34 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Getting Started with CUDA Accelerated OpenCV 2 | 3 | > This repository contains the code presented in the GTC2021 S31701 4 | talk. 5 | 6 | ## Description 7 | 8 | This project presents a series of programs that guide you through the 9 | process of optimizing a CUDA accelerated OpenCV algorithm. This 10 | optimization is done through a series of well defined steps without 11 | getting into low-level CUDA programming. 12 | 13 | The algorithm chosen to illustrate the optimization process is the 14 | calculation of the [magnitude of the Sobel 15 | Derivatives](https://docs.opencv.org/3.4/d2/d2c/tutorial_sobel_derivatives.html). While 16 | not very interesting on its own, this algorithm is a foundational step 17 | in many algorithms such as edge detection, image segmentation, feature 18 | extraction, computer vision and more. While many optimizations can be 19 | achieved by approximating the underlying math, the original definition 20 | is kept for didactic purposes. The purpose is to focus the study on 21 | the appropriate OpenCV+CUDA handling. 22 | 23 |

24 | Original image of a cute, big-eyed puppy in grayscale 25 | Resulting gradient image 26 |

27 | 28 | ## Building the project 29 | 30 | As usual with OpenCV projects, the chosen build system was 31 | CMake. Start by making sure you have these dependencies installed: 32 | * CMake 33 | * OpenCV (with CUDA enabled) 34 | 35 | Then proceed normally as follows: 36 | ```bash 37 | # Clone the project 38 | git clone https://github.com/RidgeRun/getting-started-with-cuda-opencv.git 39 | cd getting-started-with-cuda-opencv 40 | 41 | # Configure the project 42 | mkdir build 43 | cd build 44 | cmake .. 45 | 46 | # Build the project 47 | make 48 | ``` 49 | 50 | If everything went okay, you should be able to run the demos. You may 51 | specify the input and output images as the first and second parameters 52 | respectively. Otherwise, "dog.jpg" and "dog_gradient_XXX.jpg" will be 53 | used by default. 54 | 55 | ```bash 56 | # Run from the build directory 57 | ./sobel_cpu ../dog.jpg 58 | 59 | # Specify an alternative output 60 | ./sobel_cpu ../dog.jpg alternative_output.jpg 61 | 62 | # Run from top-level with default parameters 63 | cd .. 64 | ./build/sobel_cpu 65 | ``` 66 | 67 | ## Program Breakdown 68 | 69 | The idea of the project is to use the CPU implementation as a baseline 70 | and then apply each optimization step incrementally. 71 | 72 | - **sobel_cpu**: CPU baseline implementation 73 | - **sobel_gpu_1_naive**: Literal port to GPU 74 | - **sobel_gpu_2_single_alloc**: Allocate the GPU memories only once 75 | and recycle them through all the iterations. 76 | - **sobel_gpu_3_pinned_mem**: Allocate host memory as 77 | non-pageable/pinned so that the transfer is highly optimized. 78 | - **sobel_gpu_4_shared_mem**: Allocate shared memory (if possible) for 79 | the GPU/CPU to eliminate the memory transfer. 80 | - **sobel_gpu_5_shared_mem_streams**: Use CUDA streams to process 81 | certain parts of the pipeline in parallel. 
82 | - **sobel_gpu_5_pinned_mem_streams**: Use CUDA streams to process 83 | certain parts of the pipeline in parallel (alternative implementation 84 | for pinned memory instead of shared memory). 85 | -------------------------------------------------------------------------------- /sobel_cpu.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 Michael Gruner 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions 6 | * are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived 18 | * from this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 24 | * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 25 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 29 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 31 | * OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #include 35 | #include 36 | 37 | #include // Basic OpenCV structures 38 | #include // Image processing methods for the CPU 39 | #include // Images IO 40 | 41 | static void 42 | sobel(const cv::Mat &input, cv::Mat &output) { 43 | // Lowpass filter to clean noise 44 | cv::Mat blurred; 45 | cv::GaussianBlur(input, blurred, cv::Size(7, 7), -1); 46 | 47 | // Compute X and Y derivatives 48 | cv::Mat x, y; 49 | cv::Sobel(blurred, x, CV_32F, 1, 0, 3, 1, 0); 50 | cv::Sobel(blurred, y, CV_32F, 0, 1, 3, 1, 0); 51 | 52 | // Compute X^2 and Y^2 53 | cv::Mat x2, y2; 54 | cv::pow(x, 2, x2); 55 | cv::pow(y, 2, y2); 56 | 57 | // Compute MAG2 = X^2 + Y^2 58 | cv::Mat mag2; 59 | cv::addWeighted(x2, 1, y2, 1, 0, mag2); 60 | 61 | // Compute MAG = √ (X^2 + Y^2) 62 | cv::Mat mag; 63 | cv::sqrt(mag2, mag); 64 | 65 | // Convert from floating point to char 66 | cv::convertScaleAbs(mag, output); 67 | } 68 | 69 | int 70 | main(int argc, char *argv[]) { 71 | std::string to_read = "dog.jpg"; 72 | if (argc >= 2) { 73 | to_read = argv[1]; 74 | } 75 | 76 | std::string to_write = "dog_gradient_cpu.jpg"; 77 | if (argc >= 3) { 78 | to_write = argv[2]; 79 | } 80 | 81 | cv::Mat input = cv::imread(to_read, cv::IMREAD_GRAYSCALE); 82 | 83 | if (input.empty()) { 84 | std::cerr << "Unable to find \"" << to_read << "\". Is the path ok?" 
85 | << std::endl; 86 | return 1; 87 | } 88 | 89 | // The first call is typically a warmup call so we dont benchmark 90 | cv::Mat output; 91 | sobel(input, output); 92 | 93 | int N = 100; 94 | double time = cv::getTickCount(); 95 | 96 | std::cout << "Performing " << N << " iterations..." << std::flush; 97 | 98 | for (int i = 0; i < N; i++) { 99 | sobel(input, output); 100 | } 101 | 102 | time = 1000.0*(cv::getTickCount() - time)/cv::getTickFrequency(); 103 | time /= N; 104 | 105 | std::cout << " done!" << std::endl << "Average for " << N << " CPU runs: " 106 | << time << "ms" << std::endl; 107 | 108 | std::cout << "Resulting image wrote to " << to_write << std::endl; 109 | cv::imwrite(to_write, output); 110 | 111 | return 0; 112 | } 113 | -------------------------------------------------------------------------------- /sobel_gpu_1_naive.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 Michael Gruner 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions 6 | * are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived 18 | * from this software without specific prior written permission. 
19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 | * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 25 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 29 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 31 | * OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #include 35 | #include 36 | 37 | #include // Basic OpenCV structures 38 | #include // Image processing methods for the CPU 39 | #include // Images IO 40 | 41 | #include // CUDA matrix operations 42 | #include // CUDA image filters 43 | 44 | struct Filters { 45 | const cv::Ptr gaussian; 46 | const cv::Ptr sobelx; 47 | const cv::Ptr sobely; 48 | }; 49 | 50 | static void 51 | sobel(const Filters &filters, const cv::Mat &input, cv::Mat &output) { 52 | // Migrate data from the CPU to the GPU 53 | cv::cuda::GpuMat gpu_input; 54 | gpu_input.upload(input); 55 | 56 | // Low pass filter to clean noise 57 | cv::cuda::GpuMat gpu_blurred; 58 | filters.gaussian->apply(gpu_input, gpu_blurred); 59 | 60 | // X and Y derivatives 61 | cv::cuda::GpuMat gpu_x, gpu_y; 62 | filters.sobelx->apply(gpu_blurred, gpu_x); 63 | filters.sobely->apply(gpu_blurred, gpu_y); 64 | 65 | // X^2 and Y^2 66 | cv::cuda::GpuMat gpu_x2, gpu_y2; 67 | cv::cuda::pow(gpu_x, 2, gpu_x2); 68 | cv::cuda::pow(gpu_y, 2, gpu_y2); 69 | 70 | // MAG2 = X^2 + Y^2 71 | cv::cuda::GpuMat gpu_mag2; 72 | cv::cuda::addWeighted(gpu_x2, 1, gpu_y2, 1, 0, gpu_mag2); 73 | 74 | // MAG 
= √(X^2 + Y^2) 75 | cv::cuda::GpuMat gpu_mag; 76 | cv::cuda::sqrt(gpu_mag2, gpu_mag); 77 | 78 | // Convert from floating point to char 79 | cv::cuda::GpuMat gpu_output; 80 | gpu_mag.convertTo(gpu_output, CV_8UC1); 81 | 82 | // Migrate data back from GPU to CPU 83 | gpu_output.download(output); 84 | } 85 | 86 | int 87 | main(int argc, char *argv[]) { 88 | std::string to_read = "dog.jpg"; 89 | if (argc >= 2) { 90 | to_read = argv[1]; 91 | } 92 | 93 | std::string to_write = "dog_gradient_gpu_1_naive.jpg"; 94 | if (argc >= 3) { 95 | to_write = argv[2]; 96 | } 97 | 98 | cv::Mat input = cv::imread(to_read, cv::IMREAD_GRAYSCALE); 99 | 100 | if (input.empty()) { 101 | std::cerr << "Unable to find \"" << to_read << "\". Is the path ok?" 102 | << std::endl; 103 | return 1; 104 | } 105 | 106 | // Filters in CUDA are created one time 107 | Filters filters = { 108 | gaussian : cv::cuda::createGaussianFilter(CV_8UC1, CV_8UC1, 109 | cv::Size(7, 7), -1), 110 | sobelx : cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 1, 0, 3, 1), 111 | sobely : cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 0, 1, 3, 1) 112 | }; 113 | 114 | // The first call is typically a warmup call so we dont benchmark 115 | cv::Mat output; 116 | sobel(filters, input, output); 117 | 118 | int N = 100; 119 | double time = cv::getTickCount(); 120 | 121 | std::cout << "Performing " << N << " iterations..." << std::flush; 122 | 123 | for (int i = 0; i < N; i++) { 124 | sobel(filters, input, output); 125 | } 126 | 127 | time = 1000.0*(cv::getTickCount() - time)/cv::getTickFrequency(); 128 | time /= N; 129 | 130 | std::cout << " done!" 
<< std::endl << "Average for " << N << " CPU runs: " 131 | << time << "ms" << std::endl; 132 | 133 | std::cout << "Resulting image wrote to " << to_write << std::endl; 134 | cv::imwrite(to_write, output); 135 | 136 | return 0; 137 | } 138 | -------------------------------------------------------------------------------- /sobel_gpu_2_single_alloc.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 Michael Gruner 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions 6 | * are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived 18 | * from this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 24 | * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 25 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 29 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 31 | * OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #include 35 | #include 36 | 37 | #include // Basic OpenCV structures 38 | #include // Image processing methods for the CPU 39 | #include // Images IO 40 | 41 | #include // CUDA matrix operations 42 | #include // CUDA image filters 43 | 44 | struct Filters { 45 | const cv::Ptr gaussian; 46 | const cv::Ptr sobelx; 47 | const cv::Ptr sobely; 48 | }; 49 | 50 | struct GpuMemories { 51 | cv::cuda::GpuMat input; 52 | cv::cuda::GpuMat blurred; 53 | cv::cuda::GpuMat x; 54 | cv::cuda::GpuMat y; 55 | cv::cuda::GpuMat x2; 56 | cv::cuda::GpuMat y2; 57 | cv::cuda::GpuMat mag2; 58 | cv::cuda::GpuMat mag; 59 | cv::cuda::GpuMat output; 60 | }; 61 | 62 | static void 63 | sobel(const Filters &filters, GpuMemories &gpu, 64 | const cv::Mat &input, cv::Mat &output) { 65 | // Migrate data from the CPU to the GPU 66 | gpu.input.upload(input); 67 | 68 | // Low pass filter to clean noise 69 | filters.gaussian->apply(gpu.input, gpu.blurred); 70 | 71 | // X and Y derivatives 72 | filters.sobelx->apply(gpu.blurred, gpu.x); 73 | filters.sobely->apply(gpu.blurred, gpu.y); 74 | 75 | // X^2 and Y^2 76 | cv::cuda::pow(gpu.x, 2, gpu.x2); 77 | cv::cuda::pow(gpu.y, 2, gpu.y2); 78 | 79 | // MAG2 = X^2 + Y^2 80 | cv::cuda::addWeighted(gpu.x2, 1, gpu.y2, 1, 0, gpu.mag2); 81 | 82 | // MAG = √(X^2 + Y^2) 83 | cv::cuda::sqrt(gpu.mag2, gpu.mag); 84 | 85 | // Convert from floating point to char 86 | gpu.mag.convertTo(gpu.output, 
CV_8UC1); 87 | 88 | // Migrate data back from GPU to CPU 89 | gpu.output.download(output); 90 | } 91 | 92 | int 93 | main(int argc, char *argv[]) { 94 | std::string to_read = "dog.jpg"; 95 | if (argc >= 2) { 96 | to_read = argv[1]; 97 | } 98 | 99 | std::string to_write = "dog_gradient_gpu_2_single_alloc.jpg"; 100 | if (argc >= 3) { 101 | to_write = argv[2]; 102 | } 103 | 104 | cv::Mat input = cv::imread(to_read, cv::IMREAD_GRAYSCALE); 105 | 106 | if (input.empty()) { 107 | std::cerr << "Unable to find \"" << to_read << "\". Is the path ok?" 108 | << std::endl; 109 | return 1; 110 | } 111 | 112 | // Filters in CUDA are created one time 113 | Filters filters = { 114 | gaussian : cv::cuda::createGaussianFilter(CV_8UC1, CV_8UC1, 115 | cv::Size(7, 7), -1), 116 | sobelx : cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 1, 0, 3, 1), 117 | sobely : cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 0, 1, 3, 1) 118 | }; 119 | 120 | GpuMemories gpu = { 121 | input : cv::cuda::GpuMat (input.size(), CV_8UC1), 122 | blurred : cv::cuda::GpuMat (input.size(), CV_8UC1), 123 | x : cv::cuda::GpuMat (input.size(), CV_32FC1), 124 | y : cv::cuda::GpuMat (input.size(), CV_32FC1), 125 | x2 : cv::cuda::GpuMat (input.size(), CV_32FC1), 126 | y2 : cv::cuda::GpuMat (input.size(), CV_32FC1), 127 | mag2 : cv::cuda::GpuMat (input.size(), CV_32FC1), 128 | mag : cv::cuda::GpuMat (input.size(), CV_32FC1), 129 | output : cv::cuda::GpuMat (input.size(), CV_8UC1) 130 | }; 131 | 132 | // The first call is typically a warmup call so we dont benchmark 133 | cv::Mat output; 134 | sobel(filters, gpu, input, output); 135 | 136 | int N = 100; 137 | double time = cv::getTickCount(); 138 | 139 | std::cout << "Performing " << N << " iterations..." << std::flush; 140 | 141 | for (int i = 0; i < N; i++) { 142 | sobel(filters, gpu, input, output); 143 | } 144 | 145 | time = 1000.0*(cv::getTickCount() - time)/cv::getTickFrequency(); 146 | time /= N; 147 | 148 | std::cout << " done!" 
<< std::endl << "Average for " << N << " CPU runs: " 149 | << time << "ms" << std::endl; 150 | 151 | std::cout << "Resulting image wrote to " << to_write << std::endl; 152 | cv::imwrite(to_write, output); 153 | 154 | return 0; 155 | } 156 | -------------------------------------------------------------------------------- /sobel_gpu_4_shared_mem.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 Michael Gruner 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions 6 | * are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived 18 | * from this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 24 | * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 25 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 29 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 31 | * OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #include 35 | #include 36 | 37 | #include // Basic OpenCV structures 38 | #include // Image processing methods for the CPU 39 | #include // Images IO 40 | 41 | #include // CUDA matrix operations 42 | #include // CUDA image filters 43 | 44 | struct Filters { 45 | const cv::Ptr gaussian; 46 | const cv::Ptr sobelx; 47 | const cv::Ptr sobely; 48 | }; 49 | 50 | struct GpuMemories { 51 | cv::cuda::GpuMat input; 52 | cv::cuda::GpuMat blurred; 53 | cv::cuda::GpuMat x; 54 | cv::cuda::GpuMat y; 55 | cv::cuda::GpuMat x2; 56 | cv::cuda::GpuMat y2; 57 | cv::cuda::GpuMat mag2; 58 | cv::cuda::GpuMat mag; 59 | cv::cuda::GpuMat output; 60 | }; 61 | 62 | static void 63 | sobel(const Filters &filters, GpuMemories &gpu) { 64 | // Migrating data from the CPU to the GPU is no longer needed 65 | 66 | // Low pass filter to clean noise 67 | filters.gaussian->apply(gpu.input, gpu.blurred); 68 | 69 | // X and Y derivatives 70 | filters.sobelx->apply(gpu.blurred, gpu.x); 71 | filters.sobely->apply(gpu.blurred, gpu.y); 72 | 73 | // X^2 and Y^2 74 | cv::cuda::pow(gpu.x, 2, gpu.x2); 75 | cv::cuda::pow(gpu.y, 2, gpu.y2); 76 | 77 | // MAG2 = X^2 + Y^2 78 | cv::cuda::addWeighted(gpu.x2, 1, gpu.y2, 1, 0, gpu.mag2); 79 | 80 | // MAG = √(X^2 + Y^2) 81 | cv::cuda::sqrt(gpu.mag2, gpu.mag); 82 | 83 | // Convert from floating point to char 84 | gpu.mag.convertTo(gpu.output, CV_8UC1); 85 | 86 | // Migrating data back from GPU to 
CPU is no longer needed 87 | } 88 | 89 | int 90 | main(int argc, char *argv[]) { 91 | std::string to_read = "dog.jpg"; 92 | if (argc >= 2) { 93 | to_read = argv[1]; 94 | } 95 | 96 | std::string to_write = "dog_gradient_gpu_4_shared_mem.jpg"; 97 | if (argc >= 3) { 98 | to_write = argv[2]; 99 | } 100 | 101 | cv::Mat input = cv::imread(to_read, cv::IMREAD_GRAYSCALE); 102 | 103 | if (input.empty()) { 104 | std::cerr << "Unable to find \"" << to_read << "\". Is the path ok?" 105 | << std::endl; 106 | return 1; 107 | } 108 | 109 | cv::cuda::HostMem shared_input(input, cv::cuda::HostMem::AllocType::SHARED); 110 | cv::cuda::HostMem shared_output(input, cv::cuda::HostMem::AllocType::SHARED); 111 | 112 | // Filters in CUDA are created one time 113 | Filters filters = { 114 | gaussian : cv::cuda::createGaussianFilter(CV_8UC1, CV_8UC1, 115 | cv::Size(7, 7), -1), 116 | sobelx : cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 1, 0, 3, 1), 117 | sobely : cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 0, 1, 3, 1) 118 | }; 119 | 120 | GpuMemories gpu = { 121 | input : shared_input.createGpuMatHeader(), 122 | blurred : cv::cuda::GpuMat(input.size(), CV_8UC1), 123 | x : cv::cuda::GpuMat(input.size(), CV_32FC1), 124 | y : cv::cuda::GpuMat(input.size(), CV_32FC1), 125 | x2 : cv::cuda::GpuMat(input.size(), CV_32FC1), 126 | y2 : cv::cuda::GpuMat(input.size(), CV_32FC1), 127 | mag2 : cv::cuda::GpuMat(input.size(), CV_32FC1), 128 | mag : cv::cuda::GpuMat(input.size(), CV_32FC1), 129 | output : shared_output.createGpuMatHeader() 130 | }; 131 | 132 | // The first call is typically a warmup call so we dont benchmark 133 | sobel(filters, gpu); 134 | 135 | int N = 100; 136 | double time = cv::getTickCount(); 137 | 138 | std::cout << "Performing " << N << " iterations..." << std::flush; 139 | 140 | for (int i = 0; i < N; i++) { 141 | sobel(filters, gpu); 142 | } 143 | 144 | time = 1000.0*(cv::getTickCount() - time)/cv::getTickFrequency(); 145 | time /= N; 146 | 147 | std::cout << " done!" 
<< std::endl << "Average for " << N << " CPU runs: " 148 | << time << "ms" << std::endl; 149 | 150 | std::cout << "Resulting image wrote to " << to_write << std::endl; 151 | cv::imwrite(to_write, shared_output); 152 | 153 | return 0; 154 | } 155 | -------------------------------------------------------------------------------- /sobel_gpu_3_pinned_mem.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 Michael Gruner 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions 6 | * are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived 18 | * from this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 24 | * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 25 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 29 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 31 | * OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #include 35 | #include 36 | 37 | #include // Basic OpenCV structures 38 | #include // Image processing methods for the CPU 39 | #include // Images IO 40 | 41 | #include // CUDA matrix operations 42 | #include // CUDA image filters 43 | 44 | struct Filters { 45 | const cv::Ptr gaussian; 46 | const cv::Ptr sobelx; 47 | const cv::Ptr sobely; 48 | }; 49 | 50 | struct GpuMemories { 51 | cv::cuda::GpuMat input; 52 | cv::cuda::GpuMat blurred; 53 | cv::cuda::GpuMat x; 54 | cv::cuda::GpuMat y; 55 | cv::cuda::GpuMat x2; 56 | cv::cuda::GpuMat y2; 57 | cv::cuda::GpuMat mag2; 58 | cv::cuda::GpuMat mag; 59 | cv::cuda::GpuMat output; 60 | }; 61 | 62 | static void 63 | sobel(const Filters &filters, GpuMemories &gpu, 64 | const cv::cuda::HostMem &input, cv::cuda::HostMem &output) { 65 | // Migrate data from the CPU to the GPU 66 | gpu.input.upload(input); 67 | 68 | // Low pass filter to clean noise 69 | filters.gaussian->apply(gpu.input, gpu.blurred); 70 | 71 | // X and Y derivatives 72 | filters.sobelx->apply(gpu.blurred, gpu.x); 73 | filters.sobely->apply(gpu.blurred, gpu.y); 74 | 75 | // X^2 and Y^2 76 | cv::cuda::pow(gpu.x, 2, gpu.x2); 77 | cv::cuda::pow(gpu.y, 2, gpu.y2); 78 | 79 | // MAG2 = X^2 + Y^2 80 | cv::cuda::addWeighted(gpu.x2, 1, gpu.y2, 1, 0, gpu.mag2); 81 | 82 | // MAG = √(X^2 + Y^2) 83 | cv::cuda::sqrt(gpu.mag2, gpu.mag); 84 | 85 | // Convert from floating point to char 86 | 
gpu.mag.convertTo(gpu.output, CV_8UC1); 87 | 88 | // Migrate data back from GPU to CPU 89 | gpu.output.download(output); 90 | } 91 | 92 | int 93 | main(int argc, char *argv[]) { 94 | std::string to_read = "dog.jpg"; 95 | if (argc >= 2) { 96 | to_read = argv[1]; 97 | } 98 | 99 | std::string to_write = "dog_gradient_gpu_3_pinned_mem.jpg"; 100 | if (argc >= 3) { 101 | to_write = argv[2]; 102 | } 103 | 104 | cv::Mat input = cv::imread(to_read, cv::IMREAD_GRAYSCALE); 105 | 106 | if (input.empty()) { 107 | std::cerr << "Unable to find \"" << to_read << "\". Is the path ok?" 108 | << std::endl; 109 | return 1; 110 | } 111 | 112 | cv::cuda::HostMem pinned_input(input, 113 | cv::cuda::HostMem::AllocType::PAGE_LOCKED); 114 | cv::cuda::HostMem pinned_output(cv::cuda::HostMem::AllocType::PAGE_LOCKED); 115 | 116 | // Filters in CUDA are created one time 117 | Filters filters = { 118 | gaussian : cv::cuda::createGaussianFilter(CV_8UC1, CV_8UC1, 119 | cv::Size(7, 7), -1), 120 | sobelx : cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 1, 0, 3, 1), 121 | sobely : cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 0, 1, 3, 1) 122 | }; 123 | 124 | GpuMemories gpu = { 125 | input : cv::cuda::GpuMat(input.size(), CV_8UC1), 126 | blurred : cv::cuda::GpuMat(input.size(), CV_8UC1), 127 | x : cv::cuda::GpuMat(input.size(), CV_32FC1), 128 | y : cv::cuda::GpuMat(input.size(), CV_32FC1), 129 | x2 : cv::cuda::GpuMat(input.size(), CV_32FC1), 130 | y2 : cv::cuda::GpuMat(input.size(), CV_32FC1), 131 | mag2 : cv::cuda::GpuMat(input.size(), CV_32FC1), 132 | mag : cv::cuda::GpuMat(input.size(), CV_32FC1), 133 | output : cv::cuda::GpuMat(input.size(), CV_8UC1) 134 | }; 135 | 136 | // The first call is typically a warmup call so we dont benchmark 137 | sobel(filters, gpu, pinned_input, pinned_output); 138 | 139 | int N = 100; 140 | double time = cv::getTickCount(); 141 | 142 | std::cout << "Performing " << N << " iterations..." 
<< std::flush; 143 | 144 | for (int i = 0; i < N; i++) { 145 | sobel(filters, gpu, pinned_input, pinned_output); 146 | } 147 | 148 | time = 1000.0*(cv::getTickCount() - time)/cv::getTickFrequency(); 149 | time /= N; 150 | 151 | std::cout << " done!" << std::endl << "Average for " << N << " CPU runs: " 152 | << time << "ms" << std::endl; 153 | 154 | std::cout << "Resulting image wrote to " << to_write << std::endl; 155 | cv::imwrite(to_write, pinned_output); 156 | 157 | return 0; 158 | } 159 | -------------------------------------------------------------------------------- /sobel_gpu_5_shared_mem_streams.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 Michael Gruner 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions 6 | * are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived 18 | * from this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 24 | * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 25 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 29 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 31 | * OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #include 35 | #include 36 | 37 | #include // Basic OpenCV structures 38 | #include // Image processing methods for the CPU 39 | #include // Images IO 40 | 41 | #include // CUDA matrix operations 42 | #include // CUDA image filters 43 | 44 | struct Filters { 45 | const cv::Ptr gaussian; 46 | const cv::Ptr sobelx; 47 | const cv::Ptr sobely; 48 | }; 49 | 50 | struct GpuMemories { 51 | cv::cuda::GpuMat input; 52 | cv::cuda::GpuMat blurred; 53 | cv::cuda::GpuMat x; 54 | cv::cuda::GpuMat y; 55 | cv::cuda::GpuMat x2; 56 | cv::cuda::GpuMat y2; 57 | cv::cuda::GpuMat mag2; 58 | cv::cuda::GpuMat mag; 59 | cv::cuda::GpuMat output; 60 | }; 61 | 62 | struct Streams { 63 | cv::cuda::Stream x; 64 | cv::cuda::Stream y; 65 | }; 66 | 67 | static void 68 | sobel(const Filters &filters, Streams &streams, GpuMemories &gpu) { 69 | // Migrating data from the CPU to the GPU is no longer needed 70 | 71 | // Low pass filter to clean noise 72 | filters.gaussian->apply(gpu.input, gpu.blurred, streams.x); 73 | 74 | streams.x.waitForCompletion(); 75 | 76 | // X and Y derivatives 77 | filters.sobelx->apply(gpu.blurred, gpu.x, streams.x); 78 | filters.sobely->apply(gpu.blurred, gpu.y, streams.y); 79 | 80 | // X^2 and Y^2 81 | cv::cuda::pow(gpu.x, 2, gpu.x2, streams.x); 82 | cv::cuda::pow(gpu.y, 2, gpu.y2, streams.y); 83 | 84 | streams.y.waitForCompletion(); 85 | 86 | // MAG2 = X^2 + Y^2 87 | 
cv::cuda::addWeighted(gpu.x2, 1, gpu.y2, 1, 0, gpu.mag2, -1, streams.x); 88 | 89 | // MAG = √(X^2 + Y^2) 90 | cv::cuda::sqrt(gpu.mag2, gpu.mag, streams.x); 91 | 92 | // Convert from floating point to char 93 | gpu.mag.convertTo(gpu.output, CV_8UC1, streams.x); 94 | 95 | // Migrating data back from GPU to CPU is no longer needed 96 | 97 | streams.x.waitForCompletion(); 98 | } 99 | 100 | int 101 | main(int argc, char *argv[]) { 102 | std::string to_read = "dog.jpg"; 103 | if (argc >= 2) { 104 | to_read = argv[1]; 105 | } 106 | 107 | std::string to_write = "dog_gradient_gpu_5_shared_mem_streams.jpg"; 108 | if (argc >= 3) { 109 | to_write = argv[2]; 110 | } 111 | 112 | cv::Mat input = cv::imread(to_read, cv::IMREAD_GRAYSCALE); 113 | 114 | if (input.empty()) { 115 | std::cerr << "Unable to find \"" << to_read << "\". Is the path ok?" 116 | << std::endl; 117 | return 1; 118 | } 119 | 120 | cv::cuda::HostMem shared_input(input, cv::cuda::HostMem::AllocType::SHARED); 121 | cv::cuda::HostMem shared_output(input, cv::cuda::HostMem::AllocType::SHARED); 122 | 123 | // Filters in CUDA are created one time 124 | Filters filters = { 125 | gaussian : cv::cuda::createGaussianFilter(CV_8UC1, CV_8UC1, 126 | cv::Size(7, 7), -1), 127 | sobelx : cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 1, 0, 3, 1), 128 | sobely : cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 0, 1, 3, 1) 129 | }; 130 | 131 | GpuMemories gpu = { 132 | input : shared_output.createGpuMatHeader(), 133 | blurred : cv::cuda::GpuMat(input.size(), CV_8UC1), 134 | x : cv::cuda::GpuMat(input.size(), CV_32FC1), 135 | y : cv::cuda::GpuMat(input.size(), CV_32FC1), 136 | x2 : cv::cuda::GpuMat(input.size(), CV_32FC1), 137 | y2 : cv::cuda::GpuMat(input.size(), CV_32FC1), 138 | mag2 : cv::cuda::GpuMat(input.size(), CV_32FC1), 139 | mag : cv::cuda::GpuMat(input.size(), CV_32FC1), 140 | output : shared_output.createGpuMatHeader() 141 | }; 142 | 143 | Streams streams; 144 | 145 | // The first call is typically a warmup call so we 
dont benchmark 146 | sobel(filters, streams, gpu); 147 | 148 | int N = 100; 149 | double time = cv::getTickCount(); 150 | 151 | std::cout << "Performing " << N << " iterations..." << std::flush; 152 | 153 | for (int i = 0; i < N; i++) { 154 | sobel(filters, streams, gpu); 155 | } 156 | 157 | time = 1000.0*(cv::getTickCount() - time)/cv::getTickFrequency(); 158 | time /= N; 159 | 160 | std::cout << " done!" << std::endl << "Average for " << N << " CPU runs: " 161 | << time << "ms" << std::endl; 162 | 163 | std::cout << "Resulting image wrote to " << to_write << std::endl; 164 | cv::imwrite(to_write, shared_output); 165 | 166 | return 0; 167 | } 168 | -------------------------------------------------------------------------------- /sobel_gpu_5_pinned_mem_streams.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 Michael Gruner 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions 6 | * are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived 18 | * from this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 24 | * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 25 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 29 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 31 | * OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #include 35 | #include 36 | 37 | #include // Basic OpenCV structures 38 | #include // Image processing methods for the CPU 39 | #include // Images IO 40 | 41 | #include // CUDA matrix operations 42 | #include // CUDA image filters 43 | 44 | struct Filters { 45 | const cv::Ptr gaussian; 46 | const cv::Ptr sobelx; 47 | const cv::Ptr sobely; 48 | }; 49 | 50 | struct GpuMemories { 51 | cv::cuda::GpuMat input; 52 | cv::cuda::GpuMat blurred; 53 | cv::cuda::GpuMat x; 54 | cv::cuda::GpuMat y; 55 | cv::cuda::GpuMat x2; 56 | cv::cuda::GpuMat y2; 57 | cv::cuda::GpuMat mag2; 58 | cv::cuda::GpuMat mag; 59 | cv::cuda::GpuMat output; 60 | }; 61 | 62 | struct Streams { 63 | cv::cuda::Stream x; 64 | cv::cuda::Stream y; 65 | }; 66 | 67 | static void 68 | sobel(const Filters &filters, Streams &streams, GpuMemories &gpu, 69 | const cv::cuda::HostMem &input, cv::cuda::HostMem &output) { 70 | // Migrate data from the CPU to the GPU 71 | gpu.input.upload(input, streams.x); 72 | 73 | // Low pass filter to clean noise 74 | filters.gaussian->apply(gpu.input, gpu.blurred, streams.x); 75 | 76 | streams.x.waitForCompletion(); 77 | 78 | // X and Y derivatives 79 | filters.sobelx->apply(gpu.blurred, gpu.x, streams.x); 80 | filters.sobely->apply(gpu.blurred, gpu.y, streams.y); 81 | 82 | // X^2 and Y^2 83 | cv::cuda::pow(gpu.x, 2, gpu.x2, streams.x); 84 | cv::cuda::pow(gpu.y, 2, gpu.y2, streams.y); 85 | 86 | 
streams.y.waitForCompletion(); 87 | 88 | // MAG2 = X^2 + Y^2 89 | cv::cuda::addWeighted(gpu.x2, 1, gpu.y2, 1, 0, gpu.mag2, -1, streams.x); 90 | 91 | // MAG = √(X^2 + Y^2) 92 | cv::cuda::sqrt(gpu.mag2, gpu.mag, streams.x); 93 | 94 | // Convert from floating point to char 95 | gpu.mag.convertTo(gpu.output, CV_8UC1, streams.x); 96 | 97 | // Migrate data back from GPU to CPU 98 | gpu.output.download(output, streams.x); 99 | 100 | streams.x.waitForCompletion(); 101 | } 102 | 103 | int 104 | main(int argc, char *argv[]) { 105 | std::string to_read = "dog.jpg"; 106 | if (argc >= 2) { 107 | to_read = argv[1]; 108 | } 109 | 110 | std::string to_write = "dog_gradient_gpu_5_pinned_mem_streams.jpg"; 111 | if (argc >= 3) { 112 | to_write = argv[2]; 113 | } 114 | 115 | cv::Mat input = cv::imread(to_read, cv::IMREAD_GRAYSCALE); 116 | 117 | if (input.empty()) { 118 | std::cerr << "Unable to find \"" << to_read << "\". Is the path ok?" 119 | << std::endl; 120 | return 1; 121 | } 122 | 123 | cv::cuda::HostMem pinned_input(input, 124 | cv::cuda::HostMem::AllocType::PAGE_LOCKED); 125 | cv::cuda::HostMem pinned_output(cv::cuda::HostMem::AllocType::PAGE_LOCKED); 126 | 127 | // Filters in CUDA are created one time 128 | Filters filters = { 129 | gaussian : cv::cuda::createGaussianFilter(CV_8UC1, CV_8UC1, 130 | cv::Size(7, 7), -1), 131 | sobelx : cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 1, 0, 3, 1), 132 | sobely : cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 0, 1, 3, 1) 133 | }; 134 | 135 | GpuMemories gpu = { 136 | input : cv::cuda::GpuMat(input.size(), CV_8UC1), 137 | blurred : cv::cuda::GpuMat(input.size(), CV_8UC1), 138 | x : cv::cuda::GpuMat(input.size(), CV_32FC1), 139 | y : cv::cuda::GpuMat(input.size(), CV_32FC1), 140 | x2 : cv::cuda::GpuMat(input.size(), CV_32FC1), 141 | y2 : cv::cuda::GpuMat(input.size(), CV_32FC1), 142 | mag2 : cv::cuda::GpuMat(input.size(), CV_32FC1), 143 | mag : cv::cuda::GpuMat(input.size(), CV_32FC1), 144 | output : cv::cuda::GpuMat(input.size(), 
CV_8UC1) 145 | }; 146 | 147 | Streams streams; 148 | 149 | // The first call is typically a warmup call so we dont benchmark 150 | sobel(filters, streams, gpu, pinned_input, pinned_output); 151 | 152 | int N = 100; 153 | double time = cv::getTickCount(); 154 | 155 | std::cout << "Performing " << N << " iterations..." << std::flush; 156 | 157 | for (int i = 0; i < N; i++) { 158 | sobel(filters, streams, gpu, pinned_input, pinned_output); 159 | } 160 | 161 | time = 1000.0*(cv::getTickCount() - time)/cv::getTickFrequency(); 162 | time /= N; 163 | 164 | std::cout << " done!" << std::endl << "Average for " << N << " CPU runs: " 165 | << time << "ms" << std::endl; 166 | 167 | std::cout << "Resulting image wrote to " << to_write << std::endl; 168 | cv::imwrite(to_write, pinned_output); 169 | 170 | return 0; 171 | } 172 | --------------------------------------------------------------------------------