├── .gitignore
├── dog.jpg
├── dog_gradient.jpg
├── CMakeLists.txt
├── README.md
├── sobel_cpu.cc
├── sobel_gpu_1_naive.cc
├── sobel_gpu_2_single_alloc.cc
├── sobel_gpu_4_shared_mem.cc
├── sobel_gpu_3_pinned_mem.cc
├── sobel_gpu_5_shared_mem_streams.cc
└── sobel_gpu_5_pinned_mem_streams.cc
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 |
3 |
--------------------------------------------------------------------------------
/dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RidgeRun/getting-started-with-cuda-opencv/HEAD/dog.jpg
--------------------------------------------------------------------------------
/dog_gradient.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RidgeRun/getting-started-with-cuda-opencv/HEAD/dog_gradient.jpg
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required (VERSION 3.5)
2 | cmake_policy(SET CMP0048 NEW)
3 | # project() below uses the VERSION keyword (policy CMP0048, CMake >= 3.0); CMake 4.x rejects minimums below 3.5
4 | # Initialize project
5 | project (GettingStartedWithOpenCVCuda LANGUAGES CXX VERSION 0.1.0)
6 |
7 | # Dependencies
8 | find_package (OpenCV REQUIRED)
9 |
10 | # Overlay include search path
11 | include_directories (${OpenCV_INCLUDE_DIRS})
12 |
13 | # Programs: one executable per optimization step, each linked against OpenCV
14 | add_executable (sobel_cpu sobel_cpu.cc)
15 | target_link_libraries (sobel_cpu ${OpenCV_LIBS})
16 |
17 | add_executable (sobel_gpu_1_naive sobel_gpu_1_naive.cc)
18 | target_link_libraries (sobel_gpu_1_naive ${OpenCV_LIBS})
19 |
20 | add_executable (sobel_gpu_2_single_alloc sobel_gpu_2_single_alloc.cc)
21 | target_link_libraries (sobel_gpu_2_single_alloc ${OpenCV_LIBS})
22 |
23 | add_executable (sobel_gpu_3_pinned_mem sobel_gpu_3_pinned_mem.cc)
24 | target_link_libraries (sobel_gpu_3_pinned_mem ${OpenCV_LIBS})
25 |
26 | add_executable (sobel_gpu_4_shared_mem sobel_gpu_4_shared_mem.cc)
27 | target_link_libraries (sobel_gpu_4_shared_mem ${OpenCV_LIBS})
28 |
29 | add_executable (sobel_gpu_5_shared_mem_streams sobel_gpu_5_shared_mem_streams.cc)
30 | target_link_libraries (sobel_gpu_5_shared_mem_streams ${OpenCV_LIBS})
31 |
32 | add_executable (sobel_gpu_5_pinned_mem_streams sobel_gpu_5_pinned_mem_streams.cc)
33 | target_link_libraries (sobel_gpu_5_pinned_mem_streams ${OpenCV_LIBS})
34 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Getting Started with CUDA Accelerated OpenCV
2 |
3 | > This repository contains the code presented in the GTC2021 S31701
4 | talk.
5 |
6 | ## Description
7 |
8 | This project presents a series of programs that guide you through the
9 | process of optimizing a CUDA accelerated OpenCV algorithm. This
10 | optimization is done through a series of well defined steps without
11 | getting into low-level CUDA programming.
12 |
13 | The algorithm chosen to illustrate the optimization process is the
14 | calculation of the [magnitude of the Sobel
15 | Derivatives](https://docs.opencv.org/3.4/d2/d2c/tutorial_sobel_derivatives.html). While
16 | not very interesting on its own, this algorithm is a foundational step
17 | in many algorithms such as edge detection, image segmentation, feature
18 | extraction, computer vision and more. While many optimizations can be
19 | achieved by approximating the underlying math, the original definition
20 | is kept for didactic purposes. The purpose is to focus the study on
21 | the appropriate OpenCV+CUDA handling.
22 |
23 |
24 |
25 |
26 |
27 |
28 | ## Building the project
29 |
30 | As usual with OpenCV projects, the chosen build system was
31 | CMake. Start by making sure you have these dependencies installed:
32 | * CMake
33 | * OpenCV (with CUDA enabled)
34 |
35 | Then proceed normally as follows:
36 | ```bash
37 | # Clone the project
38 | git clone https://github.com/RidgeRun/getting-started-with-cuda-opencv.git
39 | cd getting-started-with-cuda-opencv
40 |
41 | # Configure the project
42 | mkdir build
43 | cd build
44 | cmake ..
45 |
46 | # Build the project
47 | make
48 | ```
49 |
50 | If everything went okay, you should be able to run the demos. You may
51 | specify the input and output images as the first and second parameters
52 | respectively. Otherwise, "dog.jpg" and "dog_gradient_XXX.jpg" will be
53 | used by default.
54 |
55 | ```bash
56 | # Run from the build directory
57 | ./sobel_cpu ../dog.jpg
58 |
59 | # Specify an alternative output
60 | ./sobel_cpu ../dog.jpg alternative_output.jpg
61 |
62 | # Run from top-level with default parameters
63 | cd ..
64 | ./build/sobel_cpu
65 | ```
66 |
67 | ## Program Breakdown
68 |
69 | The idea of the project is to use the CPU implementation as a baseline
70 | and then apply each optimization step incrementally.
71 |
72 | - **sobel_cpu**: CPU baseline implementation
73 | - **sobel_gpu_1_naive**: Literal port to GPU
74 | - **sobel_gpu_2_single_alloc**: Allocate the GPU memories only once
75 | and recycle them through all the iterations.
76 | - **sobel_gpu_3_pinned_mem**: Allocate host memory as
77 | non-pageable/pinned so that the transfer is highly optimized.
78 | - **sobel_gpu_4_shared_mem**: Allocate shared memory (if possible) for
79 | the GPU/CPU to eliminate the memory transfer.
80 | - **sobel_gpu_5_shared_mem_streams**: Use CUDA streams to process
81 | certain parts of the pipeline in parallel.
82 | - **sobel_gpu_5_pinned_mem_streams**: Use CUDA streams to process
83 | certain parts of the pipeline in parallel (alternative implementation
84 | for pinned memory instead of shared memory).
85 |
--------------------------------------------------------------------------------
/sobel_cpu.cc:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2021 Michael Gruner
3 | *
4 | * Redistribution and use in source and binary forms, with or without
5 | * modification, are permitted provided that the following conditions
6 | * are met:
7 | *
8 | * 1. Redistributions of source code must retain the above copyright
9 | * notice, this list of conditions and the following disclaimer.
10 | *
11 | * 2. Redistributions in binary form must reproduce the above
12 | * copyright notice, this list of conditions and the following
13 | * disclaimer in the documentation and/or other materials provided
14 | * with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived
18 | * from this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 | * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
25 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
29 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
31 | * OF THE POSSIBILITY OF SUCH DAMAGE.
32 | */
33 |
34 | #include
35 | #include
36 |
37 | #include // Basic OpenCV structures
38 | #include // Image processing methods for the CPU
39 | #include // Images IO
40 | // Computes the Sobel gradient magnitude of input on the CPU and stores it in output
41 | static void
42 | sobel(const cv::Mat &input, cv::Mat &output) {
43 | // Low-pass filter (7x7 Gaussian) to clean noise before differentiating
44 | cv::Mat blurred;
45 | cv::GaussianBlur(input, blurred, cv::Size(7, 7), -1);  // NOTE(review): sigma -1 presumably lets OpenCV derive it from the kernel size - confirm
46 |
47 | // Compute first-order X and Y derivatives (kernel size 3, results stored as CV_32F)
48 | cv::Mat x, y;
49 | cv::Sobel(blurred, x, CV_32F, 1, 0, 3, 1, 0);
50 | cv::Sobel(blurred, y, CV_32F, 0, 1, 3, 1, 0);
51 |
52 | // Compute X^2 and Y^2 element-wise
53 | cv::Mat x2, y2;
54 | cv::pow(x, 2, x2);
55 | cv::pow(y, 2, y2);
56 |
57 | // Compute MAG2 = X^2 + Y^2 (weighted sum with both weights 1 and offset 0)
58 | cv::Mat mag2;
59 | cv::addWeighted(x2, 1, y2, 1, 0, mag2);
60 |
61 | // Compute MAG = √ (X^2 + Y^2)
62 | cv::Mat mag;
63 | cv::sqrt(mag2, mag);
64 |
65 | // Convert the floating point magnitude to char and write it to the caller's output
66 | cv::convertScaleAbs(mag, output);
67 | }
68 |
69 | // Entry point: benchmarks the CPU Sobel-magnitude pipeline.
70 | //
71 | // argv[1] (optional): input image path, defaults to "dog.jpg".
72 | // argv[2] (optional): output image path, defaults to "dog_gradient_cpu.jpg".
73 | // Returns 0 on success, 1 if the input cannot be read or the output cannot
74 | // be written.
75 | int
76 | main(int argc, char *argv[]) {
77 | std::string to_read = "dog.jpg";
78 | if (argc >= 2) {
79 | to_read = argv[1];
80 | }
81 |
82 | std::string to_write = "dog_gradient_cpu.jpg";
83 | if (argc >= 3) {
84 | to_write = argv[2];
85 | }
86 |
87 | // Load the image as single-channel grayscale
88 | cv::Mat input = cv::imread(to_read, cv::IMREAD_GRAYSCALE);
89 |
90 | if (input.empty()) {
91 | std::cerr << "Unable to find \"" << to_read << "\". Is the path ok?"
92 | << std::endl;
93 | return 1;
94 | }
95 |
96 | // The first call is typically a warmup call so we don't benchmark it
97 | cv::Mat output;
98 | sobel(input, output);
99 |
100 | int N = 100;
101 | double time = cv::getTickCount();
102 |
103 | std::cout << "Performing " << N << " iterations..." << std::flush;
104 |
105 | for (int i = 0; i < N; i++) {
106 | sobel(input, output);
107 | }
108 |
109 | // Elapsed ticks -> milliseconds, averaged over the N iterations
110 | time = 1000.0*(cv::getTickCount() - time)/cv::getTickFrequency();
111 | time /= N;
112 |
113 | std::cout << " done!" << std::endl << "Average for " << N << " CPU runs: "
114 | << time << "ms" << std::endl;
115 |
116 | // Only announce the output file once it has actually been written
117 | if (!cv::imwrite(to_write, output)) {
118 | std::cerr << "Unable to write \"" << to_write << "\"" << std::endl;
119 | return 1;
120 | }
121 | std::cout << "Resulting image wrote to " << to_write << std::endl;
122 |
123 | return 0;
124 | }
125 |
--------------------------------------------------------------------------------
/sobel_gpu_1_naive.cc:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2021 Michael Gruner
3 | *
4 | * Redistribution and use in source and binary forms, with or without
5 | * modification, are permitted provided that the following conditions
6 | * are met:
7 | *
8 | * 1. Redistributions of source code must retain the above copyright
9 | * notice, this list of conditions and the following disclaimer.
10 | *
11 | * 2. Redistributions in binary form must reproduce the above
12 | * copyright notice, this list of conditions and the following
13 | * disclaimer in the documentation and/or other materials provided
14 | * with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived
18 | * from this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 | * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
25 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
29 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
31 | * OF THE POSSIBILITY OF SUCH DAMAGE.
32 | */
33 |
34 | #include
35 | #include
36 |
37 | #include // Basic OpenCV structures
38 | #include // Image processing methods for the CPU
39 | #include // Images IO
40 |
41 | #include // CUDA matrix operations
42 | #include // CUDA image filters
43 | // CUDA filter objects used by sobel(); main() creates them once and passes them to every call
44 | struct Filters {
45 | const cv::Ptr<cv::cuda::Filter> gaussian;  // low-pass (blur) filter
46 | const cv::Ptr<cv::cuda::Filter> sobelx;    // X derivative filter
47 | const cv::Ptr<cv::cuda::Filter> sobely;    // Y derivative filter
48 | };
49 | // Naive GPU port: uploads input, computes the Sobel gradient magnitude on the GPU and downloads it into output
50 | static void
51 | sobel(const Filters &filters, const cv::Mat &input, cv::Mat &output) {
52 | // Migrate data from the CPU to the GPU (every GpuMat below is a local, created anew on each call)
53 | cv::cuda::GpuMat gpu_input;
54 | gpu_input.upload(input);
55 |
56 | // Low pass filter to clean noise
57 | cv::cuda::GpuMat gpu_blurred;
58 | filters.gaussian->apply(gpu_input, gpu_blurred);
59 |
60 | // X and Y derivatives of the blurred image
61 | cv::cuda::GpuMat gpu_x, gpu_y;
62 | filters.sobelx->apply(gpu_blurred, gpu_x);
63 | filters.sobely->apply(gpu_blurred, gpu_y);
64 |
65 | // X^2 and Y^2, element-wise
66 | cv::cuda::GpuMat gpu_x2, gpu_y2;
67 | cv::cuda::pow(gpu_x, 2, gpu_x2);
68 | cv::cuda::pow(gpu_y, 2, gpu_y2);
69 |
70 | // MAG2 = X^2 + Y^2 (weighted sum with both weights 1 and offset 0)
71 | cv::cuda::GpuMat gpu_mag2;
72 | cv::cuda::addWeighted(gpu_x2, 1, gpu_y2, 1, 0, gpu_mag2);
73 |
74 | // MAG = √(X^2 + Y^2)
75 | cv::cuda::GpuMat gpu_mag;
76 | cv::cuda::sqrt(gpu_mag2, gpu_mag);
77 |
78 | // Convert from floating point to char (CV_8UC1)
79 | cv::cuda::GpuMat gpu_output;
80 | gpu_mag.convertTo(gpu_output, CV_8UC1);
81 |
82 | // Migrate data back from GPU to CPU, into the caller's output
83 | gpu_output.download(output);
84 | }
85 |
86 | // Entry point: benchmarks the naive GPU port of the Sobel-magnitude pipeline.
87 | //
88 | // argv[1] (optional): input image path, defaults to "dog.jpg".
89 | // argv[2] (optional): output image path, defaults to
90 | // "dog_gradient_gpu_1_naive.jpg".
91 | // Returns 0 on success, 1 if the input cannot be read or the output cannot
92 | // be written.
93 | int
94 | main(int argc, char *argv[]) {
95 | std::string to_read = "dog.jpg";
96 | if (argc >= 2) {
97 | to_read = argv[1];
98 | }
99 |
100 | std::string to_write = "dog_gradient_gpu_1_naive.jpg";
101 | if (argc >= 3) {
102 | to_write = argv[2];
103 | }
104 |
105 | cv::Mat input = cv::imread(to_read, cv::IMREAD_GRAYSCALE);
106 |
107 | if (input.empty()) {
108 | std::cerr << "Unable to find \"" << to_read << "\". Is the path ok?"
109 | << std::endl;
110 | return 1;
111 | }
112 |
113 | // Filters in CUDA are created one time
114 | Filters filters = {
115 | gaussian : cv::cuda::createGaussianFilter(CV_8UC1, CV_8UC1,
116 | cv::Size(7, 7), -1),
117 | sobelx : cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 1, 0, 3, 1),
118 | sobely : cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 0, 1, 3, 1)
119 | };
120 |
121 | // The first call is typically a warmup call so we don't benchmark it
122 | cv::Mat output;
123 | sobel(filters, input, output);
124 |
125 | int N = 100;
126 | double time = cv::getTickCount();
127 |
128 | std::cout << "Performing " << N << " iterations..." << std::flush;
129 |
130 | for (int i = 0; i < N; i++) {
131 | sobel(filters, input, output);
132 | }
133 |
134 | // Elapsed ticks -> milliseconds, averaged over the N iterations
135 | time = 1000.0*(cv::getTickCount() - time)/cv::getTickFrequency();
136 | time /= N;
137 |
138 | // This program times the GPU pipeline, so label the result accordingly
139 | std::cout << " done!" << std::endl << "Average for " << N << " GPU runs: "
140 | << time << "ms" << std::endl;
141 |
142 | // Only announce the output file once it has actually been written
143 | if (!cv::imwrite(to_write, output)) {
144 | std::cerr << "Unable to write \"" << to_write << "\"" << std::endl;
145 | return 1;
146 | }
147 | std::cout << "Resulting image wrote to " << to_write << std::endl;
148 |
149 | return 0;
150 | }
151 |
--------------------------------------------------------------------------------
/sobel_gpu_2_single_alloc.cc:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2021 Michael Gruner
3 | *
4 | * Redistribution and use in source and binary forms, with or without
5 | * modification, are permitted provided that the following conditions
6 | * are met:
7 | *
8 | * 1. Redistributions of source code must retain the above copyright
9 | * notice, this list of conditions and the following disclaimer.
10 | *
11 | * 2. Redistributions in binary form must reproduce the above
12 | * copyright notice, this list of conditions and the following
13 | * disclaimer in the documentation and/or other materials provided
14 | * with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived
18 | * from this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 | * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
25 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
29 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
31 | * OF THE POSSIBILITY OF SUCH DAMAGE.
32 | */
33 |
34 | #include
35 | #include
36 |
37 | #include // Basic OpenCV structures
38 | #include // Image processing methods for the CPU
39 | #include // Images IO
40 |
41 | #include // CUDA matrix operations
42 | #include // CUDA image filters
43 | // CUDA filter objects used by sobel(); main() creates them once and passes them to every call
44 | struct Filters {
45 | const cv::Ptr<cv::cuda::Filter> gaussian;  // low-pass (blur) filter
46 | const cv::Ptr<cv::cuda::Filter> sobelx;    // X derivative filter
47 | const cv::Ptr<cv::cuda::Filter> sobely;    // Y derivative filter
48 | };
49 | // GPU buffers for every pipeline stage; main() allocates them once and sobel() reuses them on each call
50 | struct GpuMemories {
51 | cv::cuda::GpuMat input;    // uploaded source image
52 | cv::cuda::GpuMat blurred;  // Gaussian-filtered input
53 | cv::cuda::GpuMat x;        // X derivative
54 | cv::cuda::GpuMat y;        // Y derivative
55 | cv::cuda::GpuMat x2;       // X^2
56 | cv::cuda::GpuMat y2;       // Y^2
57 | cv::cuda::GpuMat mag2;     // X^2 + Y^2
58 | cv::cuda::GpuMat mag;      // square root of mag2
59 | cv::cuda::GpuMat output;   // mag converted to CV_8UC1, downloaded to the host
60 | };
61 | // Computes the Sobel gradient magnitude of input into output, using the caller-provided buffers in gpu for every GPU stage
62 | static void
63 | sobel(const Filters &filters, GpuMemories &gpu,
64 | const cv::Mat &input, cv::Mat &output) {
65 | // Migrate data from the CPU to the GPU
66 | gpu.input.upload(input);
67 |
68 | // Low pass filter to clean noise
69 | filters.gaussian->apply(gpu.input, gpu.blurred);
70 |
71 | // X and Y derivatives of the blurred image
72 | filters.sobelx->apply(gpu.blurred, gpu.x);
73 | filters.sobely->apply(gpu.blurred, gpu.y);
74 |
75 | // X^2 and Y^2, element-wise
76 | cv::cuda::pow(gpu.x, 2, gpu.x2);
77 | cv::cuda::pow(gpu.y, 2, gpu.y2);
78 |
79 | // MAG2 = X^2 + Y^2 (weighted sum with both weights 1 and offset 0)
80 | cv::cuda::addWeighted(gpu.x2, 1, gpu.y2, 1, 0, gpu.mag2);
81 |
82 | // MAG = √(X^2 + Y^2)
83 | cv::cuda::sqrt(gpu.mag2, gpu.mag);
84 |
85 | // Convert from floating point to char (CV_8UC1)
86 | gpu.mag.convertTo(gpu.output, CV_8UC1);
87 |
88 | // Migrate data back from GPU to CPU, into the caller's output
89 | gpu.output.download(output);
90 | }
91 |
92 | // Entry point: benchmarks the GPU Sobel-magnitude pipeline with GPU buffers
93 | // that are allocated once and reused on every iteration.
94 | //
95 | // argv[1] (optional): input image path, defaults to "dog.jpg".
96 | // argv[2] (optional): output image path, defaults to
97 | // "dog_gradient_gpu_2_single_alloc.jpg".
98 | // Returns 0 on success, 1 if the input cannot be read or the output cannot be written.
99 | int
100 | main(int argc, char *argv[]) {
101 | std::string to_read = "dog.jpg";
102 | if (argc >= 2) {
103 | to_read = argv[1];
104 | }
105 |
106 | std::string to_write = "dog_gradient_gpu_2_single_alloc.jpg";
107 | if (argc >= 3) {
108 | to_write = argv[2];
109 | }
110 |
111 | cv::Mat input = cv::imread(to_read, cv::IMREAD_GRAYSCALE);
112 |
113 | if (input.empty()) {
114 | std::cerr << "Unable to find \"" << to_read << "\". Is the path ok?"
115 | << std::endl;
116 | return 1;
117 | }
118 |
119 | // Filters in CUDA are created one time
120 | Filters filters = {
121 | gaussian : cv::cuda::createGaussianFilter(CV_8UC1, CV_8UC1,
122 | cv::Size(7, 7), -1),
123 | sobelx : cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 1, 0, 3, 1),
124 | sobely : cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 0, 1, 3, 1)
125 | };
126 |
127 | // GPU buffers are allocated once, sized after the input image
128 | GpuMemories gpu = {
129 | input : cv::cuda::GpuMat (input.size(), CV_8UC1),
130 | blurred : cv::cuda::GpuMat (input.size(), CV_8UC1),
131 | x : cv::cuda::GpuMat (input.size(), CV_32FC1),
132 | y : cv::cuda::GpuMat (input.size(), CV_32FC1),
133 | x2 : cv::cuda::GpuMat (input.size(), CV_32FC1),
134 | y2 : cv::cuda::GpuMat (input.size(), CV_32FC1),
135 | mag2 : cv::cuda::GpuMat (input.size(), CV_32FC1),
136 | mag : cv::cuda::GpuMat (input.size(), CV_32FC1),
137 | output : cv::cuda::GpuMat (input.size(), CV_8UC1)
138 | };
139 |
140 | // The first call is typically a warmup call so we don't benchmark it
141 | cv::Mat output;
142 | sobel(filters, gpu, input, output);
143 |
144 | int N = 100;
145 | double time = cv::getTickCount();
146 |
147 | std::cout << "Performing " << N << " iterations..." << std::flush;
148 |
149 | for (int i = 0; i < N; i++) {
150 | sobel(filters, gpu, input, output);
151 | }
152 |
153 | // Elapsed ticks -> milliseconds, averaged over the N iterations
154 | time = 1000.0*(cv::getTickCount() - time)/cv::getTickFrequency();
155 | time /= N;
156 |
157 | // This program times the GPU pipeline, so label the result accordingly
158 | std::cout << " done!" << std::endl << "Average for " << N << " GPU runs: "
159 | << time << "ms" << std::endl;
160 |
161 | // Only announce the output file once it has actually been written
162 | if (!cv::imwrite(to_write, output)) {
163 | std::cerr << "Unable to write \"" << to_write << "\"" << std::endl;
164 | return 1;
165 | }
166 | std::cout << "Resulting image wrote to " << to_write << std::endl;
167 |
168 | return 0;
169 | }
170 |
--------------------------------------------------------------------------------
/sobel_gpu_4_shared_mem.cc:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2021 Michael Gruner
3 | *
4 | * Redistribution and use in source and binary forms, with or without
5 | * modification, are permitted provided that the following conditions
6 | * are met:
7 | *
8 | * 1. Redistributions of source code must retain the above copyright
9 | * notice, this list of conditions and the following disclaimer.
10 | *
11 | * 2. Redistributions in binary form must reproduce the above
12 | * copyright notice, this list of conditions and the following
13 | * disclaimer in the documentation and/or other materials provided
14 | * with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived
18 | * from this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 | * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
25 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
29 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
31 | * OF THE POSSIBILITY OF SUCH DAMAGE.
32 | */
33 |
34 | #include
35 | #include
36 |
37 | #include // Basic OpenCV structures
38 | #include // Image processing methods for the CPU
39 | #include // Images IO
40 |
41 | #include // CUDA matrix operations
42 | #include // CUDA image filters
43 | // CUDA filter objects used by sobel(); main() creates them once and passes them to every call
44 | struct Filters {
45 | const cv::Ptr<cv::cuda::Filter> gaussian;  // low-pass (blur) filter
46 | const cv::Ptr<cv::cuda::Filter> sobelx;    // X derivative filter
47 | const cv::Ptr<cv::cuda::Filter> sobely;    // Y derivative filter
48 | };
49 | // GPU-side matrices for every pipeline stage; main() sets them up once and sobel() reuses them on each call
50 | struct GpuMemories {
51 | cv::cuda::GpuMat input;    // source image; main() builds it as a GpuMat header over a SHARED HostMem
52 | cv::cuda::GpuMat blurred;  // Gaussian-filtered input
53 | cv::cuda::GpuMat x;        // X derivative
54 | cv::cuda::GpuMat y;        // Y derivative
55 | cv::cuda::GpuMat x2;       // X^2
56 | cv::cuda::GpuMat y2;       // Y^2
57 | cv::cuda::GpuMat mag2;     // X^2 + Y^2
58 | cv::cuda::GpuMat mag;      // square root of mag2
59 | cv::cuda::GpuMat output;   // mag converted to CV_8UC1; main() builds it as a GpuMat header over a SHARED HostMem
60 | };
61 | // Computes the Sobel gradient magnitude from gpu.input into gpu.output; no explicit upload or download is performed here
62 | static void
63 | sobel(const Filters &filters, GpuMemories &gpu) {
64 | // Migrating data from the CPU to the GPU is no longer needed
65 |
66 | // Low pass filter to clean noise
67 | filters.gaussian->apply(gpu.input, gpu.blurred);
68 |
69 | // X and Y derivatives of the blurred image
70 | filters.sobelx->apply(gpu.blurred, gpu.x);
71 | filters.sobely->apply(gpu.blurred, gpu.y);
72 |
73 | // X^2 and Y^2, element-wise
74 | cv::cuda::pow(gpu.x, 2, gpu.x2);
75 | cv::cuda::pow(gpu.y, 2, gpu.y2);
76 |
77 | // MAG2 = X^2 + Y^2 (weighted sum with both weights 1 and offset 0)
78 | cv::cuda::addWeighted(gpu.x2, 1, gpu.y2, 1, 0, gpu.mag2);
79 |
80 | // MAG = √(X^2 + Y^2)
81 | cv::cuda::sqrt(gpu.mag2, gpu.mag);
82 |
83 | // Convert from floating point to char (CV_8UC1)
84 | gpu.mag.convertTo(gpu.output, CV_8UC1);
85 |
86 | // Migrating data back from GPU to CPU is no longer needed
87 | }
88 |
89 | // Entry point: benchmarks the GPU Sobel-magnitude pipeline using SHARED
90 | // host memory for the input and output images.
91 | //
92 | // argv[1] (optional): input image path, defaults to "dog.jpg".
93 | // argv[2] (optional): output image path, defaults to
94 | // "dog_gradient_gpu_4_shared_mem.jpg".
95 | // Returns 0 on success, 1 if the input cannot be read or the output cannot be written.
96 | int
97 | main(int argc, char *argv[]) {
98 | std::string to_read = "dog.jpg";
99 | if (argc >= 2) {
100 | to_read = argv[1];
101 | }
102 |
103 | std::string to_write = "dog_gradient_gpu_4_shared_mem.jpg";
104 | if (argc >= 3) {
105 | to_write = argv[2];
106 | }
107 |
108 | cv::Mat input = cv::imread(to_read, cv::IMREAD_GRAYSCALE);
109 |
110 | if (input.empty()) {
111 | std::cerr << "Unable to find \"" << to_read << "\". Is the path ok?"
112 | << std::endl;
113 | return 1;
114 | }
115 |
116 | // NOTE(review): shared_output is also built from input, presumably only to get a buffer of matching size and type
117 | cv::cuda::HostMem shared_input(input, cv::cuda::HostMem::AllocType::SHARED);
118 | cv::cuda::HostMem shared_output(input, cv::cuda::HostMem::AllocType::SHARED);
119 |
120 | // Filters in CUDA are created one time
121 | Filters filters = {
122 | gaussian : cv::cuda::createGaussianFilter(CV_8UC1, CV_8UC1,
123 | cv::Size(7, 7), -1),
124 | sobelx : cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 1, 0, 3, 1),
125 | sobely : cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 0, 1, 3, 1)
126 | };
127 |
128 | GpuMemories gpu = {
129 | input : shared_input.createGpuMatHeader(),
130 | blurred : cv::cuda::GpuMat(input.size(), CV_8UC1),
131 | x : cv::cuda::GpuMat(input.size(), CV_32FC1),
132 | y : cv::cuda::GpuMat(input.size(), CV_32FC1),
133 | x2 : cv::cuda::GpuMat(input.size(), CV_32FC1),
134 | y2 : cv::cuda::GpuMat(input.size(), CV_32FC1),
135 | mag2 : cv::cuda::GpuMat(input.size(), CV_32FC1),
136 | mag : cv::cuda::GpuMat(input.size(), CV_32FC1),
137 | output : shared_output.createGpuMatHeader()
138 | };
139 |
140 | // The first call is typically a warmup call so we don't benchmark it
141 | sobel(filters, gpu);
142 |
143 | int N = 100;
144 | double time = cv::getTickCount();
145 |
146 | std::cout << "Performing " << N << " iterations..." << std::flush;
147 |
148 | for (int i = 0; i < N; i++) {
149 | sobel(filters, gpu);
150 | }
151 |
152 | // Elapsed ticks -> milliseconds, averaged over the N iterations
153 | time = 1000.0*(cv::getTickCount() - time)/cv::getTickFrequency();
154 | time /= N;
155 |
156 | // This program times the GPU pipeline, so label the result accordingly
157 | std::cout << " done!" << std::endl << "Average for " << N << " GPU runs: "
158 | << time << "ms" << std::endl;
159 |
160 | // Only announce the output file once it has actually been written
161 | if (!cv::imwrite(to_write, shared_output)) {
162 | std::cerr << "Unable to write \"" << to_write << "\"" << std::endl;
163 | return 1;
164 | }
165 | std::cout << "Resulting image wrote to " << to_write << std::endl;
166 |
167 | return 0;
168 | }
169 |
--------------------------------------------------------------------------------
/sobel_gpu_3_pinned_mem.cc:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2021 Michael Gruner
3 | *
4 | * Redistribution and use in source and binary forms, with or without
5 | * modification, are permitted provided that the following conditions
6 | * are met:
7 | *
8 | * 1. Redistributions of source code must retain the above copyright
9 | * notice, this list of conditions and the following disclaimer.
10 | *
11 | * 2. Redistributions in binary form must reproduce the above
12 | * copyright notice, this list of conditions and the following
13 | * disclaimer in the documentation and/or other materials provided
14 | * with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived
18 | * from this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 | * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
25 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
29 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
31 | * OF THE POSSIBILITY OF SUCH DAMAGE.
32 | */
33 |
34 | #include
35 | #include
36 |
37 | #include // Basic OpenCV structures
38 | #include // Image processing methods for the CPU
39 | #include // Images IO
40 |
41 | #include // CUDA matrix operations
42 | #include // CUDA image filters
43 | // CUDA filter objects used by sobel(); main() creates them once and passes them to every call
44 | struct Filters {
45 | const cv::Ptr<cv::cuda::Filter> gaussian;  // low-pass (blur) filter
46 | const cv::Ptr<cv::cuda::Filter> sobelx;    // X derivative filter
47 | const cv::Ptr<cv::cuda::Filter> sobely;    // Y derivative filter
48 | };
49 | // GPU buffers for every pipeline stage; main() allocates them once and sobel() reuses them on each call
50 | struct GpuMemories {
51 | cv::cuda::GpuMat input;    // uploaded source image
52 | cv::cuda::GpuMat blurred;  // Gaussian-filtered input
53 | cv::cuda::GpuMat x;        // X derivative
54 | cv::cuda::GpuMat y;        // Y derivative
55 | cv::cuda::GpuMat x2;       // X^2
56 | cv::cuda::GpuMat y2;       // Y^2
57 | cv::cuda::GpuMat mag2;     // X^2 + Y^2
58 | cv::cuda::GpuMat mag;      // square root of mag2
59 | cv::cuda::GpuMat output;   // mag converted to CV_8UC1, downloaded to the host
60 | };
61 | // Computes the Sobel gradient magnitude of the HostMem input into the HostMem output, using the buffers in gpu for every GPU stage
62 | static void
63 | sobel(const Filters &filters, GpuMemories &gpu,
64 | const cv::cuda::HostMem &input, cv::cuda::HostMem &output) {
65 | // Migrate data from the CPU to the GPU
66 | gpu.input.upload(input);
67 |
68 | // Low pass filter to clean noise
69 | filters.gaussian->apply(gpu.input, gpu.blurred);
70 |
71 | // X and Y derivatives of the blurred image
72 | filters.sobelx->apply(gpu.blurred, gpu.x);
73 | filters.sobely->apply(gpu.blurred, gpu.y);
74 |
75 | // X^2 and Y^2, element-wise
76 | cv::cuda::pow(gpu.x, 2, gpu.x2);
77 | cv::cuda::pow(gpu.y, 2, gpu.y2);
78 |
79 | // MAG2 = X^2 + Y^2 (weighted sum with both weights 1 and offset 0)
80 | cv::cuda::addWeighted(gpu.x2, 1, gpu.y2, 1, 0, gpu.mag2);
81 |
82 | // MAG = √(X^2 + Y^2)
83 | cv::cuda::sqrt(gpu.mag2, gpu.mag);
84 |
85 | // Convert from floating point to char (CV_8UC1)
86 | gpu.mag.convertTo(gpu.output, CV_8UC1);
87 |
88 | // Migrate data back from GPU to CPU, into the caller's output
89 | gpu.output.download(output);
90 | }
91 |
92 | // Entry point: benchmarks the GPU Sobel-magnitude pipeline using
93 | // PAGE_LOCKED (pinned) host memory for the input and output images.
94 | //
95 | // argv[1] (optional): input image path, defaults to "dog.jpg".
96 | // argv[2] (optional): output image path, defaults to
97 | // "dog_gradient_gpu_3_pinned_mem.jpg".
98 | // Returns 0 on success, 1 if the input cannot be read or the output cannot be written.
99 | int
100 | main(int argc, char *argv[]) {
101 | std::string to_read = "dog.jpg";
102 | if (argc >= 2) {
103 | to_read = argv[1];
104 | }
105 |
106 | std::string to_write = "dog_gradient_gpu_3_pinned_mem.jpg";
107 | if (argc >= 3) {
108 | to_write = argv[2];
109 | }
110 |
111 | cv::Mat input = cv::imread(to_read, cv::IMREAD_GRAYSCALE);
112 |
113 | if (input.empty()) {
114 | std::cerr << "Unable to find \"" << to_read << "\". Is the path ok?"
115 | << std::endl;
116 | return 1;
117 | }
118 |
119 | cv::cuda::HostMem pinned_input(input,
120 | cv::cuda::HostMem::AllocType::PAGE_LOCKED);
121 | cv::cuda::HostMem pinned_output(cv::cuda::HostMem::AllocType::PAGE_LOCKED);
122 |
123 | // Filters in CUDA are created one time
124 | Filters filters = {
125 | gaussian : cv::cuda::createGaussianFilter(CV_8UC1, CV_8UC1,
126 | cv::Size(7, 7), -1),
127 | sobelx : cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 1, 0, 3, 1),
128 | sobely : cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 0, 1, 3, 1)
129 | };
130 |
131 | GpuMemories gpu = {
132 | input : cv::cuda::GpuMat(input.size(), CV_8UC1),
133 | blurred : cv::cuda::GpuMat(input.size(), CV_8UC1),
134 | x : cv::cuda::GpuMat(input.size(), CV_32FC1),
135 | y : cv::cuda::GpuMat(input.size(), CV_32FC1),
136 | x2 : cv::cuda::GpuMat(input.size(), CV_32FC1),
137 | y2 : cv::cuda::GpuMat(input.size(), CV_32FC1),
138 | mag2 : cv::cuda::GpuMat(input.size(), CV_32FC1),
139 | mag : cv::cuda::GpuMat(input.size(), CV_32FC1),
140 | output : cv::cuda::GpuMat(input.size(), CV_8UC1)
141 | };
142 |
143 | // The first call is typically a warmup call so we don't benchmark it
144 | sobel(filters, gpu, pinned_input, pinned_output);
145 |
146 | int N = 100;
147 | double time = cv::getTickCount();
148 |
149 | std::cout << "Performing " << N << " iterations..." << std::flush;
150 |
151 | for (int i = 0; i < N; i++) {
152 | sobel(filters, gpu, pinned_input, pinned_output);
153 | }
154 |
155 | // Elapsed ticks -> milliseconds, averaged over the N iterations
156 | time = 1000.0*(cv::getTickCount() - time)/cv::getTickFrequency();
157 | time /= N;
158 |
159 | // This program times the GPU pipeline, so label the result accordingly
160 | std::cout << " done!" << std::endl << "Average for " << N << " GPU runs: "
161 | << time << "ms" << std::endl;
162 |
163 | // Only announce the output file once it has actually been written
164 | if (!cv::imwrite(to_write, pinned_output)) {
165 | std::cerr << "Unable to write \"" << to_write << "\"" << std::endl;
166 | return 1;
167 | }
168 | std::cout << "Resulting image wrote to " << to_write << std::endl;
169 |
170 | return 0;
171 | }
172 |
--------------------------------------------------------------------------------
/sobel_gpu_5_shared_mem_streams.cc:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2021 Michael Gruner
3 | *
4 | * Redistribution and use in source and binary forms, with or without
5 | * modification, are permitted provided that the following conditions
6 | * are met:
7 | *
8 | * 1. Redistributions of source code must retain the above copyright
9 | * notice, this list of conditions and the following disclaimer.
10 | *
11 | * 2. Redistributions in binary form must reproduce the above
12 | * copyright notice, this list of conditions and the following
13 | * disclaimer in the documentation and/or other materials provided
14 | * with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived
18 | * from this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 | * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
25 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
29 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
31 | * OF THE POSSIBILITY OF SUCH DAMAGE.
32 | */
33 |
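34 | #include <iostream>
35 | #include <string>
36 |
37 | #include <opencv2/core.hpp> // Basic OpenCV structures
38 | #include <opencv2/imgproc.hpp> // Image processing methods for the CPU
39 | #include <opencv2/imgcodecs.hpp> // Images IO
40 |
41 | #include <opencv2/cudaarithm.hpp> // CUDA matrix operations
42 | #include <opencv2/cudafilters.hpp> // CUDA image filters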
43 |
44 | struct Filters {  // CUDA filter handles, created once and reused per call
45 |   const cv::Ptr<cv::cuda::Filter> gaussian;  // low pass filter to clean noise
46 |   const cv::Ptr<cv::cuda::Filter> sobelx;    // derivative along X
47 |   const cv::Ptr<cv::cuda::Filter> sobely;    // derivative along Y
48 | };
49 |
50 | struct GpuMemories {
51 |   // GPU images used by sobel(), listed in the order the pipeline fills them.
52 |   // main() initializes this aggregate member by member, so keep the order.
53 |   cv::cuda::GpuMat input, blurred;  // source image, Gaussian-filtered copy
54 |   // Sobel derivatives and their squares
55 |   cv::cuda::GpuMat x, y;
56 |   cv::cuda::GpuMat x2, y2;
57 |   // X^2 + Y^2, its square root, and the final 8-bit result
58 |   cv::cuda::GpuMat mag2, mag;
59 |   cv::cuda::GpuMat output;
60 | };
61 |
62 | // Two CUDA streams: x carries the common stages and the X branch, y the Y one
63 | struct Streams {
64 |   cv::cuda::Stream x, y;
65 | };
66 |
67 | // Sobel gradient magnitude of gpu.input, written to gpu.output as CV_8UC1.
68 | // Returns only after the work queued on both streams has completed.
69 | static void
70 | sobel(const Filters &filters, Streams &streams, GpuMemories &gpu) {
71 |   // streams.x carries the common stages plus the X branch, streams.y only
72 |   // the Y branch.
73 |   cv::cuda::Stream &main_stream = streams.x;
74 |   cv::cuda::Stream &side_stream = streams.y;
75 |
76 |   // Migrating data from the CPU to the GPU is no longer needed.
77 |   // Blur first to attenuate noise, and finish it before branching because
78 |   // the Y branch reads gpu.blurred from the other stream.
79 |   filters.gaussian->apply(gpu.input, gpu.blurred, main_stream);
80 |   main_stream.waitForCompletion();
81 |
82 |   // Derivatives along X and Y, one per stream
83 |   filters.sobelx->apply(gpu.blurred, gpu.x, main_stream);
84 |   filters.sobely->apply(gpu.blurred, gpu.y, side_stream);
85 |
86 |   // Square each derivative on the stream that produced it
87 |   cv::cuda::pow(gpu.x, 2, gpu.x2, main_stream);
88 |   cv::cuda::pow(gpu.y, 2, gpu.y2, side_stream);
89 |
90 |   // gpu.y2 must be ready before it is consumed on the main stream
91 |   side_stream.waitForCompletion();
92 |
93 |   // mag2 = x2 + y2, mag = sqrt(mag2), output = mag as 8-bit
94 |   cv::cuda::addWeighted(gpu.x2, 1, gpu.y2, 1, 0, gpu.mag2, -1, main_stream);
95 |   cv::cuda::sqrt(gpu.mag2, gpu.mag, main_stream);
96 |   gpu.mag.convertTo(gpu.output, CV_8UC1, main_stream);
97 |   main_stream.waitForCompletion();  // no GPU to CPU migration needed after
98 | }
99 |
100 | // Benchmarks the streamed Sobel pipeline over shared host memory.
101 | // Arguments: [image to read] [image to write]. Returns 1 if either fails.
102 | int
103 | main(int argc, char *argv[]) {
104 |   std::string to_read = "dog.jpg";
105 |   if (argc >= 2) {
106 |     to_read = argv[1];
107 |   }
108 |   std::string to_write = "dog_gradient_gpu_5_shared_mem_streams.jpg";
109 |   if (argc >= 3) {
110 |     to_write = argv[2];
111 |   }
112 |
113 |   cv::Mat input = cv::imread(to_read, cv::IMREAD_GRAYSCALE);
114 |   if (input.empty()) {
115 |     std::cerr << "Unable to find \"" << to_read << "\". Is the path ok?"
116 |               << std::endl;
117 |     return 1;
118 |   }
119 |
120 |   // Separate buffers for source and result. The output is also built from
121 |   // the image, which gives it a matching size and type.
122 |   cv::cuda::HostMem shared_input(input, cv::cuda::HostMem::AllocType::SHARED);
123 |   cv::cuda::HostMem shared_output(input, cv::cuda::HostMem::AllocType::SHARED);
124 |
125 |   // Filters in CUDA are created one time (order: gaussian, sobelx, sobely)
126 |   Filters filters = {
127 |     cv::cuda::createGaussianFilter(CV_8UC1, CV_8UC1, cv::Size(7, 7), -1),
128 |     cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 1, 0, 3, 1),
129 |     cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 0, 1, 3, 1)
130 |   };
131 |
132 |   // gpu.input wraps shared_input and gpu.output wraps shared_output, so the
133 |   // source image is never overwritten by the result between iterations.
134 |   GpuMemories gpu = {
135 |     shared_input.createGpuMatHeader(),         // input
136 |     cv::cuda::GpuMat(input.size(), CV_8UC1),   // blurred
137 |     cv::cuda::GpuMat(input.size(), CV_32FC1),  // x
138 |     cv::cuda::GpuMat(input.size(), CV_32FC1),  // y
139 |     cv::cuda::GpuMat(input.size(), CV_32FC1),  // x2
140 |     cv::cuda::GpuMat(input.size(), CV_32FC1),  // y2
141 |     cv::cuda::GpuMat(input.size(), CV_32FC1),  // mag2
142 |     cv::cuda::GpuMat(input.size(), CV_32FC1),  // mag
143 |     shared_output.createGpuMatHeader()         // output
144 |   };
145 |   Streams streams;
146 |
147 |   // The first call is typically a warmup call, so we don't benchmark it
148 |   sobel(filters, streams, gpu);
149 |
150 |   const int N = 100;
151 |   double time = cv::getTickCount();
152 |   std::cout << "Performing " << N << " iterations..." << std::flush;
153 |   for (int i = 0; i < N; i++) {
154 |     sobel(filters, streams, gpu);
155 |   }
156 |   time = 1000.0*(cv::getTickCount() - time)/cv::getTickFrequency();
157 |   time /= N;
158 |   std::cout << " done!" << std::endl << "Average for " << N << " CPU runs: "
159 |             << time << "ms" << std::endl;
160 |
161 |   if (!cv::imwrite(to_write, shared_output)) {
162 |     std::cerr << "Unable to write \"" << to_write << "\"" << std::endl;
163 |     return 1;
164 |   }
165 |   std::cout << "Resulting image wrote to " << to_write << std::endl;
166 |   return 0;
167 | }
168 |
--------------------------------------------------------------------------------
/sobel_gpu_5_pinned_mem_streams.cc:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2021 Michael Gruner
3 | *
4 | * Redistribution and use in source and binary forms, with or without
5 | * modification, are permitted provided that the following conditions
6 | * are met:
7 | *
8 | * 1. Redistributions of source code must retain the above copyright
9 | * notice, this list of conditions and the following disclaimer.
10 | *
11 | * 2. Redistributions in binary form must reproduce the above
12 | * copyright notice, this list of conditions and the following
13 | * disclaimer in the documentation and/or other materials provided
14 | * with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived
18 | * from this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 | * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
25 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
29 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
31 | * OF THE POSSIBILITY OF SUCH DAMAGE.
32 | */
33 |
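34 | #include <iostream>
35 | #include <string>
36 |
37 | #include <opencv2/core.hpp> // Basic OpenCV structures
38 | #include <opencv2/imgproc.hpp> // Image processing methods for the CPU
39 | #include <opencv2/imgcodecs.hpp> // Images IO
40 |
41 | #include <opencv2/cudaarithm.hpp> // CUDA matrix operations
42 | #include <opencv2/cudafilters.hpp> // CUDA image filters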
43 |
44 | struct Filters {  // CUDA filter handles, created once and reused per call
45 |   const cv::Ptr<cv::cuda::Filter> gaussian;  // low pass filter to clean noise
46 |   const cv::Ptr<cv::cuda::Filter> sobelx;    // derivative along X
47 |   const cv::Ptr<cv::cuda::Filter> sobely;    // derivative along Y
48 | };
49 |
50 | struct GpuMemories {
51 |   // GPU images used by sobel(), listed in the order the pipeline fills them.
52 |   // main() initializes this aggregate member by member, so keep the order.
53 |   cv::cuda::GpuMat input, blurred;  // uploaded image, Gaussian-filtered copy
54 |   // Sobel derivatives and their squares
55 |   cv::cuda::GpuMat x, y;
56 |   cv::cuda::GpuMat x2, y2;
57 |   // X^2 + Y^2, its square root, and the final 8-bit result
58 |   cv::cuda::GpuMat mag2, mag;
59 |   cv::cuda::GpuMat output;
60 | };
61 |
62 | // Two CUDA streams: x carries transfers, common stages and the X branch; y the Y one
63 | struct Streams {
64 |   cv::cuda::Stream x, y;
65 | };
66 |
67 | // Uploads input, computes its Sobel gradient magnitude on the GPU and
68 | // downloads the 8-bit result into output. Returns once the copy is done.
69 | static void
70 | sobel(const Filters &filters, Streams &streams, GpuMemories &gpu,
71 |       const cv::cuda::HostMem &input, cv::cuda::HostMem &output) {
72 |   // streams.x carries the transfers, the common stages and the X branch;
73 |   // streams.y only the Y branch.
74 |   cv::cuda::Stream &main_stream = streams.x;
75 |   cv::cuda::Stream &side_stream = streams.y;
76 |
77 |   // CPU to GPU copy, then blur to attenuate noise. Both finish before
78 |   // branching because the Y branch reads gpu.blurred from the other stream.
79 |   gpu.input.upload(input, main_stream);
80 |   filters.gaussian->apply(gpu.input, gpu.blurred, main_stream);
81 |   main_stream.waitForCompletion();
82 |
83 |   // Derivatives along X and Y, one per stream
84 |   filters.sobelx->apply(gpu.blurred, gpu.x, main_stream);
85 |   filters.sobely->apply(gpu.blurred, gpu.y, side_stream);
86 |
87 |   // Square each derivative on the stream that produced it
88 |   cv::cuda::pow(gpu.x, 2, gpu.x2, main_stream);
89 |   cv::cuda::pow(gpu.y, 2, gpu.y2, side_stream);
90 |
91 |   // gpu.y2 must be ready before it is consumed on the main stream
92 |   side_stream.waitForCompletion();
93 |
94 |   // mag2 = x2 + y2, mag = sqrt(mag2), output = mag as 8-bit
95 |   cv::cuda::addWeighted(gpu.x2, 1, gpu.y2, 1, 0, gpu.mag2, -1, main_stream);
96 |   cv::cuda::sqrt(gpu.mag2, gpu.mag, main_stream);
97 |   gpu.mag.convertTo(gpu.output, CV_8UC1, main_stream);
98 |   // GPU to CPU copy; wait so the caller can read output right away
99 |   gpu.output.download(output, main_stream);
100 |   main_stream.waitForCompletion();
101 | }
102 |
103 | // Benchmarks the streamed Sobel pipeline with page-locked host buffers.
104 | // Arguments: [image to read] [image to write]. Returns 1 if either fails.
105 | int
106 | main(int argc, char *argv[]) {
107 |   std::string to_read = "dog.jpg";
108 |   if (argc >= 2) {
109 |     to_read = argv[1];
110 |   }
111 |
112 |   std::string to_write = "dog_gradient_gpu_5_pinned_mem_streams.jpg";
113 |   if (argc >= 3) {
114 |     to_write = argv[2];
115 |   }
116 |
117 |   cv::Mat input = cv::imread(to_read, cv::IMREAD_GRAYSCALE);
118 |   if (input.empty()) {
119 |     std::cerr << "Unable to find \"" << to_read << "\". Is the path ok?"
120 |               << std::endl;
121 |     return 1;
122 |   }
123 |
124 |   // The input buffer is built from the image; the output buffer starts
125 |   // empty and is passed to download() inside sobel().
126 |   cv::cuda::HostMem pinned_input(input,
127 |                                  cv::cuda::HostMem::AllocType::PAGE_LOCKED);
128 |   cv::cuda::HostMem pinned_output(cv::cuda::HostMem::AllocType::PAGE_LOCKED);
129 |
130 |   // Filters in CUDA are created one time (order: gaussian, sobelx, sobely)
131 |   Filters filters = {
132 |     cv::cuda::createGaussianFilter(CV_8UC1, CV_8UC1, cv::Size(7, 7), -1),
133 |     cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 1, 0, 3, 1),
134 |     cv::cuda::createSobelFilter(CV_8UC1, CV_32FC1, 0, 1, 3, 1)
135 |   };
136 |
137 |   // Device buffers are allocated once here and reused by every sobel() call
138 |   GpuMemories gpu = {
139 |     cv::cuda::GpuMat(input.size(), CV_8UC1),   // input
140 |     cv::cuda::GpuMat(input.size(), CV_8UC1),   // blurred
141 |     cv::cuda::GpuMat(input.size(), CV_32FC1),  // x
142 |     cv::cuda::GpuMat(input.size(), CV_32FC1),  // y
143 |     cv::cuda::GpuMat(input.size(), CV_32FC1),  // x2
144 |     cv::cuda::GpuMat(input.size(), CV_32FC1),  // y2
145 |     cv::cuda::GpuMat(input.size(), CV_32FC1),  // mag2
146 |     cv::cuda::GpuMat(input.size(), CV_32FC1),  // mag
147 |     cv::cuda::GpuMat(input.size(), CV_8UC1)    // output
148 |   };
149 |   Streams streams;
150 |
151 |   // The first call is typically a warmup call, so we don't benchmark it
152 |   sobel(filters, streams, gpu, pinned_input, pinned_output);
153 |
154 |   const int N = 100;
155 |   double time = cv::getTickCount();
156 |   std::cout << "Performing " << N << " iterations..." << std::flush;
157 |   for (int i = 0; i < N; i++) {
158 |     sobel(filters, streams, gpu, pinned_input, pinned_output);
159 |   }
160 |   time = 1000.0*(cv::getTickCount() - time)/cv::getTickFrequency();
161 |   time /= N;
162 |   std::cout << " done!" << std::endl << "Average for " << N << " CPU runs: "
163 |             << time << "ms" << std::endl;
164 |
165 |   if (!cv::imwrite(to_write, pinned_output)) {
166 |     std::cerr << "Unable to write \"" << to_write << "\"" << std::endl;
167 |     return 1;
168 |   }
169 |   std::cout << "Resulting image wrote to " << to_write << std::endl;
170 |   return 0;
171 | }
172 |
--------------------------------------------------------------------------------