├── Median
    ├── a.out
    ├── sample.jpg
    ├── sample_gpu.jpg
    ├── main.cu
    └── medianFilter.cu
├── BoxFilter
    ├── a.out
    ├── .DS_Store
    ├── sample.jpg
    ├── sample_gpu.jpg
    ├── main.cu
    └── boxFilter.cu
├── Laplacian
    ├── a.out
    ├── sample.jpg
    ├── sample_gpu.jpg
    ├── main.cu
    └── laplacianFilter.cu
├── TVFilter
    ├── a.out
    ├── .DS_Store
    ├── sample.jpg
    ├── sample_gpu.jpg
    ├── main.cu
    └── tvFilter.cu
├── Sharpening
    ├── a.out
    ├── .DS_Store
    ├── sample.jpg
    ├── sample_gpu.jpg
    ├── main.cu
    └── sharpeningFilter.cu
├── SobelEdgeDetect
    ├── a.out
    ├── .DS_Store
    ├── sample.jpg
    ├── sample_gpu.jpg
    ├── main.cu
    └── sobelEdgeDetectionFilter.cu
└── readme.md


/Median/a.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/Median/a.out


--------------------------------------------------------------------------------
/BoxFilter/a.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/BoxFilter/a.out


--------------------------------------------------------------------------------
/Laplacian/a.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/Laplacian/a.out


--------------------------------------------------------------------------------
/TVFilter/a.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/TVFilter/a.out


--------------------------------------------------------------------------------
/Median/sample.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/Median/sample.jpg


--------------------------------------------------------------------------------
/Sharpening/a.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/Sharpening/a.out


--------------------------------------------------------------------------------
/BoxFilter/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/BoxFilter/.DS_Store


--------------------------------------------------------------------------------
/BoxFilter/sample.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/BoxFilter/sample.jpg


--------------------------------------------------------------------------------
/Laplacian/sample.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/Laplacian/sample.jpg


--------------------------------------------------------------------------------
/Sharpening/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/Sharpening/.DS_Store


--------------------------------------------------------------------------------
/TVFilter/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/TVFilter/.DS_Store


--------------------------------------------------------------------------------
/TVFilter/sample.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/TVFilter/sample.jpg


--------------------------------------------------------------------------------
/Median/sample_gpu.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/Median/sample_gpu.jpg


--------------------------------------------------------------------------------
/Sharpening/sample.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/Sharpening/sample.jpg


--------------------------------------------------------------------------------
/SobelEdgeDetect/a.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/SobelEdgeDetect/a.out


--------------------------------------------------------------------------------
/BoxFilter/sample_gpu.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/BoxFilter/sample_gpu.jpg


--------------------------------------------------------------------------------
/Laplacian/sample_gpu.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/Laplacian/sample_gpu.jpg


--------------------------------------------------------------------------------
/Sharpening/sample_gpu.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/Sharpening/sample_gpu.jpg


--------------------------------------------------------------------------------
/SobelEdgeDetect/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/SobelEdgeDetect/.DS_Store


--------------------------------------------------------------------------------
/TVFilter/sample_gpu.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/TVFilter/sample_gpu.jpg


--------------------------------------------------------------------------------
/SobelEdgeDetect/sample.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/SobelEdgeDetect/sample.jpg


--------------------------------------------------------------------------------
/SobelEdgeDetect/sample_gpu.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/SobelEdgeDetect/sample_gpu.jpg


--------------------------------------------------------------------------------
/Median/main.cu:
--------------------------------------------------------------------------------
 1 | #include <opencv2/imgproc.hpp>
 2 | #include <opencv2/highgui.hpp>
 3 | #include <iostream>
 4 | #include <string>
 5 | #include <stdio.h>
 6 | #include "medianFilter.cu"
 7 | #include <opencv2/imgcodecs.hpp>
 8 | using namespace std;
 9 | 
10 | // Program entry point: reads "sample.jpg" from the working directory, runs the
11 | // median filter on the GPU and writes the result to "sample_gpu.jpg".
12 | // Returns 0 on success and -1 when the input cannot be read or the output
13 | // cannot be written. Command-line arguments are ignored.
14 | int main( int argc, char** argv ) {
15 | 
16 |    // base name shared by the input and output files
17 |    const string image_name = "sample";
18 | 
19 |    // input & output file names
20 |    const string input_file = image_name+".jpg";
21 |    const string output_file_gpu = image_name+"_gpu.jpg";
22 | 
23 |    // Read input image without forcing a channel count
24 |    cv::Mat srcImage = cv::imread(input_file ,cv::IMREAD_UNCHANGED);
25 |    if(srcImage.empty())
26 |    {
27 |       std::cout<<"Image Not Found: "<< input_file << std::endl;
28 |       return -1;
29 |    }
30 |    cout <<"\ninput image size: "<<srcImage.cols<<" "<<srcImage.rows<<" "<<srcImage.channels()<<"\n";
31 | 
32 |    // Declare the output image with the same size and type as the input
33 |    cv::Mat dstImage (srcImage.size(), srcImage.type());
34 | 
35 |    // run median filter on GPU
36 |    medianFilter_GPU_wrapper(srcImage, dstImage);
37 | 
38 |    // Output image; imwrite returns false when the file could not be written,
39 |    // so report that instead of exiting with a success code
40 |    if(!cv::imwrite(output_file_gpu, dstImage))
41 |    {
42 |       std::cout<<"Could not write image: "<< output_file_gpu << std::endl;
43 |       return -1;
44 |    }
45 | 
46 |    return 0;
47 | }
40 | 


--------------------------------------------------------------------------------
/BoxFilter/main.cu:
--------------------------------------------------------------------------------
 1 | //
 2 | // Box Filter using CUDA
 3 | //
 4 | #include <opencv2/imgproc/imgproc.hpp>
 5 | #include <opencv2/highgui.hpp>
 6 | #include <iostream>
 7 | #include <string>
 8 | #include <stdio.h>
 9 | #include "boxFilter.cu"
10 | 
11 | using namespace std;
12 | 
13 | // Program entry point: reads "sample.jpg" from the working directory, runs the
14 | // box filter on the GPU and writes the result to "sample_gpu.jpg".
15 | // Returns 0 on success and -1 when the input cannot be read or the output
16 | // cannot be written. Command-line arguments are ignored.
17 | int main( int argc, char** argv ) {
18 | 
19 |    // base name shared by the input and output files
20 |    const string image_name = "sample";
21 | 
22 |    // input & output file names
23 |    const string input_file = image_name+".jpg";
24 |    const string output_file_gpu = image_name+"_gpu.jpg";
25 | 
26 |    // Read input image without forcing a channel count
27 |    cv::Mat srcImage = cv::imread(input_file ,cv::IMREAD_UNCHANGED);
28 |    if(srcImage.empty())
29 |    {
30 |       std::cout<<"Image Not Found: "<< input_file << std::endl;
31 |       return -1;
32 |    }
33 |    cout <<"\ninput image size: "<<srcImage.cols<<" "<<srcImage.rows<<" "<<srcImage.channels()<<"\n";
34 | 
35 |    // Declare the output image with the same size and type as the input
36 |    cv::Mat dstImage (srcImage.size(), srcImage.type());
37 | 
38 |    // run box filter on GPU
39 |    boxFilter_GPU_wrapper(srcImage, dstImage);
40 | 
41 |    // Output image; imwrite returns false when the file could not be written,
42 |    // so report that instead of exiting with a success code
43 |    if(!cv::imwrite(output_file_gpu, dstImage))
44 |    {
45 |       std::cout<<"Could not write image: "<< output_file_gpu << std::endl;
46 |       return -1;
47 |    }
48 | 
49 |    return 0;
50 | }
44 | 
45 | 
46 | 
47 | 
48 | 
49 | 


--------------------------------------------------------------------------------
/Sharpening/main.cu:
--------------------------------------------------------------------------------
 1 | //
 2 | // Sharpening Filter using CUDA
 3 | //
 4 | #include <opencv2/imgproc/imgproc.hpp>
 5 | #include <opencv2/highgui.hpp>
 6 | #include <iostream>
 7 | #include <string>
 8 | #include <stdio.h>
 9 | #include "sharpeningFilter.cu"
10 | 
11 | using namespace std;
12 | 
13 | // Program entry point: reads "sample.jpg" from the working directory, runs the
14 | // sharpening filter on the GPU and writes the result to "sample_gpu.jpg".
15 | // Returns 0 on success and -1 when the input cannot be read or the output
16 | // cannot be written. Command-line arguments are ignored.
17 | int main( int argc, char** argv ) {
18 | 
19 |    // base name shared by the input and output files
20 |    const string image_name = "sample";
21 | 
22 |    // input & output file names
23 |    const string input_file = image_name+".jpg";
24 |    const string output_file_gpu = image_name+"_gpu.jpg";
25 | 
26 |    // Read input image without forcing a channel count
27 |    cv::Mat srcImage = cv::imread(input_file ,cv::IMREAD_UNCHANGED);
28 |    if(srcImage.empty())
29 |    {
30 |       std::cout<<"Image Not Found: "<< input_file << std::endl;
31 |       return -1;
32 |    }
33 |    cout <<"\ninput image size: "<<srcImage.cols<<" "<<srcImage.rows<<" "<<srcImage.channels()<<"\n";
34 | 
35 |    // Declare the output image with the same size and type as the input
36 |    cv::Mat dstImage (srcImage.size(), srcImage.type());
37 | 
38 |    // run sharpening filter on GPU
39 |    sharpeningFilter_GPU_wrapper(srcImage, dstImage);
40 | 
41 |    // Output image; imwrite returns false when the file could not be written,
42 |    // so report that instead of exiting with a success code
43 |    if(!cv::imwrite(output_file_gpu, dstImage))
44 |    {
45 |       std::cout<<"Could not write image: "<< output_file_gpu << std::endl;
46 |       return -1;
47 |    }
48 | 
49 |    return 0;
50 | }
43 | 
44 | 
45 | 
46 | 
47 | 
48 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
 1 | ## Requirements for this project:
 2 | 
 3 | - Make sure to have OpenCV installed with path variables set (this project uses OPENCV version 4.2.0)
 4 | 
 5 | - Make sure to have CUDA installed with path variables set (this project uses CUDA version 10.1)
 6 |  
 7 | 
 8 | ## Steps To install OPENCV:
 9 | 
10 | ```
11 | $ sudo apt update
12 | ```
13 | ```
14 | $ sudo apt install libopencv-dev python3-opencv
15 | ```
16 | ```
17 | $ python3 -c "import cv2; print(cv2.__version__)"
18 | ```
19 | Output
20 | ```4.2.0```
21 | ## Steps To install CUDA:
22 | ```
23 | $ sudo apt update
24 | ```
25 | ```
26 | $ sudo apt install nvidia-cuda-toolkit
27 | ```
28 | ```
29 | $ nvcc --version
30 | 
31 | nvcc: NVIDIA (R) Cuda compiler driver
32 | Copyright (c) 2005-2019 NVIDIA Corporation
33 | Built on Sun_Jul_28_19:07:16_PDT_2019
34 | Cuda compilation tools, release 10.1, V10.1.243
35 | ```
36 | 
37 | ## Create a symbolic link for OpenCV:
38 | ```
39 | $ sudo ln -s /usr/include/opencv4/opencv2/ /usr/include/opencv2
40 | ```
41 | ## Steps to run this project:
42 | ```
43 | $ cd <filter Name>
44 | ex: cd BoxFilter
45 | ```
46 | ```
47 | $ nvcc main.cu `pkg-config --cflags --libs opencv4`
48 | ```
49 | ```
50 | $ export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib
51 | ```
52 | ```
53 | $ ./a.out
54 | input image size: 600 298 3
55 | Processing time for GPU (ms): 0.05088
56 | ```


--------------------------------------------------------------------------------
/TVFilter/main.cu:
--------------------------------------------------------------------------------
 1 | //
 2 | // Total Variation Filter using CUDA
 3 | //
 4 | #include <opencv2/imgproc/imgproc.hpp>
 5 | #include <opencv2/highgui.hpp>
 6 | #include <iostream>
 7 | #include <string>
 8 | #include <stdio.h>
 9 | #include "tvFilter.cu"
10 | 
11 | using namespace std;
12 | 
13 | // Program entry point: reads "sample.jpg" from the working directory, converts
14 | // it to gray scale, runs the total variation filter on the GPU and writes the
15 | // result to "sample_gpu.jpg". Returns 0 on success and -1 when the input cannot
16 | // be read, has an unsupported channel count, or the output cannot be written.
17 | int main( int argc, char** argv ) {
18 | 
19 |    // base name shared by the input and output files
20 |    const string image_name = "sample";
21 | 
22 |    // input & output file names
23 |    const string input_file = image_name+".jpg";
24 |    const string output_file_gpu = image_name+"_gpu.jpg";
25 | 
26 |    // Read input image without forcing a channel count
27 |    cv::Mat srcImage = cv::imread(input_file ,cv::IMREAD_UNCHANGED);
28 |    if(srcImage.empty())
29 |    {
30 |       std::cout<<"Image Not Found: "<< input_file << std::endl;
31 |       return -1;
32 |    }
33 |    cout <<"\ninput image size: "<<srcImage.cols<<" "<<srcImage.rows<<" "<<srcImage.channels()<<"\n";
34 | 
35 |    // convert colour input to gray scale; an image that is already single
36 |    // channel is used as is, because COLOR_BGR2GRAY expects colour channels
37 |    if(srcImage.channels() == 3)
38 |       cv::cvtColor(srcImage, srcImage, cv::COLOR_BGR2GRAY);
39 |    else if(srcImage.channels() == 4)
40 |       cv::cvtColor(srcImage, srcImage, cv::COLOR_BGRA2GRAY);
41 |    else if(srcImage.channels() != 1)
42 |    {
43 |       std::cout<<"Unsupported number of channels: "<< srcImage.channels() << std::endl;
44 |       return -1;
45 |    }
46 | 
47 |    // Declare the output image with the same size and type as the gray input
48 |    cv::Mat dstImage_gpu (srcImage.size(), srcImage.type());
49 |    // run total variation filter on GPU
50 |    tvFilter_GPU_wrapper(srcImage, dstImage_gpu);
51 |    // convert to 32-bit float; scaling by 1/255 and then by 255 keeps the
52 |    // pixel values in their original 0-255 range
53 |    dstImage_gpu.convertTo(dstImage_gpu, CV_32F, 1.0 / 255, 0);
54 |    dstImage_gpu*=255;
55 |    // Output image; imwrite returns false when the file could not be written,
56 |    // so report that instead of exiting with a success code
57 |    if(!cv::imwrite(output_file_gpu, dstImage_gpu))
58 |    {
59 |       std::cout<<"Could not write image: "<< output_file_gpu << std::endl;
60 |       return -1;
61 |    }
62 | 
63 |    return 0;
64 | }
48 | 
49 | 
50 | 
51 | 
52 | 
53 | 


--------------------------------------------------------------------------------
/SobelEdgeDetect/main.cu:
--------------------------------------------------------------------------------
 1 | //
 2 | // Sobel Edge Detection Filter using CUDA
 3 | //
 4 | #include <opencv2/imgproc/imgproc.hpp>
 5 | #include <opencv2/highgui.hpp>
 6 | #include <iostream>
 7 | #include <string>
 8 | #include <stdio.h>
 9 | #include "sobelEdgeDetectionFilter.cu"
10 | 
11 | using namespace std;
12 | 
13 | // Program entry point: reads "sample.jpg" from the working directory, converts
14 | // it to gray scale, runs the Sobel edge detection filter on the GPU and writes
15 | // the result to "sample_gpu.jpg". Returns 0 on success and -1 when the input
16 | // cannot be read, has an unsupported channel count, or the output cannot be
17 | // written.
18 | int main( int argc, char** argv ) {
19 | 
20 |    // base name shared by the input and output files
21 |    const string image_name = "sample";
22 | 
23 |    // input & output file names
24 |    const string input_file = image_name+".jpg";
25 |    const string output_file_gpu = image_name+"_gpu.jpg";
26 | 
27 |    // Read input image without forcing a channel count
28 |    cv::Mat srcImage = cv::imread(input_file ,cv::IMREAD_UNCHANGED);
29 |    if(srcImage.empty())
30 |    {
31 |       std::cout<<"Image Not Found: "<< input_file << std::endl;
32 |       return -1;
33 |    }
34 |    cout <<"\ninput image size: "<<srcImage.cols<<" "<<srcImage.rows<<" "<<srcImage.channels()<<"\n";
35 | 
36 |    // convert colour input to gray scale; an image that is already single
37 |    // channel is used as is, because COLOR_BGR2GRAY expects colour channels
38 |    if(srcImage.channels() == 3)
39 |       cv::cvtColor(srcImage, srcImage, cv::COLOR_BGR2GRAY);
40 |    else if(srcImage.channels() == 4)
41 |       cv::cvtColor(srcImage, srcImage, cv::COLOR_BGRA2GRAY);
42 |    else if(srcImage.channels() != 1)
43 |    {
44 |       std::cout<<"Unsupported number of channels: "<< srcImage.channels() << std::endl;
45 |       return -1;
46 |    }
47 | 
48 |    // Declare the output image with the same size and type as the gray input
49 |    cv::Mat dstImage (srcImage.size(), srcImage.type());
50 | 
51 |    // run sobel edge detection filter on GPU
52 |    sobelFilter_GPU_wrapper(srcImage, dstImage);
53 |    // convert to 32-bit float; scaling by 1/255 and then by 255 keeps the
54 |    // pixel values in their original 0-255 range
55 |    dstImage.convertTo(dstImage, CV_32F, 1.0 / 255, 0);
56 |    dstImage*=255;
57 |    // Output image; imwrite returns false when the file could not be written,
58 |    // so report that instead of exiting with a success code
59 |    if(!cv::imwrite(output_file_gpu, dstImage))
60 |    {
61 |       std::cout<<"Could not write image: "<< output_file_gpu << std::endl;
62 |       return -1;
63 |    }
64 | 
65 |    return 0;
66 | }
48 | 
49 | 
50 | 
51 | 
52 | 
53 | 


--------------------------------------------------------------------------------
/Laplacian/main.cu:
--------------------------------------------------------------------------------
 1 | //
 2 | // Laplacian Filter using CUDA
 3 | //
 4 | #include <opencv2/imgproc/imgproc.hpp>
 5 | #include <opencv2/highgui.hpp>
 6 | #include <iostream>
 7 | #include <string>
 8 | #include <stdio.h>
 9 | #include "laplacianFilter.cu"
10 | 
11 | using namespace std;
12 | 
13 | 
14 | // Program entry point: reads "sample.jpg" from the working directory, converts
15 | // it to gray scale, runs the Laplacian filter on the GPU and writes the result
16 | // to "sample_gpu.jpg". Returns 0 on success and -1 when the input cannot be
17 | // read, has an unsupported channel count, or the output cannot be written.
18 | int main( int argc, char** argv ) {
19 | 
20 |    // base name shared by the input and output files
21 |    const string image_name = "sample";
22 | 
23 |    // input & output file names
24 |    const string input_file = image_name+".jpg";
25 |    const string output_file_gpu = image_name+"_gpu.jpg";
26 | 
27 |    // Read input image without forcing a channel count
28 |    cv::Mat srcImage = cv::imread(input_file ,cv::IMREAD_UNCHANGED);
29 |    if(srcImage.empty())
30 |    {
31 |       std::cout<<"Image Not Found: "<< input_file << std::endl;
32 |       return -1;
33 |    }
34 |    cout <<"\ninput image size: "<<srcImage.cols<<" "<<srcImage.rows<<" "<<srcImage.channels()<<"\n";
35 | 
36 |    // convert colour input to gray scale; an image that is already single
37 |    // channel is used as is, because COLOR_BGR2GRAY expects colour channels
38 |    if(srcImage.channels() == 3)
39 |       cv::cvtColor(srcImage, srcImage, cv::COLOR_BGR2GRAY);
40 |    else if(srcImage.channels() == 4)
41 |       cv::cvtColor(srcImage, srcImage, cv::COLOR_BGRA2GRAY);
42 |    else if(srcImage.channels() != 1)
43 |    {
44 |       std::cout<<"Unsupported number of channels: "<< srcImage.channels() << std::endl;
45 |       return -1;
46 |    }
47 | 
48 |    // Declare the output image with the same size and type as the gray input
49 |    cv::Mat dstImage (srcImage.size(), srcImage.type());
50 | 
51 |    // run laplacian filter on GPU
52 |    laplacianFilter_GPU_wrapper(srcImage, dstImage);
53 |    // convert to 32-bit float; scaling by 1/255 and then by 255 keeps the
54 |    // pixel values in their original 0-255 range
55 |    dstImage.convertTo(dstImage, CV_32F, 1.0 / 255, 0);
56 |    dstImage*=255;
57 |    // Output image; imwrite returns false when the file could not be written,
58 |    // so report that instead of exiting with a success code
59 |    if(!cv::imwrite(output_file_gpu, dstImage))
60 |    {
61 |       std::cout<<"Could not write image: "<< output_file_gpu << std::endl;
62 |       return -1;
63 |    }
64 | 
65 |    return 0;
66 | }
51 | 
52 | 
53 | 
54 | 
55 | 
56 | 


--------------------------------------------------------------------------------
/TVFilter/tvFilter.cu:
--------------------------------------------------------------------------------
  1 | //
  2 | // CUDA implementation of Total Variation Filter
  3 | //
  4 | #include <opencv2/imgproc/imgproc.hpp>
  5 | #include <opencv2/highgui.hpp>
  6 | #include <iostream>
  7 | #include <string>
  8 | #include <stdio.h>
  9 | #include <cuda.h>
 10 | #include "cuda_runtime.h"
 11 | 
 12 | #define BLOCK_SIZE      16
 13 | #define FILTER_WIDTH    3       
 14 | #define FILTER_HEIGHT   3       
 15 | 
 16 | using namespace std;
 17 | 
 18 | // Total variation style filter kernel for a single-channel 8-bit image.
 19 | //
 20 | // Launch layout: 2D grid of 2D blocks with one thread per pixel; thread (x, y)
 21 | // handles pixel (x, y) and threads that fall outside the image do nothing.
 22 | //   srcImage - device pointer to width*height input bytes, rows tightly packed
 23 | //   dstImage - device pointer to width*height output bytes, rows tightly packed
 24 | //   width    - image width in pixels
 25 | //   height   - image height in pixels
 26 | //
 27 | // For every interior pixel the kernel sums (neighbour - centre) over the
 28 | // FILTER_WIDTH x FILTER_HEIGHT window. The sum can be negative or exceed 255,
 29 | // so it is clamped to [0, 255] before the store; converting an out-of-range
 30 | // float to unsigned char is undefined behaviour. Border pixels, where the
 31 | // window would leave the image, are set to 0 so no output byte is left
 32 | // uninitialised.
 33 | __global__ void tvFilter(unsigned char *srcImage, unsigned char *dstImage, unsigned int width, unsigned int height)
 34 | {
 35 |    const int x = blockIdx.x*blockDim.x + threadIdx.x;
 36 |    const int y = blockIdx.y*blockDim.y + threadIdx.y;
 37 | 
 38 |    // signed copies keep the comparisons below free of unsigned wrap-around
 39 |    const int w = (int)width;
 40 |    const int h = (int)height;
 41 | 
 42 |    // threads outside the image have nothing to do
 43 |    if(x>=w || y>=h)
 44 |       return;
 45 | 
 46 |    const bool interior = (x>=FILTER_WIDTH/2) && (x<(w-FILTER_WIDTH/2)) && (y>=FILTER_HEIGHT/2) && (y<(h-FILTER_HEIGHT/2));
 47 |    if(!interior)
 48 |    {
 49 |       dstImage[y*w+x] = 0;
 50 |       return;
 51 |    }
 52 | 
 53 |    // centre value is the same for the whole window, so load it once
 54 |    const float center = srcImage[y*w + x];
 55 |    float sod = 0.0f;
 56 |    // Loop inside the filter to accumulate the differences to the centre
 57 |    for(int ky=-FILTER_HEIGHT/2; ky<=FILTER_HEIGHT/2; ky++) {
 58 |       for(int kx=-FILTER_WIDTH/2; kx<=FILTER_WIDTH/2; kx++) {
 59 |          const float fl = srcImage[((y+ky)*w + (x+kx))];
 60 |          sod += fl-center;
 61 |       }
 62 |    }
 63 |    dstImage[y*w+x] = (unsigned char)fminf(fmaxf(sod, 0.0f), 255.0f);
 64 | }
 39 | 
 40 | 
 41 | // Reports a failed CUDA runtime call on stderr.
 42 | // Returns true when status is cudaSuccess and false otherwise.
 43 | static bool tvFilterCudaOk(cudaError_t status, const char *what)
 44 | {
 45 |         if(status == cudaSuccess)
 46 |                 return true;
 47 |         cerr << "\nCUDA error in " << what << ": " << cudaGetErrorString(status) << "\n";
 48 |         return false;
 49 | }
 50 | 
 51 | // The wrapper is used to call total variation filter.
 52 | //
 53 | //   input  - source image; must be a continuous single-channel 8-bit Mat
 54 | //   output - destination image, allocated by the caller with the same size
 55 | //            and type as input; receives the filtered pixels
 56 | //
 57 | // Copies input to the device, runs tvFilter with one thread per pixel, copies
 58 | // the result back and prints the kernel time measured with CUDA events.
 59 | // An invalid argument or a failing CUDA call is reported on stderr; in that
 60 | // case output may be left unmodified and no timing is printed.
 61 | void tvFilter_GPU_wrapper(const cv::Mat& input, cv::Mat& output)
 62 | {
 63 |         // The kernel indexes both buffers as tightly packed width*height bytes,
 64 |         // so reject anything that does not match that layout
 65 |         if(input.empty() || input.type() != CV_8UC1 || output.type() != CV_8UC1 ||
 66 |            input.size() != output.size() || !input.isContinuous() || !output.isContinuous())
 67 |         {
 68 |                 cerr << "\ntvFilter_GPU_wrapper: input and output must be continuous 8-bit single-channel images of equal size\n";
 69 |                 return;
 70 |         }
 71 | 
 72 |         // Calculate number of input & output bytes (one byte per pixel)
 73 |         const size_t inputSize = (size_t)input.cols * input.rows;
 74 |         const size_t outputSize = (size_t)output.cols * output.rows;
 75 | 
 76 |         cudaEvent_t start = NULL, stop = NULL;
 77 |         unsigned char *d_input = NULL, *d_output = NULL;
 78 |         float milliseconds = 0;
 79 | 
 80 |         // Create the timing events, allocate device memory, clear the output
 81 |         // buffer (so any pixel the kernel leaves unwritten reads back as 0) and
 82 |         // upload the input; the chain stops at the first failing call
 83 |         bool ok = tvFilterCudaOk(cudaEventCreate(&start), "cudaEventCreate(start)")
 84 |                && tvFilterCudaOk(cudaEventCreate(&stop), "cudaEventCreate(stop)")
 85 |                && tvFilterCudaOk(cudaMalloc<unsigned char>(&d_input,inputSize), "cudaMalloc(input)")
 86 |                && tvFilterCudaOk(cudaMalloc<unsigned char>(&d_output,outputSize), "cudaMalloc(output)")
 87 |                && tvFilterCudaOk(cudaMemset(d_output,0,outputSize), "cudaMemset(output)")
 88 |                && tvFilterCudaOk(cudaMemcpy(d_input,input.ptr(),inputSize,cudaMemcpyHostToDevice), "cudaMemcpy(host to device)");
 89 | 
 90 |         if(ok)
 91 |         {
 92 |                 // Specify block size
 93 |                 const dim3 block(BLOCK_SIZE,BLOCK_SIZE);
 94 | 
 95 |                 // Calculate grid size to cover the whole image
 96 |                 const dim3 grid((output.cols + block.x - 1)/block.x, (output.rows + block.y - 1)/block.y);
 97 | 
 98 |                 // Start time
 99 |                 ok = tvFilterCudaOk(cudaEventRecord(start), "cudaEventRecord(start)");
100 |                 if(ok)
101 |                 {
102 |                         // Run total variation kernel on CUDA
103 |                         tvFilter<<<grid,block>>>(d_input, d_output, output.cols, output.rows);
104 | 
105 |                         // A launch does not return a status itself; fetch it, then
106 |                         // stop the timer, download the result and read the timing
107 |                         ok = tvFilterCudaOk(cudaGetLastError(), "tvFilter launch")
108 |                           && tvFilterCudaOk(cudaEventRecord(stop), "cudaEventRecord(stop)")
109 |                           && tvFilterCudaOk(cudaMemcpy(output.ptr(),d_output,outputSize,cudaMemcpyDeviceToHost), "cudaMemcpy(device to host)")
110 |                           && tvFilterCudaOk(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)")
111 |                           && tvFilterCudaOk(cudaEventElapsedTime(&milliseconds, start, stop), "cudaEventElapsedTime");
112 |                 }
113 |         }
114 | 
115 |         // Free the device memory and the timing events on every path
116 |         cudaFree(d_input);
117 |         cudaFree(d_output);
118 |         if(start)
119 |                 cudaEventDestroy(start);
120 |         if(stop)
121 |                 cudaEventDestroy(stop);
122 | 
123 |         if(ok)
124 |                 cout<< "\nTotal processing time on GPU (ms): " << milliseconds << "\n";
125 | }
 90 | 
 91 | 
 92 | 
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 | 
100 | 
101 | 
102 | 
103 | 


--------------------------------------------------------------------------------
/Laplacian/laplacianFilter.cu:
--------------------------------------------------------------------------------
  1 | //
  2 | // CUDA implementation of Laplacian Filter
  3 | //
  4 | #include <opencv2/imgproc/imgproc.hpp>
  5 | #include <opencv2/highgui.hpp>
  6 | #include <iostream>
  7 | #include <string>
  8 | #include <stdio.h>
  9 | #include <cuda.h>
 10 | #include "cuda_runtime.h"
 11 | 
 12 | #define BLOCK_SIZE      16
 13 | #define FILTER_WIDTH    3       
 14 | #define FILTER_HEIGHT   3       
 15 | 
 16 | using namespace std;
 17 | 
 18 | // Laplacian filter kernel for a single-channel 8-bit image.
 19 | //
 20 | // Launch layout: 2D grid of 2D blocks with one thread per pixel; thread (x, y)
 21 | // handles pixel (x, y) and threads that fall outside the image do nothing.
 22 | //   srcImage - device pointer to width*height input bytes, rows tightly packed
 23 | //   dstImage - device pointer to width*height output bytes, rows tightly packed
 24 | //   width    - image width in pixels
 25 | //   height   - image height in pixels
 26 | //
 27 | // Interior pixels receive the 3x3 weighted sum with the 4-neighbour Laplacian
 28 | // mask. The weighted sum can be negative or exceed 255, so it is clamped to
 29 | // [0, 255] before the store; converting an out-of-range float to unsigned char
 30 | // is undefined behaviour. Border pixels, where the window would leave the
 31 | // image, are set to 0 so no output byte is left uninitialised.
 32 | __global__ void laplacianFilter(unsigned char *srcImage, unsigned char *dstImage, unsigned int width, unsigned int height)
 33 | {
 34 |    const int x = blockIdx.x*blockDim.x + threadIdx.x;
 35 |    const int y = blockIdx.y*blockDim.y + threadIdx.y;
 36 | 
 37 |    // signed copies keep the comparisons below free of unsigned wrap-around
 38 |    const int w = (int)width;
 39 |    const int h = (int)height;
 40 | 
 41 |    // threads outside the image have nothing to do
 42 |    if(x>=w || y>=h)
 43 |       return;
 44 | 
 45 |    const bool interior = (x>=FILTER_WIDTH/2) && (x<(w-FILTER_WIDTH/2)) && (y>=FILTER_HEIGHT/2) && (y<(h-FILTER_HEIGHT/2));
 46 |    if(!interior)
 47 |    {
 48 |       dstImage[y*w+x] = 0;
 49 |       return;
 50 |    }
 51 | 
 52 |    // filter coefficients, indexed [row][column]
 53 |    const float kernel[FILTER_HEIGHT][FILTER_WIDTH] = {{0, -1, 0}, {-1, 4, -1}, {0, -1, 0}};
 54 |    // alternative 8-neighbour mask: {-1, -1, -1, -1, 8, -1, -1, -1, -1}
 55 | 
 56 |    // Sum of weighted pixel values
 57 |    float sum = 0.0f;
 58 |    for(int ky=-FILTER_HEIGHT/2; ky<=FILTER_HEIGHT/2; ky++) {
 59 |       for(int kx=-FILTER_WIDTH/2; kx<=FILTER_WIDTH/2; kx++) {
 60 |          const float fl = srcImage[((y+ky)*w + (x+kx))];
 61 |          sum += fl*kernel[ky+FILTER_HEIGHT/2][kx+FILTER_WIDTH/2];
 62 |       }
 63 |    }
 64 |    dstImage[y*w+x] = (unsigned char)fminf(fmaxf(sum, 0.0f), 255.0f);
 65 | }
 41 | 
 42 | 
 43 | // Reports a failed CUDA runtime call on stderr.
 44 | // Returns true when status is cudaSuccess and false otherwise.
 45 | static bool laplacianFilterCudaOk(cudaError_t status, const char *what)
 46 | {
 47 |         if(status == cudaSuccess)
 48 |                 return true;
 49 |         cerr << "\nCUDA error in " << what << ": " << cudaGetErrorString(status) << "\n";
 50 |         return false;
 51 | }
 52 | 
 53 | // The wrapper to call laplacian filter.
 54 | //
 55 | //   input  - source image; must be a continuous single-channel 8-bit Mat
 56 | //   output - destination image, allocated by the caller with the same size
 57 | //            and type as input; receives the filtered pixels
 58 | //
 59 | // Copies input to the device, runs laplacianFilter with one thread per pixel,
 60 | // copies the result back and prints the kernel time measured with CUDA events.
 61 | // An invalid argument or a failing CUDA call is reported on stderr; in that
 62 | // case output may be left unmodified and no timing is printed.
 63 | void laplacianFilter_GPU_wrapper(const cv::Mat& input, cv::Mat& output)
 64 | {
 65 |         // The kernel indexes both buffers as tightly packed width*height bytes,
 66 |         // so reject anything that does not match that layout
 67 |         if(input.empty() || input.type() != CV_8UC1 || output.type() != CV_8UC1 ||
 68 |            input.size() != output.size() || !input.isContinuous() || !output.isContinuous())
 69 |         {
 70 |                 cerr << "\nlaplacianFilter_GPU_wrapper: input and output must be continuous 8-bit single-channel images of equal size\n";
 71 |                 return;
 72 |         }
 73 | 
 74 |         // Calculate number of input & output bytes (one byte per pixel)
 75 |         const size_t inputSize = (size_t)input.cols * input.rows;
 76 |         const size_t outputSize = (size_t)output.cols * output.rows;
 77 | 
 78 |         cudaEvent_t start = NULL, stop = NULL;
 79 |         unsigned char *d_input = NULL, *d_output = NULL;
 80 |         float milliseconds = 0;
 81 | 
 82 |         // Create the timing events, allocate device memory, clear the output
 83 |         // buffer (so any pixel the kernel leaves unwritten reads back as 0) and
 84 |         // upload the input; the chain stops at the first failing call
 85 |         bool ok = laplacianFilterCudaOk(cudaEventCreate(&start), "cudaEventCreate(start)")
 86 |                && laplacianFilterCudaOk(cudaEventCreate(&stop), "cudaEventCreate(stop)")
 87 |                && laplacianFilterCudaOk(cudaMalloc<unsigned char>(&d_input,inputSize), "cudaMalloc(input)")
 88 |                && laplacianFilterCudaOk(cudaMalloc<unsigned char>(&d_output,outputSize), "cudaMalloc(output)")
 89 |                && laplacianFilterCudaOk(cudaMemset(d_output,0,outputSize), "cudaMemset(output)")
 90 |                && laplacianFilterCudaOk(cudaMemcpy(d_input,input.ptr(),inputSize,cudaMemcpyHostToDevice), "cudaMemcpy(host to device)");
 91 | 
 92 |         if(ok)
 93 |         {
 94 |                 // Specify block size
 95 |                 const dim3 block(BLOCK_SIZE,BLOCK_SIZE);
 96 | 
 97 |                 // Calculate grid size to cover the whole image
 98 |                 const dim3 grid((output.cols + block.x - 1)/block.x, (output.rows + block.y - 1)/block.y);
 99 | 
100 |                 // Start time
101 |                 ok = laplacianFilterCudaOk(cudaEventRecord(start), "cudaEventRecord(start)");
102 |                 if(ok)
103 |                 {
104 |                         // Run laplacian kernel on CUDA
105 |                         laplacianFilter<<<grid,block>>>(d_input, d_output, output.cols, output.rows);
106 | 
107 |                         // A launch does not return a status itself; fetch it, then
108 |                         // stop the timer, download the result and read the timing
109 |                         ok = laplacianFilterCudaOk(cudaGetLastError(), "laplacianFilter launch")
110 |                           && laplacianFilterCudaOk(cudaEventRecord(stop), "cudaEventRecord(stop)")
111 |                           && laplacianFilterCudaOk(cudaMemcpy(output.ptr(),d_output,outputSize,cudaMemcpyDeviceToHost), "cudaMemcpy(device to host)")
112 |                           && laplacianFilterCudaOk(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)")
113 |                           && laplacianFilterCudaOk(cudaEventElapsedTime(&milliseconds, start, stop), "cudaEventElapsedTime");
114 |                 }
115 |         }
116 | 
117 |         // Free the device memory and the timing events on every path
118 |         cudaFree(d_input);
119 |         cudaFree(d_output);
120 |         if(start)
121 |                 cudaEventDestroy(start);
122 |         if(stop)
123 |                 cudaEventDestroy(stop);
124 | 
125 |         if(ok)
126 |                 cout<< "\nProcessing time for GPU (ms): " << milliseconds << "\n";
127 | }
 92 | 
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 | 
100 | 
101 | 
102 | 
103 | 
104 | 
105 | 


--------------------------------------------------------------------------------
/BoxFilter/boxFilter.cu:
--------------------------------------------------------------------------------
  1 | //
  2 | // CUDA implementation of Box Filter
  3 | //
  4 | #include <opencv2/imgproc/imgproc.hpp>
  5 | #include <opencv2/highgui.hpp>
  6 | #include <iostream>
  7 | #include <string>
  8 | #include <stdio.h>
  9 | #include <cuda.h>
 10 | #include "cuda_runtime.h"
 11 | 
 12 | #define BLOCK_SIZE      16
 13 | #define FILTER_WIDTH    3       
 14 | #define FILTER_HEIGHT   3       
 15 | 
 16 | using namespace std;
 17 | 
 18 | // Box (mean) filter kernel for an 8-bit image with interleaved channels.
 19 | //
 20 | // Launch layout: 2D grid of 2D blocks with one thread per pixel; thread (x, y)
 21 | // handles every channel of pixel (x, y) and threads that fall outside the
 22 | // image do nothing.
 23 | //   srcImage - device pointer to width*height*channel input bytes, tightly packed
 24 | //   dstImage - device pointer to width*height*channel output bytes, tightly packed
 25 | //   width    - image width in pixels
 26 | //   height   - image height in pixels
 27 | //   channel  - number of interleaved bytes per pixel
 28 | //
 29 | // Interior pixels receive the mean of the FILTER_WIDTH x FILTER_HEIGHT window,
 30 | // truncated to an integer. Border pixels, where the window would leave the
 31 | // image, are copied from the source so no output byte is left uninitialised.
 32 | __global__ void boxFilter(unsigned char *srcImage, unsigned char *dstImage, unsigned int width, unsigned int height, int channel)
 33 | {
 34 |    const int x = blockIdx.x*blockDim.x + threadIdx.x;
 35 |    const int y = blockIdx.y*blockDim.y + threadIdx.y;
 36 | 
 37 |    // signed copies keep the comparisons below free of unsigned wrap-around
 38 |    const int w = (int)width;
 39 |    const int h = (int)height;
 40 | 
 41 |    // threads outside the image have nothing to do
 42 |    if(x>=w || y>=h)
 43 |       return;
 44 | 
 45 |    // offset of the first channel of this thread's pixel
 46 |    const int pixel = (y*w + x)*channel;
 47 | 
 48 |    const bool interior = (x>=FILTER_WIDTH/2) && (x<(w-FILTER_WIDTH/2)) && (y>=FILTER_HEIGHT/2) && (y<(h-FILTER_HEIGHT/2));
 49 |    if(!interior)
 50 |    {
 51 |       for(int c=0 ; c<channel ; c++)
 52 |          dstImage[pixel+c] = srcImage[pixel+c];
 53 |       return;
 54 |    }
 55 | 
 56 |    // Number of filter pixels; identical for every interior pixel
 57 |    const float kS = FILTER_WIDTH*FILTER_HEIGHT;
 58 | 
 59 |    for(int c=0 ; c<channel ; c++)
 60 |    {
 61 |       // Sum of pixel values
 62 |       float sum = 0.0f;
 63 |       // Loop inside the filter to average pixel values
 64 |       for(int ky=-FILTER_HEIGHT/2; ky<=FILTER_HEIGHT/2; ky++) {
 65 |          for(int kx=-FILTER_WIDTH/2; kx<=FILTER_WIDTH/2; kx++) {
 66 |             sum += srcImage[((y+ky)*w + (x+kx))*channel+c];
 67 |          }
 68 |       }
 69 |       dstImage[pixel+c] = (unsigned char)(sum / kS);
 70 |    }
 71 | }
 45 | 
 46 | 
 47 | // Reports a failed CUDA runtime call on stderr.
 48 | // Returns true when status is cudaSuccess and false otherwise.
 49 | static bool boxFilterCudaOk(cudaError_t status, const char *what)
 50 | {
 51 |         if(status == cudaSuccess)
 52 |                 return true;
 53 |         cerr << "\nCUDA error in " << what << ": " << cudaGetErrorString(status) << "\n";
 54 |         return false;
 55 | }
 56 | 
 57 | // The wrapper to call box filter.
 58 | //
 59 | //   input  - source image; must be a continuous Mat with 8-bit channels
 60 | //   output - destination image, allocated by the caller with the same size
 61 | //            and type as input; receives the filtered pixels
 62 | //
 63 | // Copies input to the device, runs boxFilter with one thread per pixel, copies
 64 | // the result back and prints the kernel time measured with CUDA events.
 65 | // An invalid argument or a failing CUDA call is reported on stderr; in that
 66 | // case output may be left unmodified and no timing is printed.
 67 | void boxFilter_GPU_wrapper(const cv::Mat& input, cv::Mat& output)
 68 | {
 69 |         // The kernel indexes both buffers as tightly packed interleaved bytes,
 70 |         // so reject anything that does not match that layout
 71 |         if(input.empty() || input.depth() != CV_8U || input.type() != output.type() ||
 72 |            input.size() != output.size() || !input.isContinuous() || !output.isContinuous())
 73 |         {
 74 |                 cerr << "\nboxFilter_GPU_wrapper: input and output must be continuous 8-bit images of equal size and type\n";
 75 |                 return;
 76 |         }
 77 | 
 78 |         // Number of image channels (one byte each)
 79 |         const int channel = input.channels();
 80 | 
 81 |         // Calculate number of input & output bytes
 82 |         const size_t inputSize = (size_t)input.cols * input.rows * channel;
 83 |         const size_t outputSize = (size_t)output.cols * output.rows * channel;
 84 | 
 85 |         cudaEvent_t start = NULL, stop = NULL;
 86 |         unsigned char *d_input = NULL, *d_output = NULL;
 87 |         float milliseconds = 0;
 88 | 
 89 |         // Create the timing events, allocate device memory, upload the input
 90 |         // and seed the output buffer with it (so any pixel the kernel leaves
 91 |         // unwritten keeps its source value); stops at the first failing call
 92 |         bool ok = boxFilterCudaOk(cudaEventCreate(&start), "cudaEventCreate(start)")
 93 |                && boxFilterCudaOk(cudaEventCreate(&stop), "cudaEventCreate(stop)")
 94 |                && boxFilterCudaOk(cudaMalloc<unsigned char>(&d_input,inputSize), "cudaMalloc(input)")
 95 |                && boxFilterCudaOk(cudaMalloc<unsigned char>(&d_output,outputSize), "cudaMalloc(output)")
 96 |                && boxFilterCudaOk(cudaMemcpy(d_input,input.ptr(),inputSize,cudaMemcpyHostToDevice), "cudaMemcpy(host to device)")
 97 |                && boxFilterCudaOk(cudaMemcpy(d_output,d_input,outputSize,cudaMemcpyDeviceToDevice), "cudaMemcpy(device to device)");
 98 | 
 99 |         if(ok)
100 |         {
101 |                 // Specify block size
102 |                 const dim3 block(BLOCK_SIZE,BLOCK_SIZE);
103 | 
104 |                 // Calculate grid size to cover the whole image
105 |                 const dim3 grid((output.cols + block.x - 1)/block.x, (output.rows + block.y - 1)/block.y);
106 | 
107 |                 // Start time
108 |                 ok = boxFilterCudaOk(cudaEventRecord(start), "cudaEventRecord(start)");
109 |                 if(ok)
110 |                 {
111 |                         // Run BoxFilter kernel on CUDA
112 |                         boxFilter<<<grid,block>>>(d_input, d_output, output.cols, output.rows, channel);
113 | 
114 |                         // A launch does not return a status itself; fetch it, then
115 |                         // stop the timer, download the result and read the timing
116 |                         ok = boxFilterCudaOk(cudaGetLastError(), "boxFilter launch")
117 |                           && boxFilterCudaOk(cudaEventRecord(stop), "cudaEventRecord(stop)")
118 |                           && boxFilterCudaOk(cudaMemcpy(output.ptr(),d_output,outputSize,cudaMemcpyDeviceToHost), "cudaMemcpy(device to host)")
119 |                           && boxFilterCudaOk(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)")
120 |                           && boxFilterCudaOk(cudaEventElapsedTime(&milliseconds, start, stop), "cudaEventElapsedTime");
121 |                 }
122 |         }
123 | 
124 |         // Free the device memory and the timing events on every path
125 |         cudaFree(d_input);
126 |         cudaFree(d_output);
127 |         if(start)
128 |                 cudaEventDestroy(start);
129 |         if(stop)
130 |                 cudaEventDestroy(stop);
131 | 
132 |         if(ok)
133 |                 cout<< "\nProcessing time for GPU (ms): " << milliseconds << "\n";
134 | }
 99 | 
100 | 
101 | 
102 | 
103 | 
104 | 
105 | 
106 | 
107 | 
108 | 
109 | 
110 | 
111 | 
112 | 


--------------------------------------------------------------------------------
/Sharpening/sharpeningFilter.cu:
--------------------------------------------------------------------------------
  1 | //
  2 | // CUDA implementation of Image Sharpening Filter
  3 | //
  4 | #include <opencv2/imgproc/imgproc.hpp>
  5 | #include <opencv2/highgui.hpp>
  6 | #include <iostream>
  7 | #include <string>
  8 | #include <stdio.h>
  9 | #include <cuda.h>
 10 | #include "cuda_runtime.h"
 11 | 
 12 | #define BLOCK_SIZE      16
 13 | #define FILTER_WIDTH    3       
 14 | #define FILTER_HEIGHT   3       
 15 | 
 16 | using namespace std;
 17 | 
 18 | // Sharpening filter kernel for an 8-bit image with interleaved channels.
 19 | //
 20 | // Launch layout: 2D grid of 2D blocks with one thread per pixel; thread (x, y)
 21 | // handles every channel of pixel (x, y) and threads that fall outside the
 22 | // image do nothing.
 23 | //   srcImage - device pointer to width*height*channel input bytes, tightly packed
 24 | //   dstImage - device pointer to width*height*channel output bytes, tightly packed
 25 | //   width    - image width in pixels
 26 | //   height   - image height in pixels
 27 | //   channel  - number of interleaved bytes per pixel
 28 | //
 29 | // Interior pixels receive the 3x3 weighted sum with a mask of -1 around a
 30 | // centre weight of 9. The sum can be negative or exceed 255, so it is clamped
 31 | // to [0, 255] before the store; converting an out-of-range float to unsigned
 32 | // char is undefined behaviour. Border pixels, where the window would leave the
 33 | // image, are copied from the source so no output byte is left uninitialised.
 34 | __global__ void sharpeningFilter(unsigned char *srcImage, unsigned char *dstImage, unsigned int width, unsigned int height, int channel)
 35 | {
 36 |    const int x = blockIdx.x*blockDim.x + threadIdx.x;
 37 |    const int y = blockIdx.y*blockDim.y + threadIdx.y;
 38 | 
 39 |    // signed copies keep the comparisons below free of unsigned wrap-around
 40 |    const int w = (int)width;
 41 |    const int h = (int)height;
 42 | 
 43 |    // threads outside the image have nothing to do
 44 |    if(x>=w || y>=h)
 45 |       return;
 46 | 
 47 |    // offset of the first channel of this thread's pixel
 48 |    const int pixel = (y*w + x)*channel;
 49 | 
 50 |    const bool interior = (x>=FILTER_WIDTH/2) && (x<(w-FILTER_WIDTH/2)) && (y>=FILTER_HEIGHT/2) && (y<(h-FILTER_HEIGHT/2));
 51 |    if(!interior)
 52 |    {
 53 |       for(int c=0 ; c<channel ; c++)
 54 |          dstImage[pixel+c] = srcImage[pixel+c];
 55 |       return;
 56 |    }
 57 | 
 58 |    // filter coefficients, indexed [row][column] to match the loops below
 59 |    const float kernel[FILTER_HEIGHT][FILTER_WIDTH] = {{-1, -1, -1}, {-1, 9, -1}, {-1, -1, -1}};
 60 | 
 61 |    for(int c=0 ; c<channel ; c++)
 62 |    {
 63 |       // Sum of weighted pixel values
 64 |       float sum = 0.0f;
 65 |       for(int ky=-FILTER_HEIGHT/2; ky<=FILTER_HEIGHT/2; ky++) {
 66 |          for(int kx=-FILTER_WIDTH/2; kx<=FILTER_WIDTH/2; kx++) {
 67 |             const float fl = srcImage[((y+ky)*w + (x+kx))*channel+c];
 68 |             sum += fl*kernel[ky+FILTER_HEIGHT/2][kx+FILTER_WIDTH/2];
 69 |          }
 70 |       }
 71 |       dstImage[pixel+c] = (unsigned char)fminf(fmaxf(sum, 0.0f), 255.0f);
 72 |    }
 73 | }
 43 | 
 44 | 
 45 | // The wrapper is used to call sharpening filter 
 46 | void sharpeningFilter_GPU_wrapper(const cv::Mat& input, cv::Mat& output)
 47 | {
 48 |         // Use cuda event to catch time
 49 |         cudaEvent_t start, stop;
 50 |         cudaEventCreate(&start);
 51 |         cudaEventCreate(&stop);
 52 | 
 53 |         // Calculate number of image channels
 54 |         int channel = input.step/input.cols; 
 55 | 
 56 |         // Calculate number of input & output bytes in each block
 57 |         const int inputSize = input.cols * input.rows * channel;
 58 |         const int outputSize = output.cols * output.rows * channel;
 59 |         unsigned char *d_input, *d_output;
 60 |         
 61 |         // Allocate device memory
 62 |         cudaMalloc<unsigned char>(&d_input,inputSize);
 63 |         cudaMalloc<unsigned char>(&d_output,outputSize);
 64 | 
 65 |         // Copy data from OpenCV input image to device memory
 66 |         cudaMemcpy(d_input,input.ptr(),inputSize,cudaMemcpyHostToDevice);
 67 | 
 68 |         // Specify block size
 69 |         const dim3 block(BLOCK_SIZE,BLOCK_SIZE);
 70 | 
 71 |         // Calculate grid size to cover the whole image
 72 |         const dim3 grid((output.cols + block.x - 1)/block.x, (output.rows + block.y - 1)/block.y);
 73 | 
 74 |         // Start time
 75 |         cudaEventRecord(start);
 76 | 
 77 |         // Run BoxFilter kernel on CUDA 
 78 |         sharpeningFilter<<<grid,block>>>(d_input, d_output, output.cols, output.rows, channel);
 79 | 
 80 |         // Stop time
 81 |         cudaEventRecord(stop);
 82 | 
 83 |         //Copy data from device memory to output image
 84 |         cudaMemcpy(output.ptr(),d_output,outputSize,cudaMemcpyDeviceToHost);
 85 | 
 86 |         //Free the device memory
 87 |         cudaFree(d_input);
 88 |         cudaFree(d_output);
 89 | 
 90 |         cudaEventSynchronize(stop);
 91 |         float milliseconds = 0;
 92 |         
 93 |         // Calculate elapsed time in milisecond  
 94 |         cudaEventElapsedTime(&milliseconds, start, stop);
 95 |         cout<< "\nProcessing time on GPU (ms): " << milliseconds << "\n";
 96 | }
 97 | 
 98 | 
 99 | 
100 | 
101 | 
102 | 
103 | 
104 | 
105 | 
106 | 
107 | 
108 | 
109 | 
110 | 


--------------------------------------------------------------------------------
/SobelEdgeDetect/sobelEdgeDetectionFilter.cu:
--------------------------------------------------------------------------------
  1 | //
  2 | // CUDA implementation of Sobel Edge Detect Filter
  3 | //
  4 | #include <opencv2/imgproc/imgproc.hpp>
  5 | #include <opencv2/highgui.hpp>
  6 | #include <iostream>
  7 | #include <string>
  8 | #include <stdio.h>
  9 | #include <cuda.h>
 10 | #include "cuda_runtime.h"
 11 | 
 12 | #define BLOCK_SIZE      16
 13 | #define FILTER_WIDTH    3       
 14 | #define FILTER_HEIGHT   3       
 15 | 
 16 | using namespace std;
 17 | 
 18 | // Run Sobel Edge Detect Filter on GPU
 19 | __global__ void sobelFilter(unsigned char *srcImage, unsigned char *dstImage, unsigned int width, unsigned int height)
 20 | {
 21 |    int x = blockIdx.x*blockDim.x + threadIdx.x;
 22 |    int y = blockIdx.y*blockDim.y + threadIdx.y;
 23 | 
 24 |    float Kx[3][3] = {-1, 0, 1, -2, 0, 2, -1, 0, 1};
 25 |    float Ky[3][3] = {1, 2, 1, 0, 0, 0, -1, -2, -1};
 26 | 
 27 |    // only threads inside image will write results
 28 |    if((x>=FILTER_WIDTH/2) && (x<(width-FILTER_WIDTH/2)) && (y>=FILTER_HEIGHT/2) && (y<(height-FILTER_HEIGHT/2)))
 29 |    {
 30 |          // Gradient in x-direction 
 31 |          float Gx = 0;
 32 |          // Loop inside the filter to average pixel values
 33 |          for(int ky=-FILTER_HEIGHT/2; ky<=FILTER_HEIGHT/2; ky++) {
 34 |             for(int kx=-FILTER_WIDTH/2; kx<=FILTER_WIDTH/2; kx++) {
 35 |                float fl = srcImage[((y+ky)*width + (x+kx))];
 36 |                Gx += fl*Kx[ky+FILTER_HEIGHT/2][kx+FILTER_WIDTH/2];
 37 |             }
 38 |          }
 39 |          float Gx_abs = Gx < 0 ? -Gx : Gx;
 40 | 
 41 |          // Gradient in y-direction 
 42 |          float Gy = 0;
 43 |          // Loop inside the filter to average pixel values
 44 |          for(int ky=-FILTER_HEIGHT/2; ky<=FILTER_HEIGHT/2; ky++) {
 45 |             for(int kx=-FILTER_WIDTH/2; kx<=FILTER_WIDTH/2; kx++) {
 46 |                float fl = srcImage[((y+ky)*width + (x+kx))];
 47 |                Gy += fl*Ky[ky+FILTER_HEIGHT/2][kx+FILTER_WIDTH/2];
 48 |             }
 49 |          }
 50 |          float Gy_abs = Gy < 0 ? -Gy : Gy;
 51 | 
 52 |          dstImage[(y*width+x)] =  Gx_abs + Gy_abs;
 53 |    }
 54 | }
 55 | 
 56 | 
 57 | // The wrapper is use to call sobel edge detection filter 
 58 | void sobelFilter_GPU_wrapper(const cv::Mat& input, cv::Mat& output)
 59 | {
 60 |         // Use cuda event to catch time
 61 |         cudaEvent_t start, stop;
 62 |         cudaEventCreate(&start);
 63 |         cudaEventCreate(&stop);
 64 | 
 65 |         // Calculate number of input & output bytes in each block
 66 |         const int inputSize = input.cols * input.rows;
 67 |         const int outputSize = output.cols * output.rows;
 68 |         unsigned char *d_input, *d_output;
 69 |         
 70 |         // Allocate device memory
 71 |         cudaMalloc<unsigned char>(&d_input,inputSize);
 72 |         cudaMalloc<unsigned char>(&d_output,outputSize);
 73 | 
 74 |         // Copy data from OpenCV input image to device memory
 75 |         cudaMemcpy(d_input,input.ptr(),inputSize,cudaMemcpyHostToDevice);
 76 | 
 77 |         // Specify block size
 78 |         const dim3 block(BLOCK_SIZE,BLOCK_SIZE);
 79 | 
 80 |         // Calculate grid size to cover the whole image
 81 |         const dim3 grid((output.cols + block.x - 1)/block.x, (output.rows + block.y - 1)/block.y);
 82 | 
 83 |         // Start time
 84 |         cudaEventRecord(start);
 85 | 
 86 |         // Run Sobel Edge Detection Filter kernel on CUDA 
 87 |         sobelFilter<<<grid,block>>>(d_input, d_output, output.cols, output.rows);
 88 | 
 89 |         // Stop time
 90 |         cudaEventRecord(stop);
 91 | 
 92 |         //Copy data from device memory to output image
 93 |         cudaMemcpy(output.ptr(),d_output,outputSize,cudaMemcpyDeviceToHost);
 94 | 
 95 |         //Free the device memory
 96 |         cudaFree(d_input);
 97 |         cudaFree(d_output);
 98 | 
 99 |         cudaEventSynchronize(stop);
100 |         float milliseconds = 0;
101 |         
102 |         // Calculate elapsed time in milisecond  
103 |         cudaEventElapsedTime(&milliseconds, start, stop);
104 |         cout<< "\nProcessing time on GPU (ms): " << milliseconds << "\n";
105 | }
106 | 
107 | 
108 | 
109 | 
110 | 
111 | 
112 | 
113 | 
114 | 
115 | 
116 | 
117 | 
118 | 
119 | 


--------------------------------------------------------------------------------
/Median/medianFilter.cu:
--------------------------------------------------------------------------------
  1 | #include <opencv2/imgproc.hpp>
  2 | #include <opencv2/highgui.hpp>
  3 | #include <iostream>
  4 | #include <string>
  5 | #include <stdio.h>
  6 | #include <cuda.h>
  7 | #include <cuda_runtime.h>
  8 | #include <opencv2/imgcodecs.hpp>
  9 | #include <device_launch_parameters.h>
 10 | 
 11 | #define BLOCK_SIZE      16
 12 | #define FILTER_WIDTH    3       
 13 | #define FILTER_HEIGHT   3       
 14 | 
 15 | using namespace std;
 16 | 
 17 | // Sort function on device
 18 | __device__ void sort(unsigned char* filterVector)
 19 | {
 20 | 	 for (int i = 0; i < FILTER_WIDTH*FILTER_HEIGHT; i++) {
 21 | 	    for (int j = i + 1; j < FILTER_WIDTH*FILTER_HEIGHT; j++) {
 22 | 		if (filterVector[i] > filterVector[j]) { 
 23 | 	              //Swap the variables
 24 | 		      unsigned char tmp = filterVector[i];
 25 | 		      filterVector[i] = filterVector[j];
 26 | 		      filterVector[j] = tmp;
 27 | 		}
 28 |              }
 29 |          }
 30 | }
 31 | 
 32 | // Run Median Filter on GPU
 33 | __global__ void medianFilter(unsigned char *srcImage, unsigned char *dstImage, unsigned int width, unsigned int height, int channel)
 34 | {
 35 |    int x = blockIdx.x*blockDim.x + threadIdx.x;
 36 |    int y = blockIdx.y*blockDim.y + threadIdx.y;
 37 | 
 38 |    // only threads inside image will write results
 39 |    if((x>=FILTER_WIDTH/2) && (x<(width-FILTER_WIDTH/2)) && (y>=FILTER_HEIGHT/2) && (y<(height-FILTER_HEIGHT/2)))
 40 |    {
 41 |       for(int c=0 ; c<channel ; c++)   
 42 |       {
 43 |          unsigned char filterVector[FILTER_WIDTH*FILTER_HEIGHT];     
 44 |          // Loop inside the filter to average pixel values
 45 |          for(int ky=-FILTER_HEIGHT/2; ky<=FILTER_HEIGHT/2; ky++) {
 46 |             for(int kx=-FILTER_WIDTH/2; kx<=FILTER_WIDTH/2; kx++) {
 47 |                filterVector[ky*FILTER_WIDTH+kx] = srcImage[((y+ky)*width + (x+kx))*channel+c];
 48 |             }
 49 |          }
 50 |          // Sorting values of filter   
 51 |          sort(filterVector);
 52 |          dstImage[(y*width+x)*channel+c] =  filterVector[(FILTER_WIDTH*FILTER_HEIGHT)/2];
 53 |       }
 54 |    }
 55 | }
 56 | 
 57 | 
 58 | // The wrapper to call median filter 
 59 | void medianFilter_GPU_wrapper(const cv::Mat& input, cv::Mat& output)
 60 | {
 61 |         // Use cuda event to catch time
 62 |         cudaEvent_t start, stop;
 63 |         cudaEventCreate(&start);
 64 |         cudaEventCreate(&stop);
 65 | 
 66 |         // Calculate number of image channels
 67 |         int channel = input.step/input.cols; 
 68 | 
 69 |         // Calculate number of input & output bytes in each block
 70 |         const int inputSize = input.cols * input.rows * channel;
 71 |         const int outputSize = output.cols * output.rows * channel;
 72 |         unsigned char *d_input, *d_output;
 73 |         
 74 |         // Allocate device memory
 75 |         cudaMalloc<unsigned char>(&d_input,inputSize);
 76 |         cudaMalloc<unsigned char>(&d_output,outputSize);
 77 | 
 78 |         // Copy data from OpenCV input image to device memory
 79 |         cudaMemcpy(d_input,input.ptr(),inputSize,cudaMemcpyHostToDevice);
 80 | 
 81 |         // Specify block size
 82 |         const dim3 block(BLOCK_SIZE,BLOCK_SIZE);
 83 | 
 84 |         // Calculate grid size to cover the whole image
 85 |         const dim3 grid((output.cols + block.x - 1)/block.x, (output.rows + block.y - 1)/block.y);
 86 | 
 87 |         // Start time
 88 |         cudaEventRecord(start);
 89 | 
 90 |         // Run BoxFilter kernel on CUDA 
 91 |         medianFilter<<<grid,block>>>(d_input, d_output, output.cols, output.rows, channel);
 92 | 
 93 |         // Stop time
 94 |         cudaEventRecord(stop);
 95 | 
 96 |         //Copy data from device memory to output image
 97 |         cudaMemcpy(output.ptr(),d_output,outputSize,cudaMemcpyDeviceToHost);
 98 | 
 99 |         //Free the device memory
100 |         cudaFree(d_input);
101 |         cudaFree(d_output);
102 | 
103 |         cudaEventSynchronize(stop);
104 |         float milliseconds = 0;
105 |         
106 |         // Calculate elapsed time in milisecond  
107 |         cudaEventElapsedTime(&milliseconds, start, stop);
108 |         cout<< "\nProcessing time on GPU (ms): " << milliseconds << "\n";
109 | }
110 | 
111 | 
112 | 
113 | 
114 | 
115 | 
116 | 
117 | 
118 | 
119 | 
120 | 
121 | 
122 | 
123 | 


--------------------------------------------------------------------------------