├── Median ├── a.out ├── sample.jpg ├── sample_gpu.jpg ├── main.cu └── medianFilter.cu ├── BoxFilter ├── a.out ├── .DS_Store ├── sample.jpg ├── sample_gpu.jpg ├── main.cu └── boxFilter.cu ├── Laplacian ├── a.out ├── sample.jpg ├── sample_gpu.jpg ├── main.cu └── laplacianFilter.cu ├── TVFilter ├── a.out ├── .DS_Store ├── sample.jpg ├── sample_gpu.jpg ├── main.cu └── tvFilter.cu ├── Sharpening ├── a.out ├── .DS_Store ├── sample.jpg ├── sample_gpu.jpg ├── main.cu └── sharpeningFilter.cu ├── SobelEdgeDetect ├── a.out ├── .DS_Store ├── sample.jpg ├── sample_gpu.jpg ├── main.cu └── sobelEdgeDetectionFilter.cu └── readme.md /Median/a.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/Median/a.out -------------------------------------------------------------------------------- /BoxFilter/a.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/BoxFilter/a.out -------------------------------------------------------------------------------- /Laplacian/a.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/Laplacian/a.out -------------------------------------------------------------------------------- /TVFilter/a.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/TVFilter/a.out -------------------------------------------------------------------------------- /Median/sample.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/Median/sample.jpg 
-------------------------------------------------------------------------------- /Sharpening/a.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/Sharpening/a.out -------------------------------------------------------------------------------- /BoxFilter/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/BoxFilter/.DS_Store -------------------------------------------------------------------------------- /BoxFilter/sample.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/BoxFilter/sample.jpg -------------------------------------------------------------------------------- /Laplacian/sample.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/Laplacian/sample.jpg -------------------------------------------------------------------------------- /Sharpening/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/Sharpening/.DS_Store -------------------------------------------------------------------------------- /TVFilter/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/TVFilter/.DS_Store -------------------------------------------------------------------------------- /TVFilter/sample.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/TVFilter/sample.jpg 
-------------------------------------------------------------------------------- /Median/sample_gpu.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/Median/sample_gpu.jpg -------------------------------------------------------------------------------- /Sharpening/sample.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/Sharpening/sample.jpg -------------------------------------------------------------------------------- /SobelEdgeDetect/a.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/SobelEdgeDetect/a.out -------------------------------------------------------------------------------- /BoxFilter/sample_gpu.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/BoxFilter/sample_gpu.jpg -------------------------------------------------------------------------------- /Laplacian/sample_gpu.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/Laplacian/sample_gpu.jpg -------------------------------------------------------------------------------- /Sharpening/sample_gpu.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/Sharpening/sample_gpu.jpg -------------------------------------------------------------------------------- /SobelEdgeDetect/.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/SobelEdgeDetect/.DS_Store -------------------------------------------------------------------------------- /TVFilter/sample_gpu.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/TVFilter/sample_gpu.jpg -------------------------------------------------------------------------------- /SobelEdgeDetect/sample.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/SobelEdgeDetect/sample.jpg -------------------------------------------------------------------------------- /SobelEdgeDetect/sample_gpu.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arihant-jain-09/Image_Filtering_Parallel/master/SobelEdgeDetect/sample_gpu.jpg -------------------------------------------------------------------------------- /Median/main.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "medianFilter.cu" 7 | #include 8 | using namespace std; 9 | 10 | // Program main 11 | int main( int argc, char** argv ) { 12 | 13 | // name of image 14 | string image_name = "sample"; 15 | 16 | // input & output file names 17 | string input_file = image_name+".jpg"; 18 | string output_file_cpu = image_name+"_cpu.jpg"; 19 | string output_file_gpu = image_name+"_gpu.jpg"; 20 | 21 | // Read input image 22 | cv::Mat srcImage = cv::imread(input_file ,cv::IMREAD_UNCHANGED); 23 | if(srcImage.empty()) 24 | { 25 | std::cout<<"Image Not Found: "<< input_file << std::endl; 26 | return -1; 27 | } 28 | cout <<"\ninput image size: "< 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "boxFilter.cu" 10 | 11 | using 
namespace std; 12 | 13 | // Program main 14 | int main( int argc, char** argv ) { 15 | 16 | // name of image 17 | string image_name = "sample"; 18 | 19 | // input & output file names 20 | string input_file = image_name+".jpg"; 21 | string output_file_cpu = image_name+"_cpu.jpg"; 22 | string output_file_gpu = image_name+"_gpu.jpg"; 23 | 24 | // Read input image 25 | cv::Mat srcImage = cv::imread(input_file ,cv::IMREAD_UNCHANGED); 26 | if(srcImage.empty()) 27 | { 28 | std::cout<<"Image Not Found: "<< input_file << std::endl; 29 | return -1; 30 | } 31 | cout <<"\ninput image size: "< 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "sharpeningFilter.cu" 10 | 11 | using namespace std; 12 | 13 | // Program main 14 | int main( int argc, char** argv ) { 15 | 16 | // name of image 17 | string image_name = "sample"; 18 | 19 | // input & output file names 20 | string input_file = image_name+".jpg"; 21 | string output_file_cpu = image_name+"_cpu.jpg"; 22 | string output_file_gpu = image_name+"_gpu.jpg"; 23 | 24 | // Read input image 25 | cv::Mat srcImage = cv::imread(input_file ,cv::IMREAD_UNCHANGED); 26 | if(srcImage.empty()) 27 | { 28 | std::cout<<"Image Not Found: "<< input_file << std::endl; 29 | return -1; 30 | } 31 | cout <<"\ninput image size: "< 44 | ex: cd BoxFilter 45 | ``` 46 | ``` 47 | $ nvcc main.cu `pkg-config --cflags --libs opencv4` 48 | ``` 49 | ``` 50 | $ export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib 51 | ``` 52 | ``` 53 | $ ./a.out 54 | input image size: 600 298 3 55 | Processing time for GPU (ms): 0.05088 56 | ``` -------------------------------------------------------------------------------- /TVFilter/main.cu: -------------------------------------------------------------------------------- 1 | // 2 | // Total Variation Filter using CUDA 3 | // 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "tvFilter.cu" 10 | 11 | using namespace std; 12 | 13 | // Program main 14 | int main( int argc, char** argv ) 
{ 15 | 16 | // name of image 17 | string image_name = "sample"; 18 | 19 | // input & output file names 20 | string input_file = image_name+".jpg"; 21 | string output_file_cpu = image_name+"_cpu.jpg"; 22 | string output_file_gpu = image_name+"_gpu.jpg"; 23 | 24 | // Read input image 25 | cv::Mat srcImage = cv::imread(input_file ,cv::IMREAD_UNCHANGED); 26 | if(srcImage.empty()) 27 | { 28 | std::cout<<"Image Not Found: "<< input_file << std::endl; 29 | return -1; 30 | } 31 | cout <<"\ninput image size: "< 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "sobelEdgeDetectionFilter.cu" 10 | 11 | using namespace std; 12 | 13 | // Program main 14 | int main( int argc, char** argv ) { 15 | 16 | // name of image 17 | string image_name = "sample"; 18 | 19 | // input & output file names 20 | string input_file = image_name+".jpg"; 21 | string output_file_cpu = image_name+"_cpu.jpg"; 22 | string output_file_gpu = image_name+"_gpu.jpg"; 23 | 24 | // Read input image 25 | cv::Mat srcImage = cv::imread(input_file ,cv::IMREAD_UNCHANGED); 26 | if(srcImage.empty()) 27 | { 28 | std::cout<<"Image Not Found: "<< input_file << std::endl; 29 | return -1; 30 | } 31 | cout <<"\ninput image size: "< 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "laplacianFilter.cu" 10 | 11 | using namespace std; 12 | 13 | 14 | // Program main 15 | int main( int argc, char** argv ) { 16 | 17 | // name of image 18 | string image_name = "sample"; 19 | 20 | // input & output file names 21 | string input_file = image_name+".jpg"; 22 | string output_file_cpu = image_name+"_cpu.jpg"; 23 | string output_file_gpu = image_name+"_gpu.jpg"; 24 | 25 | // Read input image 26 | cv::Mat srcImage = cv::imread(input_file ,cv::IMREAD_UNCHANGED); 27 | if(srcImage.empty()) 28 | { 29 | std::cout<<"Image Not Found: "<< input_file << std::endl; 30 | return -1; 31 | } 32 | cout <<"\ninput image size: "< 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "cuda_runtime.h" 

#define BLOCK_SIZE      16
#define FILTER_WIDTH    3
#define FILTER_HEIGHT   3

using namespace std;

// Total Variation filter kernel.
//
// Launch layout: 2D grid of 2D blocks, one thread per pixel. The thread's
// pixel is (x, y) = (blockIdx.x*blockDim.x + threadIdx.x,
//                    blockIdx.y*blockDim.y + threadIdx.y).
//
// srcImage / dstImage are indexed as tightly packed rows of `width` bytes,
// one byte per pixel. For every interior pixel the kernel accumulates
// (neighbour - centre) over the FILTER_WIDTH x FILTER_HEIGHT window and
// stores the result saturated to [0, 255]. Threads outside the image and
// the border of FILTER_WIDTH/2 (FILTER_HEIGHT/2) pixels write nothing, so
// the caller owns the contents of the border in dstImage.
__global__ void tvFilter(unsigned char *srcImage, unsigned char *dstImage, unsigned int width, unsigned int height)
{
    const int x = blockIdx.x*blockDim.x + threadIdx.x;
    const int y = blockIdx.y*blockDim.y + threadIdx.y;

    // Signed copies so that "dimension - radius" cannot wrap around for tiny images.
    const int w  = static_cast<int>(width);
    const int h  = static_cast<int>(height);
    const int rx = FILTER_WIDTH/2;
    const int ry = FILTER_HEIGHT/2;

    // Only threads whose whole window lies inside the image produce a result.
    if (x < rx || x >= w - rx || y < ry || y >= h - ry)
        return;

    // The centre pixel does not depend on the window position: load it once.
    const float center = srcImage[y*w + x];

    // Sum of differences between every pixel of the window and the centre.
    float sod = 0.0f;
    for (int ky = -ry; ky <= ry; ky++) {
        for (int kx = -rx; kx <= rx; kx++) {
            const float fl = srcImage[(y+ky)*w + (x+kx)];
            sod += fl - center;
        }
    }

    // The sum can be negative or exceed 255; converting such a float to
    // unsigned char is undefined, so clamp explicitly before the store.
    dstImage[y*w + x] = static_cast<unsigned char>(fminf(fmaxf(sod, 0.0f), 255.0f));
}


// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool tvCudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    cerr << "CUDA error in " << what << ": " << cudaGetErrorString(status) << "\n";
    return false;
}


// Host wrapper: runs tvFilter on `input` and writes the result to `output`.
//
// input  - source image; read as one byte per pixel.
//          NOTE(review): presumably an 8-bit single-channel (grayscale) image
//          prepared by the caller - confirm against main.cu.
// output - destination image of the same size, allocated by the caller.
//          Border pixels (which the kernel never writes) are set to 0.
//
// Prints the kernel time measured with CUDA events. On any CUDA failure a
// diagnostic is printed to stderr, all resources are released and `output`
// is left in an unspecified state.
void tvFilter_GPU_wrapper(const cv::Mat& input, cv::Mat& output)
{
    // The kernel indexes the input with the output's dimensions, so a size
    // mismatch would read outside the input buffer.
    if (input.empty() || output.empty() || input.size() != output.size()) {
        cerr << "tvFilter_GPU_wrapper: input and output must be non-empty images of identical size\n";
        return;
    }

    const int cols = output.cols;
    const int rows = output.rows;

    // Device images are tightly packed: one byte per pixel, `cols` bytes per row.
    const size_t rowBytes   = static_cast<size_t>(cols);
    const size_t imageBytes = rowBytes * static_cast<size_t>(rows);

    cudaEvent_t start = nullptr, stop = nullptr;
    unsigned char *d_input = nullptr, *d_output = nullptr;
    float milliseconds = 0;

    // Set-up: events, device buffers, zeroed output (the border is never
    // written by the kernel), upload. cudaMemcpy2D honours the row stride of
    // the cv::Mat, so non-continuous images (ROIs) are copied correctly.
    bool ok =
        tvCudaOk(cudaEventCreate(&start), "cudaEventCreate(start)") &&
        tvCudaOk(cudaEventCreate(&stop), "cudaEventCreate(stop)") &&
        tvCudaOk(cudaMalloc(&d_input, imageBytes), "cudaMalloc(d_input)") &&
        tvCudaOk(cudaMalloc(&d_output, imageBytes), "cudaMalloc(d_output)") &&
        tvCudaOk(cudaMemset(d_output, 0, imageBytes), "cudaMemset(d_output)") &&
        tvCudaOk(cudaMemcpy2D(d_input, rowBytes, input.ptr(), static_cast<size_t>(input.step),
                              rowBytes, rows, cudaMemcpyHostToDevice), "cudaMemcpy2D(host to device)");

    if (ok) {
        // Specify block size
        const dim3 block(BLOCK_SIZE,BLOCK_SIZE);

        // Calculate grid size to cover the whole image
        const dim3 grid((cols + block.x - 1)/block.x, (rows + block.y - 1)/block.y);

        // Time only the kernel: start -> launch -> stop
        ok = tvCudaOk(cudaEventRecord(start), "cudaEventRecord(start)");
        if (ok) {
            // Run Total Variation Filter kernel on CUDA
            tvFilter<<<grid, block>>>(d_input, d_output, cols, rows);

            ok = tvCudaOk(cudaGetLastError(), "tvFilter launch") &&
                 tvCudaOk(cudaEventRecord(stop), "cudaEventRecord(stop)") &&
                 // Copy data from device memory to output image (blocking)
                 tvCudaOk(cudaMemcpy2D(output.ptr(), static_cast<size_t>(output.step), d_output, rowBytes,
                                       rowBytes, rows, cudaMemcpyDeviceToHost), "cudaMemcpy2D(device to host)") &&
                 tvCudaOk(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)") &&
                 // Calculate elapsed time in milliseconds
                 tvCudaOk(cudaEventElapsedTime(&milliseconds, start, stop), "cudaEventElapsedTime");
        }
    }

    // Best-effort clean-up; runs on success and on every failure path.
    cudaFree(d_input);
    cudaFree(d_output);
    if (start) cudaEventDestroy(start);
    if (stop)  cudaEventDestroy(stop);

    if (ok)
        cout<< "\nTotal processing time on GPU (ms): " << milliseconds << "\n";
}

--------------------------------------------------------------------------------
/Laplacian/laplacianFilter.cu:
--------------------------------------------------------------------------------
//
// CUDA implementation of Laplacian Filter
//
// NOTE(review): the header names of the six includes below were lost when
// this listing was extracted - restore them from the repository.
#include
#include
#include
#include
#include
#include
#include "cuda_runtime.h"

#define BLOCK_SIZE      16
#define FILTER_WIDTH    3
#define FILTER_HEIGHT   3

using namespace std;

// Laplacian filter kernel.
//
// Launch layout: 2D grid of 2D blocks, one thread per pixel. The thread's
// pixel is (x, y) = (blockIdx.x*blockDim.x + threadIdx.x,
//                    blockIdx.y*blockDim.y + threadIdx.y).
//
// srcImage / dstImage are indexed as tightly packed rows of `width` bytes,
// one byte per pixel. Every interior pixel receives the 3x3 weighted sum
// below, saturated to [0, 255]. Threads outside the image and the one-pixel
// border write nothing, so the caller owns the border contents of dstImage.
__global__ void laplacianFilter(unsigned char *srcImage, unsigned char *dstImage, unsigned int width, unsigned int height)
{
    // The weight table below is 3x3; the window loops must match it.
    static_assert(FILTER_WIDTH == 3 && FILTER_HEIGHT == 3, "laplacianFilter requires a 3x3 window");

    const int x = blockIdx.x*blockDim.x + threadIdx.x;
    const int y = blockIdx.y*blockDim.y + threadIdx.y;

    // 4-neighbour Laplacian weights.
    const float kernel[3][3] = {{ 0.0f, -1.0f,  0.0f},
                                {-1.0f,  4.0f, -1.0f},
                                { 0.0f, -1.0f,  0.0f}};
    // Alternative kept from the original source (8-neighbour weights):
    //float kernel[3][3] = {-1, -1, -1, -1, 8, -1, -1, -1, -1};

    // Signed copies so that "dimension - radius" cannot wrap around for tiny images.
    const int w  = static_cast<int>(width);
    const int h  = static_cast<int>(height);
    const int rx = FILTER_WIDTH/2;
    const int ry = FILTER_HEIGHT/2;

    // Only threads whose whole window lies inside the image produce a result.
    if (x < rx || x >= w - rx || y < ry || y >= h - ry)
        return;

    // Weighted sum of the window.
    float sum = 0.0f;
    for (int ky = -ry; ky <= ry; ky++) {
        for (int kx = -rx; kx <= rx; kx++) {
            const float fl = srcImage[(y+ky)*w + (x+kx)];
            sum += fl*kernel[ky+ry][kx+rx];
        }
    }

    // The response can be negative or exceed 255; converting such a float to
    // unsigned char is undefined, so clamp explicitly before the store.
    dstImage[y*w + x] = static_cast<unsigned char>(fminf(fmaxf(sum, 0.0f), 255.0f));
}


// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool laplacianCudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    cerr << "CUDA error in " << what << ": " << cudaGetErrorString(status) << "\n";
    return false;
}


// Host wrapper: runs laplacianFilter on `input` and writes the result to `output`.
//
// input  - source image; read as one byte per pixel.
//          NOTE(review): presumably an 8-bit single-channel (grayscale) image
//          prepared by the caller - confirm against main.cu.
// output - destination image of the same size, allocated by the caller.
//          Border pixels (which the kernel never writes) are set to 0.
//
// Prints the kernel time measured with CUDA events. On any CUDA failure a
// diagnostic is printed to stderr, all resources are released and `output`
// is left in an unspecified state.
void laplacianFilter_GPU_wrapper(const cv::Mat& input, cv::Mat& output)
{
    // The kernel indexes the input with the output's dimensions, so a size
    // mismatch would read outside the input buffer.
    if (input.empty() || output.empty() || input.size() != output.size()) {
        cerr << "laplacianFilter_GPU_wrapper: input and output must be non-empty images of identical size\n";
        return;
    }

    const int cols = output.cols;
    const int rows = output.rows;

    // Device images are tightly packed: one byte per pixel, `cols` bytes per row.
    const size_t rowBytes   = static_cast<size_t>(cols);
    const size_t imageBytes = rowBytes * static_cast<size_t>(rows);

    cudaEvent_t start = nullptr, stop = nullptr;
    unsigned char *d_input = nullptr, *d_output = nullptr;
    float milliseconds = 0;

    // Set-up: events, device buffers, zeroed output (the border is never
    // written by the kernel), upload. cudaMemcpy2D honours the row stride of
    // the cv::Mat, so non-continuous images (ROIs) are copied correctly.
    bool ok =
        laplacianCudaOk(cudaEventCreate(&start), "cudaEventCreate(start)") &&
        laplacianCudaOk(cudaEventCreate(&stop), "cudaEventCreate(stop)") &&
        laplacianCudaOk(cudaMalloc(&d_input, imageBytes), "cudaMalloc(d_input)") &&
        laplacianCudaOk(cudaMalloc(&d_output, imageBytes), "cudaMalloc(d_output)") &&
        laplacianCudaOk(cudaMemset(d_output, 0, imageBytes), "cudaMemset(d_output)") &&
        laplacianCudaOk(cudaMemcpy2D(d_input, rowBytes, input.ptr(), static_cast<size_t>(input.step),
                                     rowBytes, rows, cudaMemcpyHostToDevice), "cudaMemcpy2D(host to device)");

    if (ok) {
        // Specify block size
        const dim3 block(BLOCK_SIZE,BLOCK_SIZE);

        // Calculate grid size to cover the whole image
        const dim3 grid((cols + block.x - 1)/block.x, (rows + block.y - 1)/block.y);

        // Time only the kernel: start -> launch -> stop
        ok = laplacianCudaOk(cudaEventRecord(start), "cudaEventRecord(start)");
        if (ok) {
            // Run Laplacian Filter kernel on CUDA
            laplacianFilter<<<grid, block>>>(d_input, d_output, cols, rows);

            ok = laplacianCudaOk(cudaGetLastError(), "laplacianFilter launch") &&
                 laplacianCudaOk(cudaEventRecord(stop), "cudaEventRecord(stop)") &&
                 // Copy data from device memory to output image (blocking)
                 laplacianCudaOk(cudaMemcpy2D(output.ptr(), static_cast<size_t>(output.step), d_output, rowBytes,
                                              rowBytes, rows, cudaMemcpyDeviceToHost), "cudaMemcpy2D(device to host)") &&
                 laplacianCudaOk(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)") &&
                 // Calculate elapsed time in milliseconds
                 laplacianCudaOk(cudaEventElapsedTime(&milliseconds, start, stop), "cudaEventElapsedTime");
        }
    }

    // Best-effort clean-up; runs on success and on every failure path.
    cudaFree(d_input);
    cudaFree(d_output);
    if (start) cudaEventDestroy(start);
    if (stop)  cudaEventDestroy(stop);

    if (ok)
        cout<< "\nProcessing time for GPU (ms): " << milliseconds << "\n";
}

--------------------------------------------------------------------------------
/BoxFilter/boxFilter.cu:
-------------------------------------------------------------------------------- 1 | // 2 | // CUDA implementation of Box Filter 3 | // 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "cuda_runtime.h" 11 | 12 | #define BLOCK_SIZE 16 13 | #define FILTER_WIDTH 3 14 | #define FILTER_HEIGHT 3 15 | 16 | using namespace std; 17 | 18 | // Run Box Filter on GPU 19 | __global__ void boxFilter(unsigned char *srcImage, unsigned char *dstImage, unsigned int width, unsigned int height, int channel) 20 | { 21 | int x = blockIdx.x*blockDim.x + threadIdx.x; 22 | int y = blockIdx.y*blockDim.y + threadIdx.y; 23 | 24 | // only threads inside image will write results 25 | if((x>=FILTER_WIDTH/2) && (x<(width-FILTER_WIDTH/2)) && (y>=FILTER_HEIGHT/2) && (y<(height-FILTER_HEIGHT/2))) 26 | { 27 | for(int c=0 ; c(&d_input,inputSize); 65 | cudaMalloc(&d_output,outputSize); 66 | 67 | // Copy data from OpenCV input image to device memory 68 | cudaMemcpy(d_input,input.ptr(),inputSize,cudaMemcpyHostToDevice); 69 | 70 | // Specify block size 71 | const dim3 block(BLOCK_SIZE,BLOCK_SIZE); 72 | 73 | // Calculate grid size to cover the whole image 74 | const dim3 grid((output.cols + block.x - 1)/block.x, (output.rows + block.y - 1)/block.y); 75 | 76 | // Start time 77 | cudaEventRecord(start); 78 | 79 | // Run BoxFilter kernel on CUDA 80 | boxFilter<<>>(d_input, d_output, output.cols, output.rows, channel); 81 | 82 | // Stop time 83 | cudaEventRecord(stop); 84 | 85 | //Copy data from device memory to output image 86 | cudaMemcpy(output.ptr(),d_output,outputSize,cudaMemcpyDeviceToHost); 87 | 88 | //Free the device memory 89 | cudaFree(d_input); 90 | cudaFree(d_output); 91 | 92 | cudaEventSynchronize(stop); 93 | float milliseconds = 0; 94 | 95 | // Calculate elapsed time in milisecond 96 | cudaEventElapsedTime(&milliseconds, start, stop); 97 | cout<< "\nProcessing time for GPU (ms): " << milliseconds << "\n"; 98 | } 99 | 100 | 101 | 102 | 103 | 104 | 105 | 
106 | 107 | 108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /Sharpening/sharpeningFilter.cu: -------------------------------------------------------------------------------- 1 | // 2 | // CUDA implementation of Image Sharpening Filter 3 | // 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "cuda_runtime.h" 11 | 12 | #define BLOCK_SIZE 16 13 | #define FILTER_WIDTH 3 14 | #define FILTER_HEIGHT 3 15 | 16 | using namespace std; 17 | 18 | // Run Sharpening Filter on GPU 19 | __global__ void sharpeningFilter(unsigned char *srcImage, unsigned char *dstImage, unsigned int width, unsigned int height, int channel) 20 | { 21 | int x = blockIdx.x*blockDim.x + threadIdx.x; 22 | int y = blockIdx.y*blockDim.y + threadIdx.y; 23 | 24 | float kernel[FILTER_WIDTH][FILTER_HEIGHT] = {-1, -1, -1, -1, 9, -1, -1, -1, -1}; 25 | // only threads inside image will write results 26 | if((x>=FILTER_WIDTH/2) && (x<(width-FILTER_WIDTH/2)) && (y>=FILTER_HEIGHT/2) && (y<(height-FILTER_HEIGHT/2))) 27 | { 28 | for(int c=0 ; c(&d_input,inputSize); 63 | cudaMalloc(&d_output,outputSize); 64 | 65 | // Copy data from OpenCV input image to device memory 66 | cudaMemcpy(d_input,input.ptr(),inputSize,cudaMemcpyHostToDevice); 67 | 68 | // Specify block size 69 | const dim3 block(BLOCK_SIZE,BLOCK_SIZE); 70 | 71 | // Calculate grid size to cover the whole image 72 | const dim3 grid((output.cols + block.x - 1)/block.x, (output.rows + block.y - 1)/block.y); 73 | 74 | // Start time 75 | cudaEventRecord(start); 76 | 77 | // Run BoxFilter kernel on CUDA 78 | sharpeningFilter<<>>(d_input, d_output, output.cols, output.rows, channel); 79 | 80 | // Stop time 81 | cudaEventRecord(stop); 82 | 83 | //Copy data from device memory to output image 84 | cudaMemcpy(output.ptr(),d_output,outputSize,cudaMemcpyDeviceToHost); 85 | 86 | //Free the device memory 87 | cudaFree(d_input); 88 | cudaFree(d_output); 89 | 90 | 
cudaEventSynchronize(stop); 91 | float milliseconds = 0; 92 | 93 | // Calculate elapsed time in milisecond 94 | cudaEventElapsedTime(&milliseconds, start, stop); 95 | cout<< "\nProcessing time on GPU (ms): " << milliseconds << "\n"; 96 | } 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | -------------------------------------------------------------------------------- /SobelEdgeDetect/sobelEdgeDetectionFilter.cu: -------------------------------------------------------------------------------- 1 | // 2 | // CUDA implementation of Sobel Edge Detect Filter 3 | // 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "cuda_runtime.h" 11 | 12 | #define BLOCK_SIZE 16 13 | #define FILTER_WIDTH 3 14 | #define FILTER_HEIGHT 3 15 | 16 | using namespace std; 17 | 18 | // Run Sobel Edge Detect Filter on GPU 19 | __global__ void sobelFilter(unsigned char *srcImage, unsigned char *dstImage, unsigned int width, unsigned int height) 20 | { 21 | int x = blockIdx.x*blockDim.x + threadIdx.x; 22 | int y = blockIdx.y*blockDim.y + threadIdx.y; 23 | 24 | float Kx[3][3] = {-1, 0, 1, -2, 0, 2, -1, 0, 1}; 25 | float Ky[3][3] = {1, 2, 1, 0, 0, 0, -1, -2, -1}; 26 | 27 | // only threads inside image will write results 28 | if((x>=FILTER_WIDTH/2) && (x<(width-FILTER_WIDTH/2)) && (y>=FILTER_HEIGHT/2) && (y<(height-FILTER_HEIGHT/2))) 29 | { 30 | // Gradient in x-direction 31 | float Gx = 0; 32 | // Loop inside the filter to average pixel values 33 | for(int ky=-FILTER_HEIGHT/2; ky<=FILTER_HEIGHT/2; ky++) { 34 | for(int kx=-FILTER_WIDTH/2; kx<=FILTER_WIDTH/2; kx++) { 35 | float fl = srcImage[((y+ky)*width + (x+kx))]; 36 | Gx += fl*Kx[ky+FILTER_HEIGHT/2][kx+FILTER_WIDTH/2]; 37 | } 38 | } 39 | float Gx_abs = Gx < 0 ? 
-Gx : Gx; 40 | 41 | // Gradient in y-direction 42 | float Gy = 0; 43 | // Loop inside the filter to average pixel values 44 | for(int ky=-FILTER_HEIGHT/2; ky<=FILTER_HEIGHT/2; ky++) { 45 | for(int kx=-FILTER_WIDTH/2; kx<=FILTER_WIDTH/2; kx++) { 46 | float fl = srcImage[((y+ky)*width + (x+kx))]; 47 | Gy += fl*Ky[ky+FILTER_HEIGHT/2][kx+FILTER_WIDTH/2]; 48 | } 49 | } 50 | float Gy_abs = Gy < 0 ? -Gy : Gy; 51 | 52 | dstImage[(y*width+x)] = Gx_abs + Gy_abs; 53 | } 54 | } 55 | 56 | 57 | // The wrapper is use to call sobel edge detection filter 58 | void sobelFilter_GPU_wrapper(const cv::Mat& input, cv::Mat& output) 59 | { 60 | // Use cuda event to catch time 61 | cudaEvent_t start, stop; 62 | cudaEventCreate(&start); 63 | cudaEventCreate(&stop); 64 | 65 | // Calculate number of input & output bytes in each block 66 | const int inputSize = input.cols * input.rows; 67 | const int outputSize = output.cols * output.rows; 68 | unsigned char *d_input, *d_output; 69 | 70 | // Allocate device memory 71 | cudaMalloc(&d_input,inputSize); 72 | cudaMalloc(&d_output,outputSize); 73 | 74 | // Copy data from OpenCV input image to device memory 75 | cudaMemcpy(d_input,input.ptr(),inputSize,cudaMemcpyHostToDevice); 76 | 77 | // Specify block size 78 | const dim3 block(BLOCK_SIZE,BLOCK_SIZE); 79 | 80 | // Calculate grid size to cover the whole image 81 | const dim3 grid((output.cols + block.x - 1)/block.x, (output.rows + block.y - 1)/block.y); 82 | 83 | // Start time 84 | cudaEventRecord(start); 85 | 86 | // Run Sobel Edge Detection Filter kernel on CUDA 87 | sobelFilter<<>>(d_input, d_output, output.cols, output.rows); 88 | 89 | // Stop time 90 | cudaEventRecord(stop); 91 | 92 | //Copy data from device memory to output image 93 | cudaMemcpy(output.ptr(),d_output,outputSize,cudaMemcpyDeviceToHost); 94 | 95 | //Free the device memory 96 | cudaFree(d_input); 97 | cudaFree(d_output); 98 | 99 | cudaEventSynchronize(stop); 100 | float milliseconds = 0; 101 | 102 | // Calculate elapsed time in 
milisecond 103 | cudaEventElapsedTime(&milliseconds, start, stop); 104 | cout<< "\nProcessing time on GPU (ms): " << milliseconds << "\n"; 105 | } 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /Median/medianFilter.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #define BLOCK_SIZE 16 12 | #define FILTER_WIDTH 3 13 | #define FILTER_HEIGHT 3 14 | 15 | using namespace std; 16 | 17 | // Sort function on device 18 | __device__ void sort(unsigned char* filterVector) 19 | { 20 | for (int i = 0; i < FILTER_WIDTH*FILTER_HEIGHT; i++) { 21 | for (int j = i + 1; j < FILTER_WIDTH*FILTER_HEIGHT; j++) { 22 | if (filterVector[i] > filterVector[j]) { 23 | //Swap the variables 24 | unsigned char tmp = filterVector[i]; 25 | filterVector[i] = filterVector[j]; 26 | filterVector[j] = tmp; 27 | } 28 | } 29 | } 30 | } 31 | 32 | // Run Median Filter on GPU 33 | __global__ void medianFilter(unsigned char *srcImage, unsigned char *dstImage, unsigned int width, unsigned int height, int channel) 34 | { 35 | int x = blockIdx.x*blockDim.x + threadIdx.x; 36 | int y = blockIdx.y*blockDim.y + threadIdx.y; 37 | 38 | // only threads inside image will write results 39 | if((x>=FILTER_WIDTH/2) && (x<(width-FILTER_WIDTH/2)) && (y>=FILTER_HEIGHT/2) && (y<(height-FILTER_HEIGHT/2))) 40 | { 41 | for(int c=0 ; c(&d_input,inputSize); 76 | cudaMalloc(&d_output,outputSize); 77 | 78 | // Copy data from OpenCV input image to device memory 79 | cudaMemcpy(d_input,input.ptr(),inputSize,cudaMemcpyHostToDevice); 80 | 81 | // Specify block size 82 | const dim3 block(BLOCK_SIZE,BLOCK_SIZE); 83 | 84 | // Calculate grid size to cover the whole image 85 | const dim3 grid((output.cols + block.x - 1)/block.x, (output.rows + block.y - 1)/block.y); 
86 | 87 | // Start time 88 | cudaEventRecord(start); 89 | 90 | // Run BoxFilter kernel on CUDA 91 | medianFilter<<>>(d_input, d_output, output.cols, output.rows, channel); 92 | 93 | // Stop time 94 | cudaEventRecord(stop); 95 | 96 | //Copy data from device memory to output image 97 | cudaMemcpy(output.ptr(),d_output,outputSize,cudaMemcpyDeviceToHost); 98 | 99 | //Free the device memory 100 | cudaFree(d_input); 101 | cudaFree(d_output); 102 | 103 | cudaEventSynchronize(stop); 104 | float milliseconds = 0; 105 | 106 | // Calculate elapsed time in milisecond 107 | cudaEventElapsedTime(&milliseconds, start, stop); 108 | cout<< "\nProcessing time on GPU (ms): " << milliseconds << "\n"; 109 | } 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | --------------------------------------------------------------------------------