// Repository layout (kept from the original dump):
//   ├── image.bmp
//   ├── kernel.cu
//   └── result.bmp
//
// /image.bmp:
//   https://raw.githubusercontent.com/Sunagatov/MedianFilter/4421bd89a796a72a8699ddb122117cb7a0d4df62/image.bmp
// /result.bmp:
//   https://raw.githubusercontent.com/Sunagatov/MedianFilter/4421bd89a796a72a8699ddb122117cb7a0d4df62/result.bmp
//
// /kernel.cu:
//   3x3 median filter for an 8-bit grayscale image. The image is uploaded to
//   pitched device memory, read through a 2D texture reference, filtered by
//   one thread per pixel, and written to result.bmp.

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

// NOTE(review): the original angle-bracket include targets were lost when the
// file was extracted; the headers below are reconstructed from what the code
// actually uses (fprintf/getchar, abort, cout/cerr, cv::Mat/imread/imwrite).
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <opencv2/opencv.hpp>

#define BLOCK_SIZE 16

// Aborts with file/line and the CUDA error string when a runtime call fails,
// so that an early failure is not silently carried into later calls.
#define CUDA_CHECK(call)                                                      \
    do {                                                                      \
        cudaError_t err_ = (call);                                            \
        if (err_ != cudaSuccess) {                                            \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,     \
                    cudaGetErrorString(err_));                                \
            abort();                                                          \
        }                                                                     \
    } while (0)

using namespace std;
using namespace cv;

// NOTE(review): the template arguments were lost in extraction. They are
// reconstructed from usage: the bound buffer holds unsigned char, it is bound
// with cudaBindTexture2D, and the fetched value is written straight back to an
// unsigned char output (so element read mode, not normalized float) - confirm.
// Texture references are deprecated in CUDA 11 and removed in CUDA 12; building
// with a newer toolkit requires migrating to cudaTextureObject_t.
texture<unsigned char, 2, cudaReadModeElementType> inTexture;

// Sorts arr[left..right] (both bounds inclusive) in ascending order, in place.
//
// Implemented as an iterative insertion sort: the only caller sorts a fixed
// 9-element window, and the previous recursive quicksort kept its pivot and
// swap temporary in `int`, truncating the float values it was sorting.
__device__ inline void sortArr(float arr[], int left, int right) {
    for (int i = left + 1; i <= right; i++) {
        const float key = arr[i];
        int j = i - 1;
        // Shift larger elements one slot right to open a gap for `key`.
        while (j >= left && arr[j] > key) {
            arr[j + 1] = arr[j];
            j--;
        }
        arr[j + 1] = key;
    }
}

// Computes the 3x3 median of every pixel; one thread per output pixel.
//
// Launch layout: 2D grid of 2D blocks covering at least width x height
// threads; surplus threads exit early.
//
//   input  - not read by the kernel (pixels are fetched through inTexture);
//            kept so the launch signature stays unchanged.
//   output - device buffer of width * height bytes, rows tightly packed.
//   width  - image width in pixels.
//   height - image height in pixels.
//
// inTexture must be bound to the source image before launch. Neighbours that
// fall outside the image are resolved by the texture's addressing mode.
__global__ void gpuCalculation(unsigned char* input, unsigned char* output, int width, int height) {
    (void)input;

    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x >= width || y >= height)
        return;

    // Gather the 3x3 neighbourhood row by row.
    float window[9];
    int count = 0;
    for (int dy = -1; dy <= 1; dy++) {
        for (int dx = -1; dx <= 1; dx++) {
            window[count++] = tex2D(inTexture, x + dx, y + dy);
        }
    }

    sortArr(window, 0, 8);

    // The middle of the 9 sorted samples is the median.
    output[y * width + x] = static_cast<unsigned char>(window[4]);
}

// Applies the 3x3 median filter to `input` on the GPU and stores the result in
// `output`, printing the kernel execution time in milliseconds.
//
//   input  - source image; must be non-empty and of type CV_8UC1.
//   output - destination image; (re)allocated to input's size as CV_8UC1.
//
// If `input` is empty or has another type, a message is written to cerr and
// `output` is left untouched. Any CUDA failure aborts the process (CUDA_CHECK).
void medianFilter(const Mat& input, Mat& output) {

    if (input.empty() || input.type() != CV_8UC1) {
        cerr << "medianFilter: expected a non-empty 8-bit single-channel image" << endl;
        return;
    }
    // No-op when output already has the right size and type.
    output.create(input.rows, input.cols, CV_8UC1);

    const size_t rowBytes = static_cast<size_t>(input.cols) * sizeof(unsigned char);
    const size_t outBytes = rowBytes * static_cast<size_t>(input.rows);

    cudaEvent_t start, stop;
    float time = 0.0f;
    CUDA_CHECK(cudaEventCreate(&start));
    CUDA_CHECK(cudaEventCreate(&stop));

    size_t pitch = 0;
    unsigned char* d_input = NULL;
    unsigned char* d_output = NULL;

    // Upload only the pixel bytes of each row; the host row stride
    // (input.step) may be larger than the row's pixel data.
    CUDA_CHECK(cudaMallocPitch(&d_input, &pitch, rowBytes, input.rows));
    CUDA_CHECK(cudaMemcpy2D(d_input, pitch, input.ptr(), input.step[0], rowBytes, input.rows, cudaMemcpyHostToDevice));

    // Spell out the sampling state the kernel relies on: integer pixel
    // coordinates, no interpolation, out-of-range reads clamped to the edge.
    inTexture.normalized = false;
    inTexture.filterMode = cudaFilterModePoint;
    inTexture.addressMode[0] = cudaAddressModeClamp;
    inTexture.addressMode[1] = cudaAddressModeClamp;

    // Width is given in texels (pixels), not in bytes of host row stride.
    CUDA_CHECK(cudaBindTexture2D(0, inTexture, d_input, input.cols, input.rows, pitch));
    CUDA_CHECK(cudaMalloc(&d_output, outBytes));

    dim3 block(BLOCK_SIZE, BLOCK_SIZE);
    dim3 grid((input.cols + block.x - 1) / block.x, (input.rows + block.y - 1) / block.y);

    CUDA_CHECK(cudaEventRecord(start, 0));

    // NOTE(review): the launch configuration was lost in extraction; grid and
    // block are the only dim3 values prepared for it above.
    gpuCalculation<<<grid, block>>>(d_input, d_output, input.cols, input.rows);
    CUDA_CHECK(cudaGetLastError());

    CUDA_CHECK(cudaEventRecord(stop, 0));
    CUDA_CHECK(cudaEventSynchronize(stop));

    // The kernel writes tightly packed rows; copy row by row so the result
    // lands correctly whatever the host row stride of `output` is.
    CUDA_CHECK(cudaMemcpy2D(output.ptr(), output.step[0], d_output, rowBytes, rowBytes, input.rows, cudaMemcpyDeviceToHost));

    CUDA_CHECK(cudaUnbindTexture(inTexture));
    CUDA_CHECK(cudaFree(d_input));
    CUDA_CHECK(cudaFree(d_output));

    CUDA_CHECK(cudaEventElapsedTime(&time, start, stop));
    CUDA_CHECK(cudaEventDestroy(start));
    CUDA_CHECK(cudaEventDestroy(stop));

    cout << "Time on GPU: " << time << " milliseconds" << endl;
}

// Reads image.bmp as grayscale, median-filters it on the GPU, writes the
// result to result.bmp, then waits for a key press before exiting.
// Returns 1 if the input image cannot be read.
int main() {

    Mat input = imread("image.bmp", IMREAD_GRAYSCALE);
    if (input.empty()) {
        cerr << "Could not read image.bmp" << endl;
        return 1;
    }

    // The filter call had been commented out, so result.bmp was just a copy
    // of the unfiltered input.
    Mat output_own(input.rows, input.cols, CV_8UC1);
    medianFilter(input, output_own);
    imwrite("result.bmp", output_own);

    getchar();
    return 0;
}