├── .gitignore ├── README.md ├── cudaConstantMemoryConvolution ├── Image.cpp ├── Image.h ├── PPM.cpp ├── PPM.h ├── Utils.h ├── img │ ├── computer_programming.ppm │ └── panda.ppm ├── mainConstant.cu └── output │ └── result.ppm ├── cudaConstantSharedConvolution ├── Image.cpp ├── Image.h ├── PPM.cpp ├── PPM.h ├── Utils.h ├── img │ ├── computer_programming.ppm │ └── panda.ppm ├── mainConstantShared.cu └── output │ └── result.ppm ├── cudaGlobalMemoryConvolution ├── Image.cpp ├── Image.h ├── PPM.cpp ├── PPM.h ├── Utils.h ├── img │ ├── computer_programming.ppm │ └── panda.ppm ├── mainGlobal.cu └── output │ └── result.ppm ├── cudaSharedMemoryConvolution ├── Image.cpp ├── Image.h ├── PPM.cpp ├── PPM.h ├── Utils.h ├── img │ ├── computer_programming.ppm │ └── panda.ppm ├── mainShared.cu └── output │ └── result.ppm ├── paper_image-convolution.pdf ├── presentation_image-convolution.pdf └── sequentialConvolution ├── Image.cpp ├── Image.h ├── PPM.cpp ├── PPM.h ├── Utils.h ├── img ├── computer_programming.ppm └── panda.ppm ├── main.cu └── output └── result.ppm /.gitignore: -------------------------------------------------------------------------------- 1 | *.i 2 | *.ii 3 | *.gpu 4 | *.ptx 5 | *.cubin 6 | *.fatbin 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ImageConvolution 2 | Implementations of a parallel 2D Image Convolution algorithm with CUDA (using global memory, shared memory and constant memory) and C++11 3 | 4 | cudaGlobalMemoryConvolution ---> using global memory of GPU 5 | 6 | cudaConstantMemoryConvolution ---> using global memory and the mask in constant memory 7 | 8 | cudaSharedMemoryConvolution ---> using shared memory of GPU (tiling) 9 | 10 | cudaConstantSharedConvolution ---> using shared memory and the mask in constant memory (tiling) 11 | 12 | * .ppm image format is used 13 | * chrono library is used to measure the 
execution time 14 | 15 | ------------------------------------------------------ 16 | INPUT: 17 | 18 | ![alt text](https://github.com/pietrobongini/CUDA-ImageConvolution/blob/master/sequentialConvolution/img/computer_programming.ppm) 19 | 20 | OUTPUT: 21 | 22 | ![alt text](https://github.com/pietrobongini/CUDA-ImageConvolution/blob/master/cudaGlobalMemoryConvolution/output/result.ppm) 23 | -------------------------------------------------------------------------------- /cudaConstantMemoryConvolution/Image.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by pietro bongini on 28/09/17. 3 | // 4 | 5 | #include "Image.h" 6 | #include "Utils.h" 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | //metodo che restituisce l'immagine 13 | Image_t* Image_new(int width, int height, int channels, float *data) { 14 | Image_t* img; 15 | 16 | img = (Image_t*) malloc(sizeof(Image_t)); 17 | 18 | Image_setWidth(img, width); 19 | Image_setHeight(img, height); 20 | Image_setChannels(img, channels); 21 | Image_setPitch(img, width * channels); 22 | 23 | Image_setData(img, data); 24 | return img; 25 | } 26 | 27 | Image_t* Image_new(int width, int height, int channels) { 28 | float *data = (float*) malloc(sizeof(float) * width * height * channels); 29 | return Image_new(width, height, channels, data); 30 | } 31 | 32 | Image_t* Image_new(int width, int height) { 33 | return Image_new(width, height, Image_channels); 34 | } 35 | 36 | //metodo che cancella l'immagine 37 | void Image_delete(Image_t* img) { 38 | if (img != NULL) { 39 | if (Image_getData(img) != NULL) { 40 | free(Image_getData(img)); 41 | } 42 | free(img); 43 | } 44 | } 45 | 46 | //metodo setter per un pixel 47 | void Image_setPixel(Image_t* img, int x, int y, int c, float val) { 48 | float *data = Image_getData(img); 49 | int channels = Image_getChannels(img); 50 | int pitch = Image_getPitch(img); 51 | 52 | data[y * pitch + x * channels + c] = val; 53 | 54 | return; 
55 | } 56 | 57 | //metodo getter per un pixel 58 | float Image_getPixel(Image_t* img, int x, int y, int c) { 59 | float *data = Image_getData(img); 60 | int channels = Image_getChannels(img); 61 | int pitch = Image_getPitch(img); 62 | 63 | return data[y * pitch + x * channels + c]; 64 | } 65 | 66 | //confronto tra immagini 67 | bool Image_is_same(Image_t* a, Image_t* b) { 68 | if (a == NULL || b == NULL) { 69 | std::cerr << "Comparing null images." << std::endl; 70 | return false; 71 | } else if (a == b) { 72 | return true; 73 | } else if (Image_getWidth(a) != Image_getWidth(b)) { 74 | std::cerr << "Image widths do not match." << std::endl; 75 | return false; 76 | } else if (Image_getHeight(a) != Image_getHeight(b)) { 77 | std::cerr << "Image heights do not match." << std::endl; 78 | return false; 79 | } else if (Image_getChannels(a) != Image_getChannels(b)) { 80 | std::cerr << "Image channels do not match." << std::endl; 81 | return false; 82 | } else { 83 | float *aData, *bData; 84 | int width, height, channels; 85 | int ii, jj, kk; 86 | 87 | aData = Image_getData(a); 88 | bData = Image_getData(b); 89 | 90 | assert(aData != NULL); 91 | assert(bData != NULL); 92 | 93 | width = Image_getWidth(a); 94 | height = Image_getHeight(a); 95 | channels = Image_getChannels(a); 96 | 97 | for (ii = 0; ii < height; ii++) { 98 | for (jj = 0; jj < width; jj++) { 99 | for (kk = 0; kk < channels; kk++) { 100 | float x, y; 101 | if (channels <= 3) { 102 | x = clamp(*aData++, 0, 1); 103 | y = clamp(*bData++, 0, 1); 104 | } else { 105 | x = *aData++; 106 | y = *bData++; 107 | } 108 | if (almostUnequalFloat(x, y)) { 109 | std::cerr 110 | << "Image pixels do not match at position ( row = " 111 | << ii << ", col = " << jj << ", channel = " 112 | << kk << ") expecting a value of " << y 113 | << " but got a value of " << x << std::endl; 114 | 115 | return false; 116 | } 117 | } 118 | } 119 | } 120 | return true; 121 | } 122 | } 123 | 
// ===== /cudaConstantMemoryConvolution/Image.h =====
//
// Created by pietro bongini on 28/09/17.
//

#ifndef KERNELPROCESSING_IMAGE_H
#define KERNELPROCESSING_IMAGE_H


#ifndef IMAGE_H_
#define IMAGE_H_

// Image descriptor: dimensions plus a pointer to interleaved float samples.
typedef struct {
    int width;     // pixels per row
    int height;    // number of rows
    int channels;  // samples per pixel
    int pitch;     // floats per row (Image_new sets it to width * channels)
    float *data;   // pixel buffer, released by Image_delete()
} Image_t;

// Default channel count used by Image_new(width, height).
#define Image_channels 3

// Field getters. Each expands to an lvalue, which the setters below rely on.
#define Image_getWidth(img) ((img)->width)
#define Image_getHeight(img) ((img)->height)
#define Image_getChannels(img) ((img)->channels)
#define Image_getPitch(img) ((img)->pitch)
#define Image_getData(img) ((img)->data)

// Field setters. `val` is parenthesised so the expansion never depends on
// the precedence of the argument expression.
#define Image_setWidth(img, val) (Image_getWidth(img) = (val))
#define Image_setHeight(img, val) (Image_getHeight(img) = (val))
#define Image_setChannels(img, val) (Image_getChannels(img) = (val))
#define Image_setPitch(img, val) (Image_getPitch(img) = (val))
#define Image_setData(img, val) (Image_getData(img) = (val))

// Image construction.
Image_t* Image_new(int width, int height, int channels, float *data);
Image_t* Image_new(int width, int height, int channels);
Image_t* Image_new(int width, int height);

// Per-pixel accessors (x = column, y = row, c = channel).
float Image_getPixel(Image_t* img, int x, int y, int c);
void Image_setPixel(Image_t* img, int x, int y, int c, float val);

// Releases an image created with Image_new().
void Image_delete(Image_t* img);

// Approximate image comparison.
bool Image_is_same(Image_t* a, Image_t* b);

#endif /* IMAGE_H_ */



#endif //KERNELPROCESSING_IMAGE_H
-------------------------------------------------------------------------------- /cudaConstantMemoryConvolution/PPM.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by pietro bongini on 28/09/17. 3 | // 4 | 5 | #include "PPM.h" 6 | #include "Utils.h" 7 | #include "Image.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | using namespace std; 15 | 16 | #define PPMREADBUFLEN 256 17 | 18 | static const char *skipSpaces(const char *line) { 19 | while (*line == ' ' || *line == '\t') { 20 | line++; 21 | if (*line == '\0') { 22 | break; 23 | } 24 | } 25 | return line; 26 | } 27 | 28 | static char nextNonSpaceChar(const char *line0) { 29 | const char *line = skipSpaces(line0); 30 | return *line; 31 | } 32 | 33 | static bool isComment(const char *line) { 34 | char nextChar = nextNonSpaceChar(line); 35 | if (nextChar == '\0') { 36 | return true; 37 | } else { 38 | return nextChar == '#'; 39 | } 40 | } 41 | 42 | static void parseDimensions(const char *line0, int *width, int *height) { 43 | const char *line = skipSpaces(line0); 44 | sscanf(line, "%d %d", width, height); 45 | } 46 | 47 | static void parseDimensions(const char *line0, int *width, int *height, 48 | int *channels) { 49 | const char *line = skipSpaces(line0); 50 | sscanf(line, "%d %d %d", width, height, channels); 51 | } 52 | 53 | static void parseDepth(const char *line0, int *depth) { 54 | const char *line = skipSpaces(line0); 55 | sscanf(line, "%d", depth); 56 | } 57 | 58 | static char *File_readLine(FILE* file) { 59 | static char buffer[PPMREADBUFLEN]; 60 | if (file == NULL) { 61 | return NULL; 62 | } 63 | memset(buffer, 0, PPMREADBUFLEN); 64 | 65 | if (fgets(buffer, PPMREADBUFLEN - 1, file)) { 66 | return buffer; 67 | } else { 68 | return NULL; 69 | } 70 | } 71 | 72 | static char *nextLine(FILE* file) { 73 | char *line = NULL; 74 | while ((line = File_readLine(file)) != NULL) { 75 | if (!isComment(line)) { 76 | break; 77 | } 
78 | } 79 | return line; 80 | } 81 | 82 | char* File_read(FILE* file, size_t size, size_t count) { 83 | size_t res; 84 | char *buffer; 85 | size_t bufferLen; 86 | 87 | if (file == NULL) { 88 | return NULL; 89 | } 90 | 91 | bufferLen = size * count + 1; 92 | buffer = (char*) malloc(sizeof(char) * bufferLen); 93 | 94 | res = fread(buffer, size, count, file); 95 | // make valid C string 96 | buffer[size * res] = '\0'; 97 | 98 | return buffer; 99 | } 100 | 101 | bool File_write(FILE* file, const void *buffer, size_t size, size_t count) { 102 | if (file == NULL) { 103 | return false; 104 | } 105 | 106 | size_t res = fwrite(buffer, size, count, file); 107 | if (res != count) { 108 | printf("ERROR: Failed to write data to PPM file"); 109 | } 110 | 111 | return true; 112 | } 113 | 114 | Image_t* PPM_import(const char *filename) { 115 | Image_t* img; 116 | FILE* file; 117 | char *header; 118 | char *line; 119 | int ii, jj, kk, channels; 120 | int width, height, depth; 121 | unsigned char *charData, *charIter; 122 | float *imgData, *floatIter; 123 | float scale; 124 | 125 | img = NULL; 126 | 127 | file = fopen(filename, "rb"); 128 | if (file == NULL) { 129 | printf("Could not open %s\n", filename); 130 | goto cleanup; 131 | } 132 | 133 | header = File_readLine(file); 134 | if (header == NULL) { 135 | printf("Could not read from %s\n", filename); 136 | goto cleanup; 137 | } else if (strcmp(header, "P6") != 0 && strcmp(header, "P6\n") != 0 138 | && strcmp(header, "P5") != 0 && strcmp(header, "P5\n") != 0 139 | && strcmp(header, "S6") != 0 && strcmp(header, "S6\n") != 0) { 140 | printf("Could not find magic number for %s\n", filename); 141 | goto cleanup; 142 | } 143 | 144 | // P5 are monochrome while P6/S6 are RGB 145 | // S6 needs to parse number of channels out of file 146 | if (strcmp(header, "P5") == 0 || strcmp(header, "P5\n") == 0) { 147 | channels = 1; 148 | line = nextLine(file); 149 | parseDimensions(line, &width, &height); 150 | } else if (strcmp(header, "P6") == 0 
|| strcmp(header, "P6\n") == 0) { 151 | channels = 3; 152 | line = nextLine(file); 153 | parseDimensions(line, &width, &height); 154 | } else { 155 | line = nextLine(file); 156 | parseDimensions(line, &width, &height, &channels); 157 | } 158 | 159 | // the line now contains the depth information 160 | line = nextLine(file); 161 | parseDepth(line, &depth); 162 | 163 | // the rest of the lines contain the data in binary format 164 | charData = (unsigned char *) File_read(file, 165 | width * channels * sizeof(unsigned char), height); 166 | 167 | img = Image_new(width, height, channels); 168 | 169 | imgData = Image_getData(img); 170 | 171 | charIter = charData; 172 | floatIter = imgData; 173 | scale = 1.0f / ((float) depth); 174 | 175 | for (ii = 0; ii < height; ii++) { 176 | for (jj = 0; jj < width; jj++) { 177 | for (kk = 0; kk < channels; kk++) { 178 | *floatIter = ((float) *charIter) * scale; 179 | floatIter++; 180 | charIter++; 181 | } 182 | } 183 | } 184 | 185 | cleanup: fclose(file); 186 | return img; 187 | } 188 | 189 | bool PPM_export(const char *filename, Image_t* img) { 190 | int ii; 191 | int jj; 192 | int kk; 193 | int depth; 194 | int width; 195 | int height; 196 | int channels; 197 | FILE* file; 198 | float *floatIter; 199 | unsigned char *charData; 200 | unsigned char *charIter; 201 | 202 | file = fopen(filename, "wb+"); 203 | if (file == NULL) { 204 | printf("Could not open %s in mode %s\n", filename, "wb+"); 205 | return false; 206 | } 207 | 208 | width = Image_getWidth(img); 209 | height = Image_getHeight(img); 210 | channels = Image_getChannels(img); 211 | depth = 255; 212 | 213 | if (channels == 1) { 214 | fprintf(file, "P5\n"); 215 | } else { 216 | fprintf(file, "P6\n"); 217 | } 218 | fprintf(file, "#Created via PPM Export\n"); 219 | fprintf(file, "%d %d\n", width, height); 220 | fprintf(file, "%d\n", depth); 221 | 222 | charData = (unsigned char*) malloc( 223 | sizeof(unsigned char) * width * height * channels); 224 | 225 | charIter = charData; 
226 | floatIter = Image_getData(img); 227 | for (ii = 0; ii < height; ii++) { 228 | for (jj = 0; jj < width; jj++) { 229 | for (kk = 0; kk < channels; kk++) { 230 | *charIter = (unsigned char) ceil( 231 | clamp(*floatIter, 0, 1) * depth); 232 | floatIter++; 233 | charIter++; 234 | } 235 | } 236 | } 237 | 238 | bool writeResult = File_write(file, charData, 239 | width * channels * sizeof(unsigned char), height); 240 | 241 | free(charData); 242 | fflush(file); 243 | fclose(file); 244 | 245 | return true; 246 | } -------------------------------------------------------------------------------- /cudaConstantMemoryConvolution/PPM.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by pietro bongini on 28/09/17. 3 | // 4 | 5 | #ifndef KERNELPROCESSING_PPM_H 6 | #define KERNELPROCESSING_PPM_H 7 | 8 | #include "Image.h" 9 | 10 | Image_t* PPM_import(const char *filename); 11 | bool PPM_export(const char *filename, Image_t* img); 12 | 13 | 14 | 15 | #endif //KERNELPROCESSING_PPM_H 16 | -------------------------------------------------------------------------------- /cudaConstantMemoryConvolution/Utils.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by pietro bongini on 28/09/17. 3 | // 4 | 5 | #ifndef KERNELPROCESSING_UTILS_H 6 | #define KERNELPROCESSING_UTILS_H 7 | 8 | template 9 | static inline T _abs(const T &a) { 10 | return a < 0 ? 
-a : a; 11 | } 12 | 13 | static inline bool almostEqualFloat(float A, float B, float eps) { 14 | if (A == 0) { 15 | return _abs(B) < eps; 16 | } else if (B == 0) { 17 | return _abs(A) < eps; 18 | } else { 19 | #if 0 20 | float d = max(_abs(A), _abs(B)); 21 | float g = (_abs(A - B) / d); 22 | #else 23 | float g = _abs(A - B); 24 | #endif 25 | if (g <= eps) { 26 | return true; 27 | } else { 28 | return false; 29 | } 30 | } 31 | } 32 | 33 | static inline bool almostEqualFloat(float A, float B) { 34 | return almostEqualFloat(A, B, 0.2f); 35 | } 36 | 37 | static inline bool almostUnequalFloat(float a, float b) { 38 | return !almostEqualFloat(a, b); 39 | } 40 | 41 | static inline float _min(float x, float y) { 42 | return x < y ? x : y; 43 | } 44 | 45 | static inline float _max(float x, float y) { 46 | return x > y ? x : y; 47 | } 48 | 49 | static inline float clamp(float x, float start, float end) { 50 | return _min(_max(x, start), end); 51 | } 52 | 53 | 54 | #endif //KERNELPROCESSING_UTILS_H 55 | -------------------------------------------------------------------------------- /cudaConstantMemoryConvolution/img/computer_programming.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pietrobongini/CUDA-ImageConvolution/7948b1b96e08b1ea114d465e017eab023852a2c2/cudaConstantMemoryConvolution/img/computer_programming.ppm -------------------------------------------------------------------------------- /cudaConstantMemoryConvolution/img/panda.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pietrobongini/CUDA-ImageConvolution/7948b1b96e08b1ea114d465e017eab023852a2c2/cudaConstantMemoryConvolution/img/panda.ppm -------------------------------------------------------------------------------- /cudaConstantMemoryConvolution/mainConstant.cu: -------------------------------------------------------------------------------- 1 | #include "Image.h" 2 
| #include "PPM.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | using namespace std; 10 | using namespace std:: chrono; 11 | 12 | 13 | #define maskCols 5 14 | #define maskRows 5 15 | 16 | // mask in constant memory 17 | __constant__ float deviceMaskData[maskRows * maskCols]; 18 | __global__ void constantKernelConvolution(float * InputImageData, const float *__restrict__ kernel, 19 | float* outputImageData, int channels, int width, int height){ 20 | 21 | 22 | float accum; 23 | int col = threadIdx.x + blockIdx.x * blockDim.x; //col index 24 | int row = threadIdx.y + blockIdx.y * blockDim.y; //row index 25 | int maskRowsRadius = maskRows/2; 26 | int maskColsRadius = maskCols/2; 27 | 28 | 29 | for (int k = 0; k < channels; k++){ //cycle on channels 30 | if(row < height && col < width ){ 31 | accum = 0; 32 | int startRow = row - maskRowsRadius; //row index shifted by mask radius 33 | int startCol = col - maskColsRadius; //col index shifted by mask radius 34 | 35 | for(int i = 0; i < maskRows; i++){ //cycle on mask rows 36 | 37 | for(int j = 0; j < maskCols; j++){ //cycle on mask cols 38 | 39 | int currentRow = startRow + i; //row index to fetch data from input image 40 | int currentCol = startCol + j; //col index to fetch data from input image 41 | 42 | if(currentRow >= 0 && currentRow < height && currentCol >= 0 && currentCol < width){ 43 | 44 | accum += InputImageData[(currentRow * width + currentCol )*channels + k] * 45 | deviceMaskData[i * maskRows + j]; 46 | } 47 | else accum = 0; 48 | } 49 | 50 | } 51 | outputImageData[(row* width + col) * channels + k] = accum; 52 | } 53 | 54 | } 55 | 56 | } 57 | 58 | 59 | 60 | int main(){ 61 | 62 | 63 | int imageChannels; 64 | int imageHeight; 65 | int imageWidth; 66 | Image_t* inputImage; 67 | Image_t* outputImage; 68 | float* hostInputImageData; 69 | float* hostOutputImageData; 70 | float* deviceInputImageData; 71 | float* deviceOutputImageData; 72 | float hostMaskData[maskRows * 
maskCols]={ 73 | 0.04, 0.04, 0.04, 0.04, 0.04, 74 | 0.04, 0.04, 0.04, 0.04, 0.04, 75 | 0.04, 0.04, 0.04, 0.04, 0.04, 76 | 0.04, 0.04, 0.04, 0.04, 0.04, 77 | 0.04, 0.04, 0.04, 0.04, 0.04 78 | 79 | }; 80 | 81 | inputImage = PPM_import("/home/pietrobongini/cuda-workspace/cudaConstantMemoryConvolution/img/computer_programming.ppm"); 82 | 83 | imageWidth = Image_getWidth(inputImage); 84 | imageHeight = Image_getHeight(inputImage); 85 | imageChannels = Image_getChannels(inputImage); 86 | 87 | outputImage = Image_new(imageWidth, imageHeight, imageChannels); 88 | 89 | hostInputImageData = Image_getData(inputImage); 90 | hostOutputImageData = Image_getData(outputImage); 91 | 92 | cudaDeviceReset(); 93 | 94 | cudaMalloc((void **) &deviceInputImageData, imageWidth * imageHeight * 95 | imageChannels * sizeof(float)); 96 | cudaMalloc((void **) &deviceOutputImageData, imageWidth * imageHeight * 97 | imageChannels * sizeof(float)); 98 | 99 | cudaMemcpy(deviceInputImageData, hostInputImageData, 100 | imageWidth * imageHeight * imageChannels * sizeof(float), 101 | cudaMemcpyHostToDevice); 102 | 103 | cudaMemcpyToSymbol(deviceMaskData, hostMaskData, maskRows * maskCols * sizeof(float)); 104 | 105 | dim3 dimGrid(ceil((float) imageWidth/16), 106 | ceil((float) imageHeight/16)); 107 | dim3 dimBlock(16,16,1); 108 | 109 | 110 | cout << "CONSTANT MEMORY KERNEL CONVOLUTION" << endl; 111 | cout << "image dimensions: "<< imageWidth << "x" << imageHeight << endl; 112 | cout << "start parallelizing" << endl; 113 | cout << "elapsed in time: "; 114 | high_resolution_clock::time_point start= high_resolution_clock::now(); 115 | 116 | constantKernelConvolution<<>>(deviceInputImageData, deviceMaskData, deviceOutputImageData, 117 | imageChannels, imageWidth, imageHeight); 118 | 119 | high_resolution_clock::time_point end= high_resolution_clock::now(); 120 | chrono::duration duration = end - start; 121 | cout << duration.count()*1000 << endl; 122 | cout << "----------------------------------" << endl; 
123 | 124 | 125 | cudaMemcpy(hostOutputImageData, deviceOutputImageData, imageWidth * imageHeight * 126 | imageChannels * sizeof(float), cudaMemcpyDeviceToHost); 127 | 128 | PPM_export("/home/pietrobongini/cuda-workspace/cudaConstantMemoryConvolution/output/result.ppm", outputImage); 129 | 130 | cudaMemset(deviceInputImageData,0,imageWidth * imageHeight * 131 | imageChannels * sizeof(float)); 132 | cudaMemset(deviceOutputImageData,0,imageWidth * imageHeight * 133 | imageChannels * sizeof(float)); 134 | cudaMemset(deviceMaskData,0,maskRows * maskCols 135 | * sizeof(float)); 136 | cudaFree(deviceInputImageData); 137 | cudaFree(deviceOutputImageData); 138 | cudaFree(deviceMaskData); 139 | Image_delete(outputImage); 140 | Image_delete(inputImage); 141 | 142 | } 143 | -------------------------------------------------------------------------------- /cudaConstantMemoryConvolution/output/result.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pietrobongini/CUDA-ImageConvolution/7948b1b96e08b1ea114d465e017eab023852a2c2/cudaConstantMemoryConvolution/output/result.ppm -------------------------------------------------------------------------------- /cudaConstantSharedConvolution/Image.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by pietro bongini on 28/09/17. 
3 | // 4 | 5 | #include "Image.h" 6 | #include "Utils.h" 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | //metodo che restituisce l'immagine 13 | Image_t* Image_new(int width, int height, int channels, float *data) { 14 | Image_t* img; 15 | 16 | img = (Image_t*) malloc(sizeof(Image_t)); 17 | 18 | Image_setWidth(img, width); 19 | Image_setHeight(img, height); 20 | Image_setChannels(img, channels); 21 | Image_setPitch(img, width * channels); 22 | 23 | Image_setData(img, data); 24 | return img; 25 | } 26 | 27 | Image_t* Image_new(int width, int height, int channels) { 28 | float *data = (float*) malloc(sizeof(float) * width * height * channels); 29 | return Image_new(width, height, channels, data); 30 | } 31 | 32 | Image_t* Image_new(int width, int height) { 33 | return Image_new(width, height, Image_channels); 34 | } 35 | 36 | //metodo che cancella l'immagine 37 | void Image_delete(Image_t* img) { 38 | if (img != NULL) { 39 | if (Image_getData(img) != NULL) { 40 | free(Image_getData(img)); 41 | } 42 | free(img); 43 | } 44 | } 45 | 46 | //metodo setter per un pixel 47 | void Image_setPixel(Image_t* img, int x, int y, int c, float val) { 48 | float *data = Image_getData(img); 49 | int channels = Image_getChannels(img); 50 | int pitch = Image_getPitch(img); 51 | 52 | data[y * pitch + x * channels + c] = val; 53 | 54 | return; 55 | } 56 | 57 | //metodo getter per un pixel 58 | float Image_getPixel(Image_t* img, int x, int y, int c) { 59 | float *data = Image_getData(img); 60 | int channels = Image_getChannels(img); 61 | int pitch = Image_getPitch(img); 62 | 63 | return data[y * pitch + x * channels + c]; 64 | } 65 | 66 | //confronto tra immagini 67 | bool Image_is_same(Image_t* a, Image_t* b) { 68 | if (a == NULL || b == NULL) { 69 | std::cerr << "Comparing null images." << std::endl; 70 | return false; 71 | } else if (a == b) { 72 | return true; 73 | } else if (Image_getWidth(a) != Image_getWidth(b)) { 74 | std::cerr << "Image widths do not match." 
<< std::endl; 75 | return false; 76 | } else if (Image_getHeight(a) != Image_getHeight(b)) { 77 | std::cerr << "Image heights do not match." << std::endl; 78 | return false; 79 | } else if (Image_getChannels(a) != Image_getChannels(b)) { 80 | std::cerr << "Image channels do not match." << std::endl; 81 | return false; 82 | } else { 83 | float *aData, *bData; 84 | int width, height, channels; 85 | int ii, jj, kk; 86 | 87 | aData = Image_getData(a); 88 | bData = Image_getData(b); 89 | 90 | assert(aData != NULL); 91 | assert(bData != NULL); 92 | 93 | width = Image_getWidth(a); 94 | height = Image_getHeight(a); 95 | channels = Image_getChannels(a); 96 | 97 | for (ii = 0; ii < height; ii++) { 98 | for (jj = 0; jj < width; jj++) { 99 | for (kk = 0; kk < channels; kk++) { 100 | float x, y; 101 | if (channels <= 3) { 102 | x = clamp(*aData++, 0, 1); 103 | y = clamp(*bData++, 0, 1); 104 | } else { 105 | x = *aData++; 106 | y = *bData++; 107 | } 108 | if (almostUnequalFloat(x, y)) { 109 | std::cerr 110 | << "Image pixels do not match at position ( row = " 111 | << ii << ", col = " << jj << ", channel = " 112 | << kk << ") expecting a value of " << y 113 | << " but got a value of " << x << std::endl; 114 | 115 | return false; 116 | } 117 | } 118 | } 119 | } 120 | return true; 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /cudaConstantSharedConvolution/Image.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by pietro bongini on 28/09/17. 
//

#ifndef KERNELPROCESSING_IMAGE_H
#define KERNELPROCESSING_IMAGE_H


#ifndef IMAGE_H_
#define IMAGE_H_

// Image descriptor: dimensions plus a pointer to interleaved float samples.
typedef struct {
    int width;     // pixels per row
    int height;    // number of rows
    int channels;  // samples per pixel
    int pitch;     // floats per row (Image_new sets it to width * channels)
    float *data;   // pixel buffer, released by Image_delete()
} Image_t;

// Default channel count used by Image_new(width, height).
#define Image_channels 3

// Field getters. Each expands to an lvalue, which the setters below rely on.
#define Image_getWidth(img) ((img)->width)
#define Image_getHeight(img) ((img)->height)
#define Image_getChannels(img) ((img)->channels)
#define Image_getPitch(img) ((img)->pitch)
#define Image_getData(img) ((img)->data)

// Field setters. `val` is parenthesised so the expansion never depends on
// the precedence of the argument expression.
#define Image_setWidth(img, val) (Image_getWidth(img) = (val))
#define Image_setHeight(img, val) (Image_getHeight(img) = (val))
#define Image_setChannels(img, val) (Image_getChannels(img) = (val))
#define Image_setPitch(img, val) (Image_getPitch(img) = (val))
#define Image_setData(img, val) (Image_getData(img) = (val))

// Image construction.
Image_t* Image_new(int width, int height, int channels, float *data);
Image_t* Image_new(int width, int height, int channels);
Image_t* Image_new(int width, int height);

// Per-pixel accessors (x = column, y = row, c = channel).
float Image_getPixel(Image_t* img, int x, int y, int c);
void Image_setPixel(Image_t* img, int x, int y, int c, float val);

// Releases an image created with Image_new().
void Image_delete(Image_t* img);

// Approximate image comparison.
bool Image_is_same(Image_t* a, Image_t* b);

#endif /* IMAGE_H_ */



#endif //KERNELPROCESSING_IMAGE_H

// ===== /cudaConstantSharedConvolution/PPM.cpp =====
//
// Created by pietro bongini on 28/09/17.
3 | // 4 | 5 | #include "PPM.h" 6 | #include "Utils.h" 7 | #include "Image.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | using namespace std; 15 | 16 | #define PPMREADBUFLEN 256 17 | 18 | static const char *skipSpaces(const char *line) { 19 | while (*line == ' ' || *line == '\t') { 20 | line++; 21 | if (*line == '\0') { 22 | break; 23 | } 24 | } 25 | return line; 26 | } 27 | 28 | static char nextNonSpaceChar(const char *line0) { 29 | const char *line = skipSpaces(line0); 30 | return *line; 31 | } 32 | 33 | static bool isComment(const char *line) { 34 | char nextChar = nextNonSpaceChar(line); 35 | if (nextChar == '\0') { 36 | return true; 37 | } else { 38 | return nextChar == '#'; 39 | } 40 | } 41 | 42 | static void parseDimensions(const char *line0, int *width, int *height) { 43 | const char *line = skipSpaces(line0); 44 | sscanf(line, "%d %d", width, height); 45 | } 46 | 47 | static void parseDimensions(const char *line0, int *width, int *height, 48 | int *channels) { 49 | const char *line = skipSpaces(line0); 50 | sscanf(line, "%d %d %d", width, height, channels); 51 | } 52 | 53 | static void parseDepth(const char *line0, int *depth) { 54 | const char *line = skipSpaces(line0); 55 | sscanf(line, "%d", depth); 56 | } 57 | 58 | static char *File_readLine(FILE* file) { 59 | static char buffer[PPMREADBUFLEN]; 60 | if (file == NULL) { 61 | return NULL; 62 | } 63 | memset(buffer, 0, PPMREADBUFLEN); 64 | 65 | if (fgets(buffer, PPMREADBUFLEN - 1, file)) { 66 | return buffer; 67 | } else { 68 | return NULL; 69 | } 70 | } 71 | 72 | static char *nextLine(FILE* file) { 73 | char *line = NULL; 74 | while ((line = File_readLine(file)) != NULL) { 75 | if (!isComment(line)) { 76 | break; 77 | } 78 | } 79 | return line; 80 | } 81 | 82 | char* File_read(FILE* file, size_t size, size_t count) { 83 | size_t res; 84 | char *buffer; 85 | size_t bufferLen; 86 | 87 | if (file == NULL) { 88 | return NULL; 89 | } 90 | 91 | bufferLen = size * count + 1; 
92 | buffer = (char*) malloc(sizeof(char) * bufferLen); 93 | 94 | res = fread(buffer, size, count, file); 95 | // make valid C string 96 | buffer[size * res] = '\0'; 97 | 98 | return buffer; 99 | } 100 | 101 | bool File_write(FILE* file, const void *buffer, size_t size, size_t count) { 102 | if (file == NULL) { 103 | return false; 104 | } 105 | 106 | size_t res = fwrite(buffer, size, count, file); 107 | if (res != count) { 108 | printf("ERROR: Failed to write data to PPM file"); 109 | } 110 | 111 | return true; 112 | } 113 | 114 | Image_t* PPM_import(const char *filename) { 115 | Image_t* img; 116 | FILE* file; 117 | char *header; 118 | char *line; 119 | int ii, jj, kk, channels; 120 | int width, height, depth; 121 | unsigned char *charData, *charIter; 122 | float *imgData, *floatIter; 123 | float scale; 124 | 125 | img = NULL; 126 | 127 | file = fopen(filename, "rb"); 128 | if (file == NULL) { 129 | printf("Could not open %s\n", filename); 130 | goto cleanup; 131 | } 132 | 133 | header = File_readLine(file); 134 | if (header == NULL) { 135 | printf("Could not read from %s\n", filename); 136 | goto cleanup; 137 | } else if (strcmp(header, "P6") != 0 && strcmp(header, "P6\n") != 0 138 | && strcmp(header, "P5") != 0 && strcmp(header, "P5\n") != 0 139 | && strcmp(header, "S6") != 0 && strcmp(header, "S6\n") != 0) { 140 | printf("Could not find magic number for %s\n", filename); 141 | goto cleanup; 142 | } 143 | 144 | // P5 are monochrome while P6/S6 are RGB 145 | // S6 needs to parse number of channels out of file 146 | if (strcmp(header, "P5") == 0 || strcmp(header, "P5\n") == 0) { 147 | channels = 1; 148 | line = nextLine(file); 149 | parseDimensions(line, &width, &height); 150 | } else if (strcmp(header, "P6") == 0 || strcmp(header, "P6\n") == 0) { 151 | channels = 3; 152 | line = nextLine(file); 153 | parseDimensions(line, &width, &height); 154 | } else { 155 | line = nextLine(file); 156 | parseDimensions(line, &width, &height, &channels); 157 | } 158 | 159 | // 
the line now contains the depth information 160 | line = nextLine(file); 161 | parseDepth(line, &depth); 162 | 163 | // the rest of the lines contain the data in binary format 164 | charData = (unsigned char *) File_read(file, 165 | width * channels * sizeof(unsigned char), height); 166 | 167 | img = Image_new(width, height, channels); 168 | 169 | imgData = Image_getData(img); 170 | 171 | charIter = charData; 172 | floatIter = imgData; 173 | scale = 1.0f / ((float) depth); 174 | 175 | for (ii = 0; ii < height; ii++) { 176 | for (jj = 0; jj < width; jj++) { 177 | for (kk = 0; kk < channels; kk++) { 178 | *floatIter = ((float) *charIter) * scale; 179 | floatIter++; 180 | charIter++; 181 | } 182 | } 183 | } 184 | 185 | cleanup: fclose(file); 186 | return img; 187 | } 188 | 189 | bool PPM_export(const char *filename, Image_t* img) { 190 | int ii; 191 | int jj; 192 | int kk; 193 | int depth; 194 | int width; 195 | int height; 196 | int channels; 197 | FILE* file; 198 | float *floatIter; 199 | unsigned char *charData; 200 | unsigned char *charIter; 201 | 202 | file = fopen(filename, "wb+"); 203 | if (file == NULL) { 204 | printf("Could not open %s in mode %s\n", filename, "wb+"); 205 | return false; 206 | } 207 | 208 | width = Image_getWidth(img); 209 | height = Image_getHeight(img); 210 | channels = Image_getChannels(img); 211 | depth = 255; 212 | 213 | if (channels == 1) { 214 | fprintf(file, "P5\n"); 215 | } else { 216 | fprintf(file, "P6\n"); 217 | } 218 | fprintf(file, "#Created via PPM Export\n"); 219 | fprintf(file, "%d %d\n", width, height); 220 | fprintf(file, "%d\n", depth); 221 | 222 | charData = (unsigned char*) malloc( 223 | sizeof(unsigned char) * width * height * channels); 224 | 225 | charIter = charData; 226 | floatIter = Image_getData(img); 227 | for (ii = 0; ii < height; ii++) { 228 | for (jj = 0; jj < width; jj++) { 229 | for (kk = 0; kk < channels; kk++) { 230 | *charIter = (unsigned char) ceil( 231 | clamp(*floatIter, 0, 1) * depth); 232 | 
floatIter++; 233 | charIter++; 234 | } 235 | } 236 | } 237 | 238 | bool writeResult = File_write(file, charData, 239 | width * channels * sizeof(unsigned char), height); 240 | 241 | free(charData); 242 | fflush(file); 243 | fclose(file); 244 | 245 | return true; 246 | } -------------------------------------------------------------------------------- /cudaConstantSharedConvolution/PPM.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by pietro bongini on 28/09/17. 3 | // 4 | 5 | #ifndef KERNELPROCESSING_PPM_H 6 | #define KERNELPROCESSING_PPM_H 7 | 8 | #include "Image.h" 9 | 10 | Image_t* PPM_import(const char *filename); 11 | bool PPM_export(const char *filename, Image_t* img); 12 | 13 | 14 | 15 | #endif //KERNELPROCESSING_PPM_H 16 | -------------------------------------------------------------------------------- /cudaConstantSharedConvolution/Utils.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by pietro bongini on 28/09/17. 3 | // 4 | 5 | #ifndef KERNELPROCESSING_UTILS_H 6 | #define KERNELPROCESSING_UTILS_H 7 | 8 | template 9 | static inline T _abs(const T &a) { 10 | return a < 0 ? -a : a; 11 | } 12 | 13 | static inline bool almostEqualFloat(float A, float B, float eps) { 14 | if (A == 0) { 15 | return _abs(B) < eps; 16 | } else if (B == 0) { 17 | return _abs(A) < eps; 18 | } else { 19 | #if 0 20 | float d = max(_abs(A), _abs(B)); 21 | float g = (_abs(A - B) / d); 22 | #else 23 | float g = _abs(A - B); 24 | #endif 25 | if (g <= eps) { 26 | return true; 27 | } else { 28 | return false; 29 | } 30 | } 31 | } 32 | 33 | static inline bool almostEqualFloat(float A, float B) { 34 | return almostEqualFloat(A, B, 0.2f); 35 | } 36 | 37 | static inline bool almostUnequalFloat(float a, float b) { 38 | return !almostEqualFloat(a, b); 39 | } 40 | 41 | static inline float _min(float x, float y) { 42 | return x < y ? 
x : y; 43 | } 44 | 45 | static inline float _max(float x, float y) { 46 | return x > y ? x : y; 47 | } 48 | 49 | static inline float clamp(float x, float start, float end) { 50 | return _min(_max(x, start), end); 51 | } 52 | 53 | 54 | #endif //KERNELPROCESSING_UTILS_H 55 | -------------------------------------------------------------------------------- /cudaConstantSharedConvolution/img/computer_programming.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pietrobongini/CUDA-ImageConvolution/7948b1b96e08b1ea114d465e017eab023852a2c2/cudaConstantSharedConvolution/img/computer_programming.ppm -------------------------------------------------------------------------------- /cudaConstantSharedConvolution/img/panda.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pietrobongini/CUDA-ImageConvolution/7948b1b96e08b1ea114d465e017eab023852a2c2/cudaConstantSharedConvolution/img/panda.ppm -------------------------------------------------------------------------------- /cudaConstantSharedConvolution/mainConstantShared.cu: -------------------------------------------------------------------------------- 1 | #include "Image.h" 2 | #include "PPM.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | using namespace std; 10 | using namespace std:: chrono; 11 | 12 | #define TILE_WIDTH 16 13 | #define maskCols 5 14 | #define maskRows 5 15 | #define w (TILE_WIDTH + maskCols -1) 16 | 17 | //mask in constant memory 18 | __constant__ float deviceMaskData[maskRows * maskCols]; 19 | __global__ void constantSharedKernelProcessing(float * InputImageData, const float *__restrict__ kernel, 20 | float* outputImageData, int channels, int width, int height){ 21 | 22 | __shared__ float N_ds[w][w]; //block of share memory 23 | 24 | 25 | // allocation in shared memory of image blocks 26 | int maskRadius = maskRows/2; 27 | for (int k 
= 0; k = 0 && srcY < height && srcX>=0 && srcX < width) 34 | N_ds[destY][destX] = InputImageData[(srcY *width +srcX) * channels + k]; 35 | else 36 | N_ds[destY][destX] = 0; 37 | 38 | 39 | dest = threadIdx.y * TILE_WIDTH+ threadIdx.x + TILE_WIDTH * TILE_WIDTH; 40 | destY = dest/w; 41 | destX = dest%w; 42 | srcY = blockIdx.y *TILE_WIDTH + destY - maskRadius; 43 | srcX = blockIdx.x *TILE_WIDTH + destX - maskRadius; 44 | if(destY < w){ 45 | if(srcY>= 0 && srcY < height && srcX>=0 && srcX < width) 46 | N_ds[destY][destX] = InputImageData[(srcY *width +srcX) * channels + k]; 47 | else 48 | N_ds[destY][destX] = 0; 49 | } 50 | 51 | __syncthreads(); 52 | 53 | 54 | //compute kernel convolution 55 | float accum = 0; 56 | int y, x; 57 | for (y= 0; y < maskCols; y++) 58 | for(x = 0; x>>(deviceInputImageData, deviceMaskData, deviceOutputImageData, 132 | imageChannels, imageWidth, imageHeight); 133 | 134 | high_resolution_clock::time_point end= high_resolution_clock::now(); 135 | chrono::duration duration = end - start; 136 | cout << duration.count()*1000 << endl; 137 | cout << "----------------------------------" << endl; 138 | 139 | cudaMemcpy(hostOutputImageData, deviceOutputImageData, imageWidth * imageHeight * 140 | imageChannels * sizeof(float), cudaMemcpyDeviceToHost); 141 | 142 | PPM_export("/home/pietrobongini/cuda-workspace/cudaConstantSharedConvolution/output/result.ppm", outputImage); 143 | 144 | cudaMemset(deviceInputImageData,0,imageWidth * imageHeight * 145 | imageChannels * sizeof(float)); 146 | cudaMemset(deviceOutputImageData,0,imageWidth * imageHeight * 147 | imageChannels * sizeof(float)); 148 | cudaMemset(deviceMaskData,0,maskRows * maskCols 149 | * sizeof(float)); 150 | cudaFree(deviceInputImageData); 151 | cudaFree(deviceOutputImageData); 152 | cudaFree(deviceMaskData); 153 | 154 | Image_delete(outputImage); 155 | Image_delete(inputImage); 156 | 157 | 158 | } 159 | -------------------------------------------------------------------------------- 
/cudaConstantSharedConvolution/output/result.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pietrobongini/CUDA-ImageConvolution/7948b1b96e08b1ea114d465e017eab023852a2c2/cudaConstantSharedConvolution/output/result.ppm -------------------------------------------------------------------------------- /cudaGlobalMemoryConvolution/Image.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by pietro bongini on 28/09/17. 3 | // 4 | 5 | #include "Image.h" 6 | #include "Utils.h" 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | //metodo che restituisce l'immagine 13 | Image_t* Image_new(int width, int height, int channels, float *data) { 14 | Image_t* img; 15 | 16 | img = (Image_t*) malloc(sizeof(Image_t)); 17 | 18 | Image_setWidth(img, width); 19 | Image_setHeight(img, height); 20 | Image_setChannels(img, channels); 21 | Image_setPitch(img, width * channels); 22 | 23 | Image_setData(img, data); 24 | return img; 25 | } 26 | 27 | Image_t* Image_new(int width, int height, int channels) { 28 | float *data = (float*) malloc(sizeof(float) * width * height * channels); 29 | return Image_new(width, height, channels, data); 30 | } 31 | 32 | Image_t* Image_new(int width, int height) { 33 | return Image_new(width, height, Image_channels); 34 | } 35 | 36 | //metodo che cancella l'immagine 37 | void Image_delete(Image_t* img) { 38 | if (img != NULL) { 39 | if (Image_getData(img) != NULL) { 40 | free(Image_getData(img)); 41 | } 42 | free(img); 43 | } 44 | } 45 | 46 | //metodo setter per un pixel 47 | void Image_setPixel(Image_t* img, int x, int y, int c, float val) { 48 | float *data = Image_getData(img); 49 | int channels = Image_getChannels(img); 50 | int pitch = Image_getPitch(img); 51 | 52 | data[y * pitch + x * channels + c] = val; 53 | 54 | return; 55 | } 56 | 57 | //metodo getter per un pixel 58 | float Image_getPixel(Image_t* img, int x, int y, int 
c) { 59 | float *data = Image_getData(img); 60 | int channels = Image_getChannels(img); 61 | int pitch = Image_getPitch(img); 62 | 63 | return data[y * pitch + x * channels + c]; 64 | } 65 | 66 | //confronto tra immagini 67 | bool Image_is_same(Image_t* a, Image_t* b) { 68 | if (a == NULL || b == NULL) { 69 | std::cerr << "Comparing null images." << std::endl; 70 | return false; 71 | } else if (a == b) { 72 | return true; 73 | } else if (Image_getWidth(a) != Image_getWidth(b)) { 74 | std::cerr << "Image widths do not match." << std::endl; 75 | return false; 76 | } else if (Image_getHeight(a) != Image_getHeight(b)) { 77 | std::cerr << "Image heights do not match." << std::endl; 78 | return false; 79 | } else if (Image_getChannels(a) != Image_getChannels(b)) { 80 | std::cerr << "Image channels do not match." << std::endl; 81 | return false; 82 | } else { 83 | float *aData, *bData; 84 | int width, height, channels; 85 | int ii, jj, kk; 86 | 87 | aData = Image_getData(a); 88 | bData = Image_getData(b); 89 | 90 | assert(aData != NULL); 91 | assert(bData != NULL); 92 | 93 | width = Image_getWidth(a); 94 | height = Image_getHeight(a); 95 | channels = Image_getChannels(a); 96 | 97 | for (ii = 0; ii < height; ii++) { 98 | for (jj = 0; jj < width; jj++) { 99 | for (kk = 0; kk < channels; kk++) { 100 | float x, y; 101 | if (channels <= 3) { 102 | x = clamp(*aData++, 0, 1); 103 | y = clamp(*bData++, 0, 1); 104 | } else { 105 | x = *aData++; 106 | y = *bData++; 107 | } 108 | if (almostUnequalFloat(x, y)) { 109 | std::cerr 110 | << "Image pixels do not match at position ( row = " 111 | << ii << ", col = " << jj << ", channel = " 112 | << kk << ") expecting a value of " << y 113 | << " but got a value of " << x << std::endl; 114 | 115 | return false; 116 | } 117 | } 118 | } 119 | } 120 | return true; 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /cudaGlobalMemoryConvolution/Image.h: 
// --------------------------------------------------------------------------------
//
// Created by pietro bongini on 28/09/17.
//

#ifndef KERNELPROCESSING_IMAGE_H
#define KERNELPROCESSING_IMAGE_H


// Second include guard; redundant with the one above.
#ifndef IMAGE_H_
#define IMAGE_H_

// Image descriptor: interleaved float samples stored row by row.
typedef struct {
    int width;      // pixels per row
    int height;     // number of rows
    int channels;   // samples per pixel
    int pitch;      // floats per row (Image_new sets it to width * channels)
    float *data;    // pixel buffer; released with free() by Image_delete
} Image_t;

// Channel count used by Image_new(width, height).
#define Image_channels 3

// Field accessors. Each one expands to an lvalue, which the setters rely on.
#define Image_getWidth(img) ((img)->width)
#define Image_getHeight(img) ((img)->height)
#define Image_getChannels(img) ((img)->channels)
#define Image_getPitch(img) ((img)->pitch)
#define Image_getData(img) ((img)->data)

// Field setters. `val` is parenthesised so that any expression can be passed
// as the argument without precedence surprises.
#define Image_setWidth(img, val) (Image_getWidth(img) = (val))
#define Image_setHeight(img, val) (Image_getHeight(img) = (val))
#define Image_setChannels(img, val) (Image_getChannels(img) = (val))
#define Image_setPitch(img, val) (Image_getPitch(img) = (val))
#define Image_setData(img, val) (Image_getData(img) = (val))

// Image construction.
Image_t* Image_new(int width, int height, int channels, float *data);
Image_t* Image_new(int width, int height, int channels);
Image_t* Image_new(int width, int height);

// Per-pixel getter and setter (x = column, y = row, c = channel).
float Image_getPixel(Image_t* img, int x, int y, int c);
void Image_setPixel(Image_t* img, int x, int y, int c, float val);

// Releases an image.
void Image_delete(Image_t* img);

// Compares two images.
bool Image_is_same(Image_t* a, Image_t* b);

#endif /* IMAGE_H_ */



#endif //KERNELPROCESSING_IMAGE_H
// --------------------------------------------------------------------------------
// /cudaGlobalMemoryConvolution/PPM.cpp:
-------------------------------------------------------------------------------- 1 | // 2 | // Created by pietro bongini on 28/09/17. 3 | // 4 | 5 | #include "PPM.h" 6 | #include "Utils.h" 7 | #include "Image.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | using namespace std; 15 | 16 | #define PPMREADBUFLEN 256 17 | 18 | static const char *skipSpaces(const char *line) { 19 | while (*line == ' ' || *line == '\t') { 20 | line++; 21 | if (*line == '\0') { 22 | break; 23 | } 24 | } 25 | return line; 26 | } 27 | 28 | static char nextNonSpaceChar(const char *line0) { 29 | const char *line = skipSpaces(line0); 30 | return *line; 31 | } 32 | 33 | static bool isComment(const char *line) { 34 | char nextChar = nextNonSpaceChar(line); 35 | if (nextChar == '\0') { 36 | return true; 37 | } else { 38 | return nextChar == '#'; 39 | } 40 | } 41 | 42 | static void parseDimensions(const char *line0, int *width, int *height) { 43 | const char *line = skipSpaces(line0); 44 | sscanf(line, "%d %d", width, height); 45 | } 46 | 47 | static void parseDimensions(const char *line0, int *width, int *height, 48 | int *channels) { 49 | const char *line = skipSpaces(line0); 50 | sscanf(line, "%d %d %d", width, height, channels); 51 | } 52 | 53 | static void parseDepth(const char *line0, int *depth) { 54 | const char *line = skipSpaces(line0); 55 | sscanf(line, "%d", depth); 56 | } 57 | 58 | static char *File_readLine(FILE* file) { 59 | static char buffer[PPMREADBUFLEN]; 60 | if (file == NULL) { 61 | return NULL; 62 | } 63 | memset(buffer, 0, PPMREADBUFLEN); 64 | 65 | if (fgets(buffer, PPMREADBUFLEN - 1, file)) { 66 | return buffer; 67 | } else { 68 | return NULL; 69 | } 70 | } 71 | 72 | static char *nextLine(FILE* file) { 73 | char *line = NULL; 74 | while ((line = File_readLine(file)) != NULL) { 75 | if (!isComment(line)) { 76 | break; 77 | } 78 | } 79 | return line; 80 | } 81 | 82 | char* File_read(FILE* file, size_t size, size_t count) { 83 | size_t res; 84 | 
char *buffer; 85 | size_t bufferLen; 86 | 87 | if (file == NULL) { 88 | return NULL; 89 | } 90 | 91 | bufferLen = size * count + 1; 92 | buffer = (char*) malloc(sizeof(char) * bufferLen); 93 | 94 | res = fread(buffer, size, count, file); 95 | // make valid C string 96 | buffer[size * res] = '\0'; 97 | 98 | return buffer; 99 | } 100 | 101 | bool File_write(FILE* file, const void *buffer, size_t size, size_t count) { 102 | if (file == NULL) { 103 | return false; 104 | } 105 | 106 | size_t res = fwrite(buffer, size, count, file); 107 | if (res != count) { 108 | printf("ERROR: Failed to write data to PPM file"); 109 | } 110 | 111 | return true; 112 | } 113 | 114 | Image_t* PPM_import(const char *filename) { 115 | Image_t* img; 116 | FILE* file; 117 | char *header; 118 | char *line; 119 | int ii, jj, kk, channels; 120 | int width, height, depth; 121 | unsigned char *charData, *charIter; 122 | float *imgData, *floatIter; 123 | float scale; 124 | 125 | img = NULL; 126 | 127 | file = fopen(filename, "rb"); 128 | if (file == NULL) { 129 | printf("Could not open %s\n", filename); 130 | goto cleanup; 131 | } 132 | 133 | header = File_readLine(file); 134 | if (header == NULL) { 135 | printf("Could not read from %s\n", filename); 136 | goto cleanup; 137 | } else if (strcmp(header, "P6") != 0 && strcmp(header, "P6\n") != 0 138 | && strcmp(header, "P5") != 0 && strcmp(header, "P5\n") != 0 139 | && strcmp(header, "S6") != 0 && strcmp(header, "S6\n") != 0) { 140 | printf("Could not find magic number for %s\n", filename); 141 | goto cleanup; 142 | } 143 | 144 | // P5 are monochrome while P6/S6 are RGB 145 | // S6 needs to parse number of channels out of file 146 | if (strcmp(header, "P5") == 0 || strcmp(header, "P5\n") == 0) { 147 | channels = 1; 148 | line = nextLine(file); 149 | parseDimensions(line, &width, &height); 150 | } else if (strcmp(header, "P6") == 0 || strcmp(header, "P6\n") == 0) { 151 | channels = 3; 152 | line = nextLine(file); 153 | parseDimensions(line, &width, 
&height); 154 | } else { 155 | line = nextLine(file); 156 | parseDimensions(line, &width, &height, &channels); 157 | } 158 | 159 | // the line now contains the depth information 160 | line = nextLine(file); 161 | parseDepth(line, &depth); 162 | 163 | // the rest of the lines contain the data in binary format 164 | charData = (unsigned char *) File_read(file, 165 | width * channels * sizeof(unsigned char), height); 166 | 167 | img = Image_new(width, height, channels); 168 | 169 | imgData = Image_getData(img); 170 | 171 | charIter = charData; 172 | floatIter = imgData; 173 | scale = 1.0f / ((float) depth); 174 | 175 | for (ii = 0; ii < height; ii++) { 176 | for (jj = 0; jj < width; jj++) { 177 | for (kk = 0; kk < channels; kk++) { 178 | *floatIter = ((float) *charIter) * scale; 179 | floatIter++; 180 | charIter++; 181 | } 182 | } 183 | } 184 | 185 | cleanup: fclose(file); 186 | return img; 187 | } 188 | 189 | bool PPM_export(const char *filename, Image_t* img) { 190 | int ii; 191 | int jj; 192 | int kk; 193 | int depth; 194 | int width; 195 | int height; 196 | int channels; 197 | FILE* file; 198 | float *floatIter; 199 | unsigned char *charData; 200 | unsigned char *charIter; 201 | 202 | file = fopen(filename, "wb+"); 203 | if (file == NULL) { 204 | printf("Could not open %s in mode %s\n", filename, "wb+"); 205 | return false; 206 | } 207 | 208 | width = Image_getWidth(img); 209 | height = Image_getHeight(img); 210 | channels = Image_getChannels(img); 211 | depth = 255; 212 | 213 | if (channels == 1) { 214 | fprintf(file, "P5\n"); 215 | } else { 216 | fprintf(file, "P6\n"); 217 | } 218 | fprintf(file, "#Created via PPM Export\n"); 219 | fprintf(file, "%d %d\n", width, height); 220 | fprintf(file, "%d\n", depth); 221 | 222 | charData = (unsigned char*) malloc( 223 | sizeof(unsigned char) * width * height * channels); 224 | 225 | charIter = charData; 226 | floatIter = Image_getData(img); 227 | for (ii = 0; ii < height; ii++) { 228 | for (jj = 0; jj < width; jj++) { 229 
| for (kk = 0; kk < channels; kk++) { 230 | *charIter = (unsigned char) ceil( 231 | clamp(*floatIter, 0, 1) * depth); 232 | floatIter++; 233 | charIter++; 234 | } 235 | } 236 | } 237 | 238 | bool writeResult = File_write(file, charData, 239 | width * channels * sizeof(unsigned char), height); 240 | 241 | free(charData); 242 | fflush(file); 243 | fclose(file); 244 | 245 | return true; 246 | } -------------------------------------------------------------------------------- /cudaGlobalMemoryConvolution/PPM.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by pietro bongini on 28/09/17. 3 | // 4 | 5 | #ifndef KERNELPROCESSING_PPM_H 6 | #define KERNELPROCESSING_PPM_H 7 | 8 | #include "Image.h" 9 | 10 | Image_t* PPM_import(const char *filename); 11 | bool PPM_export(const char *filename, Image_t* img); 12 | 13 | 14 | 15 | #endif //KERNELPROCESSING_PPM_H 16 | -------------------------------------------------------------------------------- /cudaGlobalMemoryConvolution/Utils.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by pietro bongini on 28/09/17. 3 | // 4 | 5 | #ifndef KERNELPROCESSING_UTILS_H 6 | #define KERNELPROCESSING_UTILS_H 7 | 8 | template 9 | static inline T _abs(const T &a) { 10 | return a < 0 ? 
-a : a; 11 | } 12 | 13 | static inline bool almostEqualFloat(float A, float B, float eps) { 14 | if (A == 0) { 15 | return _abs(B) < eps; 16 | } else if (B == 0) { 17 | return _abs(A) < eps; 18 | } else { 19 | #if 0 20 | float d = max(_abs(A), _abs(B)); 21 | float g = (_abs(A - B) / d); 22 | #else 23 | float g = _abs(A - B); 24 | #endif 25 | if (g <= eps) { 26 | return true; 27 | } else { 28 | return false; 29 | } 30 | } 31 | } 32 | 33 | static inline bool almostEqualFloat(float A, float B) { 34 | return almostEqualFloat(A, B, 0.2f); 35 | } 36 | 37 | static inline bool almostUnequalFloat(float a, float b) { 38 | return !almostEqualFloat(a, b); 39 | } 40 | 41 | static inline float _min(float x, float y) { 42 | return x < y ? x : y; 43 | } 44 | 45 | static inline float _max(float x, float y) { 46 | return x > y ? x : y; 47 | } 48 | 49 | static inline float clamp(float x, float start, float end) { 50 | return _min(_max(x, start), end); 51 | } 52 | 53 | 54 | #endif //KERNELPROCESSING_UTILS_H 55 | -------------------------------------------------------------------------------- /cudaGlobalMemoryConvolution/img/computer_programming.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pietrobongini/CUDA-ImageConvolution/7948b1b96e08b1ea114d465e017eab023852a2c2/cudaGlobalMemoryConvolution/img/computer_programming.ppm -------------------------------------------------------------------------------- /cudaGlobalMemoryConvolution/img/panda.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pietrobongini/CUDA-ImageConvolution/7948b1b96e08b1ea114d465e017eab023852a2c2/cudaGlobalMemoryConvolution/img/panda.ppm -------------------------------------------------------------------------------- /cudaGlobalMemoryConvolution/mainGlobal.cu: -------------------------------------------------------------------------------- 1 | #include "Image.h" 2 | #include 
"PPM.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | using namespace std; 10 | using namespace std:: chrono; 11 | 12 | 13 | #define maskCols 5 14 | #define maskRows 5 15 | 16 | 17 | __global__ void slowKernelProcessing(float * InputImageData, const float *__restrict__ kernel, 18 | float* outputImageData, int channels, int width, int height){ 19 | 20 | 21 | float accum; 22 | int col = threadIdx.x + blockIdx.x * blockDim.x; //col index 23 | int row = threadIdx.y + blockIdx.y * blockDim.y; //row index 24 | int maskRowsRadius = maskRows/2; 25 | int maskColsRadius = maskCols/2; 26 | 27 | 28 | for (int k = 0; k < channels; k++){ //cycle on kernel channels 29 | if(row < height && col < width ){ 30 | accum = 0; 31 | int startRow = row - maskRowsRadius; //row index shifted by mask radius 32 | int startCol = col - maskColsRadius; //col index shifted by mask radius 33 | 34 | for(int i = 0; i < maskRows; i++){ //cycle on mask rows 35 | 36 | for(int j = 0; j < maskCols; j++){ //cycle on mask columns 37 | 38 | int currentRow = startRow + i; // row index to fetch data from input image 39 | int currentCol = startCol + j; // col index to fetch data from input image 40 | 41 | if(currentRow >= 0 && currentRow < height && currentCol >= 0 && currentCol < width){ 42 | 43 | accum += InputImageData[(currentRow * width + currentCol )*channels + k] * 44 | kernel[i * maskRows + j]; 45 | } 46 | else accum = 0; 47 | } 48 | 49 | } 50 | outputImageData[(row* width + col) * channels + k] = accum; 51 | } 52 | 53 | } 54 | 55 | } 56 | 57 | 58 | 59 | int main(){ 60 | 61 | 62 | int imageChannels; 63 | int imageHeight; 64 | int imageWidth; 65 | Image_t* inputImage; 66 | Image_t* outputImage; 67 | float* hostInputImageData; 68 | float* hostOutputImageData; 69 | float* deviceInputImageData; 70 | float* deviceOutputImageData; 71 | float* deviceMaskData; 72 | float hostMaskData[maskRows * maskCols]={ 73 | 0.04, 0.04, 0.04, 0.04, 0.04, 74 | 0.04, 0.04, 0.04, 0.04, 0.04, 
75 | 0.04, 0.04, 0.04, 0.04, 0.04, 76 | 0.04, 0.04, 0.04, 0.04, 0.04, 77 | 0.04, 0.04, 0.04, 0.04, 0.04 78 | 79 | }; 80 | 81 | 82 | inputImage = PPM_import("/home/pietrobongini/cuda-workspace/cudaGlobalMemoryConvolution/img/computer_programming.ppm"); 83 | 84 | imageWidth = Image_getWidth(inputImage); 85 | imageHeight = Image_getHeight(inputImage); 86 | imageChannels = Image_getChannels(inputImage); 87 | 88 | outputImage = Image_new(imageWidth, imageHeight, imageChannels); 89 | 90 | hostInputImageData = Image_getData(inputImage); 91 | hostOutputImageData = Image_getData(outputImage); 92 | 93 | 94 | cudaDeviceReset(); 95 | cudaMalloc((void **) &deviceInputImageData, imageWidth * imageHeight * 96 | imageChannels * sizeof(float)); 97 | cudaMalloc((void **) &deviceOutputImageData, imageWidth * imageHeight * 98 | imageChannels * sizeof(float)); 99 | cudaMalloc((void **) &deviceMaskData, maskRows * maskCols 100 | * sizeof(float)); 101 | cudaMemcpy(deviceInputImageData, hostInputImageData, 102 | imageWidth * imageHeight * imageChannels * sizeof(float), 103 | cudaMemcpyHostToDevice); 104 | cudaMemcpy(deviceMaskData, hostMaskData, 105 | maskRows * maskCols * sizeof(float), 106 | cudaMemcpyHostToDevice); 107 | 108 | dim3 dimGrid(ceil((float) imageWidth/16), 109 | ceil((float) imageHeight/16)); 110 | dim3 dimBlock(16,16,1); 111 | 112 | 113 | cout << "GLOBAL MEMORY KERNEL CONVOLUTION" << endl; 114 | cout << "image dimensions: "<< imageWidth << "x" << imageHeight << endl; 115 | cout << "start parallelizing" << endl; 116 | cout << "elapsed in time: "; 117 | high_resolution_clock::time_point start= high_resolution_clock::now(); 118 | 119 | slowKernelProcessing<<>>(deviceInputImageData, deviceMaskData, deviceOutputImageData, 120 | imageChannels, imageWidth, imageHeight); 121 | 122 | high_resolution_clock::time_point end= high_resolution_clock::now(); 123 | chrono::duration duration = end - start; 124 | cout << duration.count()*1000 << endl; 125 | 126 | cout << 
"----------------------------------" << endl; 127 | 128 | cudaMemcpy(hostOutputImageData, deviceOutputImageData, imageWidth * imageHeight * 129 | imageChannels * sizeof(float), cudaMemcpyDeviceToHost); 130 | 131 | PPM_export("/home/pietrobongini/cuda-workspace/cudaGlobalMemoryConvolution/output/result.ppm", outputImage); 132 | 133 | cudaMemset(deviceInputImageData,0,imageWidth * imageHeight * 134 | imageChannels * sizeof(float)); 135 | cudaMemset(deviceOutputImageData,0,imageWidth * imageHeight * 136 | imageChannels * sizeof(float)); 137 | cudaMemset(deviceMaskData,0,maskRows * maskCols 138 | * sizeof(float)); 139 | cudaFree(deviceInputImageData); 140 | cudaFree(deviceOutputImageData); 141 | cudaFree(deviceMaskData); 142 | 143 | Image_delete(outputImage); 144 | Image_delete(inputImage); 145 | 146 | return 0; 147 | 148 | 149 | } 150 | -------------------------------------------------------------------------------- /cudaGlobalMemoryConvolution/output/result.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pietrobongini/CUDA-ImageConvolution/7948b1b96e08b1ea114d465e017eab023852a2c2/cudaGlobalMemoryConvolution/output/result.ppm -------------------------------------------------------------------------------- /cudaSharedMemoryConvolution/Image.cpp: -------------------------------------------------------------------------------- 1 | 2 | // 3 | // Created by pietro bongini on 28/09/17. 
4 | // 5 | 6 | #include "Image.h" 7 | #include "Utils.h" 8 | #include 9 | #include 10 | #include 11 | 12 | 13 | //metodo che restituisce l'immagine 14 | Image_t* Image_new(int width, int height, int channels, float *data) { 15 | Image_t* img; 16 | 17 | img = (Image_t*) malloc(sizeof(Image_t)); 18 | 19 | Image_setWidth(img, width); 20 | Image_setHeight(img, height); 21 | Image_setChannels(img, channels); 22 | Image_setPitch(img, width * channels); 23 | 24 | Image_setData(img, data); 25 | return img; 26 | } 27 | 28 | Image_t* Image_new(int width, int height, int channels) { 29 | float *data = (float*) malloc(sizeof(float) * width * height * channels); 30 | return Image_new(width, height, channels, data); 31 | } 32 | 33 | Image_t* Image_new(int width, int height) { 34 | return Image_new(width, height, Image_channels); 35 | } 36 | 37 | //metodo che cancella l'immagine 38 | void Image_delete(Image_t* img) { 39 | if (img != NULL) { 40 | if (Image_getData(img) != NULL) { 41 | free(Image_getData(img)); 42 | } 43 | free(img); 44 | } 45 | } 46 | 47 | //metodo setter per un pixel 48 | void Image_setPixel(Image_t* img, int x, int y, int c, float val) { 49 | float *data = Image_getData(img); 50 | int channels = Image_getChannels(img); 51 | int pitch = Image_getPitch(img); 52 | 53 | data[y * pitch + x * channels + c] = val; 54 | 55 | return; 56 | } 57 | 58 | //metodo getter per un pixel 59 | float Image_getPixel(Image_t* img, int x, int y, int c) { 60 | float *data = Image_getData(img); 61 | int channels = Image_getChannels(img); 62 | int pitch = Image_getPitch(img); 63 | 64 | return data[y * pitch + x * channels + c]; 65 | } 66 | 67 | //confronto tra immagini 68 | bool Image_is_same(Image_t* a, Image_t* b) { 69 | if (a == NULL || b == NULL) { 70 | std::cerr << "Comparing null images." << std::endl; 71 | return false; 72 | } else if (a == b) { 73 | return true; 74 | } else if (Image_getWidth(a) != Image_getWidth(b)) { 75 | std::cerr << "Image widths do not match." 
<< std::endl; 76 | return false; 77 | } else if (Image_getHeight(a) != Image_getHeight(b)) { 78 | std::cerr << "Image heights do not match." << std::endl; 79 | return false; 80 | } else if (Image_getChannels(a) != Image_getChannels(b)) { 81 | std::cerr << "Image channels do not match." << std::endl; 82 | return false; 83 | } else { 84 | float *aData, *bData; 85 | int width, height, channels; 86 | int ii, jj, kk; 87 | 88 | aData = Image_getData(a); 89 | bData = Image_getData(b); 90 | 91 | assert(aData != NULL); 92 | assert(bData != NULL); 93 | 94 | width = Image_getWidth(a); 95 | height = Image_getHeight(a); 96 | channels = Image_getChannels(a); 97 | 98 | for (ii = 0; ii < height; ii++) { 99 | for (jj = 0; jj < width; jj++) { 100 | for (kk = 0; kk < channels; kk++) { 101 | float x, y; 102 | if (channels <= 3) { 103 | x = clamp(*aData++, 0, 1); 104 | y = clamp(*bData++, 0, 1); 105 | } else { 106 | x = *aData++; 107 | y = *bData++; 108 | } 109 | if (almostUnequalFloat(x, y)) { 110 | std::cerr 111 | << "Image pixels do not match at position ( row = " 112 | << ii << ", col = " << jj << ", channel = " 113 | << kk << ") expecting a value of " << y 114 | << " but got a value of " << x << std::endl; 115 | 116 | return false; 117 | } 118 | } 119 | } 120 | } 121 | return true; 122 | } 123 | } 124 | 125 | -------------------------------------------------------------------------------- /cudaSharedMemoryConvolution/Image.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by pietro bongini on 28/09/17. 
//

#ifndef KERNELPROCESSING_IMAGE_H
#define KERNELPROCESSING_IMAGE_H

#ifndef IMAGE_H_
#define IMAGE_H_

// Image stored as a single float buffer.
typedef struct {
    int width;    // pixels per row
    int height;   // number of rows
    int channels; // samples per pixel
    int pitch;    // floats per row; Image_new() sets it to width * channels
    float *data;  // pixel buffer; released with free() by Image_delete()
} Image_t;

// Channel count used by Image_new(width, height).
#define Image_channels 3

// Field accessors. Each expands to an lvalue, which the setters rely on.
#define Image_getWidth(img) ((img)->width)
#define Image_getHeight(img) ((img)->height)
#define Image_getChannels(img) ((img)->channels)
#define Image_getPitch(img) ((img)->pitch)
#define Image_getData(img) ((img)->data)

// Field setters. `val` is parenthesised so any expression can be passed.
#define Image_setWidth(img, val) (Image_getWidth(img) = (val))
#define Image_setHeight(img, val) (Image_getHeight(img) = (val))
#define Image_setChannels(img, val) (Image_getChannels(img) = (val))
#define Image_setPitch(img, val) (Image_getPitch(img) = (val))
#define Image_setData(img, val) (Image_getData(img) = (val))

// Image construction: wrap an existing buffer, allocate a new buffer, or
// allocate a new buffer with Image_channels channels.
Image_t* Image_new(int width, int height, int channels, float *data);
Image_t* Image_new(int width, int height, int channels);
Image_t* Image_new(int width, int height);

// Per-sample access: column x, row y, channel c.
float Image_getPixel(Image_t* img, int x, int y, int c);
void Image_setPixel(Image_t* img, int x, int y, int c, float val);

// Frees the image and its pixel buffer.
void Image_delete(Image_t* img);

// Returns true when the two images compare equal.
bool Image_is_same(Image_t* a, Image_t* b);

#endif /* IMAGE_H_ */



#endif //KERNELPROCESSING_IMAGE_H

// --------------------------------------------------------------------------------
// /cudaSharedMemoryConvolution/PPM.cpp:
// --------------------------------------------------------------------------------
//
// Created by pietro bongini on 28/09/17.
3 | // 4 | 5 | #include "PPM.h" 6 | #include "Utils.h" 7 | #include "Image.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | using namespace std; 15 | 16 | #define PPMREADBUFLEN 256 17 | 18 | static const char *skipSpaces(const char *line) { 19 | while (*line == ' ' || *line == '\t') { 20 | line++; 21 | if (*line == '\0') { 22 | break; 23 | } 24 | } 25 | return line; 26 | } 27 | 28 | static char nextNonSpaceChar(const char *line0) { 29 | const char *line = skipSpaces(line0); 30 | return *line; 31 | } 32 | 33 | static bool isComment(const char *line) { 34 | char nextChar = nextNonSpaceChar(line); 35 | if (nextChar == '\0') { 36 | return true; 37 | } else { 38 | return nextChar == '#'; 39 | } 40 | } 41 | 42 | static void parseDimensions(const char *line0, int *width, int *height) { 43 | const char *line = skipSpaces(line0); 44 | sscanf(line, "%d %d", width, height); 45 | } 46 | 47 | static void parseDimensions(const char *line0, int *width, int *height, 48 | int *channels) { 49 | const char *line = skipSpaces(line0); 50 | sscanf(line, "%d %d %d", width, height, channels); 51 | } 52 | 53 | static void parseDepth(const char *line0, int *depth) { 54 | const char *line = skipSpaces(line0); 55 | sscanf(line, "%d", depth); 56 | } 57 | 58 | static char *File_readLine(FILE* file) { 59 | static char buffer[PPMREADBUFLEN]; 60 | if (file == NULL) { 61 | return NULL; 62 | } 63 | memset(buffer, 0, PPMREADBUFLEN); 64 | 65 | if (fgets(buffer, PPMREADBUFLEN - 1, file)) { 66 | return buffer; 67 | } else { 68 | return NULL; 69 | } 70 | } 71 | 72 | static char *nextLine(FILE* file) { 73 | char *line = NULL; 74 | while ((line = File_readLine(file)) != NULL) { 75 | if (!isComment(line)) { 76 | break; 77 | } 78 | } 79 | return line; 80 | } 81 | 82 | char* File_read(FILE* file, size_t size, size_t count) { 83 | size_t res; 84 | char *buffer; 85 | size_t bufferLen; 86 | 87 | if (file == NULL) { 88 | return NULL; 89 | } 90 | 91 | bufferLen = size * count + 1; 
92 | buffer = (char*) malloc(sizeof(char) * bufferLen); 93 | 94 | res = fread(buffer, size, count, file); 95 | // make valid C string 96 | buffer[size * res] = '\0'; 97 | 98 | return buffer; 99 | } 100 | 101 | bool File_write(FILE* file, const void *buffer, size_t size, size_t count) { 102 | if (file == NULL) { 103 | return false; 104 | } 105 | 106 | size_t res = fwrite(buffer, size, count, file); 107 | if (res != count) { 108 | printf("ERROR: Failed to write data to PPM file"); 109 | } 110 | 111 | return true; 112 | } 113 | 114 | Image_t* PPM_import(const char *filename) { 115 | Image_t* img; 116 | FILE* file; 117 | char *header; 118 | char *line; 119 | int ii, jj, kk, channels; 120 | int width, height, depth; 121 | unsigned char *charData, *charIter; 122 | float *imgData, *floatIter; 123 | float scale; 124 | 125 | img = NULL; 126 | 127 | file = fopen(filename, "rb"); 128 | if (file == NULL) { 129 | printf("Could not open %s\n", filename); 130 | goto cleanup; 131 | } 132 | 133 | header = File_readLine(file); 134 | if (header == NULL) { 135 | printf("Could not read from %s\n", filename); 136 | goto cleanup; 137 | } else if (strcmp(header, "P6") != 0 && strcmp(header, "P6\n") != 0 138 | && strcmp(header, "P5") != 0 && strcmp(header, "P5\n") != 0 139 | && strcmp(header, "S6") != 0 && strcmp(header, "S6\n") != 0) { 140 | printf("Could not find magic number for %s\n", filename); 141 | goto cleanup; 142 | } 143 | 144 | // P5 are monochrome while P6/S6 are RGB 145 | // S6 needs to parse number of channels out of file 146 | if (strcmp(header, "P5") == 0 || strcmp(header, "P5\n") == 0) { 147 | channels = 1; 148 | line = nextLine(file); 149 | parseDimensions(line, &width, &height); 150 | } else if (strcmp(header, "P6") == 0 || strcmp(header, "P6\n") == 0) { 151 | channels = 3; 152 | line = nextLine(file); 153 | parseDimensions(line, &width, &height); 154 | } else { 155 | line = nextLine(file); 156 | parseDimensions(line, &width, &height, &channels); 157 | } 158 | 159 | // 
the line now contains the depth information 160 | line = nextLine(file); 161 | parseDepth(line, &depth); 162 | 163 | // the rest of the lines contain the data in binary format 164 | charData = (unsigned char *) File_read(file, 165 | width * channels * sizeof(unsigned char), height); 166 | 167 | img = Image_new(width, height, channels); 168 | 169 | imgData = Image_getData(img); 170 | 171 | charIter = charData; 172 | floatIter = imgData; 173 | scale = 1.0f / ((float) depth); 174 | 175 | for (ii = 0; ii < height; ii++) { 176 | for (jj = 0; jj < width; jj++) { 177 | for (kk = 0; kk < channels; kk++) { 178 | *floatIter = ((float) *charIter) * scale; 179 | floatIter++; 180 | charIter++; 181 | } 182 | } 183 | } 184 | 185 | cleanup: fclose(file); 186 | return img; 187 | } 188 | 189 | bool PPM_export(const char *filename, Image_t* img) { 190 | int ii; 191 | int jj; 192 | int kk; 193 | int depth; 194 | int width; 195 | int height; 196 | int channels; 197 | FILE* file; 198 | float *floatIter; 199 | unsigned char *charData; 200 | unsigned char *charIter; 201 | 202 | file = fopen(filename, "wb+"); 203 | if (file == NULL) { 204 | printf("Could not open %s in mode %s\n", filename, "wb+"); 205 | return false; 206 | } 207 | 208 | width = Image_getWidth(img); 209 | height = Image_getHeight(img); 210 | channels = Image_getChannels(img); 211 | depth = 255; 212 | 213 | if (channels == 1) { 214 | fprintf(file, "P5\n"); 215 | } else { 216 | fprintf(file, "P6\n"); 217 | } 218 | fprintf(file, "#Created via PPM Export\n"); 219 | fprintf(file, "%d %d\n", width, height); 220 | fprintf(file, "%d\n", depth); 221 | 222 | charData = (unsigned char*) malloc( 223 | sizeof(unsigned char) * width * height * channels); 224 | 225 | charIter = charData; 226 | floatIter = Image_getData(img); 227 | for (ii = 0; ii < height; ii++) { 228 | for (jj = 0; jj < width; jj++) { 229 | for (kk = 0; kk < channels; kk++) { 230 | *charIter = (unsigned char) ceil( 231 | clamp(*floatIter, 0, 1) * depth); 232 | 
floatIter++; 233 | charIter++; 234 | } 235 | } 236 | } 237 | 238 | bool writeResult = File_write(file, charData, 239 | width * channels * sizeof(unsigned char), height); 240 | 241 | free(charData); 242 | fflush(file); 243 | fclose(file); 244 | 245 | return true; 246 | } 247 | -------------------------------------------------------------------------------- /cudaSharedMemoryConvolution/PPM.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by pietro bongini on 28/09/17. 3 | // 4 | 5 | #ifndef KERNELPROCESSING_PPM_H 6 | #define KERNELPROCESSING_PPM_H 7 | 8 | #include "Image.h" 9 | 10 | Image_t* PPM_import(const char *filename); 11 | bool PPM_export(const char *filename, Image_t* img); 12 | 13 | 14 | 15 | #endif //KERNELPROCESSING_PPM_H 16 | 17 | -------------------------------------------------------------------------------- /cudaSharedMemoryConvolution/Utils.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by pietro bongini on 28/09/17. 3 | // 4 | 5 | #ifndef KERNELPROCESSING_UTILS_H 6 | #define KERNELPROCESSING_UTILS_H 7 | 8 | template 9 | static inline T _abs(const T &a) { 10 | return a < 0 ? -a : a; 11 | } 12 | 13 | static inline bool almostEqualFloat(float A, float B, float eps) { 14 | if (A == 0) { 15 | return _abs(B) < eps; 16 | } else if (B == 0) { 17 | return _abs(A) < eps; 18 | } else { 19 | #if 0 20 | float d = max(_abs(A), _abs(B)); 21 | float g = (_abs(A - B) / d); 22 | #else 23 | float g = _abs(A - B); 24 | #endif 25 | if (g <= eps) { 26 | return true; 27 | } else { 28 | return false; 29 | } 30 | } 31 | } 32 | 33 | static inline bool almostEqualFloat(float A, float B) { 34 | return almostEqualFloat(A, B, 0.2f); 35 | } 36 | 37 | static inline bool almostUnequalFloat(float a, float b) { 38 | return !almostEqualFloat(a, b); 39 | } 40 | 41 | static inline float _min(float x, float y) { 42 | return x < y ? 
x : y; 43 | } 44 | 45 | static inline float _max(float x, float y) { 46 | return x > y ? x : y; 47 | } 48 | 49 | static inline float clamp(float x, float start, float end) { 50 | return _min(_max(x, start), end); 51 | } 52 | 53 | 54 | #endif //KERNELPROCESSING_UTILS_H 55 | -------------------------------------------------------------------------------- /cudaSharedMemoryConvolution/img/computer_programming.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pietrobongini/CUDA-ImageConvolution/7948b1b96e08b1ea114d465e017eab023852a2c2/cudaSharedMemoryConvolution/img/computer_programming.ppm -------------------------------------------------------------------------------- /cudaSharedMemoryConvolution/img/panda.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pietrobongini/CUDA-ImageConvolution/7948b1b96e08b1ea114d465e017eab023852a2c2/cudaSharedMemoryConvolution/img/panda.ppm -------------------------------------------------------------------------------- /cudaSharedMemoryConvolution/mainShared.cu: -------------------------------------------------------------------------------- 1 | #include "Image.h" 2 | #include "PPM.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | using namespace std; 10 | using namespace std:: chrono; 11 | 12 | 13 | #define TILE_WIDTH 16 14 | #define maskCols 5 15 | #define maskRows 5 16 | #define w (TILE_WIDTH + maskCols -1) 17 | 18 | 19 | __global__ void tilingKernelProcessing(float * InputImageData, const float *__restrict__ kernel, 20 | float* outputImageData, int channels, int width, int height){ 21 | 22 | __shared__ float N_ds[w][w]; //block of image in shared memory 23 | 24 | 25 | // allocation in shared memory of image blocks 26 | int maskRadius = maskRows/2; 27 | for (int k = 0; k = 0 && srcY < height && srcX>=0 && srcX < width) 35 | N_ds[destY][destX] = 
InputImageData[src]; // copy element of image in shared memory 36 | else 37 | N_ds[destY][destX] = 0; 38 | 39 | 40 | 41 | dest = threadIdx.y * TILE_WIDTH+ threadIdx.x + TILE_WIDTH * TILE_WIDTH; 42 | destY = dest/w; 43 | destX = dest%w; 44 | srcY = blockIdx.y *TILE_WIDTH + destY - maskRadius; 45 | srcX = blockIdx.x *TILE_WIDTH + destX - maskRadius; 46 | src = (srcY *width +srcX) * channels + k; 47 | if(destY < w){ 48 | if(srcY>= 0 && srcY < height && srcX>=0 && srcX < width) 49 | N_ds[destY][destX] = InputImageData[src]; 50 | else 51 | N_ds[destY][destX] = 0; 52 | } 53 | 54 | __syncthreads(); 55 | 56 | 57 | //compute kernel convolution 58 | float accum = 0; 59 | int y, x; 60 | for (y= 0; y < maskCols; y++) 61 | for(x = 0; x>>(deviceInputImageData, deviceMaskData, deviceOutputImageData, 139 | imageChannels, imageWidth, imageHeight); 140 | 141 | high_resolution_clock::time_point end= high_resolution_clock::now(); 142 | chrono::duration duration = end - start; 143 | cout << duration.count()*1000 << endl; 144 | cout << "----------------------------------" << endl; 145 | 146 | cudaMemcpy(hostOutputImageData, deviceOutputImageData, imageWidth * imageHeight * 147 | imageChannels * sizeof(float), cudaMemcpyDeviceToHost); 148 | 149 | PPM_export("/home/pietrobongini/cuda-workspace/cudaSharedMemoryConvolution/output/result.ppm", outputImage); 150 | 151 | cudaMemset(deviceInputImageData,0,imageWidth * imageHeight * 152 | imageChannels * sizeof(float)); 153 | cudaMemset(deviceOutputImageData,0,imageWidth * imageHeight * 154 | imageChannels * sizeof(float)); 155 | cudaMemset(deviceMaskData,0,maskRows * maskCols 156 | * sizeof(float)); 157 | cudaFree(deviceInputImageData); 158 | cudaFree(deviceOutputImageData); 159 | cudaFree(deviceMaskData); 160 | 161 | Image_delete(outputImage); 162 | Image_delete(inputImage); 163 | 164 | 165 | } 166 | -------------------------------------------------------------------------------- /cudaSharedMemoryConvolution/output/result.ppm: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pietrobongini/CUDA-ImageConvolution/7948b1b96e08b1ea114d465e017eab023852a2c2/cudaSharedMemoryConvolution/output/result.ppm -------------------------------------------------------------------------------- /paper_image-convolution.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pietrobongini/CUDA-ImageConvolution/7948b1b96e08b1ea114d465e017eab023852a2c2/paper_image-convolution.pdf -------------------------------------------------------------------------------- /presentation_image-convolution.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pietrobongini/CUDA-ImageConvolution/7948b1b96e08b1ea114d465e017eab023852a2c2/presentation_image-convolution.pdf -------------------------------------------------------------------------------- /sequentialConvolution/Image.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by pietro bongini on 28/09/17. 
3 | // 4 | 5 | #include "Image.h" 6 | #include "Utils.h" 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | //metodo che restituisce l'immagine 13 | Image_t* Image_new(int width, int height, int channels, float *data) { 14 | Image_t* img; 15 | 16 | img = (Image_t*) malloc(sizeof(Image_t)); 17 | 18 | Image_setWidth(img, width); 19 | Image_setHeight(img, height); 20 | Image_setChannels(img, channels); 21 | Image_setPitch(img, width * channels); 22 | 23 | Image_setData(img, data); 24 | return img; 25 | } 26 | 27 | Image_t* Image_new(int width, int height, int channels) { 28 | float *data = (float*) malloc(sizeof(float) * width * height * channels); 29 | return Image_new(width, height, channels, data); 30 | } 31 | 32 | Image_t* Image_new(int width, int height) { 33 | return Image_new(width, height, Image_channels); 34 | } 35 | 36 | //metodo che cancella l'immagine 37 | void Image_delete(Image_t* img) { 38 | if (img != NULL) { 39 | if (Image_getData(img) != NULL) { 40 | free(Image_getData(img)); 41 | } 42 | free(img); 43 | } 44 | } 45 | 46 | //metodo setter per un pixel 47 | void Image_setPixel(Image_t* img, int x, int y, int c, float val) { 48 | float *data = Image_getData(img); 49 | int channels = Image_getChannels(img); 50 | int pitch = Image_getPitch(img); 51 | 52 | data[y * pitch + x * channels + c] = val; 53 | 54 | return; 55 | } 56 | 57 | //metodo getter per un pixel 58 | float Image_getPixel(Image_t* img, int x, int y, int c) { 59 | float *data = Image_getData(img); 60 | int channels = Image_getChannels(img); 61 | int pitch = Image_getPitch(img); 62 | 63 | return data[y * pitch + x * channels + c]; 64 | } 65 | 66 | //confronto tra immagini 67 | bool Image_is_same(Image_t* a, Image_t* b) { 68 | if (a == NULL || b == NULL) { 69 | std::cerr << "Comparing null images." << std::endl; 70 | return false; 71 | } else if (a == b) { 72 | return true; 73 | } else if (Image_getWidth(a) != Image_getWidth(b)) { 74 | std::cerr << "Image widths do not match." 
<< std::endl; 75 | return false; 76 | } else if (Image_getHeight(a) != Image_getHeight(b)) { 77 | std::cerr << "Image heights do not match." << std::endl; 78 | return false; 79 | } else if (Image_getChannels(a) != Image_getChannels(b)) { 80 | std::cerr << "Image channels do not match." << std::endl; 81 | return false; 82 | } else { 83 | float *aData, *bData; 84 | int width, height, channels; 85 | int ii, jj, kk; 86 | 87 | aData = Image_getData(a); 88 | bData = Image_getData(b); 89 | 90 | assert(aData != NULL); 91 | assert(bData != NULL); 92 | 93 | width = Image_getWidth(a); 94 | height = Image_getHeight(a); 95 | channels = Image_getChannels(a); 96 | 97 | for (ii = 0; ii < height; ii++) { 98 | for (jj = 0; jj < width; jj++) { 99 | for (kk = 0; kk < channels; kk++) { 100 | float x, y; 101 | if (channels <= 3) { 102 | x = clamp(*aData++, 0, 1); 103 | y = clamp(*bData++, 0, 1); 104 | } else { 105 | x = *aData++; 106 | y = *bData++; 107 | } 108 | if (almostUnequalFloat(x, y)) { 109 | std::cerr 110 | << "Image pixels do not match at position ( row = " 111 | << ii << ", col = " << jj << ", channel = " 112 | << kk << ") expecting a value of " << y 113 | << " but got a value of " << x << std::endl; 114 | 115 | return false; 116 | } 117 | } 118 | } 119 | } 120 | return true; 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /sequentialConvolution/Image.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by pietro bongini on 28/09/17. 
//

#ifndef KERNELPROCESSING_IMAGE_H
#define KERNELPROCESSING_IMAGE_H


#ifndef IMAGE_H_
#define IMAGE_H_

// Image stored as a single float buffer.
typedef struct {
    int width;    // pixels per row
    int height;   // number of rows
    int channels; // samples per pixel
    int pitch;    // floats per row; Image_new() sets it to width * channels
    float *data;  // pixel buffer; released with free() by Image_delete()
} Image_t;

// Channel count used by Image_new(width, height).
#define Image_channels 3

// Field accessors. Each expands to an lvalue, which the setters rely on.
#define Image_getWidth(img) ((img)->width)
#define Image_getHeight(img) ((img)->height)
#define Image_getChannels(img) ((img)->channels)
#define Image_getPitch(img) ((img)->pitch)
#define Image_getData(img) ((img)->data)

// Field setters. `val` is parenthesised so any expression can be passed.
#define Image_setWidth(img, val) (Image_getWidth(img) = (val))
#define Image_setHeight(img, val) (Image_getHeight(img) = (val))
#define Image_setChannels(img, val) (Image_getChannels(img) = (val))
#define Image_setPitch(img, val) (Image_getPitch(img) = (val))
#define Image_setData(img, val) (Image_getData(img) = (val))

// Image construction: wrap an existing buffer, allocate a new buffer, or
// allocate a new buffer with Image_channels channels.
Image_t* Image_new(int width, int height, int channels, float *data);
Image_t* Image_new(int width, int height, int channels);
Image_t* Image_new(int width, int height);

// Per-sample access: column x, row y, channel c.
float Image_getPixel(Image_t* img, int x, int y, int c);
void Image_setPixel(Image_t* img, int x, int y, int c, float val);

// Frees the image and its pixel buffer.
void Image_delete(Image_t* img);

// Returns true when the two images compare equal.
bool Image_is_same(Image_t* a, Image_t* b);

#endif /* IMAGE_H_ */



#endif //KERNELPROCESSING_IMAGE_H

// --------------------------------------------------------------------------------
// /sequentialConvolution/PPM.cpp:
// --------------------------------------------------------------------------------
//
// Created by pietro bongini on 28/09/17.
3 | // 4 | 5 | #include "PPM.h" 6 | #include "Utils.h" 7 | #include "Image.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | using namespace std; 15 | 16 | #define PPMREADBUFLEN 256 17 | 18 | static const char *skipSpaces(const char *line) { 19 | while (*line == ' ' || *line == '\t') { 20 | line++; 21 | if (*line == '\0') { 22 | break; 23 | } 24 | } 25 | return line; 26 | } 27 | 28 | static char nextNonSpaceChar(const char *line0) { 29 | const char *line = skipSpaces(line0); 30 | return *line; 31 | } 32 | 33 | static bool isComment(const char *line) { 34 | char nextChar = nextNonSpaceChar(line); 35 | if (nextChar == '\0') { 36 | return true; 37 | } else { 38 | return nextChar == '#'; 39 | } 40 | } 41 | 42 | static void parseDimensions(const char *line0, int *width, int *height) { 43 | const char *line = skipSpaces(line0); 44 | sscanf(line, "%d %d", width, height); 45 | } 46 | 47 | static void parseDimensions(const char *line0, int *width, int *height, 48 | int *channels) { 49 | const char *line = skipSpaces(line0); 50 | sscanf(line, "%d %d %d", width, height, channels); 51 | } 52 | 53 | static void parseDepth(const char *line0, int *depth) { 54 | const char *line = skipSpaces(line0); 55 | sscanf(line, "%d", depth); 56 | } 57 | 58 | static char *File_readLine(FILE* file) { 59 | static char buffer[PPMREADBUFLEN]; 60 | if (file == NULL) { 61 | return NULL; 62 | } 63 | memset(buffer, 0, PPMREADBUFLEN); 64 | 65 | if (fgets(buffer, PPMREADBUFLEN - 1, file)) { 66 | return buffer; 67 | } else { 68 | return NULL; 69 | } 70 | } 71 | 72 | static char *nextLine(FILE* file) { 73 | char *line = NULL; 74 | while ((line = File_readLine(file)) != NULL) { 75 | if (!isComment(line)) { 76 | break; 77 | } 78 | } 79 | return line; 80 | } 81 | 82 | char* File_read(FILE* file, size_t size, size_t count) { 83 | size_t res; 84 | char *buffer; 85 | size_t bufferLen; 86 | 87 | if (file == NULL) { 88 | return NULL; 89 | } 90 | 91 | bufferLen = size * count + 1; 
92 | buffer = (char*) malloc(sizeof(char) * bufferLen); 93 | 94 | res = fread(buffer, size, count, file); 95 | // make valid C string 96 | buffer[size * res] = '\0'; 97 | 98 | return buffer; 99 | } 100 | 101 | bool File_write(FILE* file, const void *buffer, size_t size, size_t count) { 102 | if (file == NULL) { 103 | return false; 104 | } 105 | 106 | size_t res = fwrite(buffer, size, count, file); 107 | if (res != count) { 108 | printf("ERROR: Failed to write data to PPM file"); 109 | } 110 | 111 | return true; 112 | } 113 | 114 | Image_t* PPM_import(const char *filename) { 115 | Image_t* img; 116 | FILE* file; 117 | char *header; 118 | char *line; 119 | int ii, jj, kk, channels; 120 | int width, height, depth; 121 | unsigned char *charData, *charIter; 122 | float *imgData, *floatIter; 123 | float scale; 124 | 125 | img = NULL; 126 | 127 | file = fopen(filename, "rb"); 128 | if (file == NULL) { 129 | printf("Could not open %s\n", filename); 130 | goto cleanup; 131 | } 132 | 133 | header = File_readLine(file); 134 | if (header == NULL) { 135 | printf("Could not read from %s\n", filename); 136 | goto cleanup; 137 | } else if (strcmp(header, "P6") != 0 && strcmp(header, "P6\n") != 0 138 | && strcmp(header, "P5") != 0 && strcmp(header, "P5\n") != 0 139 | && strcmp(header, "S6") != 0 && strcmp(header, "S6\n") != 0) { 140 | printf("Could not find magic number for %s\n", filename); 141 | goto cleanup; 142 | } 143 | 144 | // P5 are monochrome while P6/S6 are RGB 145 | // S6 needs to parse number of channels out of file 146 | if (strcmp(header, "P5") == 0 || strcmp(header, "P5\n") == 0) { 147 | channels = 1; 148 | line = nextLine(file); 149 | parseDimensions(line, &width, &height); 150 | } else if (strcmp(header, "P6") == 0 || strcmp(header, "P6\n") == 0) { 151 | channels = 3; 152 | line = nextLine(file); 153 | parseDimensions(line, &width, &height); 154 | } else { 155 | line = nextLine(file); 156 | parseDimensions(line, &width, &height, &channels); 157 | } 158 | 159 | // 
the line now contains the depth information 160 | line = nextLine(file); 161 | parseDepth(line, &depth); 162 | 163 | // the rest of the lines contain the data in binary format 164 | charData = (unsigned char *) File_read(file, 165 | width * channels * sizeof(unsigned char), height); 166 | 167 | img = Image_new(width, height, channels); 168 | 169 | imgData = Image_getData(img); 170 | 171 | charIter = charData; 172 | floatIter = imgData; 173 | scale = 1.0f / ((float) depth); 174 | 175 | for (ii = 0; ii < height; ii++) { 176 | for (jj = 0; jj < width; jj++) { 177 | for (kk = 0; kk < channels; kk++) { 178 | *floatIter = ((float) *charIter) * scale; 179 | floatIter++; 180 | charIter++; 181 | } 182 | } 183 | } 184 | 185 | cleanup: fclose(file); 186 | return img; 187 | } 188 | 189 | bool PPM_export(const char *filename, Image_t* img) { 190 | int ii; 191 | int jj; 192 | int kk; 193 | int depth; 194 | int width; 195 | int height; 196 | int channels; 197 | FILE* file; 198 | float *floatIter; 199 | unsigned char *charData; 200 | unsigned char *charIter; 201 | 202 | file = fopen(filename, "wb+"); 203 | if (file == NULL) { 204 | printf("Could not open %s in mode %s\n", filename, "wb+"); 205 | return false; 206 | } 207 | 208 | width = Image_getWidth(img); 209 | height = Image_getHeight(img); 210 | channels = Image_getChannels(img); 211 | depth = 255; 212 | 213 | if (channels == 1) { 214 | fprintf(file, "P5\n"); 215 | } else { 216 | fprintf(file, "P6\n"); 217 | } 218 | fprintf(file, "#Created via PPM Export\n"); 219 | fprintf(file, "%d %d\n", width, height); 220 | fprintf(file, "%d\n", depth); 221 | 222 | charData = (unsigned char*) malloc( 223 | sizeof(unsigned char) * width * height * channels); 224 | 225 | charIter = charData; 226 | floatIter = Image_getData(img); 227 | for (ii = 0; ii < height; ii++) { 228 | for (jj = 0; jj < width; jj++) { 229 | for (kk = 0; kk < channels; kk++) { 230 | *charIter = (unsigned char) ceil( 231 | clamp(*floatIter, 0, 1) * depth); 232 | 
floatIter++; 233 | charIter++; 234 | } 235 | } 236 | } 237 | 238 | bool writeResult = File_write(file, charData, 239 | width * channels * sizeof(unsigned char), height); 240 | 241 | free(charData); 242 | fflush(file); 243 | fclose(file); 244 | 245 | return true; 246 | } -------------------------------------------------------------------------------- /sequentialConvolution/PPM.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by pietro bongini on 28/09/17. 3 | // 4 | 5 | #ifndef KERNELPROCESSING_PPM_H 6 | #define KERNELPROCESSING_PPM_H 7 | 8 | #include "Image.h" 9 | 10 | Image_t* PPM_import(const char *filename); 11 | bool PPM_export(const char *filename, Image_t* img); 12 | 13 | 14 | 15 | #endif //KERNELPROCESSING_PPM_H 16 | -------------------------------------------------------------------------------- /sequentialConvolution/Utils.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by pietro bongini on 28/09/17. 3 | // 4 | 5 | #ifndef KERNELPROCESSING_UTILS_H 6 | #define KERNELPROCESSING_UTILS_H 7 | 8 | template 9 | static inline T _abs(const T &a) { 10 | return a < 0 ? -a : a; 11 | } 12 | 13 | static inline bool almostEqualFloat(float A, float B, float eps) { 14 | if (A == 0) { 15 | return _abs(B) < eps; 16 | } else if (B == 0) { 17 | return _abs(A) < eps; 18 | } else { 19 | #if 0 20 | float d = max(_abs(A), _abs(B)); 21 | float g = (_abs(A - B) / d); 22 | #else 23 | float g = _abs(A - B); 24 | #endif 25 | if (g <= eps) { 26 | return true; 27 | } else { 28 | return false; 29 | } 30 | } 31 | } 32 | 33 | static inline bool almostEqualFloat(float A, float B) { 34 | return almostEqualFloat(A, B, 0.2f); 35 | } 36 | 37 | static inline bool almostUnequalFloat(float a, float b) { 38 | return !almostEqualFloat(a, b); 39 | } 40 | 41 | static inline float _min(float x, float y) { 42 | return x < y ? 
x : y; 43 | } 44 | 45 | static inline float _max(float x, float y) { 46 | return x > y ? x : y; 47 | } 48 | 49 | static inline float clamp(float x, float start, float end) { 50 | return _min(_max(x, start), end); 51 | } 52 | 53 | 54 | #endif //KERNELPROCESSING_UTILS_H 55 | -------------------------------------------------------------------------------- /sequentialConvolution/img/computer_programming.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pietrobongini/CUDA-ImageConvolution/7948b1b96e08b1ea114d465e017eab023852a2c2/sequentialConvolution/img/computer_programming.ppm -------------------------------------------------------------------------------- /sequentialConvolution/img/panda.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pietrobongini/CUDA-ImageConvolution/7948b1b96e08b1ea114d465e017eab023852a2c2/sequentialConvolution/img/panda.ppm -------------------------------------------------------------------------------- /sequentialConvolution/main.cu: -------------------------------------------------------------------------------- 1 | #include "Image.h" 2 | #include "PPM.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | using namespace std; 10 | using namespace std:: chrono; 11 | 12 | void sequentialConvolution(Image_t*inputImage,const float * kernel ,float * outputImageData, int kernelSizeX, int kernelSizeY, int dataSizeX, int dataSizeY, int channels) 13 | { 14 | int i, j, m, n, mm, nn; 15 | int kCenterX, kCenterY; // center index of kernel 16 | float sum; // accumulation variable 17 | int rowIndex, colIndex; // indice di riga e di colonna 18 | 19 | float * inputImageData = Image_getData(inputImage); 20 | kCenterX = kernelSizeX / 2; 21 | kCenterY = kernelSizeY / 2; 22 | 23 | for (int k=0; k= 0 && rowIndex < dataSizeY && colIndex >= 0 && colIndex < dataSizeX) 43 | sum += 
inputImageData[(dataSizeX * rowIndex + colIndex)*channels + k] * kernel[kernelSizeX * mm + nn]; 44 | } 45 | } 46 | outputImageData[(dataSizeX * i + j)*channels + k] = sum; 47 | 48 | } 49 | } 50 | } 51 | } 52 | 53 | int main() { 54 | 55 | Image_t *inputImage; 56 | int imageChannels; 57 | int imageWidth; 58 | int imageHeight; 59 | float *InputImageData; 60 | Image_t *outputImage; 61 | 62 | const int kernelRows = 5; 63 | const int kernelColumns = 5; 64 | const float kernelMask[kernelRows * kernelColumns] = {0.04, 0.04, 0.04, 0.04, 0.04, 65 | 0.04, 0.04, 0.04, 0.04, 0.04, 66 | 0.04, 0.04, 0.04, 0.04, 0.04, 67 | 0.04, 0.04, 0.04, 0.04, 0.04, 68 | 0.04, 0.04, 0.04, 0.04, 0.04, 69 | 70 | 71 | 72 | }; 73 | 74 | inputImage = PPM_import("/home/pietrobongini/cuda-workspace/sequentialConvolution/img/computer_programming.ppm"); 75 | imageWidth = Image_getWidth(inputImage); 76 | imageHeight = Image_getHeight(inputImage); 77 | imageChannels = Image_getChannels(inputImage); 78 | cout << "dimensioni immagine: " << imageWidth << " x " << imageHeight << endl; 79 | cout << "numero canali: " << imageChannels << endl; 80 | InputImageData = Image_getData(inputImage); 81 | float *outputImageData = Image_getData(inputImage); 82 | 83 | outputImage = inputImage; 84 | cout << "-------------------------" << endl; 85 | cout << "sequential kernel convolution" <(endSeq - startSeq).count() / 1000; 92 | cout << "end kernel processing" << endl; 93 | cout << "elapsed in time: " << durationSeq << endl; 94 | Image_setData(outputImage, outputImageData); 95 | PPM_export("/home/pietrobongini/cuda-workspace/sequentialConvolution/output/result.ppm", outputImage); 96 | 97 | cout << "-------------------------" << endl; 98 | 99 | } 100 | -------------------------------------------------------------------------------- /sequentialConvolution/output/result.ppm: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pietrobongini/CUDA-ImageConvolution/7948b1b96e08b1ea114d465e017eab023852a2c2/sequentialConvolution/output/result.ppm --------------------------------------------------------------------------------