├── .gitignore ├── LectureQuizzes ├── 1_30.cu ├── 2_18.cu ├── 2_37.cu └── gputimer.h ├── README.md └── assignments ├── CMakeLists.txt ├── HW1 ├── CMakeLists.txt ├── HW1.cpp ├── HW1_differenceImage.png ├── HW1_output.png ├── HW1_reference.png ├── Makefile ├── cinque_terre.gold ├── cinque_terre_small.jpg ├── compare.cpp ├── compare.h ├── main.cpp ├── reference_calc.cpp ├── reference_calc.h ├── student_func.cu ├── timer.h └── utils.h ├── HW2 ├── CMakeLists.txt ├── HW2.cpp ├── Makefile ├── cinque_terre.gold ├── cinque_terre_small.jpg ├── compare.cpp ├── compare.h ├── main.cpp ├── reference_calc.cpp ├── reference_calc.h ├── student_func.cu ├── timer.h └── utils.h ├── HW3 ├── CMakeLists.txt ├── HW3.cu ├── Makefile ├── compare.cpp ├── compare.h ├── loadSaveImage.cpp ├── loadSaveImage.h ├── main.cpp ├── memorial.exr ├── memorial_large.exr ├── memorial_png.gold ├── memorial_png_large.gold ├── memorial_raw.png ├── memorial_raw_large.png ├── reference_calc.cpp ├── reference_calc.h ├── student_func.cu ├── timer.h └── utils.h ├── HW4 ├── CMakeLists.txt ├── HW4.cu ├── Makefile ├── compare.cpp ├── compare.h ├── loadSaveImage.cpp ├── loadSaveImage.h ├── main.cpp ├── red_eye_effect.gold ├── red_eye_effect_5.jpg ├── red_eye_effect_template_5.jpg ├── reference_calc.cpp ├── reference_calc.h ├── student_func.cu ├── timer.h └── utils.h ├── HW5 ├── CMakeLists.txt ├── Makefile ├── main.cu ├── reference_calc.cpp ├── reference_calc.h ├── student.cu ├── timer.h └── utils.h ├── HW6 ├── CMakeLists.txt ├── HW6.cu ├── Makefile ├── blended.gold ├── compare.cpp ├── compare.h ├── destination.png ├── loadSaveImage.cpp ├── loadSaveImage.h ├── main.cpp ├── reference_calc.cpp ├── reference_calc.h ├── source.png ├── student_func.cu ├── timer.h └── utils.h └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | CMakeFiles 2 | *.cmake 3 | bin 4 | build 5 | -------------------------------------------------------------------------------- 
/LectureQuizzes/1_30.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | __global__ void cube(float *d_out, float *d_in) { 4 | // Todo: Fill in this function 5 | } 6 | 7 | int main(int argc, char **argv) { 8 | const int ARRAY_SIZE = 64; 9 | const int ARRAY_BYTES = ARRAY_SIZE * sizeof(float); 10 | 11 | // generate the input array on the host 12 | float h_in[ARRAY_SIZE]; 13 | for (int i = 0; i < ARRAY_SIZE; i++) { 14 | h_in[i] = float(i); 15 | } 16 | float h_out[ARRAY_SIZE]; 17 | 18 | // declare GPU memory pointers 19 | float *d_in; 20 | float *d_out; 21 | 22 | // allocate GPU memory 23 | cudaMalloc((void **)&d_in, ARRAY_BYTES); 24 | cudaMalloc((void **)&d_out, ARRAY_BYTES); 25 | 26 | // transfer the array to the GPU 27 | cudaMemcpy(d_in, h_in, ARRAY_BYTES, cudaMemcpyHostToDevice); 28 | 29 | // launch the kernel 30 | cube<<<1, ARRAY_SIZE>>>(d_out, d_in); 31 | 32 | // copy back the result array to the CPU 33 | cudaMemcpy(h_out, d_out, ARRAY_BYTES, cudaMemcpyDeviceToHost); 34 | 35 | // print out the resulting array 36 | for (int i = 0; i < ARRAY_SIZE; i++) { 37 | printf("%f", h_out[i]); 38 | printf(((i % 4) != 3) ? "\t" : "\n"); 39 | } 40 | 41 | cudaFree(d_in); 42 | cudaFree(d_out); 43 | 44 | return 0; 45 | } -------------------------------------------------------------------------------- /LectureQuizzes/2_18.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define NUM_BLOCKS 16 4 | #define BLOCK_WIDTH 1 5 | 6 | __global__ void hello() 7 | { 8 | printf("Hello world! 
I'm a thread in block %d\n", blockIdx.x); 9 | } 10 | 11 | 12 | int main(int argc,char **argv) 13 | { 14 | // launch the kernel 15 | hello<<>>(); 16 | 17 | // force the printf()s to flush 18 | cudaDeviceSynchronize(); 19 | 20 | printf("That's all!\n"); 21 | 22 | return 0; 23 | } -------------------------------------------------------------------------------- /LectureQuizzes/2_37.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include "gputimer.h" 3 | 4 | #define NUM_THREADS 1000000 5 | #define ARRAY_SIZE 100 6 | bool atomic = true; 7 | 8 | #define BLOCK_WIDTH 1000 9 | 10 | void print_array(int *array, int size) 11 | { 12 | printf("{ "); 13 | for (int i = 0; i < size; i++) { printf("%d ", array[i]); } 14 | printf("}\n"); 15 | } 16 | 17 | __global__ void increment_naive(int *g) 18 | { 19 | // which thread is this? 20 | int i = blockIdx.x * blockDim.x + threadIdx.x; 21 | 22 | // each thread to increment consecutive elements, wrapping at ARRAY_SIZE 23 | i = i % ARRAY_SIZE; 24 | g[i] = g[i] + 1; 25 | } 26 | 27 | __global__ void increment_atomic(int *g) 28 | { 29 | // which thread is this? 
30 | int i = blockIdx.x * blockDim.x + threadIdx.x; 31 | 32 | // each thread to increment consecutive elements, wrapping at ARRAY_SIZE 33 | i = i % ARRAY_SIZE; 34 | atomicAdd(& g[i], 1); 35 | } 36 | 37 | int main(int argc,char **argv) 38 | { 39 | GpuTimer timer; 40 | if (atomic) { 41 | printf("atomic %d total threads in %d blocks writing into %d array elements\n", 42 | NUM_THREADS, NUM_THREADS / BLOCK_WIDTH, ARRAY_SIZE); 43 | } else { 44 | printf("%d total threads in %d blocks writing into %d array elements\n", 45 | NUM_THREADS, NUM_THREADS / BLOCK_WIDTH, ARRAY_SIZE); 46 | } 47 | 48 | // declare and allocate host memory 49 | int h_array[ARRAY_SIZE]; 50 | const int ARRAY_BYTES = ARRAY_SIZE * sizeof(int); 51 | 52 | // declare, allocate, and zero out GPU memory 53 | int * d_array; 54 | cudaMalloc((void **) &d_array, ARRAY_BYTES); 55 | cudaMemset((void *) d_array, 0, ARRAY_BYTES); 56 | 57 | // launch the kernel - comment out one of these 58 | timer.Start(); 59 | 60 | // Instructions: This program is needed for the next quiz 61 | // uncomment increment_naive to measure speed and accuracy 62 | // of non-atomic increments or uncomment increment_atomic to 63 | // measure speed and accuracy of atomic increments 64 | if (atomic) { 65 | increment_atomic<<>>(d_array); 66 | } else { 67 | increment_naive<<>>(d_array); 68 | } 69 | timer.Stop(); 70 | 71 | // copy back the array of sums from GPU and print 72 | cudaMemcpy(h_array, d_array, ARRAY_BYTES, cudaMemcpyDeviceToHost); 73 | // print_array(h_array, ARRAY_SIZE); 74 | printf("Time elapsed = %g ms\n", timer.Elapsed()); 75 | 76 | // free GPU memory allocation and exit 77 | cudaFree(d_array); 78 | return 0; 79 | } -------------------------------------------------------------------------------- /LectureQuizzes/gputimer.h: -------------------------------------------------------------------------------- 1 | #ifndef __GPU_TIMER_H__ 2 | #define __GPU_TIMER_H__ 3 | 4 | struct GpuTimer 5 | { 6 | cudaEvent_t start; 7 | cudaEvent_t stop; 8 
| 9 | GpuTimer() 10 | { 11 | cudaEventCreate(&start); 12 | cudaEventCreate(&stop); 13 | } 14 | 15 | ~GpuTimer() 16 | { 17 | cudaEventDestroy(start); 18 | cudaEventDestroy(stop); 19 | } 20 | 21 | void Start() 22 | { 23 | cudaEventRecord(start, 0); 24 | } 25 | 26 | void Stop() 27 | { 28 | cudaEventRecord(stop, 0); 29 | } 30 | 31 | float Elapsed() 32 | { 33 | float elapsed; 34 | cudaEventSynchronize(stop); 35 | cudaEventElapsedTime(&elapsed, start, stop); 36 | return elapsed; 37 | } 38 | }; 39 | 40 | #endif /* __GPU_TIMER_H__ */ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | These assignments require OpenCV as a prerequisite. The easiest way to install is probably with conda. 2 | 3 | To install OpenCV in a conda environment: 4 | ``` 5 | conda create -n cs344 -y 6 | conda activate cs344 7 | conda install -y -c anaconda opencv 8 | ``` 9 | 10 | To build: 11 | ``` 12 | cd assignments 13 | mkdir build 14 | cd build 15 | cmake .. 16 | make 17 | ``` 18 | The binaries will then be contained within `assignments/bin`. 19 | 20 | # HW1 Passing Instructions 21 | From the `HW1` directory: 22 | 23 | Run `../bin/HW1 HW1/cinque_terre_small.jpg` -------------------------------------------------------------------------------- /assignments/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ############################################################################ 2 | # CMakeLists.txt for OpenCV and CUDA. 3 | # 2012-02-07 4 | # Quan Tran Minh. 
edit by Johannes Kast, Michael Sarahan 5 | # quantm@unist.ac.kr kast.jo@googlemail.com msarahan@gmail.com 6 | ############################################################################ 7 | 8 | cmake_minimum_required(VERSION 2.8 FATAL_ERROR) 9 | project(cs344) 10 | 11 | find_package(OpenCV REQUIRED) 12 | find_package(CUDA REQUIRED) 13 | 14 | link_libraries(${OpenCV_LIBS} ) 15 | 16 | set (EXECUTABLE_OUTPUT_PATH "${CMAKE_SOURCE_DIR}/bin/") 17 | 18 | if(CUDA_FOUND) 19 | # compared to class settings, we let NVidia's FindCUDA CMake detect 20 | # whether to build x64. We tell it to support most devices, though, 21 | # to make sure more people can easily run class code without knowing 22 | # about this compiler argument 23 | set(CUDA_NVCC_FLAGS " 24 | -ccbin /usr/bin/gcc; 25 | -gencode;arch=compute_70,code=sm_70; 26 | -gencode;arch=compute_80,code=sm_80;") 27 | 28 | # add -Wextra compiler flag for gcc compilations 29 | if (UNIX) 30 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler -Wextra") 31 | endif (UNIX) 32 | 33 | # add debugging to CUDA NVCC flags. For NVidia's NSight tools. 34 | set(CUDA_NVCC_FLAGS_DEBUG ${CUDA_NVCC_FLAGS_DEBUG} "-G") 35 | 36 | add_subdirectory (HW1) 37 | add_subdirectory (HW2) 38 | add_subdirectory (HW3) 39 | add_subdirectory (HW4) 40 | add_subdirectory (HW5) 41 | add_subdirectory (HW6) 42 | else(CUDA_FOUND) 43 | message("CUDA is not installed on this system.") 44 | endif() 45 | -------------------------------------------------------------------------------- /assignments/HW1/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ############################################################################ 2 | # CMakeLists.txt for OpenCV and CUDA. 3 | # 2012-02-07 4 | # Quan Tran Minh. 
edit by Johannes Kast, Michael Sarahan 5 | # quantm@unist.ac.kr kast.jo@googlemail.com msarahan@gmail.com 6 | ############################################################################ 7 | 8 | # collect source files 9 | 10 | file( GLOB hdr *.hpp *.h ) 11 | file( GLOB cu *.cu) 12 | SET (HW1_files main.cpp reference_calc.cpp compare.cpp) 13 | 14 | CUDA_ADD_EXECUTABLE(HW1 ${HW1_files} ${hdr} ${cu}) -------------------------------------------------------------------------------- /assignments/HW1/HW1.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "utils.h" 5 | #include 6 | #include 7 | #include 8 | 9 | cv::Mat imageRGBA; 10 | cv::Mat imageGrey; 11 | 12 | uchar4 *d_rgbaImage__; 13 | unsigned char *d_greyImage__; 14 | 15 | size_t numRows() { return imageRGBA.rows; } 16 | size_t numCols() { return imageRGBA.cols; } 17 | 18 | // return types are void since any internal error will be handled by quitting 19 | // no point in returning error codes... 
20 | // returns a pointer to an RGBA version of the input image 21 | // and a pointer to the single channel grey-scale output 22 | // on both the host and device 23 | void preProcess(uchar4 **inputImage, unsigned char **greyImage, 24 | uchar4 **d_rgbaImage, unsigned char **d_greyImage, 25 | const std::string &filename) { 26 | // make sure the context initializes ok 27 | checkCudaErrors(cudaFree(0)); 28 | 29 | cv::Mat image; 30 | image = cv::imread(filename.c_str(), CV_LOAD_IMAGE_COLOR); 31 | if (image.empty()) { 32 | std::cerr << "Couldn't open file: " << filename << std::endl; 33 | exit(1); 34 | } 35 | 36 | cv::cvtColor(image, imageRGBA, CV_BGR2RGBA); 37 | 38 | // allocate memory for the output 39 | imageGrey.create(image.rows, image.cols, CV_8UC1); 40 | 41 | // This shouldn't ever happen given the way the images are created 42 | // at least based upon my limited understanding of OpenCV, but better to check 43 | if (!imageRGBA.isContinuous() || !imageGrey.isContinuous()) { 44 | std::cerr << "Images aren't continuous!! Exiting." 
<< std::endl; 45 | exit(1); 46 | } 47 | 48 | *inputImage = (uchar4 *)imageRGBA.ptr(0); 49 | *greyImage = imageGrey.ptr(0); 50 | 51 | const size_t numPixels = numRows() * numCols(); 52 | // allocate memory on the device for both input and output 53 | checkCudaErrors(cudaMalloc(d_rgbaImage, sizeof(uchar4) * numPixels)); 54 | checkCudaErrors(cudaMalloc(d_greyImage, sizeof(unsigned char) * numPixels)); 55 | checkCudaErrors(cudaMemset( 56 | *d_greyImage, 0, 57 | numPixels * 58 | sizeof(unsigned char))); // make sure no memory is left laying around 59 | 60 | // copy input array to the GPU 61 | checkCudaErrors(cudaMemcpy(*d_rgbaImage, *inputImage, 62 | sizeof(uchar4) * numPixels, 63 | cudaMemcpyHostToDevice)); 64 | 65 | d_rgbaImage__ = *d_rgbaImage; 66 | d_greyImage__ = *d_greyImage; 67 | } 68 | 69 | void postProcess(const std::string &output_file, unsigned char *data_ptr) { 70 | cv::Mat output(numRows(), numCols(), CV_8UC1, (void *)data_ptr); 71 | 72 | // output the image 73 | cv::imwrite(output_file.c_str(), output); 74 | } 75 | 76 | void cleanup() { 77 | // cleanup 78 | cudaFree(d_rgbaImage__); 79 | cudaFree(d_greyImage__); 80 | } 81 | 82 | void generateReferenceImage(std::string input_filename, 83 | std::string output_filename) { 84 | cv::Mat reference = cv::imread(input_filename, CV_LOAD_IMAGE_GRAYSCALE); 85 | 86 | cv::imwrite(output_filename, reference); 87 | } 88 | -------------------------------------------------------------------------------- /assignments/HW1/HW1_differenceImage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW1/HW1_differenceImage.png -------------------------------------------------------------------------------- /assignments/HW1/HW1_output.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW1/HW1_output.png -------------------------------------------------------------------------------- /assignments/HW1/HW1_reference.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW1/HW1_reference.png -------------------------------------------------------------------------------- /assignments/HW1/Makefile: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # Default target executed when no arguments are given to make. 5 | default_target: all 6 | .PHONY : default_target 7 | 8 | # Allow only one "make -f Makefile2" at a time, but pass parallelism. 9 | .NOTPARALLEL: 10 | 11 | #============================================================================= 12 | # Special targets provided by cmake. 13 | 14 | # Disable implicit rules so canonical targets will work. 15 | .SUFFIXES: 16 | 17 | # Disable VCS-based implicit rules. 18 | % : %,v 19 | 20 | # Disable VCS-based implicit rules. 21 | % : RCS/% 22 | 23 | # Disable VCS-based implicit rules. 24 | % : RCS/%,v 25 | 26 | # Disable VCS-based implicit rules. 27 | % : SCCS/s.% 28 | 29 | # Disable VCS-based implicit rules. 30 | % : s.% 31 | 32 | .SUFFIXES: .hpux_make_needs_suffix_list 33 | 34 | # Command-line flag to silence nested $(MAKE). 35 | $(VERBOSE)MAKESILENT = -s 36 | 37 | #Suppress display of executed commands. 38 | $(VERBOSE).SILENT: 39 | 40 | # A target that is always out of date. 41 | cmake_force: 42 | .PHONY : cmake_force 43 | 44 | #============================================================================= 45 | # Set environment variables for the build. 46 | 47 | # The shell in which to execute make rules. 
48 | SHELL = /bin/sh 49 | 50 | # The CMake executable. 51 | CMAKE_COMMAND = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake 52 | 53 | # The command to remove a file. 54 | RM = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E rm -f 55 | 56 | # Escaping for special characters. 57 | EQUALS = = 58 | 59 | # The top-level source directory on which CMake was run. 60 | CMAKE_SOURCE_DIR = /data/home/chilli/cluster/work/CS344/assignments 61 | 62 | # The top-level build directory on which CMake was run. 63 | CMAKE_BINARY_DIR = /data/home/chilli/cluster/work/CS344/assignments 64 | 65 | #============================================================================= 66 | # Targets provided globally by CMake. 67 | 68 | # Special rule for the target rebuild_cache 69 | rebuild_cache: 70 | @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..." 71 | /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) 72 | .PHONY : rebuild_cache 73 | 74 | # Special rule for the target rebuild_cache 75 | rebuild_cache/fast: rebuild_cache 76 | .PHONY : rebuild_cache/fast 77 | 78 | # Special rule for the target edit_cache 79 | edit_cache: 80 | @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "No interactive CMake dialog available..." 81 | /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. 
82 | .PHONY : edit_cache 83 | 84 | # Special rule for the target edit_cache 85 | edit_cache/fast: edit_cache 86 | .PHONY : edit_cache/fast 87 | 88 | # The main all target 89 | all: cmake_check_build_system 90 | cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles /data/home/chilli/cluster/work/CS344/assignments/homework/HW1//CMakeFiles/progress.marks 91 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW1/all 92 | $(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles 0 93 | .PHONY : all 94 | 95 | # The main clean target 96 | clean: 97 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW1/clean 98 | .PHONY : clean 99 | 100 | # The main clean target 101 | clean/fast: clean 102 | .PHONY : clean/fast 103 | 104 | # Prepare targets for installation. 105 | preinstall: all 106 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW1/preinstall 107 | .PHONY : preinstall 108 | 109 | # Prepare targets for installation. 110 | preinstall/fast: 111 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW1/preinstall 112 | .PHONY : preinstall/fast 113 | 114 | # clear depends 115 | depend: 116 | cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 117 | .PHONY : depend 118 | 119 | # Convenience name for target. 
120 | homework/HW1/CMakeFiles/HW1.dir/rule: 121 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW1/CMakeFiles/HW1.dir/rule 122 | .PHONY : homework/HW1/CMakeFiles/HW1.dir/rule 123 | 124 | # Convenience name for target. 125 | HW1: homework/HW1/CMakeFiles/HW1.dir/rule 126 | .PHONY : HW1 127 | 128 | # fast build rule for target. 129 | HW1/fast: 130 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW1/CMakeFiles/HW1.dir/build.make homework/HW1/CMakeFiles/HW1.dir/build 131 | .PHONY : HW1/fast 132 | 133 | compare.o: compare.cpp.o 134 | .PHONY : compare.o 135 | 136 | # target to build an object file 137 | compare.cpp.o: 138 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW1/CMakeFiles/HW1.dir/build.make homework/HW1/CMakeFiles/HW1.dir/compare.cpp.o 139 | .PHONY : compare.cpp.o 140 | 141 | compare.i: compare.cpp.i 142 | .PHONY : compare.i 143 | 144 | # target to preprocess a source file 145 | compare.cpp.i: 146 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW1/CMakeFiles/HW1.dir/build.make homework/HW1/CMakeFiles/HW1.dir/compare.cpp.i 147 | .PHONY : compare.cpp.i 148 | 149 | compare.s: compare.cpp.s 150 | .PHONY : compare.s 151 | 152 | # target to generate assembly for a file 153 | compare.cpp.s: 154 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW1/CMakeFiles/HW1.dir/build.make homework/HW1/CMakeFiles/HW1.dir/compare.cpp.s 155 | .PHONY : compare.cpp.s 156 | 157 | main.o: main.cpp.o 158 | .PHONY : main.o 159 | 160 | # target to build an object file 161 | main.cpp.o: 162 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW1/CMakeFiles/HW1.dir/build.make homework/HW1/CMakeFiles/HW1.dir/main.cpp.o 163 | .PHONY : main.cpp.o 164 | 165 | main.i: main.cpp.i 166 | .PHONY : main.i 167 | 168 | # target to 
preprocess a source file 169 | main.cpp.i: 170 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW1/CMakeFiles/HW1.dir/build.make homework/HW1/CMakeFiles/HW1.dir/main.cpp.i 171 | .PHONY : main.cpp.i 172 | 173 | main.s: main.cpp.s 174 | .PHONY : main.s 175 | 176 | # target to generate assembly for a file 177 | main.cpp.s: 178 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW1/CMakeFiles/HW1.dir/build.make homework/HW1/CMakeFiles/HW1.dir/main.cpp.s 179 | .PHONY : main.cpp.s 180 | 181 | reference_calc.o: reference_calc.cpp.o 182 | .PHONY : reference_calc.o 183 | 184 | # target to build an object file 185 | reference_calc.cpp.o: 186 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW1/CMakeFiles/HW1.dir/build.make homework/HW1/CMakeFiles/HW1.dir/reference_calc.cpp.o 187 | .PHONY : reference_calc.cpp.o 188 | 189 | reference_calc.i: reference_calc.cpp.i 190 | .PHONY : reference_calc.i 191 | 192 | # target to preprocess a source file 193 | reference_calc.cpp.i: 194 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW1/CMakeFiles/HW1.dir/build.make homework/HW1/CMakeFiles/HW1.dir/reference_calc.cpp.i 195 | .PHONY : reference_calc.cpp.i 196 | 197 | reference_calc.s: reference_calc.cpp.s 198 | .PHONY : reference_calc.s 199 | 200 | # target to generate assembly for a file 201 | reference_calc.cpp.s: 202 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW1/CMakeFiles/HW1.dir/build.make homework/HW1/CMakeFiles/HW1.dir/reference_calc.cpp.s 203 | .PHONY : reference_calc.cpp.s 204 | 205 | # Help Target 206 | help: 207 | @echo "The following are some of the valid targets for this Makefile:" 208 | @echo "... all (the default if no target is provided)" 209 | @echo "... clean" 210 | @echo "... depend" 211 | @echo "... edit_cache" 212 | @echo "... rebuild_cache" 213 | @echo "... 
HW1" 214 | @echo "... compare.o" 215 | @echo "... compare.i" 216 | @echo "... compare.s" 217 | @echo "... main.o" 218 | @echo "... main.i" 219 | @echo "... main.s" 220 | @echo "... reference_calc.o" 221 | @echo "... reference_calc.i" 222 | @echo "... reference_calc.s" 223 | .PHONY : help 224 | 225 | 226 | 227 | #============================================================================= 228 | # Special targets to cleanup operation of make. 229 | 230 | # Special rule to run CMake to check the build system integrity. 231 | # No rule that depends on this can have commands that come from listfiles 232 | # because they might be regenerated. 233 | cmake_check_build_system: 234 | cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 235 | .PHONY : cmake_check_build_system 236 | 237 | -------------------------------------------------------------------------------- /assignments/HW1/cinque_terre.gold: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW1/cinque_terre.gold -------------------------------------------------------------------------------- /assignments/HW1/cinque_terre_small.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW1/cinque_terre_small.jpg -------------------------------------------------------------------------------- /assignments/HW1/compare.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "utils.h" 6 | 7 | void compareImages(std::string reference_filename, std::string test_filename, 8 | bool useEpsCheck, double perPixelError, double globalError) { 9 | cv::Mat reference = 
cv::imread(reference_filename, -1); 10 | cv::Mat test = cv::imread(test_filename, -1); 11 | 12 | cv::Mat diff = abs(reference - test); 13 | 14 | cv::Mat diffSingleChannel = 15 | diff.reshape(1, 0); // convert to 1 channel, same # rows 16 | 17 | double minVal, maxVal; 18 | 19 | cv::minMaxLoc(diffSingleChannel, &minVal, &maxVal, NULL, 20 | NULL); // NULL because we don't care about location 21 | 22 | // now perform transform so that we bump values to the full range 23 | 24 | diffSingleChannel = (diffSingleChannel - minVal) * (255. / (maxVal - minVal)); 25 | 26 | diff = diffSingleChannel.reshape(reference.channels(), 0); 27 | 28 | cv::imwrite("HW1_differenceImage.png", diff); 29 | // OK, now we can start comparing values... 30 | unsigned char *referencePtr = reference.ptr(0); 31 | unsigned char *testPtr = test.ptr(0); 32 | 33 | if (useEpsCheck) { 34 | checkResultsEps(referencePtr, testPtr, 35 | reference.rows * reference.cols * reference.channels(), 36 | perPixelError, globalError); 37 | } else { 38 | checkResultsExact(referencePtr, testPtr, 39 | reference.rows * reference.cols * reference.channels()); 40 | } 41 | 42 | std::cout << "PASS" << std::endl; 43 | return; 44 | } 45 | -------------------------------------------------------------------------------- /assignments/HW1/compare.h: -------------------------------------------------------------------------------- 1 | #ifndef COMPARE_H__ 2 | #define COMPARE_H__ 3 | 4 | void compareImages(std::string reference_filename, std::string test_filename, 5 | bool useEpsCheck, double perPixelError, double globalError); 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /assignments/HW1/main.cpp: -------------------------------------------------------------------------------- 1 | // Udacity HW1 Solution 2 | 3 | #include 4 | #include "timer.h" 5 | #include "utils.h" 6 | #include 7 | #include 8 | #include "reference_calc.h" 9 | #include "compare.h" 10 | 11 | void 
your_rgba_to_greyscale(const uchar4 *const h_rgbaImage, 12 | uchar4 *const d_rgbaImage, 13 | unsigned char *const d_greyImage, size_t numRows, 14 | size_t numCols); 15 | 16 | // include the definitions of the above functions for this homework 17 | #include "HW1.cpp" 18 | 19 | int main(int argc, char **argv) { 20 | uchar4 *h_rgbaImage, *d_rgbaImage; 21 | unsigned char *h_greyImage, *d_greyImage; 22 | 23 | std::string input_file; 24 | std::string output_file; 25 | std::string reference_file; 26 | double perPixelError = 0.0; 27 | double globalError = 0.0; 28 | bool useEpsCheck = false; 29 | switch (argc) { 30 | case 2: 31 | input_file = std::string(argv[1]); 32 | output_file = "HW1_output.png"; 33 | reference_file = "HW1_reference.png"; 34 | break; 35 | case 3: 36 | input_file = std::string(argv[1]); 37 | output_file = std::string(argv[2]); 38 | reference_file = "HW1_reference.png"; 39 | break; 40 | case 4: 41 | input_file = std::string(argv[1]); 42 | output_file = std::string(argv[2]); 43 | reference_file = std::string(argv[3]); 44 | break; 45 | case 6: 46 | useEpsCheck = true; 47 | input_file = std::string(argv[1]); 48 | output_file = std::string(argv[2]); 49 | reference_file = std::string(argv[3]); 50 | perPixelError = atof(argv[4]); 51 | globalError = atof(argv[5]); 52 | break; 53 | default: 54 | std::cerr << "Usage: ./HW1 input_file [output_filename] " 55 | "[reference_filename] [perPixelError] [globalError]" 56 | << std::endl; 57 | exit(1); 58 | } 59 | // load the image and give us our input and output pointers 60 | preProcess(&h_rgbaImage, &h_greyImage, &d_rgbaImage, &d_greyImage, 61 | input_file); 62 | 63 | GpuTimer timer; 64 | timer.Start(); 65 | // call the students' code 66 | your_rgba_to_greyscale(h_rgbaImage, d_rgbaImage, d_greyImage, numRows(), 67 | numCols()); 68 | timer.Stop(); 69 | cudaDeviceSynchronize(); 70 | checkCudaErrors(cudaGetLastError()); 71 | 72 | int err = printf("Your code ran in: %f msecs.\n", timer.Elapsed()); 73 | 74 | if (err < 0) { 75 
| // Couldn't print! Probably the student closed stdout - bad news 76 | std::cerr << "Couldn't print timing information! STDOUT Closed!" 77 | << std::endl; 78 | exit(1); 79 | } 80 | 81 | size_t numPixels = numRows() * numCols(); 82 | checkCudaErrors(cudaMemcpy(h_greyImage, d_greyImage, 83 | sizeof(unsigned char) * numPixels, 84 | cudaMemcpyDeviceToHost)); 85 | 86 | // check results and output the grey image 87 | postProcess(output_file, h_greyImage); 88 | 89 | referenceCalculation(h_rgbaImage, h_greyImage, numRows(), numCols()); 90 | 91 | postProcess(reference_file, h_greyImage); 92 | 93 | // generateReferenceImage(input_file, reference_file); 94 | compareImages(reference_file, output_file, useEpsCheck, perPixelError, 95 | globalError); 96 | 97 | cleanup(); 98 | 99 | return 0; 100 | } 101 | -------------------------------------------------------------------------------- /assignments/HW1/reference_calc.cpp: -------------------------------------------------------------------------------- 1 | // for uchar4 struct 2 | #include 3 | 4 | void referenceCalculation(const uchar4 *const rgbaImage, 5 | unsigned char *const greyImage, size_t numRows, 6 | size_t numCols) { 7 | for (size_t r = 0; r < numRows; ++r) { 8 | for (size_t c = 0; c < numCols; ++c) { 9 | uchar4 rgba = rgbaImage[r * numCols + c]; 10 | float channelSum = .299f * rgba.x + .587f * rgba.y + .114f * rgba.z; 11 | greyImage[r * numCols + c] = channelSum; 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /assignments/HW1/reference_calc.h: -------------------------------------------------------------------------------- 1 | #ifndef REFERENCE_H__ 2 | #define REFERENCE_H__ 3 | 4 | void referenceCalculation(const uchar4 *const rgbaImage, 5 | unsigned char *const greyImage, size_t numRows, 6 | size_t numCols); 7 | 8 | #endif -------------------------------------------------------------------------------- /assignments/HW1/student_func.cu: 
-------------------------------------------------------------------------------- 1 | // Homework 1 2 | // Color to Greyscale Conversion 3 | 4 | // A common way to represent color images is known as RGBA - the color 5 | // is specified by how much Red, Green and Blue is in it. 6 | // The 'A' stands for Alpha and is used for transparency; it will be 7 | // ignored in this homework. 8 | 9 | // Each channel Red, Blue, Green and Alpha is represented by one byte. 10 | // Since we are using one byte for each color there are 256 different 11 | // possible values for each color. This means we use 4 bytes per pixel. 12 | 13 | // Greyscale images are represented by a single intensity value per pixel 14 | // which is one byte in size. 15 | 16 | // To convert an image from color to grayscale one simple method is to 17 | // set the intensity to the average of the RGB channels. But we will 18 | // use a more sophisticated method that takes into account how the eye 19 | // perceives color and weights the channels unequally. 20 | 21 | // The eye responds most strongly to green followed by red and then blue. 22 | // The NTSC (National Television System Committee) recommends the following 23 | // formula for color to greyscale conversion: 24 | 25 | // I = .299f * R + .587f * G + .114f * B 26 | 27 | // Notice the trailing f's on the numbers which indicate that they are 28 | // single precision floating point constants and not double precision 29 | // constants. 30 | 31 | // You should fill in the kernel as well as set the block and grid sizes 32 | // so that the entire image is processed. 
33 | 34 | #include "utils.h" 35 | 36 | __global__ void rgba_to_greyscale(const uchar4 *const rgbaImage, 37 | unsigned char *const greyImage, int numRows, 38 | int numCols) { 39 | // TODO 40 | // Fill in the kernel to convert from color to greyscale 41 | // the mapping from components of a uchar4 to RGBA is: 42 | // .x -> R ; .y -> G ; .z -> B ; .w -> A 43 | // 44 | // The output (greyImage) at each pixel should be the result of 45 | // applying the formula: output = .299f * R + .587f * G + .114f * B; 46 | // Note: We will be ignoring the alpha channel for this conversion 47 | 48 | // First create a mapping from the 2D block and grid locations 49 | // to an absolute 2D location in the image, then use that to 50 | // calculate a 1D offset 51 | } 52 | 53 | void your_rgba_to_greyscale(const uchar4 *const h_rgbaImage, 54 | uchar4 *const d_rgbaImage, 55 | unsigned char *const d_greyImage, size_t numRows, 56 | size_t numCols) { 57 | // You must fill in the correct sizes for the blockSize and gridSize 58 | // currently only one block with one thread is being launched 59 | const dim3 blockSize(1, 1, 1); // TODO 60 | const dim3 gridSize(1, 1, 1); // TODO 61 | rgba_to_greyscale<<<gridSize, blockSize>>>(d_rgbaImage, d_greyImage, numRows, 62 | numCols); 63 | 64 | cudaDeviceSynchronize(); 65 | checkCudaErrors(cudaGetLastError()); 66 | } 67 | -------------------------------------------------------------------------------- /assignments/HW1/timer.h: -------------------------------------------------------------------------------- 1 | #ifndef GPU_TIMER_H__ 2 | #define GPU_TIMER_H__ 3 | 4 | #include 5 | 6 | struct GpuTimer { 7 | cudaEvent_t start; 8 | cudaEvent_t stop; 9 | 10 | GpuTimer() { 11 | cudaEventCreate(&start); 12 | cudaEventCreate(&stop); 13 | } 14 | 15 | ~GpuTimer() { 16 | cudaEventDestroy(start); 17 | cudaEventDestroy(stop); 18 | } 19 | 20 | void Start() { cudaEventRecord(start, 0); } 21 | 22 | void Stop() { cudaEventRecord(stop, 0); } 23 | 24 | float Elapsed() { 25 | float elapsed; 26 | 
cudaEventSynchronize(stop); 27 | cudaEventElapsedTime(&elapsed, start, stop); 28 | return elapsed; 29 | } 30 | }; 31 | 32 | #endif /* GPU_TIMER_H__ */ 33 | -------------------------------------------------------------------------------- /assignments/HW1/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H__ 2 | #define UTILS_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__) 13 | 14 | template <typename T> 15 | void check(T err, const char *const func, const char *const file, 16 | const int line) { 17 | if (err != cudaSuccess) { 18 | std::cerr << "CUDA error at: " << file << ":" << line << std::endl; 19 | std::cerr << cudaGetErrorString(err) << " " << func << std::endl; 20 | exit(1); 21 | } 22 | } 23 | 24 | template <typename T> 25 | void checkResultsExact(const T *const ref, const T *const gpu, size_t numElem) { 26 | // check that the GPU result matches the CPU result 27 | for (size_t i = 0; i < numElem; ++i) { 28 | if (ref[i] != gpu[i]) { 29 | std::cerr << "Difference at pos " << i << std::endl; 30 | // the + is magic to convert char to int without messing 31 | // with other types 32 | std::cerr << "Reference: " << std::setprecision(17) << +ref[i] 33 | << "\nGPU : " << +gpu[i] << std::endl; 34 | exit(1); 35 | } 36 | } 37 | } 38 | 39 | template <typename T> 40 | void checkResultsEps(const T *const ref, const T *const gpu, size_t numElem, 41 | double eps1, double eps2) { 42 | assert(eps1 >= 0 && eps2 >= 0); 43 | unsigned long long totalDiff = 0; 44 | unsigned numSmallDifferences = 0; 45 | for (size_t i = 0; i < numElem; ++i) { 46 | // subtract smaller from larger in case of unsigned types 47 | T smaller = std::min(ref[i], gpu[i]); 48 | T larger = std::max(ref[i], gpu[i]); 49 | T diff = larger - smaller; 50 | if (diff > 0 && diff <= eps1) { 51 | numSmallDifferences++; 52 | } else if (diff > eps1) { 53 | std::cerr << 
"Difference at pos " << +i << " exceeds tolerance of " 54 | << eps1 << std::endl; 55 | std::cerr << "Reference: " << std::setprecision(17) << +ref[i] 56 | << "\nGPU : " << +gpu[i] << std::endl; 57 | exit(1); 58 | } 59 | totalDiff += diff * diff; 60 | } 61 | double percentSmallDifferences = 62 | (double)numSmallDifferences / (double)numElem; 63 | if (percentSmallDifferences > eps2) { 64 | std::cerr << "Total percentage of non-zero pixel difference between the " 65 | "two images exceeds " 66 | << 100.0 * eps2 << "%" << std::endl; 67 | std::cerr << "Percentage of non-zero pixel differences: " 68 | << 100.0 * percentSmallDifferences << "%" << std::endl; 69 | exit(1); 70 | } 71 | } 72 | 73 | // Uses the autodesk method of image comparison 74 | // Note that the tolerance here is in PIXELS not a percentage of input pixels 75 | template <typename T> 76 | void checkResultsAutodesk(const T *const ref, const T *const gpu, 77 | size_t numElem, double variance, size_t tolerance) { 78 | 79 | size_t numBadPixels = 0; 80 | for (size_t i = 0; i < numElem; ++i) { 81 | T smaller = std::min(ref[i], gpu[i]); 82 | T larger = std::max(ref[i], gpu[i]); 83 | T diff = larger - smaller; 84 | if (diff > variance) 85 | ++numBadPixels; 86 | } 87 | 88 | if (numBadPixels > tolerance) { 89 | std::cerr << "Too many bad pixels in the image." << numBadPixels << "/" 90 | << tolerance << std::endl; 91 | exit(1); 92 | } 93 | } 94 | 95 | #endif 96 | -------------------------------------------------------------------------------- /assignments/HW2/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ############################################################################ 2 | # CMakeLists.txt for OpenCV and CUDA. 3 | # 2012-02-07 4 | # Quan Tran Minh. 
edit by Johannes Kast, Michael Sarahan 5 | # quantm@unist.ac.kr kast.jo@googlemail.com msarahan@gmail.com 6 | ############################################################################ 7 | 8 | # collect source files 9 | 10 | file( GLOB hdr *.hpp *.h ) 11 | file( GLOB cu *.cu) 12 | SET (HW2_files main.cpp reference_calc.cpp compare.cpp) 13 | 14 | CUDA_ADD_EXECUTABLE(HW2 ${HW2_files} ${hdr} ${cu}) 15 | -------------------------------------------------------------------------------- /assignments/HW2/HW2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "utils.h" 5 | #include 6 | #include 7 | #include 8 | 9 | cv::Mat imageInputRGBA; 10 | cv::Mat imageOutputRGBA; 11 | 12 | uchar4 *d_inputImageRGBA__; 13 | uchar4 *d_outputImageRGBA__; 14 | 15 | float *h_filter__; 16 | 17 | size_t numRows() { return imageInputRGBA.rows; } 18 | size_t numCols() { return imageInputRGBA.cols; } 19 | 20 | // return types are void since any internal error will be handled by quitting 21 | // no point in returning error codes... 
22 | // returns a pointer to an RGBA version of the input image 23 | // and a pointer to the single channel grey-scale output 24 | // on both the host and device 25 | void preProcess(uchar4 **h_inputImageRGBA, uchar4 **h_outputImageRGBA, 26 | uchar4 **d_inputImageRGBA, uchar4 **d_outputImageRGBA, 27 | unsigned char **d_redBlurred, unsigned char **d_greenBlurred, 28 | unsigned char **d_blueBlurred, float **h_filter, 29 | int *filterWidth, const std::string &filename) { 30 | 31 | // make sure the context initializes ok 32 | checkCudaErrors(cudaFree(0)); 33 | 34 | cv::Mat image = cv::imread(filename.c_str(), CV_LOAD_IMAGE_COLOR); 35 | if (image.empty()) { 36 | std::cerr << "Couldn't open file: " << filename << std::endl; 37 | exit(1); 38 | } 39 | 40 | cv::cvtColor(image, imageInputRGBA, CV_BGR2RGBA); 41 | 42 | // allocate memory for the output 43 | imageOutputRGBA.create(image.rows, image.cols, CV_8UC4); 44 | 45 | // This shouldn't ever happen given the way the images are created 46 | // at least based upon my limited understanding of OpenCV, but better to check 47 | if (!imageInputRGBA.isContinuous() || !imageOutputRGBA.isContinuous()) { 48 | std::cerr << "Images aren't continuous!! Exiting." 
<< std::endl; 49 | exit(1); 50 | } 51 | 52 | *h_inputImageRGBA = (uchar4 *)imageInputRGBA.ptr(0); 53 | *h_outputImageRGBA = (uchar4 *)imageOutputRGBA.ptr(0); 54 | 55 | const size_t numPixels = numRows() * numCols(); 56 | // allocate memory on the device for both input and output 57 | checkCudaErrors(cudaMalloc(d_inputImageRGBA, sizeof(uchar4) * numPixels)); 58 | checkCudaErrors(cudaMalloc(d_outputImageRGBA, sizeof(uchar4) * numPixels)); 59 | checkCudaErrors(cudaMemset( 60 | *d_outputImageRGBA, 0, 61 | numPixels * sizeof(uchar4))); // make sure no memory is left laying around 62 | 63 | // copy input array to the GPU 64 | checkCudaErrors(cudaMemcpy(*d_inputImageRGBA, *h_inputImageRGBA, 65 | sizeof(uchar4) * numPixels, 66 | cudaMemcpyHostToDevice)); 67 | 68 | d_inputImageRGBA__ = *d_inputImageRGBA; 69 | d_outputImageRGBA__ = *d_outputImageRGBA; 70 | 71 | // now create the filter that they will use 72 | const int blurKernelWidth = 9; 73 | const float blurKernelSigma = 2.; 74 | 75 | *filterWidth = blurKernelWidth; 76 | 77 | // create and fill the filter we will convolve with 78 | *h_filter = new float[blurKernelWidth * blurKernelWidth]; 79 | h_filter__ = *h_filter; 80 | 81 | float filterSum = 0.f; // for normalization 82 | 83 | for (int r = -blurKernelWidth / 2; r <= blurKernelWidth / 2; ++r) { 84 | for (int c = -blurKernelWidth / 2; c <= blurKernelWidth / 2; ++c) { 85 | float filterValue = expf(-(float)(c * c + r * r) / 86 | (2.f * blurKernelSigma * blurKernelSigma)); 87 | (*h_filter)[(r + blurKernelWidth / 2) * blurKernelWidth + c + 88 | blurKernelWidth / 2] = filterValue; 89 | filterSum += filterValue; 90 | } 91 | } 92 | 93 | float normalizationFactor = 1.f / filterSum; 94 | 95 | for (int r = -blurKernelWidth / 2; r <= blurKernelWidth / 2; ++r) { 96 | for (int c = -blurKernelWidth / 2; c <= blurKernelWidth / 2; ++c) { 97 | (*h_filter)[(r + blurKernelWidth / 2) * blurKernelWidth + c + 98 | blurKernelWidth / 2] *= normalizationFactor; 99 | } 100 | } 101 | 102 | // 
blurred 103 | checkCudaErrors(cudaMalloc(d_redBlurred, sizeof(unsigned char) * numPixels)); 104 | checkCudaErrors( 105 | cudaMalloc(d_greenBlurred, sizeof(unsigned char) * numPixels)); 106 | checkCudaErrors(cudaMalloc(d_blueBlurred, sizeof(unsigned char) * numPixels)); 107 | checkCudaErrors( 108 | cudaMemset(*d_redBlurred, 0, sizeof(unsigned char) * numPixels)); 109 | checkCudaErrors( 110 | cudaMemset(*d_greenBlurred, 0, sizeof(unsigned char) * numPixels)); 111 | checkCudaErrors( 112 | cudaMemset(*d_blueBlurred, 0, sizeof(unsigned char) * numPixels)); 113 | } 114 | 115 | void postProcess(const std::string &output_file, uchar4 *data_ptr) { 116 | cv::Mat output(numRows(), numCols(), CV_8UC4, (void *)data_ptr); 117 | 118 | cv::Mat imageOutputBGR; 119 | cv::cvtColor(output, imageOutputBGR, CV_RGBA2BGR); 120 | // output the image 121 | cv::imwrite(output_file.c_str(), imageOutputBGR); 122 | } 123 | 124 | void cleanUp(void) { 125 | cudaFree(d_inputImageRGBA__); 126 | cudaFree(d_outputImageRGBA__); 127 | delete[] h_filter__; 128 | } 129 | 130 | // An unused bit of code showing how to accomplish this assignment using OpenCV. 131 | // It is much faster 132 | // than the naive implementation in reference_calc.cpp. 133 | void generateReferenceImage(std::string input_file, std::string reference_file, 134 | int kernel_size) { 135 | cv::Mat input = cv::imread(input_file); 136 | // Create an identical image for the output as a placeholder 137 | cv::Mat reference = cv::imread(input_file); 138 | cv::GaussianBlur(input, reference, cv::Size2i(kernel_size, kernel_size), 0); 139 | cv::imwrite(reference_file, reference); 140 | } 141 | -------------------------------------------------------------------------------- /assignments/HW2/Makefile: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # Default target executed when no arguments are given to make. 5 | default_target: all 6 | .PHONY : default_target 7 | 8 | # Allow only one "make -f Makefile2" at a time, but pass parallelism. 9 | .NOTPARALLEL: 10 | 11 | #============================================================================= 12 | # Special targets provided by cmake. 13 | 14 | # Disable implicit rules so canonical targets will work. 15 | .SUFFIXES: 16 | 17 | # Disable VCS-based implicit rules. 18 | % : %,v 19 | 20 | # Disable VCS-based implicit rules. 21 | % : RCS/% 22 | 23 | # Disable VCS-based implicit rules. 24 | % : RCS/%,v 25 | 26 | # Disable VCS-based implicit rules. 27 | % : SCCS/s.% 28 | 29 | # Disable VCS-based implicit rules. 30 | % : s.% 31 | 32 | .SUFFIXES: .hpux_make_needs_suffix_list 33 | 34 | # Command-line flag to silence nested $(MAKE). 35 | $(VERBOSE)MAKESILENT = -s 36 | 37 | #Suppress display of executed commands. 38 | $(VERBOSE).SILENT: 39 | 40 | # A target that is always out of date. 41 | cmake_force: 42 | .PHONY : cmake_force 43 | 44 | #============================================================================= 45 | # Set environment variables for the build. 46 | 47 | # The shell in which to execute make rules. 48 | SHELL = /bin/sh 49 | 50 | # The CMake executable. 51 | CMAKE_COMMAND = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake 52 | 53 | # The command to remove a file. 54 | RM = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E rm -f 55 | 56 | # Escaping for special characters. 57 | EQUALS = = 58 | 59 | # The top-level source directory on which CMake was run. 60 | CMAKE_SOURCE_DIR = /data/home/chilli/cluster/work/CS344/assignments 61 | 62 | # The top-level build directory on which CMake was run. 
63 | CMAKE_BINARY_DIR = /data/home/chilli/cluster/work/CS344/assignments 64 | 65 | #============================================================================= 66 | # Targets provided globally by CMake. 67 | 68 | # Special rule for the target rebuild_cache 69 | rebuild_cache: 70 | @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..." 71 | /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) 72 | .PHONY : rebuild_cache 73 | 74 | # Special rule for the target rebuild_cache 75 | rebuild_cache/fast: rebuild_cache 76 | .PHONY : rebuild_cache/fast 77 | 78 | # Special rule for the target edit_cache 79 | edit_cache: 80 | @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "No interactive CMake dialog available..." 81 | /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. 82 | .PHONY : edit_cache 83 | 84 | # Special rule for the target edit_cache 85 | edit_cache/fast: edit_cache 86 | .PHONY : edit_cache/fast 87 | 88 | # The main all target 89 | all: cmake_check_build_system 90 | cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles /data/home/chilli/cluster/work/CS344/assignments/homework/HW2//CMakeFiles/progress.marks 91 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW2/all 92 | $(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles 0 93 | .PHONY : all 94 | 95 | # The main clean target 96 | clean: 97 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW2/clean 98 | .PHONY : clean 99 | 100 | # The main clean target 101 | clean/fast: clean 102 | .PHONY : clean/fast 103 | 104 | # Prepare targets for 
installation. 105 | preinstall: all 106 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW2/preinstall 107 | .PHONY : preinstall 108 | 109 | # Prepare targets for installation. 110 | preinstall/fast: 111 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW2/preinstall 112 | .PHONY : preinstall/fast 113 | 114 | # clear depends 115 | depend: 116 | cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 117 | .PHONY : depend 118 | 119 | # Convenience name for target. 120 | homework/HW2/CMakeFiles/HW2.dir/rule: 121 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW2/CMakeFiles/HW2.dir/rule 122 | .PHONY : homework/HW2/CMakeFiles/HW2.dir/rule 123 | 124 | # Convenience name for target. 125 | HW2: homework/HW2/CMakeFiles/HW2.dir/rule 126 | .PHONY : HW2 127 | 128 | # fast build rule for target. 
129 | HW2/fast: 130 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW2/CMakeFiles/HW2.dir/build.make homework/HW2/CMakeFiles/HW2.dir/build 131 | .PHONY : HW2/fast 132 | 133 | compare.o: compare.cpp.o 134 | .PHONY : compare.o 135 | 136 | # target to build an object file 137 | compare.cpp.o: 138 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW2/CMakeFiles/HW2.dir/build.make homework/HW2/CMakeFiles/HW2.dir/compare.cpp.o 139 | .PHONY : compare.cpp.o 140 | 141 | compare.i: compare.cpp.i 142 | .PHONY : compare.i 143 | 144 | # target to preprocess a source file 145 | compare.cpp.i: 146 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW2/CMakeFiles/HW2.dir/build.make homework/HW2/CMakeFiles/HW2.dir/compare.cpp.i 147 | .PHONY : compare.cpp.i 148 | 149 | compare.s: compare.cpp.s 150 | .PHONY : compare.s 151 | 152 | # target to generate assembly for a file 153 | compare.cpp.s: 154 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW2/CMakeFiles/HW2.dir/build.make homework/HW2/CMakeFiles/HW2.dir/compare.cpp.s 155 | .PHONY : compare.cpp.s 156 | 157 | main.o: main.cpp.o 158 | .PHONY : main.o 159 | 160 | # target to build an object file 161 | main.cpp.o: 162 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW2/CMakeFiles/HW2.dir/build.make homework/HW2/CMakeFiles/HW2.dir/main.cpp.o 163 | .PHONY : main.cpp.o 164 | 165 | main.i: main.cpp.i 166 | .PHONY : main.i 167 | 168 | # target to preprocess a source file 169 | main.cpp.i: 170 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW2/CMakeFiles/HW2.dir/build.make homework/HW2/CMakeFiles/HW2.dir/main.cpp.i 171 | .PHONY : main.cpp.i 172 | 173 | main.s: main.cpp.s 174 | .PHONY : main.s 175 | 176 | # target to generate assembly for a file 177 | main.cpp.s: 178 | cd 
/data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW2/CMakeFiles/HW2.dir/build.make homework/HW2/CMakeFiles/HW2.dir/main.cpp.s 179 | .PHONY : main.cpp.s 180 | 181 | reference_calc.o: reference_calc.cpp.o 182 | .PHONY : reference_calc.o 183 | 184 | # target to build an object file 185 | reference_calc.cpp.o: 186 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW2/CMakeFiles/HW2.dir/build.make homework/HW2/CMakeFiles/HW2.dir/reference_calc.cpp.o 187 | .PHONY : reference_calc.cpp.o 188 | 189 | reference_calc.i: reference_calc.cpp.i 190 | .PHONY : reference_calc.i 191 | 192 | # target to preprocess a source file 193 | reference_calc.cpp.i: 194 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW2/CMakeFiles/HW2.dir/build.make homework/HW2/CMakeFiles/HW2.dir/reference_calc.cpp.i 195 | .PHONY : reference_calc.cpp.i 196 | 197 | reference_calc.s: reference_calc.cpp.s 198 | .PHONY : reference_calc.s 199 | 200 | # target to generate assembly for a file 201 | reference_calc.cpp.s: 202 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW2/CMakeFiles/HW2.dir/build.make homework/HW2/CMakeFiles/HW2.dir/reference_calc.cpp.s 203 | .PHONY : reference_calc.cpp.s 204 | 205 | # Help Target 206 | help: 207 | @echo "The following are some of the valid targets for this Makefile:" 208 | @echo "... all (the default if no target is provided)" 209 | @echo "... clean" 210 | @echo "... depend" 211 | @echo "... edit_cache" 212 | @echo "... rebuild_cache" 213 | @echo "... HW2" 214 | @echo "... compare.o" 215 | @echo "... compare.i" 216 | @echo "... compare.s" 217 | @echo "... main.o" 218 | @echo "... main.i" 219 | @echo "... main.s" 220 | @echo "... reference_calc.o" 221 | @echo "... reference_calc.i" 222 | @echo "... 
reference_calc.s" 223 | .PHONY : help 224 | 225 | 226 | 227 | #============================================================================= 228 | # Special targets to cleanup operation of make. 229 | 230 | # Special rule to run CMake to check the build system integrity. 231 | # No rule that depends on this can have commands that come from listfiles 232 | # because they might be regenerated. 233 | cmake_check_build_system: 234 | cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 235 | .PHONY : cmake_check_build_system 236 | 237 | -------------------------------------------------------------------------------- /assignments/HW2/cinque_terre.gold: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW2/cinque_terre.gold -------------------------------------------------------------------------------- /assignments/HW2/cinque_terre_small.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW2/cinque_terre_small.jpg -------------------------------------------------------------------------------- /assignments/HW2/compare.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "utils.h" 6 | 7 | void compareImages(std::string reference_filename, std::string test_filename, 8 | bool useEpsCheck, double perPixelError, double globalError) { 9 | cv::Mat reference = cv::imread(reference_filename, -1); 10 | cv::Mat test = cv::imread(test_filename, -1); 11 | 12 | cv::Mat diff = abs(reference - test); 13 | 14 | cv::Mat diffSingleChannel = 15 | diff.reshape(1, 0); // convert to 1 channel, same # rows 16 | 17 | double 
minVal, maxVal; 18 | 19 | cv::minMaxLoc(diffSingleChannel, &minVal, &maxVal, NULL, 20 | NULL); // NULL because we don't care about location 21 | 22 | // now perform transform so that we bump values to the full range 23 | 24 | diffSingleChannel = (diffSingleChannel - minVal) * (255. / (maxVal - minVal)); 25 | 26 | diff = diffSingleChannel.reshape(reference.channels(), 0); 27 | 28 | cv::imwrite("HW2_differenceImage.png", diff); 29 | // OK, now we can start comparing values... 30 | unsigned char *referencePtr = reference.ptr(0); 31 | unsigned char *testPtr = test.ptr(0); 32 | 33 | if (useEpsCheck) { 34 | checkResultsEps(referencePtr, testPtr, 35 | reference.rows * reference.cols * reference.channels(), 36 | perPixelError, globalError); 37 | } else { 38 | checkResultsExact(referencePtr, testPtr, 39 | reference.rows * reference.cols * reference.channels()); 40 | } 41 | 42 | std::cout << "PASS" << std::endl; 43 | return; 44 | } -------------------------------------------------------------------------------- /assignments/HW2/compare.h: -------------------------------------------------------------------------------- 1 | #ifndef COMPARE_H__ 2 | #define COMPARE_H__ 3 | 4 | void compareImages(std::string reference_filename, std::string test_filename, 5 | bool useEpsCheck, double perPixelError, double globalError); 6 | 7 | #endif -------------------------------------------------------------------------------- /assignments/HW2/main.cpp: -------------------------------------------------------------------------------- 1 | // Udacity HW2 Driver 2 | 3 | #include 4 | #include "timer.h" 5 | #include "utils.h" 6 | #include 7 | #include 8 | 9 | #include "reference_calc.h" 10 | #include "compare.h" 11 | 12 | // include the definitions of the above functions for this homework 13 | #include "HW2.cpp" 14 | 15 | /******* DEFINED IN student_func.cu *********/ 16 | 17 | void your_gaussian_blur(const uchar4 *const h_inputImageRGBA, 18 | uchar4 *const d_inputImageRGBA, 19 | uchar4 *const 
d_outputImageRGBA, const size_t numRows, 20 | const size_t numCols, unsigned char *d_redBlurred, 21 | unsigned char *d_greenBlurred, 22 | unsigned char *d_blueBlurred, const int filterWidth); 23 | 24 | void allocateMemoryAndCopyToGPU(const size_t numRowsImage, 25 | const size_t numColsImage, 26 | const float *const h_filter, 27 | const size_t filterWidth); 28 | 29 | /******* Begin main *********/ 30 | 31 | int main(int argc, char **argv) { 32 | uchar4 *h_inputImageRGBA, *d_inputImageRGBA; 33 | uchar4 *h_outputImageRGBA, *d_outputImageRGBA; 34 | unsigned char *d_redBlurred, *d_greenBlurred, *d_blueBlurred; 35 | 36 | float *h_filter; 37 | int filterWidth; 38 | 39 | std::string input_file; 40 | std::string output_file; 41 | std::string reference_file; 42 | double perPixelError = 0.0; 43 | double globalError = 0.0; 44 | bool useEpsCheck = false; 45 | switch (argc) { 46 | case 2: 47 | input_file = std::string(argv[1]); 48 | output_file = "HW2_output.png"; 49 | reference_file = "HW2_reference.png"; 50 | break; 51 | case 3: 52 | input_file = std::string(argv[1]); 53 | output_file = std::string(argv[2]); 54 | reference_file = "HW2_reference.png"; 55 | break; 56 | case 4: 57 | input_file = std::string(argv[1]); 58 | output_file = std::string(argv[2]); 59 | reference_file = std::string(argv[3]); 60 | break; 61 | case 6: 62 | useEpsCheck = true; 63 | input_file = std::string(argv[1]); 64 | output_file = std::string(argv[2]); 65 | reference_file = std::string(argv[3]); 66 | perPixelError = atof(argv[4]); 67 | globalError = atof(argv[5]); 68 | break; 69 | default: 70 | std::cerr << "Usage: ./HW2 input_file [output_filename] " 71 | "[reference_filename] [perPixelError] [globalError]" 72 | << std::endl; 73 | exit(1); 74 | } 75 | // load the image and give us our input and output pointers 76 | preProcess(&h_inputImageRGBA, &h_outputImageRGBA, &d_inputImageRGBA, 77 | &d_outputImageRGBA, &d_redBlurred, &d_greenBlurred, &d_blueBlurred, 78 | &h_filter, &filterWidth, input_file); 79 | 
80 | allocateMemoryAndCopyToGPU(numRows(), numCols(), h_filter, filterWidth); 81 | GpuTimer timer; 82 | timer.Start(); 83 | // call the students' code 84 | your_gaussian_blur(h_inputImageRGBA, d_inputImageRGBA, d_outputImageRGBA, 85 | numRows(), numCols(), d_redBlurred, d_greenBlurred, 86 | d_blueBlurred, filterWidth); 87 | timer.Stop(); 88 | cudaDeviceSynchronize(); 89 | checkCudaErrors(cudaGetLastError()); 90 | int err = printf("Your code ran in: %f msecs.\n", timer.Elapsed()); 91 | 92 | if (err < 0) { 93 | // Couldn't print! Probably the student closed stdout - bad news 94 | std::cerr << "Couldn't print timing information! STDOUT Closed!" 95 | << std::endl; 96 | exit(1); 97 | } 98 | 99 | // check results and output the blurred image 100 | 101 | size_t numPixels = numRows() * numCols(); 102 | // copy the output back to the host 103 | checkCudaErrors(cudaMemcpy(h_outputImageRGBA, d_outputImageRGBA__, 104 | sizeof(uchar4) * numPixels, 105 | cudaMemcpyDeviceToHost)); 106 | 107 | postProcess(output_file, h_outputImageRGBA); 108 | 109 | referenceCalculation(h_inputImageRGBA, h_outputImageRGBA, numRows(), 110 | numCols(), h_filter, filterWidth); 111 | 112 | postProcess(reference_file, h_outputImageRGBA); 113 | 114 | // Cheater easy way with OpenCV 115 | // generateReferenceImage(input_file, reference_file, filterWidth); 116 | 117 | compareImages(reference_file, output_file, useEpsCheck, perPixelError, 118 | globalError); 119 | 120 | checkCudaErrors(cudaFree(d_redBlurred)); 121 | checkCudaErrors(cudaFree(d_greenBlurred)); 122 | checkCudaErrors(cudaFree(d_blueBlurred)); 123 | 124 | cleanUp(); 125 | 126 | return 0; 127 | } 128 | -------------------------------------------------------------------------------- /assignments/HW2/reference_calc.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | // for uchar4 struct 4 | #include 5 | 6 | void channelConvolution(const unsigned char *const channel, 7 | unsigned char 
*const channelBlurred, 8 | const size_t numRows, const size_t numCols, 9 | const float *filter, const int filterWidth) { 10 | // Dealing with an even width filter is trickier 11 | assert(filterWidth % 2 == 1); 12 | 13 | // For every pixel in the image 14 | for (int r = 0; r < (int)numRows; ++r) { 15 | for (int c = 0; c < (int)numCols; ++c) { 16 | float result = 0.f; 17 | // For every value in the filter around the pixel (c, r) 18 | for (int filter_r = -filterWidth / 2; filter_r <= filterWidth / 2; 19 | ++filter_r) { 20 | for (int filter_c = -filterWidth / 2; filter_c <= filterWidth / 2; 21 | ++filter_c) { 22 | // Find the global image position for this filter position 23 | // clamp to boundary of the image 24 | int image_r = std::min(std::max(r + filter_r, 0), 25 | static_cast<int>(numRows - 1)); 26 | int image_c = std::min(std::max(c + filter_c, 0), 27 | static_cast<int>(numCols - 1)); 28 | 29 | float image_value = 30 | static_cast<float>(channel[image_r * numCols + image_c]); 31 | float filter_value = 32 | filter[(filter_r + filterWidth / 2) * filterWidth + filter_c + 33 | filterWidth / 2]; 34 | 35 | result += image_value * filter_value; 36 | } 37 | } 38 | 39 | channelBlurred[r * numCols + c] = result; 40 | } 41 | } 42 | } 43 | 44 | void referenceCalculation(const uchar4 *const rgbaImage, 45 | uchar4 *const outputImage, size_t numRows, 46 | size_t numCols, const float *const filter, 47 | const int filterWidth) { 48 | unsigned char *red = new unsigned char[numRows * numCols]; 49 | unsigned char *blue = new unsigned char[numRows * numCols]; 50 | unsigned char *green = new unsigned char[numRows * numCols]; 51 | 52 | unsigned char *redBlurred = new unsigned char[numRows * numCols]; 53 | unsigned char *blueBlurred = new unsigned char[numRows * numCols]; 54 | unsigned char *greenBlurred = new unsigned char[numRows * numCols]; 55 | 56 | // First we separate the incoming RGBA image into three separate channels 57 | // for Red, Green and Blue 58 | for (size_t i = 0; i < numRows * numCols; 
++i) { 59 | uchar4 rgba = rgbaImage[i]; 60 | red[i] = rgba.x; 61 | green[i] = rgba.y; 62 | blue[i] = rgba.z; 63 | } 64 | 65 | // Now we can do the convolution for each of the color channels 66 | channelConvolution(red, redBlurred, numRows, numCols, filter, filterWidth); 67 | channelConvolution(green, greenBlurred, numRows, numCols, filter, 68 | filterWidth); 69 | channelConvolution(blue, blueBlurred, numRows, numCols, filter, filterWidth); 70 | 71 | // now recombine into the output image - Alpha is 255 for no transparency 72 | for (size_t i = 0; i < numRows * numCols; ++i) { 73 | uchar4 rgba = 74 | make_uchar4(redBlurred[i], greenBlurred[i], blueBlurred[i], 255); 75 | outputImage[i] = rgba; 76 | } 77 | 78 | delete[] red; 79 | delete[] green; 80 | delete[] blue; 81 | 82 | delete[] redBlurred; 83 | delete[] greenBlurred; 84 | delete[] blueBlurred; 85 | } 86 | -------------------------------------------------------------------------------- /assignments/HW2/reference_calc.h: -------------------------------------------------------------------------------- 1 | #ifndef REFERENCE_H__ 2 | #define REFERENCE_H__ 3 | 4 | void referenceCalculation(const uchar4 *const rgbaImage, 5 | uchar4 *const outputImage, size_t numRows, 6 | size_t numCols, const float *const filter, 7 | const int filterWidth); 8 | 9 | #endif -------------------------------------------------------------------------------- /assignments/HW2/student_func.cu: -------------------------------------------------------------------------------- 1 | // Homework 2 2 | // Image Blurring 3 | // 4 | // In this homework we are blurring an image. To do this, imagine that we have 5 | // a square array of weight values. For each pixel in the image, imagine that we 6 | // overlay this square array of weights on top of the image such that the center 7 | // of the weight array is aligned with the current pixel. To compute a blurred 8 | // pixel value, we multiply each pair of numbers that line up. 
In other words, 9 | // we multiply each weight with the pixel underneath it. Finally, we add up all 10 | // of the multiplied numbers and assign that value to our output for the current 11 | // pixel. We repeat this process for all the pixels in the image. 12 | 13 | // To help get you started, we have included some useful notes here. 14 | 15 | //**************************************************************************** 16 | 17 | // For a color image that has multiple channels, we suggest separating 18 | // the different color channels so that each color is stored contiguously 19 | // instead of being interleaved. This will simplify your code. 20 | 21 | // That is, instead of RGBARGBARGBARGBA... we suggest transforming to three 22 | // arrays (as in the previous homework we ignore the alpha channel again): 23 | // 1) RRRRRRRR... 24 | // 2) GGGGGGGG... 25 | // 3) BBBBBBBB... 26 | // 27 | // The original layout is known as an Array of Structures (AoS) whereas the 28 | // format we are converting to is known as a Structure of Arrays (SoA). 29 | 30 | // As a warm-up, we will ask you to write the kernel that performs this 31 | // separation. You should then write the "meat" of the assignment, 32 | // which is the kernel that performs the actual blur. We provide code that 33 | // re-combines your blurred results for each color channel. 34 | 35 | //**************************************************************************** 36 | 37 | // You must fill in the gaussian_blur kernel to perform the blurring of the 38 | // inputChannel, using the array of weights, and put the result in the 39 | // outputChannel. 40 | 41 | // Here is an example of computing a blur, using a weighted average, for a 42 | // single pixel in a small image.
43 | // 44 | // Array of weights: 45 | // 46 | // 0.0 0.2 0.0 47 | // 0.2 0.2 0.2 48 | // 0.0 0.2 0.0 49 | // 50 | // Image (note that we align the array of weights to the center of the box): 51 | // 52 | // 1 2 5 2 0 3 53 | // ------- 54 | // 3 |2 5 1| 6 0 0.0*2 + 0.2*5 + 0.0*1 + 55 | // | | 56 | // 4 |3 6 2| 1 4 -> 0.2*3 + 0.2*6 + 0.2*2 + -> 3.2 57 | // | | 58 | // 0 |4 0 3| 4 2 0.0*4 + 0.2*0 + 0.0*3 59 | // ------- 60 | // 9 6 5 0 3 9 61 | // 62 | // (1) (2) (3) 63 | // 64 | // A good starting place is to map each thread to a pixel as you have before. 65 | // Then every thread can perform steps 2 and 3 in the diagram above 66 | // completely independently of one another. 67 | 68 | // Note that the array of weights is square, so its height is the same as its 69 | // width. We refer to the array of weights as a filter, and we refer to its 70 | // width with the variable filterWidth. 71 | 72 | //**************************************************************************** 73 | 74 | // Your homework submission will be evaluated based on correctness and speed. 75 | // We test each pixel against a reference solution. If any pixel differs by 76 | // more than some small threshold value, the system will tell you that your 77 | // solution is incorrect, and it will let you try again. 78 | 79 | // Once you have gotten that working correctly, then you can think about using 80 | // shared memory and having the threads cooperate to achieve better performance. 81 | 82 | //**************************************************************************** 83 | 84 | // Also note that we've supplied a helpful debugging function called 85 | // checkCudaErrors. You should wrap your allocation and copying statements like 86 | // we've done in the code we're supplying you. 
Here is an example of the unsafe 87 | // way to allocate memory on the GPU: 88 | // 89 | // cudaMalloc(&d_red, sizeof(unsigned char) * numRows * numCols); 90 | // 91 | // Here is an example of the safe way to do the same thing: 92 | // 93 | // checkCudaErrors(cudaMalloc(&d_red, sizeof(unsigned char) * numRows * 94 | // numCols)); 95 | // 96 | // Writing code the safe way requires slightly more typing, but is very helpful 97 | // for catching mistakes. If you write code the unsafe way and you make a 98 | // mistake, then any subsequent kernels won't compute anything, and it will be 99 | // hard to figure out why. Writing code the safe way will inform you as soon as 100 | // you make a mistake. 101 | 102 | // Finally, remember to free the memory you allocate at the end of the function. 103 | 104 | //**************************************************************************** 105 | 106 | #include "utils.h" 107 | 108 | __global__ void gaussian_blur(const unsigned char *const inputChannel, 109 | unsigned char *const outputChannel, int numRows, 110 | int numCols, const float *const filter, 111 | const int filterWidth) { 112 | // TODO 113 | 114 | // NOTE: Be sure to compute any intermediate results in floating point 115 | // before storing the final result as unsigned char. 116 | 117 | // NOTE: Be careful not to try to access memory that is outside the bounds of 118 | // the image. You'll want code that performs the following check before 119 | // accessing GPU memory: 120 | // 121 | // if ( absolute_image_position_x >= numCols || 122 | // absolute_image_position_y >= numRows ) 123 | // { 124 | // return; 125 | // } 126 | 127 | // NOTE: If a thread's absolute position 2D position is within the image, but 128 | // some of its neighbors are outside the image, then you will need to be extra 129 | // careful. 
Instead of trying to read such a neighbor value from GPU memory 130 | // (which won't work because the value is out of bounds), you should 131 | // explicitly clamp the neighbor values you read to be within the bounds of 132 | // the image. If this is not clear to you, then please refer to sequential 133 | // reference solution for the exact clamping semantics you should follow. 134 | } 135 | 136 | // This kernel takes in an image represented as a uchar4 and splits 137 | // it into three images consisting of only one color channel each 138 | __global__ void separateChannels(const uchar4 *const inputImageRGBA, 139 | int numRows, int numCols, 140 | unsigned char *const redChannel, 141 | unsigned char *const greenChannel, 142 | unsigned char *const blueChannel) { 143 | // TODO 144 | // 145 | // NOTE: Be careful not to try to access memory that is outside the bounds of 146 | // the image. You'll want code that performs the following check before 147 | // accessing GPU memory: 148 | // 149 | // if ( absolute_image_position_x >= numCols || 150 | // absolute_image_position_y >= numRows ) 151 | // { 152 | // return; 153 | // } 154 | } 155 | 156 | // This kernel takes in three color channels and recombines them 157 | // into one image. The alpha channel is set to 255 to represent 158 | // that this image has no transparency. 
159 | __global__ void recombineChannels(const unsigned char *const redChannel, 160 | const unsigned char *const greenChannel, 161 | const unsigned char *const blueChannel, 162 | uchar4 *const outputImageRGBA, int numRows, 163 | int numCols) { 164 | const int2 thread_2D_pos = make_int2(blockIdx.x * blockDim.x + threadIdx.x, 165 | blockIdx.y * blockDim.y + threadIdx.y); 166 | 167 | const int thread_1D_pos = thread_2D_pos.y * numCols + thread_2D_pos.x; 168 | 169 | // make sure we don't try and access memory outside the image 170 | // by having any threads mapped there return early 171 | if (thread_2D_pos.x >= numCols || thread_2D_pos.y >= numRows) 172 | return; 173 | 174 | unsigned char red = redChannel[thread_1D_pos]; 175 | unsigned char green = greenChannel[thread_1D_pos]; 176 | unsigned char blue = blueChannel[thread_1D_pos]; 177 | 178 | // Alpha should be 255 for no transparency 179 | uchar4 outputPixel = make_uchar4(red, green, blue, 255); 180 | 181 | outputImageRGBA[thread_1D_pos] = outputPixel; 182 | } 183 | 184 | unsigned char *d_red, *d_green, *d_blue; 185 | float *d_filter; 186 | 187 | void allocateMemoryAndCopyToGPU(const size_t numRowsImage, 188 | const size_t numColsImage, 189 | const float *const h_filter, 190 | const size_t filterWidth) { 191 | 192 | // allocate memory for the three different channels 193 | // original 194 | checkCudaErrors( 195 | cudaMalloc(&d_red, sizeof(unsigned char) * numRowsImage * numColsImage)); 196 | checkCudaErrors(cudaMalloc(&d_green, sizeof(unsigned char) * numRowsImage * 197 | numColsImage)); 198 | checkCudaErrors( 199 | cudaMalloc(&d_blue, sizeof(unsigned char) * numRowsImage * numColsImage)); 200 | 201 | // TODO: 202 | // Allocate memory for the filter on the GPU 203 | // Use the pointer d_filter that we have already declared for you 204 | // You need to allocate memory for the filter with cudaMalloc 205 | // be sure to use checkCudaErrors like the above examples to 206 | // be able to tell if anything goes wrong 207 | // 
IMPORTANT: Notice that we pass a pointer to a pointer to cudaMalloc 208 | 209 | // TODO: 210 | // Copy the filter on the host (h_filter) to the memory you just allocated 211 | // on the GPU. cudaMemcpy(dst, src, numBytes, cudaMemcpyHostToDevice); 212 | // Remember to use checkCudaErrors! 213 | } 214 | 215 | void your_gaussian_blur(const uchar4 *const h_inputImageRGBA, 216 | uchar4 *const d_inputImageRGBA, 217 | uchar4 *const d_outputImageRGBA, const size_t numRows, 218 | const size_t numCols, unsigned char *d_redBlurred, 219 | unsigned char *d_greenBlurred, 220 | unsigned char *d_blueBlurred, const int filterWidth) { 221 | // TODO: Set reasonable block size (i.e., number of threads per block) 222 | const dim3 blockSize; 223 | 224 | // TODO: 225 | // Compute correct grid size (i.e., number of blocks per kernel launch) 226 | // from the image size and block size. 227 | const dim3 gridSize; 228 | 229 | // TODO: Launch a kernel for separating the RGBA image into different color 230 | // channels 231 | 232 | // Call cudaDeviceSynchronize(), then call checkCudaErrors() immediately after 233 | // launching your kernel to make sure that you didn't make any mistakes. 234 | cudaDeviceSynchronize(); 235 | checkCudaErrors(cudaGetLastError()); 236 | 237 | // TODO: Call your convolution kernel here 3 times, once for each color 238 | // channel. 239 | 240 | // Again, call cudaDeviceSynchronize(), then call checkCudaErrors() 241 | // immediately after launching your kernel to make sure that you didn't make 242 | // any mistakes. 243 | cudaDeviceSynchronize(); 244 | checkCudaErrors(cudaGetLastError()); 245 | 246 | // Now we recombine your results. We take care of launching this kernel for 247 | // you. 248 | // 249 | // NOTE: This kernel launch depends on the gridSize and blockSize variables, 250 | // which you must set yourself.
251 | recombineChannels<<<gridSize, blockSize>>>(d_redBlurred, d_greenBlurred, 252 | d_blueBlurred, d_outputImageRGBA, 253 | numRows, numCols); 254 | cudaDeviceSynchronize(); 255 | checkCudaErrors(cudaGetLastError()); 256 | } 257 | 258 | // Free all the memory that we allocated 259 | // TODO: make sure you free any arrays that you allocated 260 | void cleanup() { 261 | checkCudaErrors(cudaFree(d_red)); 262 | checkCudaErrors(cudaFree(d_green)); 263 | checkCudaErrors(cudaFree(d_blue)); 264 | } 265 | -------------------------------------------------------------------------------- /assignments/HW2/timer.h: -------------------------------------------------------------------------------- 1 | #ifndef GPU_TIMER_H__ 2 | #define GPU_TIMER_H__ 3 | 4 | #include 5 | 6 | struct GpuTimer { 7 | cudaEvent_t start; 8 | cudaEvent_t stop; 9 | 10 | GpuTimer() { 11 | cudaEventCreate(&start); 12 | cudaEventCreate(&stop); 13 | } 14 | 15 | ~GpuTimer() { 16 | cudaEventDestroy(start); 17 | cudaEventDestroy(stop); 18 | } 19 | 20 | void Start() { cudaEventRecord(start, 0); } 21 | 22 | void Stop() { cudaEventRecord(stop, 0); } 23 | 24 | float Elapsed() { 25 | float elapsed; 26 | cudaEventSynchronize(stop); 27 | cudaEventElapsedTime(&elapsed, start, stop); 28 | return elapsed; 29 | } 30 | }; 31 | 32 | #endif /* GPU_TIMER_H__ */ 33 | -------------------------------------------------------------------------------- /assignments/HW2/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H__ 2 | #define UTILS_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__) 13 | 14 | template 15 | void check(T err, const char *const func, const char *const file, 16 | const int line) { 17 | if (err != cudaSuccess) { 18 | std::cerr << "CUDA error at: " << file << ":" << line << std::endl; 19 | std::cerr << cudaGetErrorString(err) << " " << func << std::endl; 20
| exit(1); 21 | } 22 | } 23 | 24 | template 25 | void checkResultsExact(const T *const ref, const T *const gpu, size_t numElem) { 26 | // check that the GPU result matches the CPU result 27 | for (size_t i = 0; i < numElem; ++i) { 28 | if (ref[i] != gpu[i]) { 29 | std::cerr << "Difference at pos " << i << std::endl; 30 | // the + is magic to convert char to int without messing 31 | // with other types 32 | std::cerr << "Reference: " << std::setprecision(17) << +ref[i] 33 | << "\nGPU : " << +gpu[i] << std::endl; 34 | exit(1); 35 | } 36 | } 37 | } 38 | 39 | template 40 | void checkResultsEps(const T *const ref, const T *const gpu, size_t numElem, 41 | double eps1, double eps2) { 42 | assert(eps1 >= 0 && eps2 >= 0); 43 | unsigned long long totalDiff = 0; 44 | unsigned numSmallDifferences = 0; 45 | for (size_t i = 0; i < numElem; ++i) { 46 | // subtract smaller from larger in case of unsigned types 47 | T smaller = std::min(ref[i], gpu[i]); 48 | T larger = std::max(ref[i], gpu[i]); 49 | T diff = larger - smaller; 50 | if (diff > 0 && diff <= eps1) { 51 | numSmallDifferences++; 52 | } else if (diff > eps1) { 53 | std::cerr << "Difference at pos " << +i << " exceeds tolerance of " 54 | << eps1 << std::endl; 55 | std::cerr << "Reference: " << std::setprecision(17) << +ref[i] 56 | << "\nGPU : " << +gpu[i] << std::endl; 57 | exit(1); 58 | } 59 | totalDiff += diff * diff; 60 | } 61 | double percentSmallDifferences = 62 | (double)numSmallDifferences / (double)numElem; 63 | if (percentSmallDifferences > eps2) { 64 | std::cerr << "Total percentage of non-zero pixel difference between the " 65 | "two images exceeds " 66 | << 100.0 * eps2 << "%" << std::endl; 67 | std::cerr << "Percentage of non-zero pixel differences: " 68 | << 100.0 * percentSmallDifferences << "%" << std::endl; 69 | exit(1); 70 | } 71 | } 72 | 73 | // Uses the autodesk method of image comparison 74 | // Note the the tolerance here is in PIXELS not a percentage of input pixels 75 | template 76 | void 
checkResultsAutodesk(const T *const ref, const T *const gpu, 77 | size_t numElem, double variance, size_t tolerance) { 78 | 79 | size_t numBadPixels = 0; 80 | for (size_t i = 0; i < numElem; ++i) { 81 | T smaller = std::min(ref[i], gpu[i]); 82 | T larger = std::max(ref[i], gpu[i]); 83 | T diff = larger - smaller; 84 | if (diff > variance) 85 | ++numBadPixels; 86 | } 87 | 88 | if (numBadPixels > tolerance) { 89 | std::cerr << "Too many bad pixels in the image." << numBadPixels << "/" 90 | << tolerance << std::endl; 91 | exit(1); 92 | } 93 | } 94 | 95 | #endif 96 | -------------------------------------------------------------------------------- /assignments/HW3/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ############################################################################ 2 | # CMakeLists.txt for OpenCV and CUDA. 3 | # 2012-02-07 4 | # Quan Tran Minh. edit by Johannes Kast, Michael Sarahan 5 | # quantm@unist.ac.kr kast.jo@googlemail.com msarahan@gmail.com 6 | ############################################################################ 7 | # minimum required cmake version 8 | cmake_minimum_required(VERSION 2.8) 9 | find_package(CUDA QUIET REQUIRED) 10 | 11 | SET (compare_files compare.cpp) 12 | 13 | file( GLOB hdr *.hpp *.h ) 14 | file( GLOB cu *.cu) 15 | SET (HW3_files main.cpp loadSaveImage.cpp reference_calc.cpp compare.cpp) 16 | 17 | CUDA_ADD_EXECUTABLE(HW3 ${HW3_files} ${hdr} ${cu}) 18 | -------------------------------------------------------------------------------- /assignments/HW3/HW3.cu: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | #include 3 | #include "loadSaveImage.h" 4 | #include 5 | 6 | // chroma-LogLuminance Space 7 | static float *d_x__; 8 | static float *d_y__; 9 | static float *d_logY__; 10 | 11 | // memory for the cdf 12 | static unsigned int *d_cdf__; 13 | 14 | static const int numBins = 1024; 15 | 16 | size_t numRows__; 
17 | size_t numCols__; 18 | 19 | /* Copied from Mike's IPython notebook with some minor modifications 20 | * Mainly double precision constants to floats and log10 -> log10f 21 | * Also removed Luminance (Y) channel since it is never used eke*/ 22 | 23 | __global__ void rgb_to_xyY(float *d_r, float *d_g, float *d_b, float *d_x, 24 | float *d_y, float *d_log_Y, float delta, 25 | int num_pixels_y, int num_pixels_x) { 26 | int ny = num_pixels_y; 27 | int nx = num_pixels_x; 28 | int2 image_index_2d = make_int2((blockIdx.x * blockDim.x) + threadIdx.x, 29 | (blockIdx.y * blockDim.y) + threadIdx.y); 30 | int image_index_1d = (nx * image_index_2d.y) + image_index_2d.x; 31 | 32 | if (image_index_2d.x < nx && image_index_2d.y < ny) { 33 | float r = d_r[image_index_1d]; 34 | float g = d_g[image_index_1d]; 35 | float b = d_b[image_index_1d]; 36 | 37 | float X = (r * 0.4124f) + (g * 0.3576f) + (b * 0.1805f); 38 | float Y = (r * 0.2126f) + (g * 0.7152f) + (b * 0.0722f); 39 | float Z = (r * 0.0193f) + (g * 0.1192f) + (b * 0.9505f); 40 | 41 | float L = X + Y + Z; 42 | float x = X / L; 43 | float y = Y / L; 44 | 45 | float log_Y = log10f(delta + Y); 46 | 47 | d_x[image_index_1d] = x; 48 | d_y[image_index_1d] = y; 49 | d_log_Y[image_index_1d] = log_Y; 50 | } 51 | } 52 | 53 | /* Copied from Mike's IPython notebook * 54 | Modified just by having threads read the 55 | normalization constant directly from device memory 56 | instead of copying it back */ 57 | 58 | __global__ void normalize_cdf(unsigned int *d_input_cdf, float *d_output_cdf, 59 | int n) { 60 | const float normalization_constant = 1.f / d_input_cdf[n - 1]; 61 | 62 | int global_index_1d = (blockIdx.x * blockDim.x) + threadIdx.x; 63 | 64 | if (global_index_1d < n) { 65 | unsigned int input_value = d_input_cdf[global_index_1d]; 66 | float output_value = input_value * normalization_constant; 67 | 68 | d_output_cdf[global_index_1d] = output_value; 69 | } 70 | } 71 | 72 | /* Copied from Mike's IPython notebook * 73 | Modified 
double constants -> float * 74 | Perform tone mapping based upon new * 75 | luminance scaling */ 76 | 77 | __global__ void tonemap(float *d_x, float *d_y, float *d_log_Y, 78 | float *d_cdf_norm, float *d_r_new, float *d_g_new, 79 | float *d_b_new, float min_log_Y, float max_log_Y, 80 | float log_Y_range, int num_bins, int num_pixels_y, 81 | int num_pixels_x) { 82 | int ny = num_pixels_y; 83 | int nx = num_pixels_x; 84 | int2 image_index_2d = make_int2((blockIdx.x * blockDim.x) + threadIdx.x, 85 | (blockIdx.y * blockDim.y) + threadIdx.y); 86 | int image_index_1d = (nx * image_index_2d.y) + image_index_2d.x; 87 | 88 | if (image_index_2d.x < nx && image_index_2d.y < ny) { 89 | float x = d_x[image_index_1d]; 90 | float y = d_y[image_index_1d]; 91 | float log_Y = d_log_Y[image_index_1d]; 92 | int bin_index = 93 | min(num_bins - 1, int((num_bins * (log_Y - min_log_Y)) / log_Y_range)); 94 | float Y_new = d_cdf_norm[bin_index]; 95 | 96 | float X_new = x * (Y_new / y); 97 | float Z_new = (1 - x - y) * (Y_new / y); 98 | 99 | float r_new = (X_new * 3.2406f) + (Y_new * -1.5372f) + (Z_new * -0.4986f); 100 | float g_new = (X_new * -0.9689f) + (Y_new * 1.8758f) + (Z_new * 0.0415f); 101 | float b_new = (X_new * 0.0557f) + (Y_new * -0.2040f) + (Z_new * 1.0570f); 102 | 103 | d_r_new[image_index_1d] = r_new; 104 | d_g_new[image_index_1d] = g_new; 105 | d_b_new[image_index_1d] = b_new; 106 | } 107 | } 108 | 109 | // return types are void since any internal error will be handled by quitting 110 | // no point in returning error codes... 
111 | void preProcess(float **d_luminance, unsigned int **d_cdf, size_t *numRows, 112 | size_t *numCols, unsigned int *numberOfBins, 113 | const std::string &filename) { 114 | // make sure the context initializes ok 115 | checkCudaErrors(cudaFree(0)); 116 | 117 | float *imgPtr; // we will become responsible for this pointer 118 | loadImageHDR(filename, &imgPtr, &numRows__, &numCols__); 119 | *numRows = numRows__; 120 | *numCols = numCols__; 121 | 122 | // first thing to do is split incoming BGR float data into separate channels 123 | size_t numPixels = numRows__ * numCols__; 124 | float *red = new float[numPixels]; 125 | float *green = new float[numPixels]; 126 | float *blue = new float[numPixels]; 127 | 128 | // Remember: the image is loaded in BGR channel order 129 | for (size_t i = 0; i < numPixels; ++i) { 130 | blue[i] = imgPtr[3 * i + 0]; 131 | green[i] = imgPtr[3 * i + 1]; 132 | red[i] = imgPtr[3 * i + 2]; 133 | } 134 | 135 | delete[] imgPtr; // being good citizens and releasing the resources 136 | // allocated in loadImageHDR 137 | 138 | float *d_red, *d_green, *d_blue; // RGB space 139 | 140 | size_t channelSize = sizeof(float) * numPixels; 141 | 142 | checkCudaErrors(cudaMalloc(&d_red, channelSize)); 143 | checkCudaErrors(cudaMalloc(&d_green, channelSize)); 144 | checkCudaErrors(cudaMalloc(&d_blue, channelSize)); 145 | checkCudaErrors(cudaMalloc(&d_x__, channelSize)); 146 | checkCudaErrors(cudaMalloc(&d_y__, channelSize)); 147 | checkCudaErrors(cudaMalloc(&d_logY__, channelSize)); 148 | 149 | checkCudaErrors(cudaMemcpy(d_red, red, channelSize, cudaMemcpyHostToDevice)); 150 | checkCudaErrors( 151 | cudaMemcpy(d_green, green, channelSize, cudaMemcpyHostToDevice)); 152 | checkCudaErrors( 153 | cudaMemcpy(d_blue, blue, channelSize, cudaMemcpyHostToDevice)); 154 | 155 | // convert from RGB space to chrominance/luminance space xyY, using 32x16 blocks and a grid rounded up to cover the whole image 156 | const dim3 blockSize(32, 16, 1); 157 | const dim3 gridSize((numCols__ + blockSize.x - 1) / blockSize.x, 158 | (numRows__ + blockSize.y - 1) / blockSize.y,
1); 159 | rgb_to_xyY<<<gridSize, blockSize>>>(d_red, d_green, d_blue, d_x__, d_y__, 160 | d_logY__, .0001f, numRows__, numCols__); 161 | 162 | cudaDeviceSynchronize(); 163 | checkCudaErrors(cudaGetLastError()); 164 | 165 | *d_luminance = d_logY__; 166 | 167 | // allocate memory for the cdf of the histogram 168 | *numberOfBins = numBins; 169 | checkCudaErrors(cudaMalloc(&d_cdf__, sizeof(unsigned int) * numBins)); 170 | checkCudaErrors(cudaMemset(d_cdf__, 0, sizeof(unsigned int) * numBins)); 171 | 172 | *d_cdf = d_cdf__; 173 | 174 | checkCudaErrors(cudaFree(d_red)); 175 | checkCudaErrors(cudaFree(d_green)); 176 | checkCudaErrors(cudaFree(d_blue)); 177 | 178 | delete[] red; 179 | delete[] green; 180 | delete[] blue; 181 | } 182 | 183 | void postProcess(const std::string &output_file, size_t numRows, size_t numCols, 184 | float min_log_Y, float max_log_Y) { 185 | const int numPixels = numRows__ * numCols__; 186 | 187 | const int numThreads = 192; 188 | 189 | float *d_cdf_normalized; 190 | 191 | checkCudaErrors(cudaMalloc(&d_cdf_normalized, sizeof(float) * numBins)); 192 | 193 | // first normalize the cdf to a maximum value of 1 194 | // this is how we compress the range of the luminance channel 195 | normalize_cdf<<<(numBins + numThreads - 1) / numThreads, numThreads>>>( 196 | d_cdf__, d_cdf_normalized, numBins); 197 | 198 | cudaDeviceSynchronize(); 199 | checkCudaErrors(cudaGetLastError()); 200 | 201 | // allocate memory for the output RGB channels 202 | float *h_red, *h_green, *h_blue; 203 | float *d_red, *d_green, *d_blue; 204 | 205 | h_red = new float[numPixels]; 206 | h_green = new float[numPixels]; 207 | h_blue = new float[numPixels]; 208 | 209 | checkCudaErrors(cudaMalloc(&d_red, sizeof(float) * numPixels)); 210 | checkCudaErrors(cudaMalloc(&d_green, sizeof(float) * numPixels)); 211 | checkCudaErrors(cudaMalloc(&d_blue, sizeof(float) * numPixels)); 212 | 213 | float log_Y_range = max_log_Y - min_log_Y; 214 | 215 | const dim3 blockSize(32, 16, 1); 216 | const dim3 gridSize((numCols +
blockSize.x - 1) / blockSize.x, 217 | (numRows + blockSize.y - 1) / blockSize.y); 218 | // next perform the actual tone-mapping 219 | // we map each luminance value to its new value 220 | // and then transform back to RGB space 221 | tonemap<<<gridSize, blockSize>>>(d_x__, d_y__, d_logY__, d_cdf_normalized, 222 | d_red, d_green, d_blue, min_log_Y, max_log_Y, 223 | log_Y_range, numBins, numRows, numCols); 224 | 225 | cudaDeviceSynchronize(); 226 | checkCudaErrors(cudaGetLastError()); 227 | 228 | checkCudaErrors(cudaMemcpy(h_red, d_red, sizeof(float) * numPixels, 229 | cudaMemcpyDeviceToHost)); 230 | checkCudaErrors(cudaMemcpy(h_green, d_green, sizeof(float) * numPixels, 231 | cudaMemcpyDeviceToHost)); 232 | checkCudaErrors(cudaMemcpy(h_blue, d_blue, sizeof(float) * numPixels, 233 | cudaMemcpyDeviceToHost)); 234 | 235 | // recombine the image channels 236 | float *imageHDR = new float[numPixels * 3]; 237 | 238 | for (int i = 0; i < numPixels; ++i) { 239 | imageHDR[3 * i + 0] = h_blue[i]; 240 | imageHDR[3 * i + 1] = h_green[i]; 241 | imageHDR[3 * i + 2] = h_red[i]; 242 | } 243 | 244 | saveImageHDR(imageHDR, numRows, numCols, output_file); 245 | 246 | delete[] imageHDR; 247 | delete[] h_red; 248 | delete[] h_green; 249 | delete[] h_blue; 250 | 251 | // cleanup: release the normalized cdf and the device RGB buffers allocated in this call 252 | checkCudaErrors(cudaFree(d_cdf_normalized)); checkCudaErrors(cudaFree(d_red)); checkCudaErrors(cudaFree(d_green)); checkCudaErrors(cudaFree(d_blue)); 253 | } 254 | 255 | void cleanupGlobalMemory(void) { 256 | checkCudaErrors(cudaFree(d_x__)); 257 | checkCudaErrors(cudaFree(d_y__)); 258 | checkCudaErrors(cudaFree(d_logY__)); 259 | checkCudaErrors(cudaFree(d_cdf__)); 260 | } 261 | -------------------------------------------------------------------------------- /assignments/HW3/Makefile: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # Default target executed when no arguments are given to make.
5 | default_target: all 6 | .PHONY : default_target 7 | 8 | # Allow only one "make -f Makefile2" at a time, but pass parallelism. 9 | .NOTPARALLEL: 10 | 11 | #============================================================================= 12 | # Special targets provided by cmake. 13 | 14 | # Disable implicit rules so canonical targets will work. 15 | .SUFFIXES: 16 | 17 | # Disable VCS-based implicit rules. 18 | % : %,v 19 | 20 | # Disable VCS-based implicit rules. 21 | % : RCS/% 22 | 23 | # Disable VCS-based implicit rules. 24 | % : RCS/%,v 25 | 26 | # Disable VCS-based implicit rules. 27 | % : SCCS/s.% 28 | 29 | # Disable VCS-based implicit rules. 30 | % : s.% 31 | 32 | .SUFFIXES: .hpux_make_needs_suffix_list 33 | 34 | # Command-line flag to silence nested $(MAKE). 35 | $(VERBOSE)MAKESILENT = -s 36 | 37 | #Suppress display of executed commands. 38 | $(VERBOSE).SILENT: 39 | 40 | # A target that is always out of date. 41 | cmake_force: 42 | .PHONY : cmake_force 43 | 44 | #============================================================================= 45 | # Set environment variables for the build. 46 | 47 | # The shell in which to execute make rules. 48 | SHELL = /bin/sh 49 | 50 | # The CMake executable. 51 | CMAKE_COMMAND = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake 52 | 53 | # The command to remove a file. 54 | RM = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E rm -f 55 | 56 | # Escaping for special characters. 57 | EQUALS = = 58 | 59 | # The top-level source directory on which CMake was run. 60 | CMAKE_SOURCE_DIR = /data/home/chilli/cluster/work/CS344/assignments 61 | 62 | # The top-level build directory on which CMake was run. 63 | CMAKE_BINARY_DIR = /data/home/chilli/cluster/work/CS344/assignments 64 | 65 | #============================================================================= 66 | # Targets provided globally by CMake. 
67 | 68 | # Special rule for the target rebuild_cache 69 | rebuild_cache: 70 | @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..." 71 | /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) 72 | .PHONY : rebuild_cache 73 | 74 | # Special rule for the target rebuild_cache 75 | rebuild_cache/fast: rebuild_cache 76 | .PHONY : rebuild_cache/fast 77 | 78 | # Special rule for the target edit_cache 79 | edit_cache: 80 | @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "No interactive CMake dialog available..." 81 | /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. 82 | .PHONY : edit_cache 83 | 84 | # Special rule for the target edit_cache 85 | edit_cache/fast: edit_cache 86 | .PHONY : edit_cache/fast 87 | 88 | # The main all target 89 | all: cmake_check_build_system 90 | cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles /data/home/chilli/cluster/work/CS344/assignments/homework/HW3//CMakeFiles/progress.marks 91 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW3/all 92 | $(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles 0 93 | .PHONY : all 94 | 95 | # The main clean target 96 | clean: 97 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW3/clean 98 | .PHONY : clean 99 | 100 | # The main clean target 101 | clean/fast: clean 102 | .PHONY : clean/fast 103 | 104 | # Prepare targets for installation. 
105 | preinstall: all 106 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW3/preinstall 107 | .PHONY : preinstall 108 | 109 | # Prepare targets for installation. 110 | preinstall/fast: 111 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW3/preinstall 112 | .PHONY : preinstall/fast 113 | 114 | # clear depends 115 | depend: 116 | cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 117 | .PHONY : depend 118 | 119 | # Convenience name for target. 120 | homework/HW3/CMakeFiles/HW3.dir/rule: 121 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW3/CMakeFiles/HW3.dir/rule 122 | .PHONY : homework/HW3/CMakeFiles/HW3.dir/rule 123 | 124 | # Convenience name for target. 125 | HW3: homework/HW3/CMakeFiles/HW3.dir/rule 126 | .PHONY : HW3 127 | 128 | # fast build rule for target. 
129 | HW3/fast: 130 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/build 131 | .PHONY : HW3/fast 132 | 133 | compare.o: compare.cpp.o 134 | .PHONY : compare.o 135 | 136 | # target to build an object file 137 | compare.cpp.o: 138 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/compare.cpp.o 139 | .PHONY : compare.cpp.o 140 | 141 | compare.i: compare.cpp.i 142 | .PHONY : compare.i 143 | 144 | # target to preprocess a source file 145 | compare.cpp.i: 146 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/compare.cpp.i 147 | .PHONY : compare.cpp.i 148 | 149 | compare.s: compare.cpp.s 150 | .PHONY : compare.s 151 | 152 | # target to generate assembly for a file 153 | compare.cpp.s: 154 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/compare.cpp.s 155 | .PHONY : compare.cpp.s 156 | 157 | loadSaveImage.o: loadSaveImage.cpp.o 158 | .PHONY : loadSaveImage.o 159 | 160 | # target to build an object file 161 | loadSaveImage.cpp.o: 162 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/loadSaveImage.cpp.o 163 | .PHONY : loadSaveImage.cpp.o 164 | 165 | loadSaveImage.i: loadSaveImage.cpp.i 166 | .PHONY : loadSaveImage.i 167 | 168 | # target to preprocess a source file 169 | loadSaveImage.cpp.i: 170 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/loadSaveImage.cpp.i 171 | .PHONY : loadSaveImage.cpp.i 172 | 173 | loadSaveImage.s: 
loadSaveImage.cpp.s 174 | .PHONY : loadSaveImage.s 175 | 176 | # target to generate assembly for a file 177 | loadSaveImage.cpp.s: 178 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/loadSaveImage.cpp.s 179 | .PHONY : loadSaveImage.cpp.s 180 | 181 | main.o: main.cpp.o 182 | .PHONY : main.o 183 | 184 | # target to build an object file 185 | main.cpp.o: 186 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/main.cpp.o 187 | .PHONY : main.cpp.o 188 | 189 | main.i: main.cpp.i 190 | .PHONY : main.i 191 | 192 | # target to preprocess a source file 193 | main.cpp.i: 194 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/main.cpp.i 195 | .PHONY : main.cpp.i 196 | 197 | main.s: main.cpp.s 198 | .PHONY : main.s 199 | 200 | # target to generate assembly for a file 201 | main.cpp.s: 202 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/main.cpp.s 203 | .PHONY : main.cpp.s 204 | 205 | reference_calc.o: reference_calc.cpp.o 206 | .PHONY : reference_calc.o 207 | 208 | # target to build an object file 209 | reference_calc.cpp.o: 210 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/reference_calc.cpp.o 211 | .PHONY : reference_calc.cpp.o 212 | 213 | reference_calc.i: reference_calc.cpp.i 214 | .PHONY : reference_calc.i 215 | 216 | # target to preprocess a source file 217 | reference_calc.cpp.i: 218 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make 
homework/HW3/CMakeFiles/HW3.dir/reference_calc.cpp.i 219 | .PHONY : reference_calc.cpp.i 220 | 221 | reference_calc.s: reference_calc.cpp.s 222 | .PHONY : reference_calc.s 223 | 224 | # target to generate assembly for a file 225 | reference_calc.cpp.s: 226 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/reference_calc.cpp.s 227 | .PHONY : reference_calc.cpp.s 228 | 229 | # Help Target 230 | help: 231 | @echo "The following are some of the valid targets for this Makefile:" 232 | @echo "... all (the default if no target is provided)" 233 | @echo "... clean" 234 | @echo "... depend" 235 | @echo "... edit_cache" 236 | @echo "... rebuild_cache" 237 | @echo "... HW3" 238 | @echo "... compare.o" 239 | @echo "... compare.i" 240 | @echo "... compare.s" 241 | @echo "... loadSaveImage.o" 242 | @echo "... loadSaveImage.i" 243 | @echo "... loadSaveImage.s" 244 | @echo "... main.o" 245 | @echo "... main.i" 246 | @echo "... main.s" 247 | @echo "... reference_calc.o" 248 | @echo "... reference_calc.i" 249 | @echo "... reference_calc.s" 250 | .PHONY : help 251 | 252 | 253 | 254 | #============================================================================= 255 | # Special targets to cleanup operation of make. 256 | 257 | # Special rule to run CMake to check the build system integrity. 258 | # No rule that depends on this can have commands that come from listfiles 259 | # because they might be regenerated. 
260 | cmake_check_build_system: 261 | cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 262 | .PHONY : cmake_check_build_system 263 | 264 | -------------------------------------------------------------------------------- /assignments/HW3/compare.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "utils.h" 3 | 4 |
// Compares the image in test_filename against the image in reference_filename.
//
// Side effect: writes a contrast-stretched absolute-difference image to
// "HW3_differenceImage.png" in the current directory.
//
// reference_filename / test_filename: image files, loaded unchanged
//                                     (imread flag -1).
// useEpsCheck:   true  -> checkResultsEps(perPixelError, globalError)
//                false -> checkResultsExact (both tolerances are ignored).
//
// Prints "PASS" when the check helper returns. The helpers in utils.h call
// exit(1) on a mismatch, and so does this function when an image cannot be
// loaded or the two images differ in size or type.
void compareImages(std::string reference_filename, std::string test_filename,
                   bool useEpsCheck, double perPixelError, double globalError) {
  cv::Mat reference = cv::imread(reference_filename, -1);
  cv::Mat test = cv::imread(test_filename, -1);

  // imread reports failure by returning an empty matrix. Stop here instead of
  // dereferencing a null data pointer further down.
  if (reference.empty()) {
    std::cerr << "Couldn't open file: " << reference_filename << std::endl;
    exit(1);
  }
  if (test.empty()) {
    std::cerr << "Couldn't open file: " << test_filename << std::endl;
    exit(1);
  }

  // The subtraction and the element-wise check both need identical layouts.
  if (reference.rows != test.rows || reference.cols != test.cols ||
      reference.type() != test.type()) {
    std::cerr << "Image dimensions or types differ: " << reference_filename
              << " vs " << test_filename << std::endl;
    exit(1);
  }

  cv::Mat diff = abs(reference - test);

  cv::Mat diffSingleChannel =
      diff.reshape(1, 0); // convert to 1 channel, same # rows

  double minVal, maxVal;

  cv::minMaxLoc(diffSingleChannel, &minVal, &maxVal, NULL,
                NULL); // NULL because we don't care about location

  // Stretch the differences to the full [0, 255] range. When every difference
  // is the same (identical images in particular) the range is zero, so only
  // the offset is removed to avoid dividing by zero.
  const double diffRange = maxVal - minVal;
  if (diffRange > 0) {
    diffSingleChannel = (diffSingleChannel - minVal) * (255. / diffRange);
  } else {
    diffSingleChannel = diffSingleChannel - minVal;
  }

  diff = diffSingleChannel.reshape(reference.channels(), 0);

  cv::imwrite("HW3_differenceImage.png", diff);

  // OK, now we can start comparing values...
  unsigned char *referencePtr = reference.ptr<unsigned char>(0);
  unsigned char *testPtr = test.ptr<unsigned char>(0);

  // Multiply in size_t so large images cannot overflow int.
  const size_t numElem = static_cast<size_t>(reference.rows) *
                         static_cast<size_t>(reference.cols) *
                         static_cast<size_t>(reference.channels());

  if (useEpsCheck) {
    checkResultsEps(referencePtr, testPtr, numElem, perPixelError,
                    globalError);
  } else {
    checkResultsExact(referencePtr, testPtr, numElem);
  }

  std::cout << "PASS" << std::endl;
  return;
}
42 | -------------------------------------------------------------------------------- /assignments/HW3/compare.h: -------------------------------------------------------------------------------- 1 | #ifndef HW3_H__ 2 | #define HW3_H__ 3 | 4 | void compareImages(std::string reference_filename, std::string test_filename, 5 | bool useEpsCheck, double perPixelError, double globalError); 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /assignments/HW3/loadSaveImage.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "cuda_runtime.h" 7 | 8 | // The caller becomes responsible for the returned pointer. This 9 | // is done in the interest of keeping this code as simple as possible. 10 | // In production code this is a bad idea - we should use RAII 11 | // to ensure the memory is freed. DO NOT COPY THIS AND USE IN PRODUCTION 12 | // CODE!!!
13 | void loadImageHDR(const std::string &filename, float **imagePtr, 14 | size_t *numRows, size_t *numCols) { 15 | cv::Mat originImg = cv::imread(filename.c_str(), 16 | CV_LOAD_IMAGE_COLOR | CV_LOAD_IMAGE_ANYDEPTH); 17 | 18 | cv::Mat image; 19 | 20 | if (originImg.type() != CV_32FC3) { 21 | originImg.convertTo(image, CV_32FC3); 22 | } else { 23 | image = originImg; 24 | } 25 | 26 | if (image.empty()) { 27 | std::cerr << "Couldn't open file: " << filename << std::endl; 28 | exit(1); 29 | } 30 | 31 | if (image.channels() != 3) { 32 | std::cerr << "Image must be color!" << std::endl; 33 | exit(1); 34 | } 35 | 36 | if (!image.isContinuous()) { 37 | std::cerr << "Image isn't continuous!" << std::endl; 38 | exit(1); 39 | } 40 | 41 | *imagePtr = new float[image.rows * image.cols * image.channels()]; 42 | 43 | float *cvPtr = image.ptr(0); 44 | for (size_t i = 0; i < image.rows * image.cols * image.channels(); ++i) 45 | (*imagePtr)[i] = cvPtr[i]; 46 | 47 | *numRows = image.rows; 48 | *numCols = image.cols; 49 | } 50 | 51 | void loadImageRGBA(const std::string &filename, uchar4 **imagePtr, 52 | size_t *numRows, size_t *numCols) { 53 | cv::Mat image = cv::imread(filename.c_str(), CV_LOAD_IMAGE_COLOR); 54 | if (image.empty()) { 55 | std::cerr << "Couldn't open file: " << filename << std::endl; 56 | exit(1); 57 | } 58 | 59 | if (image.channels() != 3) { 60 | std::cerr << "Image must be color!" << std::endl; 61 | exit(1); 62 | } 63 | 64 | if (!image.isContinuous()) { 65 | std::cerr << "Image isn't continuous!" 
<< std::endl; 66 | exit(1); 67 | } 68 | 69 | cv::Mat imageRGBA; 70 | cv::cvtColor(image, imageRGBA, CV_BGR2RGBA); 71 | 72 | *imagePtr = new uchar4[image.rows * image.cols]; 73 | 74 | unsigned char *cvPtr = imageRGBA.ptr(0); 75 | for (size_t i = 0; i < image.rows * image.cols; ++i) { 76 | (*imagePtr)[i].x = cvPtr[4 * i + 0]; 77 | (*imagePtr)[i].y = cvPtr[4 * i + 1]; 78 | (*imagePtr)[i].z = cvPtr[4 * i + 2]; 79 | (*imagePtr)[i].w = cvPtr[4 * i + 3]; 80 | } 81 | 82 | *numRows = image.rows; 83 | *numCols = image.cols; 84 | } 85 | 86 | void saveImageRGBA(const uchar4 *const image, const size_t numRows, 87 | const size_t numCols, const std::string &output_file) { 88 | int sizes[2]; 89 | sizes[0] = numRows; 90 | sizes[1] = numCols; 91 | cv::Mat imageRGBA(2, sizes, CV_8UC4, (void *)image); 92 | cv::Mat imageOutputBGR; 93 | cv::cvtColor(imageRGBA, imageOutputBGR, CV_RGBA2BGR); 94 | // output the image 95 | cv::imwrite(output_file.c_str(), imageOutputBGR); 96 | } 97 | 98 | // output an exr file 99 | // assumed to already be BGR 100 | void saveImageHDR(const float *const image, const size_t numRows, 101 | const size_t numCols, const std::string &output_file) { 102 | int sizes[2]; 103 | sizes[0] = numRows; 104 | sizes[1] = numCols; 105 | 106 | cv::Mat imageHDR(2, sizes, CV_32FC3, (void *)image); 107 | 108 | imageHDR = imageHDR * 255; 109 | 110 | cv::imwrite(output_file.c_str(), imageHDR); 111 | } 112 | -------------------------------------------------------------------------------- /assignments/HW3/loadSaveImage.h: -------------------------------------------------------------------------------- 1 | #ifndef LOADSAVEIMAGE_H__ 2 | #define LOADSAVEIMAGE_H__ 3 | 4 | #include 5 | #include //for uchar4 6 | 7 | void loadImageHDR(const std::string &filename, float **imagePtr, 8 | size_t *numRows, size_t *numCols); 9 | 10 | void loadImageRGBA(const std::string &filename, uchar4 **imagePtr, 11 | size_t *numRows, size_t *numCols); 12 | 13 | void saveImageRGBA(const uchar4 *const image, 
const size_t numRows, 14 | const size_t numCols, const std::string &output_file); 15 | 16 | void saveImageHDR(const float *const image, const size_t numRows, 17 | const size_t numCols, const std::string &output_file); 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /assignments/HW3/main.cpp: -------------------------------------------------------------------------------- 1 | // Udacity HW3 Driver 2 | 3 | #include 4 | #include "timer.h" 5 | #include "utils.h" 6 | #include 7 | #include 8 | #include 9 | 10 | #include "compare.h" 11 | #include "reference_calc.h" 12 | 13 | // Functions from HW3.cu 14 | void preProcess(float **d_luminance, unsigned int **d_cdf, size_t *numRows, 15 | size_t *numCols, unsigned int *numBins, 16 | const std::string &filename); 17 | 18 | void postProcess(const std::string &output_file, size_t numRows, size_t numCols, 19 | float min_logLum, float max_logLum); 20 | 21 | void cleanupGlobalMemory(void); 22 | 23 | // Function from student_func.cu 24 | void your_histogram_and_prefixsum(const float *const d_luminance, 25 | unsigned int *const d_cdf, float &min_logLum, 26 | float &max_logLum, const size_t numRows, 27 | const size_t numCols, const size_t numBins); 28 | 29 |
// HW3 driver.
//
// Usage: ./HW3 input_file [output_filename] [reference_filename]
//              [perPixelError] [globalError]
//
// Flow: preProcess loads the input, the student routine is timed, the
// student result is written with postProcess, the CPU reference CDF is
// computed and written the same way, and the two output images are compared.
// The two tolerances must be supplied together (argc == 6); they switch the
// comparison from exact to epsilon-based.
int main(int argc, char **argv) {
  float *d_luminance;
  unsigned int *d_cdf;

  size_t numRows, numCols;
  unsigned int numBins;

  std::string input_file;
  std::string output_file;
  std::string reference_file;
  double perPixelError = 0.0;
  double globalError = 0.0;
  bool useEpsCheck = false;

  switch (argc) {
  case 2:
    input_file = std::string(argv[1]);
    output_file = "HW3_output.png";
    reference_file = "HW3_reference.png";
    break;
  case 3:
    input_file = std::string(argv[1]);
    output_file = std::string(argv[2]);
    reference_file = "HW3_reference.png";
    break;
  case 4:
    input_file = std::string(argv[1]);
    output_file = std::string(argv[2]);
    reference_file = std::string(argv[3]);
    break;
  case 6:
    useEpsCheck = true;
    input_file = std::string(argv[1]);
    output_file = std::string(argv[2]);
    reference_file = std::string(argv[3]);
    perPixelError = atof(argv[4]);
    globalError = atof(argv[5]);
    break;
  default:
    std::cerr << "Usage: ./HW3 input_file [output_filename] "
                 "[reference_filename] [perPixelError] [globalError]"
              << std::endl;
    exit(1);
  }
  // load the image and give us our input and output pointers
  preProcess(&d_luminance, &d_cdf, &numRows, &numCols, &numBins, input_file);

  GpuTimer timer;
  float min_logLum, max_logLum;
  min_logLum = 0.f;
  max_logLum = 1.f;
  timer.Start();
  // call the students' code
  your_histogram_and_prefixsum(d_luminance, d_cdf, min_logLum, max_logLum,
                               numRows, numCols, numBins);
  timer.Stop();
  // Check the synchronize result too, so an execution failure in the student
  // code is reported at this call.
  checkCudaErrors(cudaDeviceSynchronize());
  checkCudaErrors(cudaGetLastError());
  int err = printf("Your code ran in: %f msecs.\n", timer.Elapsed());

  if (err < 0) {
    // Couldn't print! Probably the student closed stdout - bad news
    std::cerr << "Couldn't print timing information! STDOUT Closed!"
              << std::endl;
    exit(1);
  }

  const size_t numPixels = numRows * numCols;

  float *h_luminance = (float *)malloc(sizeof(float) * numPixels);
  unsigned int *h_cdf = (unsigned int *)malloc(sizeof(unsigned int) * numBins);

  // malloc may legitimately return NULL for a zero-byte request, so only
  // treat NULL as a failure when bytes were actually requested.
  if ((h_luminance == NULL && numPixels > 0) ||
      (h_cdf == NULL && numBins > 0)) {
    std::cerr << "Couldn't allocate host buffers!" << std::endl;
    exit(1);
  }

  checkCudaErrors(cudaMemcpy(h_luminance, d_luminance,
                             numPixels * sizeof(float),
                             cudaMemcpyDeviceToHost));

  // check results and output the tone-mapped image
  postProcess(output_file, numRows, numCols, min_logLum, max_logLum);

  // referenceCalculation derives the minimum and maximum from h_luminance
  // itself and overwrites both arguments, so no host-side min/max pass is
  // needed before calling it.
  referenceCalculation(h_luminance, h_cdf, numRows, numCols, numBins,
                       min_logLum, max_logLum);

  checkCudaErrors(cudaMemcpy(d_cdf, h_cdf, sizeof(unsigned int) * numBins,
                             cudaMemcpyHostToDevice));

  // check results and output the tone-mapped image
  postProcess(reference_file, numRows, numCols, min_logLum, max_logLum);

  cleanupGlobalMemory();

  // The host copies are no longer needed once the reference image is written.
  free(h_luminance);
  free(h_cdf);

  compareImages(reference_file, output_file, useEpsCheck, perPixelError,
                globalError);

  return 0;
}
127 | -------------------------------------------------------------------------------- /assignments/HW3/memorial.exr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW3/memorial.exr -------------------------------------------------------------------------------- /assignments/HW3/memorial_large.exr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW3/memorial_large.exr -------------------------------------------------------------------------------- /assignments/HW3/memorial_png.gold:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW3/memorial_png.gold -------------------------------------------------------------------------------- /assignments/HW3/memorial_png_large.gold: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW3/memorial_png_large.gold -------------------------------------------------------------------------------- /assignments/HW3/memorial_raw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW3/memorial_raw.png -------------------------------------------------------------------------------- /assignments/HW3/memorial_raw_large.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW3/memorial_raw_large.png -------------------------------------------------------------------------------- /assignments/HW3/reference_calc.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | void referenceCalculation(const float *const h_logLuminance, 5 | unsigned int *const h_cdf, const size_t numRows, 6 | const size_t numCols, const size_t numBins, 7 | float &logLumMin, float &logLumMax) { 8 | logLumMin = h_logLuminance[0]; 9 | logLumMax = h_logLuminance[0]; 10 | 11 | // Step 1 12 | // first we find the minimum and maximum across the entire image 13 | for (size_t i = 1; i < numCols * numRows; ++i) { 14 | logLumMin = std::min(h_logLuminance[i], logLumMin); 15 | logLumMax = std::max(h_logLuminance[i], logLumMax); 16 | } 17 | 18 | // Step 2 
19 | float logLumRange = logLumMax - logLumMin; 20 | 21 | // Step 3 22 | // next we use the now known range to compute 23 | // a histogram of numBins bins 24 | unsigned int *histo = new unsigned int[numBins]; 25 | 26 | for (size_t i = 0; i < numBins; ++i) 27 | histo[i] = 0; 28 | 29 | for (size_t i = 0; i < numCols * numRows; ++i) { 30 | unsigned int bin = 31 | std::min(static_cast(numBins - 1), 32 | static_cast((h_logLuminance[i] - logLumMin) / 33 | logLumRange * numBins)); 34 | histo[bin]++; 35 | } 36 | 37 | // Step 4 38 | // finally we perform and exclusive scan (prefix sum) 39 | // on the histogram to get the cumulative distribution 40 | h_cdf[0] = 0; 41 | for (size_t i = 1; i < numBins; ++i) { 42 | h_cdf[i] = h_cdf[i - 1] + histo[i - 1]; 43 | } 44 | 45 | delete[] histo; 46 | } -------------------------------------------------------------------------------- /assignments/HW3/reference_calc.h: -------------------------------------------------------------------------------- 1 | #ifndef REFERENCE_H__ 2 | #define REFERENCE_H__ 3 | 4 | void referenceCalculation(const float *const h_logLuminance, 5 | unsigned int *const h_cdf, const size_t numRows, 6 | const size_t numCols, const size_t numBins, 7 | float &logLumMin, float &logLumMax); 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /assignments/HW3/student_func.cu: -------------------------------------------------------------------------------- 1 | /* Udacity Homework 3 2 | HDR Tone-mapping 3 | 4 | Background HDR 5 | ============== 6 | 7 | A High Dynamic Range (HDR) image contains a wider variation of intensity 8 | and color than is allowed by the RGB format with 1 byte per channel that we 9 | have used in the previous assignment. 10 | 11 | To store this extra information we use single precision floating point for 12 | each channel. This allows for an extremely wide range of intensity values. 
13 | 14 | In the image for this assignment, the inside of a church with light coming in 15 | through stained glass windows, the raw input floating point values for the 16 | channels range from 0 to 275. But the mean is .41 and 98% of the values are 17 | less than 3! This means that certain areas (the windows) are extremely bright 18 | compared to everywhere else. If we linearly map this [0-275] range into the 19 | [0-255] range that we have been using then most values will be mapped to zero! 20 | The only thing we will be able to see are the very brightest areas - the 21 | windows - everything else will appear pitch black. 22 | 23 | The problem is that although we have cameras capable of recording the wide 24 | range of intensity that exists in the real world our monitors are not capable 25 | of displaying it. Our eyes are also quite capable of observing a much wider 26 | range of intensities than our image formats / monitors are capable of 27 | displaying. 28 | 29 | Tone-mapping is a process that transforms the intensities in the image so that 30 | the brightest values aren't nearly so far away from the mean. That way when 31 | we transform the values into [0-255] we can actually see the entire image. 32 | There are many ways to perform this process and it is as much an art as a 33 | science - there is no single "right" answer. In this homework we will 34 | implement one possible technique. 35 | 36 | Background Chrominance-Luminance 37 | ================================ 38 | 39 | The RGB space that we have been using to represent images can be thought of as 40 | one possible set of axes spanning a three-dimensional space of color. We 41 | sometimes choose other axes to represent this space because they make certain 42 | operations more convenient. 43 | 44 | Another possible way of representing a color image is to separate the color 45 | information (chromaticity) from the brightness information.
There are 46 | multiple different methods for doing this - a common one during the analog 47 | television days was known as Chrominance-Luminance or YUV. 48 | 49 | We choose to represent the image in this way so that we can remap only the 50 | intensity channel and then recombine the new intensity values with the color 51 | information to form the final image. 52 | 53 | Old TV signals used to be transmitted in this way so that black & white 54 | televisions could display the luminance channel while color televisions would 55 | display all three of the channels. 56 | 57 | 58 | Tone-mapping 59 | ============ 60 | 61 | In this assignment we are going to transform the luminance channel (actually 62 | the log of the luminance, but this is unimportant for the parts of the 63 | algorithm that you will be implementing) by compressing its range to [0, 1]. 64 | To do this we need the cumulative distribution of the luminance values. 65 | 66 | Example 67 | ------- 68 | 69 | input : [2 4 3 3 1 7 4 5 7 0 9 4 3 2] 70 | min / max / range: 0 / 9 / 9 71 | 72 | histo with 3 bins: [4 7 3] 73 | 74 | cdf : [0 4 11] (exclusive scan of the histogram) 75 | 76 | 77 | Your task is to calculate this cumulative distribution by following these 78 | steps.
79 | 80 | */ 81 | 82 | #include "utils.h" 83 | 84 |
// Student entry point (intentionally left unimplemented).
//
// d_logLuminance: numRows * numCols input values (read only).
//                 NOTE(review): the d_ prefix and the driver's
//                 device-to-host copy suggest device memory - confirm
//                 against HW3.cu.
// d_cdf:          output, numBins entries, already allocated by the caller.
// min_logLum / max_logLum: outputs for the smallest / largest input value.
void your_histogram_and_prefixsum(const float *const d_logLuminance,
                                  unsigned int *const d_cdf, float &min_logLum,
                                  float &max_logLum, const size_t numRows,
                                  const size_t numCols, const size_t numBins) {
  // TODO
  /*Here are the steps you need to implement
    1) find the minimum and maximum value in the input logLuminance channel
       store in min_logLum and max_logLum
    2) subtract them to find the range
    3) generate a histogram of all the values in the logLuminance channel using
       the formula: bin = (lum[i] - lumMin) / lumRange * numBins
       (the maximum value yields bin == numBins; the CPU reference clamps it
       to numBins - 1)
    4) Perform an exclusive scan (prefix sum) on the histogram to get
       the cumulative distribution of luminance values (this should go in the
       incoming d_cdf pointer which already has been allocated for you)       */
}
99 | -------------------------------------------------------------------------------- /assignments/HW3/timer.h: -------------------------------------------------------------------------------- 1 | #ifndef GPU_TIMER_H__ 2 | #define GPU_TIMER_H__ 3 | 4 | #include 5 | 6 |
// Measures elapsed time between two CUDA events recorded on stream 0.
//
// Usage: Start(); ...work...; Stop(); Elapsed().
// The events are created in the constructor and destroyed in the destructor.
// NOTE(review): the struct is copyable, and a copy would destroy the same
// events a second time - avoid copying instances.
struct GpuTimer {
  cudaEvent_t start;
  cudaEvent_t stop;

  GpuTimer() {
    cudaEventCreate(&start);
    cudaEventCreate(&stop);
  }

  ~GpuTimer() {
    cudaEventDestroy(start);
    cudaEventDestroy(stop);
  }

  // Records the start event on stream 0.
  void Start() { cudaEventRecord(start, 0); }

  // Records the stop event on stream 0.
  void Stop() { cudaEventRecord(stop, 0); }

  // Blocks until the stop event has completed, then returns the time between
  // the two events as reported by cudaEventElapsedTime. Returns 0 if either
  // CUDA call fails, rather than an uninitialized value.
  float Elapsed() {
    float elapsed = 0.f;
    if (cudaEventSynchronize(stop) != cudaSuccess)
      return 0.f;
    if (cudaEventElapsedTime(&elapsed, start, stop) != cudaSuccess)
      return 0.f;
    return elapsed;
  }
};
31 | 32 | #endif /* GPU_TIMER_H__ */ 33 | -------------------------------------------------------------------------------- /assignments/HW3/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H__ 2 | #define UTILS_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include
10 | #include 11 | 12 | #define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__) 13 | 14 | template 15 | void check(T err, const char *const func, const char *const file, 16 | const int line) { 17 | if (err != cudaSuccess) { 18 | std::cerr << "CUDA error at: " << file << ":" << line << std::endl; 19 | std::cerr << cudaGetErrorString(err) << " " << func << std::endl; 20 | exit(1); 21 | } 22 | } 23 | 24 | template 25 | void checkResultsExact(const T *const ref, const T *const gpu, size_t numElem) { 26 | // check that the GPU result matches the CPU result 27 | for (size_t i = 0; i < numElem; ++i) { 28 | if (ref[i] != gpu[i]) { 29 | std::cerr << "Difference at pos " << i << std::endl; 30 | // the + is magic to convert char to int without messing 31 | // with other types 32 | std::cerr << "Reference: " << std::setprecision(17) << +ref[i] 33 | << "\nGPU : " << +gpu[i] << std::endl; 34 | exit(1); 35 | } 36 | } 37 | } 38 | 39 | template 40 | void checkResultsEps(const T *const ref, const T *const gpu, size_t numElem, 41 | double eps1, double eps2) { 42 | assert(eps1 >= 0 && eps2 >= 0); 43 | unsigned long long totalDiff = 0; 44 | unsigned numSmallDifferences = 0; 45 | for (size_t i = 0; i < numElem; ++i) { 46 | // subtract smaller from larger in case of unsigned types 47 | T smaller = std::min(ref[i], gpu[i]); 48 | T larger = std::max(ref[i], gpu[i]); 49 | T diff = larger - smaller; 50 | if (diff > 0 && diff <= eps1) { 51 | numSmallDifferences++; 52 | } else if (diff > eps1) { 53 | std::cerr << "Difference at pos " << +i << " exceeds tolerance of " 54 | << eps1 << std::endl; 55 | std::cerr << "Reference: " << std::setprecision(17) << +ref[i] 56 | << "\nGPU : " << +gpu[i] << std::endl; 57 | exit(1); 58 | } 59 | totalDiff += diff * diff; 60 | } 61 | double percentSmallDifferences = 62 | (double)numSmallDifferences / (double)numElem; 63 | if (percentSmallDifferences > eps2) { 64 | std::cerr << "Total percentage of non-zero pixel difference between the " 65 | "two 
images exceeds " 66 | << 100.0 * eps2 << "%" << std::endl; 67 | std::cerr << "Percentage of non-zero pixel differences: " 68 | << 100.0 * percentSmallDifferences << "%" << std::endl; 69 | exit(1); 70 | } 71 | } 72 | 73 | // Uses the autodesk method of image comparison 74 | // Note the the tolerance here is in PIXELS not a percentage of input pixels 75 | template 76 | void checkResultsAutodesk(const T *const ref, const T *const gpu, 77 | size_t numElem, double variance, size_t tolerance) { 78 | 79 | size_t numBadPixels = 0; 80 | for (size_t i = 0; i < numElem; ++i) { 81 | T smaller = std::min(ref[i], gpu[i]); 82 | T larger = std::max(ref[i], gpu[i]); 83 | T diff = larger - smaller; 84 | if (diff > variance) 85 | ++numBadPixels; 86 | } 87 | 88 | if (numBadPixels > tolerance) { 89 | std::cerr << "Too many bad pixels in the image." << numBadPixels << "/" 90 | << tolerance << std::endl; 91 | exit(1); 92 | } 93 | } 94 | 95 | #endif 96 | -------------------------------------------------------------------------------- /assignments/HW4/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ############################################################################ 2 | # CMakeLists.txt for OpenCV and CUDA. 3 | # 2012-02-07 4 | # Quan Tran Minh. edit by Johannes Kast, Michael Sarahan 5 | # quantm@unist.ac.kr kast.jo@googlemail.com msarahan@gmail.com 6 | ############################################################################ 7 | 8 | # collect source files 9 | 10 | file( GLOB hdr *.hpp *.h ) 11 | file( GLOB cu *.cu) 12 | SET (HW4_files main.cpp loadSaveImage.cpp reference_calc.cpp compare.cpp) 13 | 14 | CUDA_ADD_EXECUTABLE(HW4 ${HW4_files} ${hdr} ${img} ${cu}) 15 | 16 | 17 | -------------------------------------------------------------------------------- /assignments/HW4/Makefile: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # Default target executed when no arguments are given to make. 5 | default_target: all 6 | .PHONY : default_target 7 | 8 | # Allow only one "make -f Makefile2" at a time, but pass parallelism. 9 | .NOTPARALLEL: 10 | 11 | #============================================================================= 12 | # Special targets provided by cmake. 13 | 14 | # Disable implicit rules so canonical targets will work. 15 | .SUFFIXES: 16 | 17 | # Disable VCS-based implicit rules. 18 | % : %,v 19 | 20 | # Disable VCS-based implicit rules. 21 | % : RCS/% 22 | 23 | # Disable VCS-based implicit rules. 24 | % : RCS/%,v 25 | 26 | # Disable VCS-based implicit rules. 27 | % : SCCS/s.% 28 | 29 | # Disable VCS-based implicit rules. 30 | % : s.% 31 | 32 | .SUFFIXES: .hpux_make_needs_suffix_list 33 | 34 | # Command-line flag to silence nested $(MAKE). 35 | $(VERBOSE)MAKESILENT = -s 36 | 37 | #Suppress display of executed commands. 38 | $(VERBOSE).SILENT: 39 | 40 | # A target that is always out of date. 41 | cmake_force: 42 | .PHONY : cmake_force 43 | 44 | #============================================================================= 45 | # Set environment variables for the build. 46 | 47 | # The shell in which to execute make rules. 48 | SHELL = /bin/sh 49 | 50 | # The CMake executable. 51 | CMAKE_COMMAND = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake 52 | 53 | # The command to remove a file. 54 | RM = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E rm -f 55 | 56 | # Escaping for special characters. 57 | EQUALS = = 58 | 59 | # The top-level source directory on which CMake was run. 60 | CMAKE_SOURCE_DIR = /data/home/chilli/cluster/work/CS344/assignments 61 | 62 | # The top-level build directory on which CMake was run. 
63 | CMAKE_BINARY_DIR = /data/home/chilli/cluster/work/CS344/assignments 64 | 65 | #============================================================================= 66 | # Targets provided globally by CMake. 67 | 68 | # Special rule for the target rebuild_cache 69 | rebuild_cache: 70 | @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..." 71 | /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) 72 | .PHONY : rebuild_cache 73 | 74 | # Special rule for the target rebuild_cache 75 | rebuild_cache/fast: rebuild_cache 76 | .PHONY : rebuild_cache/fast 77 | 78 | # Special rule for the target edit_cache 79 | edit_cache: 80 | @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "No interactive CMake dialog available..." 81 | /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. 82 | .PHONY : edit_cache 83 | 84 | # Special rule for the target edit_cache 85 | edit_cache/fast: edit_cache 86 | .PHONY : edit_cache/fast 87 | 88 | # The main all target 89 | all: cmake_check_build_system 90 | cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles /data/home/chilli/cluster/work/CS344/assignments/homework/HW4//CMakeFiles/progress.marks 91 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW4/all 92 | $(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles 0 93 | .PHONY : all 94 | 95 | # The main clean target 96 | clean: 97 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW4/clean 98 | .PHONY : clean 99 | 100 | # The main clean target 101 | clean/fast: clean 102 | .PHONY : clean/fast 103 | 104 | # Prepare targets for 
installation. 105 | preinstall: all 106 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW4/preinstall 107 | .PHONY : preinstall 108 | 109 | # Prepare targets for installation. 110 | preinstall/fast: 111 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW4/preinstall 112 | .PHONY : preinstall/fast 113 | 114 | # clear depends 115 | depend: 116 | cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 117 | .PHONY : depend 118 | 119 | # Convenience name for target. 120 | homework/HW4/CMakeFiles/HW4.dir/rule: 121 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW4/CMakeFiles/HW4.dir/rule 122 | .PHONY : homework/HW4/CMakeFiles/HW4.dir/rule 123 | 124 | # Convenience name for target. 125 | HW4: homework/HW4/CMakeFiles/HW4.dir/rule 126 | .PHONY : HW4 127 | 128 | # fast build rule for target. 
129 | HW4/fast: 130 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/build 131 | .PHONY : HW4/fast 132 | 133 | compare.o: compare.cpp.o 134 | .PHONY : compare.o 135 | 136 | # target to build an object file 137 | compare.cpp.o: 138 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/compare.cpp.o 139 | .PHONY : compare.cpp.o 140 | 141 | compare.i: compare.cpp.i 142 | .PHONY : compare.i 143 | 144 | # target to preprocess a source file 145 | compare.cpp.i: 146 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/compare.cpp.i 147 | .PHONY : compare.cpp.i 148 | 149 | compare.s: compare.cpp.s 150 | .PHONY : compare.s 151 | 152 | # target to generate assembly for a file 153 | compare.cpp.s: 154 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/compare.cpp.s 155 | .PHONY : compare.cpp.s 156 | 157 | loadSaveImage.o: loadSaveImage.cpp.o 158 | .PHONY : loadSaveImage.o 159 | 160 | # target to build an object file 161 | loadSaveImage.cpp.o: 162 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/loadSaveImage.cpp.o 163 | .PHONY : loadSaveImage.cpp.o 164 | 165 | loadSaveImage.i: loadSaveImage.cpp.i 166 | .PHONY : loadSaveImage.i 167 | 168 | # target to preprocess a source file 169 | loadSaveImage.cpp.i: 170 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/loadSaveImage.cpp.i 171 | .PHONY : loadSaveImage.cpp.i 172 | 173 | loadSaveImage.s: 
loadSaveImage.cpp.s 174 | .PHONY : loadSaveImage.s 175 | 176 | # target to generate assembly for a file 177 | loadSaveImage.cpp.s: 178 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/loadSaveImage.cpp.s 179 | .PHONY : loadSaveImage.cpp.s 180 | 181 | main.o: main.cpp.o 182 | .PHONY : main.o 183 | 184 | # target to build an object file 185 | main.cpp.o: 186 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/main.cpp.o 187 | .PHONY : main.cpp.o 188 | 189 | main.i: main.cpp.i 190 | .PHONY : main.i 191 | 192 | # target to preprocess a source file 193 | main.cpp.i: 194 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/main.cpp.i 195 | .PHONY : main.cpp.i 196 | 197 | main.s: main.cpp.s 198 | .PHONY : main.s 199 | 200 | # target to generate assembly for a file 201 | main.cpp.s: 202 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/main.cpp.s 203 | .PHONY : main.cpp.s 204 | 205 | reference_calc.o: reference_calc.cpp.o 206 | .PHONY : reference_calc.o 207 | 208 | # target to build an object file 209 | reference_calc.cpp.o: 210 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/reference_calc.cpp.o 211 | .PHONY : reference_calc.cpp.o 212 | 213 | reference_calc.i: reference_calc.cpp.i 214 | .PHONY : reference_calc.i 215 | 216 | # target to preprocess a source file 217 | reference_calc.cpp.i: 218 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make 
homework/HW4/CMakeFiles/HW4.dir/reference_calc.cpp.i 219 | .PHONY : reference_calc.cpp.i 220 | 221 | reference_calc.s: reference_calc.cpp.s 222 | .PHONY : reference_calc.s 223 | 224 | # target to generate assembly for a file 225 | reference_calc.cpp.s: 226 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/reference_calc.cpp.s 227 | .PHONY : reference_calc.cpp.s 228 | 229 | # Help Target 230 | help: 231 | @echo "The following are some of the valid targets for this Makefile:" 232 | @echo "... all (the default if no target is provided)" 233 | @echo "... clean" 234 | @echo "... depend" 235 | @echo "... edit_cache" 236 | @echo "... rebuild_cache" 237 | @echo "... HW4" 238 | @echo "... compare.o" 239 | @echo "... compare.i" 240 | @echo "... compare.s" 241 | @echo "... loadSaveImage.o" 242 | @echo "... loadSaveImage.i" 243 | @echo "... loadSaveImage.s" 244 | @echo "... main.o" 245 | @echo "... main.i" 246 | @echo "... main.s" 247 | @echo "... reference_calc.o" 248 | @echo "... reference_calc.i" 249 | @echo "... reference_calc.s" 250 | .PHONY : help 251 | 252 | 253 | 254 | #============================================================================= 255 | # Special targets to cleanup operation of make. 256 | 257 | # Special rule to run CMake to check the build system integrity. 258 | # No rule that depends on this can have commands that come from listfiles 259 | # because they might be regenerated. 
260 | cmake_check_build_system: 261 | cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 262 | .PHONY : cmake_check_build_system 263 | 264 | -------------------------------------------------------------------------------- /assignments/HW4/compare.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "utils.h" 3 | 4 | void compareImages(std::string reference_filename, std::string test_filename, 5 | bool useEpsCheck, double perPixelError, double globalError) { 6 | cv::Mat reference = cv::imread(reference_filename, -1); 7 | cv::Mat test = cv::imread(test_filename, -1); 8 | 9 | cv::Mat diff = abs(reference - test); 10 | 11 | cv::Mat diffSingleChannel = 12 | diff.reshape(1, 0); // convert to 1 channel, same # rows 13 | 14 | double minVal, maxVal; 15 | 16 | cv::minMaxLoc(diffSingleChannel, &minVal, &maxVal, NULL, 17 | NULL); // NULL because we don't care about location 18 | 19 | // now perform transform so that we bump values to the full range 20 | 21 | diffSingleChannel = (diffSingleChannel - minVal) * (255. / (maxVal - minVal)); 22 | 23 | diff = diffSingleChannel.reshape(reference.channels(), 0); 24 | 25 | cv::imwrite("HW4_differenceImage.png", diff); 26 | // OK, now we can start comparing values... 
27 | unsigned char *referencePtr = reference.ptr(0); 28 | unsigned char *testPtr = test.ptr(0); 29 | 30 | if (useEpsCheck) { 31 | checkResultsEps(referencePtr, testPtr, 32 | reference.rows * reference.cols * reference.channels(), 33 | perPixelError, globalError); 34 | } else { 35 | checkResultsExact(referencePtr, testPtr, 36 | reference.rows * reference.cols * reference.channels()); 37 | } 38 | 39 | std::cout << "PASS" << std::endl; 40 | return; 41 | } -------------------------------------------------------------------------------- /assignments/HW4/compare.h: -------------------------------------------------------------------------------- 1 | #ifndef HW4_H__ 2 | #define HW4_H__ 3 | 4 | void compareImages(std::string reference_filename, std::string test_filename, 5 | bool useEpsCheck, double perPixelError, double globalError); 6 | 7 | #endif -------------------------------------------------------------------------------- /assignments/HW4/loadSaveImage.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "cuda_runtime.h" 6 | 7 | // The caller becomes responsible for the returned pointer. This 8 | // is done in the interest of keeping this code as simple as possible. 9 | // In production code this is a bad idea - we should use RAII 10 | // to ensure the memory is freed. DO NOT COPY THIS AND USE IN PRODUCTION 11 | // CODE!!! 12 | void loadImageHDR(const std::string &filename, float **imagePtr, 13 | size_t *numRows, size_t *numCols) { 14 | cv::Mat image = cv::imread(filename.c_str(), 15 | CV_LOAD_IMAGE_COLOR | CV_LOAD_IMAGE_ANYDEPTH); 16 | if (image.empty()) { 17 | std::cerr << "Couldn't open file: " << filename << std::endl; 18 | exit(1); 19 | } 20 | 21 | if (image.channels() != 3) { 22 | std::cerr << "Image must be color!" << std::endl; 23 | exit(1); 24 | } 25 | 26 | if (!image.isContinuous()) { 27 | std::cerr << "Image isn't continuous!" 
<< std::endl; 28 | exit(1); 29 | } 30 | 31 | *imagePtr = new float[image.rows * image.cols * image.channels()]; 32 | 33 | float *cvPtr = image.ptr(0); 34 | for (size_t i = 0; i < image.rows * image.cols * image.channels(); ++i) 35 | (*imagePtr)[i] = cvPtr[i]; 36 | 37 | *numRows = image.rows; 38 | *numCols = image.cols; 39 | } 40 | 41 | void loadImageRGBA(const std::string &filename, uchar4 **imagePtr, 42 | size_t *numRows, size_t *numCols) { 43 | cv::Mat image = cv::imread(filename.c_str(), CV_LOAD_IMAGE_COLOR); 44 | if (image.empty()) { 45 | std::cerr << "Couldn't open file: " << filename << std::endl; 46 | exit(1); 47 | } 48 | 49 | if (image.channels() != 3) { 50 | std::cerr << "Image must be color!" << std::endl; 51 | exit(1); 52 | } 53 | 54 | if (!image.isContinuous()) { 55 | std::cerr << "Image isn't continuous!" << std::endl; 56 | exit(1); 57 | } 58 | 59 | cv::Mat imageRGBA; 60 | cv::cvtColor(image, imageRGBA, CV_BGR2RGBA); 61 | 62 | *imagePtr = new uchar4[image.rows * image.cols]; 63 | 64 | unsigned char *cvPtr = imageRGBA.ptr(0); 65 | for (size_t i = 0; i < image.rows * image.cols; ++i) { 66 | (*imagePtr)[i].x = cvPtr[4 * i + 0]; 67 | (*imagePtr)[i].y = cvPtr[4 * i + 1]; 68 | (*imagePtr)[i].z = cvPtr[4 * i + 2]; 69 | (*imagePtr)[i].w = cvPtr[4 * i + 3]; 70 | } 71 | 72 | *numRows = image.rows; 73 | *numCols = image.cols; 74 | } 75 | 76 | void saveImageRGBA(const uchar4 *const image, const size_t numRows, 77 | const size_t numCols, const std::string &output_file) { 78 | int sizes[2]; 79 | sizes[0] = numRows; 80 | sizes[1] = numCols; 81 | cv::Mat imageRGBA(2, sizes, CV_8UC4, (void *)image); 82 | cv::Mat imageOutputBGR; 83 | cv::cvtColor(imageRGBA, imageOutputBGR, CV_RGBA2BGR); 84 | // output the image 85 | cv::imwrite(output_file.c_str(), imageOutputBGR); 86 | } 87 | 88 | // output an exr file 89 | // assumed to already be BGR 90 | void saveImageHDR(const float *const image, const size_t numRows, 91 | const size_t numCols, const std::string &output_file) { 92 
| int sizes[2]; 93 | sizes[0] = numRows; 94 | sizes[1] = numCols; 95 | 96 | cv::Mat imageHDR(2, sizes, CV_32FC3, (void *)image); 97 | 98 | imageHDR = imageHDR * 255; 99 | 100 | cv::imwrite(output_file.c_str(), imageHDR); 101 | } 102 | -------------------------------------------------------------------------------- /assignments/HW4/loadSaveImage.h: -------------------------------------------------------------------------------- 1 | #ifndef LOADSAVEIMAGE_H__ 2 | #define LOADSAVEIMAGE_H__ 3 | 4 | #include 5 | #include //for uchar4 6 | 7 | void loadImageHDR(const std::string &filename, float **imagePtr, 8 | size_t *numRows, size_t *numCols); 9 | 10 | void loadImageRGBA(const std::string &filename, uchar4 **imagePtr, 11 | size_t *numRows, size_t *numCols); 12 | 13 | void saveImageRGBA(const uchar4 *const image, const size_t numRows, 14 | const size_t numCols, const std::string &output_file); 15 | 16 | void saveImageHDR(const float *const image, const size_t numRows, 17 | const size_t numCols, const std::string &output_file); 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /assignments/HW4/main.cpp: -------------------------------------------------------------------------------- 1 | // Udacity HW4 Driver 2 | 3 | #include 4 | #include "timer.h" 5 | #include "utils.h" 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "compare.h" 12 | #include "reference_calc.h" 13 | 14 | void preProcess(unsigned int **inputVals, unsigned int **inputPos, 15 | unsigned int **outputVals, unsigned int **outputPos, 16 | size_t &numElems, const std::string &filename, 17 | const std::string &template_file); 18 | 19 | void postProcess(const unsigned int *const outputVals, 20 | const unsigned int *const outputPos, const size_t numElems, 21 | const std::string &output_file); 22 | 23 | void your_sort(unsigned int *const inputVals, unsigned int *const inputPos, 24 | unsigned int *const outputVals, unsigned int *const 
outputPos, 25 | const size_t numElems); 26 | 27 | int main(int argc, char **argv) { 28 | unsigned int *inputVals; 29 | unsigned int *inputPos; 30 | unsigned int *outputVals; 31 | unsigned int *outputPos; 32 | 33 | size_t numElems; 34 | 35 | std::string input_file; 36 | std::string template_file; 37 | std::string output_file; 38 | std::string reference_file; 39 | double perPixelError = 0.0; 40 | double globalError = 0.0; 41 | bool useEpsCheck = false; 42 | 43 | switch (argc) { 44 | case 3: 45 | input_file = std::string(argv[1]); 46 | template_file = std::string(argv[2]); 47 | output_file = "HW4_output.png"; 48 | break; 49 | case 4: 50 | input_file = std::string(argv[1]); 51 | template_file = std::string(argv[2]); 52 | output_file = std::string(argv[3]); 53 | break; 54 | default: 55 | std::cerr << "Usage: ./HW4 input_file template_file [output_filename]" 56 | << std::endl; 57 | exit(1); 58 | } 59 | // load the image and give us our input and output pointers 60 | preProcess(&inputVals, &inputPos, &outputVals, &outputPos, numElems, 61 | input_file, template_file); 62 | 63 | GpuTimer timer; 64 | timer.Start(); 65 | 66 | // call the students' code 67 | your_sort(inputVals, inputPos, outputVals, outputPos, numElems); 68 | 69 | timer.Stop(); 70 | cudaDeviceSynchronize(); 71 | checkCudaErrors(cudaGetLastError()); 72 | printf("\n"); 73 | int err = printf("Your code ran in: %f msecs.\n", timer.Elapsed()); 74 | 75 | if (err < 0) { 76 | // Couldn't print! Probably the student closed stdout - bad news 77 | std::cerr << "Couldn't print timing information! STDOUT Closed!" 
78 | << std::endl; 79 | exit(1); 80 | } 81 | 82 | // check results and output the red-eye corrected image 83 | postProcess(outputVals, outputPos, numElems, output_file); 84 | 85 | // check code moved from HW4.cu 86 | /**************************************************************************** 87 | * You can use the code below to help with debugging, but make sure to * 88 | * comment it out again before submitting your assignment for grading, * 89 | * otherwise this code will take too much time and make it seem like your * 90 | * GPU implementation isn't fast enough. * 91 | * * 92 | * This code MUST RUN BEFORE YOUR CODE in case you accidentally change * 93 | * the input values when implementing your radix sort. * 94 | * * 95 | * This code performs the reference radix sort on the host and compares your * 96 | * sorted values to the reference. * 97 | * * 98 | * Thrust containers are used for copying memory from the GPU * 99 | * ************************************************************************* 100 | */ 101 | thrust::device_ptr d_inputVals(inputVals); 102 | thrust::device_ptr d_inputPos(inputPos); 103 | 104 | thrust::host_vector h_inputVals(d_inputVals, 105 | d_inputVals + numElems); 106 | thrust::host_vector h_inputPos(d_inputPos, 107 | d_inputPos + numElems); 108 | 109 | thrust::host_vector h_outputVals(numElems); 110 | thrust::host_vector h_outputPos(numElems); 111 | 112 | reference_calculation(&h_inputVals[0], &h_inputPos[0], &h_outputVals[0], 113 | &h_outputPos[0], numElems); 114 | 115 | // postProcess(valsPtr, posPtr, numElems, reference_file); 116 | 117 | // compareImages(reference_file, output_file, useEpsCheck, perPixelError, 118 | // globalError); 119 | 120 | thrust::device_ptr d_outputVals(outputVals); 121 | thrust::device_ptr d_outputPos(outputPos); 122 | 123 | thrust::host_vector h_yourOutputVals(d_outputVals, 124 | d_outputVals + numElems); 125 | thrust::host_vector h_yourOutputPos(d_outputPos, 126 | d_outputPos + numElems); 127 | 128 | 
checkResultsExact(&h_outputVals[0], &h_yourOutputVals[0], numElems); 129 | checkResultsExact(&h_outputPos[0], &h_yourOutputPos[0], numElems); 130 | 131 | checkCudaErrors(cudaFree(inputVals)); 132 | checkCudaErrors(cudaFree(inputPos)); 133 | checkCudaErrors(cudaFree(outputVals)); 134 | checkCudaErrors(cudaFree(outputPos)); 135 | 136 | return 0; 137 | } 138 | -------------------------------------------------------------------------------- /assignments/HW4/red_eye_effect.gold: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW4/red_eye_effect.gold -------------------------------------------------------------------------------- /assignments/HW4/red_eye_effect_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW4/red_eye_effect_5.jpg -------------------------------------------------------------------------------- /assignments/HW4/red_eye_effect_template_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW4/red_eye_effect_template_5.jpg -------------------------------------------------------------------------------- /assignments/HW4/reference_calc.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | // For memset 3 | #include 4 | 5 | void reference_calculation(unsigned int *inputVals, unsigned int *inputPos, 6 | unsigned int *outputVals, unsigned int *outputPos, 7 | const size_t numElems) { 8 | const int numBits = 1; 9 | const int numBins = 1 << numBits; 10 | 11 | unsigned int *binHistogram = new unsigned int[numBins]; 12 | unsigned int *binScan = new unsigned int[numBins]; 13 | 14 | unsigned int 
*vals_src = inputVals; 15 | unsigned int *pos_src = inputPos; 16 | 17 | unsigned int *vals_dst = outputVals; 18 | unsigned int *pos_dst = outputPos; 19 | 20 | // a simple radix sort - only guaranteed to work for numBits that are 21 | // multiples of 2 22 | for (unsigned int i = 0; i < 8 * sizeof(unsigned int); i += numBits) { 23 | unsigned int mask = (numBins - 1) << i; 24 | 25 | memset(binHistogram, 0, sizeof(unsigned int) * numBins); // zero out the 26 | // bins 27 | memset(binScan, 0, sizeof(unsigned int) * numBins); // zero out the bins 28 | 29 | // perform histogram of data & mask into bins 30 | for (unsigned int j = 0; j < numElems; ++j) { 31 | unsigned int bin = (vals_src[j] & mask) >> i; 32 | binHistogram[bin]++; 33 | } 34 | 35 | // perform exclusive prefix sum (scan) on binHistogram to get starting 36 | // location for each bin 37 | for (unsigned int j = 1; j < numBins; ++j) { 38 | binScan[j] = binScan[j - 1] + binHistogram[j - 1]; 39 | } 40 | 41 | // Gather everything into the correct location 42 | // need to move vals and positions 43 | for (unsigned int j = 0; j < numElems; ++j) { 44 | unsigned int bin = (vals_src[j] & mask) >> i; 45 | vals_dst[binScan[bin]] = vals_src[j]; 46 | pos_dst[binScan[bin]] = pos_src[j]; 47 | binScan[bin]++; 48 | } 49 | 50 | // swap the buffers (pointers only) 51 | std::swap(vals_dst, vals_src); 52 | std::swap(pos_dst, pos_src); 53 | } 54 | 55 | // we did an even number of iterations, need to copy from input buffer into 56 | // output 57 | std::copy(inputVals, inputVals + numElems, outputVals); 58 | std::copy(inputPos, inputPos + numElems, outputPos); 59 | 60 | delete[] binHistogram; 61 | delete[] binScan; 62 | } 63 | -------------------------------------------------------------------------------- /assignments/HW4/reference_calc.h: -------------------------------------------------------------------------------- 1 | #ifndef REFERENCE_H__ 2 | #define REFERENCE_H__ 3 | 4 | // A simple un-optimized reference radix sort calculation 
5 | // Only deals with power-of-2 radices 6 | 7 | void reference_calculation(unsigned int *inputVals, unsigned int *inputPos, 8 | unsigned int *outputVals, unsigned int *outputPos, 9 | const size_t numElems); 10 | #endif -------------------------------------------------------------------------------- /assignments/HW4/student_func.cu: -------------------------------------------------------------------------------- 1 | // Udacity HW 4 2 | // Radix Sorting 3 | 4 | #include "utils.h" 5 | #include 6 | 7 | /* Red Eye Removal 8 | =============== 9 | 10 | For this assignment we are implementing red eye removal. This is 11 | accomplished by first creating a score for every pixel that tells us how 12 | likely it is to be a red eye pixel. We have already done this for you - you 13 | are receiving the scores and need to sort them in ascending order so that we 14 | know which pixels to alter to remove the red eye. 15 | 16 | Note: ascending order == smallest to largest 17 | 18 | Each score is associated with a position, when you sort the scores, you must 19 | also move the positions accordingly. 20 | 21 | Implementing Parallel Radix Sort with CUDA 22 | ========================================== 23 | 24 | The basic idea is to construct a histogram on each pass of how many of each 25 | "digit" there are. Then we scan this histogram so that we know where to put 26 | the output of each digit. For example, the first 1 must come after all the 27 | 0s so we have to know how many 0s there are to be able to start moving 1s 28 | into the correct position. 
29 | 30 | 1) Histogram of the number of occurrences of each digit 31 | 2) Exclusive Prefix Sum of Histogram 32 | 3) Determine relative offset of each digit 33 | For example [0 0 1 1 0 0 1] 34 | -> [0 1 0 1 2 3 2] 35 | 4) Combine the results of steps 2 & 3 to determine the final 36 | output location for each element and move it there 37 | 38 | LSB Radix sort is an out-of-place sort and you will need to ping-pong values 39 | between the input and output buffers we have provided. Make sure the final 40 | sorted results end up in the output buffer! Hint: You may need to do a copy 41 | at the end. 42 | 43 | */ 44 | 45 | void your_sort(unsigned int *const d_inputVals, unsigned int *const d_inputPos, 46 | unsigned int *const d_outputVals, 47 | unsigned int *const d_outputPos, const size_t numElems) { 48 | // TODO 49 | // PUT YOUR SORT HERE 50 | } 51 | -------------------------------------------------------------------------------- /assignments/HW4/timer.h: -------------------------------------------------------------------------------- 1 | #ifndef GPU_TIMER_H__ 2 | #define GPU_TIMER_H__ 3 | 4 | #include 5 | 6 | struct GpuTimer { 7 | cudaEvent_t start; 8 | cudaEvent_t stop; 9 | 10 | GpuTimer() { 11 | cudaEventCreate(&start); 12 | cudaEventCreate(&stop); 13 | } 14 | 15 | ~GpuTimer() { 16 | cudaEventDestroy(start); 17 | cudaEventDestroy(stop); 18 | } 19 | 20 | void Start() { cudaEventRecord(start, 0); } 21 | 22 | void Stop() { cudaEventRecord(stop, 0); } 23 | 24 | float Elapsed() { 25 | float elapsed; 26 | cudaEventSynchronize(stop); 27 | cudaEventElapsedTime(&elapsed, start, stop); 28 | return elapsed; 29 | } 30 | }; 31 | 32 | #endif /* GPU_TIMER_H__ */ 33 | -------------------------------------------------------------------------------- /assignments/HW4/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H__ 2 | #define UTILS_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 
#include 11 | 12 | #define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__) 13 | 14 | template 15 | void check(T err, const char *const func, const char *const file, 16 | const int line) { 17 | if (err != cudaSuccess) { 18 | std::cerr << "CUDA error at: " << file << ":" << line << std::endl; 19 | std::cerr << cudaGetErrorString(err) << " " << func << std::endl; 20 | exit(1); 21 | } 22 | } 23 | 24 | template 25 | void checkResultsExact(const T *const ref, const T *const gpu, size_t numElem) { 26 | // check that the GPU result matches the CPU result 27 | for (size_t i = 0; i < numElem; ++i) { 28 | if (ref[i] != gpu[i]) { 29 | std::cerr << "Difference at pos " << i << std::endl; 30 | // the + is magic to convert char to int without messing 31 | // with other types 32 | std::cerr << "Reference: " << std::setprecision(17) << +ref[i] 33 | << "\nGPU : " << +gpu[i] << std::endl; 34 | exit(1); 35 | } 36 | } 37 | } 38 | 39 | template 40 | void checkResultsEps(const T *const ref, const T *const gpu, size_t numElem, 41 | double eps1, double eps2) { 42 | assert(eps1 >= 0 && eps2 >= 0); 43 | unsigned long long totalDiff = 0; 44 | unsigned numSmallDifferences = 0; 45 | for (size_t i = 0; i < numElem; ++i) { 46 | // subtract smaller from larger in case of unsigned types 47 | T smaller = std::min(ref[i], gpu[i]); 48 | T larger = std::max(ref[i], gpu[i]); 49 | T diff = larger - smaller; 50 | if (diff > 0 && diff <= eps1) { 51 | numSmallDifferences++; 52 | } else if (diff > eps1) { 53 | std::cerr << "Difference at pos " << +i << " exceeds tolerance of " 54 | << eps1 << std::endl; 55 | std::cerr << "Reference: " << std::setprecision(17) << +ref[i] 56 | << "\nGPU : " << +gpu[i] << std::endl; 57 | exit(1); 58 | } 59 | totalDiff += diff * diff; 60 | } 61 | double percentSmallDifferences = 62 | (double)numSmallDifferences / (double)numElem; 63 | if (percentSmallDifferences > eps2) { 64 | std::cerr << "Total percentage of non-zero pixel difference between the " 65 | "two images 
exceeds " 66 | << 100.0 * eps2 << "%" << std::endl; 67 | std::cerr << "Percentage of non-zero pixel differences: " 68 | << 100.0 * percentSmallDifferences << "%" << std::endl; 69 | exit(1); 70 | } 71 | } 72 | 73 | // Uses the autodesk method of image comparison 74 | // Note the the tolerance here is in PIXELS not a percentage of input pixels 75 | template 76 | void checkResultsAutodesk(const T *const ref, const T *const gpu, 77 | size_t numElem, double variance, size_t tolerance) { 78 | 79 | size_t numBadPixels = 0; 80 | for (size_t i = 0; i < numElem; ++i) { 81 | T smaller = std::min(ref[i], gpu[i]); 82 | T larger = std::max(ref[i], gpu[i]); 83 | T diff = larger - smaller; 84 | if (diff > variance) 85 | ++numBadPixels; 86 | } 87 | 88 | if (numBadPixels > tolerance) { 89 | std::cerr << "Too many bad pixels in the image." << numBadPixels << "/" 90 | << tolerance << std::endl; 91 | exit(1); 92 | } 93 | } 94 | 95 | #endif 96 | -------------------------------------------------------------------------------- /assignments/HW5/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ############################################################################ 2 | # CMakeLists.txt for OpenCV and CUDA. 3 | # 2012-02-07 4 | # Quan Tran Minh. edit by Johannes Kast, Michael Sarahan 5 | # quantm@unist.ac.kr kast.jo@googlemail.com msarahan@gmail.com 6 | ############################################################################ 7 | 8 | # collect source files 9 | 10 | file( GLOB hdr *.hpp *.h ) 11 | 12 | SET (HW5_files main.cu student.cu reference_calc.cpp) 13 | 14 | CUDA_ADD_EXECUTABLE(HW5 ${HW5_files} ${hdr}) 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /assignments/HW5/Makefile: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # Default target executed when no arguments are given to make. 5 | default_target: all 6 | .PHONY : default_target 7 | 8 | # Allow only one "make -f Makefile2" at a time, but pass parallelism. 9 | .NOTPARALLEL: 10 | 11 | #============================================================================= 12 | # Special targets provided by cmake. 13 | 14 | # Disable implicit rules so canonical targets will work. 15 | .SUFFIXES: 16 | 17 | # Disable VCS-based implicit rules. 18 | % : %,v 19 | 20 | # Disable VCS-based implicit rules. 21 | % : RCS/% 22 | 23 | # Disable VCS-based implicit rules. 24 | % : RCS/%,v 25 | 26 | # Disable VCS-based implicit rules. 27 | % : SCCS/s.% 28 | 29 | # Disable VCS-based implicit rules. 30 | % : s.% 31 | 32 | .SUFFIXES: .hpux_make_needs_suffix_list 33 | 34 | # Command-line flag to silence nested $(MAKE). 35 | $(VERBOSE)MAKESILENT = -s 36 | 37 | #Suppress display of executed commands. 38 | $(VERBOSE).SILENT: 39 | 40 | # A target that is always out of date. 41 | cmake_force: 42 | .PHONY : cmake_force 43 | 44 | #============================================================================= 45 | # Set environment variables for the build. 46 | 47 | # The shell in which to execute make rules. 48 | SHELL = /bin/sh 49 | 50 | # The CMake executable. 51 | CMAKE_COMMAND = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake 52 | 53 | # The command to remove a file. 54 | RM = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E rm -f 55 | 56 | # Escaping for special characters. 57 | EQUALS = = 58 | 59 | # The top-level source directory on which CMake was run. 60 | CMAKE_SOURCE_DIR = /data/home/chilli/cluster/work/CS344/assignments 61 | 62 | # The top-level build directory on which CMake was run. 
63 | CMAKE_BINARY_DIR = /data/home/chilli/cluster/work/CS344/assignments 64 | 65 | #============================================================================= 66 | # Targets provided globally by CMake. 67 | 68 | # Special rule for the target rebuild_cache 69 | rebuild_cache: 70 | @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..." 71 | /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) 72 | .PHONY : rebuild_cache 73 | 74 | # Special rule for the target rebuild_cache 75 | rebuild_cache/fast: rebuild_cache 76 | .PHONY : rebuild_cache/fast 77 | 78 | # Special rule for the target edit_cache 79 | edit_cache: 80 | @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "No interactive CMake dialog available..." 81 | /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. 82 | .PHONY : edit_cache 83 | 84 | # Special rule for the target edit_cache 85 | edit_cache/fast: edit_cache 86 | .PHONY : edit_cache/fast 87 | 88 | # The main all target 89 | all: cmake_check_build_system 90 | cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles /data/home/chilli/cluster/work/CS344/assignments/homework/HW5//CMakeFiles/progress.marks 91 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW5/all 92 | $(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles 0 93 | .PHONY : all 94 | 95 | # The main clean target 96 | clean: 97 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW5/clean 98 | .PHONY : clean 99 | 100 | # The main clean target 101 | clean/fast: clean 102 | .PHONY : clean/fast 103 | 104 | # Prepare targets for 
installation. 105 | preinstall: all 106 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW5/preinstall 107 | .PHONY : preinstall 108 | 109 | # Prepare targets for installation. 110 | preinstall/fast: 111 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW5/preinstall 112 | .PHONY : preinstall/fast 113 | 114 | # clear depends 115 | depend: 116 | cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 117 | .PHONY : depend 118 | 119 | # Convenience name for target. 120 | homework/HW5/CMakeFiles/HW5.dir/rule: 121 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW5/CMakeFiles/HW5.dir/rule 122 | .PHONY : homework/HW5/CMakeFiles/HW5.dir/rule 123 | 124 | # Convenience name for target. 125 | HW5: homework/HW5/CMakeFiles/HW5.dir/rule 126 | .PHONY : HW5 127 | 128 | # fast build rule for target. 
129 | HW5/fast: 130 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW5/CMakeFiles/HW5.dir/build.make homework/HW5/CMakeFiles/HW5.dir/build 131 | .PHONY : HW5/fast 132 | 133 | reference_calc.o: reference_calc.cpp.o 134 | .PHONY : reference_calc.o 135 | 136 | # target to build an object file 137 | reference_calc.cpp.o: 138 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW5/CMakeFiles/HW5.dir/build.make homework/HW5/CMakeFiles/HW5.dir/reference_calc.cpp.o 139 | .PHONY : reference_calc.cpp.o 140 | 141 | reference_calc.i: reference_calc.cpp.i 142 | .PHONY : reference_calc.i 143 | 144 | # target to preprocess a source file 145 | reference_calc.cpp.i: 146 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW5/CMakeFiles/HW5.dir/build.make homework/HW5/CMakeFiles/HW5.dir/reference_calc.cpp.i 147 | .PHONY : reference_calc.cpp.i 148 | 149 | reference_calc.s: reference_calc.cpp.s 150 | .PHONY : reference_calc.s 151 | 152 | # target to generate assembly for a file 153 | reference_calc.cpp.s: 154 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW5/CMakeFiles/HW5.dir/build.make homework/HW5/CMakeFiles/HW5.dir/reference_calc.cpp.s 155 | .PHONY : reference_calc.cpp.s 156 | 157 | # Help Target 158 | help: 159 | @echo "The following are some of the valid targets for this Makefile:" 160 | @echo "... all (the default if no target is provided)" 161 | @echo "... clean" 162 | @echo "... depend" 163 | @echo "... edit_cache" 164 | @echo "... rebuild_cache" 165 | @echo "... HW5" 166 | @echo "... reference_calc.o" 167 | @echo "... reference_calc.i" 168 | @echo "... reference_calc.s" 169 | .PHONY : help 170 | 171 | 172 | 173 | #============================================================================= 174 | # Special targets to cleanup operation of make. 
175 | 176 | # Special rule to run CMake to check the build system integrity. 177 | # No rule that depends on this can have commands that come from listfiles 178 | # because they might be regenerated. 179 | cmake_check_build_system: 180 | cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 181 | .PHONY : cmake_check_build_system 182 | 183 | -------------------------------------------------------------------------------- /assignments/HW5/main.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "utils.h" 6 | #include "timer.h" 7 | #include 8 | #include 9 | #if defined(_WIN16) || defined(_WIN32) || defined(_WIN64) 10 | #include 11 | #else 12 | #include 13 | #endif 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | #include "reference_calc.h" 20 | 21 | void computeHistogram(const unsigned int *const d_vals, 22 | unsigned int *const d_histo, const unsigned int numBins, 23 | const unsigned int numElems); 24 | 25 | int main(void) { 26 | const unsigned int numBins = 1024; 27 | const unsigned int numElems = 10000 * numBins; 28 | const float stddev = 100.f; 29 | 30 | unsigned int *vals = new unsigned int[numElems]; 31 | unsigned int *h_vals = new unsigned int[numElems]; 32 | unsigned int *h_studentHisto = new unsigned int[numBins]; 33 | unsigned int *h_refHisto = new unsigned int[numBins]; 34 | 35 | #if defined(_WIN16) || defined(_WIN32) || defined(_WIN64) 36 | srand(GetTickCount()); 37 | #else 38 | timeval tv; 39 | gettimeofday(&tv, NULL); 40 | 41 | srand(tv.tv_usec); 42 | #endif 43 | 44 | // make the mean unpredictable, but close enough to the middle 45 | // so that timings are unaffected 46 | unsigned int mean = rand() % 100 + 462; 47 | 48 | // Output mean so that grading can happen with the same inputs 49 | std::cout << mean << std::endl; 50 | 51 | 
thrust::minstd_rand rng; 52 | 53 | thrust::random::normal_distribution normalDist((float)mean, stddev); 54 | 55 | // Generate the random values 56 | for (size_t i = 0; i < numElems; ++i) { 57 | vals[i] = 58 | std::min((unsigned int)std::max((int)normalDist(rng), 0), numBins - 1); 59 | } 60 | 61 | unsigned int *d_vals, *d_histo; 62 | 63 | GpuTimer timer; 64 | 65 | checkCudaErrors(cudaMalloc(&d_vals, sizeof(unsigned int) * numElems)); 66 | checkCudaErrors(cudaMalloc(&d_histo, sizeof(unsigned int) * numBins)); 67 | checkCudaErrors(cudaMemset(d_histo, 0, sizeof(unsigned int) * numBins)); 68 | 69 | checkCudaErrors(cudaMemcpy(d_vals, vals, sizeof(unsigned int) * numElems, 70 | cudaMemcpyHostToDevice)); 71 | 72 | timer.Start(); 73 | computeHistogram(d_vals, d_histo, numBins, numElems); 74 | timer.Stop(); 75 | int err = printf("Your code ran in: %f msecs.\n", timer.Elapsed()); 76 | 77 | if (err < 0) { 78 | // Couldn't print! Probably the student closed stdout - bad news 79 | std::cerr << "Couldn't print timing information! STDOUT Closed!" 
80 | << std::endl; 81 | exit(1); 82 | } 83 | 84 | // copy the student-computed histogram back to the host 85 | checkCudaErrors(cudaMemcpy(h_studentHisto, d_histo, 86 | sizeof(unsigned int) * numBins, 87 | cudaMemcpyDeviceToHost)); 88 | 89 | // generate reference for the given mean 90 | reference_calculation(vals, h_refHisto, numBins, numElems); 91 | 92 | // Now do the comparison 93 | checkResultsExact(h_refHisto, h_studentHisto, numBins); 94 | 95 | // release every host buffer allocated with new[] above (vals was previously leaked) 96 | delete[] vals; 97 | delete[] h_vals; 98 | delete[] h_refHisto; 99 | delete[] h_studentHisto; 100 | 101 | // check the frees like every other CUDA call in this file 102 | checkCudaErrors(cudaFree(d_vals)); 103 | checkCudaErrors(cudaFree(d_histo)); 104 | 105 | return 0; 106 | } 107 | -------------------------------------------------------------------------------- /assignments/HW5/reference_calc.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | // Reference Histogram calculation 3 | 4 | void reference_calculation(const unsigned int *const vals, 5 | unsigned int *const histo, const size_t numBins, 6 | const size_t numElems) 7 | 8 | { 9 | // zero out bins 10 | for (size_t i = 0; i < numBins; ++i) 11 | histo[i] = 0; 12 | 13 | // go through vals and increment appropriate bin 14 | for (size_t i = 0; i < numElems; ++i) 15 | histo[vals[i]]++; 16 | } 17 | -------------------------------------------------------------------------------- /assignments/HW5/reference_calc.h: -------------------------------------------------------------------------------- 1 | #ifndef REFERENCE_H__ 2 | #define REFERENCE_H__ 3 | 4 | // Reference Histogram calculation 5 | 6 | void reference_calculation(const unsigned int *const vals, 7 | unsigned int *const histo, const size_t numBins, 8 | const size_t numElems); 9 | 10 | #endif -------------------------------------------------------------------------------- /assignments/HW5/student.cu: -------------------------------------------------------------------------------- 1 | /* Udacity HW5 2 | Histogramming for Speed 3 | 4 | The goal of this assignment is to compute a histogram 5 | 
as fast as possible. We have simplified the problem as much as 6 | possible to allow you to focus solely on the histogramming algorithm. 7 | 8 | The input values that you need to histogram are already the exact 9 | bins that need to be updated. This is unlike in HW3 where you needed 10 | to compute the range of the data and then do: 11 | bin = (val - valMin) / valRange to determine the bin. 12 | 13 | Here the bin is just: 14 | bin = val 15 | 16 | so the serial histogram calculation looks like: 17 | for (i = 0; i < numElems; ++i) 18 | histo[val[i]]++; 19 | 20 | That's it! Your job is to make it run as fast as possible! 21 | 22 | The values are normally distributed - you may take 23 | advantage of this fact in your implementation. 24 | 25 | */ 26 | 27 | #include "utils.h" 28 | 29 | __global__ void yourHisto(const unsigned int *const vals, // INPUT 30 | unsigned int *const histo, // OUTPUT 31 | int numVals) { 32 | // TODO fill in this kernel to calculate the histogram 33 | // as quickly as possible 34 | 35 | // Although we provide only one kernel skeleton, 36 | // feel free to use more if it will help you 37 | // write faster code 38 | } 39 | 40 | void computeHistogram(const unsigned int *const d_vals, // INPUT 41 | unsigned int *const d_histo, // OUTPUT 42 | const unsigned int numBins, const unsigned int numElems) { 43 | // TODO Launch the yourHisto kernel 44 | 45 | // if you want to use/launch more than one kernel, 46 | // feel free 47 | 48 | cudaDeviceSynchronize(); 49 | checkCudaErrors(cudaGetLastError()); 50 | } 51 | -------------------------------------------------------------------------------- /assignments/HW5/timer.h: -------------------------------------------------------------------------------- 1 | #ifndef GPU_TIMER_H__ 2 | #define GPU_TIMER_H__ 3 | 4 | #include 5 | 6 | struct GpuTimer { 7 | cudaEvent_t start; 8 | cudaEvent_t stop; 9 | 10 | GpuTimer() { 11 | cudaEventCreate(&start); 12 | cudaEventCreate(&stop); 13 | } 14 | 15 | ~GpuTimer() { 16 | 
cudaEventDestroy(start); 17 | cudaEventDestroy(stop); 18 | } 19 | 20 | void Start() { cudaEventRecord(start, 0); } 21 | 22 | void Stop() { cudaEventRecord(stop, 0); } 23 | 24 | float Elapsed() { 25 | float elapsed; 26 | cudaEventSynchronize(stop); 27 | cudaEventElapsedTime(&elapsed, start, stop); 28 | return elapsed; 29 | } 30 | }; 31 | 32 | #endif /* GPU_TIMER_H__ */ 33 | -------------------------------------------------------------------------------- /assignments/HW5/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H__ 2 | #define UTILS_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__) 13 | 14 | template 15 | void check(T err, const char *const func, const char *const file, 16 | const int line) { 17 | if (err != cudaSuccess) { 18 | std::cerr << "CUDA error at: " << file << ":" << line << std::endl; 19 | std::cerr << cudaGetErrorString(err) << " " << func << std::endl; 20 | exit(1); 21 | } 22 | } 23 | 24 | template 25 | void checkResultsExact(const T *const ref, const T *const gpu, size_t numElem) { 26 | // check that the GPU result matches the CPU result 27 | for (size_t i = 0; i < numElem; ++i) { 28 | if (ref[i] != gpu[i]) { 29 | std::cerr << "Difference at pos " << i << std::endl; 30 | // the + is magic to convert char to int without messing 31 | // with other types 32 | std::cerr << "Reference: " << std::setprecision(17) << +ref[i] 33 | << "\nGPU : " << +gpu[i] << std::endl; 34 | exit(1); 35 | } 36 | } 37 | } 38 | 39 | template 40 | void checkResultsEps(const T *const ref, const T *const gpu, size_t numElem, 41 | double eps1, double eps2) { 42 | assert(eps1 >= 0 && eps2 >= 0); 43 | unsigned long long totalDiff = 0; 44 | unsigned numSmallDifferences = 0; 45 | for (size_t i = 0; i < numElem; ++i) { 46 | // subtract smaller from larger in case of unsigned types 47 | T smaller = 
std::min(ref[i], gpu[i]); 48 | T larger = std::max(ref[i], gpu[i]); 49 | T diff = larger - smaller; 50 | if (diff > 0 && diff <= eps1) { 51 | numSmallDifferences++; 52 | } else if (diff > eps1) { 53 | std::cerr << "Difference at pos " << +i << " exceeds tolerance of " 54 | << eps1 << std::endl; 55 | std::cerr << "Reference: " << std::setprecision(17) << +ref[i] 56 | << "\nGPU : " << +gpu[i] << std::endl; 57 | exit(1); 58 | } 59 | totalDiff += diff * diff; 60 | } 61 | double percentSmallDifferences = 62 | (double)numSmallDifferences / (double)numElem; 63 | if (percentSmallDifferences > eps2) { 64 | std::cerr << "Total percentage of non-zero pixel difference between the " 65 | "two images exceeds " 66 | << 100.0 * eps2 << "%" << std::endl; 67 | std::cerr << "Percentage of non-zero pixel differences: " 68 | << 100.0 * percentSmallDifferences << "%" << std::endl; 69 | exit(1); 70 | } 71 | } 72 | 73 | // Uses the Autodesk method of image comparison 74 | // Note that the tolerance here is in PIXELS, not a percentage of input pixels 75 | template 76 | void checkResultsAutodesk(const T *const ref, const T *const gpu, 77 | size_t numElem, double variance, size_t tolerance) { 78 | 79 | size_t numBadPixels = 0; 80 | for (size_t i = 0; i < numElem; ++i) { 81 | T smaller = std::min(ref[i], gpu[i]); 82 | T larger = std::max(ref[i], gpu[i]); 83 | T diff = larger - smaller; 84 | if (diff > variance) 85 | ++numBadPixels; 86 | } 87 | 88 | if (numBadPixels > tolerance) { 89 | std::cerr << "Too many bad pixels in the image." << numBadPixels << "/" 90 | << tolerance << std::endl; 91 | exit(1); 92 | } 93 | } 94 | 95 | #endif 96 | -------------------------------------------------------------------------------- /assignments/HW6/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ############################################################################ 2 | # CMakeLists.txt for OpenCV and CUDA. 3 | # 2012-02-07 4 | # Quan Tran Minh. 
edit by Johannes Kast, Michael Sarahan 5 | # quantm@unist.ac.kr kast.jo@googlemail.com msarahan@gmail.com 6 | ############################################################################ 7 | 8 | # collect source files 9 | 10 | file( GLOB hdr *.hpp *.h ) 11 | 12 | SET (HW6_files student_func.cu HW6.cu main.cpp loadSaveImage.cpp reference_calc.cpp compare.cpp) 13 | 14 | CUDA_ADD_EXECUTABLE(HW6 ${HW6_files} ${hdr}) 15 | -------------------------------------------------------------------------------- /assignments/HW6/HW6.cu: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "loadSaveImage.h" 8 | #include 9 | 10 | // return types are void since any internal error will be handled by quitting 11 | // no point in returning error codes... 12 | void preProcess(uchar4 **sourceImg, size_t &numRows, size_t &numCols, 13 | uchar4 **destImg, uchar4 **blendedImg, 14 | const std::string &source_filename, 15 | const std::string &dest_filename) { 16 | 17 | // make sure the context initializes ok 18 | checkCudaErrors(cudaFree(0)); 19 | 20 | size_t numRowsSource, numColsSource, numRowsDest, numColsDest; 21 | 22 | loadImageRGBA(source_filename, sourceImg, &numRowsSource, &numColsSource); 23 | loadImageRGBA(dest_filename, destImg, &numRowsDest, &numColsDest); 24 | 25 | assert(numRowsSource == numRowsDest); 26 | assert(numColsSource == numColsDest); 27 | 28 | numRows = numRowsSource; 29 | numCols = numColsSource; 30 | 31 | *blendedImg = new uchar4[numRows * numCols]; 32 | } 33 | 34 | void postProcess(const uchar4 *const blendedImg, const size_t numRowsDest, 35 | const size_t numColsDest, const std::string &output_file) { 36 | // just need to save the image... 
37 | saveImageRGBA(blendedImg, numRowsDest, numColsDest, output_file); 38 | } 39 | -------------------------------------------------------------------------------- /assignments/HW6/Makefile: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # Default target executed when no arguments are given to make. 5 | default_target: all 6 | .PHONY : default_target 7 | 8 | # Allow only one "make -f Makefile2" at a time, but pass parallelism. 9 | .NOTPARALLEL: 10 | 11 | #============================================================================= 12 | # Special targets provided by cmake. 13 | 14 | # Disable implicit rules so canonical targets will work. 15 | .SUFFIXES: 16 | 17 | # Disable VCS-based implicit rules. 18 | % : %,v 19 | 20 | # Disable VCS-based implicit rules. 21 | % : RCS/% 22 | 23 | # Disable VCS-based implicit rules. 24 | % : RCS/%,v 25 | 26 | # Disable VCS-based implicit rules. 27 | % : SCCS/s.% 28 | 29 | # Disable VCS-based implicit rules. 30 | % : s.% 31 | 32 | .SUFFIXES: .hpux_make_needs_suffix_list 33 | 34 | # Command-line flag to silence nested $(MAKE). 35 | $(VERBOSE)MAKESILENT = -s 36 | 37 | #Suppress display of executed commands. 38 | $(VERBOSE).SILENT: 39 | 40 | # A target that is always out of date. 41 | cmake_force: 42 | .PHONY : cmake_force 43 | 44 | #============================================================================= 45 | # Set environment variables for the build. 46 | 47 | # The shell in which to execute make rules. 48 | SHELL = /bin/sh 49 | 50 | # The CMake executable. 51 | CMAKE_COMMAND = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake 52 | 53 | # The command to remove a file. 54 | RM = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E rm -f 55 | 56 | # Escaping for special characters. 57 | EQUALS = = 58 | 59 | # The top-level source directory on which CMake was run. 
60 | CMAKE_SOURCE_DIR = /data/home/chilli/cluster/work/CS344/assignments 61 | 62 | # The top-level build directory on which CMake was run. 63 | CMAKE_BINARY_DIR = /data/home/chilli/cluster/work/CS344/assignments 64 | 65 | #============================================================================= 66 | # Targets provided globally by CMake. 67 | 68 | # Special rule for the target rebuild_cache 69 | rebuild_cache: 70 | @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..." 71 | /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) 72 | .PHONY : rebuild_cache 73 | 74 | # Special rule for the target rebuild_cache 75 | rebuild_cache/fast: rebuild_cache 76 | .PHONY : rebuild_cache/fast 77 | 78 | # Special rule for the target edit_cache 79 | edit_cache: 80 | @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "No interactive CMake dialog available..." 81 | /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. 
82 | .PHONY : edit_cache 83 | 84 | # Special rule for the target edit_cache 85 | edit_cache/fast: edit_cache 86 | .PHONY : edit_cache/fast 87 | 88 | # The main all target 89 | all: cmake_check_build_system 90 | cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles /data/home/chilli/cluster/work/CS344/assignments/homework/HW6//CMakeFiles/progress.marks 91 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW6/all 92 | $(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles 0 93 | .PHONY : all 94 | 95 | # The main clean target 96 | clean: 97 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW6/clean 98 | .PHONY : clean 99 | 100 | # The main clean target 101 | clean/fast: clean 102 | .PHONY : clean/fast 103 | 104 | # Prepare targets for installation. 105 | preinstall: all 106 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW6/preinstall 107 | .PHONY : preinstall 108 | 109 | # Prepare targets for installation. 110 | preinstall/fast: 111 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW6/preinstall 112 | .PHONY : preinstall/fast 113 | 114 | # clear depends 115 | depend: 116 | cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 117 | .PHONY : depend 118 | 119 | # Convenience name for target. 
120 | homework/HW6/CMakeFiles/HW6.dir/rule: 121 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW6/CMakeFiles/HW6.dir/rule 122 | .PHONY : homework/HW6/CMakeFiles/HW6.dir/rule 123 | 124 | # Convenience name for target. 125 | HW6: homework/HW6/CMakeFiles/HW6.dir/rule 126 | .PHONY : HW6 127 | 128 | # fast build rule for target. 129 | HW6/fast: 130 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/build 131 | .PHONY : HW6/fast 132 | 133 | compare.o: compare.cpp.o 134 | .PHONY : compare.o 135 | 136 | # target to build an object file 137 | compare.cpp.o: 138 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/compare.cpp.o 139 | .PHONY : compare.cpp.o 140 | 141 | compare.i: compare.cpp.i 142 | .PHONY : compare.i 143 | 144 | # target to preprocess a source file 145 | compare.cpp.i: 146 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/compare.cpp.i 147 | .PHONY : compare.cpp.i 148 | 149 | compare.s: compare.cpp.s 150 | .PHONY : compare.s 151 | 152 | # target to generate assembly for a file 153 | compare.cpp.s: 154 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/compare.cpp.s 155 | .PHONY : compare.cpp.s 156 | 157 | loadSaveImage.o: loadSaveImage.cpp.o 158 | .PHONY : loadSaveImage.o 159 | 160 | # target to build an object file 161 | loadSaveImage.cpp.o: 162 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/loadSaveImage.cpp.o 163 | .PHONY : loadSaveImage.cpp.o 164 | 165 | 
loadSaveImage.i: loadSaveImage.cpp.i 166 | .PHONY : loadSaveImage.i 167 | 168 | # target to preprocess a source file 169 | loadSaveImage.cpp.i: 170 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/loadSaveImage.cpp.i 171 | .PHONY : loadSaveImage.cpp.i 172 | 173 | loadSaveImage.s: loadSaveImage.cpp.s 174 | .PHONY : loadSaveImage.s 175 | 176 | # target to generate assembly for a file 177 | loadSaveImage.cpp.s: 178 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/loadSaveImage.cpp.s 179 | .PHONY : loadSaveImage.cpp.s 180 | 181 | main.o: main.cpp.o 182 | .PHONY : main.o 183 | 184 | # target to build an object file 185 | main.cpp.o: 186 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/main.cpp.o 187 | .PHONY : main.cpp.o 188 | 189 | main.i: main.cpp.i 190 | .PHONY : main.i 191 | 192 | # target to preprocess a source file 193 | main.cpp.i: 194 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/main.cpp.i 195 | .PHONY : main.cpp.i 196 | 197 | main.s: main.cpp.s 198 | .PHONY : main.s 199 | 200 | # target to generate assembly for a file 201 | main.cpp.s: 202 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/main.cpp.s 203 | .PHONY : main.cpp.s 204 | 205 | reference_calc.o: reference_calc.cpp.o 206 | .PHONY : reference_calc.o 207 | 208 | # target to build an object file 209 | reference_calc.cpp.o: 210 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make 
homework/HW6/CMakeFiles/HW6.dir/reference_calc.cpp.o 211 | .PHONY : reference_calc.cpp.o 212 | 213 | reference_calc.i: reference_calc.cpp.i 214 | .PHONY : reference_calc.i 215 | 216 | # target to preprocess a source file 217 | reference_calc.cpp.i: 218 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/reference_calc.cpp.i 219 | .PHONY : reference_calc.cpp.i 220 | 221 | reference_calc.s: reference_calc.cpp.s 222 | .PHONY : reference_calc.s 223 | 224 | # target to generate assembly for a file 225 | reference_calc.cpp.s: 226 | cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/reference_calc.cpp.s 227 | .PHONY : reference_calc.cpp.s 228 | 229 | # Help Target 230 | help: 231 | @echo "The following are some of the valid targets for this Makefile:" 232 | @echo "... all (the default if no target is provided)" 233 | @echo "... clean" 234 | @echo "... depend" 235 | @echo "... edit_cache" 236 | @echo "... rebuild_cache" 237 | @echo "... HW6" 238 | @echo "... compare.o" 239 | @echo "... compare.i" 240 | @echo "... compare.s" 241 | @echo "... loadSaveImage.o" 242 | @echo "... loadSaveImage.i" 243 | @echo "... loadSaveImage.s" 244 | @echo "... main.o" 245 | @echo "... main.i" 246 | @echo "... main.s" 247 | @echo "... reference_calc.o" 248 | @echo "... reference_calc.i" 249 | @echo "... reference_calc.s" 250 | .PHONY : help 251 | 252 | 253 | 254 | #============================================================================= 255 | # Special targets to cleanup operation of make. 256 | 257 | # Special rule to run CMake to check the build system integrity. 258 | # No rule that depends on this can have commands that come from listfiles 259 | # because they might be regenerated. 
260 | cmake_check_build_system: 261 | cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 262 | .PHONY : cmake_check_build_system 263 | 264 | -------------------------------------------------------------------------------- /assignments/HW6/blended.gold: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW6/blended.gold -------------------------------------------------------------------------------- /assignments/HW6/compare.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "utils.h" 3 | 4 | void compareImages(std::string reference_filename, std::string test_filename, 5 | bool useEpsCheck, double perPixelError, double globalError) { 6 | cv::Mat reference = cv::imread(reference_filename, -1); 7 | cv::Mat test = cv::imread(test_filename, -1); 8 | 9 | cv::Mat diff = abs(reference - test); 10 | 11 | cv::Mat diffSingleChannel = 12 | diff.reshape(1, 0); // convert to 1 channel, same # rows 13 | 14 | double minVal, maxVal; 15 | 16 | cv::minMaxLoc(diffSingleChannel, &minVal, &maxVal, NULL, 17 | NULL); // NULL because we don't care about location 18 | 19 | // now perform transform so that we bump values to the full range 20 | if (maxVal > minVal) // a flat difference image (e.g. identical inputs) would divide by zero 21 | diffSingleChannel = (diffSingleChannel - minVal) * (255. / (maxVal - minVal)); 22 | 23 | diff = diffSingleChannel.reshape(reference.channels(), 0); 24 | 25 | cv::imwrite("HW6_differenceImage.png", diff); 26 | // OK, now we can start comparing values... 
27 | unsigned char *referencePtr = reference.ptr(0); 28 | unsigned char *testPtr = test.ptr(0); 29 | 30 | if (useEpsCheck) { 31 | checkResultsEps(referencePtr, testPtr, 32 | reference.rows * reference.cols * reference.channels(), 33 | perPixelError, globalError); 34 | } else { 35 | checkResultsExact(referencePtr, testPtr, 36 | reference.rows * reference.cols * reference.channels()); 37 | } 38 | 39 | std::cout << "PASS" << std::endl; 40 | return; 41 | } 42 | -------------------------------------------------------------------------------- /assignments/HW6/compare.h: -------------------------------------------------------------------------------- 1 | #ifndef HW3_H__ 2 | #define HW3_H__ 3 | 4 | void compareImages(std::string reference_filename, std::string test_filename, 5 | bool useEpsCheck, double perPixelError, double globalError); 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /assignments/HW6/destination.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW6/destination.png -------------------------------------------------------------------------------- /assignments/HW6/loadSaveImage.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "cuda_runtime.h" 6 | 7 | // The caller becomes responsible for the returned pointer. This 8 | // is done in the interest of keeping this code as simple as possible. 9 | // In production code this is a bad idea - we should use RAII 10 | // to ensure the memory is freed. DO NOT COPY THIS AND USE IN PRODUCTION 11 | // CODE!!! 
12 | void loadImageHDR(const std::string &filename, float **imagePtr, 13 | size_t *numRows, size_t *numCols) { 14 | cv::Mat image = cv::imread(filename.c_str(), 15 | CV_LOAD_IMAGE_COLOR | CV_LOAD_IMAGE_ANYDEPTH); 16 | if (image.empty()) { 17 | std::cerr << "Couldn't open file: " << filename << std::endl; 18 | exit(1); 19 | } 20 | 21 | if (image.channels() != 3) { 22 | std::cerr << "Image must be color!" << std::endl; 23 | exit(1); 24 | } 25 | 26 | if (!image.isContinuous()) { 27 | std::cerr << "Image isn't continuous!" << std::endl; 28 | exit(1); 29 | } 30 | 31 | *imagePtr = new float[image.rows * image.cols * image.channels()]; 32 | 33 | float *cvPtr = image.ptr(0); 34 | for (size_t i = 0; i < image.rows * image.cols * image.channels(); ++i) 35 | (*imagePtr)[i] = cvPtr[i]; 36 | 37 | *numRows = image.rows; 38 | *numCols = image.cols; 39 | } 40 | 41 | void loadImageGrey(const std::string &filename, unsigned char **imagePtr, 42 | size_t *numRows, size_t *numCols) { 43 | cv::Mat image = cv::imread(filename.c_str(), CV_LOAD_IMAGE_GRAYSCALE); 44 | if (image.empty()) { 45 | std::cerr << "Couldn't open file: " << filename << std::endl; 46 | exit(1); 47 | } 48 | 49 | if (image.channels() != 1) { 50 | std::cerr << "Image must be greyscale!" << std::endl; 51 | exit(1); 52 | } 53 | 54 | if (!image.isContinuous()) { 55 | std::cerr << "Image isn't continuous!" 
<< std::endl; 56 | exit(1); 57 | } 58 | 59 | *imagePtr = new unsigned char[image.rows * image.cols]; 60 | 61 | unsigned char *cvPtr = image.ptr(0); 62 | for (size_t i = 0; i < image.rows * image.cols; ++i) { 63 | (*imagePtr)[i] = cvPtr[i]; 64 | } 65 | 66 | *numRows = image.rows; 67 | *numCols = image.cols; 68 | } 69 | void loadImageRGBA(const std::string &filename, uchar4 **imagePtr, 70 | size_t *numRows, size_t *numCols) { 71 | cv::Mat image = cv::imread(filename.c_str(), CV_LOAD_IMAGE_COLOR); 72 | if (image.empty()) { 73 | std::cerr << "Couldn't open file: " << filename << std::endl; 74 | exit(1); 75 | } 76 | 77 | if (image.channels() != 3) { 78 | std::cerr << "Image must be color!" << std::endl; 79 | exit(1); 80 | } 81 | 82 | if (!image.isContinuous()) { 83 | std::cerr << "Image isn't continuous!" << std::endl; 84 | exit(1); 85 | } 86 | 87 | cv::Mat imageRGBA; 88 | cv::cvtColor(image, imageRGBA, CV_BGR2RGBA); 89 | 90 | *imagePtr = new uchar4[image.rows * image.cols]; 91 | 92 | unsigned char *cvPtr = imageRGBA.ptr(0); 93 | for (size_t i = 0; i < image.rows * image.cols; ++i) { 94 | (*imagePtr)[i].x = cvPtr[4 * i + 0]; 95 | (*imagePtr)[i].y = cvPtr[4 * i + 1]; 96 | (*imagePtr)[i].z = cvPtr[4 * i + 2]; 97 | (*imagePtr)[i].w = cvPtr[4 * i + 3]; 98 | } 99 | 100 | *numRows = image.rows; 101 | *numCols = image.cols; 102 | } 103 | 104 | void saveImageRGBA(const uchar4 *const image, const size_t numRows, 105 | const size_t numCols, const std::string &output_file) { 106 | int sizes[2]; 107 | sizes[0] = numRows; 108 | sizes[1] = numCols; 109 | cv::Mat imageRGBA(2, sizes, CV_8UC4, (void *)image); 110 | cv::Mat imageOutputBGR; 111 | cv::cvtColor(imageRGBA, imageOutputBGR, CV_RGBA2BGR); 112 | // output the image 113 | cv::imwrite(output_file.c_str(), imageOutputBGR); 114 | } 115 | 116 | // output an exr file 117 | // assumed to already be BGR 118 | void saveImageHDR(const float *const image, const size_t numRows, 119 | const size_t numCols, const std::string &output_file) { 
120 | int sizes[2]; 121 | sizes[0] = numRows; 122 | sizes[1] = numCols; 123 | 124 | cv::Mat imageHDR(2, sizes, CV_32FC3, (void *)image); 125 | 126 | imageHDR = imageHDR * 255; 127 | 128 | cv::imwrite(output_file.c_str(), imageHDR); 129 | } 130 | -------------------------------------------------------------------------------- /assignments/HW6/loadSaveImage.h: -------------------------------------------------------------------------------- 1 | #ifndef LOADSAVEIMAGE_H__ 2 | #define LOADSAVEIMAGE_H__ 3 | 4 | #include 5 | #include //for uchar4 6 | 7 | void loadImageHDR(const std::string &filename, float **imagePtr, 8 | size_t *numRows, size_t *numCols); 9 | 10 | void loadImageRGBA(const std::string &filename, uchar4 **imagePtr, 11 | size_t *numRows, size_t *numCols); 12 | 13 | void loadImageGrey(const std::string &filename, unsigned char **imagePtr, 14 | size_t *numRows, size_t *numCols); 15 | 16 | void saveImageRGBA(const uchar4 *const image, const size_t numRows, 17 | const size_t numCols, const std::string &output_file); 18 | 19 | void saveImageHDR(const float *const image, const size_t numRows, 20 | const size_t numCols, const std::string &output_file); 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /assignments/HW6/main.cpp: -------------------------------------------------------------------------------- 1 | // Udacity HW6 Driver 2 | 3 | #include 4 | #include "timer.h" 5 | #include "utils.h" 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "reference_calc.h" 14 | #include "compare.h" 15 | 16 | void preProcess(uchar4 **sourceImg, size_t &numRowsSource, 17 | size_t &numColsSource, uchar4 **destImg, uchar4 **blendedImg, 18 | const std::string &source_filename, 19 | const std::string &dest_filename); 20 | 21 | void postProcess(const uchar4 *const blendedImg, const size_t numRowsDest, 22 | const size_t numColsDest, const std::string &output_file); 23 | 24 | void 
your_blend(const uchar4 *const sourceImg, const size_t numRowsSource, 25 | const size_t numColsSource, const uchar4 *const destImg, 26 | uchar4 *const blendedImg); 27 | 28 | int main(int argc, char **argv) { 29 | uchar4 *h_sourceImg, *h_destImg, *h_blendedImg; 30 | size_t numRowsSource, numColsSource; 31 | 32 | std::string input_source_file; 33 | std::string input_dest_file; 34 | std::string output_file; 35 | 36 | std::string reference_file; 37 | double perPixelError = 0.0; 38 | double globalError = 0.0; 39 | bool useEpsCheck = false; 40 | 41 | switch (argc) { 42 | case 3: 43 | input_source_file = std::string(argv[1]); 44 | input_dest_file = std::string(argv[2]); 45 | output_file = "HW6_output.png"; 46 | reference_file = "HW6_reference.png"; 47 | break; 48 | case 4: 49 | input_source_file = std::string(argv[1]); 50 | input_dest_file = std::string(argv[2]); 51 | output_file = std::string(argv[3]); 52 | reference_file = "HW6_reference.png"; 53 | break; 54 | case 5: 55 | input_source_file = std::string(argv[1]); 56 | input_dest_file = std::string(argv[2]); 57 | output_file = std::string(argv[3]); 58 | reference_file = std::string(argv[4]); 59 | break; 60 | case 7: 61 | useEpsCheck = true; 62 | input_source_file = std::string(argv[1]); 63 | input_dest_file = std::string(argv[2]); 64 | output_file = std::string(argv[3]); 65 | reference_file = std::string(argv[4]); 66 | perPixelError = atof(argv[5]); 67 | globalError = atof(argv[6]); 68 | break; 69 | default: 70 | std::cerr << "Usage: ./HW6 input_source_file input_dest_filename " 71 | "[output_filename] [reference_filename] [perPixelError] " 72 | "[globalError]" 73 | << std::endl; 74 | exit(1); 75 | } 76 | 77 | // load the image and give us our input and output pointers 78 | preProcess(&h_sourceImg, numRowsSource, numColsSource, &h_destImg, 79 | &h_blendedImg, input_source_file, input_dest_file); 80 | 81 | GpuTimer timer; 82 | timer.Start(); 83 | 84 | // call the students' code 85 | your_blend(h_sourceImg, numRowsSource, 
numColsSource, h_destImg, 86 | h_blendedImg); 87 | 88 | timer.Stop(); 89 | cudaDeviceSynchronize(); 90 | checkCudaErrors(cudaGetLastError()); 91 | int err = printf("Your code ran in: %f msecs.\n", timer.Elapsed()); 92 | printf("\n"); 93 | if (err < 0) { 94 | // Couldn't print! Probably the student closed stdout - bad news 95 | std::cerr << "Couldn't print timing information! STDOUT Closed!" 96 | << std::endl; 97 | exit(1); 98 | } 99 | 100 | // check results and output the tone-mapped image 101 | postProcess(h_blendedImg, numRowsSource, numColsSource, output_file); 102 | 103 | // calculate the reference image 104 | uchar4 *h_reference = new uchar4[numRowsSource * numColsSource]; 105 | reference_calc(h_sourceImg, numRowsSource, numColsSource, h_destImg, 106 | h_reference); 107 | 108 | // save the reference image 109 | postProcess(h_reference, numRowsSource, numColsSource, reference_file); 110 | 111 | compareImages(reference_file, output_file, useEpsCheck, perPixelError, 112 | globalError); 113 | 114 | delete[] h_reference; 115 | delete[] h_destImg; 116 | delete[] h_sourceImg; 117 | delete[] h_blendedImg; 118 | return 0; 119 | } 120 | -------------------------------------------------------------------------------- /assignments/HW6/reference_calc.cpp: -------------------------------------------------------------------------------- 1 | // Udacity HW 6 2 | // Poisson Blending Reference Calculation 3 | 4 | #include "utils.h" 5 | #include 6 | 7 | // Performs one iteration of the solver 8 | void computeIteration(const unsigned char *const dstImg, 9 | const unsigned char *const strictInteriorPixels, 10 | const unsigned char *const borderPixels, 11 | const std::vector &interiorPixelList, 12 | const size_t numColsSource, const float *const f, 13 | const float *const g, float *const f_next) { 14 | unsigned int off = 15 | interiorPixelList[0].x * numColsSource + interiorPixelList[0].y; 16 | 17 | for (size_t i = 0; i < interiorPixelList.size(); ++i) { 18 | float blendedSum = 0.f; 
19 | float borderSum = 0.f; 20 | 21 | uint2 coord = interiorPixelList[i]; 22 | 23 | unsigned int offset = coord.x * numColsSource + coord.y; 24 | 25 | // process all 4 neighbor pixels 26 | // for each pixel if it is an interior pixel 27 | // then we add the previous f, otherwise if it is a 28 | // border pixel then we add the value of the destination 29 | // image at the border. These border values are our boundary 30 | // conditions. 31 | if (strictInteriorPixels[offset - 1]) { 32 | blendedSum += f[offset - 1]; 33 | } else { 34 | borderSum += dstImg[offset - 1]; 35 | } 36 | 37 | if (strictInteriorPixels[offset + 1]) { 38 | blendedSum += f[offset + 1]; 39 | } else { 40 | borderSum += dstImg[offset + 1]; 41 | } 42 | 43 | if (strictInteriorPixels[offset - numColsSource]) { 44 | blendedSum += f[offset - numColsSource]; 45 | } else { 46 | borderSum += dstImg[offset - numColsSource]; 47 | } 48 | 49 | if (strictInteriorPixels[offset + numColsSource]) { 50 | blendedSum += f[offset + numColsSource]; 51 | } else { 52 | borderSum += dstImg[offset + numColsSource]; 53 | } 54 | 55 | float f_next_val = (blendedSum + borderSum + g[offset]) / 4.f; 56 | 57 | f_next[offset] = 58 | std::min(255.f, std::max(0.f, f_next_val)); // clip to [0, 255] 59 | } 60 | } 61 | 62 | // pre-compute the values of g, which depend only the source image 63 | // and aren't iteration dependent. 
64 | void computeG(const unsigned char *const channel, float *const g, 65 | const size_t numColsSource, 66 | const std::vector &interiorPixelList) { 67 | for (size_t i = 0; i < interiorPixelList.size(); ++i) { 68 | uint2 coord = interiorPixelList[i]; 69 | unsigned int offset = coord.x * numColsSource + coord.y; 70 | 71 | float sum = 4.f * channel[offset]; 72 | 73 | sum -= (float)channel[offset - 1] + (float)channel[offset + 1]; 74 | sum -= (float)channel[offset + numColsSource] + 75 | (float)channel[offset - numColsSource]; 76 | 77 | g[offset] = sum; 78 | } 79 | } 80 | 81 | void reference_calc(const uchar4 *const h_sourceImg, const size_t numRowsSource, 82 | const size_t numColsSource, const uchar4 *const h_destImg, 83 | uchar4 *const h_blendedImg) { 84 | 85 | // we need to create a list of border pixels and interior pixels 86 | // this is a conceptually simple implementation, not a particularly efficient 87 | // one... 88 | 89 | // first create mask 90 | size_t srcSize = numRowsSource * numColsSource; 91 | unsigned char *mask = new unsigned char[srcSize]; 92 | 93 | for (int i = 0; i < srcSize; ++i) { 94 | mask[i] = (h_sourceImg[i].x + h_sourceImg[i].y + h_sourceImg[i].z < 3 * 255) 95 | ? 1 96 | : 0; 97 | } 98 | 99 | // next compute strictly interior pixels and border pixels 100 | unsigned char *borderPixels = new unsigned char[srcSize]; 101 | unsigned char *strictInteriorPixels = new unsigned char[srcSize]; 102 | 103 | std::vector interiorPixelList; 104 | 105 | // the source region in the homework isn't near an image boundary, so we can 106 | // simplify the conditionals a little... 
107 | for (size_t r = 1; r < numRowsSource - 1; ++r) { 108 | for (size_t c = 1; c < numColsSource - 1; ++c) { 109 | if (mask[r * numColsSource + c]) { 110 | if (mask[(r - 1) * numColsSource + c] && 111 | mask[(r + 1) * numColsSource + c] && 112 | mask[r * numColsSource + c - 1] && 113 | mask[r * numColsSource + c + 1]) { 114 | strictInteriorPixels[r * numColsSource + c] = 1; 115 | borderPixels[r * numColsSource + c] = 0; 116 | interiorPixelList.push_back(make_uint2(r, c)); 117 | } else { 118 | strictInteriorPixels[r * numColsSource + c] = 0; 119 | borderPixels[r * numColsSource + c] = 1; 120 | } 121 | } else { 122 | strictInteriorPixels[r * numColsSource + c] = 0; 123 | borderPixels[r * numColsSource + c] = 0; 124 | } 125 | } 126 | } 127 | 128 | // split the source and destination images into their respective 129 | // channels 130 | unsigned char *red_src = new unsigned char[srcSize]; 131 | unsigned char *blue_src = new unsigned char[srcSize]; 132 | unsigned char *green_src = new unsigned char[srcSize]; 133 | 134 | for (int i = 0; i < srcSize; ++i) { 135 | red_src[i] = h_sourceImg[i].x; 136 | blue_src[i] = h_sourceImg[i].y; 137 | green_src[i] = h_sourceImg[i].z; 138 | } 139 | 140 | unsigned char *red_dst = new unsigned char[srcSize]; 141 | unsigned char *blue_dst = new unsigned char[srcSize]; 142 | unsigned char *green_dst = new unsigned char[srcSize]; 143 | 144 | for (int i = 0; i < srcSize; ++i) { 145 | red_dst[i] = h_destImg[i].x; 146 | blue_dst[i] = h_destImg[i].y; 147 | green_dst[i] = h_destImg[i].z; 148 | } 149 | 150 | // next we'll precompute the g term - it never changes, no need to recompute 151 | // every iteration 152 | float *g_red = new float[srcSize]; 153 | float *g_blue = new float[srcSize]; 154 | float *g_green = new float[srcSize]; 155 | 156 | memset(g_red, 0, srcSize * sizeof(float)); 157 | memset(g_blue, 0, srcSize * sizeof(float)); 158 | memset(g_green, 0, srcSize * sizeof(float)); 159 | 160 | computeG(red_src, g_red, numColsSource, 
interiorPixelList); 161 | computeG(blue_src, g_blue, numColsSource, interiorPixelList); 162 | computeG(green_src, g_green, numColsSource, interiorPixelList); 163 | 164 | // for each color channel we'll need two buffers and we'll ping-pong between 165 | // them 166 | float *blendedValsRed_1 = new float[srcSize]; 167 | float *blendedValsRed_2 = new float[srcSize]; 168 | 169 | float *blendedValsBlue_1 = new float[srcSize]; 170 | float *blendedValsBlue_2 = new float[srcSize]; 171 | 172 | float *blendedValsGreen_1 = new float[srcSize]; 173 | float *blendedValsGreen_2 = new float[srcSize]; 174 | 175 | // IC is the source image, copy over 176 | for (size_t i = 0; i < srcSize; ++i) { 177 | blendedValsRed_1[i] = red_src[i]; 178 | blendedValsRed_2[i] = red_src[i]; 179 | blendedValsBlue_1[i] = blue_src[i]; 180 | blendedValsBlue_2[i] = blue_src[i]; 181 | blendedValsGreen_1[i] = green_src[i]; 182 | blendedValsGreen_2[i] = green_src[i]; 183 | } 184 | 185 | // Perform the solve on each color channel 186 | const size_t numIterations = 800; 187 | for (size_t i = 0; i < numIterations; ++i) { 188 | computeIteration(red_dst, strictInteriorPixels, borderPixels, 189 | interiorPixelList, numColsSource, blendedValsRed_1, g_red, 190 | blendedValsRed_2); 191 | 192 | std::swap(blendedValsRed_1, blendedValsRed_2); 193 | } 194 | 195 | for (size_t i = 0; i < numIterations; ++i) { 196 | computeIteration(blue_dst, strictInteriorPixels, borderPixels, 197 | interiorPixelList, numColsSource, blendedValsBlue_1, 198 | g_blue, blendedValsBlue_2); 199 | 200 | std::swap(blendedValsBlue_1, blendedValsBlue_2); 201 | } 202 | 203 | for (size_t i = 0; i < numIterations; ++i) { 204 | computeIteration(green_dst, strictInteriorPixels, borderPixels, 205 | interiorPixelList, numColsSource, blendedValsGreen_1, 206 | g_green, blendedValsGreen_2); 207 | 208 | std::swap(blendedValsGreen_1, blendedValsGreen_2); 209 | } 210 | std::swap(blendedValsRed_1, blendedValsRed_2); // put output into _2 211 | 
std::swap(blendedValsBlue_1, blendedValsBlue_2); // put output into _2 212 | std::swap(blendedValsGreen_1, blendedValsGreen_2); // put output into _2 213 | 214 | // copy the destination image to the output 215 | memcpy(h_blendedImg, h_destImg, sizeof(uchar4) * srcSize); 216 | 217 | // copy computed values for the interior into the output 218 | for (size_t i = 0; i < interiorPixelList.size(); ++i) { 219 | uint2 coord = interiorPixelList[i]; 220 | 221 | unsigned int offset = coord.x * numColsSource + coord.y; 222 | 223 | h_blendedImg[offset].x = blendedValsRed_2[offset]; 224 | h_blendedImg[offset].y = blendedValsBlue_2[offset]; 225 | h_blendedImg[offset].z = blendedValsGreen_2[offset]; 226 | } 227 | 228 | // wow, we allocated a lot of memory! 229 | delete[] mask; 230 | delete[] blendedValsRed_1; 231 | delete[] blendedValsRed_2; 232 | delete[] blendedValsBlue_1; 233 | delete[] blendedValsBlue_2; 234 | delete[] blendedValsGreen_1; 235 | delete[] blendedValsGreen_2; 236 | delete[] g_red; 237 | delete[] g_blue; 238 | delete[] g_green; 239 | delete[] red_src; 240 | delete[] red_dst; 241 | delete[] blue_src; 242 | delete[] blue_dst; 243 | delete[] green_src; 244 | delete[] green_dst; 245 | delete[] borderPixels; 246 | delete[] strictInteriorPixels; 247 | } 248 | -------------------------------------------------------------------------------- /assignments/HW6/reference_calc.h: -------------------------------------------------------------------------------- 1 | #ifndef REFERENCE_H__ 2 | #define REFERENCE_H__ 3 | 4 | void reference_calc(const uchar4 *const h_sourceImg, const size_t numRowsSource, 5 | const size_t numColsSource, const uchar4 *const h_destImg, 6 | uchar4 *const h_blendedImg); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /assignments/HW6/source.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW6/source.png -------------------------------------------------------------------------------- /assignments/HW6/student_func.cu: -------------------------------------------------------------------------------- 1 | // Udacity HW 6 2 | // Poisson Blending 3 | 4 | /* Background 5 | ========== 6 | 7 | The goal for this assignment is to take one image (the source) and 8 | paste it into another image (the destination) attempting to match the 9 | two images so that the pasting is non-obvious. This is 10 | known as a "seamless clone". 11 | 12 | The basic ideas are as follows: 13 | 14 | 1) Figure out the interior and border of the source image 15 | 2) Use the values of the border pixels in the destination image 16 | as boundary conditions for solving a Poisson equation that tells 17 | us how to blend the images. 18 | 19 | No pixels from the destination except pixels on the border 20 | are used to compute the match. 21 | 22 | Solving the Poisson Equation 23 | ============================ 24 | 25 | There are multiple ways to solve this equation - we choose an iterative 26 | method - specifically the Jacobi method. Iterative methods start with 27 | a guess of the solution and then iterate to try and improve the guess 28 | until it stops changing. If the problem was well-suited for the method 29 | then it will stop and where it stops will be the solution. 30 | 31 | The Jacobi method is the simplest iterative method and converges slowly - 32 | that is we need a lot of iterations to get to the answer, but it is the 33 | easiest method to write. 34 | 35 | Jacobi Iterations 36 | ================= 37 | 38 | Our initial guess is going to be the source image itself. This is a pretty 39 | good guess for what the blended image will look like and it means that 40 | we won't have to do as many iterations compared to if we had started far 41 | from the final solution. 
42 | 43 | ImageGuess_prev (Floating point) 44 | ImageGuess_next (Floating point) 45 | 46 | DestinationImg 47 | SourceImg 48 | 49 | Follow these steps to implement one iteration: 50 | 51 | 1) For every pixel p in the interior, compute two sums over the four 52 | neighboring pixels: Sum1: If the neighbor is in the interior then += 53 | ImageGuess_prev[neighbor] else if the neighbor in on the border then += 54 | DestinationImg[neighbor] 55 | 56 | Sum2: += SourceImg[p] - SourceImg[neighbor] (for all four neighbors) 57 | 58 | 2) Calculate the new pixel value: 59 | float newVal= (Sum1 + Sum2) / 4.f <------ Notice that the result is 60 | FLOATING POINT ImageGuess_next[p] = min(255, max(0, newVal)); //clamp to [0, 61 | 255] 62 | 63 | 64 | In this assignment we will do 800 iterations. 65 | */ 66 | 67 | #include "utils.h" 68 | #include 69 | 70 | void your_blend(const uchar4 *const h_sourceImg, // IN 71 | const size_t numRowsSource, const size_t numColsSource, 72 | const uchar4 *const h_destImg, // IN 73 | uchar4 *const h_blendedImg) // OUT 74 | { 75 | 76 | /* To Recap here are the steps you need to implement 77 | 78 | 1) Compute a mask of the pixels from the source image to be copied 79 | The pixels that shouldn't be copied are completely white, they 80 | have R=255, G=255, B=255. Any other pixels SHOULD be copied. 81 | 82 | 2) Compute the interior and border regions of the mask. An interior 83 | pixel has all 4 neighbors also inside the mask. A border pixel is 84 | in the mask itself, but has at least one neighbor that isn't. 85 | 86 | 3) Separate out the incoming image into three separate channels 87 | 88 | 4) Create two float(!) buffers for each color channel that will 89 | act as our guesses. Initialize them to the respective color 90 | channel of the source image since that will act as our intial guess. 91 | 92 | 5) For each color channel perform the Jacobi iteration described 93 | above 800 times. 
94 | 95 | 6) Create the output image by replacing all the interior pixels 96 | in the destination image with the result of the Jacobi iterations. 97 | Just cast the floating point values to unsigned chars since we have 98 | already made sure to clamp them to the correct range. 99 | 100 | Since this is final assignment we provide little boilerplate code to 101 | help you. Notice that all the input/output pointers are HOST pointers. 102 | 103 | You will have to allocate all of your own GPU memory and perform your own 104 | memcopies to get data in and out of the GPU memory. 105 | 106 | Remember to wrap all of your calls with checkCudaErrors() to catch any 107 | thing that might go wrong. After each kernel call do: 108 | 109 | cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError()); 110 | 111 | to catch any errors that happened while executing the kernel. 112 | */ 113 | } 114 | -------------------------------------------------------------------------------- /assignments/HW6/timer.h: -------------------------------------------------------------------------------- 1 | #ifndef GPU_TIMER_H__ 2 | #define GPU_TIMER_H__ 3 | 4 | #include 5 | 6 | struct GpuTimer { 7 | cudaEvent_t start; 8 | cudaEvent_t stop; 9 | 10 | GpuTimer() { 11 | cudaEventCreate(&start); 12 | cudaEventCreate(&stop); 13 | } 14 | 15 | ~GpuTimer() { 16 | cudaEventDestroy(start); 17 | cudaEventDestroy(stop); 18 | } 19 | 20 | void Start() { cudaEventRecord(start, 0); } 21 | 22 | void Stop() { cudaEventRecord(stop, 0); } 23 | 24 | float Elapsed() { 25 | float elapsed; 26 | cudaEventSynchronize(stop); 27 | cudaEventElapsedTime(&elapsed, start, stop); 28 | return elapsed; 29 | } 30 | }; 31 | 32 | #endif /* GPU_TIMER_H__ */ 33 | -------------------------------------------------------------------------------- /assignments/HW6/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H__ 2 | #define UTILS_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | 
#include 8 | #include 9 | #include 10 | #include 11 | 12 | #define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__) 13 | 14 | template 15 | void check(T err, const char *const func, const char *const file, 16 | const int line) { 17 | if (err != cudaSuccess) { 18 | std::cerr << "CUDA error at: " << file << ":" << line << std::endl; 19 | std::cerr << cudaGetErrorString(err) << " " << func << std::endl; 20 | exit(1); 21 | } 22 | } 23 | 24 | template 25 | void checkResultsExact(const T *const ref, const T *const gpu, size_t numElem) { 26 | // check that the GPU result matches the CPU result 27 | for (size_t i = 0; i < numElem; ++i) { 28 | if (ref[i] != gpu[i]) { 29 | std::cerr << "Difference at pos " << i << std::endl; 30 | // the + is magic to convert char to int without messing 31 | // with other types 32 | std::cerr << "Reference: " << std::setprecision(17) << +ref[i] 33 | << "\nGPU : " << +gpu[i] << std::endl; 34 | exit(1); 35 | } 36 | } 37 | } 38 | 39 | template 40 | void checkResultsEps(const T *const ref, const T *const gpu, size_t numElem, 41 | double eps1, double eps2) { 42 | assert(eps1 >= 0 && eps2 >= 0); 43 | unsigned long long totalDiff = 0; 44 | unsigned numSmallDifferences = 0; 45 | for (size_t i = 0; i < numElem; ++i) { 46 | // subtract smaller from larger in case of unsigned types 47 | T smaller = std::min(ref[i], gpu[i]); 48 | T larger = std::max(ref[i], gpu[i]); 49 | T diff = larger - smaller; 50 | if (diff > 0 && diff <= eps1) { 51 | numSmallDifferences++; 52 | } else if (diff > eps1) { 53 | std::cerr << "Difference at pos " << +i << " exceeds tolerance of " 54 | << eps1 << std::endl; 55 | std::cerr << "Reference: " << std::setprecision(17) << +ref[i] 56 | << "\nGPU : " << +gpu[i] << std::endl; 57 | exit(1); 58 | } 59 | totalDiff += diff * diff; 60 | } 61 | double percentSmallDifferences = 62 | (double)numSmallDifferences / (double)numElem; 63 | if (percentSmallDifferences > eps2) { 64 | std::cerr << "Total percentage of non-zero pixel 
difference between the " 65 | "two images exceeds " 66 | << 100.0 * eps2 << "%" << std::endl; 67 | std::cerr << "Percentage of non-zero pixel differences: " 68 | << 100.0 * percentSmallDifferences << "%" << std::endl; 69 | exit(1); 70 | } 71 | } 72 | 73 | // Uses the autodesk method of image comparison 74 | // Note the the tolerance here is in PIXELS not a percentage of input pixels 75 | template 76 | void checkResultsAutodesk(const T *const ref, const T *const gpu, 77 | size_t numElem, double variance, size_t tolerance) { 78 | 79 | size_t numBadPixels = 0; 80 | for (size_t i = 0; i < numElem; ++i) { 81 | T smaller = std::min(ref[i], gpu[i]); 82 | T larger = std::max(ref[i], gpu[i]); 83 | T diff = larger - smaller; 84 | if (diff > variance) 85 | ++numBadPixels; 86 | } 87 | 88 | if (numBadPixels > tolerance) { 89 | std::cerr << "Too many bad pixels in the image." << numBadPixels << "/" 90 | << tolerance << std::endl; 91 | exit(1); 92 | } 93 | } 94 | 95 | #endif 96 | -------------------------------------------------------------------------------- /assignments/README.md: -------------------------------------------------------------------------------- 1 | These assignments require OpenCV as a prerequisite. The easiest way to install is probably with conda. 2 | 3 | To install OpenCV in a conda environment. 4 | ``` 5 | conda create -n cs344 -y 6 | conda activate cs344 7 | conda install -y -c anaconda opencv 8 | ``` 9 | 10 | To build 11 | ``` 12 | cd assignments 13 | mkdir build 14 | cd build 15 | cmake .. 16 | make 17 | ``` 18 | The binaries will then be contained within `assignments/bin`. 19 | 20 | # HW1 Passing Instructions 21 | From the `HW1` directory. 22 | 23 | Run `../bin/HW1 HW1/cinque_terre_small.jpg` --------------------------------------------------------------------------------