├── .gitignore
├── LectureQuizzes
    ├── 1_30.cu
    ├── 2_18.cu
    ├── 2_37.cu
    └── gputimer.h
├── README.md
└── assignments
    ├── CMakeLists.txt
    ├── HW1
        ├── CMakeLists.txt
        ├── HW1.cpp
        ├── HW1_differenceImage.png
        ├── HW1_output.png
        ├── HW1_reference.png
        ├── Makefile
        ├── cinque_terre.gold
        ├── cinque_terre_small.jpg
        ├── compare.cpp
        ├── compare.h
        ├── main.cpp
        ├── reference_calc.cpp
        ├── reference_calc.h
        ├── student_func.cu
        ├── timer.h
        └── utils.h
    ├── HW2
        ├── CMakeLists.txt
        ├── HW2.cpp
        ├── Makefile
        ├── cinque_terre.gold
        ├── cinque_terre_small.jpg
        ├── compare.cpp
        ├── compare.h
        ├── main.cpp
        ├── reference_calc.cpp
        ├── reference_calc.h
        ├── student_func.cu
        ├── timer.h
        └── utils.h
    ├── HW3
        ├── CMakeLists.txt
        ├── HW3.cu
        ├── Makefile
        ├── compare.cpp
        ├── compare.h
        ├── loadSaveImage.cpp
        ├── loadSaveImage.h
        ├── main.cpp
        ├── memorial.exr
        ├── memorial_large.exr
        ├── memorial_png.gold
        ├── memorial_png_large.gold
        ├── memorial_raw.png
        ├── memorial_raw_large.png
        ├── reference_calc.cpp
        ├── reference_calc.h
        ├── student_func.cu
        ├── timer.h
        └── utils.h
    ├── HW4
        ├── CMakeLists.txt
        ├── HW4.cu
        ├── Makefile
        ├── compare.cpp
        ├── compare.h
        ├── loadSaveImage.cpp
        ├── loadSaveImage.h
        ├── main.cpp
        ├── red_eye_effect.gold
        ├── red_eye_effect_5.jpg
        ├── red_eye_effect_template_5.jpg
        ├── reference_calc.cpp
        ├── reference_calc.h
        ├── student_func.cu
        ├── timer.h
        └── utils.h
    ├── HW5
        ├── CMakeLists.txt
        ├── Makefile
        ├── main.cu
        ├── reference_calc.cpp
        ├── reference_calc.h
        ├── student.cu
        ├── timer.h
        └── utils.h
    ├── HW6
        ├── CMakeLists.txt
        ├── HW6.cu
        ├── Makefile
        ├── blended.gold
        ├── compare.cpp
        ├── compare.h
        ├── destination.png
        ├── loadSaveImage.cpp
        ├── loadSaveImage.h
        ├── main.cpp
        ├── reference_calc.cpp
        ├── reference_calc.h
        ├── source.png
        ├── student_func.cu
        ├── timer.h
        └── utils.h
    └── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | CMakeFiles
2 | *.cmake
3 | bin
4 | build
5 | 


--------------------------------------------------------------------------------
/LectureQuizzes/1_30.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | // Cubes each element (d_out[i] = d_in[i]^3), one thread per element; indexes by threadIdx.x only, so launch one block.
 3 | __global__ void cube(float *d_out, float *d_in) {
 4 |   d_out[threadIdx.x] = d_in[threadIdx.x] * d_in[threadIdx.x] * d_in[threadIdx.x];
 5 | }
 6 | // Host driver: fills h_in with 0..ARRAY_SIZE-1, runs the cube kernel on the device, and prints h_out.
 7 | int main(int argc, char **argv) {
 8 |   const int ARRAY_SIZE = 64;
 9 |   const int ARRAY_BYTES = ARRAY_SIZE * sizeof(float);
10 | 
11 |   // generate the input array on the host: h_in[i] = i
12 |   float h_in[ARRAY_SIZE];
13 |   for (int i = 0; i < ARRAY_SIZE; i++) {
14 |     h_in[i] = float(i);
15 |   }
16 |   float h_out[ARRAY_SIZE];
17 | 
18 |   // declare GPU memory pointers
19 |   float *d_in;
20 |   float *d_out;
21 | 
22 |   // allocate GPU memory (NOTE(review): no CUDA return code in this function is checked)
23 |   cudaMalloc((void **)&d_in, ARRAY_BYTES);
24 |   cudaMalloc((void **)&d_out, ARRAY_BYTES);
25 | 
26 |   // transfer the array to the GPU
27 |   cudaMemcpy(d_in, h_in, ARRAY_BYTES, cudaMemcpyHostToDevice);
28 | 
29 |   // launch the kernel: a single block of ARRAY_SIZE threads
30 |   cube<<<1, ARRAY_SIZE>>>(d_out, d_in);
31 | 
32 |   // copy back the result array to the CPU
33 |   cudaMemcpy(h_out, d_out, ARRAY_BYTES, cudaMemcpyDeviceToHost);
34 | 
35 |   // print out the resulting array, tab-separated, four values per line
36 |   for (int i = 0; i < ARRAY_SIZE; i++) {
37 |     printf("%f", h_out[i]);
38 |     printf(((i % 4) != 3) ? "\t" : "\n");
39 |   }
40 | 
41 |   cudaFree(d_in);
42 |   cudaFree(d_out);
43 | 
44 |   return 0;
45 | }


--------------------------------------------------------------------------------
/LectureQuizzes/2_18.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | #define NUM_BLOCKS 16
 4 | #define BLOCK_WIDTH 1
 5 | // Kernel: every thread prints a greeting that includes the index of its block.
 6 | __global__ void hello()
 7 | {
 8 |     printf("Hello world! I'm a thread in block %d\n", blockIdx.x);
 9 | }
10 | 
11 | // Launches hello with NUM_BLOCKS blocks of BLOCK_WIDTH threads, waits for the device, then prints a closing line.
12 | int main(int argc,char **argv)
13 | {
14 |     // launch the kernel
15 |     hello<<<NUM_BLOCKS, BLOCK_WIDTH>>>();
16 | 
17 |     // wait for the kernel to finish, which also forces the device-side printf()s to flush
18 |     cudaDeviceSynchronize();
19 | 
20 |     printf("That's all!\n");
21 | 
22 |     return 0;
23 | }


--------------------------------------------------------------------------------
/LectureQuizzes/2_37.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include "gputimer.h"
 3 | 
 4 | #define NUM_THREADS 1000000
 5 | #define ARRAY_SIZE  100
 6 | bool atomic = true;
 7 | 
 8 | #define BLOCK_WIDTH 1000
 9 | // Writes the first `size` ints of `array` to stdout on one line, in the form "{ a b c }".
10 | void print_array(int *array, int size)
11 | {
12 |     printf("{ ");
13 |     for (int left = size; left > 0; --left) { printf("%d ", *array++); }
14 |     printf("}\n");
15 | }
16 | 
17 | __global__ void increment_naive(int *g)
18 | {
19 | 	// global thread index folded onto the array: thread t updates element t % ARRAY_SIZE
20 | 	const int slot = int(blockIdx.x * blockDim.x + threadIdx.x) % ARRAY_SIZE;
21 | 
22 | 	// plain, unsynchronized read-modify-write of the counter
23 | 	const int seen = g[slot];
24 | 	g[slot] = seen + 1;
25 | }
26 | 
27 | __global__ void increment_atomic(int *g)
28 | {
29 | 	// global thread index folded onto the array: thread t updates element t % ARRAY_SIZE
30 | 	const int slot = int(blockIdx.x * blockDim.x + threadIdx.x) % ARRAY_SIZE;
31 | 
32 | 	// the increment goes through atomicAdd instead of a plain load and store
33 | 	int *const counter = g + slot;
34 | 	atomicAdd(counter, 1);
35 | }
36 | // Times one launch of the increment kernel selected by the global `atomic` flag and prints the elapsed time.
37 | int main(int argc,char **argv)
38 | {   
39 |     GpuTimer timer;
40 |     if (atomic) {
41 |         printf("atomic %d total threads in %d blocks writing into %d array elements\n",
42 |            NUM_THREADS, NUM_THREADS / BLOCK_WIDTH, ARRAY_SIZE);
43 |     } else {
44 |         printf("%d total threads in %d blocks writing into %d array elements\n",
45 |            NUM_THREADS, NUM_THREADS / BLOCK_WIDTH, ARRAY_SIZE);
46 |     }
47 | 
48 |     // declare and allocate host memory
49 |     int h_array[ARRAY_SIZE];
50 |     const int ARRAY_BYTES = ARRAY_SIZE * sizeof(int);
51 |  
52 |     // declare, allocate, and zero out GPU memory
53 |     int * d_array;
54 |     cudaMalloc((void **) &d_array, ARRAY_BYTES);
55 |     cudaMemset((void *) d_array, 0, ARRAY_BYTES); 
56 | 
57 |     // the kernel launch is bracketed by timer.Start() and timer.Stop()
58 |     timer.Start();
59 |     
60 |     // Instructions: This program is needed for the next quiz.
61 |     // The global `atomic` flag selects the kernel: true runs
62 |     // increment_atomic, false runs increment_naive. Flip it to compare
63 |     // the speed and accuracy of atomic and non-atomic increments.
64 |     if (atomic) {
65 |         increment_atomic<<<NUM_THREADS/BLOCK_WIDTH, BLOCK_WIDTH>>>(d_array);
66 |     } else {
67 |         increment_naive<<<NUM_THREADS/BLOCK_WIDTH, BLOCK_WIDTH>>>(d_array);
68 |     }
69 |     timer.Stop();
70 |     
71 |     // copy back the array of sums from GPU and print
72 |     cudaMemcpy(h_array, d_array, ARRAY_BYTES, cudaMemcpyDeviceToHost);
73 |     // print_array(h_array, ARRAY_SIZE);
74 |     printf("Time elapsed = %g ms\n", timer.Elapsed());
75 |  
76 |     // free GPU memory allocation and exit
77 |     cudaFree(d_array);
78 |     return 0;
79 | }


--------------------------------------------------------------------------------
/LectureQuizzes/gputimer.h:
--------------------------------------------------------------------------------
 1 | #ifndef __GPU_TIMER_H__
 2 | #define __GPU_TIMER_H__
 3 | // Times GPU work with a pair of CUDA events recorded on stream 0. NOTE(review): copying a GpuTimer would destroy its events twice.
 4 | struct GpuTimer
 5 | {
 6 |       cudaEvent_t start;
 7 |       cudaEvent_t stop;
 8 |  
 9 |       GpuTimer()
10 |       {
11 |             cudaEventCreate(&start);
12 |             cudaEventCreate(&stop);
13 |       }
14 |  
15 |       ~GpuTimer()
16 |       {
17 |             cudaEventDestroy(start);
18 |             cudaEventDestroy(stop);
19 |       }
20 |       // Records the start event: the beginning of the timed region.
21 |       void Start()
22 |       {
23 |             cudaEventRecord(start, 0);
24 |       }
25 |       // Records the stop event: the end of the timed region. Does not wait for the GPU.
26 |       void Stop()
27 |       {
28 |             cudaEventRecord(stop, 0);
29 |       }
30 |       // Waits for the stop event, then returns the time between the two events as reported by cudaEventElapsedTime.
31 |       float Elapsed()
32 |       {
33 |             float elapsed = 0.0f;  // initialized so a failed query cannot return an indeterminate value
34 |             cudaEventSynchronize(stop);
35 |             cudaEventElapsedTime(&elapsed, start, stop);
36 |             return elapsed;
37 |       }
38 | };
39 | 
40 | #endif  /* __GPU_TIMER_H__ */


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | These assignments require OpenCV as a prerequisite. The easiest way to install is probably with conda.
 2 | 
 3 | To install OpenCV in a conda environment:
 4 | ```
 5 | conda create -n cs344 -y
 6 | conda activate cs344
 7 | conda install -y -c anaconda opencv
 8 | ```
 9 | 
10 | To build
11 | ```
12 | cd assignments
13 | mkdir build
14 | cd build
15 | cmake ..
16 | make
17 | ```
18 | The binaries will then be contained within `assignments/bin`.
19 | 
20 | # HW1 Passing Instructions
21 | From the `HW1` directory.
22 | 
23 | Run `../bin/HW1 cinque_terre_small.jpg`.


--------------------------------------------------------------------------------
/assignments/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | ############################################################################
 2 | # <summary> CMakeLists.txt for OpenCV and CUDA. </summary>
 3 | # <date>    2012-02-07          </date>
 4 | # <author>  Quan Tran Minh. edit by Johannes Kast, Michael Sarahan </author>
 5 | # <email>   quantm@unist.ac.kr  kast.jo@googlemail.com msarahan@gmail.com</email>
 6 | ############################################################################
 7 | 
 8 | cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
 9 | project(cs344)
10 | 
11 | find_package(OpenCV REQUIRED)
12 | find_package(CUDA REQUIRED)
13 | 
14 | link_libraries(${OpenCV_LIBS} )
15 | 
16 | set (EXECUTABLE_OUTPUT_PATH "${CMAKE_SOURCE_DIR}/bin/")
17 | 
18 | if(CUDA_FOUND)
19 |   # compared to class settings, we let NVidia's FindCUDA CMake detect
20 |   # whether to build x64.  The flags below hard-code /usr/bin/gcc as the
21 |   # host compiler and generate device code only for sm_70 and sm_80;
22 |   # other GPU architectures may need their own -gencode entry here.
23 |   set(CUDA_NVCC_FLAGS "
24 |   -ccbin /usr/bin/gcc; 
25 |   -gencode;arch=compute_70,code=sm_70;  
26 |   -gencode;arch=compute_80,code=sm_80;")
27 | 
28 |   # add -Wextra compiler flag for gcc compilations
29 |   if (UNIX)
30 |     set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler -Wextra")
31 |   endif (UNIX)
32 | 
33 |   # add debugging to CUDA NVCC flags.  For NVidia's NSight tools.
34 |   set(CUDA_NVCC_FLAGS_DEBUG ${CUDA_NVCC_FLAGS_DEBUG} "-G")
35 | 
36 |   add_subdirectory (HW1)
37 |   add_subdirectory (HW2)
38 |   add_subdirectory (HW3)
39 |   add_subdirectory (HW4)
40 |   add_subdirectory (HW5)
41 |   add_subdirectory (HW6)
42 | else(CUDA_FOUND)
43 |   message("CUDA is not installed on this system.")
44 | endif()
45 | 


--------------------------------------------------------------------------------
/assignments/HW1/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | ############################################################################
 2 | # <summary> CMakeLists.txt for OpenCV and CUDA. </summary>
 3 | # <date>    2012-02-07          </date>
 4 | # <author>  Quan Tran Minh. edit by Johannes Kast, Michael Sarahan </author>
 5 | # <email>   quantm@unist.ac.kr  kast.jo@googlemail.com msarahan@gmail.com</email>
 6 | ############################################################################
 7 | 
 8 | # collect source files
 9 | # (HW1.cpp is not listed here; main.cpp pulls it in with #include "HW1.cpp")
10 | file( GLOB  hdr *.hpp *.h )
11 | file( GLOB  cu  *.cu)
12 | SET (HW1_files main.cpp reference_calc.cpp compare.cpp)
13 | 
14 | CUDA_ADD_EXECUTABLE(HW1 ${HW1_files} ${hdr} ${cu})


--------------------------------------------------------------------------------
/assignments/HW1/HW1.cpp:
--------------------------------------------------------------------------------
 1 | #include <opencv2/core/core.hpp>
 2 | #include <opencv2/highgui/highgui.hpp>
 3 | #include <opencv2/opencv.hpp>
 4 | #include "utils.h"
 5 | #include <cuda.h>
 6 | #include <cuda_runtime.h>
 7 | #include <string>
 8 | 
 9 | cv::Mat imageRGBA;
10 | cv::Mat imageGrey;
11 | 
12 | uchar4 *d_rgbaImage__;
13 | unsigned char *d_greyImage__;
14 | // dimensions of the loaded RGBA image (imageRGBA), returned as size_t
15 | size_t numRows() { return static_cast<size_t>(imageRGBA.rows); }
16 | size_t numCols() { return static_cast<size_t>(imageRGBA.cols); }
17 | 
18 | // Loads `filename`, converts it to RGBA on the host, allocates the device
19 | // input/output buffers and uploads the input. Errors are handled by
20 | // quitting, so nothing is returned; the four out-parameters receive the
21 | // host RGBA pixels, the host grey buffer, and their device counterparts.
22 | // The device pointers are also stored in file-level globals for cleanup().
23 | void preProcess(uchar4 **inputImage, unsigned char **greyImage,
24 |                 uchar4 **d_rgbaImage, unsigned char **d_greyImage,
25 |                 const std::string &filename) {
26 |   // make sure the context initializes ok
27 |   checkCudaErrors(cudaFree(0));
28 | 
29 |   // cv::IMREAD_* / cv::COLOR_* replace the legacy CV_* macros, which the default OpenCV 4 headers no longer provide
30 |   cv::Mat image = cv::imread(filename.c_str(), cv::IMREAD_COLOR);
31 |   if (image.empty()) {
32 |     std::cerr << "Couldn't open file: " << filename << std::endl;
33 |     exit(1);
34 |   }
35 | 
36 |   cv::cvtColor(image, imageRGBA, cv::COLOR_BGR2RGBA);
37 | 
38 |   // allocate memory for the output
39 |   imageGrey.create(image.rows, image.cols, CV_8UC1);
40 | 
41 |   // This shouldn't ever happen given the way the images are created
42 |   // at least based upon my limited understanding of OpenCV, but better to check
43 |   if (!imageRGBA.isContinuous() || !imageGrey.isContinuous()) {
44 |     std::cerr << "Images aren't continuous!! Exiting." << std::endl;
45 |     exit(1);
46 |   }
47 | 
48 |   *inputImage = (uchar4 *)imageRGBA.ptr<unsigned char>(0);
49 |   *greyImage = imageGrey.ptr<unsigned char>(0);
50 | 
51 |   const size_t numPixels = numRows() * numCols();
52 |   // allocate memory on the device for both input and output
53 |   checkCudaErrors(cudaMalloc(d_rgbaImage, sizeof(uchar4) * numPixels));
54 |   checkCudaErrors(cudaMalloc(d_greyImage, sizeof(unsigned char) * numPixels));
55 |   checkCudaErrors(cudaMemset(
56 |       *d_greyImage, 0,
57 |       numPixels *
58 |           sizeof(unsigned char))); // make sure no memory is left laying around
59 | 
60 |   // copy input array to the GPU
61 |   checkCudaErrors(cudaMemcpy(*d_rgbaImage, *inputImage,
62 |                              sizeof(uchar4) * numPixels,
63 |                              cudaMemcpyHostToDevice));
64 | 
65 |   d_rgbaImage__ = *d_rgbaImage;
66 |   d_greyImage__ = *d_greyImage;
67 | }
68 | // Saves the numRows() x numCols() 8-bit single-channel image stored at data_ptr to output_file.
69 | void postProcess(const std::string &output_file, unsigned char *data_ptr) {
70 |   // wrap the caller's buffer in a cv::Mat so OpenCV can encode it
71 |   void *const pixels = static_cast<void *>(data_ptr);
72 |   cv::Mat greyView(numRows(), numCols(), CV_8UC1, pixels);
73 |   cv::imwrite(output_file, greyView);
74 | }
75 | // Frees the device buffers that preProcess() recorded in the file-level globals.
76 | void cleanup() {
77 |   // return codes are not checked here (unlike the checkCudaErrors calls in preProcess)
78 |   cudaFree(d_rgbaImage__);
79 |   cudaFree(d_greyImage__);
80 | }
81 | // Reads input_filename as greyscale via OpenCV and writes the result to output_filename.
82 | void generateReferenceImage(std::string input_filename,
83 |                             std::string output_filename) {
84 |   // cv::IMREAD_GRAYSCALE replaces the legacy CV_LOAD_IMAGE_GRAYSCALE macro, which the default OpenCV 4 headers no longer provide
85 |   cv::Mat reference = cv::imread(input_filename, cv::IMREAD_GRAYSCALE);
86 |   cv::imwrite(output_filename, reference);
87 | }
88 | 


--------------------------------------------------------------------------------
/assignments/HW1/HW1_differenceImage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW1/HW1_differenceImage.png


--------------------------------------------------------------------------------
/assignments/HW1/HW1_output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW1/HW1_output.png


--------------------------------------------------------------------------------
/assignments/HW1/HW1_reference.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW1/HW1_reference.png


--------------------------------------------------------------------------------
/assignments/HW1/Makefile:
--------------------------------------------------------------------------------
  1 | # CMAKE generated file: DO NOT EDIT!
  2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20
  3 | 
  4 | # Default target executed when no arguments are given to make.
  5 | default_target: all
  6 | .PHONY : default_target
  7 | 
  8 | # Allow only one "make -f Makefile2" at a time, but pass parallelism.
  9 | .NOTPARALLEL:
 10 | 
 11 | #=============================================================================
 12 | # Special targets provided by cmake.
 13 | 
 14 | # Disable implicit rules so canonical targets will work.
 15 | .SUFFIXES:
 16 | 
 17 | # Disable VCS-based implicit rules.
 18 | % : %,v
 19 | 
 20 | # Disable VCS-based implicit rules.
 21 | % : RCS/%
 22 | 
 23 | # Disable VCS-based implicit rules.
 24 | % : RCS/%,v
 25 | 
 26 | # Disable VCS-based implicit rules.
 27 | % : SCCS/s.%
 28 | 
 29 | # Disable VCS-based implicit rules.
 30 | % : s.%
 31 | 
 32 | .SUFFIXES: .hpux_make_needs_suffix_list
 33 | 
 34 | # Command-line flag to silence nested $(MAKE).
 35 | $(VERBOSE)MAKESILENT = -s
 36 | 
 37 | #Suppress display of executed commands.
 38 | $(VERBOSE).SILENT:
 39 | 
 40 | # A target that is always out of date.
 41 | cmake_force:
 42 | .PHONY : cmake_force
 43 | 
 44 | #=============================================================================
 45 | # Set environment variables for the build.
 46 | 
 47 | # The shell in which to execute make rules.
 48 | SHELL = /bin/sh
 49 | 
 50 | # The CMake executable.
 51 | CMAKE_COMMAND = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake
 52 | 
 53 | # The command to remove a file.
 54 | RM = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E rm -f
 55 | 
 56 | # Escaping for special characters.
 57 | EQUALS = =
 58 | 
 59 | # The top-level source directory on which CMake was run.
 60 | CMAKE_SOURCE_DIR = /data/home/chilli/cluster/work/CS344/assignments
 61 | 
 62 | # The top-level build directory on which CMake was run.
 63 | CMAKE_BINARY_DIR = /data/home/chilli/cluster/work/CS344/assignments
 64 | 
 65 | #=============================================================================
 66 | # Targets provided globally by CMake.
 67 | 
 68 | # Special rule for the target rebuild_cache
 69 | rebuild_cache:
 70 | 	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..."
 71 | 	/usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR)
 72 | .PHONY : rebuild_cache
 73 | 
 74 | # Special rule for the target rebuild_cache
 75 | rebuild_cache/fast: rebuild_cache
 76 | .PHONY : rebuild_cache/fast
 77 | 
 78 | # Special rule for the target edit_cache
 79 | edit_cache:
 80 | 	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "No interactive CMake dialog available..."
 81 | 	/usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available.
 82 | .PHONY : edit_cache
 83 | 
 84 | # Special rule for the target edit_cache
 85 | edit_cache/fast: edit_cache
 86 | .PHONY : edit_cache/fast
 87 | 
 88 | # The main all target
 89 | all: cmake_check_build_system
 90 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles /data/home/chilli/cluster/work/CS344/assignments/homework/HW1//CMakeFiles/progress.marks
 91 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW1/all
 92 | 	$(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles 0
 93 | .PHONY : all
 94 | 
 95 | # The main clean target
 96 | clean:
 97 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW1/clean
 98 | .PHONY : clean
 99 | 
100 | # The main clean target
101 | clean/fast: clean
102 | .PHONY : clean/fast
103 | 
104 | # Prepare targets for installation.
105 | preinstall: all
106 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW1/preinstall
107 | .PHONY : preinstall
108 | 
109 | # Prepare targets for installation.
110 | preinstall/fast:
111 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW1/preinstall
112 | .PHONY : preinstall/fast
113 | 
114 | # clear depends
115 | depend:
116 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1
117 | .PHONY : depend
118 | 
119 | # Convenience name for target.
120 | homework/HW1/CMakeFiles/HW1.dir/rule:
121 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW1/CMakeFiles/HW1.dir/rule
122 | .PHONY : homework/HW1/CMakeFiles/HW1.dir/rule
123 | 
124 | # Convenience name for target.
125 | HW1: homework/HW1/CMakeFiles/HW1.dir/rule
126 | .PHONY : HW1
127 | 
128 | # fast build rule for target.
129 | HW1/fast:
130 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW1/CMakeFiles/HW1.dir/build.make homework/HW1/CMakeFiles/HW1.dir/build
131 | .PHONY : HW1/fast
132 | 
133 | compare.o: compare.cpp.o
134 | .PHONY : compare.o
135 | 
136 | # target to build an object file
137 | compare.cpp.o:
138 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW1/CMakeFiles/HW1.dir/build.make homework/HW1/CMakeFiles/HW1.dir/compare.cpp.o
139 | .PHONY : compare.cpp.o
140 | 
141 | compare.i: compare.cpp.i
142 | .PHONY : compare.i
143 | 
144 | # target to preprocess a source file
145 | compare.cpp.i:
146 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW1/CMakeFiles/HW1.dir/build.make homework/HW1/CMakeFiles/HW1.dir/compare.cpp.i
147 | .PHONY : compare.cpp.i
148 | 
149 | compare.s: compare.cpp.s
150 | .PHONY : compare.s
151 | 
152 | # target to generate assembly for a file
153 | compare.cpp.s:
154 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW1/CMakeFiles/HW1.dir/build.make homework/HW1/CMakeFiles/HW1.dir/compare.cpp.s
155 | .PHONY : compare.cpp.s
156 | 
157 | main.o: main.cpp.o
158 | .PHONY : main.o
159 | 
160 | # target to build an object file
161 | main.cpp.o:
162 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW1/CMakeFiles/HW1.dir/build.make homework/HW1/CMakeFiles/HW1.dir/main.cpp.o
163 | .PHONY : main.cpp.o
164 | 
165 | main.i: main.cpp.i
166 | .PHONY : main.i
167 | 
168 | # target to preprocess a source file
169 | main.cpp.i:
170 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW1/CMakeFiles/HW1.dir/build.make homework/HW1/CMakeFiles/HW1.dir/main.cpp.i
171 | .PHONY : main.cpp.i
172 | 
173 | main.s: main.cpp.s
174 | .PHONY : main.s
175 | 
176 | # target to generate assembly for a file
177 | main.cpp.s:
178 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW1/CMakeFiles/HW1.dir/build.make homework/HW1/CMakeFiles/HW1.dir/main.cpp.s
179 | .PHONY : main.cpp.s
180 | 
181 | reference_calc.o: reference_calc.cpp.o
182 | .PHONY : reference_calc.o
183 | 
184 | # target to build an object file
185 | reference_calc.cpp.o:
186 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW1/CMakeFiles/HW1.dir/build.make homework/HW1/CMakeFiles/HW1.dir/reference_calc.cpp.o
187 | .PHONY : reference_calc.cpp.o
188 | 
189 | reference_calc.i: reference_calc.cpp.i
190 | .PHONY : reference_calc.i
191 | 
192 | # target to preprocess a source file
193 | reference_calc.cpp.i:
194 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW1/CMakeFiles/HW1.dir/build.make homework/HW1/CMakeFiles/HW1.dir/reference_calc.cpp.i
195 | .PHONY : reference_calc.cpp.i
196 | 
197 | reference_calc.s: reference_calc.cpp.s
198 | .PHONY : reference_calc.s
199 | 
200 | # target to generate assembly for a file
201 | reference_calc.cpp.s:
202 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW1/CMakeFiles/HW1.dir/build.make homework/HW1/CMakeFiles/HW1.dir/reference_calc.cpp.s
203 | .PHONY : reference_calc.cpp.s
204 | 
205 | # Help Target
206 | help:
207 | 	@echo "The following are some of the valid targets for this Makefile:"
208 | 	@echo "... all (the default if no target is provided)"
209 | 	@echo "... clean"
210 | 	@echo "... depend"
211 | 	@echo "... edit_cache"
212 | 	@echo "... rebuild_cache"
213 | 	@echo "... HW1"
214 | 	@echo "... compare.o"
215 | 	@echo "... compare.i"
216 | 	@echo "... compare.s"
217 | 	@echo "... main.o"
218 | 	@echo "... main.i"
219 | 	@echo "... main.s"
220 | 	@echo "... reference_calc.o"
221 | 	@echo "... reference_calc.i"
222 | 	@echo "... reference_calc.s"
223 | .PHONY : help
224 | 
225 | 
226 | 
227 | #=============================================================================
228 | # Special targets to cleanup operation of make.
229 | 
230 | # Special rule to run CMake to check the build system integrity.
231 | # No rule that depends on this can have commands that come from listfiles
232 | # because they might be regenerated.
233 | cmake_check_build_system:
234 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0
235 | .PHONY : cmake_check_build_system
236 | 
237 | 


--------------------------------------------------------------------------------
/assignments/HW1/cinque_terre.gold:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW1/cinque_terre.gold


--------------------------------------------------------------------------------
/assignments/HW1/cinque_terre_small.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW1/cinque_terre_small.jpg


--------------------------------------------------------------------------------
/assignments/HW1/compare.cpp:
--------------------------------------------------------------------------------
 1 | #include <opencv2/core/core.hpp>
 2 | #include <opencv2/highgui/highgui.hpp>
 3 | #include <opencv2/opencv.hpp>
 4 | 
 5 | #include "utils.h"
 6 | // Loads both images, writes their contrast-stretched absolute difference to HW1_differenceImage.png, runs the utils.h pixel check and prints PASS.
 7 | void compareImages(std::string reference_filename, std::string test_filename,
 8 |                    bool useEpsCheck, double perPixelError, double globalError) {
 9 |   cv::Mat reference = cv::imread(reference_filename, -1);
10 |   cv::Mat test = cv::imread(test_filename, -1);
11 |   // stop with a readable message instead of failing inside OpenCV further down
12 |   if (reference.empty() || test.empty() || reference.size() != test.size() ||
13 |       reference.type() != test.type()) {
14 |     std::cerr << "Couldn't compare " << reference_filename << " with "
15 |               << test_filename << ": unreadable or mismatched images" << std::endl;
16 |     exit(1);
17 |   }
18 | 
19 |   // absdiff rather than abs(reference - test): on unsigned pixel types the subtraction saturates at 0 and hides test > reference
20 |   cv::Mat diff;
21 |   cv::absdiff(reference, test, diff);
22 |   cv::Mat diffSingleChannel = diff.reshape(1, 0); // convert to 1 channel, same # rows
23 |   double minVal, maxVal;
24 |   cv::minMaxLoc(diffSingleChannel, &minVal, &maxVal, NULL,
25 |                 NULL); // NULL because we don't care about location
26 |   // bump values to the full 0..255 range; a constant difference (e.g. identical images) is left as is to avoid 255/0
27 |   if (maxVal > minVal)
28 |     diffSingleChannel = (diffSingleChannel - minVal) * (255. / (maxVal - minVal));
29 |   diff = diffSingleChannel.reshape(reference.channels(), 0);
30 |   cv::imwrite("HW1_differenceImage.png", diff);
31 | 
32 |   // OK, now we can start comparing values...
33 |   unsigned char *referencePtr = reference.ptr<unsigned char>(0);
34 |   unsigned char *testPtr = test.ptr<unsigned char>(0);
35 |   if (useEpsCheck) {
36 |     checkResultsEps(referencePtr, testPtr,
37 |                     reference.rows * reference.cols * reference.channels(),
38 |                     perPixelError, globalError);
39 |   } else {
40 |     checkResultsExact(referencePtr, testPtr,
41 |                       reference.rows * reference.cols * reference.channels());
42 |   }
43 |   std::cout << "PASS" << std::endl;
44 | }
45 | 


--------------------------------------------------------------------------------
/assignments/HW1/compare.h:
--------------------------------------------------------------------------------
1 | #ifndef COMPARE_H__
2 | #define COMPARE_H__
3 | #include <string> // compareImages takes std::string; keeps this header self-contained
4 | void compareImages(std::string reference_filename, std::string test_filename,
5 |                    bool useEpsCheck, double perPixelError, double globalError);
6 | 
7 | #endif
8 | 


--------------------------------------------------------------------------------
/assignments/HW1/main.cpp:
--------------------------------------------------------------------------------
  1 | // Udacity HW1 Solution
  2 | 
  3 | #include <iostream>
  4 | #include "timer.h"
  5 | #include "utils.h"
  6 | #include <string>
  7 | #include <stdio.h>
  8 | #include "reference_calc.h"
  9 | #include "compare.h"
 10 | 
 11 | void your_rgba_to_greyscale(const uchar4 *const h_rgbaImage,
 12 |                             uchar4 *const d_rgbaImage,
 13 |                             unsigned char *const d_greyImage, size_t numRows,
 14 |                             size_t numCols);
 15 | 
 16 | // include the definitions of the above functions for this homework
 17 | #include "HW1.cpp"
 18 | // Runs the student greyscale code on an image, writes the GPU and CPU-reference outputs, and compares the two files.
 19 | int main(int argc, char **argv) {
 20 |   // host (h_) and device (d_) image pointers, filled in by preProcess below
 21 |   uchar4 *h_rgbaImage, *d_rgbaImage;
 22 |   unsigned char *h_greyImage, *d_greyImage;
 23 | 
 24 |   // defaults that apply when the optional arguments are left out
 25 |   std::string input_file;
 26 |   std::string output_file = "HW1_output.png";
 27 |   std::string reference_file = "HW1_reference.png";
 28 |   double perPixelError = 0.0;
 29 |   double globalError = 0.0;
 30 |   bool useEpsCheck = false;
 31 | 
 32 |   // Accepted argument lists; anything else prints the usage text and exits:
 33 |   //   input_file
 34 |   //   input_file output_filename
 35 |   //   input_file output_filename reference_filename
 36 |   //   input_file output_filename reference_filename perPixelError globalError
 37 |   if (argc == 6) {
 38 |     // the two extra values switch the comparison to the epsilon check
 39 |     useEpsCheck = true;
 40 |     perPixelError = atof(argv[4]);
 41 |     globalError = atof(argv[5]);
 42 |   } else if (argc < 2 || argc > 4) {
 43 |     std::cerr << "Usage: ./HW1 input_file [output_filename] "
 44 |                  "[reference_filename] [perPixelError] [globalError]"
 45 |               << std::endl;
 46 |     exit(1);
 47 |   }
 48 | 
 49 |   input_file = argv[1];
 50 |   if (argc >= 3) {
 51 |     output_file = argv[2];
 52 |   }
 53 |   if (argc >= 4) {
 54 |     reference_file = argv[3];
 55 |   }
 56 | 
 57 |   // load the image and give us our input and output pointers
 58 |   preProcess(&h_rgbaImage, &h_greyImage, &d_rgbaImage, &d_greyImage,
 59 |              input_file);
 60 | 
 61 |   // time the students' code
 62 |   GpuTimer timer;
 63 |   timer.Start();
 64 |   your_rgba_to_greyscale(h_rgbaImage, d_rgbaImage, d_greyImage, numRows(),
 65 |                          numCols());
 66 |   timer.Stop();
 67 |   cudaDeviceSynchronize();
 68 |   checkCudaErrors(cudaGetLastError());
 69 | 
 70 |   const int printed = printf("Your code ran in: %f msecs.\n", timer.Elapsed());
 71 |   if (printed < 0) {
 72 |     // Couldn't print! Probably the student closed stdout - bad news
 73 |     std::cerr << "Couldn't print timing information! STDOUT Closed!"
 74 |               << std::endl;
 75 |     exit(1);
 76 |   }
 77 | 
 78 |   // copy the GPU result into the host grey buffer
 79 |   const size_t numPixels = numRows() * numCols();
 80 |   checkCudaErrors(cudaMemcpy(h_greyImage, d_greyImage,
 81 |                              numPixels * sizeof(unsigned char),
 82 |                              cudaMemcpyDeviceToHost));
 83 | 
 84 |   // write the GPU image to output_file
 85 |   postProcess(output_file, h_greyImage);
 86 | 
 87 |   // reuse the same host buffer for the CPU reference and write it to reference_file
 88 |   referenceCalculation(h_rgbaImage, h_greyImage, numRows(), numCols());
 89 |   postProcess(reference_file, h_greyImage);
 90 | 
 91 |   // generateReferenceImage(input_file, reference_file);
 92 |   // compare the two image files that were just written
 93 |   compareImages(reference_file, output_file, useEpsCheck, perPixelError,
 94 |                 globalError);
 95 | 
 96 |   // free the device buffers
 97 |   cleanup();
 98 | 
 99 |   return 0;
100 | }
101 | 


--------------------------------------------------------------------------------
/assignments/HW1/reference_calc.cpp:
--------------------------------------------------------------------------------
 1 | // for uchar4 struct
 2 | #include <cuda_runtime.h>
 3 | 
 4 | // CPU reference: one weighted-luminance byte per pixel (row-major image).
 5 | void referenceCalculation(const uchar4 *const rgbaImage,
 6 |                           unsigned char *const greyImage, size_t numRows,
 7 |                           size_t numCols) {
 8 |   const size_t numPixels = numRows * numCols;
 9 |   for (size_t i = 0; i < numPixels; ++i) {
10 |     const uchar4 px = rgbaImage[i];
11 |     const float luminance = .299f * px.x + .587f * px.y + .114f * px.z;
12 |     greyImage[i] = luminance; // the conversion discards the fraction
13 |   }
14 | }
15 | 


--------------------------------------------------------------------------------
/assignments/HW1/reference_calc.h:
--------------------------------------------------------------------------------
1 | #ifndef REFERENCE_H__
2 | #define REFERENCE_H__
3 | 
4 | void referenceCalculation(const uchar4 *const rgbaImage,
5 |                           unsigned char *const greyImage, size_t numRows,
6 |                           size_t numCols);
7 | 
8 | #endif


--------------------------------------------------------------------------------
/assignments/HW1/student_func.cu:
--------------------------------------------------------------------------------
 1 | // Homework 1
 2 | // Color to Greyscale Conversion
 3 | 
 4 | // A common way to represent color images is known as RGBA - the color
 5 | // is specified by how much Red, Green and Blue is in it.
 6 | // The 'A' stands for Alpha and is used for transparency, it will be
 7 | // ignored in this homework.
 8 | 
 9 | // Each channel Red, Blue, Green and Alpha is represented by one byte.
10 | // Since we are using one byte for each color there are 256 different
11 | // possible values for each color.  This means we use 4 bytes per pixel.
12 | 
13 | // Greyscale images are represented by a single intensity value per pixel
14 | // which is one byte in size.
15 | 
16 | // To convert an image from color to grayscale one simple method is to
17 | // set the intensity to the average of the RGB channels.  But we will
18 | // use a more sophisticated method that takes into account how the eye
19 | // perceives color and weights the channels unequally.
20 | 
21 | // The eye responds most strongly to green followed by red and then blue.
22 | // The NTSC (National Television System Committee) recommends the following
23 | // formula for color to greyscale conversion:
24 | 
25 | // I = .299f * R + .587f * G + .114f * B
26 | 
27 | // Notice the trailing f's on the numbers which indicate that they are
28 | // single precision floating point constants and not double precision
29 | // constants.
30 | 
31 | // You should fill in the kernel as well as set the block and grid sizes
32 | // so that the entire image is processed.
33 | 
34 | #include "utils.h"
35 | 
36 | // One thread per pixel: I = .299f * R + .587f * G + .114f * B (alpha ignored),
37 | // with .x -> R ; .y -> G ; .z -> B. Expects a 2D launch whose x dimension
38 | // covers the columns and y the rows; threads outside the image do nothing.
39 | __global__ void rgba_to_greyscale(const uchar4 *const rgbaImage,
40 |                                   unsigned char *const greyImage, int numRows,
41 |                                   int numCols) {
42 |   const int col = blockIdx.x * blockDim.x + threadIdx.x;
43 |   const int row = blockIdx.y * blockDim.y + threadIdx.y;
44 |   if (row >= numRows || col >= numCols)
45 |     return; // the grid is rounded up, so edge blocks overhang the image
46 |   const int offset = row * numCols + col; // row-major 1D offset
47 |   const uchar4 rgba = rgbaImage[offset];
48 |   greyImage[offset] = .299f * rgba.x + .587f * rgba.y + .114f * rgba.z;
49 | }
50 | 
51 | // Launches the kernel over the whole image and blocks until it has finished.
52 | // h_rgbaImage is not used. Does nothing for an empty image.
53 | void your_rgba_to_greyscale(const uchar4 *const h_rgbaImage,
54 |                             uchar4 *const d_rgbaImage,
55 |                             unsigned char *const d_greyImage, size_t numRows,
56 |                             size_t numCols) {
57 |   if (numRows == 0 || numCols == 0)
58 |     return; // a zero-sized grid is an invalid launch configuration
59 |   const dim3 blockSize(16, 16, 1);
60 |   const dim3 gridSize((numCols + blockSize.x - 1) / blockSize.x,
61 |                       (numRows + blockSize.y - 1) / blockSize.y, 1);
62 |   rgba_to_greyscale<<<gridSize, blockSize>>>(d_rgbaImage, d_greyImage, numRows,
63 |                                              numCols);
64 |   checkCudaErrors(cudaGetLastError());      // launch-configuration errors
65 |   checkCudaErrors(cudaDeviceSynchronize()); // errors raised during execution
66 | }
67 | 


--------------------------------------------------------------------------------
/assignments/HW1/timer.h:
--------------------------------------------------------------------------------
 1 | #ifndef GPU_TIMER_H__
 2 | #define GPU_TIMER_H__
 3 | 
 4 | #include <cuda_runtime.h>
 5 | 
 6 | // Times GPU work with a pair of CUDA events recorded on stream 0.
 7 | struct GpuTimer {
 8 |   cudaEvent_t start;
 9 |   cudaEvent_t stop;
10 |   GpuTimer() {
11 |     cudaEventCreate(&start);
12 |     cudaEventCreate(&stop);
13 |   }
14 |   ~GpuTimer() {
15 |     cudaEventDestroy(start);
16 |     cudaEventDestroy(stop);
17 |   }
18 | 
19 |   void Start() { cudaEventRecord(start, 0); }
20 |   void Stop() { cudaEventRecord(stop, 0); }
21 | 
22 |   // Waits for the stop event, then returns the start-to-stop time as reported
23 |   // by cudaEventElapsedTime (milliseconds).
24 |   float Elapsed() {
25 |     float elapsed = 0.f; // defined even if the query below reports an error
26 |     cudaEventSynchronize(stop);
27 |     cudaEventElapsedTime(&elapsed, start, stop);
28 |     return elapsed;
29 |   }
30 | };
31 | 
32 | #endif /* GPU_TIMER_H__ */
33 | 


--------------------------------------------------------------------------------
/assignments/HW1/utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef UTILS_H__
 2 | #define UTILS_H__
 3 | 
 4 | #include <iostream>
 5 | #include <iomanip>
 6 | #include <cuda.h>
 7 | #include <cuda_runtime.h>
 8 | #include <cuda_runtime_api.h>
 9 | #include <cassert>
10 | #include <cmath>
11 | 
12 | #define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)
13 | 
14 | template <typename T>
15 | void check(T err, const char *const func, const char *const file,
16 |            const int line) {
17 |   if (err != cudaSuccess) {
18 |     std::cerr << "CUDA error at: " << file << ":" << line << std::endl;
19 |     std::cerr << cudaGetErrorString(err) << " " << func << std::endl;
20 |     exit(1);
21 |   }
22 | }
23 | 
24 | // Exits with status 1, reporting the first element where ref and gpu differ.
25 | template <typename T>
26 | void checkResultsExact(const T *const ref, const T *const gpu, size_t numElem) {
27 |   size_t pos = 0;
28 |   while (pos < numElem && !(ref[pos] != gpu[pos]))
29 |     ++pos;
30 |   if (pos == numElem)
31 |     return; // the GPU result matches the CPU result everywhere
32 |   std::cerr << "Difference at pos " << pos << std::endl;
33 |   // unary + makes char types print as numbers and leaves other types alone
34 |   std::cerr << "Reference: " << std::setprecision(17) << +ref[pos]
35 |             << "\nGPU      : " << +gpu[pos] << std::endl;
36 |   exit(1);
37 | }
38 | 
39 | // Tolerant comparison of ref against gpu.
40 | //   eps1: largest absolute difference allowed for a single element
41 | //   eps2: largest allowed fraction (0..1) of elements that differ at all
42 | // Prints a diagnostic and exits with status 1 when either bound is exceeded.
43 | template <typename T>
44 | void checkResultsEps(const T *const ref, const T *const gpu, size_t numElem,
45 |                      double eps1, double eps2) {
46 |   assert(eps1 >= 0 && eps2 >= 0);
47 |   unsigned numSmallDifferences = 0;
48 |   for (size_t i = 0; i < numElem; ++i) {
49 |     // subtract smaller from larger in case of unsigned types
50 |     const T diff = ref[i] < gpu[i] ? gpu[i] - ref[i] : ref[i] - gpu[i];
51 |     if (diff > eps1) {
52 |       std::cerr << "Difference at pos " << +i << " exceeds tolerance of "
53 |                 << eps1 << std::endl;
54 |       std::cerr << "Reference: " << std::setprecision(17) << +ref[i]
55 |                 << "\nGPU      : " << +gpu[i] << std::endl;
56 |       exit(1);
57 |     }
58 |     if (diff > 0)
59 |       numSmallDifferences++;
60 |   }
61 |   const double percentSmallDifferences = // an empty input has no differences
62 |       numElem ? (double)numSmallDifferences / (double)numElem : 0.0;
63 |   if (percentSmallDifferences > eps2) {
64 |     std::cerr << "Total percentage of non-zero pixel difference between the "
65 |                  "two images exceeds "
66 |               << 100.0 * eps2 << "%" << std::endl;
67 |     std::cerr << "Percentage of non-zero pixel differences: "
68 |               << 100.0 * percentSmallDifferences << "%" << std::endl;
69 |     exit(1);
70 |   }
71 | }
72 | 
73 | // Uses the autodesk method of image comparison: an element is "bad" when it
74 | // differs from the reference by more than variance. Exits with status 1 when
75 | // more than tolerance elements are bad.
76 | // Note that the tolerance here is in PIXELS, not a percentage of input pixels
77 | template <typename T>
78 | void checkResultsAutodesk(const T *const ref, const T *const gpu,
79 |                           size_t numElem, double variance, size_t tolerance) {
80 |   size_t numBadPixels = 0;
81 |   for (size_t i = 0; i < numElem; ++i) {
82 |     // subtract smaller from larger in case of unsigned types
83 |     const T diff = ref[i] < gpu[i] ? gpu[i] - ref[i] : ref[i] - gpu[i];
84 |     if (diff > variance)
85 |       ++numBadPixels;
86 |   }
87 | 
88 |   if (numBadPixels > tolerance) {
89 |     std::cerr << "Too many bad pixels in the image. " << numBadPixels << "/"
90 |               << tolerance << std::endl;
91 |     exit(1);
92 |   }
93 | }
94 | 
95 | #endif
96 | 


--------------------------------------------------------------------------------
/assignments/HW2/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | ############################################################################
 2 | # <summary> CMakeLists.txt for OpenCV and CUDA. </summary>
 3 | # <date>    2012-02-07          </date>
 4 | # <author>  Quan Tran Minh. edit by Johannes Kast, Michael Sarahan </author>
 5 | # <email>   quantm@unist.ac.kr  kast.jo@googlemail.com msarahan@gmail.com</email>
 6 | ############################################################################
 7 | 
 8 | # collect source files
 9 | 
10 | file( GLOB  hdr *.hpp *.h )
11 | file( GLOB  cu  *.cu)
12 | SET (HW2_files main.cpp reference_calc.cpp compare.cpp)
13 |     
14 | CUDA_ADD_EXECUTABLE(HW2 ${HW2_files} ${hdr} ${cu})
15 | 


--------------------------------------------------------------------------------
/assignments/HW2/HW2.cpp:
--------------------------------------------------------------------------------
  1 | #include <opencv2/core/core.hpp>
  2 | #include <opencv2/highgui/highgui.hpp>
  3 | #include <opencv2/opencv.hpp>
  4 | #include "utils.h"
  5 | #include <cuda.h>
  6 | #include <cuda_runtime.h>
  7 | #include <string>
  8 | 
  9 | cv::Mat imageInputRGBA;
 10 | cv::Mat imageOutputRGBA;
 11 | 
 12 | uchar4 *d_inputImageRGBA__;
 13 | uchar4 *d_outputImageRGBA__;
 14 | 
 15 | float *h_filter__;
 16 | 
 17 | size_t numRows() { return imageInputRGBA.rows; }
 18 | size_t numCols() { return imageInputRGBA.cols; }
 19 | 
 20 | // Prepares everything the blur needs from the image stored in `filename`:
 21 | // host and device pointers to the RGBA input and output images, zeroed
 22 | // device buffers for the three blurred colour channels, and a normalised
 23 | // Gaussian filter on the host together with its width.
 24 | // Nothing is returned: every failure is reported and ends the program.
 25 | void preProcess(uchar4 **h_inputImageRGBA, uchar4 **h_outputImageRGBA,
 26 |                 uchar4 **d_inputImageRGBA, uchar4 **d_outputImageRGBA,
 27 |                 unsigned char **d_redBlurred, unsigned char **d_greenBlurred,
 28 |                 unsigned char **d_blueBlurred, float **h_filter,
 29 |                 int *filterWidth, const std::string &filename) {
 30 | 
 31 |   // touch the runtime first so a broken context is reported up front
 32 |   checkCudaErrors(cudaFree(0));
 33 | 
 34 |   const cv::Mat bgrImage = cv::imread(filename.c_str(), CV_LOAD_IMAGE_COLOR);
 35 |   if (bgrImage.empty()) {
 36 |     std::cerr << "Couldn't open file: " << filename << std::endl;
 37 |     exit(1);
 38 |   }
 39 | 
 40 |   cv::cvtColor(bgrImage, imageInputRGBA, CV_BGR2RGBA);
 41 | 
 42 |   // the output image has the same dimensions as the input
 43 |   imageOutputRGBA.create(bgrImage.rows, bgrImage.cols, CV_8UC4);
 44 | 
 45 |   // The pixel data is handed out as flat arrays below, which is only valid
 46 |   // when both matrices are stored contiguously.
 47 |   if (!(imageInputRGBA.isContinuous() && imageOutputRGBA.isContinuous())) {
 48 |     std::cerr << "Images aren't continuous!! Exiting." << std::endl;
 49 |     exit(1);
 50 |   }
 51 | 
 52 |   *h_inputImageRGBA = (uchar4 *)imageInputRGBA.ptr<unsigned char>(0);
 53 |   *h_outputImageRGBA = (uchar4 *)imageOutputRGBA.ptr<unsigned char>(0);
 54 | 
 55 |   const size_t numPixels = numRows() * numCols();
 56 |   // device copies of the input image and of the (zeroed) output image
 57 |   checkCudaErrors(cudaMalloc(d_inputImageRGBA, sizeof(uchar4) * numPixels));
 58 |   checkCudaErrors(cudaMalloc(d_outputImageRGBA, sizeof(uchar4) * numPixels));
 59 |   checkCudaErrors(cudaMemset(
 60 |       *d_outputImageRGBA, 0,
 61 |       numPixels * sizeof(uchar4))); // start from a known, all-zero output
 62 | 
 63 |   // upload the input pixels
 64 |   checkCudaErrors(cudaMemcpy(*d_inputImageRGBA, *h_inputImageRGBA,
 65 |                              sizeof(uchar4) * numPixels,
 66 |                              cudaMemcpyHostToDevice));
 67 | 
 68 |   // remembered so that cleanUp() can release them
 69 |   d_inputImageRGBA__ = *d_inputImageRGBA;
 70 |   d_outputImageRGBA__ = *d_outputImageRGBA;
 71 | 
 72 |   // Gaussian filter: blurKernelWidth x blurKernelWidth taps, stored row-major
 73 |   const int blurKernelWidth = 9;
 74 |   const float blurKernelSigma = 2.;
 75 |   const int halfWidth = blurKernelWidth / 2;
 76 |   const int numTaps = blurKernelWidth * blurKernelWidth;
 77 | 
 78 |   *filterWidth = blurKernelWidth;
 79 |   float *const filter = new float[numTaps];
 80 |   *h_filter = filter;
 81 |   h_filter__ = filter; // remembered so that cleanUp() can delete it
 82 | 
 83 |   float filterSum = 0.f; // accumulated for the normalisation below
 84 |   for (int row = 0; row < blurKernelWidth; ++row) {
 85 |     for (int col = 0; col < blurKernelWidth; ++col) {
 86 |       // offsets of this tap from the filter centre
 87 |       const int r = row - halfWidth;
 88 |       const int c = col - halfWidth;
 89 |       const float filterValue = expf(-(float)(c * c + r * r) /
 90 |                                      (2.f * blurKernelSigma * blurKernelSigma));
 91 |       filter[row * blurKernelWidth + col] = filterValue;
 92 |       filterSum += filterValue;
 93 |     }
 94 |   }
 95 | 
 96 |   // scale every tap by the reciprocal of the sum
 97 |   const float normalizationFactor = 1.f / filterSum;
 98 |   for (int tap = 0; tap < numTaps; ++tap)
 99 |     filter[tap] *= normalizationFactor;
100 | 
101 |   // One zero-filled byte buffer per blurred colour channel. They are not
102 |   // recorded in a global, so cleanUp() does not release them.
103 |   checkCudaErrors(cudaMalloc(d_redBlurred, sizeof(unsigned char) * numPixels));
104 |   checkCudaErrors(
105 |       cudaMalloc(d_greenBlurred, sizeof(unsigned char) * numPixels));
106 |   checkCudaErrors(cudaMalloc(d_blueBlurred, sizeof(unsigned char) * numPixels));
107 |   checkCudaErrors(
108 |       cudaMemset(*d_redBlurred, 0, sizeof(unsigned char) * numPixels));
109 |   checkCudaErrors(
110 |       cudaMemset(*d_greenBlurred, 0, sizeof(unsigned char) * numPixels));
111 |   checkCudaErrors(
112 |       cudaMemset(*d_blueBlurred, 0, sizeof(unsigned char) * numPixels));
113 | }
114 | 
115 | // Writes the RGBA pixels at data_ptr (numRows() x numCols()) to output_file.
116 | void postProcess(const std::string &output_file, uchar4 *data_ptr) {
117 |   // matrix header over the caller's buffer
118 |   const cv::Mat rgbaView(numRows(), numCols(), CV_8UC4, (void *)data_ptr);
119 |   cv::Mat bgr; // the image is converted to BGR order before it is written
120 |   cv::cvtColor(rgbaView, bgr, CV_RGBA2BGR);
121 |   cv::imwrite(output_file.c_str(), bgr);
122 | }
123 | 
124 | void cleanUp(void) { // releases what preProcess recorded in the globals
125 |   delete[] h_filter__;
126 |   cudaFree(d_outputImageRGBA__);
127 |   cudaFree(d_inputImageRGBA__);
128 | }
129 | 
130 | // Unused: shows how to produce the blurred reference with OpenCV, which is
131 | // much faster than the naive implementation in reference_calc.cpp.
132 | // Exits with status 1 when input_file cannot be read.
133 | void generateReferenceImage(std::string input_file, std::string reference_file,
134 |                             int kernel_size) {
135 |   cv::Mat input = cv::imread(input_file);
136 |   if (input.empty()) {
137 |     std::cerr << "Couldn't open file: " << input_file << std::endl;
138 |     exit(1);
139 |   }
140 |   cv::Mat reference; // GaussianBlur allocates the destination itself
141 |   cv::GaussianBlur(input, reference, cv::Size2i(kernel_size, kernel_size), 0);
142 |   cv::imwrite(reference_file, reference);
143 | }
141 | 


--------------------------------------------------------------------------------
/assignments/HW2/Makefile:
--------------------------------------------------------------------------------
  1 | # CMAKE generated file: DO NOT EDIT!
  2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20
  3 | 
  4 | # Default target executed when no arguments are given to make.
  5 | default_target: all
  6 | .PHONY : default_target
  7 | 
  8 | # Allow only one "make -f Makefile2" at a time, but pass parallelism.
  9 | .NOTPARALLEL:
 10 | 
 11 | #=============================================================================
 12 | # Special targets provided by cmake.
 13 | 
 14 | # Disable implicit rules so canonical targets will work.
 15 | .SUFFIXES:
 16 | 
 17 | # Disable VCS-based implicit rules.
 18 | % : %,v
 19 | 
 20 | # Disable VCS-based implicit rules.
 21 | % : RCS/%
 22 | 
 23 | # Disable VCS-based implicit rules.
 24 | % : RCS/%,v
 25 | 
 26 | # Disable VCS-based implicit rules.
 27 | % : SCCS/s.%
 28 | 
 29 | # Disable VCS-based implicit rules.
 30 | % : s.%
 31 | 
 32 | .SUFFIXES: .hpux_make_needs_suffix_list
 33 | 
 34 | # Command-line flag to silence nested $(MAKE).
 35 | $(VERBOSE)MAKESILENT = -s
 36 | 
 37 | #Suppress display of executed commands.
 38 | $(VERBOSE).SILENT:
 39 | 
 40 | # A target that is always out of date.
 41 | cmake_force:
 42 | .PHONY : cmake_force
 43 | 
 44 | #=============================================================================
 45 | # Set environment variables for the build.
 46 | 
 47 | # The shell in which to execute make rules.
 48 | SHELL = /bin/sh
 49 | 
 50 | # The CMake executable.
 51 | CMAKE_COMMAND = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake
 52 | 
 53 | # The command to remove a file.
 54 | RM = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E rm -f
 55 | 
 56 | # Escaping for special characters.
 57 | EQUALS = =
 58 | 
 59 | # The top-level source directory on which CMake was run.
 60 | CMAKE_SOURCE_DIR = /data/home/chilli/cluster/work/CS344/assignments
 61 | 
 62 | # The top-level build directory on which CMake was run.
 63 | CMAKE_BINARY_DIR = /data/home/chilli/cluster/work/CS344/assignments
 64 | 
 65 | #=============================================================================
 66 | # Targets provided globally by CMake.
 67 | 
 68 | # Special rule for the target rebuild_cache
 69 | rebuild_cache:
 70 | 	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..."
 71 | 	/usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR)
 72 | .PHONY : rebuild_cache
 73 | 
 74 | # Special rule for the target rebuild_cache
 75 | rebuild_cache/fast: rebuild_cache
 76 | .PHONY : rebuild_cache/fast
 77 | 
 78 | # Special rule for the target edit_cache
 79 | edit_cache:
 80 | 	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "No interactive CMake dialog available..."
 81 | 	/usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available.
 82 | .PHONY : edit_cache
 83 | 
 84 | # Special rule for the target edit_cache
 85 | edit_cache/fast: edit_cache
 86 | .PHONY : edit_cache/fast
 87 | 
 88 | # The main all target
 89 | all: cmake_check_build_system
 90 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles /data/home/chilli/cluster/work/CS344/assignments/homework/HW2//CMakeFiles/progress.marks
 91 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW2/all
 92 | 	$(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles 0
 93 | .PHONY : all
 94 | 
 95 | # The main clean target
 96 | clean:
 97 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW2/clean
 98 | .PHONY : clean
 99 | 
100 | # The main clean target
101 | clean/fast: clean
102 | .PHONY : clean/fast
103 | 
104 | # Prepare targets for installation.
105 | preinstall: all
106 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW2/preinstall
107 | .PHONY : preinstall
108 | 
109 | # Prepare targets for installation.
110 | preinstall/fast:
111 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW2/preinstall
112 | .PHONY : preinstall/fast
113 | 
114 | # clear depends
115 | depend:
116 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1
117 | .PHONY : depend
118 | 
119 | # Convenience name for target.
120 | homework/HW2/CMakeFiles/HW2.dir/rule:
121 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW2/CMakeFiles/HW2.dir/rule
122 | .PHONY : homework/HW2/CMakeFiles/HW2.dir/rule
123 | 
124 | # Convenience name for target.
125 | HW2: homework/HW2/CMakeFiles/HW2.dir/rule
126 | .PHONY : HW2
127 | 
128 | # fast build rule for target.
129 | HW2/fast:
130 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW2/CMakeFiles/HW2.dir/build.make homework/HW2/CMakeFiles/HW2.dir/build
131 | .PHONY : HW2/fast
132 | 
133 | compare.o: compare.cpp.o
134 | .PHONY : compare.o
135 | 
136 | # target to build an object file
137 | compare.cpp.o:
138 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW2/CMakeFiles/HW2.dir/build.make homework/HW2/CMakeFiles/HW2.dir/compare.cpp.o
139 | .PHONY : compare.cpp.o
140 | 
141 | compare.i: compare.cpp.i
142 | .PHONY : compare.i
143 | 
144 | # target to preprocess a source file
145 | compare.cpp.i:
146 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW2/CMakeFiles/HW2.dir/build.make homework/HW2/CMakeFiles/HW2.dir/compare.cpp.i
147 | .PHONY : compare.cpp.i
148 | 
149 | compare.s: compare.cpp.s
150 | .PHONY : compare.s
151 | 
152 | # target to generate assembly for a file
153 | compare.cpp.s:
154 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW2/CMakeFiles/HW2.dir/build.make homework/HW2/CMakeFiles/HW2.dir/compare.cpp.s
155 | .PHONY : compare.cpp.s
156 | 
157 | main.o: main.cpp.o
158 | .PHONY : main.o
159 | 
160 | # target to build an object file
161 | main.cpp.o:
162 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW2/CMakeFiles/HW2.dir/build.make homework/HW2/CMakeFiles/HW2.dir/main.cpp.o
163 | .PHONY : main.cpp.o
164 | 
165 | main.i: main.cpp.i
166 | .PHONY : main.i
167 | 
168 | # target to preprocess a source file
169 | main.cpp.i:
170 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW2/CMakeFiles/HW2.dir/build.make homework/HW2/CMakeFiles/HW2.dir/main.cpp.i
171 | .PHONY : main.cpp.i
172 | 
173 | main.s: main.cpp.s
174 | .PHONY : main.s
175 | 
176 | # target to generate assembly for a file
177 | main.cpp.s:
178 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW2/CMakeFiles/HW2.dir/build.make homework/HW2/CMakeFiles/HW2.dir/main.cpp.s
179 | .PHONY : main.cpp.s
180 | 
181 | reference_calc.o: reference_calc.cpp.o
182 | .PHONY : reference_calc.o
183 | 
184 | # target to build an object file
185 | reference_calc.cpp.o:
186 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW2/CMakeFiles/HW2.dir/build.make homework/HW2/CMakeFiles/HW2.dir/reference_calc.cpp.o
187 | .PHONY : reference_calc.cpp.o
188 | 
189 | reference_calc.i: reference_calc.cpp.i
190 | .PHONY : reference_calc.i
191 | 
192 | # target to preprocess a source file
193 | reference_calc.cpp.i:
194 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW2/CMakeFiles/HW2.dir/build.make homework/HW2/CMakeFiles/HW2.dir/reference_calc.cpp.i
195 | .PHONY : reference_calc.cpp.i
196 | 
197 | reference_calc.s: reference_calc.cpp.s
198 | .PHONY : reference_calc.s
199 | 
200 | # target to generate assembly for a file
201 | reference_calc.cpp.s:
202 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW2/CMakeFiles/HW2.dir/build.make homework/HW2/CMakeFiles/HW2.dir/reference_calc.cpp.s
203 | .PHONY : reference_calc.cpp.s
204 | 
205 | # Help Target
206 | help:
207 | 	@echo "The following are some of the valid targets for this Makefile:"
208 | 	@echo "... all (the default if no target is provided)"
209 | 	@echo "... clean"
210 | 	@echo "... depend"
211 | 	@echo "... edit_cache"
212 | 	@echo "... rebuild_cache"
213 | 	@echo "... HW2"
214 | 	@echo "... compare.o"
215 | 	@echo "... compare.i"
216 | 	@echo "... compare.s"
217 | 	@echo "... main.o"
218 | 	@echo "... main.i"
219 | 	@echo "... main.s"
220 | 	@echo "... reference_calc.o"
221 | 	@echo "... reference_calc.i"
222 | 	@echo "... reference_calc.s"
223 | .PHONY : help
224 | 
225 | 
226 | 
227 | #=============================================================================
228 | # Special targets to cleanup operation of make.
229 | 
230 | # Special rule to run CMake to check the build system integrity.
231 | # No rule that depends on this can have commands that come from listfiles
232 | # because they might be regenerated.
233 | cmake_check_build_system:
234 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0
235 | .PHONY : cmake_check_build_system
236 | 
237 | 


--------------------------------------------------------------------------------
/assignments/HW2/cinque_terre.gold:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW2/cinque_terre.gold


--------------------------------------------------------------------------------
/assignments/HW2/cinque_terre_small.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW2/cinque_terre_small.jpg


--------------------------------------------------------------------------------
/assignments/HW2/compare.cpp:
--------------------------------------------------------------------------------
 1 | #include <opencv2/core/core.hpp>
 2 | #include <opencv2/highgui/highgui.hpp>
 3 | #include <opencv2/opencv.hpp>
 4 | 
 5 | #include "utils.h"
 6 | 
 7 | // Compares the image in test_filename against reference_filename.
 8 | // Writes a contrast-stretched difference image to HW2_differenceImage.png,
 9 | // then checks the pixel data exactly or, when useEpsCheck is set, within
10 | // perPixelError / globalError (see checkResultsEps). Prints "PASS" on
11 | // success; any failure prints a diagnostic and exits with status 1.
12 | void compareImages(std::string reference_filename, std::string test_filename,
13 |                    bool useEpsCheck, double perPixelError, double globalError) {
14 |   cv::Mat reference = cv::imread(reference_filename, -1);
15 |   cv::Mat test = cv::imread(test_filename, -1);
16 | 
17 |   // imread signals an unreadable file by returning an empty matrix
18 |   if (reference.empty() || test.empty()) {
19 |     std::cerr << "Couldn't open file: "
20 |               << (reference.empty() ? reference_filename : test_filename)
21 |               << std::endl;
22 |     exit(1);
23 |   }
24 |   // the element-wise comparisons below need matching dimensions and type
25 |   if (reference.size() != test.size() || reference.type() != test.type()) {
26 |     std::cerr << "Image size or type differs between " << reference_filename
27 |               << " and " << test_filename << std::endl;
28 |     exit(1);
29 |   }
30 | 
31 |   cv::Mat diff = abs(reference - test);
32 | 
33 |   cv::Mat diffSingleChannel =
34 |       diff.reshape(1, 0); // convert to 1 channel, same # rows
35 | 
36 |   double minVal, maxVal;
37 | 
38 |   cv::minMaxLoc(diffSingleChannel, &minVal, &maxVal, NULL,
39 |                 NULL); // NULL because we don't care about location
40 | 
41 |   // Stretch the differences so that they span 0..255. Identical images have
42 |   // maxVal == minVal; skipping the scale then avoids a division by zero.
43 |   if (maxVal > minVal) {
44 |     diffSingleChannel =
45 |         (diffSingleChannel - minVal) * (255. / (maxVal - minVal));
46 |   }
47 | 
48 |   diff = diffSingleChannel.reshape(reference.channels(), 0);
49 | 
50 |   cv::imwrite("HW2_differenceImage.png", diff);
51 |   // OK, now we can start comparing values...
52 |   unsigned char *referencePtr = reference.ptr<unsigned char>(0);
53 |   unsigned char *testPtr = test.ptr<unsigned char>(0);
54 | 
55 |   if (useEpsCheck) {
56 |     checkResultsEps(referencePtr, testPtr,
57 |                     reference.rows * reference.cols * reference.channels(),
58 |                     perPixelError, globalError);
59 |   } else {
60 |     checkResultsExact(referencePtr, testPtr,
61 |                       reference.rows * reference.cols * reference.channels());
62 |   }
63 | 
64 |   std::cout << "PASS" << std::endl;
65 |   return;
66 | }


--------------------------------------------------------------------------------
/assignments/HW2/compare.h:
--------------------------------------------------------------------------------
1 | #ifndef COMPARE_H__
2 | #define COMPARE_H__
3 | 
4 | void compareImages(std::string reference_filename, std::string test_filename,
5 |                    bool useEpsCheck, double perPixelError, double globalError);
6 | 
7 | #endif


--------------------------------------------------------------------------------
/assignments/HW2/main.cpp:
--------------------------------------------------------------------------------
  1 | // Udacity HW2 Driver
  2 | 
  3 | #include <iostream>
  4 | #include "timer.h"
  5 | #include "utils.h"
  6 | #include <string>
  7 | #include <stdio.h>
  8 | 
  9 | #include "reference_calc.h"
 10 | #include "compare.h"
 11 | 
 12 | // include the definitions of the above functions for this homework
 13 | #include "HW2.cpp"
 14 | 
 15 | /*******  DEFINED IN student_func.cu *********/
 16 | 
 17 | void your_gaussian_blur(const uchar4 *const h_inputImageRGBA,
 18 |                         uchar4 *const d_inputImageRGBA,
 19 |                         uchar4 *const d_outputImageRGBA, const size_t numRows,
 20 |                         const size_t numCols, unsigned char *d_redBlurred,
 21 |                         unsigned char *d_greenBlurred,
 22 |                         unsigned char *d_blueBlurred, const int filterWidth);
 23 | 
 24 | void allocateMemoryAndCopyToGPU(const size_t numRowsImage,
 25 |                                 const size_t numColsImage,
 26 |                                 const float *const h_filter,
 27 |                                 const size_t filterWidth);
 28 | 
 29 | /*******  Begin main *********/
 30 | // HW2 driver: parses argv, runs the student blur on the GPU, saves the result and a CPU reference image, then compares them. Returns 0; exits with 1 on bad usage or a failed check.
 31 | int main(int argc, char **argv) {
 32 |   uchar4 *h_inputImageRGBA, *d_inputImageRGBA;
 33 |   uchar4 *h_outputImageRGBA, *d_outputImageRGBA;
 34 |   unsigned char *d_redBlurred, *d_greenBlurred, *d_blueBlurred;
 35 | 
 36 |   float *h_filter;
 37 |   int filterWidth;
 38 | 
 39 |   std::string input_file;
 40 |   std::string output_file;
 41 |   std::string reference_file;
 42 |   double perPixelError = 0.0;
 43 |   double globalError = 0.0;
 44 |   bool useEpsCheck = false;
 45 |   switch (argc) { // argc decides which optional arguments were supplied
 46 |   case 2:
 47 |     input_file = std::string(argv[1]);
 48 |     output_file = "HW2_output.png";
 49 |     reference_file = "HW2_reference.png";
 50 |     break;
 51 |   case 3:
 52 |     input_file = std::string(argv[1]);
 53 |     output_file = std::string(argv[2]);
 54 |     reference_file = "HW2_reference.png";
 55 |     break;
 56 |   case 4:
 57 |     input_file = std::string(argv[1]);
 58 |     output_file = std::string(argv[2]);
 59 |     reference_file = std::string(argv[3]);
 60 |     break;
 61 |   case 6: // both tolerances given: switch to the epsilon comparison
 62 |     useEpsCheck = true;
 63 |     input_file = std::string(argv[1]);
 64 |     output_file = std::string(argv[2]);
 65 |     reference_file = std::string(argv[3]);
 66 |     perPixelError = atof(argv[4]);
 67 |     globalError = atof(argv[5]);
 68 |     break;
 69 |   default: // includes argc == 5 (perPixelError without globalError)
 70 |     std::cerr << "Usage: ./HW2 input_file [output_filename] "
 71 |                  "[reference_filename] [perPixelError] [globalError]"
 72 |               << std::endl;
 73 |     exit(1);
 74 |   }
 75 |   // load the image and give us our input and output pointers
 76 |   preProcess(&h_inputImageRGBA, &h_outputImageRGBA, &d_inputImageRGBA,
 77 |              &d_outputImageRGBA, &d_redBlurred, &d_greenBlurred, &d_blueBlurred,
 78 |              &h_filter, &filterWidth, input_file);
 79 | 
 80 |   allocateMemoryAndCopyToGPU(numRows(), numCols(), h_filter, filterWidth);
 81 |   GpuTimer timer; // only the your_gaussian_blur call sits between Start() and Stop()
 82 |   timer.Start();
 83 |   // call the students' code
 84 |   your_gaussian_blur(h_inputImageRGBA, d_inputImageRGBA, d_outputImageRGBA,
 85 |                      numRows(), numCols(), d_redBlurred, d_greenBlurred,
 86 |                      d_blueBlurred, filterWidth);
 87 |   timer.Stop();
 88 |   checkCudaErrors(cudaDeviceSynchronize()); // report asynchronous kernel failures instead of dropping the status
 89 |   checkCudaErrors(cudaGetLastError());
 90 |   int err = printf("Your code ran in: %f msecs.\n", timer.Elapsed());
 91 | 
 92 |   if (err < 0) {
 93 |     // Couldn't print! Probably the student closed stdout - bad news
 94 |     std::cerr << "Couldn't print timing information! STDOUT Closed!"
 95 |               << std::endl;
 96 |     exit(1);
 97 |   }
 98 | 
 99 |   // check results and output the blurred image
100 | 
101 |   size_t numPixels = numRows() * numCols();
102 |   // copy the output back to the host (d_outputImageRGBA__ is not declared in this file; presumably a global from HW2.cpp - confirm)
103 |   checkCudaErrors(cudaMemcpy(h_outputImageRGBA, d_outputImageRGBA__,
104 |                              sizeof(uchar4) * numPixels,
105 |                              cudaMemcpyDeviceToHost));
106 | 
107 |   postProcess(output_file, h_outputImageRGBA);
108 | 
109 |   referenceCalculation(h_inputImageRGBA, h_outputImageRGBA, numRows(), // reuses the host output buffer: the GPU result was already saved above
110 |                        numCols(), h_filter, filterWidth);
111 | 
112 |   postProcess(reference_file, h_outputImageRGBA);
113 | 
114 |   //  Cheater easy way with OpenCV
115 |   // generateReferenceImage(input_file, reference_file, filterWidth);
116 | 
117 |   compareImages(reference_file, output_file, useEpsCheck, perPixelError,
118 |                 globalError);
119 | 
120 |   checkCudaErrors(cudaFree(d_redBlurred));
121 |   checkCudaErrors(cudaFree(d_greenBlurred));
122 |   checkCudaErrors(cudaFree(d_blueBlurred));
123 | 
124 |   cleanUp();
125 | 
126 |   return 0;
127 | }
128 | 


--------------------------------------------------------------------------------
/assignments/HW2/reference_calc.cpp:
--------------------------------------------------------------------------------
 1 | #include <algorithm>
 2 | #include <cassert>
 3 | // for uchar4 struct
 4 | #include <cuda_runtime.h>
 5 | // CPU reference blur of one 8-bit channel: each output pixel is the filter-weighted sum of its filterWidth x filterWidth neighbourhood, with reads clamped to the image.
 6 | void channelConvolution(const unsigned char *const channel,
 7 |                         unsigned char *const channelBlurred,
 8 |                         const size_t numRows, const size_t numCols,
 9 |                         const float *filter, const int filterWidth) {
10 |   // Dealing with an even width filter is trickier
11 |   assert(filterWidth % 2 == 1);
12 | 
13 |   // For every pixel in the image
14 |   for (int r = 0; r < (int)numRows; ++r) {
15 |     for (int c = 0; c < (int)numCols; ++c) {
16 |       float result = 0.f;
17 |       // For every value in the filter around the pixel (c, r)
18 |       for (int filter_r = -filterWidth / 2; filter_r <= filterWidth / 2;
19 |            ++filter_r) {
20 |         for (int filter_c = -filterWidth / 2; filter_c <= filterWidth / 2;
21 |              ++filter_c) {
22 |           // Find the global image position for this filter position
23 |           // clamp to boundary of the image
24 |           int image_r = std::min(std::max(r + filter_r, 0),
25 |                                  static_cast<int>(numRows - 1));
26 |           int image_c = std::min(std::max(c + filter_c, 0),
27 |                                  static_cast<int>(numCols - 1));
28 | 
29 |           float image_value =
30 |               static_cast<float>(channel[image_r * numCols + image_c]);
31 |           float filter_value = // offsets are shifted by filterWidth / 2 so the filter index starts at 0
32 |               filter[(filter_r + filterWidth / 2) * filterWidth + filter_c +
33 |                      filterWidth / 2];
34 | 
35 |           result += image_value * filter_value;
36 |         }
37 |       }
38 | 
39 |       channelBlurred[r * numCols + c] = result; // implicit float -> unsigned char conversion (fractional part is dropped, no rounding)
40 |     }
41 |   }
42 | }
43 | // CPU reference for the whole image: splits rgbaImage into R, G and B planes, blurs each with channelConvolution, and writes the recombined pixels to outputImage.
44 | void referenceCalculation(const uchar4 *const rgbaImage,
45 |                           uchar4 *const outputImage, size_t numRows,
46 |                           size_t numCols, const float *const filter,
47 |                           const int filterWidth) {
48 |   unsigned char *red = new unsigned char[numRows * numCols];
49 |   unsigned char *blue = new unsigned char[numRows * numCols];
50 |   unsigned char *green = new unsigned char[numRows * numCols];
51 | 
52 |   unsigned char *redBlurred = new unsigned char[numRows * numCols];
53 |   unsigned char *blueBlurred = new unsigned char[numRows * numCols];
54 |   unsigned char *greenBlurred = new unsigned char[numRows * numCols];
55 | 
56 |   // First we separate the incoming RGBA image into three separate channels
57 |   // for Red, Green and Blue (x -> red, y -> green, z -> blue; w is not read)
58 |   for (size_t i = 0; i < numRows * numCols; ++i) {
59 |     uchar4 rgba = rgbaImage[i];
60 |     red[i] = rgba.x;
61 |     green[i] = rgba.y;
62 |     blue[i] = rgba.z;
63 |   }
64 | 
65 |   // Now we can do the convolution for each of the color channels
66 |   channelConvolution(red, redBlurred, numRows, numCols, filter, filterWidth);
67 |   channelConvolution(green, greenBlurred, numRows, numCols, filter,
68 |                      filterWidth);
69 |   channelConvolution(blue, blueBlurred, numRows, numCols, filter, filterWidth);
70 | 
71 |   // now recombine into the output image - Alpha is 255 for no transparency
72 |   for (size_t i = 0; i < numRows * numCols; ++i) {
73 |     uchar4 rgba =
74 |         make_uchar4(redBlurred[i], greenBlurred[i], blueBlurred[i], 255);
75 |     outputImage[i] = rgba;
76 |   }
77 | 
78 |   delete[] red; // release the six temporary planes allocated at the top of the function
79 |   delete[] green;
80 |   delete[] blue;
81 | 
82 |   delete[] redBlurred;
83 |   delete[] greenBlurred;
84 |   delete[] blueBlurred;
85 | }
86 | 


--------------------------------------------------------------------------------
/assignments/HW2/reference_calc.h:
--------------------------------------------------------------------------------
1 | #ifndef REFERENCE_H__
2 | #define REFERENCE_H__
3 | 
4 | void referenceCalculation(const uchar4 *const rgbaImage,
5 |                           uchar4 *const outputImage, size_t numRows,
6 |                           size_t numCols, const float *const filter,
7 |                           const int filterWidth);
8 | 
9 | #endif


--------------------------------------------------------------------------------
/assignments/HW2/student_func.cu:
--------------------------------------------------------------------------------
  1 | // Homework 2
  2 | // Image Blurring
  3 | //
  4 | // In this homework we are blurring an image. To do this, imagine that we have
  5 | // a square array of weight values. For each pixel in the image, imagine that we
  6 | // overlay this square array of weights on top of the image such that the center
  7 | // of the weight array is aligned with the current pixel. To compute a blurred
  8 | // pixel value, we multiply each pair of numbers that line up. In other words,
  9 | // we multiply each weight with the pixel underneath it. Finally, we add up all
 10 | // of the multiplied numbers and assign that value to our output for the current
 11 | // pixel. We repeat this process for all the pixels in the image.
 12 | 
 13 | // To help get you started, we have included some useful notes here.
 14 | 
 15 | //****************************************************************************
 16 | 
 17 | // For a color image that has multiple channels, we suggest separating
 18 | // the different color channels so that each color is stored contiguously
 19 | // instead of being interleaved. This will simplify your code.
 20 | 
 21 | // That is instead of RGBARGBARGBARGBA... we suggest transforming to three
 22 | // arrays (as in the previous homework we ignore the alpha channel again):
 23 | //  1) RRRRRRRR...
 24 | //  2) GGGGGGGG...
 25 | //  3) BBBBBBBB...
 26 | //
 27 | // The original layout is known as an Array of Structures (AoS) whereas the
 28 | // format we are converting to is known as a Structure of Arrays (SoA).
 29 | 
 30 | // As a warm-up, we will ask you to write the kernel that performs this
 31 | // separation. You should then write the "meat" of the assignment,
 32 | // which is the kernel that performs the actual blur. We provide code that
 33 | // re-combines your blurred results for each color channel.
 34 | 
 35 | //****************************************************************************
 36 | 
 37 | // You must fill in the gaussian_blur kernel to perform the blurring of the
 38 | // inputChannel, using the array of weights, and put the result in the
 39 | // outputChannel.
 40 | 
 41 | // Here is an example of computing a blur, using a weighted average, for a
 42 | // single pixel in a small image.
 43 | //
 44 | // Array of weights:
 45 | //
 46 | //  0.0  0.2  0.0
 47 | //  0.2  0.2  0.2
 48 | //  0.0  0.2  0.0
 49 | //
 50 | // Image (note that we align the array of weights to the center of the box):
 51 | //
 52 | //    1  2  5  2  0  3
 53 | //       -------
 54 | //    3 |2  5  1| 6  0       0.0*2 + 0.2*5 + 0.0*1 +
 55 | //      |       |
 56 | //    4 |3  6  2| 1  4   ->  0.2*3 + 0.2*6 + 0.2*2 +   ->  3.2
 57 | //      |       |
 58 | //    0 |4  0  3| 4  2       0.0*4 + 0.2*0 + 0.0*3
 59 | //       -------
 60 | //    9  6  5  0  3  9
 61 | //
 62 | //         (1)                         (2)                 (3)
 63 | //
 64 | // A good starting place is to map each thread to a pixel as you have before.
 65 | // Then every thread can perform steps 2 and 3 in the diagram above
 66 | // completely independently of one another.
 67 | 
 68 | // Note that the array of weights is square, so its height is the same as its
 69 | // width. We refer to the array of weights as a filter, and we refer to its
 70 | // width with the variable filterWidth.
 71 | 
 72 | //****************************************************************************
 73 | 
 74 | // Your homework submission will be evaluated based on correctness and speed.
 75 | // We test each pixel against a reference solution. If any pixel differs by
 76 | // more than some small threshold value, the system will tell you that your
 77 | // solution is incorrect, and it will let you try again.
 78 | 
 79 | // Once you have gotten that working correctly, then you can think about using
 80 | // shared memory and having the threads cooperate to achieve better performance.
 81 | 
 82 | //****************************************************************************
 83 | 
 84 | // Also note that we've supplied a helpful debugging function called
 85 | // checkCudaErrors. You should wrap your allocation and copying statements like
 86 | // we've done in the code we're supplying you. Here is an example of the unsafe
 87 | // way to allocate memory on the GPU:
 88 | //
 89 | // cudaMalloc(&d_red, sizeof(unsigned char) * numRows * numCols);
 90 | //
 91 | // Here is an example of the safe way to do the same thing:
 92 | //
 93 | // checkCudaErrors(cudaMalloc(&d_red, sizeof(unsigned char) * numRows *
 94 | // numCols));
 95 | //
 96 | // Writing code the safe way requires slightly more typing, but is very helpful
 97 | // for catching mistakes. If you write code the unsafe way and you make a
 98 | // mistake, then any subsequent kernels won't compute anything, and it will be
 99 | // hard to figure out why. Writing code the safe way will inform you as soon as
100 | // you make a mistake.
101 | 
102 | // Finally, remember to free the memory you allocate at the end of the function.
103 | 
104 | //****************************************************************************
105 | 
106 | #include "utils.h"
107 | // Kernel stub (body is still a TODO): meant to blur inputChannel with the filterWidth x filterWidth filter and store the result in outputChannel.
108 | __global__ void gaussian_blur(const unsigned char *const inputChannel,
109 |                               unsigned char *const outputChannel, int numRows,
110 |                               int numCols, const float *const filter,
111 |                               const int filterWidth) {
112 |   // TODO
113 | 
114 |   // NOTE: Be sure to compute any intermediate results in floating point
115 |   // before storing the final result as unsigned char.
116 | 
117 |   // NOTE: Be careful not to try to access memory that is outside the bounds of
118 |   // the image. You'll want code that performs the following check before
119 |   // accessing GPU memory:
120 |   //
121 |   // if ( absolute_image_position_x >= numCols ||
122 |   //      absolute_image_position_y >= numRows )
123 |   // {
124 |   //     return;
125 |   // }
126 | 
127 |   // NOTE: If a thread's absolute 2D position is within the image, but
128 |   // some of its neighbors are outside the image, then you will need to be extra
129 |   // careful. Instead of trying to read such a neighbor value from GPU memory
130 |   // (which won't work because the value is out of bounds), you should
131 |   // explicitly clamp the neighbor values you read to be within the bounds of
132 |   // the image. If this is not clear to you, then please refer to the sequential
133 |   // reference solution for the exact clamping semantics you should follow.
134 | }
135 | 
136 | // This kernel takes in an image represented as a uchar4 and splits it into
137 | // three single-channel images (redChannel, greenChannel, blueChannel).
138 | __global__ void separateChannels(const uchar4 *const inputImageRGBA,
139 |                                  int numRows, int numCols,
140 |                                  unsigned char *const redChannel,
141 |                                  unsigned char *const greenChannel,
142 |                                  unsigned char *const blueChannel) {
143 |   // TODO
144 |   // (the body is not implemented in this skeleton)
145 |   // NOTE: Be careful not to try to access memory that is outside the bounds of
146 |   // the image. You'll want code that performs the following check before
147 |   // accessing GPU memory:
148 |   //
149 |   // if ( absolute_image_position_x >= numCols ||
150 |   //      absolute_image_position_y >= numRows )
151 |   // {
152 |   //     return;
153 |   // }
154 | }
155 | 
156 | // This kernel takes in three color channels and recombines them
157 | // into one image.  The alpha channel is set to 255 to represent
158 | // that this image has no transparency. One thread handles one pixel of a 2D launch.
159 | __global__ void recombineChannels(const unsigned char *const redChannel,
160 |                                   const unsigned char *const greenChannel,
161 |                                   const unsigned char *const blueChannel,
162 |                                   uchar4 *const outputImageRGBA, int numRows,
163 |                                   int numCols) {
164 |   const int2 thread_2D_pos = make_int2(blockIdx.x * blockDim.x + threadIdx.x, // .x is the column, .y is the row
165 |                                        blockIdx.y * blockDim.y + threadIdx.y);
166 | 
167 |   const int thread_1D_pos = thread_2D_pos.y * numCols + thread_2D_pos.x; // row-major offset; only dereferenced after the bounds check below
168 | 
169 |   // make sure we don't try and access memory outside the image
170 |   // by having any threads mapped there return early
171 |   if (thread_2D_pos.x >= numCols || thread_2D_pos.y >= numRows)
172 |     return;
173 | 
174 |   unsigned char red = redChannel[thread_1D_pos];
175 |   unsigned char green = greenChannel[thread_1D_pos];
176 |   unsigned char blue = blueChannel[thread_1D_pos];
177 | 
178 |   // Alpha should be 255 for no transparency
179 |   uchar4 outputPixel = make_uchar4(red, green, blue, 255);
180 | 
181 |   outputImageRGBA[thread_1D_pos] = outputPixel;
182 | }
183 | 
184 | unsigned char *d_red, *d_green, *d_blue; // per-channel device buffers: allocated in allocateMemoryAndCopyToGPU, freed in cleanup
185 | float *d_filter; // device pointer for the filter; its allocation and upload are left as TODOs below
186 | // Allocates the d_red/d_green/d_blue device buffers (one byte per pixel each); allocating and uploading d_filter is still a TODO.
187 | void allocateMemoryAndCopyToGPU(const size_t numRowsImage,
188 |                                 const size_t numColsImage,
189 |                                 const float *const h_filter,
190 |                                 const size_t filterWidth) {
191 | 
192 |   // allocate memory for the three different channels
193 |   // original
194 |   checkCudaErrors(
195 |       cudaMalloc(&d_red, sizeof(unsigned char) * numRowsImage * numColsImage));
196 |   checkCudaErrors(cudaMalloc(&d_green, sizeof(unsigned char) * numRowsImage *
197 |                                            numColsImage));
198 |   checkCudaErrors(
199 |       cudaMalloc(&d_blue, sizeof(unsigned char) * numRowsImage * numColsImage));
200 | 
201 |   // TODO:
202 |   // Allocate memory for the filter on the GPU
203 |   // Use the pointer d_filter that we have already declared for you
204 |   // You need to allocate memory for the filter with cudaMalloc
205 |   // be sure to use checkCudaErrors like the above examples to
206 |   // be able to tell if anything goes wrong
207 |   // IMPORTANT: Notice that we pass a pointer to a pointer to cudaMalloc
208 | 
209 |   // TODO:
210 |   // Copy the filter on the host (h_filter) to the memory you just allocated
211 |   // on the GPU.  cudaMemcpy(dst, src, numBytes, cudaMemcpyHostToDevice);
212 |   // Remember to use checkCudaErrors! (h_filter and filterWidth are unused until this is done)
213 | }
214 | // Host-side skeleton: separate channels (TODO), blur each channel (TODO), then recombine d_redBlurred/d_greenBlurred/d_blueBlurred into d_outputImageRGBA.
215 | void your_gaussian_blur(const uchar4 *const h_inputImageRGBA,
216 |                         uchar4 *const d_inputImageRGBA,
217 |                         uchar4 *const d_outputImageRGBA, const size_t numRows,
218 |                         const size_t numCols, unsigned char *d_redBlurred,
219 |                         unsigned char *d_greenBlurred,
220 |                         unsigned char *d_blueBlurred, const int filterWidth) {
221 |   // TODO: Set reasonable block size (i.e., number of threads per block)
222 |   const dim3 blockSize; // default-constructed dim3 is (1, 1, 1) until this TODO is done
223 | 
224 |   // TODO:
225 |   // Compute correct grid size (i.e., number of blocks per kernel launch)
226 |   // from the image size and block size.
227 |   const dim3 gridSize;
228 | 
229 |   // TODO: Launch a kernel for separating the RGBA image into different color
230 |   // channels
231 | 
232 |   // Synchronize and check both the synchronization status and the launch
233 |   // status, so that a failing kernel is reported right here.
234 |   checkCudaErrors(cudaDeviceSynchronize());
235 |   checkCudaErrors(cudaGetLastError());
236 | 
237 |   // TODO: Call your convolution kernel here 3 times, once for each color
238 |   // channel.
239 | 
240 |   // Again, synchronize and check both statuses immediately after launching
241 |   // your kernels to make sure that you didn't make any mistakes; an
242 |   // unchecked synchronize would silently drop an execution error.
243 |   checkCudaErrors(cudaDeviceSynchronize());
244 |   checkCudaErrors(cudaGetLastError());
245 | 
246 |   // Now we recombine your results. We take care of launching this kernel for
247 |   // you.
248 |   //
249 |   // NOTE: This kernel launch depends on the gridSize and blockSize variables,
250 |   // which you must set yourself.
251 |   recombineChannels<<<gridSize, blockSize>>>(d_redBlurred, d_greenBlurred,
252 |                                              d_blueBlurred, d_outputImageRGBA,
253 |                                              numRows, numCols);
254 |   checkCudaErrors(cudaDeviceSynchronize());
255 |   checkCudaErrors(cudaGetLastError());
256 | }
257 | 
258 | // Free all the memory that we allocated (currently only d_red, d_green and d_blue; d_filter is not freed here)
259 | // TODO: make sure you free any arrays that you allocated
260 | void cleanup() { // NOTE(review): the driver in main.cpp calls cleanUp(), spelled differently - confirm this function is actually invoked
261 |   checkCudaErrors(cudaFree(d_red));
262 |   checkCudaErrors(cudaFree(d_green));
263 |   checkCudaErrors(cudaFree(d_blue));
264 | }
265 | 


--------------------------------------------------------------------------------
/assignments/HW2/timer.h:
--------------------------------------------------------------------------------
 1 | #ifndef GPU_TIMER_H__
 2 | #define GPU_TIMER_H__
 3 | 
 4 | #include <cuda_runtime.h>
 5 | // Event-based timer: records two CUDA events on stream 0 and reports the time between them. CUDA return codes are not checked here.
 6 | struct GpuTimer {
 7 |   cudaEvent_t start;
 8 |   cudaEvent_t stop;
 9 | 
10 |   GpuTimer() { // creates both events
11 |     cudaEventCreate(&start);
12 |     cudaEventCreate(&stop);
13 |   }
14 | 
15 |   ~GpuTimer() { // destroys both events
16 |     cudaEventDestroy(start);
17 |     cudaEventDestroy(stop);
18 |   }
19 | 
20 |   void Start() { cudaEventRecord(start, 0); }
21 | 
22 |   void Stop() { cudaEventRecord(stop, 0); }
23 | 
24 |   float Elapsed() { // waits for the stop event, then returns the value from cudaEventElapsedTime (milliseconds per the CUDA runtime docs)
25 |     float elapsed;
26 |     cudaEventSynchronize(stop);
27 |     cudaEventElapsedTime(&elapsed, start, stop);
28 |     return elapsed;
29 |   }
30 | };
31 | 
32 | #endif /* GPU_TIMER_H__ */
33 | 


--------------------------------------------------------------------------------
/assignments/HW2/utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef UTILS_H__
 2 | #define UTILS_H__
 3 | 
 4 | #include <iostream>
 5 | #include <iomanip>
 6 | #include <cuda.h>
 7 | #include <cuda_runtime.h>
 8 | #include <cuda_runtime_api.h>
 9 | #include <cassert>
10 | #include <algorithm>
11 | // checkCudaErrors(call): passes the call's status, its source text, and the current file and line to check().
12 | #define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)
13 | // check: if err != cudaSuccess, prints the location, the CUDA error string and the failing expression to stderr, then exits with status 1.
14 | template <typename T>
15 | void check(T err, const char *const func, const char *const file,
16 |            const int line) {
17 |   if (err != cudaSuccess) {
18 |     std::cerr << "CUDA error at: " << file << ":" << line << std::endl;
19 |     std::cerr << cudaGetErrorString(err) << " " << func << std::endl;
20 |     exit(1);
21 |   }
22 | }
23 | // Element-wise exact comparison of ref and gpu over numElem entries; on the first mismatch prints both values and exits with status 1.
24 | template <typename T>
25 | void checkResultsExact(const T *const ref, const T *const gpu, size_t numElem) {
26 |   // check that the GPU result matches the CPU result
27 |   for (size_t i = 0; i < numElem; ++i) {
28 |     if (ref[i] != gpu[i]) {
29 |       std::cerr << "Difference at pos " << i << std::endl;
30 |       // the + is magic to convert char to int without messing
31 |       // with other types
32 |       std::cerr << "Reference: " << std::setprecision(17) << +ref[i]
33 |                 << "\nGPU      : " << +gpu[i] << std::endl;
34 |       exit(1);
35 |     }
36 |   }
37 | }
38 | // Tolerant comparison: exits if any element differs by more than eps1, or if the fraction of elements with a non-zero difference exceeds eps2.
39 | template <typename T>
40 | void checkResultsEps(const T *const ref, const T *const gpu, size_t numElem,
41 |                      double eps1, double eps2) {
42 |   assert(eps1 >= 0 && eps2 >= 0);
43 |   unsigned long long totalDiff = 0; // accumulated below but never read in this function
44 |   unsigned numSmallDifferences = 0;
45 |   for (size_t i = 0; i < numElem; ++i) {
46 |     // subtract smaller from larger in case of unsigned types
47 |     T smaller = std::min(ref[i], gpu[i]);
48 |     T larger = std::max(ref[i], gpu[i]);
49 |     T diff = larger - smaller;
50 |     if (diff > 0 && diff <= eps1) { // within tolerance but not identical
51 |       numSmallDifferences++;
52 |     } else if (diff > eps1) { // per-element tolerance violated: report and stop
53 |       std::cerr << "Difference at pos " << +i << " exceeds tolerance of "
54 |                 << eps1 << std::endl;
55 |       std::cerr << "Reference: " << std::setprecision(17) << +ref[i]
56 |                 << "\nGPU      : " << +gpu[i] << std::endl;
57 |       exit(1);
58 |     }
59 |     totalDiff += diff * diff;
60 |   }
61 |   double percentSmallDifferences = // despite the name this is a fraction in [0, 1]; it is scaled by 100 only when printed
62 |       (double)numSmallDifferences / (double)numElem;
63 |   if (percentSmallDifferences > eps2) {
64 |     std::cerr << "Total percentage of non-zero pixel difference between the "
65 |                  "two images exceeds "
66 |               << 100.0 * eps2 << "%" << std::endl;
67 |     std::cerr << "Percentage of non-zero pixel differences: "
68 |               << 100.0 * percentSmallDifferences << "%" << std::endl;
69 |     exit(1);
70 |   }
71 | }
72 | 
73 | // Uses the autodesk method of image comparison: counts elements whose absolute difference exceeds variance.
74 | // Note that the tolerance here is in PIXELS, not a percentage of input pixels
75 | template <typename T>
76 | void checkResultsAutodesk(const T *const ref, const T *const gpu,
77 |                           size_t numElem, double variance, size_t tolerance) {
78 | 
79 |   size_t numBadPixels = 0;
80 |   for (size_t i = 0; i < numElem; ++i) {
81 |     T smaller = std::min(ref[i], gpu[i]); // larger - smaller avoids wrap-around for unsigned T
82 |     T larger = std::max(ref[i], gpu[i]);
83 |     T diff = larger - smaller;
84 |     if (diff > variance)
85 |       ++numBadPixels;
86 |   }
87 | 
88 |   if (numBadPixels > tolerance) { // exits with status 1 when more than tolerance elements are bad
89 |     std::cerr << "Too many bad pixels in the image." << numBadPixels << "/"
90 |               << tolerance << std::endl;
91 |     exit(1);
92 |   }
93 | }
94 | 
95 | #endif
96 | 


--------------------------------------------------------------------------------
/assignments/HW3/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | ############################################################################
 2 | # <summary> CMakeLists.txt for OpenCV and CUDA. </summary>
 3 | # <date>    2012-02-07          </date>
 4 | # <author>  Quan Tran Minh. edit by Johannes Kast, Michael Sarahan </author>
 5 | # <email>   quantm@unist.ac.kr  kast.jo@googlemail.com msarahan@gmail.com</email>
 6 | ############################################################################
 7 | # minimum required cmake version
 8 | cmake_minimum_required(VERSION 2.8)
 9 | find_package(CUDA QUIET REQUIRED)
10 | 
11 | SET (compare_files compare.cpp)
12 | 
13 | file( GLOB  hdr *.hpp *.h )
14 | file( GLOB  cu  *.cu)
15 | SET (HW3_files main.cpp loadSaveImage.cpp reference_calc.cpp compare.cpp)
16 |     
17 | CUDA_ADD_EXECUTABLE(HW3 ${HW3_files} ${hdr} ${cu})
18 | 


--------------------------------------------------------------------------------
/assignments/HW3/HW3.cu:
--------------------------------------------------------------------------------
  1 | #include "utils.h"
  2 | #include <string>
  3 | #include "loadSaveImage.h"
  4 | #include <thrust/extrema.h>
  5 | 
  6 | // chroma-LogLuminance Space (device buffers, one float per pixel, allocated in preProcess)
  7 | static float *d_x__;
  8 | static float *d_y__;
  9 | static float *d_logY__;
 10 | 
 11 | // memory for the cdf (numBins unsigned ints on the device)
 12 | static unsigned int *d_cdf__;
 13 | 
 14 | static const int numBins = 1024;
 15 | 
 16 | size_t numRows__; // image dimensions, filled in by loadImageHDR inside preProcess
 17 | size_t numCols__;
 18 | 
 19 | /* Copied from Mike's IPython notebook with some minor modifications
 20 |  * Mainly double precision constants to floats and log10 -> log10f
 21 |  * Also removed Luminance (Y) channel since it is never used       eke*/
 22 | // Per-pixel kernel (2D launch, one thread per pixel): writes chromaticity x, y and log10(delta + Y) for each RGB input pixel.
 23 | __global__ void rgb_to_xyY(float *d_r, float *d_g, float *d_b, float *d_x,
 24 |                            float *d_y, float *d_log_Y, float delta,
 25 |                            int num_pixels_y, int num_pixels_x) {
 26 |   int ny = num_pixels_y;
 27 |   int nx = num_pixels_x;
 28 |   int2 image_index_2d = make_int2((blockIdx.x * blockDim.x) + threadIdx.x,
 29 |                                   (blockIdx.y * blockDim.y) + threadIdx.y);
 30 |   int image_index_1d = (nx * image_index_2d.y) + image_index_2d.x;
 31 | 
 32 |   if (image_index_2d.x < nx && image_index_2d.y < ny) { // threads outside the image do nothing
 33 |     float r = d_r[image_index_1d];
 34 |     float g = d_g[image_index_1d];
 35 |     float b = d_b[image_index_1d];
 36 | 
 37 |     float X = (r * 0.4124f) + (g * 0.3576f) + (b * 0.1805f); // presumably the sRGB -> CIE XYZ matrix - confirm
 38 |     float Y = (r * 0.2126f) + (g * 0.7152f) + (b * 0.0722f);
 39 |     float Z = (r * 0.0193f) + (g * 0.1192f) + (b * 0.9505f);
 40 | 
 41 |     float L = X + Y + Z;
 42 |     float x = X / L; // no guard against L == 0 (e.g. an all-zero pixel)
 43 |     float y = Y / L;
 44 | 
 45 |     float log_Y = log10f(delta + Y); // delta offsets Y before the logarithm
 46 | 
 47 |     d_x[image_index_1d] = x;
 48 |     d_y[image_index_1d] = y;
 49 |     d_log_Y[image_index_1d] = log_Y;
 50 |   }
 51 | }
 52 | 
 53 | /* Copied from Mike's IPython notebook *
 54 |    Modified just by having threads read the
 55 |    normalization constant directly from device memory
 56 |    instead of copying it back                          */
 57 | // 1D kernel: writes d_output_cdf[i] = d_input_cdf[i] / d_input_cdf[n - 1] as float for every i < n.
 58 | __global__ void normalize_cdf(unsigned int *d_input_cdf, float *d_output_cdf,
 59 |                               int n) {
 60 |   const float normalization_constant = 1.f / d_input_cdf[n - 1]; // read by every thread, before the bounds check; no guard against a zero last entry
 61 | 
 62 |   int global_index_1d = (blockIdx.x * blockDim.x) + threadIdx.x;
 63 | 
 64 |   if (global_index_1d < n) {
 65 |     unsigned int input_value = d_input_cdf[global_index_1d];
 66 |     float output_value = input_value * normalization_constant;
 67 | 
 68 |     d_output_cdf[global_index_1d] = output_value;
 69 |   }
 70 | }
 71 | 
 72 | /* Copied from Mike's IPython notebook *
 73 |    Modified double constants -> float  *
 74 |    Perform tone mapping based upon new *
 75 |    luminance scaling                   */
 76 | // Per-pixel kernel (2D launch): looks up a new Y from d_cdf_norm via the pixel's log-luminance bin, then writes the recomputed r, g, b.
 77 | __global__ void tonemap(float *d_x, float *d_y, float *d_log_Y,
 78 |                         float *d_cdf_norm, float *d_r_new, float *d_g_new,
 79 |                         float *d_b_new, float min_log_Y, float max_log_Y,
 80 |                         float log_Y_range, int num_bins, int num_pixels_y,
 81 |                         int num_pixels_x) {
 82 |   int ny = num_pixels_y;
 83 |   int nx = num_pixels_x;
 84 |   int2 image_index_2d = make_int2((blockIdx.x * blockDim.x) + threadIdx.x,
 85 |                                   (blockIdx.y * blockDim.y) + threadIdx.y);
 86 |   int image_index_1d = (nx * image_index_2d.y) + image_index_2d.x;
 87 | 
 88 |   if (image_index_2d.x < nx && image_index_2d.y < ny) { // threads outside the image do nothing
 89 |     float x = d_x[image_index_1d];
 90 |     float y = d_y[image_index_1d];
 91 |     float log_Y = d_log_Y[image_index_1d];
 92 |     int bin_index = // capped at num_bins - 1; max_log_Y itself is not used in this kernel
 93 |         min(num_bins - 1, int((num_bins * (log_Y - min_log_Y)) / log_Y_range));
 94 |     float Y_new = d_cdf_norm[bin_index];
 95 | 
 96 |     float X_new = x * (Y_new / y); // no guard against y == 0
 97 |     float Z_new = (1 - x - y) * (Y_new / y);
 98 | 
 99 |     float r_new = (X_new * 3.2406f) + (Y_new * -1.5372f) + (Z_new * -0.4986f); // presumably the CIE XYZ -> sRGB matrix - confirm
100 |     float g_new = (X_new * -0.9689f) + (Y_new * 1.8758f) + (Z_new * 0.0415f);
101 |     float b_new = (X_new * 0.0557f) + (Y_new * -0.2040f) + (Z_new * 1.0570f);
102 | 
103 |     d_r_new[image_index_1d] = r_new;
104 |     d_g_new[image_index_1d] = g_new;
105 |     d_b_new[image_index_1d] = b_new;
106 |   }
107 | }
108 | 
// Loads the HDR image `filename`, converts it to xyY on the device and hands
// the buffers the student code needs back to the caller.
//
// Outputs:
//   d_luminance  - receives d_logY__, the device buffer passed to rgb_to_xyY
//                  as its log-luminance argument (one float per pixel)
//   d_cdf        - receives d_cdf__, a zero-filled device buffer holding
//                  numBins unsigned ints
//   numRows,
//   numCols      - image dimensions as reported by loadImageHDR
//   numberOfBins - receives numBins
//
// The return type is void since any internal error is handled by quitting;
// there is no point in returning error codes.
//
// Side effects: stores the image size in numRows__/numCols__ and allocates
// d_x__, d_y__, d_logY__ and d_cdf__; cleanupGlobalMemory() releases them.
void preProcess(float **d_luminance, unsigned int **d_cdf, size_t *numRows,
                size_t *numCols, unsigned int *numberOfBins,
                const std::string &filename) {
  // make sure the context initializes ok
  checkCudaErrors(cudaFree(0));

  float *imgPtr; // we become responsible for this pointer
  loadImageHDR(filename, &imgPtr, &numRows__, &numCols__);
  *numRows = numRows__;
  *numCols = numCols__;

  // first thing to do is split incoming BGR float data into separate channels
  size_t numPixels = numRows__ * numCols__;
  float *red = new float[numPixels];
  float *green = new float[numPixels];
  float *blue = new float[numPixels];

  // Remember: the image is loaded as interleaved BGR
  for (size_t i = 0; i < numPixels; ++i) {
    blue[i] = imgPtr[3 * i + 0];
    green[i] = imgPtr[3 * i + 1];
    red[i] = imgPtr[3 * i + 2];
  }

  delete[] imgPtr; // being good citizens, we release the resources
                   // allocated in loadImageHDR

  float *d_red, *d_green, *d_blue; // RGB space

  size_t channelSize = sizeof(float) * numPixels;

  checkCudaErrors(cudaMalloc(&d_red, channelSize));
  checkCudaErrors(cudaMalloc(&d_green, channelSize));
  checkCudaErrors(cudaMalloc(&d_blue, channelSize));
  checkCudaErrors(cudaMalloc(&d_x__, channelSize));
  checkCudaErrors(cudaMalloc(&d_y__, channelSize));
  checkCudaErrors(cudaMalloc(&d_logY__, channelSize));

  checkCudaErrors(cudaMemcpy(d_red, red, channelSize, cudaMemcpyHostToDevice));
  checkCudaErrors(
      cudaMemcpy(d_green, green, channelSize, cudaMemcpyHostToDevice));
  checkCudaErrors(
      cudaMemcpy(d_blue, blue, channelSize, cudaMemcpyHostToDevice));

  // convert from RGB space to chrominance/luminance space xyY
  // One thread per pixel; the grid is rounded up so partial tiles are covered.
  const dim3 blockSize(32, 16, 1);
  const dim3 gridSize((numCols__ + blockSize.x - 1) / blockSize.x,
                      (numRows__ + blockSize.y - 1) / blockSize.y, 1);
  // NOTE(review): .0001f is presumably a small offset that keeps the
  // logarithm finite for black pixels - confirm against rgb_to_xyY.
  rgb_to_xyY<<<gridSize, blockSize>>>(d_red, d_green, d_blue, d_x__, d_y__,
                                      d_logY__, .0001f, numRows__, numCols__);

  // A bad launch configuration is reported by cudaGetLastError(); a fault
  // raised while the kernel runs is returned by the synchronizing call, so
  // both results are checked instead of dropping the second one.
  checkCudaErrors(cudaGetLastError());
  checkCudaErrors(cudaDeviceSynchronize());

  *d_luminance = d_logY__;

  // allocate memory for the cdf of the histogram
  *numberOfBins = numBins;
  checkCudaErrors(cudaMalloc(&d_cdf__, sizeof(unsigned int) * numBins));
  checkCudaErrors(cudaMemset(d_cdf__, 0, sizeof(unsigned int) * numBins));

  *d_cdf = d_cdf__;

  // The separate RGB channels were only needed as kernel input.
  checkCudaErrors(cudaFree(d_red));
  checkCudaErrors(cudaFree(d_green));
  checkCudaErrors(cudaFree(d_blue));

  delete[] red;
  delete[] green;
  delete[] blue;
}
182 | 
// Tone-maps the image using the CDF currently stored in d_cdf__ and writes the
// result to `output_file`.
//
// Parameters:
//   output_file - path handed to saveImageHDR
//   numRows,
//   numCols     - image dimensions; used for the launch grid, the kernel
//                 bounds and the saved image
//   min_log_Y,
//   max_log_Y   - log-luminance range used to map a pixel to a CDF bin
//
// Buffer sizes are derived from numRows__/numCols__ (set by preProcess), so
// the numRows/numCols arguments are expected to match them.
//
// Every device and host buffer allocated here is released before returning;
// the function is called more than once per run, so nothing may be left behind.
void postProcess(const std::string &output_file, size_t numRows, size_t numCols,
                 float min_log_Y, float max_log_Y) {
  // Kept as size_t: the product of two size_t values must not be narrowed
  // before it is used to size allocations.
  const size_t numPixels = numRows__ * numCols__;
  const size_t channelBytes = sizeof(float) * numPixels;

  const int numThreads = 192;

  float *d_cdf_normalized;

  checkCudaErrors(cudaMalloc(&d_cdf_normalized, sizeof(float) * numBins));

  // first normalize the cdf to a maximum value of 1
  // this is how we compress the range of the luminance channel
  normalize_cdf<<<(numBins + numThreads - 1) / numThreads, numThreads>>>(
      d_cdf__, d_cdf_normalized, numBins);

  // Check both the launch itself and the asynchronous execution result.
  checkCudaErrors(cudaGetLastError());
  checkCudaErrors(cudaDeviceSynchronize());

  // allocate memory for the output RGB channels
  float *h_red, *h_green, *h_blue;
  float *d_red, *d_green, *d_blue;

  h_red = new float[numPixels];
  h_green = new float[numPixels];
  h_blue = new float[numPixels];

  checkCudaErrors(cudaMalloc(&d_red, channelBytes));
  checkCudaErrors(cudaMalloc(&d_green, channelBytes));
  checkCudaErrors(cudaMalloc(&d_blue, channelBytes));

  float log_Y_range = max_log_Y - min_log_Y;

  const dim3 blockSize(32, 16, 1);
  const dim3 gridSize((numCols + blockSize.x - 1) / blockSize.x,
                      (numRows + blockSize.y - 1) / blockSize.y);
  // next perform the actual tone-mapping
  // we map each luminance value to its new value
  // and then transform back to RGB space
  tonemap<<<gridSize, blockSize>>>(d_x__, d_y__, d_logY__, d_cdf_normalized,
                                   d_red, d_green, d_blue, min_log_Y, max_log_Y,
                                   log_Y_range, numBins, numRows, numCols);

  checkCudaErrors(cudaGetLastError());
  checkCudaErrors(cudaDeviceSynchronize());

  checkCudaErrors(
      cudaMemcpy(h_red, d_red, channelBytes, cudaMemcpyDeviceToHost));
  checkCudaErrors(
      cudaMemcpy(h_green, d_green, channelBytes, cudaMemcpyDeviceToHost));
  checkCudaErrors(
      cudaMemcpy(h_blue, d_blue, channelBytes, cudaMemcpyDeviceToHost));

  // The device copies are no longer needed once the results are on the host.
  checkCudaErrors(cudaFree(d_red));
  checkCudaErrors(cudaFree(d_green));
  checkCudaErrors(cudaFree(d_blue));
  checkCudaErrors(cudaFree(d_cdf_normalized));

  // recombine the image channels (interleaved BGR, as saveImageHDR expects)
  float *imageHDR = new float[numPixels * 3];

  for (size_t i = 0; i < numPixels; ++i) {
    imageHDR[3 * i + 0] = h_blue[i];
    imageHDR[3 * i + 1] = h_green[i];
    imageHDR[3 * i + 2] = h_red[i];
  }

  saveImageHDR(imageHDR, numRows, numCols, output_file);

  // cleanup
  delete[] imageHDR;
  delete[] h_red;
  delete[] h_green;
  delete[] h_blue;
}
254 | 
// Releases the four device buffers that preProcess allocated and stored in
// file-level variables: the histogram CDF and the x / y / log-Y channels.
void cleanupGlobalMemory(void) {
  // The buffers are independent of each other; they are released in the
  // reverse of the order in which preProcess allocated them.
  checkCudaErrors(cudaFree(d_cdf__));
  checkCudaErrors(cudaFree(d_logY__));
  checkCudaErrors(cudaFree(d_y__));
  checkCudaErrors(cudaFree(d_x__));
}
261 | 


--------------------------------------------------------------------------------
/assignments/HW3/Makefile:
--------------------------------------------------------------------------------
  1 | # CMAKE generated file: DO NOT EDIT!
  2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20
  3 | 
  4 | # Default target executed when no arguments are given to make.
  5 | default_target: all
  6 | .PHONY : default_target
  7 | 
  8 | # Allow only one "make -f Makefile2" at a time, but pass parallelism.
  9 | .NOTPARALLEL:
 10 | 
 11 | #=============================================================================
 12 | # Special targets provided by cmake.
 13 | 
 14 | # Disable implicit rules so canonical targets will work.
 15 | .SUFFIXES:
 16 | 
 17 | # Disable VCS-based implicit rules.
 18 | % : %,v
 19 | 
 20 | # Disable VCS-based implicit rules.
 21 | % : RCS/%
 22 | 
 23 | # Disable VCS-based implicit rules.
 24 | % : RCS/%,v
 25 | 
 26 | # Disable VCS-based implicit rules.
 27 | % : SCCS/s.%
 28 | 
 29 | # Disable VCS-based implicit rules.
 30 | % : s.%
 31 | 
 32 | .SUFFIXES: .hpux_make_needs_suffix_list
 33 | 
 34 | # Command-line flag to silence nested $(MAKE).
 35 | $(VERBOSE)MAKESILENT = -s
 36 | 
 37 | #Suppress display of executed commands.
 38 | $(VERBOSE).SILENT:
 39 | 
 40 | # A target that is always out of date.
 41 | cmake_force:
 42 | .PHONY : cmake_force
 43 | 
 44 | #=============================================================================
 45 | # Set environment variables for the build.
 46 | 
 47 | # The shell in which to execute make rules.
 48 | SHELL = /bin/sh
 49 | 
 50 | # The CMake executable.
 51 | CMAKE_COMMAND = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake
 52 | 
 53 | # The command to remove a file.
 54 | RM = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E rm -f
 55 | 
 56 | # Escaping for special characters.
 57 | EQUALS = =
 58 | 
 59 | # The top-level source directory on which CMake was run.
 60 | CMAKE_SOURCE_DIR = /data/home/chilli/cluster/work/CS344/assignments
 61 | 
 62 | # The top-level build directory on which CMake was run.
 63 | CMAKE_BINARY_DIR = /data/home/chilli/cluster/work/CS344/assignments
 64 | 
 65 | #=============================================================================
 66 | # Targets provided globally by CMake.
 67 | 
 68 | # Special rule for the target rebuild_cache
 69 | rebuild_cache:
 70 | 	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..."
 71 | 	/usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR)
 72 | .PHONY : rebuild_cache
 73 | 
 74 | # Special rule for the target rebuild_cache
 75 | rebuild_cache/fast: rebuild_cache
 76 | .PHONY : rebuild_cache/fast
 77 | 
 78 | # Special rule for the target edit_cache
 79 | edit_cache:
 80 | 	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "No interactive CMake dialog available..."
 81 | 	/usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available.
 82 | .PHONY : edit_cache
 83 | 
 84 | # Special rule for the target edit_cache
 85 | edit_cache/fast: edit_cache
 86 | .PHONY : edit_cache/fast
 87 | 
 88 | # The main all target
 89 | all: cmake_check_build_system
 90 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles /data/home/chilli/cluster/work/CS344/assignments/homework/HW3//CMakeFiles/progress.marks
 91 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW3/all
 92 | 	$(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles 0
 93 | .PHONY : all
 94 | 
 95 | # The main clean target
 96 | clean:
 97 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW3/clean
 98 | .PHONY : clean
 99 | 
100 | # The main clean target
101 | clean/fast: clean
102 | .PHONY : clean/fast
103 | 
104 | # Prepare targets for installation.
105 | preinstall: all
106 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW3/preinstall
107 | .PHONY : preinstall
108 | 
109 | # Prepare targets for installation.
110 | preinstall/fast:
111 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW3/preinstall
112 | .PHONY : preinstall/fast
113 | 
114 | # clear depends
115 | depend:
116 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1
117 | .PHONY : depend
118 | 
119 | # Convenience name for target.
120 | homework/HW3/CMakeFiles/HW3.dir/rule:
121 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW3/CMakeFiles/HW3.dir/rule
122 | .PHONY : homework/HW3/CMakeFiles/HW3.dir/rule
123 | 
124 | # Convenience name for target.
125 | HW3: homework/HW3/CMakeFiles/HW3.dir/rule
126 | .PHONY : HW3
127 | 
128 | # fast build rule for target.
129 | HW3/fast:
130 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/build
131 | .PHONY : HW3/fast
132 | 
133 | compare.o: compare.cpp.o
134 | .PHONY : compare.o
135 | 
136 | # target to build an object file
137 | compare.cpp.o:
138 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/compare.cpp.o
139 | .PHONY : compare.cpp.o
140 | 
141 | compare.i: compare.cpp.i
142 | .PHONY : compare.i
143 | 
144 | # target to preprocess a source file
145 | compare.cpp.i:
146 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/compare.cpp.i
147 | .PHONY : compare.cpp.i
148 | 
149 | compare.s: compare.cpp.s
150 | .PHONY : compare.s
151 | 
152 | # target to generate assembly for a file
153 | compare.cpp.s:
154 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/compare.cpp.s
155 | .PHONY : compare.cpp.s
156 | 
157 | loadSaveImage.o: loadSaveImage.cpp.o
158 | .PHONY : loadSaveImage.o
159 | 
160 | # target to build an object file
161 | loadSaveImage.cpp.o:
162 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/loadSaveImage.cpp.o
163 | .PHONY : loadSaveImage.cpp.o
164 | 
165 | loadSaveImage.i: loadSaveImage.cpp.i
166 | .PHONY : loadSaveImage.i
167 | 
168 | # target to preprocess a source file
169 | loadSaveImage.cpp.i:
170 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/loadSaveImage.cpp.i
171 | .PHONY : loadSaveImage.cpp.i
172 | 
173 | loadSaveImage.s: loadSaveImage.cpp.s
174 | .PHONY : loadSaveImage.s
175 | 
176 | # target to generate assembly for a file
177 | loadSaveImage.cpp.s:
178 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/loadSaveImage.cpp.s
179 | .PHONY : loadSaveImage.cpp.s
180 | 
181 | main.o: main.cpp.o
182 | .PHONY : main.o
183 | 
184 | # target to build an object file
185 | main.cpp.o:
186 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/main.cpp.o
187 | .PHONY : main.cpp.o
188 | 
189 | main.i: main.cpp.i
190 | .PHONY : main.i
191 | 
192 | # target to preprocess a source file
193 | main.cpp.i:
194 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/main.cpp.i
195 | .PHONY : main.cpp.i
196 | 
197 | main.s: main.cpp.s
198 | .PHONY : main.s
199 | 
200 | # target to generate assembly for a file
201 | main.cpp.s:
202 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/main.cpp.s
203 | .PHONY : main.cpp.s
204 | 
205 | reference_calc.o: reference_calc.cpp.o
206 | .PHONY : reference_calc.o
207 | 
208 | # target to build an object file
209 | reference_calc.cpp.o:
210 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/reference_calc.cpp.o
211 | .PHONY : reference_calc.cpp.o
212 | 
213 | reference_calc.i: reference_calc.cpp.i
214 | .PHONY : reference_calc.i
215 | 
216 | # target to preprocess a source file
217 | reference_calc.cpp.i:
218 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/reference_calc.cpp.i
219 | .PHONY : reference_calc.cpp.i
220 | 
221 | reference_calc.s: reference_calc.cpp.s
222 | .PHONY : reference_calc.s
223 | 
224 | # target to generate assembly for a file
225 | reference_calc.cpp.s:
226 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW3/CMakeFiles/HW3.dir/build.make homework/HW3/CMakeFiles/HW3.dir/reference_calc.cpp.s
227 | .PHONY : reference_calc.cpp.s
228 | 
229 | # Help Target
230 | help:
231 | 	@echo "The following are some of the valid targets for this Makefile:"
232 | 	@echo "... all (the default if no target is provided)"
233 | 	@echo "... clean"
234 | 	@echo "... depend"
235 | 	@echo "... edit_cache"
236 | 	@echo "... rebuild_cache"
237 | 	@echo "... HW3"
238 | 	@echo "... compare.o"
239 | 	@echo "... compare.i"
240 | 	@echo "... compare.s"
241 | 	@echo "... loadSaveImage.o"
242 | 	@echo "... loadSaveImage.i"
243 | 	@echo "... loadSaveImage.s"
244 | 	@echo "... main.o"
245 | 	@echo "... main.i"
246 | 	@echo "... main.s"
247 | 	@echo "... reference_calc.o"
248 | 	@echo "... reference_calc.i"
249 | 	@echo "... reference_calc.s"
250 | .PHONY : help
251 | 
252 | 
253 | 
254 | #=============================================================================
255 | # Special targets to cleanup operation of make.
256 | 
257 | # Special rule to run CMake to check the build system integrity.
258 | # No rule that depends on this can have commands that come from listfiles
259 | # because they might be regenerated.
260 | cmake_check_build_system:
261 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0
262 | .PHONY : cmake_check_build_system
263 | 
264 | 


--------------------------------------------------------------------------------
/assignments/HW3/compare.cpp:
--------------------------------------------------------------------------------
 1 | #include <opencv2/opencv.hpp>
 2 | #include "utils.h"
 3 | 
 4 | void compareImages(std::string reference_filename, std::string test_filename,
 5 |                    bool useEpsCheck, double perPixelError, double globalError) {
 6 |   cv::Mat reference = cv::imread(reference_filename, -1);
 7 |   cv::Mat test = cv::imread(test_filename, -1);
 8 | 
 9 |   cv::Mat diff = abs(reference - test);
10 | 
11 |   cv::Mat diffSingleChannel =
12 |       diff.reshape(1, 0); // convert to 1 channel, same # rows
13 | 
14 |   double minVal, maxVal;
15 | 
16 |   cv::minMaxLoc(diffSingleChannel, &minVal, &maxVal, NULL,
17 |                 NULL); // NULL because we don't care about location
18 | 
19 |   // now perform transform so that we bump values to the full range
20 | 
21 |   diffSingleChannel = (diffSingleChannel - minVal) * (255. / (maxVal - minVal));
22 | 
23 |   diff = diffSingleChannel.reshape(reference.channels(), 0);
24 | 
25 |   cv::imwrite("HW3_differenceImage.png", diff);
26 |   // OK, now we can start comparing values...
27 |   unsigned char *referencePtr = reference.ptr<unsigned char>(0);
28 |   unsigned char *testPtr = test.ptr<unsigned char>(0);
29 | 
30 |   if (useEpsCheck) {
31 |     checkResultsEps(referencePtr, testPtr,
32 |                     reference.rows * reference.cols * reference.channels(),
33 |                     perPixelError, globalError);
34 |   } else {
35 |     checkResultsExact(referencePtr, testPtr,
36 |                       reference.rows * reference.cols * reference.channels());
37 |   }
38 | 
39 |   std::cout << "PASS" << std::endl;
40 |   return;
41 | }
42 | 


--------------------------------------------------------------------------------
/assignments/HW3/compare.h:
--------------------------------------------------------------------------------
#ifndef HW3_H__
#define HW3_H__

#include <string> // std::string is used in the declaration below

// Compares the image stored at test_filename with the one stored at
// reference_filename. With useEpsCheck set, perPixelError and globalError are
// the tolerances of the comparison; otherwise an exact comparison is made.
void compareImages(std::string reference_filename, std::string test_filename,
                   bool useEpsCheck, double perPixelError, double globalError);

#endif
8 | 


--------------------------------------------------------------------------------
/assignments/HW3/loadSaveImage.cpp:
--------------------------------------------------------------------------------
  1 | #include <opencv2/core/core.hpp>
  2 | #include <opencv2/highgui/highgui.hpp>
  3 | #include <opencv2/opencv.hpp>
  4 | #include <vector>
  5 | #include <stdio.h>
  6 | #include "cuda_runtime.h"
  7 | 
  8 | // The caller becomes responsible for the returned pointer. This
  9 | // is done in the interest of keeping this code as simple as possible.
 10 | // In production code this is a bad idea - we should use RAII
 11 | // to ensure the memory is freed.  DO NOT COPY THIS AND USE IN PRODUCTION
 12 | // CODE!!!
 13 | void loadImageHDR(const std::string &filename, float **imagePtr,
 14 |                   size_t *numRows, size_t *numCols) {
 15 |   cv::Mat originImg = cv::imread(filename.c_str(),
 16 |                                  CV_LOAD_IMAGE_COLOR | CV_LOAD_IMAGE_ANYDEPTH);
 17 | 
 18 |   cv::Mat image;
 19 | 
 20 |   if (originImg.type() != CV_32FC3) {
 21 |     originImg.convertTo(image, CV_32FC3);
 22 |   } else {
 23 |     image = originImg;
 24 |   }
 25 | 
 26 |   if (image.empty()) {
 27 |     std::cerr << "Couldn't open file: " << filename << std::endl;
 28 |     exit(1);
 29 |   }
 30 | 
 31 |   if (image.channels() != 3) {
 32 |     std::cerr << "Image must be color!" << std::endl;
 33 |     exit(1);
 34 |   }
 35 | 
 36 |   if (!image.isContinuous()) {
 37 |     std::cerr << "Image isn't continuous!" << std::endl;
 38 |     exit(1);
 39 |   }
 40 | 
 41 |   *imagePtr = new float[image.rows * image.cols * image.channels()];
 42 | 
 43 |   float *cvPtr = image.ptr<float>(0);
 44 |   for (size_t i = 0; i < image.rows * image.cols * image.channels(); ++i)
 45 |     (*imagePtr)[i] = cvPtr[i];
 46 | 
 47 |   *numRows = image.rows;
 48 |   *numCols = image.cols;
 49 | }
 50 | 
 51 | void loadImageRGBA(const std::string &filename, uchar4 **imagePtr,
 52 |                    size_t *numRows, size_t *numCols) {
 53 |   cv::Mat image = cv::imread(filename.c_str(), CV_LOAD_IMAGE_COLOR);
 54 |   if (image.empty()) {
 55 |     std::cerr << "Couldn't open file: " << filename << std::endl;
 56 |     exit(1);
 57 |   }
 58 | 
 59 |   if (image.channels() != 3) {
 60 |     std::cerr << "Image must be color!" << std::endl;
 61 |     exit(1);
 62 |   }
 63 | 
 64 |   if (!image.isContinuous()) {
 65 |     std::cerr << "Image isn't continuous!" << std::endl;
 66 |     exit(1);
 67 |   }
 68 | 
 69 |   cv::Mat imageRGBA;
 70 |   cv::cvtColor(image, imageRGBA, CV_BGR2RGBA);
 71 | 
 72 |   *imagePtr = new uchar4[image.rows * image.cols];
 73 | 
 74 |   unsigned char *cvPtr = imageRGBA.ptr<unsigned char>(0);
 75 |   for (size_t i = 0; i < image.rows * image.cols; ++i) {
 76 |     (*imagePtr)[i].x = cvPtr[4 * i + 0];
 77 |     (*imagePtr)[i].y = cvPtr[4 * i + 1];
 78 |     (*imagePtr)[i].z = cvPtr[4 * i + 2];
 79 |     (*imagePtr)[i].w = cvPtr[4 * i + 3];
 80 |   }
 81 | 
 82 |   *numRows = image.rows;
 83 |   *numCols = image.cols;
 84 | }
 85 | 
 86 | void saveImageRGBA(const uchar4 *const image, const size_t numRows,
 87 |                    const size_t numCols, const std::string &output_file) {
 88 |   int sizes[2];
 89 |   sizes[0] = numRows;
 90 |   sizes[1] = numCols;
 91 |   cv::Mat imageRGBA(2, sizes, CV_8UC4, (void *)image);
 92 |   cv::Mat imageOutputBGR;
 93 |   cv::cvtColor(imageRGBA, imageOutputBGR, CV_RGBA2BGR);
 94 |   // output the image
 95 |   cv::imwrite(output_file.c_str(), imageOutputBGR);
 96 | }
 97 | 
 98 | // output an exr file
 99 | // assumed to already be BGR
100 | void saveImageHDR(const float *const image, const size_t numRows,
101 |                   const size_t numCols, const std::string &output_file) {
102 |   int sizes[2];
103 |   sizes[0] = numRows;
104 |   sizes[1] = numCols;
105 | 
106 |   cv::Mat imageHDR(2, sizes, CV_32FC3, (void *)image);
107 | 
108 |   imageHDR = imageHDR * 255;
109 | 
110 |   cv::imwrite(output_file.c_str(), imageHDR);
111 | }
112 | 


--------------------------------------------------------------------------------
/assignments/HW3/loadSaveImage.h:
--------------------------------------------------------------------------------
 1 | #ifndef LOADSAVEIMAGE_H__
 2 | #define LOADSAVEIMAGE_H__
 3 | 
 4 | #include <string>
 5 | #include <cuda_runtime.h> //for uchar4
 6 | 
 7 | void loadImageHDR(const std::string &filename, float **imagePtr,
 8 |                   size_t *numRows, size_t *numCols);
 9 | 
10 | void loadImageRGBA(const std::string &filename, uchar4 **imagePtr,
11 |                    size_t *numRows, size_t *numCols);
12 | 
13 | void saveImageRGBA(const uchar4 *const image, const size_t numRows,
14 |                    const size_t numCols, const std::string &output_file);
15 | 
16 | void saveImageHDR(const float *const image, const size_t numRows,
17 |                   const size_t numCols, const std::string &output_file);
18 | 
19 | #endif
20 | 


--------------------------------------------------------------------------------
/assignments/HW3/main.cpp:
--------------------------------------------------------------------------------
  1 | // Udacity HW3 Driver
  2 | 
  3 | #include <iostream>
  4 | #include "timer.h"
  5 | #include "utils.h"
  6 | #include <string>
  7 | #include <stdio.h>
  8 | #include <algorithm>
  9 | 
 10 | #include "compare.h"
 11 | #include "reference_calc.h"
 12 | 
 13 | // Functions from HW3.cu
 14 | void preProcess(float **d_luminance, unsigned int **d_cdf, size_t *numRows,
 15 |                 size_t *numCols, unsigned int *numBins,
 16 |                 const std::string &filename);
 17 | 
 18 | void postProcess(const std::string &output_file, size_t numRows, size_t numCols,
 19 |                  float min_logLum, float max_logLum);
 20 | 
 21 | void cleanupGlobalMemory(void);
 22 | 
 23 | // Function from student_func.cu
 24 | void your_histogram_and_prefixsum(const float *const d_luminance,
 25 |                                   unsigned int *const d_cdf, float &min_logLum,
 26 |                                   float &max_logLum, const size_t numRows,
 27 |                                   const size_t numCols, const size_t numBins);
 28 | 
 29 | int main(int argc, char **argv) {
 30 |   float *d_luminance;
 31 |   unsigned int *d_cdf;
 32 | 
 33 |   size_t numRows, numCols;
 34 |   unsigned int numBins;
 35 | 
 36 |   std::string input_file;
 37 |   std::string output_file;
 38 |   std::string reference_file;
 39 |   double perPixelError = 0.0;
 40 |   double globalError = 0.0;
 41 |   bool useEpsCheck = false;
 42 | 
 43 |   switch (argc) {
 44 |   case 2:
 45 |     input_file = std::string(argv[1]);
 46 |     output_file = "HW3_output.png";
 47 |     reference_file = "HW3_reference.png";
 48 |     break;
 49 |   case 3:
 50 |     input_file = std::string(argv[1]);
 51 |     output_file = std::string(argv[2]);
 52 |     reference_file = "HW3_reference.png";
 53 |     break;
 54 |   case 4:
 55 |     input_file = std::string(argv[1]);
 56 |     output_file = std::string(argv[2]);
 57 |     reference_file = std::string(argv[3]);
 58 |     break;
 59 |   case 6:
 60 |     useEpsCheck = true;
 61 |     input_file = std::string(argv[1]);
 62 |     output_file = std::string(argv[2]);
 63 |     reference_file = std::string(argv[3]);
 64 |     perPixelError = atof(argv[4]);
 65 |     globalError = atof(argv[5]);
 66 |     break;
 67 |   default:
 68 |     std::cerr << "Usage: ./HW3 input_file [output_filename] "
 69 |                  "[reference_filename] [perPixelError] [globalError]"
 70 |               << std::endl;
 71 |     exit(1);
 72 |   }
 73 |   // load the image and give us our input and output pointers
 74 |   preProcess(&d_luminance, &d_cdf, &numRows, &numCols, &numBins, input_file);
 75 | 
 76 |   GpuTimer timer;
 77 |   float min_logLum, max_logLum;
 78 |   min_logLum = 0.f;
 79 |   max_logLum = 1.f;
 80 |   timer.Start();
 81 |   // call the students' code
 82 |   your_histogram_and_prefixsum(d_luminance, d_cdf, min_logLum, max_logLum,
 83 |                                numRows, numCols, numBins);
 84 |   timer.Stop();
 85 |   cudaDeviceSynchronize();
 86 |   checkCudaErrors(cudaGetLastError());
 87 |   int err = printf("Your code ran in: %f msecs.\n", timer.Elapsed());
 88 | 
 89 |   if (err < 0) {
 90 |     // Couldn't print! Probably the student closed stdout - bad news
 91 |     std::cerr << "Couldn't print timing information! STDOUT Closed!"
 92 |               << std::endl;
 93 |     exit(1);
 94 |   }
 95 | 
 96 |   float *h_luminance = (float *)malloc(sizeof(float) * numRows * numCols);
 97 |   unsigned int *h_cdf = (unsigned int *)malloc(sizeof(unsigned int) * numBins);
 98 | 
 99 |   checkCudaErrors(cudaMemcpy(h_luminance, d_luminance,
100 |                              numRows * numCols * sizeof(float),
101 |                              cudaMemcpyDeviceToHost));
102 | 
103 |   // check results and output the tone-mapped image
104 |   postProcess(output_file, numRows, numCols, min_logLum, max_logLum);
105 | 
106 |   for (size_t i = 1; i < numCols * numRows; ++i) {
107 |     min_logLum = std::min(h_luminance[i], min_logLum);
108 |     max_logLum = std::max(h_luminance[i], max_logLum);
109 |   }
110 | 
111 |   referenceCalculation(h_luminance, h_cdf, numRows, numCols, numBins,
112 |                        min_logLum, max_logLum);
113 | 
114 |   checkCudaErrors(cudaMemcpy(d_cdf, h_cdf, sizeof(unsigned int) * numBins,
115 |                              cudaMemcpyHostToDevice));
116 | 
117 |   // check results and output the tone-mapped image
118 |   postProcess(reference_file, numRows, numCols, min_logLum, max_logLum);
119 | 
120 |   cleanupGlobalMemory();
121 | 
122 |   compareImages(reference_file, output_file, useEpsCheck, perPixelError,
123 |                 globalError);
124 | 
125 |   return 0;
126 | }
127 | 


--------------------------------------------------------------------------------
/assignments/HW3/memorial.exr:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW3/memorial.exr


--------------------------------------------------------------------------------
/assignments/HW3/memorial_large.exr:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW3/memorial_large.exr


--------------------------------------------------------------------------------
/assignments/HW3/memorial_png.gold:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW3/memorial_png.gold


--------------------------------------------------------------------------------
/assignments/HW3/memorial_png_large.gold:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW3/memorial_png_large.gold


--------------------------------------------------------------------------------
/assignments/HW3/memorial_raw.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW3/memorial_raw.png


--------------------------------------------------------------------------------
/assignments/HW3/memorial_raw_large.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW3/memorial_raw_large.png


--------------------------------------------------------------------------------
/assignments/HW3/reference_calc.cpp:
--------------------------------------------------------------------------------
 1 | #include <algorithm>
 2 | #include <cassert>
 3 | 
 4 | void referenceCalculation(const float *const h_logLuminance,
 5 |                           unsigned int *const h_cdf, const size_t numRows,
 6 |                           const size_t numCols, const size_t numBins,
 7 |                           float &logLumMin, float &logLumMax) {
 8 |   logLumMin = h_logLuminance[0];
 9 |   logLumMax = h_logLuminance[0];
10 | 
11 |   // Step 1
12 |   // first we find the minimum and maximum across the entire image
13 |   for (size_t i = 1; i < numCols * numRows; ++i) {
14 |     logLumMin = std::min(h_logLuminance[i], logLumMin);
15 |     logLumMax = std::max(h_logLuminance[i], logLumMax);
16 |   }
17 | 
18 |   // Step 2
19 |   float logLumRange = logLumMax - logLumMin;
20 | 
21 |   // Step 3
22 |   // next we use the now known range to compute
23 |   // a histogram of numBins bins
24 |   unsigned int *histo = new unsigned int[numBins];
25 | 
26 |   for (size_t i = 0; i < numBins; ++i)
27 |     histo[i] = 0;
28 | 
29 |   for (size_t i = 0; i < numCols * numRows; ++i) {
30 |     unsigned int bin =
31 |         std::min(static_cast<unsigned int>(numBins - 1),
32 |                  static_cast<unsigned int>((h_logLuminance[i] - logLumMin) /
33 |                                            logLumRange * numBins));
34 |     histo[bin]++;
35 |   }
36 | 
37 |   // Step 4
38 |   // finally we perform and exclusive scan (prefix sum)
39 |   // on the histogram to get the cumulative distribution
40 |   h_cdf[0] = 0;
41 |   for (size_t i = 1; i < numBins; ++i) {
42 |     h_cdf[i] = h_cdf[i - 1] + histo[i - 1];
43 |   }
44 | 
45 |   delete[] histo;
46 | }


--------------------------------------------------------------------------------
/assignments/HW3/reference_calc.h:
--------------------------------------------------------------------------------
 1 | #ifndef REFERENCE_H__
 2 | #define REFERENCE_H__
 3 | 
 4 | void referenceCalculation(const float *const h_logLuminance,
 5 |                           unsigned int *const h_cdf, const size_t numRows,
 6 |                           const size_t numCols, const size_t numBins,
 7 |                           float &logLumMin, float &logLumMax);
 8 | 
 9 | #endif
10 | 


--------------------------------------------------------------------------------
/assignments/HW3/student_func.cu:
--------------------------------------------------------------------------------
 1 | /* Udacity Homework 3
 2 |    HDR Tone-mapping
 3 | 
 4 |   Background HDR
 5 |   ==============
 6 | 
 7 |   A High Dynamic Range (HDR) image contains a wider variation of intensity
 8 |   and color than is allowed by the RGB format with 1 byte per channel that we
 9 |   have used in the previous assignment.
10 | 
11 |   To store this extra information we use single precision floating point for
12 |   each channel.  This allows for an extremely wide range of intensity values.
13 | 
14 |   In the image for this assignment, the inside of a church with light coming in
15 |   through stained glass windows, the raw input floating point values for the
16 |   channels range from 0 to 275.  But the mean is .41 and 98% of the values are
17 |   less than 3!  This means that certain areas (the windows) are extremely bright
18 |   compared to everywhere else.  If we linearly map this [0-275] range into the
19 |   [0-255] range that we have been using then most values will be mapped to zero!
20 |   The only thing we will be able to see are the very brightest areas - the
21 |   windows - everything else will appear pitch black.
22 | 
23 |   The problem is that although we have cameras capable of recording the wide
24 |   range of intensity that exists in the real world our monitors are not capable
25 |   of displaying them.  Our eyes are also quite capable of observing a much wider
26 |   range of intensities than our image formats / monitors are capable of
27 |   displaying.
28 | 
29 |   Tone-mapping is a process that transforms the intensities in the image so that
30 |   the brightest values aren't nearly so far away from the mean.  That way when
31 |   we transform the values into [0-255] we can actually see the entire image.
32 |   There are many ways to perform this process and it is as much an art as a
33 |   science - there is no single "right" answer.  In this homework we will
34 |   implement one possible technique.
35 | 
36 |   Background Chrominance-Luminance
37 |   ================================
38 | 
39 |   The RGB space that we have been using to represent images can be thought of as
40 |   one possible set of axes spanning a three dimensional space of color.  We
41 |   sometimes choose other axes to represent this space because they make certain
42 |   operations more convenient.
43 | 
44 |   Another possible way of representing a color image is to separate the color
45 |   information (chromaticity) from the brightness information.  There are
46 |   multiple different methods for doing this - a common one during the analog
47 |   television days was known as Chrominance-Luminance or YUV.
48 | 
49 |   We choose to represent the image in this way so that we can remap only the
50 |   intensity channel and then recombine the new intensity values with the color
51 |   information to form the final image.
52 | 
53 |   Old TV signals used to be transmitted in this way so that black & white
54 |   televisions could display the luminance channel while color televisions would
55 |   display all three of the channels.
56 | 
57 | 
58 |   Tone-mapping
59 |   ============
60 | 
61 |   In this assignment we are going to transform the luminance channel (actually
62 |   the log of the luminance, but this is unimportant for the parts of the
63 |   algorithm that you will be implementing) by compressing its range to [0, 1].
64 |   To do this we need the cumulative distribution of the luminance values.
65 | 
66 |   Example
67 |   -------
68 | 
69 |   input : [2 4 3 3 1 7 4 5 7 0 9 4 3 2]
70 |   min / max / range: 0 / 9 / 9
71 | 
72 |   histo with 3 bins: [4 7 3]
73 | 
74 |   cdf : [0 4 11]   (exclusive scan: cdf[i] counts the values in the bins before bin i)
75 | 
76 | 
77 |   Your task is to calculate this cumulative distribution by following these
78 |   steps.
79 | 
80 | */
81 | 
82 | #include "utils.h"
83 | 
// Student entry point for HW3. The body is currently an unimplemented stub:
// it contains only the task description and does not touch its arguments.
//
// Parameters (roles taken from the task list inside the body):
//   d_logLuminance  input logLuminance channel
//                   (presumably numRows * numCols floats in device memory,
//                   judging by the d_ prefix - TODO confirm against the caller)
//   d_cdf           already-allocated output for the cumulative distribution
//   min_logLum      output: minimum value found in the input
//   max_logLum      output: maximum value found in the input
//   numRows,numCols image dimensions
//   numBins         number of histogram bins
void your_histogram_and_prefixsum(const float *const d_logLuminance,
                                  unsigned int *const d_cdf, float &min_logLum,
                                  float &max_logLum, const size_t numRows,
                                  const size_t numCols, const size_t numBins) {
  // TODO
  /*Here are the steps you need to implement
    1) find the minimum and maximum value in the input logLuminance channel
       store in min_logLum and max_logLum
    2) subtract them to find the range
    3) generate a histogram of all the values in the logLuminance channel using
       the formula: bin = (lum[i] - lumMin) / lumRange * numBins
    4) Perform an exclusive scan (prefix sum) on the histogram to get
       the cumulative distribution of luminance values (this should go in the
       incoming d_cdf pointer which already has been allocated for you)       */
}
99 | 


--------------------------------------------------------------------------------
/assignments/HW3/timer.h:
--------------------------------------------------------------------------------
 1 | #ifndef GPU_TIMER_H__
 2 | #define GPU_TIMER_H__
 3 | 
 4 | #include <cuda_runtime.h>
 5 | 
 6 | struct GpuTimer {
 7 |   cudaEvent_t start;
 8 |   cudaEvent_t stop;
 9 | 
10 |   GpuTimer() {
11 |     cudaEventCreate(&start);
12 |     cudaEventCreate(&stop);
13 |   }
14 | 
15 |   ~GpuTimer() {
16 |     cudaEventDestroy(start);
17 |     cudaEventDestroy(stop);
18 |   }
19 | 
20 |   void Start() { cudaEventRecord(start, 0); }
21 | 
22 |   void Stop() { cudaEventRecord(stop, 0); }
23 | 
24 |   float Elapsed() {
25 |     float elapsed;
26 |     cudaEventSynchronize(stop);
27 |     cudaEventElapsedTime(&elapsed, start, stop);
28 |     return elapsed;
29 |   }
30 | };
31 | 
32 | #endif /* GPU_TIMER_H__ */
33 | 


--------------------------------------------------------------------------------
/assignments/HW3/utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef UTILS_H__
 2 | #define UTILS_H__
 3 | 
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdlib>
#include <iomanip>
#include <iostream>

#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
11 | 
// Wraps an expression returning a CUDA status code: forwards the value, the
// expression text, and the call site (file / line) to check().
#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)

// Does nothing when `err` equals cudaSuccess. Otherwise prints the call site,
// the CUDA error string and the offending expression text (`func`) to stderr
// and terminates the process with exit status 1.
template <typename T>
void check(T err, const char *const func, const char *const file,
           const int line) {
  if (err == cudaSuccess)
    return;

  std::cerr << "CUDA error at: " << file << ":" << line << std::endl;
  std::cerr << cudaGetErrorString(err) << " " << func << std::endl;
  exit(1);
}
23 | 
// Verifies that the first numElem entries of `gpu` are identical to `ref`.
// At the first mismatch the position and both values are printed to stderr
// and the process exits with status 1; otherwise the function just returns.
template <typename T>
void checkResultsExact(const T *const ref, const T *const gpu, size_t numElem) {
  // advance to the first position where the two arrays disagree
  size_t pos = 0;
  while (pos < numElem && !(ref[pos] != gpu[pos]))
    ++pos;

  if (pos == numElem)
    return; // every element matched

  std::cerr << "Difference at pos " << pos << std::endl;
  // unary + promotes char-sized values so they print as numbers, and leaves
  // other arithmetic types unchanged
  std::cerr << "Reference: " << std::setprecision(17) << +ref[pos]
            << "\nGPU      : " << +gpu[pos] << std::endl;
  exit(1);
}
38 | 
// Compares `gpu` against `ref` with two tolerances.
//
//   eps1  maximum allowed absolute difference for any single element; the
//         first element exceeding it is reported and the process exits(1)
//   eps2  maximum allowed FRACTION (0..1) of elements that differ at all
//         (by a non-zero amount no larger than eps1); exceeding it is reported
//         and the process exits(1)
//
// Returns normally when both limits hold. An empty comparison
// (numElem == 0) trivially passes.
template <typename T>
void checkResultsEps(const T *const ref, const T *const gpu, size_t numElem,
                     double eps1, double eps2) {
  assert(eps1 >= 0 && eps2 >= 0);

  if (numElem == 0)
    return; // nothing to compare; also avoids computing 0 / 0 below

  size_t numSmallDifferences = 0;
  for (size_t i = 0; i < numElem; ++i) {
    // subtract smaller from larger in case of unsigned types
    const T smaller = std::min(ref[i], gpu[i]);
    const T larger = std::max(ref[i], gpu[i]);
    const T diff = larger - smaller;

    if (diff > eps1) {
      std::cerr << "Difference at pos " << i << " exceeds tolerance of "
                << eps1 << std::endl;
      std::cerr << "Reference: " << std::setprecision(17) << +ref[i]
                << "\nGPU      : " << +gpu[i] << std::endl;
      exit(1);
    }
    if (diff > 0)
      ++numSmallDifferences;
  }

  // fraction (not percent) of elements with a small, non-zero difference
  const double fractionSmallDifferences =
      (double)numSmallDifferences / (double)numElem;
  if (fractionSmallDifferences > eps2) {
    std::cerr << "Total percentage of non-zero pixel difference between the "
                 "two images exceeds "
              << 100.0 * eps2 << "%" << std::endl;
    std::cerr << "Percentage of non-zero pixel differences: "
              << 100.0 * fractionSmallDifferences << "%" << std::endl;
    exit(1);
  }
}
72 | 
// Image comparison using the Autodesk method: an element counts as "bad" when
// its absolute difference from the reference is greater than `variance`.
// Note that the tolerance here is a number of PIXELS (elements), not a
// percentage of the input pixels. If more than `tolerance` elements are bad,
// the counts are printed to stderr and the process exits with status 1.
template <typename T>
void checkResultsAutodesk(const T *const ref, const T *const gpu,
                          size_t numElem, double variance, size_t tolerance) {
  size_t badCount = 0;
  for (size_t idx = 0; idx < numElem; ++idx) {
    // larger minus smaller, so unsigned element types cannot wrap around
    const T low = std::min(ref[idx], gpu[idx]);
    const T high = std::max(ref[idx], gpu[idx]);
    const T delta = high - low;
    badCount += (delta > variance) ? 1 : 0;
  }

  if (badCount <= tolerance)
    return;

  std::cerr << "Too many bad pixels in the image." << badCount << "/"
            << tolerance << std::endl;
  exit(1);
}
94 | 
95 | #endif
96 | 


--------------------------------------------------------------------------------
/assignments/HW4/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | ############################################################################
 2 | # <summary> CMakeLists.txt for OpenCV and CUDA. </summary>
 3 | # <date>    2012-02-07          </date>
 4 | # <author>  Quan Tran Minh. edit by Johannes Kast, Michael Sarahan </author>
 5 | # <email>   quantm@unist.ac.kr  kast.jo@googlemail.com msarahan@gmail.com</email>
 6 | ############################################################################
 7 | 
# collect source files

# Headers and CUDA sources are collected by globbing this directory.
file( GLOB  hdr *.hpp *.h )
file( GLOB  cu  *.cu)
# Host-side C++ sources are listed explicitly.
SET (HW4_files main.cpp loadSaveImage.cpp reference_calc.cpp compare.cpp)

# Builds the HW4 executable from the sources collected above.
# NOTE(review): `img` is not set anywhere in this file, so ${img} presumably
# expands to nothing unless a parent scope defines it - confirm.
CUDA_ADD_EXECUTABLE(HW4 ${HW4_files} ${hdr} ${img} ${cu})
15 | 
16 | 
17 | 


--------------------------------------------------------------------------------
/assignments/HW4/Makefile:
--------------------------------------------------------------------------------
  1 | # CMAKE generated file: DO NOT EDIT!
  2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20
  3 | 
  4 | # Default target executed when no arguments are given to make.
  5 | default_target: all
  6 | .PHONY : default_target
  7 | 
  8 | # Allow only one "make -f Makefile2" at a time, but pass parallelism.
  9 | .NOTPARALLEL:
 10 | 
 11 | #=============================================================================
 12 | # Special targets provided by cmake.
 13 | 
 14 | # Disable implicit rules so canonical targets will work.
 15 | .SUFFIXES:
 16 | 
 17 | # Disable VCS-based implicit rules.
 18 | % : %,v
 19 | 
 20 | # Disable VCS-based implicit rules.
 21 | % : RCS/%
 22 | 
 23 | # Disable VCS-based implicit rules.
 24 | % : RCS/%,v
 25 | 
 26 | # Disable VCS-based implicit rules.
 27 | % : SCCS/s.%
 28 | 
 29 | # Disable VCS-based implicit rules.
 30 | % : s.%
 31 | 
 32 | .SUFFIXES: .hpux_make_needs_suffix_list
 33 | 
 34 | # Command-line flag to silence nested $(MAKE).
 35 | $(VERBOSE)MAKESILENT = -s
 36 | 
 37 | #Suppress display of executed commands.
 38 | $(VERBOSE).SILENT:
 39 | 
 40 | # A target that is always out of date.
 41 | cmake_force:
 42 | .PHONY : cmake_force
 43 | 
 44 | #=============================================================================
 45 | # Set environment variables for the build.
 46 | 
 47 | # The shell in which to execute make rules.
 48 | SHELL = /bin/sh
 49 | 
 50 | # The CMake executable.
 51 | CMAKE_COMMAND = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake
 52 | 
 53 | # The command to remove a file.
 54 | RM = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E rm -f
 55 | 
 56 | # Escaping for special characters.
 57 | EQUALS = =
 58 | 
 59 | # The top-level source directory on which CMake was run.
 60 | CMAKE_SOURCE_DIR = /data/home/chilli/cluster/work/CS344/assignments
 61 | 
 62 | # The top-level build directory on which CMake was run.
 63 | CMAKE_BINARY_DIR = /data/home/chilli/cluster/work/CS344/assignments
 64 | 
 65 | #=============================================================================
 66 | # Targets provided globally by CMake.
 67 | 
 68 | # Special rule for the target rebuild_cache
 69 | rebuild_cache:
 70 | 	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..."
 71 | 	/usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR)
 72 | .PHONY : rebuild_cache
 73 | 
 74 | # Special rule for the target rebuild_cache
 75 | rebuild_cache/fast: rebuild_cache
 76 | .PHONY : rebuild_cache/fast
 77 | 
 78 | # Special rule for the target edit_cache
 79 | edit_cache:
 80 | 	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "No interactive CMake dialog available..."
 81 | 	/usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available.
 82 | .PHONY : edit_cache
 83 | 
 84 | # Special rule for the target edit_cache
 85 | edit_cache/fast: edit_cache
 86 | .PHONY : edit_cache/fast
 87 | 
 88 | # The main all target
 89 | all: cmake_check_build_system
 90 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles /data/home/chilli/cluster/work/CS344/assignments/homework/HW4//CMakeFiles/progress.marks
 91 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW4/all
 92 | 	$(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles 0
 93 | .PHONY : all
 94 | 
 95 | # The main clean target
 96 | clean:
 97 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW4/clean
 98 | .PHONY : clean
 99 | 
100 | # The main clean target
101 | clean/fast: clean
102 | .PHONY : clean/fast
103 | 
104 | # Prepare targets for installation.
105 | preinstall: all
106 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW4/preinstall
107 | .PHONY : preinstall
108 | 
109 | # Prepare targets for installation.
110 | preinstall/fast:
111 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW4/preinstall
112 | .PHONY : preinstall/fast
113 | 
114 | # clear depends
115 | depend:
116 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1
117 | .PHONY : depend
118 | 
119 | # Convenience name for target.
120 | homework/HW4/CMakeFiles/HW4.dir/rule:
121 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW4/CMakeFiles/HW4.dir/rule
122 | .PHONY : homework/HW4/CMakeFiles/HW4.dir/rule
123 | 
124 | # Convenience name for target.
125 | HW4: homework/HW4/CMakeFiles/HW4.dir/rule
126 | .PHONY : HW4
127 | 
128 | # fast build rule for target.
129 | HW4/fast:
130 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/build
131 | .PHONY : HW4/fast
132 | 
133 | compare.o: compare.cpp.o
134 | .PHONY : compare.o
135 | 
136 | # target to build an object file
137 | compare.cpp.o:
138 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/compare.cpp.o
139 | .PHONY : compare.cpp.o
140 | 
141 | compare.i: compare.cpp.i
142 | .PHONY : compare.i
143 | 
144 | # target to preprocess a source file
145 | compare.cpp.i:
146 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/compare.cpp.i
147 | .PHONY : compare.cpp.i
148 | 
149 | compare.s: compare.cpp.s
150 | .PHONY : compare.s
151 | 
152 | # target to generate assembly for a file
153 | compare.cpp.s:
154 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/compare.cpp.s
155 | .PHONY : compare.cpp.s
156 | 
157 | loadSaveImage.o: loadSaveImage.cpp.o
158 | .PHONY : loadSaveImage.o
159 | 
160 | # target to build an object file
161 | loadSaveImage.cpp.o:
162 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/loadSaveImage.cpp.o
163 | .PHONY : loadSaveImage.cpp.o
164 | 
165 | loadSaveImage.i: loadSaveImage.cpp.i
166 | .PHONY : loadSaveImage.i
167 | 
168 | # target to preprocess a source file
169 | loadSaveImage.cpp.i:
170 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/loadSaveImage.cpp.i
171 | .PHONY : loadSaveImage.cpp.i
172 | 
173 | loadSaveImage.s: loadSaveImage.cpp.s
174 | .PHONY : loadSaveImage.s
175 | 
176 | # target to generate assembly for a file
177 | loadSaveImage.cpp.s:
178 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/loadSaveImage.cpp.s
179 | .PHONY : loadSaveImage.cpp.s
180 | 
181 | main.o: main.cpp.o
182 | .PHONY : main.o
183 | 
184 | # target to build an object file
185 | main.cpp.o:
186 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/main.cpp.o
187 | .PHONY : main.cpp.o
188 | 
189 | main.i: main.cpp.i
190 | .PHONY : main.i
191 | 
192 | # target to preprocess a source file
193 | main.cpp.i:
194 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/main.cpp.i
195 | .PHONY : main.cpp.i
196 | 
197 | main.s: main.cpp.s
198 | .PHONY : main.s
199 | 
200 | # target to generate assembly for a file
201 | main.cpp.s:
202 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/main.cpp.s
203 | .PHONY : main.cpp.s
204 | 
205 | reference_calc.o: reference_calc.cpp.o
206 | .PHONY : reference_calc.o
207 | 
208 | # target to build an object file
209 | reference_calc.cpp.o:
210 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/reference_calc.cpp.o
211 | .PHONY : reference_calc.cpp.o
212 | 
213 | reference_calc.i: reference_calc.cpp.i
214 | .PHONY : reference_calc.i
215 | 
216 | # target to preprocess a source file
217 | reference_calc.cpp.i:
218 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/reference_calc.cpp.i
219 | .PHONY : reference_calc.cpp.i
220 | 
221 | reference_calc.s: reference_calc.cpp.s
222 | .PHONY : reference_calc.s
223 | 
224 | # target to generate assembly for a file
225 | reference_calc.cpp.s:
226 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW4/CMakeFiles/HW4.dir/build.make homework/HW4/CMakeFiles/HW4.dir/reference_calc.cpp.s
227 | .PHONY : reference_calc.cpp.s
228 | 
229 | # Help Target
230 | help:
231 | 	@echo "The following are some of the valid targets for this Makefile:"
232 | 	@echo "... all (the default if no target is provided)"
233 | 	@echo "... clean"
234 | 	@echo "... depend"
235 | 	@echo "... edit_cache"
236 | 	@echo "... rebuild_cache"
237 | 	@echo "... HW4"
238 | 	@echo "... compare.o"
239 | 	@echo "... compare.i"
240 | 	@echo "... compare.s"
241 | 	@echo "... loadSaveImage.o"
242 | 	@echo "... loadSaveImage.i"
243 | 	@echo "... loadSaveImage.s"
244 | 	@echo "... main.o"
245 | 	@echo "... main.i"
246 | 	@echo "... main.s"
247 | 	@echo "... reference_calc.o"
248 | 	@echo "... reference_calc.i"
249 | 	@echo "... reference_calc.s"
250 | .PHONY : help
251 | 
252 | 
253 | 
254 | #=============================================================================
255 | # Special targets to cleanup operation of make.
256 | 
257 | # Special rule to run CMake to check the build system integrity.
258 | # No rule that depends on this can have commands that come from listfiles
259 | # because they might be regenerated.
260 | cmake_check_build_system:
261 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0
262 | .PHONY : cmake_check_build_system
263 | 
264 | 


--------------------------------------------------------------------------------
/assignments/HW4/compare.cpp:
--------------------------------------------------------------------------------
 1 | #include <opencv2/opencv.hpp>
 2 | #include "utils.h"
 3 | 
 4 | void compareImages(std::string reference_filename, std::string test_filename,
 5 |                    bool useEpsCheck, double perPixelError, double globalError) {
 6 |   cv::Mat reference = cv::imread(reference_filename, -1);
 7 |   cv::Mat test = cv::imread(test_filename, -1);
 8 | 
 9 |   cv::Mat diff = abs(reference - test);
10 | 
11 |   cv::Mat diffSingleChannel =
12 |       diff.reshape(1, 0); // convert to 1 channel, same # rows
13 | 
14 |   double minVal, maxVal;
15 | 
16 |   cv::minMaxLoc(diffSingleChannel, &minVal, &maxVal, NULL,
17 |                 NULL); // NULL because we don't care about location
18 | 
19 |   // now perform transform so that we bump values to the full range
20 | 
21 |   diffSingleChannel = (diffSingleChannel - minVal) * (255. / (maxVal - minVal));
22 | 
23 |   diff = diffSingleChannel.reshape(reference.channels(), 0);
24 | 
25 |   cv::imwrite("HW4_differenceImage.png", diff);
26 |   // OK, now we can start comparing values...
27 |   unsigned char *referencePtr = reference.ptr<unsigned char>(0);
28 |   unsigned char *testPtr = test.ptr<unsigned char>(0);
29 | 
30 |   if (useEpsCheck) {
31 |     checkResultsEps(referencePtr, testPtr,
32 |                     reference.rows * reference.cols * reference.channels(),
33 |                     perPixelError, globalError);
34 |   } else {
35 |     checkResultsExact(referencePtr, testPtr,
36 |                       reference.rows * reference.cols * reference.channels());
37 |   }
38 | 
39 |   std::cout << "PASS" << std::endl;
40 |   return;
41 | }


--------------------------------------------------------------------------------
/assignments/HW4/compare.h:
--------------------------------------------------------------------------------
#ifndef HW4_H__
#define HW4_H__

#include <string> // std::string, so this header compiles on its own

// Compares the image stored in test_filename against reference_filename.
// With useEpsCheck set, perPixelError and globalError are the tolerances of
// the comparison; otherwise the images must match exactly.
void compareImages(std::string reference_filename, std::string test_filename,
                   bool useEpsCheck, double perPixelError, double globalError);

#endif


--------------------------------------------------------------------------------
/assignments/HW4/loadSaveImage.cpp:
--------------------------------------------------------------------------------
  1 | #include <opencv2/core/core.hpp>
  2 | #include <opencv2/highgui/highgui.hpp>
  3 | #include <opencv2/opencv.hpp>
  4 | #include <vector>
  5 | #include "cuda_runtime.h"
  6 | 
  7 | // The caller becomes responsible for the returned pointer. This
  8 | // is done in the interest of keeping this code as simple as possible.
  9 | // In production code this is a bad idea - we should use RAII
 10 | // to ensure the memory is freed.  DO NOT COPY THIS AND USE IN PRODUCTION
 11 | // CODE!!!
 12 | void loadImageHDR(const std::string &filename, float **imagePtr,
 13 |                   size_t *numRows, size_t *numCols) {
 14 |   cv::Mat image = cv::imread(filename.c_str(),
 15 |                              CV_LOAD_IMAGE_COLOR | CV_LOAD_IMAGE_ANYDEPTH);
 16 |   if (image.empty()) {
 17 |     std::cerr << "Couldn't open file: " << filename << std::endl;
 18 |     exit(1);
 19 |   }
 20 | 
 21 |   if (image.channels() != 3) {
 22 |     std::cerr << "Image must be color!" << std::endl;
 23 |     exit(1);
 24 |   }
 25 | 
 26 |   if (!image.isContinuous()) {
 27 |     std::cerr << "Image isn't continuous!" << std::endl;
 28 |     exit(1);
 29 |   }
 30 | 
 31 |   *imagePtr = new float[image.rows * image.cols * image.channels()];
 32 | 
 33 |   float *cvPtr = image.ptr<float>(0);
 34 |   for (size_t i = 0; i < image.rows * image.cols * image.channels(); ++i)
 35 |     (*imagePtr)[i] = cvPtr[i];
 36 | 
 37 |   *numRows = image.rows;
 38 |   *numCols = image.cols;
 39 | }
 40 | 
 41 | void loadImageRGBA(const std::string &filename, uchar4 **imagePtr,
 42 |                    size_t *numRows, size_t *numCols) {
 43 |   cv::Mat image = cv::imread(filename.c_str(), CV_LOAD_IMAGE_COLOR);
 44 |   if (image.empty()) {
 45 |     std::cerr << "Couldn't open file: " << filename << std::endl;
 46 |     exit(1);
 47 |   }
 48 | 
 49 |   if (image.channels() != 3) {
 50 |     std::cerr << "Image must be color!" << std::endl;
 51 |     exit(1);
 52 |   }
 53 | 
 54 |   if (!image.isContinuous()) {
 55 |     std::cerr << "Image isn't continuous!" << std::endl;
 56 |     exit(1);
 57 |   }
 58 | 
 59 |   cv::Mat imageRGBA;
 60 |   cv::cvtColor(image, imageRGBA, CV_BGR2RGBA);
 61 | 
 62 |   *imagePtr = new uchar4[image.rows * image.cols];
 63 | 
 64 |   unsigned char *cvPtr = imageRGBA.ptr<unsigned char>(0);
 65 |   for (size_t i = 0; i < image.rows * image.cols; ++i) {
 66 |     (*imagePtr)[i].x = cvPtr[4 * i + 0];
 67 |     (*imagePtr)[i].y = cvPtr[4 * i + 1];
 68 |     (*imagePtr)[i].z = cvPtr[4 * i + 2];
 69 |     (*imagePtr)[i].w = cvPtr[4 * i + 3];
 70 |   }
 71 | 
 72 |   *numRows = image.rows;
 73 |   *numCols = image.cols;
 74 | }
 75 | 
 76 | void saveImageRGBA(const uchar4 *const image, const size_t numRows,
 77 |                    const size_t numCols, const std::string &output_file) {
 78 |   int sizes[2];
 79 |   sizes[0] = numRows;
 80 |   sizes[1] = numCols;
 81 |   cv::Mat imageRGBA(2, sizes, CV_8UC4, (void *)image);
 82 |   cv::Mat imageOutputBGR;
 83 |   cv::cvtColor(imageRGBA, imageOutputBGR, CV_RGBA2BGR);
 84 |   // output the image
 85 |   cv::imwrite(output_file.c_str(), imageOutputBGR);
 86 | }
 87 | 
 88 | // output an exr file
 89 | // assumed to already be BGR
 90 | void saveImageHDR(const float *const image, const size_t numRows,
 91 |                   const size_t numCols, const std::string &output_file) {
 92 |   int sizes[2];
 93 |   sizes[0] = numRows;
 94 |   sizes[1] = numCols;
 95 | 
 96 |   cv::Mat imageHDR(2, sizes, CV_32FC3, (void *)image);
 97 | 
 98 |   imageHDR = imageHDR * 255;
 99 | 
100 |   cv::imwrite(output_file.c_str(), imageHDR);
101 | }
102 | 


--------------------------------------------------------------------------------
/assignments/HW4/loadSaveImage.h:
--------------------------------------------------------------------------------
 1 | #ifndef LOADSAVEIMAGE_H__
 2 | #define LOADSAVEIMAGE_H__
 3 | 
 4 | #include <string>
 5 | #include <cuda_runtime.h> //for uchar4
 6 | 
 7 | void loadImageHDR(const std::string &filename, float **imagePtr,
 8 |                   size_t *numRows, size_t *numCols);
 9 | 
10 | void loadImageRGBA(const std::string &filename, uchar4 **imagePtr,
11 |                    size_t *numRows, size_t *numCols);
12 | 
13 | void saveImageRGBA(const uchar4 *const image, const size_t numRows,
14 |                    const size_t numCols, const std::string &output_file);
15 | 
16 | void saveImageHDR(const float *const image, const size_t numRows,
17 |                   const size_t numCols, const std::string &output_file);
18 | 
19 | #endif
20 | 


--------------------------------------------------------------------------------
/assignments/HW4/main.cpp:
--------------------------------------------------------------------------------
  1 | // Udacity HW4 Driver
  2 | 
  3 | #include <iostream>
  4 | #include "timer.h"
  5 | #include "utils.h"
  6 | #include <string>
  7 | #include <stdio.h>
  8 | #include <thrust/host_vector.h>
  9 | #include <thrust/device_vector.h>
 10 | 
 11 | #include "compare.h"
 12 | #include "reference_calc.h"
 13 | 
 14 | void preProcess(unsigned int **inputVals, unsigned int **inputPos,
 15 |                 unsigned int **outputVals, unsigned int **outputPos,
 16 |                 size_t &numElems, const std::string &filename,
 17 |                 const std::string &template_file);
 18 | 
 19 | void postProcess(const unsigned int *const outputVals,
 20 |                  const unsigned int *const outputPos, const size_t numElems,
 21 |                  const std::string &output_file);
 22 | 
 23 | void your_sort(unsigned int *const inputVals, unsigned int *const inputPos,
 24 |                unsigned int *const outputVals, unsigned int *const outputPos,
 25 |                const size_t numElems);
 26 | 
 27 | int main(int argc, char **argv) {
 28 |   unsigned int *inputVals;
 29 |   unsigned int *inputPos;
 30 |   unsigned int *outputVals;
 31 |   unsigned int *outputPos;
 32 | 
 33 |   size_t numElems;
 34 | 
 35 |   std::string input_file;
 36 |   std::string template_file;
 37 |   std::string output_file;
 38 |   std::string reference_file;
 39 |   double perPixelError = 0.0;
 40 |   double globalError = 0.0;
 41 |   bool useEpsCheck = false;
 42 | 
 43 |   switch (argc) {
 44 |   case 3:
 45 |     input_file = std::string(argv[1]);
 46 |     template_file = std::string(argv[2]);
 47 |     output_file = "HW4_output.png";
 48 |     break;
 49 |   case 4:
 50 |     input_file = std::string(argv[1]);
 51 |     template_file = std::string(argv[2]);
 52 |     output_file = std::string(argv[3]);
 53 |     break;
 54 |   default:
 55 |     std::cerr << "Usage: ./HW4 input_file template_file [output_filename]"
 56 |               << std::endl;
 57 |     exit(1);
 58 |   }
 59 |   // load the image and give us our input and output pointers
 60 |   preProcess(&inputVals, &inputPos, &outputVals, &outputPos, numElems,
 61 |              input_file, template_file);
 62 | 
 63 |   // Snapshot the unsorted input on the host BEFORE running the student sort.
 64 |   // A radix sort ping-pongs between the input and output buffers, so the
 65 |   // device input may no longer hold the original data once your_sort returns.
 66 |   // Thrust containers are used for copying memory from the GPU.
 67 |   thrust::device_ptr<unsigned int> d_inputVals(inputVals);
 68 |   thrust::device_ptr<unsigned int> d_inputPos(inputPos);
 69 | 
 70 |   thrust::host_vector<unsigned int> h_inputVals(d_inputVals,
 71 |                                                 d_inputVals + numElems);
 72 |   thrust::host_vector<unsigned int> h_inputPos(d_inputPos,
 73 |                                                d_inputPos + numElems);
 74 | 
 75 |   GpuTimer timer;
 76 |   timer.Start();
 77 | 
 78 |   // call the students' code
 79 |   your_sort(inputVals, inputPos, outputVals, outputPos, numElems);
 80 | 
 81 |   timer.Stop();
 82 |   // Check the synchronize result too: errors raised while kernels run are
 83 |   // reported here rather than at launch.
 84 |   checkCudaErrors(cudaDeviceSynchronize());
 85 |   checkCudaErrors(cudaGetLastError());
 86 |   printf("\n");
 87 |   int err = printf("Your code ran in: %f msecs.\n", timer.Elapsed());
 88 | 
 89 |   if (err < 0) {
 90 |     // Couldn't print! Probably the student closed stdout - bad news
 91 |     std::cerr << "Couldn't print timing information! STDOUT Closed!"
 92 |               << std::endl;
 93 |     exit(1);
 94 |   }
 95 | 
 96 |   // check results and output the red-eye corrected image
 97 |   postProcess(outputVals, outputPos, numElems, output_file);
 98 | 
 99 |   // Debug check (moved from HW4.cu): run the reference radix sort on the host
100 |   // using the input snapshot taken above, then compare the sorted values and
101 |   // positions exactly. Comment it out again before submitting for grading,
102 |   // because it adds a lot of run time.
103 |   thrust::host_vector<unsigned int> h_outputVals(numElems);
104 |   thrust::host_vector<unsigned int> h_outputPos(numElems);
105 | 
106 |   reference_calculation(&h_inputVals[0], &h_inputPos[0], &h_outputVals[0],
107 |                         &h_outputPos[0], numElems);
108 | 
109 |   // postProcess(valsPtr, posPtr, numElems, reference_file);
110 | 
111 |   // compareImages(reference_file, output_file, useEpsCheck, perPixelError,
112 |   // globalError);
113 | 
114 |   thrust::device_ptr<unsigned int> d_outputVals(outputVals);
115 |   thrust::device_ptr<unsigned int> d_outputPos(outputPos);
116 | 
117 |   thrust::host_vector<unsigned int> h_yourOutputVals(d_outputVals,
118 |                                                      d_outputVals + numElems);
119 |   thrust::host_vector<unsigned int> h_yourOutputPos(d_outputPos,
120 |                                                     d_outputPos + numElems);
121 | 
122 |   checkResultsExact(&h_outputVals[0], &h_yourOutputVals[0], numElems);
123 |   checkResultsExact(&h_outputPos[0], &h_yourOutputPos[0], numElems);
124 | 
125 |   checkCudaErrors(cudaFree(inputVals));
126 |   checkCudaErrors(cudaFree(inputPos));
127 |   checkCudaErrors(cudaFree(outputVals));
128 |   checkCudaErrors(cudaFree(outputPos));
129 | 
130 |   return 0;
131 | }
138 | 


--------------------------------------------------------------------------------
/assignments/HW4/red_eye_effect.gold:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW4/red_eye_effect.gold


--------------------------------------------------------------------------------
/assignments/HW4/red_eye_effect_5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW4/red_eye_effect_5.jpg


--------------------------------------------------------------------------------
/assignments/HW4/red_eye_effect_template_5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW4/red_eye_effect_template_5.jpg


--------------------------------------------------------------------------------
/assignments/HW4/reference_calc.cpp:
--------------------------------------------------------------------------------
 1 | #include <algorithm>
 2 | // For memset
 3 | #include <cstring>
 4 | 
 5 | // Reference LSD radix sort: orders the (value, position) pairs by ascending
 6 | // value, one digit of numBits bits per pass, and leaves the result in
 7 | // outputVals/outputPos. Elements are placed in input order within each bin,
 8 | // so equal values keep their relative order.
 9 | // NOTE: inputVals/inputPos serve as the second ping-pong buffer and are
10 | // overwritten.
11 | void reference_calculation(unsigned int *inputVals, unsigned int *inputPos,
12 |                            unsigned int *outputVals, unsigned int *outputPos,
13 |                            const size_t numElems) {
14 |   // Unsigned throughout: the digit mask is shifted up to the top bit, which
15 |   // must not be done on a signed int.
16 |   const unsigned int numBits = 1;
17 |   const unsigned int numBins = 1u << numBits;
18 |   const unsigned int totalBits = 8 * sizeof(unsigned int);
19 | 
20 |   unsigned int binHistogram[numBins];
21 |   unsigned int binScan[numBins];
22 | 
23 |   unsigned int *vals_src = inputVals;
24 |   unsigned int *pos_src = inputPos;
25 | 
26 |   unsigned int *vals_dst = outputVals;
27 |   unsigned int *pos_dst = outputPos;
28 | 
29 |   for (unsigned int i = 0; i < totalBits; i += numBits) {
30 |     const unsigned int mask = (numBins - 1u) << i;
31 | 
32 |     std::fill(binHistogram, binHistogram + numBins, 0u); // zero out the bins
33 |     std::fill(binScan, binScan + numBins, 0u);
34 | 
35 |     // perform histogram of data & mask into bins
36 |     for (size_t j = 0; j < numElems; ++j) {
37 |       const unsigned int bin = (vals_src[j] & mask) >> i;
38 |       binHistogram[bin]++;
39 |     }
40 | 
41 |     // perform exclusive prefix sum (scan) on binHistogram to get starting
42 |     // location for each bin
43 |     for (unsigned int j = 1; j < numBins; ++j) {
44 |       binScan[j] = binScan[j - 1] + binHistogram[j - 1];
45 |     }
46 | 
47 |     // Gather everything into the correct location
48 |     // need to move vals and positions
49 |     for (size_t j = 0; j < numElems; ++j) {
50 |       const unsigned int bin = (vals_src[j] & mask) >> i;
51 |       vals_dst[binScan[bin]] = vals_src[j];
52 |       pos_dst[binScan[bin]] = pos_src[j];
53 |       binScan[bin]++;
54 |     }
55 | 
56 |     // swap the buffers (pointers only)
57 |     std::swap(vals_dst, vals_src);
58 |     std::swap(pos_dst, pos_src);
59 |   }
60 | 
61 |   // After the final swap the sorted data is in the *_src buffers. Copy only
62 |   // when that is the input buffer (even number of passes); after an odd
63 |   // number of passes it is already the output buffer.
64 |   if (vals_src != outputVals) {
65 |     std::copy(vals_src, vals_src + numElems, outputVals);
66 |     std::copy(pos_src, pos_src + numElems, outputPos);
67 |   }
68 | }
63 | 


--------------------------------------------------------------------------------
/assignments/HW4/reference_calc.h:
--------------------------------------------------------------------------------
 1 | #ifndef REFERENCE_H__
 2 | #define REFERENCE_H__
 3 | 
 4 | // A simple un-optimized reference radix sort calculation
 5 | // Only deals with power-of-2 radices
 6 | 
 7 | void reference_calculation(unsigned int *inputVals, unsigned int *inputPos,
 8 |                            unsigned int *outputVals, unsigned int *outputPos,
 9 |                            const size_t numElems);
10 | #endif


--------------------------------------------------------------------------------
/assignments/HW4/student_func.cu:
--------------------------------------------------------------------------------
 1 | // Udacity HW 4
 2 | // Radix Sorting
 3 | 
 4 | #include "utils.h"
 5 | #include <thrust/host_vector.h>
 6 | 
 7 | /* Red Eye Removal
 8 |    ===============
 9 | 
10 |    For this assignment we are implementing red eye removal.  This is
11 |    accomplished by first creating a score for every pixel that tells us how
12 |    likely it is to be a red eye pixel.  We have already done this for you - you
13 |    are receiving the scores and need to sort them in ascending order so that we
14 |    know which pixels to alter to remove the red eye.
15 | 
16 |    Note: ascending order == smallest to largest
17 | 
18 |    Each score is associated with a position, when you sort the scores, you must
19 |    also move the positions accordingly.
20 | 
21 |    Implementing Parallel Radix Sort with CUDA
22 |    ==========================================
23 | 
24 |    The basic idea is to construct a histogram on each pass of how many of each
25 |    "digit" there are.   Then we scan this histogram so that we know where to put
26 |    the output of each digit.  For example, the first 1 must come after all the
27 |    0s so we have to know how many 0s there are to be able to start moving 1s
28 |    into the correct position.
29 | 
30 |    1) Histogram of the number of occurrences of each digit
31 |    2) Exclusive Prefix Sum of Histogram
32 |    3) Determine relative offset of each digit
33 |         For example [0 0 1 1 0 0 1]
34 |                 ->  [0 1 0 1 2 3 2]
35 |    4) Combine the results of steps 2 & 3 to determine the final
36 |       output location for each element and move it there
37 | 
38 |    LSB Radix sort is an out-of-place sort and you will need to ping-pong values
39 |    between the input and output buffers we have provided.  Make sure the final
40 |    sorted results end up in the output buffer!  Hint: You may need to do a copy
41 |    at the end.
42 | 
43 |  */
44 | 
45 | // Sort entry point called by the driver. All four pointers refer to device
46 | // buffers of numElems elements; the sorted scores and their positions must
47 | // end up in d_outputVals / d_outputPos.
48 | void your_sort(unsigned int *const d_inputVals,
49 |                unsigned int *const d_inputPos,
50 |                unsigned int *const d_outputVals,
51 |                unsigned int *const d_outputPos,
52 |                const size_t numElems) {
53 |   // TODO
54 |   // PUT YOUR SORT HERE
55 | }
51 | 


--------------------------------------------------------------------------------
/assignments/HW4/timer.h:
--------------------------------------------------------------------------------
 1 | #ifndef GPU_TIMER_H__
 2 | #define GPU_TIMER_H__
 3 | 
 4 | #include <cuda_runtime.h>
 5 | 
 6 | // Times GPU work on stream 0 with a pair of CUDA events.
 7 | struct GpuTimer {
 8 |   cudaEvent_t start; // recorded by Start()
 9 |   cudaEvent_t stop;  // recorded by Stop()
10 | 
11 |   GpuTimer() {
12 |     cudaEventCreate(&start);
13 |     cudaEventCreate(&stop);
14 |   }
15 | 
16 |   ~GpuTimer() {
17 |     cudaEventDestroy(start);
18 |     cudaEventDestroy(stop);
19 |   }
20 | 
21 |   void Start() { cudaEventRecord(start, 0); }
22 |   void Stop() { cudaEventRecord(stop, 0); }
23 |   // Waits for `stop`, then returns the time between the two events.
24 |   float Elapsed() {
25 |     float elapsed = 0.0f; // defined result even if the CUDA calls fail
26 |     cudaEventSynchronize(stop);
27 |     cudaEventElapsedTime(&elapsed, start, stop);
28 |     return elapsed;
29 |   }
30 | };
31 | 
32 | #endif /* GPU_TIMER_H__ */
33 | 


--------------------------------------------------------------------------------
/assignments/HW4/utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef UTILS_H__
 2 | #define UTILS_H__
 3 | 
 4 | #include <iostream>
 5 | #include <iomanip>
 6 | #include <cuda.h>
 7 | #include <cuda_runtime.h>
 8 | #include <cuda_runtime_api.h>
 9 | #include <cassert>
10 | #include <cmath>
11 | 
12 | #define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)
13 | 
14 | template <typename T> // err is compared against cudaSuccess
15 | void check(T err, const char *const func, const char *const file,
16 |            const int line) {
17 |   if (err == cudaSuccess)
18 |     return; // nothing to report
19 |   std::cerr << "CUDA error at: " << file << ":" << line << std::endl;
20 |   std::cerr << cudaGetErrorString(err) << " " << func << std::endl;
21 |   exit(1);
22 | }
23 | 
24 | template <typename T>
25 | void checkResultsExact(const T *const ref, const T *const gpu, size_t numElem) {
26 |   // Walk both arrays in step and stop at the first element where the GPU
27 |   // result disagrees with the CPU reference.
28 |   for (size_t idx = 0; idx != numElem; ++idx) {
29 |     if (ref[idx] == gpu[idx])
30 |       continue;
31 |     std::cerr << "Difference at pos " << idx << std::endl;
32 |     // unary + promotes char-sized values so they print as numbers
33 |     std::cerr << "Reference: " << std::setprecision(17) << +ref[idx]
34 |               << "\nGPU      : " << +gpu[idx] << std::endl;
35 |     exit(1);
36 |   }
37 | }
38 | 
39 | // Exits if any element differs from the reference by more than eps1, or if the
40 | // fraction of elements with a non-zero difference (<= eps1) exceeds eps2.
41 | template <typename T>
42 | void checkResultsEps(const T *const ref, const T *const gpu, size_t numElem,
43 |                      double eps1, double eps2) {
44 |   assert(eps1 >= 0 && eps2 >= 0);
45 |   size_t numSmallDifferences = 0;
46 |   for (size_t i = 0; i < numElem; ++i) {
47 |     // Subtract smaller from larger so unsigned T cannot wrap. A conditional
48 |     // is used because this header does not include <algorithm>.
49 |     const T diff = (gpu[i] < ref[i]) ? (ref[i] - gpu[i]) : (gpu[i] - ref[i]);
50 |     if (diff > eps1) {
51 |       std::cerr << "Difference at pos " << +i << " exceeds tolerance of "
52 |                 << eps1 << std::endl;
53 |       std::cerr << "Reference: " << std::setprecision(17) << +ref[i]
54 |                 << "\nGPU      : " << +gpu[i] << std::endl;
55 |       exit(1);
56 |     }
57 |     if (diff > 0)
58 |       ++numSmallDifferences;
59 |   }
60 |   // An empty input has no differences; do not divide by zero.
61 |   const double percentSmallDifferences =
62 |       numElem == 0 ? 0.0 : (double)numSmallDifferences / (double)numElem;
63 |   if (percentSmallDifferences > eps2) {
64 |     std::cerr << "Total percentage of non-zero pixel difference between the "
65 |                  "two images exceeds "
66 |               << 100.0 * eps2 << "%" << std::endl;
67 |     std::cerr << "Percentage of non-zero pixel differences: "
68 |               << 100.0 * percentSmallDifferences << "%" << std::endl;
69 |     exit(1);
70 |   }
71 | }
72 | 
73 | // Uses the autodesk method of image comparison
74 | // Note that the tolerance here is in PIXELS, not a percentage of input pixels
75 | template <typename T>
76 | void checkResultsAutodesk(const T *const ref, const T *const gpu,
77 |                           size_t numElem, double variance, size_t tolerance) {
78 |   // Count elements whose absolute difference exceeds `variance`.
79 |   size_t numBadPixels = 0;
80 |   for (size_t i = 0; i < numElem; ++i) {
81 |     // Larger minus smaller so unsigned T cannot wrap; a conditional is used
82 |     // because this header does not include <algorithm> for std::min/max.
83 |     const T diff = (gpu[i] < ref[i]) ? (ref[i] - gpu[i]) : (gpu[i] - ref[i]);
84 |     if (diff > variance)
85 |       ++numBadPixels;
86 |   }
87 | 
88 |   if (numBadPixels > tolerance) {
89 |     std::cerr << "Too many bad pixels in the image." << numBadPixels << "/"
90 |               << tolerance << std::endl;
91 |     exit(1);
92 |   }
93 | }
94 | 
95 | #endif
96 | 


--------------------------------------------------------------------------------
/assignments/HW5/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | ############################################################################
 2 | # <summary> CMakeLists.txt for OpenCV and CUDA. </summary>
 3 | # <date>    2012-02-07          </date>
 4 | # <author>  Quan Tran Minh. edit by Johannes Kast, Michael Sarahan </author>
 5 | # <email>   quantm@unist.ac.kr  kast.jo@googlemail.com msarahan@gmail.com</email>
 6 | ############################################################################
 7 | 
 8 | # collect source files
 9 | 
10 | file( GLOB  hdr *.hpp *.h )
11 | 
12 | SET (HW5_files main.cu student.cu reference_calc.cpp)
13 | 
14 | CUDA_ADD_EXECUTABLE(HW5 ${HW5_files} ${hdr})
15 | 
16 | 
17 | 
18 | 


--------------------------------------------------------------------------------
/assignments/HW5/Makefile:
--------------------------------------------------------------------------------
  1 | # CMAKE generated file: DO NOT EDIT!
  2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20
  3 | 
  4 | # Default target executed when no arguments are given to make.
  5 | default_target: all
  6 | .PHONY : default_target
  7 | 
  8 | # Allow only one "make -f Makefile2" at a time, but pass parallelism.
  9 | .NOTPARALLEL:
 10 | 
 11 | #=============================================================================
 12 | # Special targets provided by cmake.
 13 | 
 14 | # Disable implicit rules so canonical targets will work.
 15 | .SUFFIXES:
 16 | 
 17 | # Disable VCS-based implicit rules.
 18 | % : %,v
 19 | 
 20 | # Disable VCS-based implicit rules.
 21 | % : RCS/%
 22 | 
 23 | # Disable VCS-based implicit rules.
 24 | % : RCS/%,v
 25 | 
 26 | # Disable VCS-based implicit rules.
 27 | % : SCCS/s.%
 28 | 
 29 | # Disable VCS-based implicit rules.
 30 | % : s.%
 31 | 
 32 | .SUFFIXES: .hpux_make_needs_suffix_list
 33 | 
 34 | # Command-line flag to silence nested $(MAKE).
 35 | $(VERBOSE)MAKESILENT = -s
 36 | 
 37 | #Suppress display of executed commands.
 38 | $(VERBOSE).SILENT:
 39 | 
 40 | # A target that is always out of date.
 41 | cmake_force:
 42 | .PHONY : cmake_force
 43 | 
 44 | #=============================================================================
 45 | # Set environment variables for the build.
 46 | 
 47 | # The shell in which to execute make rules.
 48 | SHELL = /bin/sh
 49 | 
 50 | # The CMake executable.
 51 | CMAKE_COMMAND = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake
 52 | 
 53 | # The command to remove a file.
 54 | RM = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E rm -f
 55 | 
 56 | # Escaping for special characters.
 57 | EQUALS = =
 58 | 
 59 | # The top-level source directory on which CMake was run.
 60 | CMAKE_SOURCE_DIR = /data/home/chilli/cluster/work/CS344/assignments
 61 | 
 62 | # The top-level build directory on which CMake was run.
 63 | CMAKE_BINARY_DIR = /data/home/chilli/cluster/work/CS344/assignments
 64 | 
 65 | #=============================================================================
 66 | # Targets provided globally by CMake.
 67 | 
 68 | # Special rule for the target rebuild_cache
 69 | rebuild_cache:
 70 | 	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..."
 71 | 	/usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR)
 72 | .PHONY : rebuild_cache
 73 | 
 74 | # Special rule for the target rebuild_cache
 75 | rebuild_cache/fast: rebuild_cache
 76 | .PHONY : rebuild_cache/fast
 77 | 
 78 | # Special rule for the target edit_cache
 79 | edit_cache:
 80 | 	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "No interactive CMake dialog available..."
 81 | 	/usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available.
 82 | .PHONY : edit_cache
 83 | 
 84 | # Special rule for the target edit_cache
 85 | edit_cache/fast: edit_cache
 86 | .PHONY : edit_cache/fast
 87 | 
 88 | # The main all target
 89 | all: cmake_check_build_system
 90 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles /data/home/chilli/cluster/work/CS344/assignments/homework/HW5//CMakeFiles/progress.marks
 91 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW5/all
 92 | 	$(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles 0
 93 | .PHONY : all
 94 | 
 95 | # The main clean target
 96 | clean:
 97 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW5/clean
 98 | .PHONY : clean
 99 | 
100 | # The main clean target
101 | clean/fast: clean
102 | .PHONY : clean/fast
103 | 
104 | # Prepare targets for installation.
105 | preinstall: all
106 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW5/preinstall
107 | .PHONY : preinstall
108 | 
109 | # Prepare targets for installation.
110 | preinstall/fast:
111 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW5/preinstall
112 | .PHONY : preinstall/fast
113 | 
114 | # clear depends
115 | depend:
116 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1
117 | .PHONY : depend
118 | 
119 | # Convenience name for target.
120 | homework/HW5/CMakeFiles/HW5.dir/rule:
121 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW5/CMakeFiles/HW5.dir/rule
122 | .PHONY : homework/HW5/CMakeFiles/HW5.dir/rule
123 | 
124 | # Convenience name for target.
125 | HW5: homework/HW5/CMakeFiles/HW5.dir/rule
126 | .PHONY : HW5
127 | 
128 | # fast build rule for target.
129 | HW5/fast:
130 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW5/CMakeFiles/HW5.dir/build.make homework/HW5/CMakeFiles/HW5.dir/build
131 | .PHONY : HW5/fast
132 | 
133 | reference_calc.o: reference_calc.cpp.o
134 | .PHONY : reference_calc.o
135 | 
136 | # target to build an object file
137 | reference_calc.cpp.o:
138 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW5/CMakeFiles/HW5.dir/build.make homework/HW5/CMakeFiles/HW5.dir/reference_calc.cpp.o
139 | .PHONY : reference_calc.cpp.o
140 | 
141 | reference_calc.i: reference_calc.cpp.i
142 | .PHONY : reference_calc.i
143 | 
144 | # target to preprocess a source file
145 | reference_calc.cpp.i:
146 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW5/CMakeFiles/HW5.dir/build.make homework/HW5/CMakeFiles/HW5.dir/reference_calc.cpp.i
147 | .PHONY : reference_calc.cpp.i
148 | 
149 | reference_calc.s: reference_calc.cpp.s
150 | .PHONY : reference_calc.s
151 | 
152 | # target to generate assembly for a file
153 | reference_calc.cpp.s:
154 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW5/CMakeFiles/HW5.dir/build.make homework/HW5/CMakeFiles/HW5.dir/reference_calc.cpp.s
155 | .PHONY : reference_calc.cpp.s
156 | 
157 | # Help Target
158 | help:
159 | 	@echo "The following are some of the valid targets for this Makefile:"
160 | 	@echo "... all (the default if no target is provided)"
161 | 	@echo "... clean"
162 | 	@echo "... depend"
163 | 	@echo "... edit_cache"
164 | 	@echo "... rebuild_cache"
165 | 	@echo "... HW5"
166 | 	@echo "... reference_calc.o"
167 | 	@echo "... reference_calc.i"
168 | 	@echo "... reference_calc.s"
169 | .PHONY : help
170 | 
171 | 
172 | 
173 | #=============================================================================
174 | # Special targets to cleanup operation of make.
175 | 
176 | # Special rule to run CMake to check the build system integrity.
177 | # No rule that depends on this can have commands that come from listfiles
178 | # because they might be regenerated.
179 | cmake_check_build_system:
180 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0
181 | .PHONY : cmake_check_build_system
182 | 
183 | 


--------------------------------------------------------------------------------
/assignments/HW5/main.cu:
--------------------------------------------------------------------------------
  1 | #include <cstdlib>
  2 | #include <iostream>
  3 | #include <cstdio>
  4 | #include <fstream>
  5 | #include "utils.h"
  6 | #include "timer.h"
  7 | #include <cstdio>
  8 | #include <algorithm>
  9 | #if defined(_WIN16) || defined(_WIN32) || defined(_WIN64)
 10 | #include <Windows.h>
 11 | #else
 12 | #include <sys/time.h>
 13 | #endif
 14 | 
 15 | #include <thrust/random/linear_congruential_engine.h>
 16 | #include <thrust/random/normal_distribution.h>
 17 | #include <thrust/random/uniform_int_distribution.h>
 18 | 
 19 | #include "reference_calc.h"
 20 | 
 21 | void computeHistogram(const unsigned int *const d_vals,
 22 |                       unsigned int *const d_histo, const unsigned int numBins,
 23 |                       const unsigned int numElems);
 24 | 
 25 | int main(void) {
 26 |   const unsigned int numBins = 1024;
 27 |   const unsigned int numElems = 10000 * numBins;
 28 |   const float stddev = 100.f;
 29 | 
 30 |   unsigned int *vals = new unsigned int[numElems];
 31 |   unsigned int *h_studentHisto = new unsigned int[numBins];
 32 |   unsigned int *h_refHisto = new unsigned int[numBins];
 33 | 
 34 | #if defined(_WIN16) || defined(_WIN32) || defined(_WIN64)
 35 |   srand(GetTickCount());
 36 | #else
 37 |   timeval tv;
 38 |   gettimeofday(&tv, NULL);
 39 | 
 40 |   srand(tv.tv_usec);
 41 | #endif
 42 | 
 43 |   // make the mean unpredictable, but close enough to the middle
 44 |   // so that timings are unaffected
 45 |   unsigned int mean = rand() % 100 + 462;
 46 | 
 47 |   // Output mean so that grading can happen with the same inputs
 48 |   std::cout << mean << std::endl;
 49 | 
 50 |   thrust::minstd_rand rng;
 51 | 
 52 |   thrust::random::normal_distribution<float> normalDist((float)mean, stddev);
 53 | 
 54 |   // Generate the random values, clamped to the valid bin range [0, numBins)
 55 |   for (size_t i = 0; i < numElems; ++i) {
 56 |     vals[i] =
 57 |         std::min((unsigned int)std::max((int)normalDist(rng), 0), numBins - 1);
 58 |   }
 59 | 
 60 |   unsigned int *d_vals, *d_histo;
 61 | 
 62 |   GpuTimer timer;
 63 | 
 64 |   checkCudaErrors(cudaMalloc(&d_vals, sizeof(unsigned int) * numElems));
 65 |   checkCudaErrors(cudaMalloc(&d_histo, sizeof(unsigned int) * numBins));
 66 |   checkCudaErrors(cudaMemset(d_histo, 0, sizeof(unsigned int) * numBins));
 67 | 
 68 |   checkCudaErrors(cudaMemcpy(d_vals, vals, sizeof(unsigned int) * numElems,
 69 |                              cudaMemcpyHostToDevice));
 70 | 
 71 |   timer.Start();
 72 |   computeHistogram(d_vals, d_histo, numBins, numElems);
 73 |   timer.Stop();
 74 |   int err = printf("Your code ran in: %f msecs.\n", timer.Elapsed());
 75 | 
 76 |   if (err < 0) {
 77 |     // Couldn't print! Probably the student closed stdout - bad news
 78 |     std::cerr << "Couldn't print timing information! STDOUT Closed!"
 79 |               << std::endl;
 80 |     exit(1);
 81 |   }
 82 | 
 83 |   // copy the student-computed histogram back to the host
 84 |   checkCudaErrors(cudaMemcpy(h_studentHisto, d_histo,
 85 |                              sizeof(unsigned int) * numBins,
 86 |                              cudaMemcpyDeviceToHost));
 87 | 
 88 |   // generate reference for the given mean
 89 |   reference_calculation(vals, h_refHisto, numBins, numElems);
 90 | 
 91 |   // Now do the comparison
 92 |   checkResultsExact(h_refHisto, h_studentHisto, numBins);
 93 | 
 94 |   // release every host buffer allocated above, including vals
 95 |   delete[] vals;
 96 |   delete[] h_refHisto;
 97 |   delete[] h_studentHisto;
 98 | 
 99 |   checkCudaErrors(cudaFree(d_vals));
100 |   checkCudaErrors(cudaFree(d_histo));
101 | 
102 |   return 0;
103 | }
104 | 


--------------------------------------------------------------------------------
/assignments/HW5/reference_calc.cpp:
--------------------------------------------------------------------------------
 1 | #include <cstdlib>
 2 | // Reference Histogram calculation
 3 | 
 4 | // Serial reference histogram: histo[b] receives the number of entries of vals
 5 | // equal to b, for b in [0, numBins).
 6 | void reference_calculation(const unsigned int *const vals,
 7 |                            unsigned int *const histo, const size_t numBins,
 8 |                            const size_t numElems) {
 9 |   // zero out bins
10 |   for (size_t i = 0; i < numBins; ++i)
11 |     histo[i] = 0;
12 | 
13 |   // go through vals and increment appropriate bin; a value that is not a
14 |   // valid bin index is skipped instead of writing past the end of histo
15 |   for (size_t i = 0; i < numElems; ++i) {
16 |     if (vals[i] < numBins)
17 |       histo[vals[i]]++;
18 |   }
19 | }
17 | 


--------------------------------------------------------------------------------
/assignments/HW5/reference_calc.h:
--------------------------------------------------------------------------------
 1 | #ifndef REFERENCE_H__
 2 | #define REFERENCE_H__
 3 | 
 4 | // Reference Histogram calculation
 5 | 
 6 | void reference_calculation(const unsigned int *const vals,
 7 |                            unsigned int *const histo, const size_t numBins,
 8 |                            const size_t numElems);
 9 | 
10 | #endif


--------------------------------------------------------------------------------
/assignments/HW5/student.cu:
--------------------------------------------------------------------------------
 1 | /* Udacity HW5
 2 |    Histogramming for Speed
 3 | 
 4 |    The goal of this assignment is to compute a histogram
 5 |    as fast as possible.  We have simplified the problem as much as
 6 |    possible to allow you to focus solely on the histogramming algorithm.
 7 | 
 8 |    The input values that you need to histogram are already the exact
 9 |    bins that need to be updated.  This is unlike in HW3 where you needed
10 |    to compute the range of the data and then do:
11 |    bin = (val - valMin) / valRange to determine the bin.
12 | 
13 |    Here the bin is just:
14 |    bin = val
15 | 
16 |    so the serial histogram calculation looks like:
17 |    for (i = 0; i < numElems; ++i)
18 |      histo[val[i]]++;
19 | 
20 |    That's it!  Your job is to make it run as fast as possible!
21 | 
22 |    The values are normally distributed - you may take
23 |    advantage of this fact in your implementation.
24 | 
25 | */
26 | 
27 | #include "utils.h"
28 | 
29 | // Histogram kernel skeleton: `vals` holds the numVals bin indices to count and
30 | // `histo` receives the per-bin totals.
31 | __global__ void yourHisto(const unsigned int *const vals, // INPUT
32 |                           unsigned int *const histo,      // OUTPUT
33 |                           int numVals) {
34 |   // TODO: compute the histogram here, as quickly as possible.
35 | 
36 |   // Only one kernel skeleton is provided, but you may add more kernels if
37 |   // that helps you write faster code.
38 | }
39 | 
40 | // Host wrapper the driver times: launch the histogram kernel(s) here. d_vals
41 | // holds numElems bin indices and d_histo has numBins counters.
42 | void computeHistogram(const unsigned int *const d_vals, // INPUT
43 |                       unsigned int *const d_histo,      // OUTPUT
44 |                       const unsigned int numBins, const unsigned int numElems) {
45 |   // TODO Launch the yourHisto kernel
46 | 
47 |   // if you want to use/launch more than one kernel,
48 |   // feel free
49 | 
50 |   // Report launch errors first, then wait for the device and report any error
51 |   // raised while the kernels ran.
52 |   checkCudaErrors(cudaGetLastError());
53 |   checkCudaErrors(cudaDeviceSynchronize());
54 | }
51 | 


--------------------------------------------------------------------------------
/assignments/HW5/timer.h:
--------------------------------------------------------------------------------
 1 | #ifndef GPU_TIMER_H__
 2 | #define GPU_TIMER_H__
 3 | 
 4 | #include <cuda_runtime.h>
 5 | 
 6 | // Times GPU work on stream 0 with a pair of CUDA events.
 7 | struct GpuTimer {
 8 |   cudaEvent_t start; // recorded by Start()
 9 |   cudaEvent_t stop;  // recorded by Stop()
10 | 
11 |   GpuTimer() {
12 |     cudaEventCreate(&start);
13 |     cudaEventCreate(&stop);
14 |   }
15 | 
16 |   ~GpuTimer() {
17 |     cudaEventDestroy(start);
18 |     cudaEventDestroy(stop);
19 |   }
20 | 
21 |   void Start() { cudaEventRecord(start, 0); }
22 |   void Stop() { cudaEventRecord(stop, 0); }
23 |   // Waits for `stop`, then returns the time between the two events.
24 |   float Elapsed() {
25 |     float elapsed = 0.0f; // defined result even if the CUDA calls fail
26 |     cudaEventSynchronize(stop);
27 |     cudaEventElapsedTime(&elapsed, start, stop);
28 |     return elapsed;
29 |   }
30 | };
31 | 
32 | #endif /* GPU_TIMER_H__ */
33 | 


--------------------------------------------------------------------------------
/assignments/HW5/utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef UTILS_H__
 2 | #define UTILS_H__
 3 | 
 4 | #include <iostream>
 5 | #include <iomanip>
 6 | #include <cuda.h>
 7 | #include <cuda_runtime.h>
 8 | #include <cuda_runtime_api.h>
 9 | #include <cassert>
10 | #include <cmath>
11 | 
// Wraps a CUDA call so that a failure is reported together with the text of
// the call and the file/line of the call site.
#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)

// Does nothing when err equals cudaSuccess. Otherwise prints the location,
// the CUDA error string and the offending call text (func) to std::cerr and
// terminates the process with exit status 1.
template <typename T>
void check(T err, const char *const func, const char *const file,
           const int line) {
  if (err == cudaSuccess) {
    return;
  }

  std::cerr << "CUDA error at: " << file << ":" << line << std::endl
            << cudaGetErrorString(err) << " " << func << std::endl;
  exit(1);
}
23 | 
// Verifies that the first numElem entries of gpu equal those of ref.
// On the first mismatch the position and both values are printed to
// std::cerr and the process exits with status 1; otherwise the function
// simply returns.
template <typename T>
void checkResultsExact(const T *const ref, const T *const gpu, size_t numElem) {
  // advance to the first position where the two arrays disagree
  size_t pos = 0;
  while (pos < numElem && ref[pos] == gpu[pos]) {
    ++pos;
  }

  if (pos == numElem) {
    return; // every element matched
  }

  std::cerr << "Difference at pos " << pos << std::endl;
  // unary + promotes char-sized values so they print as numbers, and leaves
  // other arithmetic types unchanged
  std::cerr << "Reference: " << std::setprecision(17) << +ref[pos]
            << "\nGPU      : " << +gpu[pos] << std::endl;
  exit(1);
}
38 | 
// Compares gpu against ref with two tolerances.
//
//   eps1 - largest absolute difference a single element may have; the first
//          element exceeding it is reported and the process exits with
//          status 1.
//   eps2 - largest allowed fraction (0..1) of elements that differ at all
//          (by at most eps1); exceeding it is reported and the process exits
//          with status 1.
//
// Both tolerances must be non-negative (asserted). An empty input trivially
// passes.
template <typename T>
void checkResultsEps(const T *const ref, const T *const gpu, size_t numElem,
                     double eps1, double eps2) {
  assert(eps1 >= 0 && eps2 >= 0);
  // size_t so the count cannot wrap for inputs with more than 2^32 elements
  size_t numSmallDifferences = 0;
  for (size_t i = 0; i < numElem; ++i) {
    // subtract smaller from larger in case of unsigned types
    T smaller = std::min(ref[i], gpu[i]);
    T larger = std::max(ref[i], gpu[i]);
    T diff = larger - smaller;
    if (diff > eps1) {
      std::cerr << "Difference at pos " << +i << " exceeds tolerance of "
                << eps1 << std::endl;
      std::cerr << "Reference: " << std::setprecision(17) << +ref[i]
                << "\nGPU      : " << +gpu[i] << std::endl;
      exit(1);
    }
    if (diff > 0) {
      numSmallDifferences++;
    }
  }

  if (numElem == 0) {
    return; // nothing to compare; avoids computing 0 / 0 below
  }

  double percentSmallDifferences =
      (double)numSmallDifferences / (double)numElem;
  if (percentSmallDifferences > eps2) {
    std::cerr << "Total percentage of non-zero pixel difference between the "
                 "two images exceeds "
              << 100.0 * eps2 << "%" << std::endl;
    std::cerr << "Percentage of non-zero pixel differences: "
              << 100.0 * percentSmallDifferences << "%" << std::endl;
    exit(1);
  }
}
72 | 
// Uses the autodesk method of image comparison.
// An element is "bad" when it differs from the reference by more than
// variance. Note that the tolerance here is a count of bad PIXELS, not a
// percentage of the input pixels: more than `tolerance` bad elements is
// reported on std::cerr and ends the process with exit status 1.
template <typename T>
void checkResultsAutodesk(const T *const ref, const T *const gpu,
                          size_t numElem, double variance, size_t tolerance) {
  size_t numBadPixels = 0;

  const T *refIt = ref;
  const T *gpuIt = gpu;
  for (size_t remaining = numElem; remaining > 0; --remaining, ++refIt, ++gpuIt) {
    // larger minus smaller keeps the result valid for unsigned types
    const T diff = std::max(*refIt, *gpuIt) - std::min(*refIt, *gpuIt);
    if (diff > variance) {
      ++numBadPixels;
    }
  }

  if (numBadPixels <= tolerance) {
    return;
  }

  std::cerr << "Too many bad pixels in the image." << numBadPixels << "/"
            << tolerance << std::endl;
  exit(1);
}
94 | 
95 | #endif
96 | 


--------------------------------------------------------------------------------
/assignments/HW6/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | ############################################################################
 2 | # <summary> CMakeLists.txt for OpenCV and CUDA. </summary>
 3 | # <date>    2012-02-07          </date>
 4 | # <author>  Quan Tran Minh. edit by Johannes Kast, Michael Sarahan </author>
 5 | # <email>   quantm@unist.ac.kr  kast.jo@googlemail.com msarahan@gmail.com</email>
 6 | ############################################################################
 7 | 
 8 | # collect source files
 9 | 
# Headers in this directory (*.hpp and *.h), collected into `hdr` so they are
# passed to the target below together with the sources.
file( GLOB  hdr *.hpp *.h )

# Source files that make up the HW6 executable.
SET (HW6_files student_func.cu HW6.cu main.cpp loadSaveImage.cpp reference_calc.cpp compare.cpp)

# Build the HW6 executable from the sources and headers listed above.
# NOTE(review): CUDA_ADD_EXECUTABLE is presumably made available by a
# find_package(CUDA) call in the parent CMakeLists.txt -- confirm.
CUDA_ADD_EXECUTABLE(HW6 ${HW6_files} ${hdr})
15 | 


--------------------------------------------------------------------------------
/assignments/HW6/HW6.cu:
--------------------------------------------------------------------------------
 1 | #include "utils.h"
 2 | #include <cuda.h>
 3 | #include <cuda_runtime.h>
 4 | #include <string>
 5 | #include <iostream>
 6 | 
 7 | #include "loadSaveImage.h"
 8 | #include <stdio.h>
 9 | 
// return types are void since any internal error will be handled by quitting
// no point in returning error codes...

// Loads the source and destination images and allocates the output buffer.
//
//   sourceImg / destImg - receive newly allocated RGBA pixel arrays filled by
//                         loadImageRGBA
//   numRows / numCols   - receive the (shared) image dimensions
//   blendedImg          - receives a new[]-allocated, uninitialized array of
//                         numRows * numCols pixels for the result
//
// The two images must have identical dimensions; a mismatch is reported on
// std::cerr and terminates the process with exit status 1. This is checked at
// run time rather than with assert() so that it also holds in builds compiled
// with NDEBUG.
void preProcess(uchar4 **sourceImg, size_t &numRows, size_t &numCols,
                uchar4 **destImg, uchar4 **blendedImg,
                const std::string &source_filename,
                const std::string &dest_filename) {

  // make sure the context initializes ok
  checkCudaErrors(cudaFree(0));

  size_t numRowsSource = 0, numColsSource = 0;
  size_t numRowsDest = 0, numColsDest = 0;

  loadImageRGBA(source_filename, sourceImg, &numRowsSource, &numColsSource);
  loadImageRGBA(dest_filename, destImg, &numRowsDest, &numColsDest);

  if (numRowsSource != numRowsDest || numColsSource != numColsDest) {
    std::cerr << "Source image (" << numRowsSource << "x" << numColsSource
              << ") and destination image (" << numRowsDest << "x"
              << numColsDest << ") must have the same dimensions!"
              << std::endl;
    exit(1);
  }

  numRows = numRowsSource;
  numCols = numColsSource;

  *blendedImg = new uchar4[numRows * numCols];
}
33 | 
// Writes the blended result to disk: the pixel buffer, its dimensions and
// the target filename are handed straight to saveImageRGBA.
void postProcess(const uchar4 *const blendedImg,
                 const size_t numRowsDest,
                 const size_t numColsDest,
                 const std::string &output_file) {
  saveImageRGBA(blendedImg, numRowsDest, numColsDest, output_file);
}
39 | 


--------------------------------------------------------------------------------
/assignments/HW6/Makefile:
--------------------------------------------------------------------------------
  1 | # CMAKE generated file: DO NOT EDIT!
  2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20
  3 | 
  4 | # Default target executed when no arguments are given to make.
  5 | default_target: all
  6 | .PHONY : default_target
  7 | 
  8 | # Allow only one "make -f Makefile2" at a time, but pass parallelism.
  9 | .NOTPARALLEL:
 10 | 
 11 | #=============================================================================
 12 | # Special targets provided by cmake.
 13 | 
 14 | # Disable implicit rules so canonical targets will work.
 15 | .SUFFIXES:
 16 | 
 17 | # Disable VCS-based implicit rules.
 18 | % : %,v
 19 | 
 20 | # Disable VCS-based implicit rules.
 21 | % : RCS/%
 22 | 
 23 | # Disable VCS-based implicit rules.
 24 | % : RCS/%,v
 25 | 
 26 | # Disable VCS-based implicit rules.
 27 | % : SCCS/s.%
 28 | 
 29 | # Disable VCS-based implicit rules.
 30 | % : s.%
 31 | 
 32 | .SUFFIXES: .hpux_make_needs_suffix_list
 33 | 
 34 | # Command-line flag to silence nested $(MAKE).
 35 | $(VERBOSE)MAKESILENT = -s
 36 | 
 37 | #Suppress display of executed commands.
 38 | $(VERBOSE).SILENT:
 39 | 
 40 | # A target that is always out of date.
 41 | cmake_force:
 42 | .PHONY : cmake_force
 43 | 
 44 | #=============================================================================
 45 | # Set environment variables for the build.
 46 | 
 47 | # The shell in which to execute make rules.
 48 | SHELL = /bin/sh
 49 | 
 50 | # The CMake executable.
 51 | CMAKE_COMMAND = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake
 52 | 
 53 | # The command to remove a file.
 54 | RM = /usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E rm -f
 55 | 
 56 | # Escaping for special characters.
 57 | EQUALS = =
 58 | 
 59 | # The top-level source directory on which CMake was run.
 60 | CMAKE_SOURCE_DIR = /data/home/chilli/cluster/work/CS344/assignments
 61 | 
 62 | # The top-level build directory on which CMake was run.
 63 | CMAKE_BINARY_DIR = /data/home/chilli/cluster/work/CS344/assignments
 64 | 
 65 | #=============================================================================
 66 | # Targets provided globally by CMake.
 67 | 
 68 | # Special rule for the target rebuild_cache
 69 | rebuild_cache:
 70 | 	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..."
 71 | 	/usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR)
 72 | .PHONY : rebuild_cache
 73 | 
 74 | # Special rule for the target rebuild_cache
 75 | rebuild_cache/fast: rebuild_cache
 76 | .PHONY : rebuild_cache/fast
 77 | 
 78 | # Special rule for the target edit_cache
 79 | edit_cache:
 80 | 	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "No interactive CMake dialog available..."
 81 | 	/usr/local/lib/python3.6/dist-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available.
 82 | .PHONY : edit_cache
 83 | 
 84 | # Special rule for the target edit_cache
 85 | edit_cache/fast: edit_cache
 86 | .PHONY : edit_cache/fast
 87 | 
 88 | # The main all target
 89 | all: cmake_check_build_system
 90 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles /data/home/chilli/cluster/work/CS344/assignments/homework/HW6//CMakeFiles/progress.marks
 91 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW6/all
 92 | 	$(CMAKE_COMMAND) -E cmake_progress_start /data/home/chilli/cluster/work/CS344/assignments/CMakeFiles 0
 93 | .PHONY : all
 94 | 
 95 | # The main clean target
 96 | clean:
 97 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW6/clean
 98 | .PHONY : clean
 99 | 
100 | # The main clean target
101 | clean/fast: clean
102 | .PHONY : clean/fast
103 | 
104 | # Prepare targets for installation.
105 | preinstall: all
106 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW6/preinstall
107 | .PHONY : preinstall
108 | 
109 | # Prepare targets for installation.
110 | preinstall/fast:
111 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW6/preinstall
112 | .PHONY : preinstall/fast
113 | 
114 | # clear depends
115 | depend:
116 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1
117 | .PHONY : depend
118 | 
119 | # Convenience name for target.
120 | homework/HW6/CMakeFiles/HW6.dir/rule:
121 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 homework/HW6/CMakeFiles/HW6.dir/rule
122 | .PHONY : homework/HW6/CMakeFiles/HW6.dir/rule
123 | 
124 | # Convenience name for target.
125 | HW6: homework/HW6/CMakeFiles/HW6.dir/rule
126 | .PHONY : HW6
127 | 
128 | # fast build rule for target.
129 | HW6/fast:
130 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/build
131 | .PHONY : HW6/fast
132 | 
133 | compare.o: compare.cpp.o
134 | .PHONY : compare.o
135 | 
136 | # target to build an object file
137 | compare.cpp.o:
138 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/compare.cpp.o
139 | .PHONY : compare.cpp.o
140 | 
141 | compare.i: compare.cpp.i
142 | .PHONY : compare.i
143 | 
144 | # target to preprocess a source file
145 | compare.cpp.i:
146 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/compare.cpp.i
147 | .PHONY : compare.cpp.i
148 | 
149 | compare.s: compare.cpp.s
150 | .PHONY : compare.s
151 | 
152 | # target to generate assembly for a file
153 | compare.cpp.s:
154 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/compare.cpp.s
155 | .PHONY : compare.cpp.s
156 | 
157 | loadSaveImage.o: loadSaveImage.cpp.o
158 | .PHONY : loadSaveImage.o
159 | 
160 | # target to build an object file
161 | loadSaveImage.cpp.o:
162 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/loadSaveImage.cpp.o
163 | .PHONY : loadSaveImage.cpp.o
164 | 
165 | loadSaveImage.i: loadSaveImage.cpp.i
166 | .PHONY : loadSaveImage.i
167 | 
168 | # target to preprocess a source file
169 | loadSaveImage.cpp.i:
170 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/loadSaveImage.cpp.i
171 | .PHONY : loadSaveImage.cpp.i
172 | 
173 | loadSaveImage.s: loadSaveImage.cpp.s
174 | .PHONY : loadSaveImage.s
175 | 
176 | # target to generate assembly for a file
177 | loadSaveImage.cpp.s:
178 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/loadSaveImage.cpp.s
179 | .PHONY : loadSaveImage.cpp.s
180 | 
181 | main.o: main.cpp.o
182 | .PHONY : main.o
183 | 
184 | # target to build an object file
185 | main.cpp.o:
186 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/main.cpp.o
187 | .PHONY : main.cpp.o
188 | 
189 | main.i: main.cpp.i
190 | .PHONY : main.i
191 | 
192 | # target to preprocess a source file
193 | main.cpp.i:
194 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/main.cpp.i
195 | .PHONY : main.cpp.i
196 | 
197 | main.s: main.cpp.s
198 | .PHONY : main.s
199 | 
200 | # target to generate assembly for a file
201 | main.cpp.s:
202 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/main.cpp.s
203 | .PHONY : main.cpp.s
204 | 
205 | reference_calc.o: reference_calc.cpp.o
206 | .PHONY : reference_calc.o
207 | 
208 | # target to build an object file
209 | reference_calc.cpp.o:
210 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/reference_calc.cpp.o
211 | .PHONY : reference_calc.cpp.o
212 | 
213 | reference_calc.i: reference_calc.cpp.i
214 | .PHONY : reference_calc.i
215 | 
216 | # target to preprocess a source file
217 | reference_calc.cpp.i:
218 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/reference_calc.cpp.i
219 | .PHONY : reference_calc.cpp.i
220 | 
221 | reference_calc.s: reference_calc.cpp.s
222 | .PHONY : reference_calc.s
223 | 
224 | # target to generate assembly for a file
225 | reference_calc.cpp.s:
226 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(MAKE) $(MAKESILENT) -f homework/HW6/CMakeFiles/HW6.dir/build.make homework/HW6/CMakeFiles/HW6.dir/reference_calc.cpp.s
227 | .PHONY : reference_calc.cpp.s
228 | 
229 | # Help Target
230 | help:
231 | 	@echo "The following are some of the valid targets for this Makefile:"
232 | 	@echo "... all (the default if no target is provided)"
233 | 	@echo "... clean"
234 | 	@echo "... depend"
235 | 	@echo "... edit_cache"
236 | 	@echo "... rebuild_cache"
237 | 	@echo "... HW6"
238 | 	@echo "... compare.o"
239 | 	@echo "... compare.i"
240 | 	@echo "... compare.s"
241 | 	@echo "... loadSaveImage.o"
242 | 	@echo "... loadSaveImage.i"
243 | 	@echo "... loadSaveImage.s"
244 | 	@echo "... main.o"
245 | 	@echo "... main.i"
246 | 	@echo "... main.s"
247 | 	@echo "... reference_calc.o"
248 | 	@echo "... reference_calc.i"
249 | 	@echo "... reference_calc.s"
250 | .PHONY : help
251 | 
252 | 
253 | 
254 | #=============================================================================
255 | # Special targets to cleanup operation of make.
256 | 
257 | # Special rule to run CMake to check the build system integrity.
258 | # No rule that depends on this can have commands that come from listfiles
259 | # because they might be regenerated.
260 | cmake_check_build_system:
261 | 	cd /data/home/chilli/cluster/work/CS344/assignments && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0
262 | .PHONY : cmake_check_build_system
263 | 
264 | 


--------------------------------------------------------------------------------
/assignments/HW6/blended.gold:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW6/blended.gold


--------------------------------------------------------------------------------
/assignments/HW6/compare.cpp:
--------------------------------------------------------------------------------
 1 | #include <opencv2/opencv.hpp>
 2 | #include "utils.h"
 3 | 
 4 | void compareImages(std::string reference_filename, std::string test_filename,
 5 |                    bool useEpsCheck, double perPixelError, double globalError) {
 6 |   cv::Mat reference = cv::imread(reference_filename, -1);
 7 |   cv::Mat test = cv::imread(test_filename, -1);
 8 | 
 9 |   cv::Mat diff = abs(reference - test);
10 | 
11 |   cv::Mat diffSingleChannel =
12 |       diff.reshape(1, 0); // convert to 1 channel, same # rows
13 | 
14 |   double minVal, maxVal;
15 | 
16 |   cv::minMaxLoc(diffSingleChannel, &minVal, &maxVal, NULL,
17 |                 NULL); // NULL because we don't care about location
18 | 
19 |   // now perform transform so that we bump values to the full range
20 | 
21 |   diffSingleChannel = (diffSingleChannel - minVal) * (255. / (maxVal - minVal));
22 | 
23 |   diff = diffSingleChannel.reshape(reference.channels(), 0);
24 | 
25 |   cv::imwrite("HW6_differenceImage.png", diff);
26 |   // OK, now we can start comparing values...
27 |   unsigned char *referencePtr = reference.ptr<unsigned char>(0);
28 |   unsigned char *testPtr = test.ptr<unsigned char>(0);
29 | 
30 |   if (useEpsCheck) {
31 |     checkResultsEps(referencePtr, testPtr,
32 |                     reference.rows * reference.cols * reference.channels(),
33 |                     perPixelError, globalError);
34 |   } else {
35 |     checkResultsExact(referencePtr, testPtr,
36 |                       reference.rows * reference.cols * reference.channels());
37 |   }
38 | 
39 |   std::cout << "PASS" << std::endl;
40 |   return;
41 | }
42 | 


--------------------------------------------------------------------------------
/assignments/HW6/compare.h:
--------------------------------------------------------------------------------
1 | #ifndef HW3_H__
2 | #define HW3_H__
3 | 
// Compares the image file test_filename against reference_filename.
// When useEpsCheck is true the comparison tolerates differences bounded by
// perPixelError (per value) and globalError (overall); otherwise the two
// images must match exactly.
void compareImages(std::string reference_filename, std::string test_filename,
                   bool useEpsCheck, double perPixelError, double globalError);
6 | 
7 | #endif
8 | 


--------------------------------------------------------------------------------
/assignments/HW6/destination.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW6/destination.png


--------------------------------------------------------------------------------
/assignments/HW6/loadSaveImage.cpp:
--------------------------------------------------------------------------------
  1 | #include <opencv2/core/core.hpp>
  2 | #include <opencv2/highgui/highgui.hpp>
  3 | #include <opencv2/opencv.hpp>
  4 | #include <vector>
  5 | #include "cuda_runtime.h"
  6 | 
  7 | // The caller becomes responsible for the returned pointer. This
  8 | // is done in the interest of keeping this code as simple as possible.
  9 | // In production code this is a bad idea - we should use RAII
 10 | // to ensure the memory is freed.  DO NOT COPY THIS AND USE IN PRODUCTION
 11 | // CODE!!!
 12 | void loadImageHDR(const std::string &filename, float **imagePtr,
 13 |                   size_t *numRows, size_t *numCols) {
 14 |   cv::Mat image = cv::imread(filename.c_str(),
 15 |                              CV_LOAD_IMAGE_COLOR | CV_LOAD_IMAGE_ANYDEPTH);
 16 |   if (image.empty()) {
 17 |     std::cerr << "Couldn't open file: " << filename << std::endl;
 18 |     exit(1);
 19 |   }
 20 | 
 21 |   if (image.channels() != 3) {
 22 |     std::cerr << "Image must be color!" << std::endl;
 23 |     exit(1);
 24 |   }
 25 | 
 26 |   if (!image.isContinuous()) {
 27 |     std::cerr << "Image isn't continuous!" << std::endl;
 28 |     exit(1);
 29 |   }
 30 | 
 31 |   *imagePtr = new float[image.rows * image.cols * image.channels()];
 32 | 
 33 |   float *cvPtr = image.ptr<float>(0);
 34 |   for (size_t i = 0; i < image.rows * image.cols * image.channels(); ++i)
 35 |     (*imagePtr)[i] = cvPtr[i];
 36 | 
 37 |   *numRows = image.rows;
 38 |   *numCols = image.cols;
 39 | }
 40 | 
 41 | void loadImageGrey(const std::string &filename, unsigned char **imagePtr,
 42 |                    size_t *numRows, size_t *numCols) {
 43 |   cv::Mat image = cv::imread(filename.c_str(), CV_LOAD_IMAGE_GRAYSCALE);
 44 |   if (image.empty()) {
 45 |     std::cerr << "Couldn't open file: " << filename << std::endl;
 46 |     exit(1);
 47 |   }
 48 | 
 49 |   if (image.channels() != 1) {
 50 |     std::cerr << "Image must be greyscale!" << std::endl;
 51 |     exit(1);
 52 |   }
 53 | 
 54 |   if (!image.isContinuous()) {
 55 |     std::cerr << "Image isn't continuous!" << std::endl;
 56 |     exit(1);
 57 |   }
 58 | 
 59 |   *imagePtr = new unsigned char[image.rows * image.cols];
 60 | 
 61 |   unsigned char *cvPtr = image.ptr<unsigned char>(0);
 62 |   for (size_t i = 0; i < image.rows * image.cols; ++i) {
 63 |     (*imagePtr)[i] = cvPtr[i];
 64 |   }
 65 | 
 66 |   *numRows = image.rows;
 67 |   *numCols = image.cols;
 68 | }
 69 | void loadImageRGBA(const std::string &filename, uchar4 **imagePtr,
 70 |                    size_t *numRows, size_t *numCols) {
 71 |   cv::Mat image = cv::imread(filename.c_str(), CV_LOAD_IMAGE_COLOR);
 72 |   if (image.empty()) {
 73 |     std::cerr << "Couldn't open file: " << filename << std::endl;
 74 |     exit(1);
 75 |   }
 76 | 
 77 |   if (image.channels() != 3) {
 78 |     std::cerr << "Image must be color!" << std::endl;
 79 |     exit(1);
 80 |   }
 81 | 
 82 |   if (!image.isContinuous()) {
 83 |     std::cerr << "Image isn't continuous!" << std::endl;
 84 |     exit(1);
 85 |   }
 86 | 
 87 |   cv::Mat imageRGBA;
 88 |   cv::cvtColor(image, imageRGBA, CV_BGR2RGBA);
 89 | 
 90 |   *imagePtr = new uchar4[image.rows * image.cols];
 91 | 
 92 |   unsigned char *cvPtr = imageRGBA.ptr<unsigned char>(0);
 93 |   for (size_t i = 0; i < image.rows * image.cols; ++i) {
 94 |     (*imagePtr)[i].x = cvPtr[4 * i + 0];
 95 |     (*imagePtr)[i].y = cvPtr[4 * i + 1];
 96 |     (*imagePtr)[i].z = cvPtr[4 * i + 2];
 97 |     (*imagePtr)[i].w = cvPtr[4 * i + 3];
 98 |   }
 99 | 
100 |   *numRows = image.rows;
101 |   *numCols = image.cols;
102 | }
103 | 
104 | void saveImageRGBA(const uchar4 *const image, const size_t numRows,
105 |                    const size_t numCols, const std::string &output_file) {
106 |   int sizes[2];
107 |   sizes[0] = numRows;
108 |   sizes[1] = numCols;
109 |   cv::Mat imageRGBA(2, sizes, CV_8UC4, (void *)image);
110 |   cv::Mat imageOutputBGR;
111 |   cv::cvtColor(imageRGBA, imageOutputBGR, CV_RGBA2BGR);
112 |   // output the image
113 |   cv::imwrite(output_file.c_str(), imageOutputBGR);
114 | }
115 | 
116 | // output an exr file
117 | // assumed to already be BGR
118 | void saveImageHDR(const float *const image, const size_t numRows,
119 |                   const size_t numCols, const std::string &output_file) {
120 |   int sizes[2];
121 |   sizes[0] = numRows;
122 |   sizes[1] = numCols;
123 | 
124 |   cv::Mat imageHDR(2, sizes, CV_32FC3, (void *)image);
125 | 
126 |   imageHDR = imageHDR * 255;
127 | 
128 |   cv::imwrite(output_file.c_str(), imageHDR);
129 | }
130 | 


--------------------------------------------------------------------------------
/assignments/HW6/loadSaveImage.h:
--------------------------------------------------------------------------------
 1 | #ifndef LOADSAVEIMAGE_H__
 2 | #define LOADSAVEIMAGE_H__
 3 | 
 4 | #include <string>
 5 | #include <cuda_runtime.h> //for uchar4
 6 | 
// Loads a 3-channel image as floats. *imagePtr receives a newly allocated
// array of rows * cols * 3 values that the caller must release with delete[];
// *numRows / *numCols receive the dimensions. Exits the process on failure.
void loadImageHDR(const std::string &filename, float **imagePtr,
                  size_t *numRows, size_t *numCols);

// Loads a color image as RGBA pixels. *imagePtr receives a newly allocated
// array of rows * cols uchar4 values that the caller must release with
// delete[]; *numRows / *numCols receive the dimensions. Exits the process on
// failure.
void loadImageRGBA(const std::string &filename, uchar4 **imagePtr,
                   size_t *numRows, size_t *numCols);

// Loads an image as single-channel greyscale. *imagePtr receives a newly
// allocated array of rows * cols bytes that the caller must release with
// delete[]; *numRows / *numCols receive the dimensions. Exits the process on
// failure.
void loadImageGrey(const std::string &filename, unsigned char **imagePtr,
                   size_t *numRows, size_t *numCols);

// Writes numRows x numCols RGBA pixels to output_file (converted to BGR
// before writing).
void saveImageRGBA(const uchar4 *const image, const size_t numRows,
                   const size_t numCols, const std::string &output_file);

// Writes numRows x numCols 3-channel float pixels to output_file with the
// values scaled by 255.
void saveImageHDR(const float *const image, const size_t numRows,
                  const size_t numCols, const std::string &output_file);
21 | 
22 | #endif
23 | 


--------------------------------------------------------------------------------
/assignments/HW6/main.cpp:
--------------------------------------------------------------------------------
  1 | // Udacity HW6 Driver
  2 | 
  3 | #include <iostream>
  4 | #include "timer.h"
  5 | #include "utils.h"
  6 | #include <string>
  7 | #include <stdio.h>
  8 | 
  9 | #include <opencv2/core/core.hpp>
 10 | #include <opencv2/highgui/highgui.hpp>
 11 | #include <opencv2/opencv.hpp>
 12 | 
 13 | #include "reference_calc.h"
 14 | #include "compare.h"
 15 | 
// Loads the source and destination images, reports their common dimensions
// through numRowsSource / numColsSource and allocates the output buffer
// *blendedImg. All three pixel arrays are released with delete[] by main().
void preProcess(uchar4 **sourceImg, size_t &numRowsSource,
                size_t &numColsSource, uchar4 **destImg, uchar4 **blendedImg,
                const std::string &source_filename,
                const std::string &dest_filename);

// Saves blendedImg (numRowsDest x numColsDest pixels) to output_file.
void postProcess(const uchar4 *const blendedImg, const size_t numRowsDest,
                 const size_t numColsDest, const std::string &output_file);

// The students' blending routine; main() passes it host pixel arrays.
// NOTE(review): presumably fills blendedImg with the result of blending
// sourceImg into destImg -- its definition is not in this file, confirm
// against student_func.cu.
void your_blend(const uchar4 *const sourceImg, const size_t numRowsSource,
                const size_t numColsSource, const uchar4 *const destImg,
                uchar4 *const blendedImg);
 27 | 
 28 | int main(int argc, char **argv) {
 29 |   uchar4 *h_sourceImg, *h_destImg, *h_blendedImg;
 30 |   size_t numRowsSource, numColsSource;
 31 | 
 32 |   std::string input_source_file;
 33 |   std::string input_dest_file;
 34 |   std::string output_file;
 35 | 
 36 |   std::string reference_file;
 37 |   double perPixelError = 0.0;
 38 |   double globalError = 0.0;
 39 |   bool useEpsCheck = false;
 40 | 
 41 |   switch (argc) {
 42 |   case 3:
 43 |     input_source_file = std::string(argv[1]);
 44 |     input_dest_file = std::string(argv[2]);
 45 |     output_file = "HW6_output.png";
 46 |     reference_file = "HW6_reference.png";
 47 |     break;
 48 |   case 4:
 49 |     input_source_file = std::string(argv[1]);
 50 |     input_dest_file = std::string(argv[2]);
 51 |     output_file = std::string(argv[3]);
 52 |     reference_file = "HW6_reference.png";
 53 |     break;
 54 |   case 5:
 55 |     input_source_file = std::string(argv[1]);
 56 |     input_dest_file = std::string(argv[2]);
 57 |     output_file = std::string(argv[3]);
 58 |     reference_file = std::string(argv[4]);
 59 |     break;
 60 |   case 7:
 61 |     useEpsCheck = true;
 62 |     input_source_file = std::string(argv[1]);
 63 |     input_dest_file = std::string(argv[2]);
 64 |     output_file = std::string(argv[3]);
 65 |     reference_file = std::string(argv[4]);
 66 |     perPixelError = atof(argv[5]);
 67 |     globalError = atof(argv[6]);
 68 |     break;
 69 |   default:
 70 |     std::cerr << "Usage: ./HW6 input_source_file input_dest_filename "
 71 |                  "[output_filename] [reference_filename] [perPixelError] "
 72 |                  "[globalError]"
 73 |               << std::endl;
 74 |     exit(1);
 75 |   }
 76 | 
 77 |   // load the image and give us our input and output pointers
 78 |   preProcess(&h_sourceImg, numRowsSource, numColsSource, &h_destImg,
 79 |              &h_blendedImg, input_source_file, input_dest_file);
 80 | 
 81 |   GpuTimer timer;
 82 |   timer.Start();
 83 | 
 84 |   // call the students' code
 85 |   your_blend(h_sourceImg, numRowsSource, numColsSource, h_destImg,
 86 |              h_blendedImg);
 87 | 
 88 |   timer.Stop();
 89 |   cudaDeviceSynchronize();
 90 |   checkCudaErrors(cudaGetLastError());
 91 |   int err = printf("Your code ran in: %f msecs.\n", timer.Elapsed());
 92 |   printf("\n");
 93 |   if (err < 0) {
 94 |     // Couldn't print! Probably the student closed stdout - bad news
 95 |     std::cerr << "Couldn't print timing information! STDOUT Closed!"
 96 |               << std::endl;
 97 |     exit(1);
 98 |   }
 99 | 
100 |   // check results and output the tone-mapped image
101 |   postProcess(h_blendedImg, numRowsSource, numColsSource, output_file);
102 | 
103 |   // calculate the reference image
104 |   uchar4 *h_reference = new uchar4[numRowsSource * numColsSource];
105 |   reference_calc(h_sourceImg, numRowsSource, numColsSource, h_destImg,
106 |                  h_reference);
107 | 
108 |   // save the reference image
109 |   postProcess(h_reference, numRowsSource, numColsSource, reference_file);
110 | 
111 |   compareImages(reference_file, output_file, useEpsCheck, perPixelError,
112 |                 globalError);
113 | 
114 |   delete[] h_reference;
115 |   delete[] h_destImg;
116 |   delete[] h_sourceImg;
117 |   delete[] h_blendedImg;
118 |   return 0;
119 | }
120 | 


--------------------------------------------------------------------------------
/assignments/HW6/reference_calc.cpp:
--------------------------------------------------------------------------------
  1 | // Udacity HW 6
  2 | // Poisson Blending Reference Calculation
  3 | 
  4 | #include "utils.h"
  5 | #include <thrust/host_vector.h>
  6 | 
  7 | // Performs one iteration of the solver
  8 | void computeIteration(const unsigned char *const dstImg,
  9 |                       const unsigned char *const strictInteriorPixels,
 10 |                       const unsigned char *const borderPixels,
 11 |                       const std::vector<uint2> &interiorPixelList,
 12 |                       const size_t numColsSource, const float *const f,
 13 |                       const float *const g, float *const f_next) {
 14 |   unsigned int off =
 15 |       interiorPixelList[0].x * numColsSource + interiorPixelList[0].y;
 16 | 
 17 |   for (size_t i = 0; i < interiorPixelList.size(); ++i) {
 18 |     float blendedSum = 0.f;
 19 |     float borderSum = 0.f;
 20 | 
 21 |     uint2 coord = interiorPixelList[i];
 22 | 
 23 |     unsigned int offset = coord.x * numColsSource + coord.y;
 24 | 
 25 |     // process all 4 neighbor pixels
 26 |     // for each pixel if it is an interior pixel
 27 |     // then we add the previous f, otherwise if it is a
 28 |     // border pixel then we add the value of the destination
 29 |     // image at the border.  These border values are our boundary
 30 |     // conditions.
 31 |     if (strictInteriorPixels[offset - 1]) {
 32 |       blendedSum += f[offset - 1];
 33 |     } else {
 34 |       borderSum += dstImg[offset - 1];
 35 |     }
 36 | 
 37 |     if (strictInteriorPixels[offset + 1]) {
 38 |       blendedSum += f[offset + 1];
 39 |     } else {
 40 |       borderSum += dstImg[offset + 1];
 41 |     }
 42 | 
 43 |     if (strictInteriorPixels[offset - numColsSource]) {
 44 |       blendedSum += f[offset - numColsSource];
 45 |     } else {
 46 |       borderSum += dstImg[offset - numColsSource];
 47 |     }
 48 | 
 49 |     if (strictInteriorPixels[offset + numColsSource]) {
 50 |       blendedSum += f[offset + numColsSource];
 51 |     } else {
 52 |       borderSum += dstImg[offset + numColsSource];
 53 |     }
 54 | 
 55 |     float f_next_val = (blendedSum + borderSum + g[offset]) / 4.f;
 56 | 
 57 |     f_next[offset] =
 58 |         std::min(255.f, std::max(0.f, f_next_val)); // clip to [0, 255]
 59 |   }
 60 | }
 61 | 
 62 | // pre-compute the values of g, which depend only the source image
 63 | // and aren't iteration dependent.
 64 | void computeG(const unsigned char *const channel, float *const g,
 65 |               const size_t numColsSource,
 66 |               const std::vector<uint2> &interiorPixelList) {
 67 |   for (size_t i = 0; i < interiorPixelList.size(); ++i) {
 68 |     uint2 coord = interiorPixelList[i];
 69 |     unsigned int offset = coord.x * numColsSource + coord.y;
 70 | 
 71 |     float sum = 4.f * channel[offset];
 72 | 
 73 |     sum -= (float)channel[offset - 1] + (float)channel[offset + 1];
 74 |     sum -= (float)channel[offset + numColsSource] +
 75 |            (float)channel[offset - numColsSource];
 76 | 
 77 |     g[offset] = sum;
 78 |   }
 79 | }
 80 | 
 81 | void reference_calc(const uchar4 *const h_sourceImg, const size_t numRowsSource,
 82 |                     const size_t numColsSource, const uchar4 *const h_destImg,
 83 |                     uchar4 *const h_blendedImg) {
 84 | 
 85 |   // we need to create a list of border pixels and interior pixels
 86 |   // this is a conceptually simple implementation, not a particularly efficient
 87 |   // one...
 88 | 
 89 |   // first create mask
 90 |   size_t srcSize = numRowsSource * numColsSource;
 91 |   unsigned char *mask = new unsigned char[srcSize];
 92 | 
 93 |   for (int i = 0; i < srcSize; ++i) {
 94 |     mask[i] = (h_sourceImg[i].x + h_sourceImg[i].y + h_sourceImg[i].z < 3 * 255)
 95 |                   ? 1
 96 |                   : 0;
 97 |   }
 98 | 
 99 |   // next compute strictly interior pixels and border pixels
100 |   unsigned char *borderPixels = new unsigned char[srcSize];
101 |   unsigned char *strictInteriorPixels = new unsigned char[srcSize];
102 | 
103 |   std::vector<uint2> interiorPixelList;
104 | 
105 |   // the source region in the homework isn't near an image boundary, so we can
106 |   // simplify the conditionals a little...
107 |   for (size_t r = 1; r < numRowsSource - 1; ++r) {
108 |     for (size_t c = 1; c < numColsSource - 1; ++c) {
109 |       if (mask[r * numColsSource + c]) {
110 |         if (mask[(r - 1) * numColsSource + c] &&
111 |             mask[(r + 1) * numColsSource + c] &&
112 |             mask[r * numColsSource + c - 1] &&
113 |             mask[r * numColsSource + c + 1]) {
114 |           strictInteriorPixels[r * numColsSource + c] = 1;
115 |           borderPixels[r * numColsSource + c] = 0;
116 |           interiorPixelList.push_back(make_uint2(r, c));
117 |         } else {
118 |           strictInteriorPixels[r * numColsSource + c] = 0;
119 |           borderPixels[r * numColsSource + c] = 1;
120 |         }
121 |       } else {
122 |         strictInteriorPixels[r * numColsSource + c] = 0;
123 |         borderPixels[r * numColsSource + c] = 0;
124 |       }
125 |     }
126 |   }
127 | 
128 |   // split the source and destination images into their respective
129 |   // channels
130 |   unsigned char *red_src = new unsigned char[srcSize];
131 |   unsigned char *blue_src = new unsigned char[srcSize];
132 |   unsigned char *green_src = new unsigned char[srcSize];
133 | 
134 |   for (int i = 0; i < srcSize; ++i) {
135 |     red_src[i] = h_sourceImg[i].x;
136 |     blue_src[i] = h_sourceImg[i].y;
137 |     green_src[i] = h_sourceImg[i].z;
138 |   }
139 | 
140 |   unsigned char *red_dst = new unsigned char[srcSize];
141 |   unsigned char *blue_dst = new unsigned char[srcSize];
142 |   unsigned char *green_dst = new unsigned char[srcSize];
143 | 
144 |   for (int i = 0; i < srcSize; ++i) {
145 |     red_dst[i] = h_destImg[i].x;
146 |     blue_dst[i] = h_destImg[i].y;
147 |     green_dst[i] = h_destImg[i].z;
148 |   }
149 | 
150 |   // next we'll precompute the g term - it never changes, no need to recompute
151 |   // every iteration
152 |   float *g_red = new float[srcSize];
153 |   float *g_blue = new float[srcSize];
154 |   float *g_green = new float[srcSize];
155 | 
156 |   memset(g_red, 0, srcSize * sizeof(float));
157 |   memset(g_blue, 0, srcSize * sizeof(float));
158 |   memset(g_green, 0, srcSize * sizeof(float));
159 | 
160 |   computeG(red_src, g_red, numColsSource, interiorPixelList);
161 |   computeG(blue_src, g_blue, numColsSource, interiorPixelList);
162 |   computeG(green_src, g_green, numColsSource, interiorPixelList);
163 | 
164 |   // for each color channel we'll need two buffers and we'll ping-pong between
165 |   // them
166 |   float *blendedValsRed_1 = new float[srcSize];
167 |   float *blendedValsRed_2 = new float[srcSize];
168 | 
169 |   float *blendedValsBlue_1 = new float[srcSize];
170 |   float *blendedValsBlue_2 = new float[srcSize];
171 | 
172 |   float *blendedValsGreen_1 = new float[srcSize];
173 |   float *blendedValsGreen_2 = new float[srcSize];
174 | 
175 |   // IC is the source image, copy over
176 |   for (size_t i = 0; i < srcSize; ++i) {
177 |     blendedValsRed_1[i] = red_src[i];
178 |     blendedValsRed_2[i] = red_src[i];
179 |     blendedValsBlue_1[i] = blue_src[i];
180 |     blendedValsBlue_2[i] = blue_src[i];
181 |     blendedValsGreen_1[i] = green_src[i];
182 |     blendedValsGreen_2[i] = green_src[i];
183 |   }
184 | 
185 |   // Perform the solve on each color channel
186 |   const size_t numIterations = 800;
187 |   for (size_t i = 0; i < numIterations; ++i) {
188 |     computeIteration(red_dst, strictInteriorPixels, borderPixels,
189 |                      interiorPixelList, numColsSource, blendedValsRed_1, g_red,
190 |                      blendedValsRed_2);
191 | 
192 |     std::swap(blendedValsRed_1, blendedValsRed_2);
193 |   }
194 | 
195 |   for (size_t i = 0; i < numIterations; ++i) {
196 |     computeIteration(blue_dst, strictInteriorPixels, borderPixels,
197 |                      interiorPixelList, numColsSource, blendedValsBlue_1,
198 |                      g_blue, blendedValsBlue_2);
199 | 
200 |     std::swap(blendedValsBlue_1, blendedValsBlue_2);
201 |   }
202 | 
203 |   for (size_t i = 0; i < numIterations; ++i) {
204 |     computeIteration(green_dst, strictInteriorPixels, borderPixels,
205 |                      interiorPixelList, numColsSource, blendedValsGreen_1,
206 |                      g_green, blendedValsGreen_2);
207 | 
208 |     std::swap(blendedValsGreen_1, blendedValsGreen_2);
209 |   }
210 |   std::swap(blendedValsRed_1, blendedValsRed_2);     // put output into _2
211 |   std::swap(blendedValsBlue_1, blendedValsBlue_2);   // put output into _2
212 |   std::swap(blendedValsGreen_1, blendedValsGreen_2); // put output into _2
213 | 
214 |   // copy the destination image to the output
215 |   memcpy(h_blendedImg, h_destImg, sizeof(uchar4) * srcSize);
216 | 
217 |   // copy computed values for the interior into the output
218 |   for (size_t i = 0; i < interiorPixelList.size(); ++i) {
219 |     uint2 coord = interiorPixelList[i];
220 | 
221 |     unsigned int offset = coord.x * numColsSource + coord.y;
222 | 
223 |     h_blendedImg[offset].x = blendedValsRed_2[offset];
224 |     h_blendedImg[offset].y = blendedValsBlue_2[offset];
225 |     h_blendedImg[offset].z = blendedValsGreen_2[offset];
226 |   }
227 | 
228 |   // wow, we allocated a lot of memory!
229 |   delete[] mask;
230 |   delete[] blendedValsRed_1;
231 |   delete[] blendedValsRed_2;
232 |   delete[] blendedValsBlue_1;
233 |   delete[] blendedValsBlue_2;
234 |   delete[] blendedValsGreen_1;
235 |   delete[] blendedValsGreen_2;
236 |   delete[] g_red;
237 |   delete[] g_blue;
238 |   delete[] g_green;
239 |   delete[] red_src;
240 |   delete[] red_dst;
241 |   delete[] blue_src;
242 |   delete[] blue_dst;
243 |   delete[] green_src;
244 |   delete[] green_dst;
245 |   delete[] borderPixels;
246 |   delete[] strictInteriorPixels;
247 | }
248 | 


--------------------------------------------------------------------------------
/assignments/HW6/reference_calc.h:
--------------------------------------------------------------------------------
1 | #ifndef REFERENCE_H__
2 | #define REFERENCE_H__
3 | 
// CPU reference implementation of the Poisson blend (defined in
// reference_calc.cpp).
//
//   h_sourceImg   - source image; pure-white pixels are excluded from the mask
//   numRowsSource - number of rows of the images
//   numColsSource - number of columns of the images
//   h_destImg     - destination image the source region is blended into
//   h_blendedImg  - output image
//
// NOTE: this header includes nothing itself, so uchar4 and size_t must
// already be declared by the including file before this header is included.
void reference_calc(const uchar4 *const h_sourceImg, const size_t numRowsSource,
                    const size_t numColsSource, const uchar4 *const h_destImg,
                    uchar4 *const h_blendedImg);
7 | 
8 | #endif
9 | 


--------------------------------------------------------------------------------
/assignments/HW6/source.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chillee/CS344_2021/e02fb5dae079e8921af0e579d482f61dc467510b/assignments/HW6/source.png


--------------------------------------------------------------------------------
/assignments/HW6/student_func.cu:
--------------------------------------------------------------------------------
  1 | // Udacity HW 6
  2 | // Poisson Blending
  3 | 
  4 | /* Background
  5 |    ==========
  6 | 
  7 |    The goal for this assignment is to take one image (the source) and
  8 |    paste it into another image (the destination) attempting to match the
  9 |    two images so that the pasting is non-obvious. This is
 10 |    known as a "seamless clone".
 11 | 
 12 |    The basic ideas are as follows:
 13 | 
 14 |    1) Figure out the interior and border of the source image
 15 |    2) Use the values of the border pixels in the destination image
 16 |       as boundary conditions for solving a Poisson equation that tells
 17 |       us how to blend the images.
 18 | 
 19 |       No pixels from the destination except pixels on the border
 20 |       are used to compute the match.
 21 | 
 22 |    Solving the Poisson Equation
 23 |    ============================
 24 | 
 25 |    There are multiple ways to solve this equation - we choose an iterative
 26 |    method - specifically the Jacobi method. Iterative methods start with
 27 |    a guess of the solution and then iterate to try and improve the guess
 28 |    until it stops changing.  If the problem was well-suited for the method
 29 |    then it will stop and where it stops will be the solution.
 30 | 
 31 |    The Jacobi method is the simplest iterative method and converges slowly -
 32 |    that is we need a lot of iterations to get to the answer, but it is the
 33 |    easiest method to write.
 34 | 
 35 |    Jacobi Iterations
 36 |    =================
 37 | 
 38 |    Our initial guess is going to be the source image itself.  This is a pretty
 39 |    good guess for what the blended image will look like and it means that
 40 |    we won't have to do as many iterations compared to if we had started far
 41 |    from the final solution.
 42 | 
 43 |    ImageGuess_prev (Floating point)
 44 |    ImageGuess_next (Floating point)
 45 | 
 46 |    DestinationImg
 47 |    SourceImg
 48 | 
 49 |    Follow these steps to implement one iteration:
 50 | 
   1) For every pixel p in the interior, compute two sums over the four
      neighboring pixels:

      Sum1: If the neighbor is in the interior then
              += ImageGuess_prev[neighbor]
            else if the neighbor is on the border then
              += DestinationImg[neighbor]
 55 | 
 56 |       Sum2: += SourceImg[p] - SourceImg[neighbor]   (for all four neighbors)
 57 | 
   2) Calculate the new pixel value:
      float newVal = (Sum1 + Sum2) / 4.f;  <------ Notice that the result is
                                                   FLOATING POINT
      ImageGuess_next[p] = min(255, max(0, newVal)); // clamp to [0, 255]
 62 | 
 63 | 
 64 |     In this assignment we will do 800 iterations.
 65 |    */
 66 | 
 67 | #include "utils.h"
 68 | #include <thrust/host_vector.h>
 69 | 
// Student entry point for the Poisson blend. The body is intentionally empty:
// the steps to implement are described in the comment inside the function.
// The parameter list mirrors reference_calc, and all pointers are HOST
// pointers.
void your_blend(const uchar4 *const h_sourceImg, // IN
                const size_t numRowsSource, const size_t numColsSource,
                const uchar4 *const h_destImg, // IN
                uchar4 *const h_blendedImg)    // OUT
{

  /* To recap, here are the steps you need to implement

     1) Compute a mask of the pixels from the source image to be copied
        The pixels that shouldn't be copied are completely white, they
        have R=255, G=255, B=255.  Any other pixels SHOULD be copied.

     2) Compute the interior and border regions of the mask.  An interior
        pixel has all 4 neighbors also inside the mask.  A border pixel is
        in the mask itself, but has at least one neighbor that isn't.

     3) Separate out the incoming image into three separate channels

     4) Create two float(!) buffers for each color channel that will
        act as our guesses.  Initialize them to the respective color
        channel of the source image since that will act as our initial guess.

     5) For each color channel perform the Jacobi iteration described
        above 800 times.

     6) Create the output image by replacing all the interior pixels
        in the destination image with the result of the Jacobi iterations.
        Just cast the floating point values to unsigned chars since we have
        already made sure to clamp them to the correct range.

      Since this is the final assignment, we provide little boilerplate code
      to help you.  Notice that all the input/output pointers are HOST
      pointers.

      You will have to allocate all of your own GPU memory and perform your own
      memcopies to get data in and out of the GPU memory.

      Remember to wrap all of your calls with checkCudaErrors() to catch
      anything that might go wrong.  After each kernel call do:

      cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError());

      to catch any errors that happened while executing the kernel.
  */
}
114 | 


--------------------------------------------------------------------------------
/assignments/HW6/timer.h:
--------------------------------------------------------------------------------
 1 | #ifndef GPU_TIMER_H__
 2 | #define GPU_TIMER_H__
 3 | 
 4 | #include <cuda_runtime.h>
 5 | 
 6 | struct GpuTimer {
 7 |   cudaEvent_t start;
 8 |   cudaEvent_t stop;
 9 | 
10 |   GpuTimer() {
11 |     cudaEventCreate(&start);
12 |     cudaEventCreate(&stop);
13 |   }
14 | 
15 |   ~GpuTimer() {
16 |     cudaEventDestroy(start);
17 |     cudaEventDestroy(stop);
18 |   }
19 | 
20 |   void Start() { cudaEventRecord(start, 0); }
21 | 
22 |   void Stop() { cudaEventRecord(stop, 0); }
23 | 
24 |   float Elapsed() {
25 |     float elapsed;
26 |     cudaEventSynchronize(stop);
27 |     cudaEventElapsedTime(&elapsed, start, stop);
28 |     return elapsed;
29 |   }
30 | };
31 | 
32 | #endif /* GPU_TIMER_H__ */
33 | 


--------------------------------------------------------------------------------
/assignments/HW6/utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef UTILS_H__
 2 | #define UTILS_H__
 3 | 
 4 | #include <iostream>
 5 | #include <iomanip>
 6 | #include <cuda.h>
 7 | #include <cuda_runtime.h>
 8 | #include <cuda_runtime_api.h>
 9 | #include <cassert>
10 | #include <cmath>
11 | 
12 | #define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)
13 | 
14 | template <typename T>
15 | void check(T err, const char *const func, const char *const file,
16 |            const int line) {
17 |   if (err != cudaSuccess) {
18 |     std::cerr << "CUDA error at: " << file << ":" << line << std::endl;
19 |     std::cerr << cudaGetErrorString(err) << " " << func << std::endl;
20 |     exit(1);
21 |   }
22 | }
23 | 
// Verifies that the GPU result matches the CPU reference element for element.
// On the first mismatch the position and both values are printed to stderr
// and the process exits with code 1; otherwise the function simply returns.
template <typename T>
void checkResultsExact(const T *const ref, const T *const gpu, size_t numElem) {
  for (size_t pos = 0; pos != numElem; ++pos) {
    const bool differs = (ref[pos] != gpu[pos]);
    if (!differs) {
      continue;
    }

    std::cerr << "Difference at pos " << pos << std::endl;
    // unary + promotes char-sized values so they print as numbers, and leaves
    // other arithmetic types unchanged
    std::cerr << "Reference: " << std::setprecision(17) << +ref[pos]
              << "\nGPU      : " << +gpu[pos] << std::endl;
    exit(1);
  }
}
38 | 
// Compares the GPU result against the CPU reference with a tolerance.
//
//   ref, gpu - arrays of numElem elements each
//   eps1     - largest absolute per-element difference that is tolerated;
//              the first element exceeding it is reported and the process
//              exits with code 1
//   eps2     - largest tolerated FRACTION (0..1) of elements that differ by a
//              non-zero amount no larger than eps1; exceeding it is reported
//              and the process exits with code 1
//
// Both tolerances must be non-negative (asserted). An empty input
// (numElem == 0) trivially passes.
template <typename T>
void checkResultsEps(const T *const ref, const T *const gpu, size_t numElem,
                     double eps1, double eps2) {
  assert(eps1 >= 0 && eps2 >= 0);

  // size_t so the counter cannot wrap before numElem does
  size_t numSmallDifferences = 0;

  for (size_t i = 0; i < numElem; ++i) {
    // subtract smaller from larger in case of unsigned types
    const T smaller = std::min(ref[i], gpu[i]);
    const T larger = std::max(ref[i], gpu[i]);
    const T diff = larger - smaller;

    if (diff > eps1) {
      std::cerr << "Difference at pos " << +i << " exceeds tolerance of "
                << eps1 << std::endl;
      std::cerr << "Reference: " << std::setprecision(17) << +ref[i]
                << "\nGPU      : " << +gpu[i] << std::endl;
      exit(1);
    }

    if (diff > 0) {
      ++numSmallDifferences;
    }
  }

  // Avoid 0/0 for empty input; no elements means no differences.
  const double percentSmallDifferences =
      (numElem == 0) ? 0.0
                     : (double)numSmallDifferences / (double)numElem;

  if (percentSmallDifferences > eps2) {
    std::cerr << "Total percentage of non-zero pixel difference between the "
                 "two images exceeds "
              << 100.0 * eps2 << "%" << std::endl;
    std::cerr << "Percentage of non-zero pixel differences: "
              << 100.0 * percentSmallDifferences << "%" << std::endl;
    exit(1);
  }
}
72 | 
// Image comparison using the Autodesk method: an element is "bad" when its
// absolute difference from the reference is greater than `variance`.
// Note that `tolerance` is an absolute COUNT of bad elements, not a
// percentage of the input; exceeding it prints a message to stderr and exits
// the process with code 1.
template <typename T>
void checkResultsAutodesk(const T *const ref, const T *const gpu,
                          size_t numElem, double variance, size_t tolerance) {
  size_t badCount = 0;

  size_t idx = 0;
  while (idx < numElem) {
    // larger minus smaller keeps the subtraction valid for unsigned types
    const T lo = std::min(ref[idx], gpu[idx]);
    const T hi = std::max(ref[idx], gpu[idx]);
    const T delta = hi - lo;
    badCount += (delta > variance) ? 1 : 0;
    ++idx;
  }

  if (!(badCount > tolerance)) {
    return;
  }

  std::cerr << "Too many bad pixels in the image." << badCount << "/"
            << tolerance << std::endl;
  exit(1);
}
94 | 
95 | #endif
96 | 


--------------------------------------------------------------------------------
/assignments/README.md:
--------------------------------------------------------------------------------
 1 | These assignments require OpenCV as a prerequisite. The easiest way to install is probably with conda.
 2 | 
To install OpenCV in a conda environment:
 4 | ```
 5 | conda create -n cs344 -y
 6 | conda activate cs344
 7 | conda install -y -c anaconda opencv
 8 | ```
 9 | 
To build:
11 | ```
12 | cd assignments
13 | mkdir build
14 | cd build
15 | cmake ..
16 | make
17 | ```
18 | The binaries will then be contained within `assignments/bin`.
19 | 
20 | # HW1 Passing Instructions
From the `HW1` directory, run:

`../bin/HW1 HW1/cinque_terre_small.jpg`


--------------------------------------------------------------------------------