├── cris.png ├── doge.jpg ├── huge.jpg ├── large.jpg ├── tiny.jpg ├── medium.jpg ├── source0.png ├── source1.jpg ├── source2.jpg ├── source3.jpg ├── verylarge.jpg ├── destination0.png ├── destination1.jpg ├── destination2.jpg ├── destination3.jpg ├── huge_source.jpg ├── large_source.jpg ├── tiny_source.jpg ├── wall_source.jpg ├── medium_source.jpg ├── verylarge_source.jpg ├── README.md ├── poisson_serial.py ├── parallel_poisson.py └── parallel_poisson_cuda.py /cris.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ealehman/parallel-poisson-blending/HEAD/cris.png -------------------------------------------------------------------------------- /doge.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ealehman/parallel-poisson-blending/HEAD/doge.jpg -------------------------------------------------------------------------------- /huge.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ealehman/parallel-poisson-blending/HEAD/huge.jpg -------------------------------------------------------------------------------- /large.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ealehman/parallel-poisson-blending/HEAD/large.jpg -------------------------------------------------------------------------------- /tiny.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ealehman/parallel-poisson-blending/HEAD/tiny.jpg -------------------------------------------------------------------------------- /medium.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ealehman/parallel-poisson-blending/HEAD/medium.jpg 
-------------------------------------------------------------------------------- /source0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ealehman/parallel-poisson-blending/HEAD/source0.png -------------------------------------------------------------------------------- /source1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ealehman/parallel-poisson-blending/HEAD/source1.jpg -------------------------------------------------------------------------------- /source2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ealehman/parallel-poisson-blending/HEAD/source2.jpg -------------------------------------------------------------------------------- /source3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ealehman/parallel-poisson-blending/HEAD/source3.jpg -------------------------------------------------------------------------------- /verylarge.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ealehman/parallel-poisson-blending/HEAD/verylarge.jpg -------------------------------------------------------------------------------- /destination0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ealehman/parallel-poisson-blending/HEAD/destination0.png -------------------------------------------------------------------------------- /destination1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ealehman/parallel-poisson-blending/HEAD/destination1.jpg -------------------------------------------------------------------------------- /destination2.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ealehman/parallel-poisson-blending/HEAD/destination2.jpg -------------------------------------------------------------------------------- /destination3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ealehman/parallel-poisson-blending/HEAD/destination3.jpg -------------------------------------------------------------------------------- /huge_source.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ealehman/parallel-poisson-blending/HEAD/huge_source.jpg -------------------------------------------------------------------------------- /large_source.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ealehman/parallel-poisson-blending/HEAD/large_source.jpg -------------------------------------------------------------------------------- /tiny_source.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ealehman/parallel-poisson-blending/HEAD/tiny_source.jpg -------------------------------------------------------------------------------- /wall_source.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ealehman/parallel-poisson-blending/HEAD/wall_source.jpg -------------------------------------------------------------------------------- /medium_source.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ealehman/parallel-poisson-blending/HEAD/medium_source.jpg -------------------------------------------------------------------------------- /verylarge_source.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ealehman/parallel-poisson-blending/HEAD/verylarge_source.jpg -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Parallelization of Poisson Blending 2 | =================================== 3 | By Jason Ting, Ryan Lee, Alex Lehman 4 | 5 | Final Project for Computer Science 205, Fall 2013 (Cris Cecka) 6 | -------------------------------------------------------------- 7 | 8 | The objective of the Poisson Blending algorithm is to compose a source image and a target image in the gradient domain. The code implements Poisson Blending in parallel with CUDA and Cheetah to efficiently and automatically superimpose images without visible seams. 9 | 10 | How to Run: 11 | ----------- 12 | There are two ways to run the code: 13 | 14 | 1) Using the images included in the folder and the course software load, execute the following on the Resonance node: 15 | $ python parallel_poisson.py [# iterations] 16 | 17 | 2) Specifying the image that you would like to process, execute the following, again on the Resonance node: 18 | $ python parallel_poisson.py [source image] [destination image] [# iterations] 19 | 20 | Benchmarking: 21 | ------------- 22 | For the purposes of analysis, the average time per iteration was computed over 800 iterations (N) for destination images of 5 sizes: (200, 142), (375, 266), (750, 531), (1500, 1062), and (2500, 1770). 
'''
Jason Ting, Alex Lehman, Ryan Lee
CS205 Final Project
Serial Implementation of Discretized Poisson Blending Algorithm
'''

import numpy as np
import os
from sys import argv
import time


def mask(source_im):
    """Return the interior pixels of the source cut-out.

    A pixel belongs to the mask when it is not pure white, i.e. when at
    least one channel differs from 255 (the same rule the CUDA kernels
    use).  A mask pixel is *interior* when its four direct neighbours
    (up, down, left, right) are mask pixels as well; pixels on the image
    edge can therefore never be interior.

    Parameters:
        source_im: array of shape (height, width, channels).

    Returns:
        List of (row, column) tuples in row-major order.
    """
    source_im = np.asarray(source_im)
    non_white = np.any(source_im != 255, axis=2)

    # Shifted views compare every pixel with its four neighbours at once,
    # replacing the quadratic "tuple in list" search.
    interior = np.zeros(non_white.shape, dtype=bool)
    interior[1:-1, 1:-1] = (
        non_white[1:-1, 1:-1]
        & non_white[:-2, 1:-1]
        & non_white[2:, 1:-1]
        & non_white[1:-1, :-2]
        & non_white[1:-1, 2:]
    )
    return [(int(i), int(j)) for i, j in zip(*np.nonzero(interior))]


def poisson_serial(source_im, dest_im, out_im, interior, buffer1, buffer2, N):
    """Blend the interior of source_im into dest_im with N Jacobi sweeps.

    For every colour channel the interior starts from the source values.
    Each sweep replaces an interior pixel by the average of its four
    neighbours (previous-sweep value for interior neighbours, destination
    value otherwise) plus the source gradient, clipped to [0, 255].

    Parameters:
        source_im: (height, width, 3) source image.
        dest_im: (height, width, 3) destination image.
        out_im: array receiving the blended interior pixels; modified in
            place and returned.
        interior: iterable of (row, column) tuples as produced by mask().
        buffer1, buffer2: kept for backward compatibility; the function
            allocates its own working buffers so that the previous and
            the next sweep never share storage.
        N: number of Jacobi sweeps.

    Returns:
        out_im.
    """
    interior = list(interior)
    # Set lookup is O(1); the original searched the list for every neighbour.
    interior_set = set(interior)

    # Work in float so the gradient term cannot wrap around on uint8 input.
    source = np.asarray(source_im, dtype=float)
    dest = np.asarray(dest_im, dtype=float)

    for color in range(3):
        # Two distinct buffers: a true Jacobi sweep reads only values of
        # the previous sweep.
        current = np.array(source[:, :, color], dtype=float)
        updated = current.copy()

        for _ in range(N):
            for i, j in interior:
                total = 0.0
                for k, l in ((i, j + 1), (i, j - 1), (i + 1, j), (i - 1, j)):
                    if (k, l) in interior_set:
                        total += current[k, l]
                    else:
                        total += dest[k, l, color]
                    total += source[i, j, color] - source[k, l, color]
                updated[i, j] = min(255.0, max(0.0, total / 4.0))

            # Every interior entry of `updated` was rewritten, so swapping
            # is enough to start the next sweep.
            current, updated = updated, current

        for i, j in interior:
            out_im[i, j, color] = current[i, j]

    return out_im


def main(args):
    """Run the serial blend from the command line and save result.png."""
    if len(args) != 4:
        print("Usage: python %s [source image] [destination image] "
              "[number of iterations]" % args[0])
        raise SystemExit(1)

    # Imported here so the blending functions stay usable without Pillow.
    from PIL import Image

    # Number of iterations
    N = int(args[3])

    # Load source/dest images as float RGB arrays for blending
    source_im = np.array(Image.open(args[1]).convert('RGB'), dtype=float)
    dest_im = np.array(Image.open(args[2]).convert('RGB'), dtype=float)

    # Working buffers (part of the poisson_serial signature)
    buffer1 = np.zeros((dest_im.shape[0], dest_im.shape[1]), dtype=float)
    buffer2 = np.zeros((dest_im.shape[0], dest_im.shape[1]), dtype=float)
    # Copy so the destination is not overwritten while it is being read
    out_im = dest_im.copy()

    start = time.time()
    interior = mask(source_im)
    out_im = np.uint8(poisson_serial(source_im, dest_im, out_im, interior,
                                     buffer1, buffer2, N))
    stop = time.time()

    print(str(N) + ' Iterations, Serial Time: ' + str(stop - start) + ' seconds')

    Image.fromarray(out_im).save('result.png')


if __name__ == '__main__':
    main(argv)
'''
Jason Ting, Alex Lehman, Ryan Lee
CS205 Final Project
Parallelized Implementation of Discretized Poisson Blending Algorithm using CUDA with Cheetah
'''

from sys import argv

from PIL import Image
import numpy as np
import time
import glob
import pycuda.autoinit
import pycuda.driver as cu
import pycuda.compiler as nvcc
import pycuda.gpuarray as gpu
from Cheetah.Template import Template

# define the CUDA kernels for the mask and blending
mask_source = """
// Builds the interior mask: interior pixels keep their source colour,
// every other pixel becomes white. The result goes to a separate buffer
// so that no thread reads a pixel another thread is whitening.
__global__ void mask_kernel(const uchar3* source, uchar3* mask)
{
    // Compute thread id in x, y, and coalesced
    int i = $BLOCK_DIM_Y * blockIdx.y + threadIdx.y;
    int j = $BLOCK_DIM_X * blockIdx.x + threadIdx.x;

    // threads outside the image have nothing to do
    if (i >= $HEIGHT || j >= $WIDTH)
        return;

    int tid = i * $WIDTH + j;
    uchar3 pixel = source[tid];

    // a candidate is non-white and off the image edge, so that all four
    // neighbours exist
    bool interior = (pixel.x < 255 || pixel.y < 255 || pixel.z < 255)
        && i > 0 && i < $HEIGHT - 1 && j > 0 && j < $WIDTH - 1;

    if (interior) {
        int pos;

        // goes over neighbors (up, down, left, right)
        #for ($x,$y) in $NEIGHBORS
        pos = tid + $x + $y*$WIDTH;

        // a white neighbour makes this a border pixel
        if (source[pos].x == 255 && source[pos].y == 255 && source[pos].z == 255)
            interior = false;
        #end for
    }

    mask[tid] = interior ? pixel : make_uchar3(255, 255, 255);
}
"""

poisson_blending_source = """
// One Jacobi sweep. Reads the previous sweep from buffer_in and writes
// the new values to buffer_out and to the destination image. The mask is
// constant, so a pixel that blends to white stays interior.
__global__ void poisson_blending_kernel(const uchar3* source, uchar3* destination, const uchar3* mask, const uchar3* buffer_in, uchar3* buffer_out)
{
    // Compute thread id in x, y, and coalesced
    int i = $BLOCK_DIM_Y * blockIdx.y + threadIdx.y;
    int j = $BLOCK_DIM_X * blockIdx.x + threadIdx.x;

    // threads outside the image have nothing to do
    if (i >= $HEIGHT || j >= $WIDTH)
        return;

    int tid = i * $WIDTH + j;

    // only interior pixels are blended; they are never on the image edge,
    // so every neighbour index below is in range
    if (mask[tid].x == 255 && mask[tid].y == 255 && mask[tid].z == 255)
        return;

    int pos;
    float sum;

    // iterates over RGB
    #for $l in $RGB
    sum = 0.0f;

    // iterates over neighbors (up, down, left, right)
    #for ($x,$y) in $NEIGHBORS
    pos = tid + $x + $y*$WIDTH;

    // adds buffer neighbors if pixel is in interior otherwise add destination neighbors
    if (mask[pos].x < 255 || mask[pos].y < 255 || mask[pos].z < 255)
        sum += buffer_in[pos].$l;
    else
        sum += destination[pos].$l;

    // add difference between source and neighbor
    sum += (source[tid].$l - source[pos].$l);
    #end for

    // clip the new value to (0,255)
    float next_buffer_$l = min(255.f, max(0.f, sum/4.f));
    #end for

    // updates the destination image and the next buffer
    uchar3 blended = make_uchar3((unsigned char)next_buffer_x, (unsigned char)next_buffer_y, (unsigned char)next_buffer_z);
    destination[tid] = blended;
    buffer_out[tid] = blended;
}
"""


def cuda_compile(source_string, function_name):
    """Compile CUDA source at runtime and return the named kernel.

    source_string may be a plain string or a Cheetah Template; str()
    renders a template into the final kernel source.
    """
    source_module = nvcc.SourceModule(str(source_string))
    # return a handle to the compiled CUDA kernel
    return source_module.get_function(function_name)


def _fill_template(kernel_source, dest_im, b_size, RGB, neighbors):
    """Create a Cheetah template with the launch and image parameters set."""
    template = Template(kernel_source)
    template.BLOCK_DIM_X = b_size[0]
    template.BLOCK_DIM_Y = b_size[1]
    template.WIDTH = dest_im.shape[1]
    template.HEIGHT = dest_im.shape[0]
    template.RGB = RGB
    template.NEIGHBORS = neighbors
    return template


def interior_buffer(source_im, dest_im, b_size, g_size, RGB, neighbors):
    """Return the source image with every non-interior pixel set to white.

    Parameters:
        source_im: (height, width, 3) uint8 source image.
        dest_im: destination image; supplies the width and height that are
            baked into the kernel.
        b_size, g_size: CUDA block and grid dimensions.
        RGB: uchar3 component names used by the template.
        neighbors: (dx, dy) offsets of the four direct neighbours.

    Returns:
        uint8 array of the same shape as source_im.
    """
    mask_template = _fill_template(mask_source, dest_im, b_size, RGB, neighbors)
    mask_kernel = cuda_compile(mask_template, "mask_kernel")

    source_host = np.ascontiguousarray(source_im, dtype=np.uint8)
    inner_buffer = np.empty_like(source_host)

    # alloc memory on the GPU: read-only input plus a separate output
    d_source = cu.mem_alloc(source_host.nbytes)
    d_mask = cu.mem_alloc(source_host.nbytes)
    try:
        cu.memcpy_htod(d_source, source_host)
        mask_kernel(d_source, d_mask, block=b_size, grid=g_size)
        cu.memcpy_dtoh(inner_buffer, d_mask)
    finally:
        d_source.free()
        d_mask.free()

    return inner_buffer


def poisson_parallel(source_im, dest_im, b_size, g_size, RGB, neighbors, interior_buffer, n):
    """Run n Jacobi sweeps of Poisson blending on the GPU.

    Parameters:
        source_im, dest_im: (height, width, 3) uint8 images of equal shape.
        b_size, g_size: CUDA block and grid dimensions.
        RGB: uchar3 component names used by the template.
        neighbors: (dx, dy) offsets of the four direct neighbours.
        interior_buffer: mask image as returned by interior_buffer().
        n: number of sweeps (anything int() accepts).

    Returns:
        uint8 array holding the destination with the blended interior.
    """
    template = _fill_template(poisson_blending_source, dest_im, b_size, RGB, neighbors)
    poisson_blending_kernel = cuda_compile(template, "poisson_blending_kernel")

    source_host = np.ascontiguousarray(source_im, dtype=np.uint8)
    mask_host = np.ascontiguousarray(interior_buffer, dtype=np.uint8)
    out_image = np.array(dest_im, dtype=np.uint8)

    allocations = [cu.mem_alloc(source_host.nbytes),
                   cu.mem_alloc(out_image.nbytes),
                   cu.mem_alloc(mask_host.nbytes),
                   cu.mem_alloc(mask_host.nbytes),
                   cu.mem_alloc(mask_host.nbytes)]
    d_source, d_destination, d_mask, d_current, d_next = allocations
    try:
        cu.memcpy_htod(d_source, source_host)
        cu.memcpy_htod(d_destination, out_image)
        cu.memcpy_htod(d_mask, mask_host)
        # the interior starts from the source values stored in the mask
        cu.memcpy_htod(d_current, mask_host)
        cu.memcpy_htod(d_next, mask_host)

        for _ in range(int(n)):
            poisson_blending_kernel(d_source, d_destination, d_mask, d_current, d_next,
                                    block=b_size, grid=g_size)
            # the freshly written buffer is the input of the next sweep
            d_current, d_next = d_next, d_current

        # retrieves the final output image
        cu.memcpy_dtoh(out_image, d_destination)
    finally:
        for allocation in allocations:
            allocation.free()

    return out_image


def _parse_arguments(args):
    """Return (source_files, dest_files, iterations) or exit with a message."""
    if len(args) == 2:
        # sorted so that sourceN is paired with the matching destN
        source_files = sorted(glob.glob('source*.jpg'))
        dest_files = sorted(glob.glob('dest*.jpg'))
        if len(source_files) != len(dest_files):
            print("Please make sure that your files are named sourceN.jpg, destN.jpg, "
                  "and that each source is paired with a dest image.")
            raise SystemExit(1)
        return source_files, dest_files, int(args[1])
    if len(args) == 4:
        return [args[1]], [args[2]], int(args[3])
    print("Usage: python %s [source image] [destination image] [# iterations] "
          "OR python %s [# iterations] (for entire directory)" % (args[0], args[0]))
    raise SystemExit(1)


def main(args):
    """Blend every source/destination pair and save results_<index>.png."""
    source_files, dest_files, N = _parse_arguments(args)

    # block size (threads per block)
    b_size = (16, 16, 1)

    # color component names and neighboring positions [(+-1,0),(0,+-1)] for Cheetah
    RGB = ['x', 'y', 'z']
    neighbors = [(-1, 0), (0, -1), (1, 0), (0, 1)]

    for index, (source_path, dest_path) in enumerate(zip(source_files, dest_files)):
        # load in source/dest images as uint8 RGB arrays
        source_im = np.array(Image.open(source_path).convert('RGB'), dtype=np.uint8)
        dest_im = np.array(Image.open(dest_path).convert('RGB'), dtype=np.uint8)
        if source_im.shape != dest_im.shape:
            # the kernels index both images with the destination's width
            raise ValueError("%s and %s must have the same dimensions"
                             % (source_path, dest_path))

        # warmup the GPU (no calculations)
        for _ in range(100):
            gpu.to_gpu(source_im).get()
            gpu.to_gpu(dest_im).get()

        # grid size (blocks per grid)
        g_size = (int(np.ceil(float(dest_im.shape[1]) / b_size[0])),
                  int(np.ceil(float(dest_im.shape[0]) / b_size[1])))

        # apply Poisson blending and time
        start = time.time()
        inner_buffer = interior_buffer(source_im, dest_im, b_size, g_size, RGB, neighbors)
        out_im = poisson_parallel(source_im, dest_im, b_size, g_size, RGB, neighbors, inner_buffer, N)
        end = time.time()
        print('Parallel Time: ' + str(end - start) + ' seconds')

        # creates output and save the image
        Image.fromarray(out_im).save('results_' + str(index) + '.png')


if __name__ == '__main__':
    main(argv)
'''
Jason Ting, Alex Lehman, Ryan Lee
CS205 Final Project
Parallelized Implementation of Discretized Poisson Blending Algorithm using CUDA
'''

from sys import argv

from PIL import Image
import numpy as np
import time
import glob
import pycuda.autoinit
import pycuda.driver as cu
import pycuda.compiler as nvcc
import pycuda.gpuarray as gpu
from Cheetah.Template import Template

# define the CUDA kernels for the mask and blending
mask_source = """
// Builds the interior mask: interior pixels keep their source colour,
// every other pixel becomes white. The result goes to a separate buffer
// so that no thread reads a pixel another thread is whitening.
__global__ void mask_kernel(const uchar3* source, uchar3* mask, int width, int height)
{
    // Compute thread id in x, y, and coalesced
    int i = blockDim.y * blockIdx.y + threadIdx.y;
    int j = blockDim.x * blockIdx.x + threadIdx.x;

    // threads outside the image have nothing to do
    if (i >= height || j >= width)
        return;

    int tid = i * width + j;
    uchar3 pixel = source[tid];

    // up, down, left, right
    const int offsets[4] = {-width, width, -1, 1};

    // a candidate is non-white and off the image edge, so that all four
    // neighbours exist
    bool interior = (pixel.x < 255 || pixel.y < 255 || pixel.z < 255)
        && i > 0 && i < height - 1 && j > 0 && j < width - 1;

    // a white neighbour makes this a border pixel
    for (int k = 0; interior && k < 4; k++) {
        uchar3 neighbor = source[tid + offsets[k]];
        if (neighbor.x == 255 && neighbor.y == 255 && neighbor.z == 255)
            interior = false;
    }

    mask[tid] = interior ? pixel : make_uchar3(255, 255, 255);
}
"""

poisson_blending_source = """
// One Jacobi sweep. Reads the previous sweep from buffer_in and writes
// the new values to buffer_out and to the destination image. The mask is
// constant, so a pixel that blends to white stays interior.
__global__ void poisson_blending_kernel(const uchar3* source, uchar3* destination, const uchar3* mask, const uchar3* buffer_in, uchar3* buffer_out, int width, int height)
{
    // Compute thread id in x, y, and coalesced
    int i = blockDim.y * blockIdx.y + threadIdx.y;
    int j = blockDim.x * blockIdx.x + threadIdx.x;

    // threads outside the image have nothing to do
    if (i >= height || j >= width)
        return;

    int tid = i * width + j;

    // only interior pixels are blended; they are never on the image edge,
    // so every neighbour index below is in range
    if (mask[tid].x == 255 && mask[tid].y == 255 && mask[tid].z == 255)
        return;

    // up, down, left, right
    const int offsets[4] = {-width, width, -1, 1};

    // byte views make the colour channel addressable by index
    const unsigned char* source_bytes = (const unsigned char*) source;
    const unsigned char* dest_bytes = (const unsigned char*) destination;
    const unsigned char* buffer_bytes = (const unsigned char*) buffer_in;

    float blended[3];

    // iterates over RGB
    for (int c = 0; c < 3; c++) {
        float sum = 0.0f;

        // iterates over neighbors
        for (int k = 0; k < 4; k++) {
            int pos = tid + offsets[k];

            // adds buffer neighbors if pixel is in interior otherwise add destination neighbors
            if (mask[pos].x < 255 || mask[pos].y < 255 || mask[pos].z < 255)
                sum += buffer_bytes[3 * pos + c];
            else
                sum += dest_bytes[3 * pos + c];

            // add difference between source and neighbor
            sum += (float) source_bytes[3 * tid + c] - (float) source_bytes[3 * pos + c];
        }

        // clip the new value to (0,255)
        blended[c] = fminf(255.f, fmaxf(0.f, sum / 4.f));
    }

    // updates the destination image and the next buffer
    uchar3 result = make_uchar3((unsigned char) blended[0], (unsigned char) blended[1], (unsigned char) blended[2]);
    destination[tid] = result;
    buffer_out[tid] = result;
}
"""


def cuda_compile(source_string, function_name):
    """Compile CUDA source at runtime and return the named kernel."""
    source_module = nvcc.SourceModule(str(source_string))
    # return a handle to the compiled CUDA kernel
    return source_module.get_function(function_name)


def interior_buffer(source_im, dest_im, b_size, g_size, RGB, neighbors):
    """Return the source image with every non-interior pixel set to white.

    Parameters:
        source_im: (height, width, 3) uint8 source image.
        dest_im: destination image; supplies the width and height passed
            to the kernel.
        b_size, g_size: CUDA block and grid dimensions.
        RGB, neighbors: unused here; kept so the signature matches the
            Cheetah implementation.

    Returns:
        uint8 array of the same shape as source_im.
    """
    mask_kernel = cuda_compile(mask_source, "mask_kernel")

    source_host = np.ascontiguousarray(source_im, dtype=np.uint8)
    inner_buffer = np.empty_like(source_host)

    # alloc memory on the GPU: read-only input plus a separate output
    d_source = cu.mem_alloc(source_host.nbytes)
    d_mask = cu.mem_alloc(source_host.nbytes)
    try:
        cu.memcpy_htod(d_source, source_host)
        # scalar kernel arguments must be sized numpy scalars
        mask_kernel(d_source, d_mask,
                    np.int32(dest_im.shape[1]), np.int32(dest_im.shape[0]),
                    block=b_size, grid=g_size)
        cu.memcpy_dtoh(inner_buffer, d_mask)
    finally:
        d_source.free()
        d_mask.free()

    return inner_buffer


def poisson_parallel(source_im, dest_im, b_size, g_size, RGB, neighbors, interior_buffer, n):
    """Run n Jacobi sweeps of Poisson blending on the GPU.

    Parameters:
        source_im, dest_im: (height, width, 3) uint8 images of equal shape.
        b_size, g_size: CUDA block and grid dimensions.
        RGB, neighbors: unused here; kept so the signature matches the
            Cheetah implementation.
        interior_buffer: mask image as returned by interior_buffer().
        n: number of sweeps (anything int() accepts).

    Returns:
        uint8 array holding the destination with the blended interior.
    """
    poisson_blending_kernel = cuda_compile(poisson_blending_source, "poisson_blending_kernel")

    source_host = np.ascontiguousarray(source_im, dtype=np.uint8)
    mask_host = np.ascontiguousarray(interior_buffer, dtype=np.uint8)
    out_image = np.array(dest_im, dtype=np.uint8)
    width = np.int32(dest_im.shape[1])
    height = np.int32(dest_im.shape[0])

    allocations = [cu.mem_alloc(source_host.nbytes),
                   cu.mem_alloc(out_image.nbytes),
                   cu.mem_alloc(mask_host.nbytes),
                   cu.mem_alloc(mask_host.nbytes),
                   cu.mem_alloc(mask_host.nbytes)]
    d_source, d_destination, d_mask, d_current, d_next = allocations
    try:
        cu.memcpy_htod(d_source, source_host)
        cu.memcpy_htod(d_destination, out_image)
        cu.memcpy_htod(d_mask, mask_host)
        # the interior starts from the source values stored in the mask
        cu.memcpy_htod(d_current, mask_host)
        cu.memcpy_htod(d_next, mask_host)

        for _ in range(int(n)):
            poisson_blending_kernel(d_source, d_destination, d_mask, d_current, d_next,
                                    width, height, block=b_size, grid=g_size)
            # the freshly written buffer is the input of the next sweep
            d_current, d_next = d_next, d_current

        # retrieves the final output image
        cu.memcpy_dtoh(out_image, d_destination)
    finally:
        for allocation in allocations:
            allocation.free()

    return out_image


def _parse_arguments(args):
    """Return (source_files, dest_files, iterations) or exit with a message."""
    if len(args) == 2:
        # sorted so that sourceN is paired with the matching destN
        source_files = sorted(glob.glob('source*.jpg'))
        dest_files = sorted(glob.glob('dest*.jpg'))
        if len(source_files) != len(dest_files):
            print("Please make sure that your files are named sourceN.jpg, destN.jpg, "
                  "and that each source is paired with a dest image.")
            raise SystemExit(1)
        return source_files, dest_files, int(args[1])
    if len(args) == 4:
        return [args[1]], [args[2]], int(args[3])
    print("Usage: python %s [source image] [destination image] [# iterations] "
          "OR python %s [# iterations] (for entire directory)" % (args[0], args[0]))
    raise SystemExit(1)


def main(args):
    """Blend every source/destination pair and save results_<index>.png."""
    source_files, dest_files, N = _parse_arguments(args)

    # block size (threads per block)
    b_size = (16, 16, 1)

    # kept for signature compatibility with the Cheetah implementation
    RGB = ['x', 'y', 'z']
    neighbors = [(-1, 0), (0, -1), (1, 0), (0, 1)]

    for index, (source_path, dest_path) in enumerate(zip(source_files, dest_files)):
        # load in source/dest images as uint8 RGB arrays
        source_im = np.array(Image.open(source_path).convert('RGB'), dtype=np.uint8)
        dest_im = np.array(Image.open(dest_path).convert('RGB'), dtype=np.uint8)
        if source_im.shape != dest_im.shape:
            # the kernels index both images with the destination's width
            raise ValueError("%s and %s must have the same dimensions"
                             % (source_path, dest_path))

        # warmup the GPU (no calculations)
        for _ in range(100):
            gpu.to_gpu(source_im).get()
            gpu.to_gpu(dest_im).get()

        # grid size (blocks per grid)
        g_size = (int(np.ceil(float(dest_im.shape[1]) / b_size[0])),
                  int(np.ceil(float(dest_im.shape[0]) / b_size[1])))

        # apply Poisson blending and time
        start = time.time()
        inner_buffer = interior_buffer(source_im, dest_im, b_size, g_size, RGB, neighbors)
        out_im = poisson_parallel(source_im, dest_im, b_size, g_size, RGB, neighbors, inner_buffer, N)
        end = time.time()
        print('Parallel Time: ' + str(end - start) + ' seconds')

        # creates output and save the image
        Image.fromarray(out_im).save('results_' + str(index) + '.png')


if __name__ == '__main__':
    main(argv)