├── .gitignore ├── vector_add_kernel.cl ├── Makefile ├── errorchecking ├── Makefile ├── opencl_errors.h ├── check_opencl.h └── main.c ├── README.md ├── LICENSE └── main.c /.gitignore: -------------------------------------------------------------------------------- 1 | main 2 | -------------------------------------------------------------------------------- /vector_add_kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void vector_add(__global int *A, __global int *B, __global int *C) { 2 | 3 | // Get the index of the current element 4 | int i = get_global_id(0); 5 | 6 | // Do the operation 7 | C[i] = A[i] + B[i]; 8 | } 9 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | OS := $(shell uname) 2 | OPTIONS:= 3 | 4 | ifeq ($(OS),Darwin) 5 | OPTIONS += -framework OpenCL 6 | else 7 | OPTIONS += -l OpenCL 8 | endif 9 | 10 | main: main.c 11 | gcc -Wall -g main.c -o main $(OPTIONS) 12 | 13 | clean: 14 | rm -rf main -------------------------------------------------------------------------------- /errorchecking/Makefile: -------------------------------------------------------------------------------- 1 | OS := $(shell uname) 2 | OPTIONS:= 3 | 4 | ifeq ($(OS),Darwin) 5 | OPTIONS += -framework OpenCL 6 | else 7 | OPTIONS += -l OpenCL 8 | endif 9 | 10 | main: main.c 11 | gcc -Wall -g main.c -o main $(OPTIONS) 12 | 13 | clean: 14 | rm -rf main -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | OpenCL-Getting-Started 2 | ====================== 3 | 4 | A small "getting started" tutorial for OpenCL. See 5 | http://www.eriksmistad.no/getting-started-with-opencl-and-gpu-computing/ 6 | for more info. 7 | 8 | For simplicity, the main.c in this directory does not check for 9 | errors. If it doesn't work for you, try the version in the 10 | 'errorchecking' subdirectory instead, which does (this was contributed 11 | by Christian Jaeger.) 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2011 Erik Smistad. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are 4 | permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this list of 7 | conditions and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright notice, this list 10 | of conditions and the following disclaimer in the documentation and/or other materials 11 | provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY Erik Smistad ''AS IS'' AND ANY EXPRESS OR IMPLIED 14 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 15 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL OR 16 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 17 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 18 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 19 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 20 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 21 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | 23 | The views and conclusions contained in the software and documentation are those of the 24 | authors and should not be interpreted as representing official policies, either expressed 25 | or implied, of Erik Smistad. 26 | -------------------------------------------------------------------------------- /errorchecking/opencl_errors.h: -------------------------------------------------------------------------------- 1 | #ifndef _OPENCL_ERRORS_H 2 | #define _OPENCL_ERRORS_H 3 | 4 | /* from http://codepad.org/6FNvVllQ from on #opencl on 5 | freenode */ 6 | 7 | static const char* clErrorString[] = { 8 | "CL_SUCCESS", 9 | "CL_DEVICE_NOT_FOUND", 10 | "CL_DEVICE_NOT_AVAILABLE", 11 | "CL_COMPILER_NOT_AVAILABLE", 12 | "CL_MEM_OBJECT_ALLOCATION_FAILURE", 13 | "CL_OUT_OF_RESOURCES", 14 | "CL_OUT_OF_HOST_MEMORY", 15 | "CL_PROFILING_INFO_NOT_AVAILABLE", 16 | "CL_MEM_COPY_OVERLAP", 17 | "CL_IMAGE_FORMAT_MISMATCH", 18 | "CL_IMAGE_FORMAT_NOT_SUPPORTED", 19 | "CL_BUILD_PROGRAM_FAILURE", 20 | "CL_MAP_FAILURE", 21 | "CL_MISALIGNED_SUB_BUFFER_OFFSET", 22 | "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST", 23 | "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", 24 | "CL_INVALID_VALUE", 25 | "CL_INVALID_DEVICE_TYPE", 26 | "CL_INVALID_PLATFORM", 27 | "CL_INVALID_DEVICE", 28 | "CL_INVALID_CONTEXT", 29 | "CL_INVALID_QUEUE_PROPERTIES", 30 | "CL_INVALID_COMMAND_QUEUE", 31 | "CL_INVALID_HOST_PTR", 32 | "CL_INVALID_MEM_OBJECT", 33 | "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR", 34 | "CL_INVALID_IMAGE_SIZE", 35 | "CL_INVALID_SAMPLER", 36 | "CL_INVALID_BINARY", 37 | "CL_INVALID_BUILD_OPTIONS", 38 | "CL_INVALID_PROGRAM", 39 | "CL_INVALID_PROGRAM_EXECUTABLE", 40 | "CL_INVALID_KERNEL_NAME", 41 | "CL_INVALID_KERNEL_DEFINITION", 42 | "CL_INVALID_KERNEL", 43 | "CL_INVALID_ARG_INDEX", 44 | "CL_INVALID_ARG_VALUE", 45 | "CL_INVALID_ARG_SIZE", 46 | "CL_INVALID_KERNEL_ARGS", 47 | "CL_INVALID_WORK_DIMENSION", 48 | "CL_INVALID_WORK_GROUP_SIZE", 49 | "CL_INVALID_WORK_ITEM_SIZE", 50 | "CL_INVALID_GLOBAL_OFFSET", 51 | "CL_INVALID_EVENT_WAIT_LIST", 52 | "CL_INVALID_EVENT", 53 | "CL_INVALID_OPERATION", 54 | "CL_INVALID_GL_OBJECT", 55 | "CL_INVALID_BUFFER_SIZE", 56 | "CL_INVALID_MIP_LEVEL", 57 | "CL_INVALID_GLOBAL_WORK_SIZE", 58 | "CL_INVALID_PROPERTY" 59 | }; 60 | 61 | static inline const char* clGetErrorString(cl_int errorCode) { 62 | static const char* INVALIDERROR = "INVALID_ERROR_CODE"; 63 | int errorNumber = (errorCode < 0) ? -errorCode : errorCode; 64 | if (errorCode == -1000) return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR"; 65 | return (errorCode <= 64) ? clErrorString[errorNumber] : INVALIDERROR; 66 | } 67 | 68 | #endif /* _OPENCL_ERRORS_H */ 69 | -------------------------------------------------------------------------------- /errorchecking/check_opencl.h: -------------------------------------------------------------------------------- 1 | #ifndef _CHECK_OPENCL_H 2 | #define _CHECK_OPENCL_H 3 | 4 | 5 | /* Wrapper macros that check for errors on the OpenCL (and malloc) 6 | calls, and print error messages and increment a local error count 7 | variable called CHECK_errors if so. The macros all take as last 8 | argument the name of a label that the code will jump to if an error 9 | happens. The macros handle error code extraction themselves, you 10 | don't need to pass &ret. DECLARE_CHECK needs to be put at the start 11 | of the scope where CHECK_* macros are to be used (this defines 12 | a fresh CHECK_errors). 13 | */ 14 | 15 | #include "opencl_errors.h" 16 | 17 | #define DECLARE_CHECK \ 18 | int CHECK_errors=0; \ 19 | cl_int CHECK_ret; \ 20 | void *CHECK_malloc_tmp; 21 | 22 | #define inc_CHECK_errors() \ 23 | if (CHECK_errors < 64) CHECK_errors++; 24 | 25 | #define CHECK_malloc(siz,lbl) \ 26 | CHECK_malloc_tmp=malloc(siz); \ 27 | if (!CHECK_malloc_tmp) { \ 28 | inc_CHECK_errors(); \ 29 | fprintf (stderr,"out of memory, line %i\n", __LINE__); \ 30 | goto lbl; \ 31 | } 32 | 33 | /* and, CHECK_* for all OpenCL procedures */ 34 | 35 | #define CHECKRET_(callstr,exitlabel) \ 36 | if (CHECK_ret) { \ 37 | fprintf(stderr,"error: " callstr ": %s at '%s' line %i\n", \ 38 | clGetErrorString(CHECK_ret), __FILE__, __LINE__); \ 39 | inc_CHECK_errors(); \ 40 | goto exitlabel; \ 41 | } 42 | 43 | #define CHECK_clGetPlatformIDs(a,b,c,lbl) \ 44 | CHECK_ret = clGetPlatformIDs(a,b,c); \ 45 | CHECKRET_("clGetPlatformIDs", lbl); 46 | #define CHECK_clGetDeviceIDs(a,b,c,d,e,lbl) \ 47 | CHECK_ret = clGetDeviceIDs(a,b,c,d,e); \ 48 | CHECKRET_("clGetDeviceIDs", lbl); 49 | #define CHECK_clCreateContext(a,b,c,d,e,lbl) \ 50 | clCreateContext(a,b,c,d,e,&CHECK_ret); \ 51 | CHECKRET_("clCreateContext", lbl); 52 | #define CHECK_clCreateCommandQueue(a,b,c,lbl) \ 53 | clCreateCommandQueue(a,b,c,&CHECK_ret); \ 54 | CHECKRET_("clCreateCommandQueue", lbl); 55 | #define CHECK_clCreateBuffer(a,b,c,d,lbl) \ 56 | clCreateBuffer(a,b,c,d,&CHECK_ret); \ 57 | CHECKRET_("clCreateBuffer", lbl); 58 | #define CHECK_clEnqueueWriteBuffer(a,b,c,d,e,f,g,h,i,lbl) \ 59 | CHECK_ret = clEnqueueWriteBuffer(a,b,c,d,e,f,g,h,i); \ 60 | CHECKRET_("clEnqueueWriteBuffer", lbl); 61 | #define CHECK_clCreateProgramWithSource(a,b,c,d,lbl) \ 62 | clCreateProgramWithSource(a,b,c,d,&CHECK_ret); \ 63 | CHECKRET_("clCreateProgramWithSource", lbl); 64 | #define CHECK_clGetProgramBuildInfo(a,b,c,d,e,f,lbl) \ 65 | CHECK_ret= clGetProgramBuildInfo(a,b,c,d,e,f); \ 66 | CHECKRET_("clGetProgramBuildInfo", lbl); 67 | 68 | #define CHECK_clCreateKernel(a,b,lbl) \ 69 | clCreateKernel(a,b,&CHECK_ret); \ 70 | CHECKRET_("clCreateKernel", lbl); 71 | #define CHECK_clSetKernelArg(a,b,c,d,lbl) \ 72 | CHECK_ret= clSetKernelArg(a,b,c,d); \ 73 | CHECKRET_("clSetKernelArg", lbl); 74 | #define CHECK_clEnqueueNDRangeKernel(a,b,c,d,e,f,g,h,i,lbl) \ 75 | CHECK_ret= clEnqueueNDRangeKernel(a,b,c,d,e,f,g,h,i); \ 76 | CHECKRET_("clEnqueueNDRangeKernel", lbl); 77 | #define CHECK_clEnqueueReadBuffer(a,b,c,d,e,f,g,h,i,lbl) \ 78 | CHECK_ret= clEnqueueReadBuffer(a,b,c,d,e,f,g,h,i); \ 79 | CHECKRET_("clEnqueueReadBuffer", lbl); 80 | #define CHECK_clFlush(a,lbl) \ 81 | CHECK_ret= clFlush(a); \ 82 | CHECKRET_("clFlush", lbl); 83 | #define CHECK_clFinish(a,lbl) \ 84 | CHECK_ret= clFinish(a); \ 85 | CHECKRET_("clFinish", lbl); 86 | #define CHECK_clReleaseKernel(a,lbl) \ 87 | CHECK_ret= clReleaseKernel(a); \ 88 | CHECKRET_("clReleaseKernel", lbl); 89 | #define CHECK_clReleaseProgram(a,lbl) \ 90 | CHECK_ret= clReleaseProgram(a); \ 91 | CHECKRET_("clReleaseProgram", lbl); 92 | #define CHECK_clReleaseMemObject(a,lbl) \ 93 | CHECK_ret= clReleaseMemObject(a); \ 94 | CHECKRET_("clReleaseMemObject", lbl); 95 | #define CHECK_clReleaseCommandQueue(a,lbl) \ 96 | CHECK_ret= clReleaseCommandQueue(a); \ 97 | CHECKRET_("clReleaseCommandQueue", lbl); 98 | #define CHECK_clReleaseContext(a,lbl) \ 99 | CHECK_ret= clReleaseContext(a); \ 100 | CHECKRET_("clReleaseContext", lbl); 101 | 102 | 103 | #endif /* _CHECK_OPENCL_H */ 104 | -------------------------------------------------------------------------------- /main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #ifdef __APPLE__ 5 | #include 6 | #else 7 | #include 8 | #endif 9 | 10 | #define MAX_SOURCE_SIZE (0x100000) 11 | 12 | int main(void) { 13 | // Create the two input vectors 14 | int i; 15 | const int LIST_SIZE = 1024; 16 | int *A = (int*)malloc(sizeof(int)*LIST_SIZE); 17 | int *B = (int*)malloc(sizeof(int)*LIST_SIZE); 18 | for(i = 0; i < LIST_SIZE; i++) { 19 | A[i] = i; 20 | B[i] = LIST_SIZE - i; 21 | } 22 | 23 | // Load the kernel source code into the array source_str 24 | FILE *fp; 25 | char *source_str; 26 | size_t source_size; 27 | 28 | fp = fopen("vector_add_kernel.cl", "r"); 29 | if (!fp) { 30 | fprintf(stderr, "Failed to load kernel.\n"); 31 | exit(1); 32 | } 33 | source_str = (char*)malloc(MAX_SOURCE_SIZE); 34 | source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp); 35 | fclose( fp ); 36 | 37 | // Get platform and device information 38 | cl_platform_id platform_id = NULL; 39 | cl_device_id device_id = NULL; 40 | cl_uint ret_num_devices; 41 | cl_uint ret_num_platforms; 42 | cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms); 43 | ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_ALL, 1, 44 | &device_id, &ret_num_devices); 45 | 46 | // Create an OpenCL context 47 | cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret); 48 | 49 | // Create a command queue 50 | cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret); 51 | 52 | // Create memory buffers on the device for each vector 53 | cl_mem a_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, 54 | LIST_SIZE * sizeof(int), NULL, &ret); 55 | cl_mem b_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, 56 | LIST_SIZE * sizeof(int), NULL, &ret); 57 | cl_mem c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 58 | LIST_SIZE * sizeof(int), NULL, &ret); 59 | 60 | // Copy the lists A and B to their respective memory buffers 61 | ret = clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0, 62 | LIST_SIZE * sizeof(int), A, 0, NULL, NULL); 63 | ret = clEnqueueWriteBuffer(command_queue, b_mem_obj, CL_TRUE, 0, 64 | LIST_SIZE * sizeof(int), B, 0, NULL, NULL); 65 | 66 | // Create a program from the kernel source 67 | cl_program program = clCreateProgramWithSource(context, 1, 68 | (const char **)&source_str, (const size_t *)&source_size, &ret); 69 | 70 | // Build the program 71 | ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); 72 | 73 | // Create the OpenCL kernel 74 | cl_kernel kernel = clCreateKernel(program, "vector_add", &ret); 75 | 76 | // Set the arguments of the kernel 77 | ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_mem_obj); 78 | ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_mem_obj); 79 | ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_mem_obj); 80 | 81 | // Execute the OpenCL kernel on the list 82 | size_t global_item_size = LIST_SIZE; // Process the entire lists 83 | size_t local_item_size = 64; // Process in groups of 64 84 | ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, 85 | &global_item_size, &local_item_size, 0, NULL, NULL); 86 | 87 | // Read the memory buffer C on the device to the local variable C 88 | int *C = (int*)malloc(sizeof(int)*LIST_SIZE); 89 | ret = clEnqueueReadBuffer(command_queue, c_mem_obj, CL_TRUE, 0, 90 | LIST_SIZE * sizeof(int), C, 0, NULL, NULL); 91 | 92 | // Display the result to the screen 93 | for(i = 0; i < LIST_SIZE; i++) 94 | printf("%d + %d = %d\n", A[i], B[i], C[i]); 95 | 96 | // Clean up 97 | ret = clFlush(command_queue); 98 | ret = clFinish(command_queue); 99 | ret = clReleaseKernel(kernel); 100 | ret = clReleaseProgram(program); 101 | ret = clReleaseMemObject(a_mem_obj); 102 | ret = clReleaseMemObject(b_mem_obj); 103 | ret = clReleaseMemObject(c_mem_obj); 104 | ret = clReleaseCommandQueue(command_queue); 105 | ret = clReleaseContext(context); 106 | free(A); 107 | free(B); 108 | free(C); 109 | return 0; 110 | } 111 | 112 | -------------------------------------------------------------------------------- /errorchecking/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include /* PRIuPTR and uintptr_t */ 4 | 5 | /* errno and strerror */ 6 | #include 7 | #include 8 | 9 | #ifdef __APPLE__ 10 | #include 11 | #else 12 | #include 13 | #endif 14 | 15 | /* see check_opencl.h for docs on the CHECK_* macros */ 16 | #include "check_opencl.h" 17 | 18 | #define MAX_SOURCE_SIZE (0x100000) 19 | 20 | 21 | #define val_size 10000 22 | char val[val_size]; 23 | 24 | char *sourcepath= "../vector_add_kernel.cl"; 25 | 26 | int main(void) { 27 | DECLARE_CHECK; 28 | 29 | // Create the two input vectors 30 | int i; 31 | const int LIST_SIZE = 1024; 32 | int *A = CHECK_malloc(sizeof(int)*LIST_SIZE, err_malloc_A); 33 | int *B = CHECK_malloc(sizeof(int)*LIST_SIZE, err_malloc_B); 34 | 35 | for(i = 0; i < LIST_SIZE; i++) { 36 | A[i] = i; 37 | B[i] = LIST_SIZE - i; 38 | } 39 | 40 | // Load the kernel source code into the array source_str 41 | FILE *fp = fopen(sourcepath, "r"); 42 | if (!fp) { 43 | fprintf(stderr, "Failed to open kernel file '%s': %s\n", sourcepath, 44 | strerror(errno)); 45 | inc_CHECK_errors(); 46 | goto err_fopen; 47 | } 48 | 49 | char *source_str = CHECK_malloc(MAX_SOURCE_SIZE, err_malloc_source_str); 50 | size_t source_size= fread( source_str, 1, MAX_SOURCE_SIZE, fp); 51 | 52 | // Get platform and device information 53 | cl_platform_id platform_id = NULL; 54 | cl_device_id device_id = NULL; 55 | cl_uint ret_num_devices; 56 | cl_uint ret_num_platforms; 57 | 58 | CHECK_clGetPlatformIDs(1, &platform_id, &ret_num_platforms, 59 | err_clGetPlatformIDs); 60 | 61 | fprintf(stderr, "ret_num_platforms=%i\n", ret_num_platforms); 62 | 63 | CHECK_clGetDeviceIDs( platform_id, 64 | CL_DEVICE_TYPE_GPU, 65 | 1, 66 | &device_id, 67 | &ret_num_devices, 68 | err_clGetDeviceIDs); 69 | 70 | // Create an OpenCL context 71 | cl_context context = 72 | CHECK_clCreateContext( NULL, 1, &device_id, NULL, NULL, err_clCreateContext); 73 | 74 | // Create a command queue 75 | cl_command_queue command_queue = 76 | CHECK_clCreateCommandQueue(context, device_id, 0, err_clCreateCommandQueue); 77 | 78 | // Create memory buffers on the device for each vector 79 | cl_mem a_mem_obj = 80 | CHECK_clCreateBuffer(context, CL_MEM_READ_ONLY, 81 | LIST_SIZE * sizeof(int), NULL, err_a_mem_obj); 82 | cl_mem b_mem_obj = 83 | CHECK_clCreateBuffer(context, CL_MEM_READ_ONLY, 84 | LIST_SIZE * sizeof(int), NULL, err_b_mem_obj); 85 | cl_mem c_mem_obj = 86 | CHECK_clCreateBuffer(context, CL_MEM_WRITE_ONLY, 87 | LIST_SIZE * sizeof(int), NULL, err_c_mem_obj); 88 | 89 | // Copy the lists A and B to their respective memory buffers 90 | CHECK_clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0, 91 | LIST_SIZE * sizeof(int), A, 0, NULL, NULL, 92 | err_clEnqueueWriteBuffer_A); 93 | CHECK_clEnqueueWriteBuffer(command_queue, b_mem_obj, CL_TRUE, 0, 94 | LIST_SIZE * sizeof(int), B, 0, NULL, NULL, 95 | err_clEnqueueWriteBuffer_B); 96 | 97 | // Create a program from the kernel source 98 | cl_program program = 99 | CHECK_clCreateProgramWithSource(context, 100 | 1, 101 | (const char**)&source_str, 102 | &source_size, 103 | err_clCreateProgramWithSource); 104 | 105 | free(source_str); //XXX can we do that while program is still alive ? 106 | 107 | // Build the program 108 | cl_int ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); 109 | if (ret) { 110 | //cl_int ret0=ret; XX print it? 111 | size_t sizeused; 112 | CHECK_clGetProgramBuildInfo (program, 113 | device_id, 114 | CL_PROGRAM_BUILD_LOG, 115 | val_size-1, //? 116 | &val, 117 | &sizeused, 118 | err_clGetProgramBuildInfo); 119 | 120 | printf("clBuildProgram error: (sizeused %"PRIuPTR") '%s'\n", 121 | (uintptr_t) sizeused, val); 122 | err_clGetProgramBuildInfo: 123 | goto err_clBuildProgram; 124 | } 125 | 126 | // Create the OpenCL kernel 127 | cl_kernel kernel = CHECK_clCreateKernel(program, "vector_add", 128 | err_clCreateKernel); 129 | 130 | // Set the arguments of the kernel 131 | CHECK_clSetKernelArg(kernel, 0, sizeof(cl_mem), &a_mem_obj, 132 | err_clSetKernelArg); 133 | CHECK_clSetKernelArg(kernel, 1, sizeof(cl_mem), &b_mem_obj, 134 | err_clSetKernelArg); 135 | CHECK_clSetKernelArg(kernel, 2, sizeof(cl_mem), &c_mem_obj, 136 | err_clSetKernelArg); 137 | 138 | // Execute the OpenCL kernel on the list 139 | size_t global_item_size = LIST_SIZE; // Process the entire lists 140 | size_t local_item_size = 64; // Process in groups of 64 141 | CHECK_clEnqueueNDRangeKernel 142 | (command_queue, kernel, 1, NULL, 143 | &global_item_size, &local_item_size, 0, NULL, NULL, 144 | err_clEnqueueNDRangeKernel); 145 | 146 | // Read the memory buffer C on the device to the local variable C 147 | int *C = CHECK_malloc(sizeof(int)*LIST_SIZE, err_malloc_C); 148 | 149 | CHECK_clEnqueueReadBuffer(command_queue, c_mem_obj, CL_TRUE, 0, 150 | LIST_SIZE * sizeof(int), C, 0, NULL, NULL, 151 | err_clEnqueueReadBuffer); 152 | 153 | // Display the result to the screen 154 | for(i = 0; i < LIST_SIZE; i++) 155 | printf("%d + %d = %d\n", A[i], B[i], C[i]); 156 | 157 | err_clEnqueueReadBuffer: 158 | free(C); 159 | err_malloc_C: 160 | CHECK_clFlush(command_queue, err_clFlush); 161 | err_clFlush: 162 | CHECK_clFinish(command_queue, err_clEnqueueNDRangeKernel); 163 | err_clEnqueueNDRangeKernel: 164 | err_clSetKernelArg: 165 | CHECK_clReleaseKernel(kernel, err_clCreateKernel); 166 | err_clCreateKernel: 167 | err_clBuildProgram: 168 | CHECK_clReleaseProgram(program, err_clCreateProgramWithSource); 169 | err_clCreateProgramWithSource: 170 | err_clEnqueueWriteBuffer_B: 171 | err_clEnqueueWriteBuffer_A: 172 | CHECK_clReleaseMemObject(c_mem_obj, err_c_mem_obj); 173 | err_c_mem_obj: 174 | CHECK_clReleaseMemObject(b_mem_obj, err_b_mem_obj); 175 | err_b_mem_obj: 176 | CHECK_clReleaseMemObject(a_mem_obj, err_a_mem_obj); 177 | err_a_mem_obj: 178 | CHECK_clReleaseCommandQueue(command_queue, err_clCreateCommandQueue); 179 | err_clCreateCommandQueue: 180 | CHECK_clReleaseContext(context, err_clCreateContext); 181 | err_clCreateContext: 182 | // XXX deallocate device_id ? 183 | err_clGetDeviceIDs: 184 | // XXX deallocate platform_id ? 185 | err_clGetPlatformIDs: 186 | err_malloc_source_str: 187 | fclose( fp ); 188 | err_fopen: 189 | free(B); 190 | err_malloc_B: 191 | free(A); 192 | err_malloc_A: 193 | return CHECK_errors; 194 | } 195 | 196 | --------------------------------------------------------------------------------