├── .gitignore ├── LICENSE ├── README.md ├── pom.xml └── src └── main ├── java └── org │ └── jocl │ └── samples │ ├── HistogramAMD.java │ ├── HistogramNVIDIA.java │ ├── JOCLBandwidthTest.java │ ├── JOCLDeviceQuery.java │ ├── JOCLEventSample.java │ ├── JOCLMandelbrot.java │ ├── JOCLMappedBufferSample.java │ ├── JOCLMultiDeviceSample.java │ ├── JOCLReduction.java │ ├── JOCLSample.java │ ├── JOCLSample_1_1.java │ ├── JOCLSample_1_2_KernelArgs.java │ ├── JOCLSample_2_0_SVM.java │ ├── JOCLSimpleConvolution.java │ ├── JOCLSimpleGL3.java │ ├── JOCLSimpleImage.java │ ├── JOCLSimpleLWJGL.java │ ├── JOCLSimpleMandelbrot.java │ ├── JOCLSubBufferSample.java │ └── blast │ ├── JOCLBlastCaxpyBatchedSample.java │ ├── JOCLBlastDgemmSample.java │ └── JOCLBlastSample.java └── resources ├── data └── lena512color.png └── kernels ├── Histogram256.cl ├── Histogram_Kernels.cl ├── QuadFloat.cl ├── QuadFloatMandelbrot.cl ├── SimpleConvolution.cl ├── SimpleMandelbrot.cl ├── reduction.cl └── simpleGL.cl /.gitignore: -------------------------------------------------------------------------------- 1 | /.settings 2 | /target 3 | /.classpath 4 | /.project 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Marco Hutter 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # JOCLSamples 2 | 3 | Samples for JOCL - http://jocl.org 4 | 5 | **Note:** These samples have been moved here from the original samples page 6 | of the JOCL website, [http://www.jocl.org/samples/samples.html](http://www.jocl.org/samples/samples.html). 7 | These are mainly *standalone* samples, which means that each class contains 8 | the whole code that is required for the sample, although some of them refer 9 | to kernels that are stored in `src/main/resources/kernels`. Several methods 10 | (e.g. for the basic OpenCL initialization) appear in each of these samples. 11 | They may be moved to a utility class in the future. 
12 | 13 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | org.jocl 6 | jocl-samples 7 | 0.0.1-SNAPSHOT 8 | 9 | 10 | 11 | 12 | org.apache.maven.plugins 13 | maven-compiler-plugin 14 | 2.3.2 15 | 16 | 1.7 17 | 1.7 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | org.jocl 26 | jocl 27 | 2.0.4 28 | 29 | 30 | org.jocl 31 | jocl-blast 32 | 1.5.0 33 | 34 | 35 | org.jogamp.gluegen 36 | gluegen-rt-main 37 | 2.3.1 38 | 39 | 40 | org.jogamp.jogl 41 | jogl-all-main 42 | 2.3.1 43 | 44 | 45 | org.lwjgl.lwjgl 46 | lwjgl 47 | 2.9.3 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /src/main/java/org/jocl/samples/HistogramNVIDIA.java: -------------------------------------------------------------------------------- 1 | /* 2 | * JOCL - Java bindings for OpenCL 3 | * 4 | * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/ 5 | */ 6 | package org.jocl.samples; 7 | 8 | import static org.jocl.CL.*; 9 | 10 | import java.io.*; 11 | import java.util.Random; 12 | 13 | import org.jocl.*; 14 | 15 | /** 16 | * This class is a port of the NVIDIA OpenCL SDK "Histogram" sample. 17 | * The structure of the code has intentionally been kept similar 18 | * to the original sample. 
19 | */ 20 | public class HistogramNVIDIA 21 | { 22 | public static final int HISTOGRAM256_BIN_COUNT = 256; 23 | 24 | //OpenCL histogram256 program 25 | static cl_program cpHistogram256; 26 | 27 | //OpenCL histogram256 kernels 28 | static cl_kernel ckHistogram256, ckMergeHistogram256; 29 | 30 | //histogram256() intermediate results buffer 31 | static int PARTIAL_HISTOGRAM256_COUNT = 240; 32 | static cl_mem d_PartialHistograms; 33 | 34 | //Default command queue for histogram256 kernels 35 | static cl_command_queue cqDefaultCommandQue; 36 | 37 | 38 | 39 | //////////////////////////////////////////////////////////////////////////////// 40 | //Test driver 41 | //////////////////////////////////////////////////////////////////////////////// 42 | public static void main(String args[]) 43 | { 44 | cl_context cxGPUContext; //OpenCL context 45 | cl_command_queue cqCommandQue; //OpenCL command que 46 | cl_mem d_Data, d_Histogram; //OpenCL memory buffer objects 47 | 48 | long dataBytes[] = new long[1]; 49 | int ciErrNum[] = new int[1]; 50 | int PassFailFlag = 1; 51 | 52 | byte h_Data[]; 53 | int h_HistogramCPU[], h_HistogramGPU[]; 54 | 55 | int byteCount = 128 * 8192; 56 | 57 | // start logs 58 | System.out.println("Starting...\n"); 59 | 60 | System.out.println("Initializing data..."); 61 | h_Data = new byte[byteCount]; 62 | h_HistogramCPU = new int[HISTOGRAM256_BIN_COUNT]; 63 | h_HistogramGPU = new int[HISTOGRAM256_BIN_COUNT]; 64 | 65 | Random random = new Random(2009); 66 | for(int i = 0; i < byteCount; i++) 67 | h_Data[i] = (byte)(random.nextInt() & 0xFF); 68 | 69 | // This will allow us to subsequently omit the "shrCheckError" calls for this sample 70 | CL.setExceptionsEnabled(true); 71 | 72 | System.out.println("Initializing OpenCL..."); 73 | 74 | // Obtain the platform IDs and initialize the context properties 75 | cl_platform_id platforms[] = new cl_platform_id[1]; 76 | clGetPlatformIDs(platforms.length, platforms, null); 77 | cl_context_properties contextProperties = 
new cl_context_properties(); 78 | contextProperties.addProperty(CL_CONTEXT_PLATFORM, platforms[0]); 79 | cxGPUContext = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, null, null, ciErrNum); 80 | 81 | // get the list of GPU devices associated with context 82 | clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, null, dataBytes); 83 | cl_device_id cdDevices[] = new cl_device_id[(int)dataBytes[0] / Sizeof.cl_device_id]; 84 | 85 | clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, dataBytes[0], Pointer.to(cdDevices), null); 86 | 87 | //Create a command-queue 88 | cl_queue_properties properties = new cl_queue_properties(); 89 | cqCommandQue = clCreateCommandQueueWithProperties( 90 | cxGPUContext, cdDevices[0], properties, ciErrNum); 91 | 92 | System.out.println("Allocating OpenCL memory...\n"); 93 | d_Data = clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, byteCount * Sizeof.cl_char, Pointer.to(h_Data), ciErrNum); 94 | d_Histogram = clCreateBuffer(cxGPUContext, CL_MEM_READ_WRITE, HISTOGRAM256_BIN_COUNT * Sizeof.cl_int, null, ciErrNum); 95 | 96 | System.out.println("Initializing 256-bin OpenCL histogram..."); 97 | initHistogram256(cxGPUContext, cqCommandQue); 98 | 99 | System.out.printf("Running 256-bin OpenCL histogram for %d bytes...\n", byteCount); 100 | histogram256(null, d_Histogram, d_Data, byteCount); 101 | 102 | System.out.println("Validating OpenCL results..."); 103 | System.out.println("...reading back OpenCL results"); 104 | clEnqueueReadBuffer(cqCommandQue, d_Histogram, CL_TRUE, 0, HISTOGRAM256_BIN_COUNT * Sizeof.cl_int, Pointer.to(h_HistogramGPU), 0, null, null); 105 | 106 | System.out.println("...histogram256CPU()"); 107 | 108 | histogram256CPU(h_HistogramCPU, h_Data, byteCount); 109 | 110 | for(int i = 0; i < HISTOGRAM256_BIN_COUNT; i++) 111 | { 112 | if(h_HistogramGPU[i] != h_HistogramCPU[i]) 113 | { 114 | PassFailFlag = 0; 115 | } 116 | } 117 | System.out.println(PassFailFlag != 0 ? 
"256-bin histograms match\n" : "***256-bin histograms do not match!!!***\n" ); 118 | 119 | System.out.println("Shutting down 256-bin OpenCL histogram...\n\n"); 120 | 121 | //Release kernels and program 122 | closeHistogram256(); 123 | 124 | // pass or fail 125 | System.out.printf("TEST %s\n", PassFailFlag != 0 ? "PASSED" : "FAILED !!!"); 126 | 127 | System.out.println("Shutting down..."); 128 | 129 | //Release other OpenCL Objects 130 | ciErrNum[0] = clReleaseMemObject(d_Histogram); 131 | ciErrNum[0] |= clReleaseMemObject(d_Data); 132 | ciErrNum[0] |= clReleaseCommandQueue(cqCommandQue); 133 | ciErrNum[0] |= clReleaseContext(cxGPUContext); 134 | } 135 | 136 | 137 | static void histogram256CPU(int h_Histogram[], byte h_Data[], int byteCount) 138 | { 139 | for(int i = 0; i < HISTOGRAM256_BIN_COUNT; i++) 140 | h_Histogram[i] = 0; 141 | 142 | for(int i = 0; i < byteCount; i++){ 143 | int data = h_Data[i]; 144 | if (data < 0) 145 | { 146 | data+=256; 147 | } 148 | h_Histogram[data]++; 149 | } 150 | } 151 | 152 | 153 | //////////////////////////////////////////////////////////////////////////////// 154 | // OpenCL launchers for histogram256 / mergeHistogram256 kernels 155 | //////////////////////////////////////////////////////////////////////////////// 156 | 157 | static void initHistogram256(cl_context cxGPUContext, cl_command_queue cqParamCommandQue) 158 | { 159 | int ciErrNum[] = new int[1]; 160 | 161 | System.out.println("...loading Histogram256.cl"); 162 | String cHistogram256 = readFile("src/main/resources/kernels/Histogram256.cl"); 163 | 164 | System.out.println("...creating histogram256 program"); 165 | cpHistogram256 = clCreateProgramWithSource(cxGPUContext, 1, new String[]{cHistogram256}, new long[]{cHistogram256.length()}, ciErrNum); 166 | 167 | System.out.println("...building histogram256 program"); 168 | ciErrNum[0] = clBuildProgram(cpHistogram256, 0, null, null, null, null); 169 | 170 | System.out.println("...creating histogram256 kernels"); 171 | 
ckHistogram256 = clCreateKernel(cpHistogram256, "histogram256", ciErrNum); 172 | ckMergeHistogram256 = clCreateKernel(cpHistogram256, "mergeHistogram256", ciErrNum); 173 | 174 | System.out.println("...allocating internal histogram256 buffer"); 175 | d_PartialHistograms = clCreateBuffer(cxGPUContext, CL_MEM_READ_WRITE, PARTIAL_HISTOGRAM256_COUNT * HISTOGRAM256_BIN_COUNT * Sizeof.cl_uint, null, ciErrNum); 176 | 177 | //Save default command queue 178 | cqDefaultCommandQue = cqParamCommandQue; 179 | } 180 | 181 | static void closeHistogram256() 182 | { 183 | clReleaseMemObject(d_PartialHistograms); 184 | clReleaseKernel(ckMergeHistogram256); 185 | clReleaseKernel(ckHistogram256); 186 | clReleaseProgram(cpHistogram256); 187 | } 188 | 189 | static void histogram256(cl_command_queue cqCommandQue, cl_mem d_Histogram, cl_mem d_Data, int byteCount) 190 | { 191 | long localWorkSize[] = new long[1]; 192 | long globalWorkSize[] = new long[1]; 193 | 194 | if(cqCommandQue == null) 195 | cqCommandQue = cqDefaultCommandQue; 196 | 197 | int WARP_SIZE = 32; 198 | int WARP_COUNT = 6; 199 | 200 | int dataCount = byteCount / 4; 201 | clSetKernelArg(ckHistogram256, 0, Sizeof.cl_mem, Pointer.to(d_PartialHistograms)); 202 | clSetKernelArg(ckHistogram256, 1, Sizeof.cl_mem, Pointer.to(d_Data)); 203 | clSetKernelArg(ckHistogram256, 2, Sizeof.cl_uint, Pointer.to(new int[]{dataCount})); 204 | 205 | localWorkSize[0] = WARP_SIZE * WARP_COUNT; 206 | globalWorkSize[0] = PARTIAL_HISTOGRAM256_COUNT * localWorkSize[0]; 207 | 208 | clEnqueueNDRangeKernel(cqCommandQue, ckHistogram256, 1, null, globalWorkSize, localWorkSize, 0, null, null); 209 | 210 | int MERGE_WORKGROUP_SIZE = 256; 211 | clSetKernelArg(ckMergeHistogram256, 0, Sizeof.cl_mem, Pointer.to(d_Histogram)); 212 | clSetKernelArg(ckMergeHistogram256, 1, Sizeof.cl_mem, Pointer.to(d_PartialHistograms)); 213 | clSetKernelArg(ckMergeHistogram256, 2, Sizeof.cl_uint, Pointer.to(new int[]{PARTIAL_HISTOGRAM256_COUNT})); 214 | 215 | localWorkSize[0] = 
MERGE_WORKGROUP_SIZE; 216 | globalWorkSize[0] = HISTOGRAM256_BIN_COUNT * localWorkSize[0]; 217 | 218 | clEnqueueNDRangeKernel(cqCommandQue, ckMergeHistogram256, 1, null, globalWorkSize, localWorkSize, 0, null, null); 219 | } 220 | 221 | 222 | private static String readFile(String fileName) 223 | { 224 | BufferedReader br = null; 225 | try 226 | { 227 | br = new BufferedReader(new FileReader(fileName)); 228 | StringBuilder sb = new StringBuilder(); 229 | String line = null; 230 | while (true) 231 | { 232 | line = br.readLine(); 233 | if (line == null) 234 | { 235 | break; 236 | } 237 | sb.append(line+"\n"); 238 | } 239 | return sb.toString(); 240 | } 241 | catch (IOException e) 242 | { 243 | e.printStackTrace(); 244 | return ""; 245 | } 246 | finally 247 | { 248 | if (br != null) 249 | { 250 | try 251 | { 252 | br.close(); 253 | } 254 | catch (IOException e) 255 | { 256 | e.printStackTrace(); 257 | } 258 | } 259 | } 260 | } 261 | 262 | } 263 | -------------------------------------------------------------------------------- /src/main/java/org/jocl/samples/JOCLBandwidthTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * JOCL - Java bindings for OpenCL 3 | * 4 | * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/ 5 | */ 6 | package org.jocl.samples; 7 | import static org.jocl.CL.*; 8 | 9 | import java.nio.ByteBuffer; 10 | import java.util.Locale; 11 | 12 | import org.jocl.*; 13 | 14 | /** 15 | * A test for the bandwidth of of the data transfer from the host 16 | * to the device. 
17 | */ 18 | public class JOCLBandwidthTest 19 | { 20 | /** 21 | * The index of the OpenCL platform that this sample should run on 22 | */ 23 | private static final int platformIndex = 0; 24 | 25 | /** 26 | * The OpenCL device type that will be used 27 | */ 28 | private static final long deviceType = CL_DEVICE_TYPE_ALL; 29 | 30 | /** 31 | * The index of the OpenCL device that will be used 32 | */ 33 | private static final int deviceIndex = 0; 34 | 35 | /** 36 | * The OpenCL context 37 | */ 38 | private static cl_context context; 39 | 40 | /** 41 | * The OpenCL command queue 42 | */ 43 | private static cl_command_queue commandQueue; 44 | 45 | /** 46 | * The host memory modes that will be tested 47 | */ 48 | enum MemoryMode 49 | { 50 | PAGEABLE, 51 | PINNED 52 | } 53 | 54 | /** 55 | * The memory access modes that will be tested 56 | */ 57 | enum AccessMode 58 | { 59 | MAPPED, 60 | DIRECT 61 | } 62 | 63 | /** 64 | * The number of memcopy operations to perform for each size 65 | */ 66 | private static final long MEMCOPY_ITERATIONS = 100; 67 | 68 | /** 69 | * The entry point of this sample 70 | * 71 | * @param args Not used 72 | */ 73 | public static void main(String args[]) 74 | { 75 | initialize(); 76 | 77 | for (MemoryMode memoryMode : MemoryMode.values()) 78 | { 79 | for (AccessMode accessMode : AccessMode.values()) 80 | { 81 | runTest(memoryMode, accessMode); 82 | } 83 | } 84 | 85 | shutdown(); 86 | } 87 | 88 | /** 89 | * Run a bandwidth test with the given memory mode and access mode 90 | * 91 | * @param memoryMode The memory mode 92 | * @param accessMode The access mode 93 | */ 94 | private static void runTest(MemoryMode memoryMode, AccessMode accessMode) 95 | { 96 | int minExponent = 10; 97 | int maxExponent = 26; 98 | int count = maxExponent - minExponent; 99 | int memorySizes[] = new int[count]; 100 | double bandwidths[] = new double[memorySizes.length]; 101 | 102 | System.out.print("Running"); 103 | for (int i=0; i devices = new ArrayList(); 40 | for (int 
i=0; i 181 | System.out.printf("CL_DEVICE_PREFERRED_VECTOR_WIDTH_\t"); 182 | int preferredVectorWidthChar = getInt(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR); 183 | int preferredVectorWidthShort = getInt(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT); 184 | int preferredVectorWidthInt = getInt(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT); 185 | int preferredVectorWidthLong = getInt(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG); 186 | int preferredVectorWidthFloat = getInt(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT); 187 | int preferredVectorWidthDouble = getInt(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE); 188 | System.out.printf("CHAR %d, SHORT %d, INT %d, LONG %d, FLOAT %d, DOUBLE %d\n\n\n", 189 | preferredVectorWidthChar, preferredVectorWidthShort, 190 | preferredVectorWidthInt, preferredVectorWidthLong, 191 | preferredVectorWidthFloat, preferredVectorWidthDouble); 192 | } 193 | } 194 | 195 | /** 196 | * Returns the value of the device info parameter with the given name 197 | * 198 | * @param device The device 199 | * @param paramName The parameter name 200 | * @return The value 201 | */ 202 | private static int getInt(cl_device_id device, int paramName) 203 | { 204 | return getInts(device, paramName, 1)[0]; 205 | } 206 | 207 | /** 208 | * Returns the values of the device info parameter with the given name 209 | * 210 | * @param device The device 211 | * @param paramName The parameter name 212 | * @param numValues The number of values 213 | * @return The value 214 | */ 215 | private static int[] getInts(cl_device_id device, int paramName, int numValues) 216 | { 217 | int values[] = new int[numValues]; 218 | clGetDeviceInfo(device, paramName, Sizeof.cl_int * numValues, Pointer.to(values), null); 219 | return values; 220 | } 221 | 222 | /** 223 | * Returns the value of the device info parameter with the given name 224 | * 225 | * @param device The device 226 | * @param paramName The parameter name 227 | * @return The value 228 | */ 229 | private 
static long getLong(cl_device_id device, int paramName) 230 | { 231 | return getLongs(device, paramName, 1)[0]; 232 | } 233 | 234 | /** 235 | * Returns the values of the device info parameter with the given name 236 | * 237 | * @param device The device 238 | * @param paramName The parameter name 239 | * @param numValues The number of values 240 | * @return The value 241 | */ 242 | private static long[] getLongs(cl_device_id device, int paramName, int numValues) 243 | { 244 | long values[] = new long[numValues]; 245 | clGetDeviceInfo(device, paramName, Sizeof.cl_long * numValues, Pointer.to(values), null); 246 | return values; 247 | } 248 | 249 | /** 250 | * Returns the value of the device info parameter with the given name 251 | * 252 | * @param device The device 253 | * @param paramName The parameter name 254 | * @return The value 255 | */ 256 | private static String getString(cl_device_id device, int paramName) 257 | { 258 | // Obtain the length of the string that will be queried 259 | long size[] = new long[1]; 260 | clGetDeviceInfo(device, paramName, 0, null, size); 261 | 262 | // Create a buffer of the appropriate size and fill it with the info 263 | byte buffer[] = new byte[(int)size[0]]; 264 | clGetDeviceInfo(device, paramName, buffer.length, Pointer.to(buffer), null); 265 | 266 | // Create a string from the buffer (excluding the trailing \0 byte) 267 | return new String(buffer, 0, buffer.length-1); 268 | } 269 | 270 | /** 271 | * Returns the value of the platform info parameter with the given name 272 | * 273 | * @param platform The platform 274 | * @param paramName The parameter name 275 | * @return The value 276 | */ 277 | private static String getString(cl_platform_id platform, int paramName) 278 | { 279 | // Obtain the length of the string that will be queried 280 | long size[] = new long[1]; 281 | clGetPlatformInfo(platform, paramName, 0, null, size); 282 | 283 | // Create a buffer of the appropriate size and fill it with the info 284 | byte buffer[] = 
new byte[(int)size[0]]; 285 | clGetPlatformInfo(platform, paramName, buffer.length, Pointer.to(buffer), null); 286 | 287 | // Create a string from the buffer (excluding the trailing \0 byte) 288 | return new String(buffer, 0, buffer.length-1); 289 | } 290 | 291 | /** 292 | * Returns the value of the device info parameter with the given name 293 | * 294 | * @param device The device 295 | * @param paramName The parameter name 296 | * @return The value 297 | */ 298 | private static long getSize(cl_device_id device, int paramName) 299 | { 300 | return getSizes(device, paramName, 1)[0]; 301 | } 302 | 303 | /** 304 | * Returns the values of the device info parameter with the given name 305 | * 306 | * @param device The device 307 | * @param paramName The parameter name 308 | * @param numValues The number of values 309 | * @return The value 310 | */ 311 | static long[] getSizes(cl_device_id device, int paramName, int numValues) 312 | { 313 | // The size of the returned data has to depend on 314 | // the size of a size_t, which is handled here 315 | ByteBuffer buffer = ByteBuffer.allocate( 316 | numValues * Sizeof.size_t).order(ByteOrder.nativeOrder()); 317 | clGetDeviceInfo(device, paramName, Sizeof.size_t * numValues, 318 | Pointer.to(buffer), null); 319 | long values[] = new long[numValues]; 320 | if (Sizeof.size_t == 4) 321 | { 322 | for (int i=0; i max) 221 | { 222 | System.out.print(" ..."); 223 | } 224 | } 225 | System.out.println(""); 226 | } 227 | 228 | /** 229 | * A simple helper class for tracking cl_events and printing 230 | * timing information for the execution of the commands that 231 | * are associated with the events. 
232 | */ 233 | static class ExecutionStatistics 234 | { 235 | /** 236 | * A single entry of the ExecutionStatistics 237 | */ 238 | private static class Entry 239 | { 240 | private String name; 241 | private long submitTime[] = new long[1]; 242 | private long queuedTime[] = new long[1]; 243 | private long startTime[] = new long[1]; 244 | private long endTime[] = new long[1]; 245 | 246 | Entry(String name, cl_event event) 247 | { 248 | this.name = name; 249 | clGetEventProfilingInfo( 250 | event, CL_PROFILING_COMMAND_QUEUED, 251 | Sizeof.cl_ulong, Pointer.to(queuedTime), null); 252 | clGetEventProfilingInfo( 253 | event, CL_PROFILING_COMMAND_SUBMIT, 254 | Sizeof.cl_ulong, Pointer.to(submitTime), null); 255 | clGetEventProfilingInfo( 256 | event, CL_PROFILING_COMMAND_START, 257 | Sizeof.cl_ulong, Pointer.to(startTime), null); 258 | clGetEventProfilingInfo( 259 | event, CL_PROFILING_COMMAND_END, 260 | Sizeof.cl_ulong, Pointer.to(endTime), null); 261 | } 262 | 263 | void normalize(long baseTime) 264 | { 265 | submitTime[0] -= baseTime; 266 | queuedTime[0] -= baseTime; 267 | startTime[0] -= baseTime; 268 | endTime[0] -= baseTime; 269 | } 270 | 271 | long getQueuedTime() 272 | { 273 | return queuedTime[0]; 274 | } 275 | 276 | void print() 277 | { 278 | System.out.println("Event "+name+": "); 279 | System.out.println("Queued : "+ 280 | String.format("%8.3f", queuedTime[0]/1e6)+" ms"); 281 | System.out.println("Submit : "+ 282 | String.format("%8.3f", submitTime[0]/1e6)+" ms"); 283 | System.out.println("Start : "+ 284 | String.format("%8.3f", startTime[0]/1e6)+" ms"); 285 | System.out.println("End : "+ 286 | String.format("%8.3f", endTime[0]/1e6)+" ms"); 287 | 288 | long duration = endTime[0]-startTime[0]; 289 | System.out.println("Time : "+ 290 | String.format("%8.3f", duration / 1e6)+" ms"); 291 | } 292 | } 293 | 294 | /** 295 | * The list of entries in this instance 296 | */ 297 | private List entries = new ArrayList(); 298 | 299 | /** 300 | * Adds the specified entry to 
this instance 301 | * 302 | * @param name A name for the event 303 | * @param event The event 304 | */ 305 | public void addEntry(String name, cl_event event) 306 | { 307 | entries.add(new Entry(name, event)); 308 | } 309 | 310 | /** 311 | * Removes all entries 312 | */ 313 | public void clear() 314 | { 315 | entries.clear(); 316 | } 317 | 318 | /** 319 | * Normalize the entries, so that the times are relative 320 | * to the time when the first event was queued 321 | */ 322 | private void normalize() 323 | { 324 | long minQueuedTime = Long.MAX_VALUE; 325 | for (Entry entry : entries) 326 | { 327 | minQueuedTime = Math.min(minQueuedTime, entry.getQueuedTime()); 328 | } 329 | for (Entry entry : entries) 330 | { 331 | entry.normalize(minQueuedTime); 332 | } 333 | } 334 | 335 | /** 336 | * Print the statistics 337 | */ 338 | public void print() 339 | { 340 | normalize(); 341 | for (Entry entry : entries) 342 | { 343 | entry.print(); 344 | } 345 | } 346 | 347 | 348 | } 349 | } 350 | -------------------------------------------------------------------------------- /src/main/java/org/jocl/samples/JOCLMappedBufferSample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * JOCL - Java bindings for OpenCL 3 | * 4 | * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/ 5 | */ 6 | package org.jocl.samples; 7 | 8 | import static org.jocl.CL.*; 9 | 10 | import java.nio.*; 11 | 12 | import org.jocl.*; 13 | 14 | /** 15 | * A small JOCL sample, similar to the minimal JOCLSample, but 16 | * demonstrating how to map a cl_mem to a Java ByteBuffer 17 | */ 18 | public class JOCLMappedBufferSample 19 | { 20 | /** 21 | * The source code of the OpenCL program to execute 22 | */ 23 | private static String programSource = 24 | "__kernel void "+ 25 | "sampleKernel(__global const float *a,"+ 26 | " __global const float *b,"+ 27 | " __global float *c)"+ 28 | "{"+ 29 | " int gid = get_global_id(0);"+ 30 | " c[gid] = a[gid] * b[gid];"+ 31 | "}"; 
32 | 33 | /** 34 | * The name of the kernel to execute 35 | */ 36 | private static final String kernelName = "sampleKernel"; 37 | 38 | /** 39 | * The index of the OpenCL platform that this sample should run on 40 | */ 41 | private static final int platformIndex = 0; 42 | 43 | /** 44 | * The OpenCL device type that will be used 45 | */ 46 | private static final long deviceType = CL_DEVICE_TYPE_ALL; 47 | 48 | /** 49 | * The index of the OpenCL device that will be used 50 | */ 51 | private static final int deviceIndex = 0; 52 | 53 | /** 54 | * The OpenCL context 55 | */ 56 | private static cl_context context; 57 | 58 | /** 59 | * The OpenCL command queue 60 | */ 61 | private static cl_command_queue commandQueue; 62 | 63 | /** 64 | * The OpenCL program that contains the kernel 65 | */ 66 | private static cl_program program; 67 | 68 | /** 69 | * The OpenCL kernel from the program 70 | */ 71 | private static cl_kernel kernel; 72 | 73 | /** 74 | * The entry point of this sample 75 | * 76 | * @param args Not used 77 | */ 78 | public static void main(String args[]) 79 | { 80 | initialize(); 81 | 82 | // Create input- and output data 83 | int n = 10; 84 | float srcArrayA[] = new float[n]; 85 | float srcArrayB[] = new float[n]; 86 | float dstArray[] = new float[n]; 87 | for (int i=0; i 16 | *
17 | * Note: This is just a basic demo, showing the possibility to use multiple 18 | * devices simultaneously. Each device receives its own copy of the memory 19 | * objects to work on. In real applications, there may be a more complex 20 | * management of the buffers and the synchronization between the different 21 | * devices, which is beyond the scope of this sample. 22 | */ 23 | public class JOCLMultiDeviceSample 24 | { 25 | /** 26 | * The source code of the OpenCL program to execute, containing 27 | * some artificial workload to compute 28 | */ 29 | private static String programSource = 30 | "__kernel void sampleKernel(__global const float *input,"+ 31 | " __global float *output, " + 32 | " int size)"+ 33 | "{"+ 34 | " int gid = get_global_id(0);"+ 35 | " output[gid] = 0;" + 36 | " for (int i=0; i 71 | *
72 | * The reduction is performed in two phases: In the first phase, each 73 | * work group of the GPU computes the reduction of a part of the 74 | * input array. The size of this part is exactly the number of work 75 | * items in the group, and the reduction will be performed in local 76 | * memory. The results of these reductions will be written into 77 | * an output array. This output array is then reduced on the CPU. 78 | * 79 | * @param inputArray The array on which the reduction will be performed 80 | * @return The result of the reduction 81 | */ 82 | private static float reduce(float inputArray[]) 83 | { 84 | int localWorkSize = 128; 85 | int numWorkGroups = 64; 86 | float outputArray[] = new float[numWorkGroups]; 87 | 88 | // Allocate the memory objects for the input- and output data 89 | cl_mem inputMem = clCreateBuffer(context, 90 | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 91 | Sizeof.cl_float * inputArray.length, Pointer.to(inputArray), null); 92 | cl_mem outputMem = clCreateBuffer(context, 93 | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 94 | Sizeof.cl_float * numWorkGroups, Pointer.to(outputArray), null); 95 | 96 | // Perform the reduction on the GPU: Each work group will 97 | // perform the reduction of 'localWorkSize' elements, and 98 | // the results will be written into the output memory 99 | reduce( 100 | inputMem, inputArray.length, 101 | outputMem, numWorkGroups, 102 | localWorkSize); 103 | 104 | // Read the output data 105 | clEnqueueReadBuffer(commandQueue, outputMem, CL_TRUE, 0, 106 | numWorkGroups * Sizeof.cl_float, Pointer.to(outputArray), 107 | 0, null, null); 108 | 109 | // Perform the final reduction, by reducing the results 110 | // from the work groups on the CPU 111 | float result = reduceHost(outputArray); 112 | 113 | // Release memory objects 114 | clReleaseMemObject(inputMem); 115 | clReleaseMemObject(outputMem); 116 | 117 | return result; 118 | } 119 | 120 | 121 | /** 122 | * Perform a reduction of the float elements in the given 
input memory. 123 | * Each work group will reduce 'localWorkSize' elements, and write the 124 | * result into the given output memory. 125 | * 126 | * @param inputMem The input memory containing the float values to reduce 127 | * @param n The number of values in the input memory 128 | * @param outputMem The output memory that will store the reduction 129 | * result for each work group 130 | * @param numWorkGroups The number of work groups 131 | * @param localWorkSize The local work size, that is, the number of 132 | * work items in each work group 133 | */ 134 | private static void reduce( 135 | cl_mem inputMem, int n, 136 | cl_mem outputMem, int numWorkGroups, 137 | int localWorkSize) 138 | { 139 | // Set the arguments for the kernel 140 | int a = 0; 141 | clSetKernelArg(kernel, a++, Sizeof.cl_mem, Pointer.to(inputMem)); 142 | clSetKernelArg(kernel, a++, Sizeof.cl_float * localWorkSize, null); 143 | clSetKernelArg(kernel, a++, Sizeof.cl_int, Pointer.to(new int[]{n})); 144 | clSetKernelArg(kernel, a++, Sizeof.cl_mem, Pointer.to(outputMem)); 145 | 146 | // Compute the number of work groups and the global work size 147 | long globalWorkSize = numWorkGroups * localWorkSize; 148 | 149 | // Execute the kernel 150 | clEnqueueNDRangeKernel(commandQueue, kernel, 1, null, 151 | new long[]{ globalWorkSize }, new long[]{ localWorkSize}, 152 | 0, null, null); 153 | } 154 | 155 | /** 156 | * Implementation of a Kahan summation reduction in plain Java 157 | * 158 | * @param array The input 159 | * @return The reduction result 160 | */ 161 | private static float reduceHost(float array[]) 162 | { 163 | float sum = array[0]; 164 | float c = 0.0f; 165 | for (int i = 1; i < array.length; i++) 166 | { 167 | float y = array[i] - c; 168 | float t = sum + y; 169 | c = (t - sum) - y; 170 | sum = t; 171 | } 172 | return sum; 173 | } 174 | 175 | /** 176 | * Initialize a default OpenCL context, command queue, program and kernel 177 | */ 178 | private static void initialize() 179 | { 180 | // 
The platform, device type and device number 181 | // that will be used 182 | final int platformIndex = 0; 183 | final long deviceType = CL_DEVICE_TYPE_ALL; 184 | final int deviceIndex = 0; 185 | 186 | // Enable exceptions and subsequently omit error checks in this sample 187 | CL.setExceptionsEnabled(true); 188 | 189 | // Obtain the number of platforms 190 | int numPlatformsArray[] = new int[1]; 191 | clGetPlatformIDs(0, null, numPlatformsArray); 192 | int numPlatforms = numPlatformsArray[0]; 193 | 194 | // Obtain a platform ID 195 | cl_platform_id platforms[] = new cl_platform_id[numPlatforms]; 196 | clGetPlatformIDs(platforms.length, platforms, null); 197 | cl_platform_id platform = platforms[platformIndex]; 198 | 199 | // Initialize the context properties 200 | cl_context_properties contextProperties = new cl_context_properties(); 201 | contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform); 202 | 203 | // Obtain the number of devices for the platform 204 | int numDevicesArray[] = new int[1]; 205 | clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray); 206 | int numDevices = numDevicesArray[0]; 207 | 208 | // Obtain a device ID 209 | cl_device_id devices[] = new cl_device_id[numDevices]; 210 | clGetDeviceIDs(platform, deviceType, numDevices, devices, null); 211 | cl_device_id device = devices[deviceIndex]; 212 | 213 | // Create a context for the selected device 214 | context = clCreateContext( 215 | contextProperties, 1, new cl_device_id[]{device}, 216 | null, null, null); 217 | 218 | // Create a command-queue for the selected device 219 | cl_queue_properties properties = new cl_queue_properties(); 220 | commandQueue = clCreateCommandQueueWithProperties( 221 | context, device, properties, null); 222 | 223 | // Create the program from the source code 224 | String programSource = readFile("src/main/resources/kernels/reduction.cl"); 225 | program = clCreateProgramWithSource(context, 226 | 1, new String[]{ programSource }, null, null); 227 | 228 | // 
Build the program 229 | clBuildProgram(program, 0, null, null, null, null); 230 | 231 | // Create the kernel 232 | kernel = clCreateKernel(program, "reduce", null); 233 | } 234 | 235 | /** 236 | * Shut down and release all resources that have been allocated 237 | * in {@link #initialize()} 238 | */ 239 | private static void shutdown() 240 | { 241 | clReleaseKernel(kernel); 242 | clReleaseProgram(program); 243 | clReleaseCommandQueue(commandQueue); 244 | clReleaseContext(context); 245 | } 246 | 247 | /** 248 | * Read the contents of the file with the given name, and return 249 | * it as a string 250 | * 251 | * @param fileName The name of the file to read 252 | * @return The contents of the file 253 | */ 254 | private static String readFile(String fileName) 255 | { 256 | BufferedReader br = null; 257 | try 258 | { 259 | br = new BufferedReader(new FileReader(fileName)); 260 | StringBuilder sb = new StringBuilder(); 261 | String line = null; 262 | while (true) 263 | { 264 | line = br.readLine(); 265 | if (line == null) 266 | { 267 | break; 268 | } 269 | sb.append(line+"\n"); 270 | } 271 | return sb.toString(); 272 | } 273 | catch (IOException e) 274 | { 275 | e.printStackTrace(); 276 | return ""; 277 | } 278 | finally 279 | { 280 | if (br != null) 281 | { 282 | try 283 | { 284 | br.close(); 285 | } 286 | catch (IOException ex) 287 | { 288 | ex.printStackTrace(); 289 | } 290 | } 291 | } 292 | } 293 | 294 | } 295 | -------------------------------------------------------------------------------- /src/main/java/org/jocl/samples/JOCLSample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * JOCL - Java bindings for OpenCL 3 | * 4 | * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/ 5 | */ 6 | package org.jocl.samples; 7 | 8 | import static org.jocl.CL.*; 9 | 10 | import java.util.Arrays; 11 | 12 | import org.jocl.*; 13 | 14 | /** 15 | * A small JOCL sample. 
16 | */ 17 | public class JOCLSample 18 | { 19 | /** 20 | * The source code of the OpenCL program to execute 21 | */ 22 | private static String programSource = 23 | "__kernel void "+ 24 | "sampleKernel(__global const float *a,"+ 25 | " __global const float *b,"+ 26 | " __global float *c)"+ 27 | "{"+ 28 | " int gid = get_global_id(0);"+ 29 | " c[gid] = a[gid] * b[gid];"+ 30 | "}"; 31 | 32 | 33 | /** 34 | * The entry point of this sample 35 | * 36 | * @param args Not used 37 | */ 38 | public static void main(String args[]) 39 | { 40 | // Create input- and output data 41 | int n = 10; 42 | float srcArrayA[] = new float[n]; 43 | float srcArrayB[] = new float[n]; 44 | float dstArray[] = new float[n]; 45 | for (int i=0; i=1; i--) 158 | { 159 | System.out.println("Seconds left: "+i); 160 | try 161 | { 162 | Thread.sleep(1000); 163 | } 164 | catch (InterruptedException e) 165 | { 166 | Thread.currentThread().interrupt(); 167 | } 168 | } 169 | System.out.println("Setting event status to CL_COMPLETE"); 170 | clSetUserEventStatus(userEvent, CL.CL_COMPLETE); 171 | } 172 | }); 173 | thread.start(); 174 | 175 | 176 | // Create the destructor callback which will be called 177 | // when the output memory object is destroyed 178 | MemObjectDestructorCallbackFunction 179 | memObjectDestructorCallbackFunction = 180 | new MemObjectDestructorCallbackFunction() 181 | { 182 | @Override 183 | public void function(cl_mem memobj, Object user_data) 184 | { 185 | System.out.println("Memory object "+memobj+ 186 | " was destroyed, user data: "+user_data); 187 | } 188 | }; 189 | clSetMemObjectDestructorCallback(dstMem, 190 | memObjectDestructorCallbackFunction, 191 | "Memory object destructor callback user data"); 192 | 193 | // Wait until all commands have completed 194 | clFinish(commandQueue); 195 | 196 | // Release kernel, program, and memory objects. 
197 | clReleaseKernel(kernel); 198 | clReleaseProgram(program); 199 | clReleaseCommandQueue(commandQueue); 200 | clReleaseContext(context); 201 | clReleaseMemObject(srcMemA); 202 | clReleaseMemObject(srcMemB); 203 | 204 | // Releasing the output memory object will cause 205 | // the destructor callback to be called. 206 | clReleaseMemObject(dstMem); 207 | 208 | // Verify the result 209 | float reference[] = new float[]{10,12,18,20}; 210 | float result[] = new float[regionSizeX*regionSizeY]; 211 | regionData.get(result); 212 | boolean passed = Arrays.equals(result, reference); 213 | System.out.println(passed ? "PASSED" : "FAILED"); 214 | } 215 | 216 | /** 217 | * Default OpenCL initialization of the context, command queue, 218 | * program and kernel 219 | */ 220 | private static void defaultInitialization() 221 | { 222 | // The platform, device type and device number 223 | // that will be used 224 | final int platformIndex = 0; 225 | final long deviceType = CL_DEVICE_TYPE_ALL; 226 | final int deviceIndex = 0; 227 | 228 | // Enable exceptions and subsequently omit error checks in this sample 229 | CL.setExceptionsEnabled(true); 230 | 231 | // Obtain the number of platforms 232 | int numPlatformsArray[] = new int[1]; 233 | clGetPlatformIDs(0, null, numPlatformsArray); 234 | int numPlatforms = numPlatformsArray[0]; 235 | 236 | // Obtain a platform ID 237 | cl_platform_id platforms[] = new cl_platform_id[numPlatforms]; 238 | clGetPlatformIDs(platforms.length, platforms, null); 239 | cl_platform_id platform = platforms[platformIndex]; 240 | 241 | // Initialize the context properties 242 | cl_context_properties contextProperties = new cl_context_properties(); 243 | contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform); 244 | 245 | // Obtain the number of devices for the platform 246 | int numDevicesArray[] = new int[1]; 247 | clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray); 248 | int numDevices = numDevicesArray[0]; 249 | 250 | // Obtain a device ID 
251 | cl_device_id devices[] = new cl_device_id[numDevices]; 252 | clGetDeviceIDs(platform, deviceType, numDevices, devices, null); 253 | cl_device_id device = devices[deviceIndex]; 254 | 255 | // Create a context for the selected device 256 | context = clCreateContext( 257 | contextProperties, 1, new cl_device_id[]{device}, 258 | null, null, null); 259 | 260 | String deviceName = getString(devices[0], CL_DEVICE_NAME); 261 | System.out.printf("CL_DEVICE_NAME: %s\n", deviceName); 262 | 263 | // Create a command-queue for the selected device 264 | cl_queue_properties properties = new cl_queue_properties(); 265 | commandQueue = clCreateCommandQueueWithProperties( 266 | context, device, properties, null); 267 | 268 | // Create the program from the source code 269 | program = clCreateProgramWithSource(context, 270 | 1, new String[]{ programSource }, null, null); 271 | 272 | // Build the program 273 | clBuildProgram(program, 0, null, null, null, null); 274 | 275 | // Create the kernel 276 | kernel = clCreateKernel(program, "sampleKernel", null); 277 | } 278 | 279 | /** 280 | * Print the given buffer as a matrix with the given number of columns 281 | * 282 | * @param data The buffer 283 | * @param columns The number of columns 284 | */ 285 | private static void print2D(FloatBuffer data, int columns) 286 | { 287 | StringBuffer sb = new StringBuffer(); 288 | for (int i=0; i= 2.0) 213 | { 214 | System.out.println("Using device "+ 215 | deviceName+", version "+version); 216 | device = currentDevice; 217 | break; 218 | } 219 | else 220 | { 221 | System.out.println("Skipping device "+ 222 | deviceName+", version "+version); 223 | } 224 | } 225 | if (device == null) 226 | { 227 | System.out.println("No OpenCL 2.0 capable device found"); 228 | System.exit(1); 229 | } 230 | 231 | // Create a context 232 | context = clCreateContext( 233 | contextProperties, 1, new cl_device_id[]{ device }, 234 | null, null, null); 235 | 236 | // Create the command queue 237 | cl_queue_properties 
properties = new cl_queue_properties(); 238 | commandQueue = clCreateCommandQueueWithProperties( 239 | context, device, properties, null); 240 | 241 | // Create the program from the source code 242 | cl_program program = clCreateProgramWithSource(context, 243 | 1, new String[]{ programSource }, null, null); 244 | 245 | // Build the program. It's important to specify the 246 | // -cl-std=CL2.0 247 | // build parameter here! 248 | clBuildProgram(program, 0, null, "-cl-std=CL2.0", null, null); 249 | 250 | // Create the kernel 251 | kernel = clCreateKernel(program, "sampleKernel", null); 252 | 253 | clReleaseProgram(program); 254 | } 255 | 256 | /** 257 | * Returns the OpenCL version of the given device, as a float 258 | * value 259 | * 260 | * @param device The device 261 | * @return The OpenCL version 262 | */ 263 | private static float getOpenCLVersion(cl_device_id device) 264 | { 265 | String deviceVersion = getString(device, CL_DEVICE_VERSION); 266 | String versionString = deviceVersion.substring(7, 10); 267 | float version = Float.parseFloat(versionString); 268 | return version; 269 | } 270 | 271 | /** 272 | * Returns the value of the device info parameter with the given name 273 | * 274 | * @param device The device 275 | * @param paramName The parameter name 276 | * @return The value 277 | */ 278 | private static String getString(cl_device_id device, int paramName) 279 | { 280 | // Obtain the length of the string that will be queried 281 | long size[] = new long[1]; 282 | clGetDeviceInfo(device, paramName, 0, null, size); 283 | 284 | // Create a buffer of the appropriate size and fill it with the info 285 | byte buffer[] = new byte[(int)size[0]]; 286 | clGetDeviceInfo(device, paramName, buffer.length, Pointer.to(buffer), null); 287 | 288 | // Create a string from the buffer (excluding the trailing \0 byte) 289 | return new String(buffer, 0, buffer.length-1); 290 | } 291 | 292 | 293 | } 294 | 
-------------------------------------------------------------------------------- /src/main/java/org/jocl/samples/JOCLSimpleImage.java: -------------------------------------------------------------------------------- 1 | /* 2 | * JOCL - Java bindings for OpenCL 3 | * 4 | * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/ 5 | */ 6 | package org.jocl.samples; 7 | 8 | import static org.jocl.CL.*; 9 | 10 | import java.awt.*; 11 | import java.awt.image.*; 12 | import java.io.*; 13 | 14 | import javax.imageio.ImageIO; 15 | import javax.swing.*; 16 | 17 | import org.jocl.*; 18 | 19 | /** 20 | * A simple example demonstrating image handling between JOCL 21 | * and Swing. It shows an animation of a rotating image, 22 | * which is rotated using an OpenCL kernel involving some 23 | * basic image operations. 24 | */ 25 | public class JOCLSimpleImage 26 | { 27 | /** 28 | * Entry point for this sample. 29 | * 30 | * @param args not used 31 | */ 32 | public static void main(String args[]) 33 | { 34 | SwingUtilities.invokeLater(new Runnable() 35 | { 36 | public void run() 37 | { 38 | new JOCLSimpleImage(); 39 | } 40 | }); 41 | } 42 | 43 | /** 44 | * The source code of the kernel to execute. It will rotate the 45 | * input image by the given angle and write the result into the 46 | * output image. 
47 | */ 48 | private static String programSource = 49 | ""+ "\n" + 50 | "const sampler_t samplerIn = "+ "\n" + 51 | " CLK_NORMALIZED_COORDS_FALSE | "+ "\n" + 52 | " CLK_ADDRESS_CLAMP |"+ "\n" + 53 | " CLK_FILTER_NEAREST;"+ "\n" + 54 | ""+ "\n" + 55 | "const sampler_t samplerOut = "+ "\n" + 56 | " CLK_NORMALIZED_COORDS_FALSE |"+ "\n" + 57 | " CLK_ADDRESS_CLAMP |"+ "\n" + 58 | " CLK_FILTER_NEAREST;"+ "\n" + 59 | ""+ "\n" + 60 | "__kernel void rotateImage("+ "\n" + 61 | " __read_only image2d_t sourceImage, "+ "\n" + 62 | " __write_only image2d_t targetImage, "+ "\n" + 63 | " float angle)"+ "\n" + 64 | "{"+ "\n" + 65 | " int gidX = get_global_id(0);"+ "\n" + 66 | " int gidY = get_global_id(1);"+ "\n" + 67 | " int w = get_image_width(sourceImage);"+ "\n" + 68 | " int h = get_image_height(sourceImage);"+ "\n" + 69 | " int cx = w/2;"+ "\n" + 70 | " int cy = h/2;"+ "\n" + 71 | " int dx = gidX-cx;"+ "\n" + 72 | " int dy = gidY-cy;"+ "\n" + 73 | " float ca = cos(angle);"+ "\n" + 74 | " float sa = sin(angle);"+ "\n" + 75 | " int inX = (int)(cx+ca*dx-sa*dy);"+ "\n" + 76 | " int inY = (int)(cy+sa*dx+ca*dy);"+ "\n" + 77 | " int2 posIn = {inX, inY};"+ "\n" + 78 | " int2 posOut = {gidX, gidY};"+ "\n" + 79 | " uint4 pixel = read_imageui(sourceImage, samplerIn, posIn);"+ "\n" + 80 | " write_imageui(targetImage, posOut, pixel);"+ "\n" + 81 | "}"; 82 | 83 | 84 | /** 85 | * Creates a BufferedImage of with type TYPE_INT_RGB from the 86 | * file with the given name. 
87 | * 88 | * @param fileName The file name 89 | * @return The image, or null if the file may not be read 90 | */ 91 | private static BufferedImage createBufferedImage(String fileName) 92 | { 93 | BufferedImage image = null; 94 | try 95 | { 96 | image = ImageIO.read(new File(fileName)); 97 | } 98 | catch (IOException e) 99 | { 100 | e.printStackTrace(); 101 | return null; 102 | } 103 | 104 | int sizeX = image.getWidth(); 105 | int sizeY = image.getHeight(); 106 | 107 | BufferedImage result = new BufferedImage( 108 | sizeX, sizeY, BufferedImage.TYPE_INT_RGB); 109 | Graphics g = result.createGraphics(); 110 | g.drawImage(image, 0, 0, null); 111 | g.dispose(); 112 | return result; 113 | } 114 | 115 | /** 116 | * The input image 117 | */ 118 | private BufferedImage inputImage; 119 | 120 | /** 121 | * The output image 122 | */ 123 | private BufferedImage outputImage; 124 | 125 | /** 126 | * The OpenCL context 127 | */ 128 | private cl_context context; 129 | 130 | /** 131 | * The OpenCL command queue 132 | */ 133 | private cl_command_queue commandQueue; 134 | 135 | /** 136 | * The OpenCL kernel 137 | */ 138 | private cl_kernel kernel; 139 | 140 | /** 141 | * The memory object for the input image 142 | */ 143 | private cl_mem inputImageMem; 144 | 145 | /** 146 | * The memory object for the output image 147 | */ 148 | private cl_mem outputImageMem; 149 | 150 | /** 151 | * The width of the image 152 | */ 153 | private int imageSizeX; 154 | 155 | /** 156 | * The height of the image 157 | */ 158 | private int imageSizeY; 159 | 160 | /** 161 | * Creates the JOCLSimpleImage sample 162 | */ 163 | public JOCLSimpleImage() 164 | { 165 | // Read the input image file and create the output images 166 | String fileName = "src/main/resources/data/lena512color.png"; 167 | 168 | inputImage = createBufferedImage(fileName); 169 | imageSizeX = inputImage.getWidth(); 170 | imageSizeY = inputImage.getHeight(); 171 | 172 | outputImage = new BufferedImage( 173 | imageSizeX, imageSizeY, 
BufferedImage.TYPE_INT_RGB); 174 | 175 | // Create the panel showing the input and output images 176 | JPanel mainPanel = new JPanel(new GridLayout(1,0)); 177 | JLabel inputLabel = new JLabel(new ImageIcon(inputImage)); 178 | mainPanel.add(inputLabel, BorderLayout.CENTER); 179 | JLabel outputLabel = new JLabel(new ImageIcon(outputImage)); 180 | mainPanel.add(outputLabel, BorderLayout.CENTER); 181 | 182 | // Create the main frame 183 | JFrame frame = new JFrame("JOCL Simple Image Sample"); 184 | frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); 185 | frame.setLayout(new BorderLayout()); 186 | frame.add(mainPanel, BorderLayout.CENTER); 187 | frame.pack(); 188 | frame.setVisible(true); 189 | 190 | initCL(); 191 | initImageMem(); 192 | startAnimation(outputLabel); 193 | } 194 | 195 | 196 | /** 197 | * Starts the thread which will advance the animation state 198 | * and call call the animation method. 199 | * 200 | * @param outputComponent The component to repaint after each step 201 | */ 202 | private void startAnimation(final Component outputComponent) 203 | { 204 | System.out.println("Starting animation..."); 205 | Thread thread = new Thread(new Runnable() 206 | { 207 | float angle = 0.0f; 208 | public void run() 209 | { 210 | while (true) 211 | { 212 | rotateImage(angle); 213 | angle += 0.1f; 214 | outputComponent.repaint(); 215 | 216 | try 217 | { 218 | Thread.sleep(20); 219 | } 220 | catch (InterruptedException e) 221 | { 222 | Thread.currentThread().interrupt(); 223 | return; 224 | } 225 | } 226 | } 227 | }); 228 | thread.setDaemon(true); 229 | thread.start(); 230 | } 231 | 232 | 233 | /** 234 | * Initialize the OpenCL context, command queue and kernel 235 | */ 236 | void initCL() 237 | { 238 | final int platformIndex = 0; 239 | final long deviceType = CL_DEVICE_TYPE_ALL; 240 | final int deviceIndex = 0; 241 | 242 | // Enable exceptions and subsequently omit error checks in this sample 243 | CL.setExceptionsEnabled(true); 244 | 245 | // Obtain the number of 
platforms 246 | int numPlatformsArray[] = new int[1]; 247 | clGetPlatformIDs(0, null, numPlatformsArray); 248 | int numPlatforms = numPlatformsArray[0]; 249 | 250 | // Obtain a platform ID 251 | cl_platform_id platforms[] = new cl_platform_id[numPlatforms]; 252 | clGetPlatformIDs(platforms.length, platforms, null); 253 | cl_platform_id platform = platforms[platformIndex]; 254 | 255 | // Initialize the context properties 256 | cl_context_properties contextProperties = new cl_context_properties(); 257 | contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform); 258 | 259 | // Obtain the number of devices for the platform 260 | int numDevicesArray[] = new int[1]; 261 | clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray); 262 | int numDevices = numDevicesArray[0]; 263 | 264 | // Obtain a device ID 265 | cl_device_id devices[] = new cl_device_id[numDevices]; 266 | clGetDeviceIDs(platform, deviceType, numDevices, devices, null); 267 | cl_device_id device = devices[deviceIndex]; 268 | 269 | // Create a context for the selected device 270 | context = clCreateContext( 271 | contextProperties, 1, new cl_device_id[]{device}, 272 | null, null, null); 273 | 274 | // Check if images are supported 275 | int imageSupport[] = new int[1]; 276 | clGetDeviceInfo (device, CL.CL_DEVICE_IMAGE_SUPPORT, 277 | Sizeof.cl_int, Pointer.to(imageSupport), null); 278 | System.out.println("Images supported: "+(imageSupport[0]==1)); 279 | if (imageSupport[0]==0) 280 | { 281 | System.out.println("Images are not supported"); 282 | System.exit(1); 283 | return; 284 | } 285 | 286 | // Create a command-queue for the selected device 287 | cl_queue_properties properties = new cl_queue_properties(); 288 | properties.addProperty(CL_QUEUE_PROFILING_ENABLE, 1); 289 | properties.addProperty(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 1); 290 | commandQueue = clCreateCommandQueueWithProperties( 291 | context, device, properties, null); 292 | 293 | // Create the program 294 | 
System.out.println("Creating program..."); 295 | cl_program program = clCreateProgramWithSource(context, 296 | 1, new String[]{ programSource }, null, null); 297 | 298 | // Build the program 299 | System.out.println("Building program..."); 300 | clBuildProgram(program, 0, null, null, null, null); 301 | 302 | // Create the kernel 303 | System.out.println("Creating kernel..."); 304 | kernel = clCreateKernel(program, "rotateImage", null); 305 | 306 | } 307 | 308 | /** 309 | * Initialize the memory objects for the input and output images 310 | */ 311 | private void initImageMem() 312 | { 313 | // Create the memory object for the input- and output image 314 | DataBufferInt dataBufferSrc = 315 | (DataBufferInt)inputImage.getRaster().getDataBuffer(); 316 | int dataSrc[] = dataBufferSrc.getData(); 317 | 318 | cl_image_format imageFormat = new cl_image_format(); 319 | imageFormat.image_channel_order = CL_RGBA; 320 | imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; 321 | 322 | inputImageMem = clCreateImage2D( 323 | context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, 324 | new cl_image_format[]{imageFormat}, imageSizeX, imageSizeY, 325 | imageSizeX * Sizeof.cl_uint, Pointer.to(dataSrc), null); 326 | 327 | outputImageMem = clCreateImage2D( 328 | context, CL_MEM_WRITE_ONLY, 329 | new cl_image_format[]{imageFormat}, imageSizeX, imageSizeY, 330 | 0, null, null); 331 | } 332 | 333 | 334 | /** 335 | * Rotate the input image by the given angle, and write it into 336 | * the output image 337 | * 338 | * @param angle The rotation angle 339 | */ 340 | void rotateImage(float angle) 341 | { 342 | // Set up the work size and arguments, and execute the kernel 343 | long globalWorkSize[] = new long[2]; 344 | globalWorkSize[0] = imageSizeX; 345 | globalWorkSize[1] = imageSizeY; 346 | clSetKernelArg(kernel, 0, Sizeof.cl_mem, Pointer.to(inputImageMem)); 347 | clSetKernelArg(kernel, 1, Sizeof.cl_mem, Pointer.to(outputImageMem)); 348 | clSetKernelArg(kernel, 2, Sizeof.cl_float, 349 | 
Pointer.to(new float[]{angle})); 350 | clEnqueueNDRangeKernel(commandQueue, kernel, 2, null, 351 | globalWorkSize, null, 0, null, null); 352 | 353 | // Read the pixel data into the output image 354 | DataBufferInt dataBufferDst = 355 | (DataBufferInt)outputImage.getRaster().getDataBuffer(); 356 | int dataDst[] = dataBufferDst.getData(); 357 | clEnqueueReadImage( 358 | commandQueue, outputImageMem, true, new long[3], 359 | new long[]{imageSizeX, imageSizeY, 1}, 360 | imageSizeX * Sizeof.cl_uint, 0, 361 | Pointer.to(dataDst), 0, null, null); 362 | } 363 | } 364 | 365 | -------------------------------------------------------------------------------- /src/main/java/org/jocl/samples/JOCLSimpleMandelbrot.java: -------------------------------------------------------------------------------- 1 | /* 2 | * JOCL - Java bindings for OpenCL 3 | * 4 | * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/ 5 | */ 6 | package org.jocl.samples; 7 | 8 | import static org.jocl.CL.*; 9 | 10 | import java.awt.*; 11 | import java.awt.event.*; 12 | import java.awt.image.*; 13 | import java.io.*; 14 | 15 | import javax.swing.*; 16 | 17 | import org.jocl.*; 18 | 19 | /** 20 | * A class that uses a simple OpenCL kernel to compute the 21 | * Mandelbrot set and displays it in an image 22 | */ 23 | public class JOCLSimpleMandelbrot 24 | { 25 | /** 26 | * Entry point for this sample. 
27 | * 28 | * @param args not used 29 | */ 30 | public static void main(String args[]) 31 | { 32 | SwingUtilities.invokeLater(new Runnable() 33 | { 34 | public void run() 35 | { 36 | new JOCLSimpleMandelbrot(500,500); 37 | } 38 | }); 39 | } 40 | 41 | /** 42 | * The image which will contain the Mandelbrot pixel data 43 | */ 44 | private BufferedImage image; 45 | 46 | /** 47 | * The width of the image 48 | */ 49 | private int sizeX = 0; 50 | 51 | /** 52 | * The height of the image 53 | */ 54 | private int sizeY = 0; 55 | 56 | /** 57 | * The component which is used for rendering the image 58 | */ 59 | private JComponent imageComponent; 60 | 61 | /** 62 | * The OpenCL context 63 | */ 64 | private cl_context context; 65 | 66 | /** 67 | * The OpenCL command queue 68 | */ 69 | private cl_command_queue commandQueue; 70 | 71 | /** 72 | * The OpenCL kernel which will actually compute the Mandelbrot 73 | * set and store the pixel data in a CL memory object 74 | */ 75 | private cl_kernel kernel; 76 | 77 | /** 78 | * The OpenCL memory object which stores the pixel data 79 | */ 80 | private cl_mem pixelMem; 81 | 82 | /** 83 | * An OpenCL memory object which stores a nifty color map, 84 | * encoded as integers combining the RGB components of 85 | * the colors. 86 | */ 87 | private cl_mem colorMapMem; 88 | 89 | /** 90 | * The color map which will be copied to OpenCL for filling 91 | * the PBO. 
    /**
     * Creates the JOCLSimpleMandelbrot sample with the given
     * width and height. This creates the image and the component
     * that paints it, sets up the interaction, initializes OpenCL,
     * computes the initial image, and finally shows the main frame.
     *
     * @param width The width of the image and of the image component
     * @param height The height of the image and of the image component
     */
    public JOCLSimpleMandelbrot(int width, int height)
    {
        this.sizeX = width;
        this.sizeY = height;

        // Create the image and the component that will paint the image
        image = new BufferedImage(sizeX, sizeY, BufferedImage.TYPE_INT_RGB);
        imageComponent = new JPanel()
        {
            private static final long serialVersionUID = 1L;

            // Paints the current contents of the image at the
            // upper left corner of the component
            public void paintComponent(Graphics g)
            {
                super.paintComponent(g);
                g.drawImage(image, 0,0,this);
            }
        };

        // Initialize the mouse interaction
        initInteraction();

        // Initialize OpenCL
        initCL();

        // Initial image update. This is done after initCL, and
        // presumably relies on the OpenCL objects created there
        updateImage();

        // Create the main frame
        JFrame frame = new JFrame("JOCL Simple Mandelbrot");
        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        frame.setLayout(new BorderLayout());
        // The preferred size of the component determines the
        // size of the frame contents when the frame is packed
        imageComponent.setPreferredSize(new Dimension(width, height));
        frame.add(imageComponent, BorderLayout.CENTER);
        frame.pack();

        frame.setVisible(true);
    }
164 | */ 165 | private void initCL() 166 | { 167 | final int platformIndex = 0; 168 | final long deviceType = CL_DEVICE_TYPE_ALL; 169 | final int deviceIndex = 0; 170 | 171 | // Enable exceptions and subsequently omit error checks in this sample 172 | CL.setExceptionsEnabled(true); 173 | 174 | // Obtain the number of platforms 175 | int numPlatformsArray[] = new int[1]; 176 | clGetPlatformIDs(0, null, numPlatformsArray); 177 | int numPlatforms = numPlatformsArray[0]; 178 | 179 | // Obtain a platform ID 180 | cl_platform_id platforms[] = new cl_platform_id[numPlatforms]; 181 | clGetPlatformIDs(platforms.length, platforms, null); 182 | cl_platform_id platform = platforms[platformIndex]; 183 | 184 | // Initialize the context properties 185 | cl_context_properties contextProperties = new cl_context_properties(); 186 | contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform); 187 | 188 | // Obtain the number of devices for the platform 189 | int numDevicesArray[] = new int[1]; 190 | clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray); 191 | int numDevices = numDevicesArray[0]; 192 | 193 | // Obtain a device ID 194 | cl_device_id devices[] = new cl_device_id[numDevices]; 195 | clGetDeviceIDs(platform, deviceType, numDevices, devices, null); 196 | cl_device_id device = devices[deviceIndex]; 197 | 198 | // Create a context for the selected device 199 | context = clCreateContext( 200 | contextProperties, 1, new cl_device_id[]{device}, 201 | null, null, null); 202 | 203 | // Create a command-queue for the selected device 204 | cl_queue_properties properties = new cl_queue_properties(); 205 | commandQueue = clCreateCommandQueueWithProperties( 206 | context, device, properties, null); 207 | 208 | // Program Setup 209 | String source = 210 | readFile("src/main/resources/kernels/SimpleMandelbrot.cl"); 211 | 212 | // Create the program 213 | cl_program cpProgram = clCreateProgramWithSource(context, 1, 214 | new String[]{ source }, null, null); 215 | 216 | // Build 
the program 217 | clBuildProgram(cpProgram, 0, null, "-cl-mad-enable", null, null); 218 | 219 | // Create the kernel 220 | kernel = clCreateKernel(cpProgram, "computeMandelbrot", null); 221 | 222 | // Create the memory object which will be filled with the 223 | // pixel data 224 | pixelMem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 225 | sizeX * sizeY * Sizeof.cl_uint, null, null); 226 | 227 | // Create and fill the memory object containing the color map 228 | initColorMap(32, Color.RED, Color.GREEN, Color.BLUE); 229 | colorMapMem = clCreateBuffer(context, CL_MEM_READ_WRITE, 230 | colorMap.length * Sizeof.cl_uint, null, null); 231 | clEnqueueWriteBuffer(commandQueue, colorMapMem, true, 0, 232 | colorMap.length * Sizeof.cl_uint, Pointer.to(colorMap), 0, null, null); 233 | } 234 | 235 | /** 236 | * Helper function which reads the file with the given name and returns 237 | * the contents of this file as a String. Will exit the application 238 | * if the file can not be read. 239 | * 240 | * @param fileName The name of the file to read. 
241 | * @return The contents of the file 242 | */ 243 | private String readFile(String fileName) 244 | { 245 | BufferedReader br = null; 246 | try 247 | { 248 | br = new BufferedReader( 249 | new InputStreamReader(new FileInputStream(fileName))); 250 | StringBuffer sb = new StringBuffer(); 251 | String line = null; 252 | while (true) 253 | { 254 | line = br.readLine(); 255 | if (line == null) 256 | { 257 | break; 258 | } 259 | sb.append(line).append("\n"); 260 | } 261 | return sb.toString(); 262 | } 263 | catch (IOException e) 264 | { 265 | e.printStackTrace(); 266 | System.exit(1); 267 | return null; 268 | } 269 | finally 270 | { 271 | if (br != null) 272 | { 273 | try 274 | { 275 | br.close(); 276 | } 277 | catch (IOException e) 278 | { 279 | e.printStackTrace(); 280 | } 281 | } 282 | } 283 | } 284 | 285 | /** 286 | * Creates the colorMap array which contains RGB colors as integers, 287 | * interpolated through the given colors with colors.length * stepSize 288 | * steps 289 | * 290 | * @param stepSize The number of interpolation steps between two colors 291 | * @param colors The colors for the map 292 | */ 293 | private void initColorMap(int stepSize, Color ... 
////////////////////////////////////////////////////////////////////////////////
// Builds one partial 256-bin histogram per work-group.
//
// d_PartialHistograms : output; work-group g writes HISTOGRAM256_BIN_COUNT
//                       counters starting at g * HISTOGRAM256_BIN_COUNT
// d_Data              : input words; each 32-bit word is passed to addWord,
//                       which feeds its four bytes to addByte
// dataCount           : number of 32-bit words in d_Data
//
// The work-group size is fixed to HISTOGRAM256_WORKGROUP_SIZE by the
// reqd_work_group_size attribute.
////////////////////////////////////////////////////////////////////////////////
__kernel __attribute__((reqd_work_group_size(HISTOGRAM256_WORKGROUP_SIZE, 1, 1)))
void histogram256(
    __global uint *d_PartialHistograms,
    __global uint *d_Data,
    uint dataCount
){
    //Local storage for WARP_COUNT sub-histograms of HISTOGRAM256_BIN_COUNT
    //counters each; l_WarpHist is the sub-histogram that is selected by
    //the upper bits of the local ID (local ID >> LOG2_WARP_SIZE)
    __local uint l_Hist[WARP_COUNT * HISTOGRAM256_BIN_COUNT];
    __local uint *l_WarpHist = l_Hist + (get_local_id(0) >> LOG2_WARP_SIZE) * HISTOGRAM256_BIN_COUNT;

    //Clear the local storage of the current work-group before processing.
    //Each work-item clears (HISTOGRAM256_BIN_COUNT / WARP_SIZE) entries
    //(assumes HISTOGRAM256_WORKGROUP_SIZE == WARP_COUNT * WARP_SIZE, so
    //that all entries are covered - TODO confirm against the macros)
    for(uint i = 0; i < (HISTOGRAM256_BIN_COUNT / WARP_SIZE); i++)
        l_Hist[get_local_id(0) + i * (WARP_COUNT * WARP_SIZE)] = 0;

    //The local ID, shifted into the uppermost LOG2_WARP_SIZE bits. It is
    //passed on to addWord/addByte (presumably to mark the counter updates
    //of a work-item - see addByte), and masked off again via TAG_MASK below
    const uint tag = get_local_id(0) << (32 - LOG2_WARP_SIZE);

    //Read through the entire input buffer, build per-warp histograms.
    //The barrier makes sure that the clearing has finished. Each work-item
    //starts at its global ID and advances by the global size
    barrier(CLK_LOCAL_MEM_FENCE);
    for(uint pos = get_global_id(0); pos < dataCount; pos += get_global_size(0)){
        uint data = d_Data[pos];
        addWord(l_WarpHist, data, tag);
    }

    //Per-block histogram reduction: After all work-items have finished
    //counting, each bin is summed over all sub-histograms (with the tag
    //bits removed), and written to the partial histogram of this work-group
    barrier(CLK_LOCAL_MEM_FENCE);
    for(uint pos = get_local_id(0); pos < HISTOGRAM256_BIN_COUNT; pos += HISTOGRAM256_WORKGROUP_SIZE){
        uint sum = 0;

        for(uint i = 0; i < WARP_COUNT; i++)
            sum += l_Hist[pos + i * HISTOGRAM256_BIN_COUNT] & TAG_MASK;

        d_PartialHistograms[get_group_id(0) * HISTOGRAM256_BIN_COUNT + pos] = sum;
    }
}
//Number of partial sums that are combined in the local memory of one
//work-group (assumes that the kernel is launched with a local work size
//of MERGE_WORKGROUP_SIZE - TODO confirm against the host code)
#define MERGE_WORKGROUP_SIZE 256

////////////////////////////////////////////////////////////////////////////////
// Merges the partial histograms into the final histogram.
//
// d_Histogram         : output; work-group g writes the counter of bin g
// d_PartialHistograms : input; histogramCount consecutive histograms with
//                       HISTOGRAM256_BIN_COUNT counters each
// histogramCount      : number of partial histograms
//
// The bin that is handled by a work-group is given by its group ID.
////////////////////////////////////////////////////////////////////////////////
__kernel void mergeHistogram256(
    __global uint *d_Histogram,
    __global uint *d_PartialHistograms,
    uint histogramCount
){
    __local uint l_Data[MERGE_WORKGROUP_SIZE];

    //Each work-item sums up the counter of this bin from every
    //MERGE_WORKGROUP_SIZE-th partial histogram, starting at its local ID
    uint sum = 0;
    for(uint i = get_local_id(0); i < histogramCount; i += MERGE_WORKGROUP_SIZE)
        sum += d_PartialHistograms[get_group_id(0) + i * HISTOGRAM256_BIN_COUNT];
    l_Data[get_local_id(0)] = sum;

    //Reduction in local memory: The number of active work-items is halved
    //in each step. The barrier at the start of each step makes the writes
    //of the previous step (or the initial sums) visible before reading
    for(uint stride = MERGE_WORKGROUP_SIZE / 2; stride > 0; stride >>= 1){
        barrier(CLK_LOCAL_MEM_FENCE);
        if(get_local_id(0) < stride)
            l_Data[get_local_id(0)] += l_Data[get_local_id(0) + stride];
    }

    //The first work-item holds the total and writes the final counter.
    //(It has written l_Data[0] itself in the last step, so no further
    //barrier is required for this read)
    if(get_local_id(0) == 0)
        d_Histogram[get_group_id(0)] = l_Data[0];
}
15 | ANY BREACH OF ANY TERM OF THIS LICENSE SHALL RESULT IN THE IMMEDIATE REVOCATION 16 | OF ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE THIS MATERIAL. 17 | THIS MATERIAL IS PROVIDED BY ADVANCED MICRO DEVICES, INC. AND ANY COPYRIGHT 18 | HOLDERS AND CONTRIBUTORS "AS IS" IN ITS CURRENT CONDITION AND WITHOUT ANY 19 | REPRESENTATIONS, GUARANTEE, OR WARRANTY OF ANY KIND OR IN ANY WAY RELATED TO 20 | SUPPORT, INDEMNITY, ERROR FREE OR UNINTERRUPTED OPERA TION, OR THAT IT IS FREE 21 | FROM DEFECTS OR VIRUSES. ALL OBLIGATIONS ARE HEREBY DISCLAIMED - WHETHER 22 | EXPRESS, IMPLIED, OR STATUTORY - INCLUDING, BUT NOT LIMITED TO, ANY IMPLIED 23 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, 24 | ACCURACY, COMPLETENESS, OPERABILITY, QUALITY OF SERVICE, OR NON-INFRINGEMENT. 25 | IN NO EVENT SHALL ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR 26 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, PUNITIVE, 27 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 28 | OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, REVENUE, DATA, OR PROFITS; OR 29 | BUSINESS INTERRUPTION) HOWEVER CAUSED OR BASED ON ANY THEORY OF LIABILITY 30 | ARISING IN ANY WAY RELATED TO THIS MATERIAL, EVEN IF ADVISED OF THE POSSIBILITY 31 | OF SUCH DAMAGE. THE ENTIRE AND AGGREGATE LIABILITY OF ADVANCED MICRO DEVICES, 32 | INC. AND ANY COPYRIGHT HOLDERS AND CONTRIBUTORS SHALL NOT EXCEED TEN DOLLARS 33 | (US $10.00). ANYONE REDISTRIBUTING OR ACCESSING OR USING THIS MATERIAL ACCEPTS 34 | THIS ALLOCATION OF RISK AND AGREES TO RELEASE ADVANCED MICRO DEVICES, INC. AND 35 | ANY COPYRIGHT HOLDERS AND CONTRIBUTORS FROM ANY AND ALL LIABILITIES, 36 | OBLIGATIONS, CLAIMS, OR DEMANDS IN EXCESS OF TEN DOLLARS (US $10.00). THE 37 | FOREGOING ARE ESSENTIAL TERMS OF THIS LICENSE AND, IF ANY OF THESE TERMS ARE 38 | CONSTRUED AS UNENFORCEABLE, FAIL IN ESSENTIAL PURPOSE, OR BECOME VOID OR 39 | DETRIMENTAL TO ADVANCED MICRO DEVICES, INC. 
OR ANY COPYRIGHT HOLDERS OR 40 | CONTRIBUTORS FOR ANY REASON, THEN ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE 41 | THIS MATERIAL SHALL TERMINATE IMMEDIATELY. MOREOVER, THE FOREGOING SHALL 42 | SURVIVE ANY EXPIRATION OR TERMINATION OF THIS LICENSE OR ANY AGREEMENT OR 43 | ACCESS OR USE RELATED TO THIS MATERIAL. 44 | NOTICE IS HEREBY PROVIDED, AND BY REDISTRIBUTING OR ACCESSING OR USING THIS 45 | MATERIAL SUCH NOTICE IS ACKNOWLEDGED, THAT THIS MATERIAL MAY BE SUBJECT TO 46 | RESTRICTIONS UNDER THE LAWS AND REGULATIONS OF THE UNITED STATES OR OTHER 47 | COUNTRIES, WHICH INCLUDE BUT ARE NOT LIMITED TO, U.S. EXPORT CONTROL LAWS SUCH 48 | AS THE EXPORT ADMINISTRATION REGULATIONS AND NATIONAL SECURITY CONTROLS AS 49 | DEFINED THEREUNDER, AS WELL AS STATE DEPARTMENT CONTROLS UNDER THE U.S. 50 | MUNITIONS LIST. THIS MATERIAL MAY NOT BE USED, RELEASED, TRANSFERRED, IMPORTED, 51 | EXPORTED AND/OR RE-EXPORTED IN ANY MANNER PROHIBITED UNDER ANY APPLICABLE LAWS, 52 | INCLUDING U.S. EXPORT CONTROL LAWS REGARDING SPECIFICALLY DESIGNATED PERSONS, 53 | COUNTRIES AND NATIONALS OF COUNTRIES SUBJECT TO NATIONAL SECURITY CONTROLS. 54 | MOREOVER, THE FOREGOING SHALL SURVIVE ANY EXPIRATION OR TERMINATION OF ANY 55 | LICENSE OR AGREEMENT OR ACCESS OR USE RELATED TO THIS MATERIAL. 56 | NOTICE REGARDING THE U.S. GOVERNMENT AND DOD AGENCIES: This material is 57 | provided with "RESTRICTED RIGHTS" and/or "LIMITED RIGHTS" as applicable to 58 | computer software and technical data, respectively. Use, duplication, 59 | distribution or disclosure by the U.S. Government and/or DOD agencies is 60 | subject to the full extent of restrictions in all applicable regulations, 61 | including those found at FAR52.227 and DFARS252.227 et seq. and any successor 62 | regulations thereof. Use of this material by the U.S. 
Government and/or DOD 63 | agencies is acknowledgment of the proprietary rights of any copyright holders 64 | and contributors, including those of Advanced Micro Devices, Inc., as well as 65 | the provisions of FAR52.227-14 through 23 regarding privately developed and/or 66 | commercial computer software. 67 | This license forms the entire agreement regarding the subject matter hereof and 68 | supersedes all proposals and prior discussions and writings between the parties 69 | with respect thereto. This license does not affect any ownership, rights, title, 70 | or interest in, or relating to, this material. No terms of this license can be 71 | modified or waived, and no breach of this license can be excused, unless done 72 | so in a writing signed by all affected parties. Each term of this license is 73 | separately enforceable. If any term of this license is determined to be or 74 | becomes unenforceable or illegal, such term shall be reformed to the minimum 75 | extent necessary in order for this license to remain in effect in accordance 76 | with its terms as modified by such reformation. This license shall be governed 77 | by and construed in accordance with the laws of the State of Texas without 78 | regard to rules on conflicts of law of any state or jurisdiction or the United 79 | Nations Convention on the International Sale of Goods. All disputes arising out 80 | of this license shall be subject to the jurisdiction of the federal and state 81 | courts in Austin, Texas, and all defenses are hereby waived concerning personal 82 | jurisdiction and venue of these courts. 83 | ============================================================ */ 84 | 85 | 86 | /* 87 | * For a description of the algorithm and the terms used, please see the 88 | * documentation for this sample. 89 | * 90 | * On invocation of kernel histogram256, each work thread calculates 91 | * its thread-histogram bins, and finally all thread-histograms are merged into 92 | * the block-histogram bins. 
Outside the kernel, all block-histograms are merged 93 | * into the final histogram 94 | */ 95 | 96 | #pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable 97 | 98 | #define BIN_SIZE 256 99 | #define GROUP_SIZE 16 100 | 101 | /** 102 | * @brief Calculates one block-histogram with BIN_SIZE (256) bins per work-group 103 | * @param data input data pointer; each work-item reads the BIN_SIZE consecutive values starting at data[globalId * BIN_SIZE] and uses every value directly as a bin index (presumably the host guarantees values below BIN_SIZE - confirm) 104 | * @param sharedArray local memory for the thread-histogram bins, BIN_SIZE uchar counters per work-item; a counter wraps around to 0 if one work-item reads the same value in all 256 of its inputs 105 | * @param binResult block-histogram array, receives BIN_SIZE uint counters per work-group 106 | */ 107 | __kernel 108 | void histogram256(__global const uint* data, 109 | __local uchar* sharedArray, 110 | __global uint* binResult) 111 | { 112 | size_t localId = get_local_id(0); 113 | size_t globalId = get_global_id(0); 114 | size_t groupId = get_group_id(0); 115 | 116 | /* initialize this work-item's own BIN_SIZE counters in the shared array to zero */ 117 | for(int i = 0; i < BIN_SIZE; ++i) 118 | sharedArray[localId * BIN_SIZE + i] = 0; 119 | 120 | barrier(CLK_LOCAL_MEM_FENCE); 121 | 122 | /* calculate thread-histograms: one increment per input value, in the counters owned by this work-item */ 123 | for(int i = 0; i < BIN_SIZE; ++i) 124 | { 125 | uint value = data[globalId * BIN_SIZE + i]; 126 | sharedArray[localId * BIN_SIZE + value]++; 127 | } 128 | 129 | barrier(CLK_LOCAL_MEM_FENCE); 130 | 131 | /* merge all thread-histograms into block-histogram: each work-item sums bins i * GROUP_SIZE + localId over GROUP_SIZE thread-histograms. NOTE(review): this reads exactly GROUP_SIZE thread-histograms and derives the bin from localId, so it appears to require a local size of exactly GROUP_SIZE - confirm against the host code */ 132 | for(int i = 0; i < BIN_SIZE / GROUP_SIZE; ++i) 133 | { 134 | uint binCount = 0; 135 | for(int j = 0; j < GROUP_SIZE; ++j) 136 | binCount += sharedArray[j * BIN_SIZE + i * GROUP_SIZE + localId]; 137 | 138 | binResult[groupId * BIN_SIZE + i * GROUP_SIZE + localId] = binCount; 139 | } 140 | } 141 | 142 | 143 | -------------------------------------------------------------------------------- /src/main/resources/kernels/QuadFloat.cl: -------------------------------------------------------------------------------- 1 | /* 2 | * JOCL - Java bindings for OpenCL 3 | * 4 | * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/ 5 | */ 6 | 7 | // Quad-Float functions for OpenCL float4 type. 
8 | // Ported from quad-double (QD) package: 9 | // http://crd.lbl.gov/~dhbailey/mpdist/index.html 10 | 11 | /* Builds a quad-float from one float: x = value, the other three components are zero. */ inline float4 qfAssign(float value) 12 | { 13 | return (float4)(value, 0.0f, 0.0f, 0.0f); 14 | } 15 | 16 | /* Builds a quad-float from a float2: x and y are taken from value, z and w are zero. */ inline float4 qfAssign2(float2 value) 17 | { 18 | return (float4)(value.x, value.y, 0.0f, 0.0f); 19 | } 20 | 21 | /* Returns the quad-float with all four components negated. */ inline float4 qfNegate(float4 value) 22 | { 23 | return (float4)(-value.x, -value.y, -value.z, -value.w); 24 | } 25 | 26 | /* Returns the rounded sum s = a + b and stores the correction term (a - (s - bb)) + (b - bb) in *err. */ inline float two_sum(float a, float b, float *err) 27 | { 28 | float s = a + b; 29 | float bb = s - a; 30 | *err = (a - (s - bb)) + (b - bb); 31 | return s; 32 | } 33 | 34 | /* Adds *a, *b and *c with two_sum; on return *a holds the leading sum and *b, *c hold the successive correction terms. */ inline void three_sum(float *a, float *b, float *c) 35 | { 36 | float t1, t2, t3; 37 | t1 = two_sum(*a, *b, &t2); 38 | *a = two_sum(*c, t1, &t3); 39 | *b = two_sum(t2, t3, c); 40 | } 41 | 42 | /* Like three_sum, but the two correction terms are combined with a plain addition into *b; *c is only read. */ inline void three_sum2(float *a, float *b, float *c) 43 | { 44 | float t1, t2, t3; 45 | t1 = two_sum(*a, *b, &t2); 46 | *a = two_sum(*c, t1, &t3); 47 | *b = t2 + t3; 48 | } 49 | 50 | 51 | /* Returns s = a + b and stores b - (s - a) in *err. NOTE(review): in the QD package this shorter form is only valid when |a| >= |b| - confirm that all callers satisfy this. a and b are passed by value, so err may alias the variable that was passed as b. */ inline float quick_two_sum(float a, float b, float *err) 52 | { 53 | float s = a + b; 54 | *err = b - (s - a); 55 | return s; 56 | } 57 | 58 | /* Renormalizes the five terms *c0..*c4: they are first summed from the last term upwards with quick_two_sum, then the non-zero correction terms are collected into s0..s3, which are written back to *c0..*c3 at the end of the function. *c4 is overwritten as scratch. */ inline void renorm(float *c0, float *c1, 59 | float *c2, float *c3, float *c4) 60 | { 61 | float s0, s1, s2 = 0.0f, s3 = 0.0f; 62 | 63 | s0 = quick_two_sum(*c3, *c4, c4); 64 | s0 = quick_two_sum(*c2, s0, c3); 65 | s0 = quick_two_sum(*c1, s0, c2); 66 | *c0 = quick_two_sum(*c0, s0, c1); 67 | 68 | s0 = *c0; 69 | s1 = *c1; 70 | 71 | s0 = quick_two_sum(*c0, *c1, &s1); 72 | if (s1 != 0.0f) 73 | { 74 | s1 = quick_two_sum(s1, *c2, &s2); 75 | if (s2 != 0.0f) 76 | { 77 | s2 = quick_two_sum(s2, *c3, &s3); 78 | if (s3 != 0.0f) 79 | { 80 | s3 += *c4; 81 | } 82 | else 83 | { 84 | s2 += *c4; 85 | } 86 | } 87 | else 88 | { 89 | s1 = quick_two_sum(s1, *c3, &s2); 90 | if (s2 != 0.0f) 91 | { 92 | s2 = quick_two_sum(s2, *c4, &s3); 93 | } 94 | else 95 | { 96 | s1 = quick_two_sum(s1, *c4, &s2); 97 | } 98 | } 99 | } 100 | else 101 | { 102 | s0 = 
quick_two_sum(s0, *c2, &s1); 103 | if (s1 != 0.0f) 104 | { 105 | s1 = quick_two_sum(s1, *c3, &s2); 106 | if (s2 != 0.0f) 107 | { 108 | s2 = quick_two_sum(s2, *c4, &s3); 109 | } 110 | else 111 | { 112 | s1 = quick_two_sum(s1, *c4, &s2); 113 | } 114 | } 115 | else 116 | { 117 | s0 = quick_two_sum(s0, *c3, &s1); 118 | if (s1 != 0.0f) 119 | { 120 | s1 = quick_two_sum(s1, *c4, &s2); 121 | } 122 | else 123 | { 124 | s0 = quick_two_sum(s0, *c4, &s1); 125 | } 126 | } 127 | } 128 | 129 | *c0 = s0; 130 | *c1 = s1; 131 | *c2 = s2; 132 | *c3 = s3; 133 | } 134 | 135 | 136 | 137 | inline void qfAdd(float4 *sum, const float4 a, const float4 b) 138 | { 139 | float s0, s1, s2, s3; 140 | float t0, t1, t2, t3; 141 | 142 | s0 = two_sum(a.x, b.x, &t0); 143 | s1 = two_sum(a.y, b.y, &t1); 144 | s2 = two_sum(a.z, b.z, &t2); 145 | s3 = two_sum(a.w, b.w, &t3); 146 | 147 | s1 = two_sum(s1, t0, &t0); 148 | three_sum(&s2, &t0, &t1); 149 | three_sum2(&s3, &t0, &t2); 150 | t0 = t0 + t1 + t3; 151 | 152 | renorm(&s0, &s1, &s2, &s3, &t0); 153 | (*sum).x = s0; 154 | (*sum).y = s1; 155 | (*sum).z = s2; 156 | (*sum).w = s3; 157 | } 158 | 159 | inline void split(float a, float *hi, float *lo) 160 | { 161 | float temp = ((1<<12)+1) * a; 162 | *hi = temp - (temp - a); 163 | *lo = a - *hi; 164 | } 165 | 166 | 167 | inline float two_prod(float a, float b, float *err) 168 | { 169 | float a_hi, a_lo, b_hi, b_lo; 170 | float p = a * b; 171 | split(a, &a_hi, &a_lo); 172 | split(b, &b_hi, &b_lo); 173 | *err = ((a_hi * b_hi - p) + a_hi * b_lo + a_lo * b_hi) + a_lo * b_lo; 174 | return p; 175 | } 176 | 177 | /* Computes *prod = a * b for two quad-float values. The six leading partial products are formed with two_prod (product p, correction q), accumulated with three_sum / two_sum into the terms p0, p1, s0, s1, s2, and the remaining lower-order products are added with plain float arithmetic. prod receives the four renormalized components; a and b are passed by value and are not modified. */ 178 | inline void qfMul(float4 *prod, const float4 a, const float4 b) 179 | { 180 | float p0, p1, p2, p3, p4, p5; 181 | float q0, q1, q2, q3, q4, q5; 182 | float t0, t1; 183 | float s0, s1, s2; 184 | 185 | p0 = two_prod(a.x, b.x, &q0); 186 | 187 | p1 = two_prod(a.x, b.y, &q1); 188 | p2 = two_prod(a.y, b.x, &q2); 189 | 190 | p3 = two_prod(a.x, b.z, &q3); 191 | p4 = two_prod(a.y, b.y, &q4); 192 | p5 = 
two_prod(a.z, b.x, &q5); 193 | 194 | three_sum(&p1, &p2, &q0); 195 | 196 | three_sum(&p2, &q1, &q2); 197 | three_sum(&p3, &p4, &p5); 198 | /* (s0, s1, s2) = (p2, q1, q2) + (p3, p4, p5) */ 199 | s0 = two_sum(p2, p3, &t0); 200 | s1 = two_sum(q1, p4, &t1); 201 | s2 = q2 + p5; 202 | s1 = two_sum(s1, t0, &t0); 203 | s2 += (t0 + t1); 204 | /* remaining lower-order terms, added without error compensation */ 205 | s1 += a.x*b.w + a.y*b.z + a.z*b.y + a.w*b.x + q0 + q3 + q4 + q5; 206 | renorm(&p0, &p1, &s0, &s1, &s2); 207 | (*prod).x = p0; 208 | (*prod).y = p1; 209 | (*prod).z = s0; /* renorm() writes its four results back into its first four arguments, here p0, p1, s0, s1; p2 and p3 are only intermediates */ 210 | (*prod).w = s1; 211 | } 212 | 213 | 214 | inline void qfMulFloat(float4 *prod, const float4 a, const float b) 215 | { 216 | float p0, p1, p2, p3; 217 | float q0, q1, q2; 218 | float s0, s1, s2, s3, s4; 219 | 220 | p0 = two_prod(a.x, b, &q0); 221 | p1 = two_prod(a.y, b, &q1); 222 | p2 = two_prod(a.z, b, &q2); 223 | p3 = a.w * b; 224 | 225 | s0 = p0; 226 | 227 | s1 = two_sum(q0, p1, &s2); 228 | 229 | three_sum(&s2, &q1, &p2); 230 | 231 | three_sum2(&q1, &q2, &p3); 232 | s3 = q1; 233 | 234 | s4 = q2 + p2; 235 | 236 | renorm(&s0, &s1, &s2, &s3, &s4); 237 | (*prod).x = s0; 238 | (*prod).y = s1; 239 | (*prod).z = s2; 240 | (*prod).w = s3; 241 | } 242 | 243 | 244 | inline bool qfLessThan(float4 *a, float b) 245 | { 246 | return ((*a).x < b || ((*a).x == b && (*a).y < 0.0f)); 247 | } 248 | 249 | inline void renorm4(float *c0, float *c1, 250 | float *c2, float *c3) 251 | { 252 | float s0, s1, s2 = 0.0f, s3 = 0.0f; 253 | 254 | s0 = quick_two_sum(*c2, *c3, c3); 255 | s0 = quick_two_sum(*c1, s0, c2); 256 | *c0 = quick_two_sum(*c0, s0, c1); 257 | 258 | s0 = *c0; 259 | s1 = *c1; 260 | if (s1 != 0.0f) 261 | { 262 | s1 = quick_two_sum(s1, *c2, &s2); 263 | if (s2 != 0.0f) 264 | { 265 | s2 = quick_two_sum(s2, *c3, &s3); 266 | } 267 | else 268 | { 269 | s1 = quick_two_sum(s1, *c3, &s2); 270 | } 271 | } 272 | else 273 | { 274 | s0 = quick_two_sum(s0, *c2, &s1); 275 | if (s1 != 0.0f) 276 | { 277 | s1 = quick_two_sum(s1, *c3, &s2); 278 | } 279 | else 280 | { 281 | s0 = quick_two_sum(s0, *c3, &s1); 282 | } 283 | } 284 | *c0 = 
s0; 285 | *c1 = s1; 286 | *c2 = s2; 287 | *c3 = s3; 288 | } 289 | 290 | float4 qfDiv(const float4 a, const float4 b) 291 | { 292 | float q0, q1, q2, q3; 293 | 294 | float4 r; 295 | float4 p; 296 | 297 | q0 = a.x / b.x; 298 | 299 | // r = a - (b * q0); 300 | qfMulFloat(&p, b, q0); 301 | p = qfNegate(p); 302 | qfAdd(&r, a, p); 303 | 304 | q1 = r.x / b.x; 305 | // r -= (b * q1); 306 | qfMulFloat(&p, b, q1); 307 | p = qfNegate(p); 308 | qfAdd(&r, r, p); 309 | 310 | q2 = r.x / b.x; 311 | //r -= (b * q2); 312 | qfMulFloat(&p, b, q2); 313 | p = qfNegate(p); 314 | qfAdd(&r, r, p); 315 | 316 | q3 = r.x / b.x; 317 | 318 | renorm4(&q0, &q1, &q2, &q3); 319 | 320 | return (float4)(q0, q1, q2, q3); 321 | } 322 | -------------------------------------------------------------------------------- /src/main/resources/kernels/QuadFloatMandelbrot.cl: -------------------------------------------------------------------------------- 1 | /* 2 | * JOCL - Java bindings for OpenCL 3 | * 4 | * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/ 5 | */ 6 | 7 | // A mandelbrot kernel using QuadFloat functions 8 | 9 | inline int iterate( 10 | float2 x0, float2 y0, 11 | float2 dx, float2 dy, 12 | float relX, float relY, 13 | int maxIterations) 14 | { 15 | float4 qx0 = qfAssign2(x0); 16 | float4 qy0 = qfAssign2(y0); 17 | float4 qdx = qfAssign2(dx); 18 | float4 qdy = qfAssign2(dy); 19 | 20 | float4 qr = qfAssign(0); 21 | float4 qi = qfAssign(0); 22 | 23 | float4 qx = qfAssign(0); 24 | float4 qy = qfAssign(0); 25 | 26 | float4 qxx = qfAssign(0); 27 | float4 qyy = qfAssign(0); 28 | 29 | float4 qfTemp = qfAssign(0); 30 | float4 magnitudeSquared = qfAssign(0); 31 | 32 | //float r = x0 + ((float)ix / sizeX) * dx; 33 | //float i = y0 + ((float)iy / sizeY) * dy; 34 | qfMulFloat(&qfTemp, qdx, relX); 35 | qfAdd(&qr, qx0, qfTemp); 36 | 37 | qfMulFloat(&qfTemp, qdy, relY); 38 | qfAdd(&qi, qy0, qfTemp); 39 | 40 | int iteration = 0; 41 | while (iteration= maskOrigin.x && 21 | gy >= maskOrigin.y && 22 | gx < 
imageSize.x - (maskSize.x-maskOrigin.x-1) && 23 | gy < imageSize.y - (maskSize.y-maskOrigin.y-1)) 24 | { 25 | float4 sum = (float4)0; 26 | for(int mx=0; mx= 0 && gx < imageSize.x && 43 | gy >= 0 && gy < imageSize.y) 44 | { 45 | output[mul24(gy, imageSize.x)+gx] = (uchar4)0; 46 | } 47 | } 48 | 49 | } 50 | 51 | 52 | -------------------------------------------------------------------------------- /src/main/resources/kernels/SimpleMandelbrot.cl: -------------------------------------------------------------------------------- 1 | /* 2 | * JOCL - Java bindings for OpenCL 3 | * 4 | * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/ 5 | */ 6 | 7 | // A very simple OpenCL kernel for computing the mandelbrot set 8 | // 9 | // output : A buffer with sizeX*sizeY elements, storing 10 | // the colors as RGB ints 11 | // sizeX, sizeY : The width and height of the buffer 12 | // x0,y0,x1,y1 : The rectangle in which the mandelbrot 13 | // set will be computed 14 | // maxIterations : The maximum number of iterations 15 | // colorMap : A buffer with colorMapSize elements, 16 | // containing the pixel colors 17 | 18 | __kernel void computeMandelbrot( 19 | __global uint *output, 20 | int sizeX, int sizeY, 21 | float x0, float y0, 22 | float x1, float y1, 23 | int maxIterations, 24 | __global uint *colorMap, 25 | int colorMapSize 26 | ) 27 | { 28 | unsigned int ix = get_global_id(0); 29 | unsigned int iy = get_global_id(1); 30 | 31 | float r = x0 + ix * (x1-x0) / sizeX; 32 | float i = y0 + iy * (y1-y0) / sizeY; 33 | 34 | float x = 0; 35 | float y = 0; 36 | 37 | float magnitudeSquared = 0; 38 | int iteration = 0; 39 | while (iteration 0; offset = offset / 2) 29 | { 30 | if (lid < offset) 31 | { 32 | float other = scratch[lid + offset]; 33 | float mine = scratch[lid]; 34 | scratch[lid] = mine + other; 35 | } 36 | barrier(CLK_LOCAL_MEM_FENCE); 37 | } 38 | if (lid == 0) 39 | { 40 | result[get_group_id(0)] = scratch[0]; 41 | } 42 | } 
-------------------------------------------------------------------------------- /src/main/resources/kernels/simpleGL.cl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2009 NVIDIA Corporation. All rights reserved. 3 | * 4 | * NVIDIA Corporation and its licensors retain all intellectual property and 5 | * proprietary rights in and to this software and related documentation. 6 | * Any use, reproduction, disclosure, or distribution of this software 7 | * and related documentation without an express license agreement from 8 | * NVIDIA Corporation is strictly prohibited. 9 | * 10 | * Please refer to the applicable NVIDIA end user license agreement (EULA) 11 | * associated with this source code for terms and conditions that govern 12 | * your use of this NVIDIA software. 13 | * 14 | */ 15 | 16 | /* This example demonstrates how to use the OpenCL/OpenGL bindings */ 17 | 18 | /////////////////////////////////////////////////////////////////////////////// 19 | //! Simple kernel to modify vertex positions in sine wave pattern 20 | //! @param pos vertex positions in global memory, one float4 per grid point, written at index y*width+x; width and height are the grid dimensions used to normalize x and y; time is added as a phase offset to both wave arguments 21 | /////////////////////////////////////////////////////////////////////////////// 22 | __kernel void sine_wave(__global float4* pos, unsigned int width, unsigned int height, float time) 23 | { /* one work-item per grid point; there is no bounds check, so the 2D global size presumably has to be exactly width x height - confirm against the host code */ 24 | unsigned int x = get_global_id(0); 25 | unsigned int y = get_global_id(1); 26 | 27 | // calculate uv coordinates 28 | float u = x / (float) width; 29 | float v = y / (float) height; 30 | u = u*2.0f - 1.0f; /* map u and v from [0, 1) to [-1, 1) */ 31 | v = v*2.0f - 1.0f; 32 | 33 | // calculate simple sine wave pattern 34 | float freq = 4.0f; 35 | float w = sin(u*freq + time) * cos(v*freq + time) * 0.5f; 36 | 37 | // write output vertex 38 | pos[y*width+x] = (float4)(u, w, v, 1.0f); /* the wave value w is stored as the second (y) component of the vertex */ 39 | } 40 | 41 | --------------------------------------------------------------------------------