├── .gitignore
├── LICENSE
├── README.md
├── pom.xml
└── src
    └── main
        ├── java
            └── org
            │   └── jocl
            │       └── samples
            │           ├── HistogramAMD.java
            │           ├── HistogramNVIDIA.java
            │           ├── JOCLBandwidthTest.java
            │           ├── JOCLDeviceQuery.java
            │           ├── JOCLEventSample.java
            │           ├── JOCLMandelbrot.java
            │           ├── JOCLMappedBufferSample.java
            │           ├── JOCLMultiDeviceSample.java
            │           ├── JOCLReduction.java
            │           ├── JOCLSample.java
            │           ├── JOCLSample_1_1.java
            │           ├── JOCLSample_1_2_KernelArgs.java
            │           ├── JOCLSample_2_0_SVM.java
            │           ├── JOCLSimpleConvolution.java
            │           ├── JOCLSimpleGL3.java
            │           ├── JOCLSimpleImage.java
            │           ├── JOCLSimpleLWJGL.java
            │           ├── JOCLSimpleMandelbrot.java
            │           ├── JOCLSubBufferSample.java
            │           └── blast
            │               ├── JOCLBlastCaxpyBatchedSample.java
            │               ├── JOCLBlastDgemmSample.java
            │               └── JOCLBlastSample.java
        └── resources
            ├── data
                └── lena512color.png
            └── kernels
                ├── Histogram256.cl
                ├── Histogram_Kernels.cl
                ├── QuadFloat.cl
                ├── QuadFloatMandelbrot.cl
                ├── SimpleConvolution.cl
                ├── SimpleMandelbrot.cl
                ├── reduction.cl
                └── simpleGL.cl


/.gitignore:
--------------------------------------------------------------------------------
1 | /.settings
2 | /target
3 | /.classpath
4 | /.project
5 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Marco Hutter
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # JOCLSamples
 2 | 
 3 | Samples for JOCL - http://jocl.org
 4 | 
 5 | **Note:** These samples have been moved here from the original samples page
 6 | of the JOCL website, [http://www.jocl.org/samples/samples.html](http://www.jocl.org/samples/samples.html).
 7 | These are mainly *standalone* samples, which means that each class contains
 8 | the whole code that is required for the sample, although some of them refer
 9 | to kernels that are stored in `src/main/resources/kernels`. Several methods 
10 | (e.g. for the basic OpenCL initialization) appear in each of these samples.
11 | They may be moved to a utility class in the future.
12 | 
13 | 


--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
 1 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 2 | 	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 3 | 	<modelVersion>4.0.0</modelVersion>
 4 | 
 5 | 	<groupId>org.jocl</groupId>
 6 | 	<artifactId>jocl-samples</artifactId>
 7 | 	<version>0.0.1-SNAPSHOT</version>
 8 | 
 9 | 	<build>
10 | 		<plugins>
11 | 			<plugin>
12 | 				<groupId>org.apache.maven.plugins</groupId>
13 | 				<artifactId>maven-compiler-plugin</artifactId>
14 | 				<version>2.3.2</version>
15 | 				<configuration>
16 | 					<source>1.7</source>
17 | 					<target>1.7</target>
18 | 				</configuration>
19 | 			</plugin>
20 | 		</plugins>
21 | 	</build>
22 | 
23 | 	<dependencies>
24 | 		<dependency>
25 | 			<groupId>org.jocl</groupId>
26 | 			<artifactId>jocl</artifactId>
27 | 			<version>2.0.4</version>
28 | 		</dependency>
29 | 		<dependency>
30 | 			<groupId>org.jocl</groupId>
31 | 			<artifactId>jocl-blast</artifactId>
32 | 			<version>1.5.0</version>
33 | 		</dependency>
34 | 		<dependency>
35 | 			<groupId>org.jogamp.gluegen</groupId>
36 | 			<artifactId>gluegen-rt-main</artifactId>
37 | 			<version>2.3.1</version>
38 | 		</dependency>
39 | 		<dependency>
40 | 			<groupId>org.jogamp.jogl</groupId>
41 | 			<artifactId>jogl-all-main</artifactId>
42 | 			<version>2.3.1</version>
43 | 		</dependency>
44 | 		<dependency>
45 | 			<groupId>org.lwjgl.lwjgl</groupId>
46 | 			<artifactId>lwjgl</artifactId>
47 | 			<version>2.9.3</version>
48 | 		</dependency>
49 | 	</dependencies>
50 | </project>
51 | 


--------------------------------------------------------------------------------
/src/main/java/org/jocl/samples/HistogramNVIDIA.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * JOCL - Java bindings for OpenCL
  3 |  * 
  4 |  * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
  5 |  */
  6 | package org.jocl.samples;
  7 | 
  8 | import static org.jocl.CL.*;
  9 | 
 10 | import java.io.*;
 11 | import java.util.Random;
 12 | 
 13 | import org.jocl.*;
 14 | 
 15 | /**
 16 |  * This class is a port of the NVIDIA OpenCL SDK "Histogram" sample.
 17 |  * The structure of the code has intentionally been kept similar 
 18 |  * to the original sample.  
 19 |  */
 20 | public class HistogramNVIDIA
 21 | {
 22 |     public static final int HISTOGRAM256_BIN_COUNT = 256; // number of histogram bins, one per possible byte value
 23 |     
 24 |     // OpenCL program that initHistogram256 creates from Histogram256.cl
 25 |     static cl_program cpHistogram256;
 26 | 
 27 |     // The kernels "histogram256" and "mergeHistogram256" of cpHistogram256
 28 |     static cl_kernel ckHistogram256, ckMergeHistogram256;
 29 | 
 30 |     // Number of partial histograms, and the device buffer that stores them
 31 |     static int PARTIAL_HISTOGRAM256_COUNT = 240;
 32 |     static cl_mem d_PartialHistograms;
 33 | 
 34 |     // Command queue that histogram256 uses when it is called with a null queue
 35 |     static cl_command_queue cqDefaultCommandQue;
 36 | 
 37 |     
 38 |     
 39 |     ////////////////////////////////////////////////////////////////////////////////
 40 |     // Test driver: computes a 256-bin histogram of random bytes with OpenCL and on the CPU, and compares both
 41 |     ////////////////////////////////////////////////////////////////////////////////
 42 |     public static void main(String args[])
 43 |     {
 44 |       cl_context       cxGPUContext; //OpenCL context
 45 |       cl_command_queue cqCommandQue; //OpenCL command queue
 46 |       cl_mem    d_Data, d_Histogram; //OpenCL memory buffer objects
 47 | 
 48 |       long dataBytes[] = new long[1]; // receives the size of the device list of the context, in bytes
 49 |       int ciErrNum[] = new int[1];
 50 |       int PassFailFlag = 1;
 51 | 
 52 |       byte h_Data[];
 53 |       int h_HistogramCPU[], h_HistogramGPU[];
 54 |       
 55 |       int byteCount = 128 * 8192; // 1 MiB of input; histogram256 passes byteCount / 4 to its kernel
 56 | 
 57 |       // start logs
 58 |       System.out.println("Starting...\n"); 
 59 | 
 60 |       System.out.println("Initializing data...");
 61 |       h_Data         = new byte[byteCount];
 62 |       h_HistogramCPU = new int[HISTOGRAM256_BIN_COUNT];
 63 |       h_HistogramGPU = new int[HISTOGRAM256_BIN_COUNT];
 64 |       // The fixed seed makes the input data identical in every run
 65 |       Random random = new Random(2009);
 66 |       for(int i = 0; i < byteCount; i++)
 67 |           h_Data[i] = (byte)(random.nextInt() & 0xFF);
 68 | 
 69 |       // This will allow us to subsequently omit the "shrCheckError" calls for this sample
 70 |       CL.setExceptionsEnabled(true);
 71 |       
 72 |       System.out.println("Initializing OpenCL...");
 73 | 
 74 |       // Obtain the first platform ID and create a context for the GPU devices of this platform
 75 |       cl_platform_id platforms[] = new cl_platform_id[1];
 76 |       clGetPlatformIDs(platforms.length, platforms, null);
 77 |       cl_context_properties contextProperties = new cl_context_properties();
 78 |       contextProperties.addProperty(CL_CONTEXT_PLATFORM, platforms[0]);
 79 |       cxGPUContext = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, null, null, ciErrNum);
 80 | 
 81 |       // get the list of GPU devices associated with context: the first call only queries the size of the list
 82 |       clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, null, dataBytes);
 83 |       cl_device_id cdDevices[] = new cl_device_id[(int)dataBytes[0] / Sizeof.cl_device_id];
 84 |       // the second call writes the device IDs into the array
 85 |       clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, dataBytes[0], Pointer.to(cdDevices), null);
 86 | 
 87 |       //Create a command-queue for the first device of the context
 88 |       cl_queue_properties properties = new cl_queue_properties();
 89 |       cqCommandQue = clCreateCommandQueueWithProperties(
 90 |           cxGPUContext, cdDevices[0], properties, ciErrNum);
 91 |       
 92 |       System.out.println("Allocating OpenCL memory...\n");
 93 |       d_Data = clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, byteCount * Sizeof.cl_char, Pointer.to(h_Data), ciErrNum);
 94 |       d_Histogram = clCreateBuffer(cxGPUContext, CL_MEM_READ_WRITE, HISTOGRAM256_BIN_COUNT * Sizeof.cl_int, null, ciErrNum);
 95 | 
 96 |       System.out.println("Initializing 256-bin OpenCL histogram...");
 97 |       initHistogram256(cxGPUContext, cqCommandQue);
 98 | 
 99 |       System.out.printf("Running 256-bin OpenCL histogram for %d bytes...\n", byteCount);
100 |       histogram256(null, d_Histogram, d_Data, byteCount); // null selects the queue that initHistogram256 stored
101 | 
102 |       System.out.println("Validating OpenCL results...");
103 |       System.out.println("...reading back OpenCL results");
104 |       clEnqueueReadBuffer(cqCommandQue, d_Histogram, CL_TRUE, 0, HISTOGRAM256_BIN_COUNT * Sizeof.cl_int, Pointer.to(h_HistogramGPU), 0, null, null);
105 | 
106 |       System.out.println("...histogram256CPU()");
107 | 
108 |       histogram256CPU(h_HistogramCPU, h_Data, byteCount);
109 |       // A single differing bin marks the whole test as failed
110 |       for(int i = 0; i < HISTOGRAM256_BIN_COUNT; i++)
111 |       {
112 |           if(h_HistogramGPU[i] != h_HistogramCPU[i])
113 |           {
114 |               PassFailFlag = 0;
115 |           }
116 |       }
117 |       System.out.println(PassFailFlag != 0 ? "256-bin histograms match\n" : "***256-bin histograms do not match!!!***\n" );
118 | 
119 |       System.out.println("Shutting down 256-bin OpenCL histogram...\n\n"); 
120 | 
121 |       //Release kernels and program
122 |       closeHistogram256();
123 | 
124 |       // pass or fail
125 |       System.out.printf("TEST %s\n", PassFailFlag != 0 ? "PASSED" : "FAILED !!!");
126 | 
127 |       System.out.println("Shutting down...");
128 |       // The return codes are combined in ciErrNum[0], but they are not evaluated afterwards
129 |       //Release other OpenCL Objects
130 |       ciErrNum[0]  = clReleaseMemObject(d_Histogram);
131 |       ciErrNum[0] |= clReleaseMemObject(d_Data);
132 |       ciErrNum[0] |= clReleaseCommandQueue(cqCommandQue);
133 |       ciErrNum[0] |= clReleaseContext(cxGPUContext);
134 |     }
135 |     
136 |     
137 |     /**
138 |      * Computes the reference histogram on the host: The first
139 |      * HISTOGRAM256_BIN_COUNT entries of h_Histogram are cleared, and then
140 |      * the first byteCount bytes of h_Data are counted, each one in the bin
141 |      * whose index is the unsigned value (0 to 255) of the byte.
142 |      */
143 |     static void histogram256CPU(int h_Histogram[], byte h_Data[], int byteCount)
144 |     {
145 |         int bin = 0;
146 |         while (bin < HISTOGRAM256_BIN_COUNT)
147 |             h_Histogram[bin++] = 0;
148 |         for (int index = 0; index < byteCount; index++)
149 |             h_Histogram[h_Data[index] & 0xFF] += 1;
150 |     }
151 |     
152 | 
153 |     ////////////////////////////////////////////////////////////////////////////////
154 |     // OpenCL launchers for histogram256 / mergeHistogram256 kernels
155 |     ////////////////////////////////////////////////////////////////////////////////
156 | 
157 |     // Loads and builds Histogram256.cl, creates both kernels and the buffer for the partial histograms, and stores the default queue
158 |     static void initHistogram256(cl_context cxGPUContext, cl_command_queue cqParamCommandQue)
159 |     {
160 |         int ciErrNum[] = new int[1];
161 |         String fileName = "src/main/resources/kernels/Histogram256.cl";
162 | 
163 |         System.out.println("...loading Histogram256.cl");
164 |         String cHistogram256 = readFile(fileName);
165 |         if (cHistogram256.isEmpty()) // readFile returns "" when reading fails: stop here instead of building an empty program
166 |             throw new IllegalStateException("Could not read kernel source from " + fileName);
167 | 
168 |         System.out.println("...creating histogram256 program");
169 |         // A null 'lengths' array marks the source as null-terminated; String.length() counts chars, not bytes
170 |         cpHistogram256 = clCreateProgramWithSource(cxGPUContext, 1, new String[]{cHistogram256}, null, ciErrNum);
171 |         System.out.println("...building histogram256 program");
172 |         ciErrNum[0] = clBuildProgram(cpHistogram256, 0, null, null, null, null);
173 |         System.out.println("...creating histogram256 kernels");
174 |         ckHistogram256 = clCreateKernel(cpHistogram256, "histogram256", ciErrNum);
175 |         ckMergeHistogram256 = clCreateKernel(cpHistogram256, "mergeHistogram256", ciErrNum);
176 |         System.out.println("...allocating internal histogram256 buffer");
177 |         d_PartialHistograms = clCreateBuffer(cxGPUContext, CL_MEM_READ_WRITE, PARTIAL_HISTOGRAM256_COUNT * HISTOGRAM256_BIN_COUNT * Sizeof.cl_uint, null, ciErrNum);
178 |         cqDefaultCommandQue = cqParamCommandQue; // remembered as the default queue for histogram256
179 |     }
180 |     // Releases the buffer, the kernels and the program that initHistogram256 created
181 |     static void closeHistogram256()
182 |     {
183 |         clReleaseMemObject(d_PartialHistograms);
184 |         clReleaseKernel(ckMergeHistogram256);
185 |         clReleaseKernel(ckHistogram256);
186 |         clReleaseProgram(cpHistogram256); // the program that both kernels were created from
187 |     }
188 | 
189 |     /**
190 |      * Enqueues the histogram256 kernel with d_PartialHistograms, d_Data and byteCount / 4 as arguments, and
191 |      * then the mergeHistogram256 kernel with d_Histogram, d_PartialHistograms and PARTIAL_HISTOGRAM256_COUNT.
192 |      *
193 |      * @param cqCommandQue The command queue, or null to use the queue stored by initHistogram256
194 |      * @param d_Histogram The buffer for the resulting histogram
195 |      * @param d_Data The buffer with the input bytes
196 |      * @param byteCount The number of input bytes, a non-negative multiple of 4
197 |      * @throws IllegalArgumentException If byteCount is negative or not a multiple of 4
198 |      */
199 |     static void histogram256(cl_command_queue cqCommandQue, cl_mem d_Histogram, cl_mem d_Data, int byteCount)
200 |     {
201 |         if (byteCount < 0 || byteCount % 4 != 0) // the division by 4 below would silently drop the trailing bytes
202 |             throw new IllegalArgumentException("byteCount must be a non-negative multiple of 4, but is " + byteCount);
203 |         cl_command_queue queue = (cqCommandQue != null) ? cqCommandQue : cqDefaultCommandQue;
204 |         int WARP_SIZE = 32, WARP_COUNT = 6, MERGE_WORKGROUP_SIZE = 256;
205 |         // First pass: PARTIAL_HISTOGRAM256_COUNT work groups of WARP_SIZE * WARP_COUNT work items
206 |         long localWorkSize[] = { WARP_SIZE * WARP_COUNT };
207 |         long globalWorkSize[] = { PARTIAL_HISTOGRAM256_COUNT * localWorkSize[0] };
208 |         clSetKernelArg(ckHistogram256, 0, Sizeof.cl_mem,  Pointer.to(d_PartialHistograms));
209 |         clSetKernelArg(ckHistogram256, 1, Sizeof.cl_mem,  Pointer.to(d_Data));
210 |         clSetKernelArg(ckHistogram256, 2, Sizeof.cl_uint, Pointer.to(new int[]{byteCount / 4}));
211 |         clEnqueueNDRangeKernel(queue, ckHistogram256, 1, null, globalWorkSize, localWorkSize, 0, null, null);
212 |         // Second pass: HISTOGRAM256_BIN_COUNT work groups of MERGE_WORKGROUP_SIZE work items
213 |         localWorkSize[0]  = MERGE_WORKGROUP_SIZE;
214 |         globalWorkSize[0] = HISTOGRAM256_BIN_COUNT * localWorkSize[0];
215 |         clSetKernelArg(ckMergeHistogram256, 0, Sizeof.cl_mem,  Pointer.to(d_Histogram));
216 |         clSetKernelArg(ckMergeHistogram256, 1, Sizeof.cl_mem,  Pointer.to(d_PartialHistograms));
217 |         clSetKernelArg(ckMergeHistogram256, 2, Sizeof.cl_uint, Pointer.to(new int[]{PARTIAL_HISTOGRAM256_COUNT}));
218 |         clEnqueueNDRangeKernel(queue, ckMergeHistogram256, 1, null, globalWorkSize, localWorkSize, 0, null, null);
219 |     }
220 | 
221 |     
222 |     /**
223 |      * Reads the text file with the given name and returns its contents
224 |      * as a single string. The lines are read with BufferedReader.readLine,
225 |      * and each of them is terminated with a single '\n' in the result,
226 |      * regardless of the line separator that is used in the file.
227 |      * <p>
228 |      * I/O errors are not propagated to the caller: The stack trace of
229 |      * the exception is printed, and the empty string is returned.
230 |      *
231 |      * @param fileName The name of the file to read
232 |      * @return The contents of the file, or the empty string if the
233 |      * file could not be read or closed
234 |      */
235 |     private static String readFile(String fileName)
236 |     {
237 |         // The try-with-resources statement closes the reader on every
238 |         // path, including the one where reading a line fails
239 |         try (BufferedReader br =
240 |             new BufferedReader(new FileReader(fileName)))
241 |         {
242 |             StringBuilder sb = new StringBuilder();
243 |             String line = br.readLine();
244 |             while (line != null)
245 |             {
246 |                 // readLine strips the line terminator, so it is
247 |                 // added again here
248 |                 sb.append(line).append('\n');
249 |                 line = br.readLine();
250 |             }
251 |             return sb.toString();
252 |         }
253 |         catch (IOException e)
254 |         {
255 |             // Keep the best-effort contract: report the problem, and
256 |             // let the caller continue with empty contents
257 |             e.printStackTrace();
258 |             return "";
259 |         }
260 |     }
261 |     
262 | }
263 | 


--------------------------------------------------------------------------------
/src/main/java/org/jocl/samples/JOCLBandwidthTest.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * JOCL - Java bindings for OpenCL
  3 |  * 
  4 |  * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
  5 |  */
  6 | package org.jocl.samples;
  7 | import static org.jocl.CL.*;
  8 | 
  9 | import java.nio.ByteBuffer;
 10 | import java.util.Locale;
 11 | 
 12 | import org.jocl.*;
 13 | 
 14 | /**
 15 |  * A test for the bandwidth of the data transfer from the host 
 16 |  * to the device. 
 17 |  */
 18 | public class JOCLBandwidthTest
 19 | {
 20 |     /**
 21 |      * The index of the OpenCL platform that this sample should run on
 22 |      */
 23 |     private static final int platformIndex = 0;
 24 |     
 25 |     /**
 26 |      * The OpenCL device type that will be used
 27 |      */
 28 |     private static final long deviceType = CL_DEVICE_TYPE_ALL;
 29 |     
 30 |     /**
 31 |      * The index of the OpenCL device that will be used
 32 |      */
 33 |     private static final int deviceIndex = 0;
 34 |     
 35 |     /**
 36 |      * The OpenCL context
 37 |      */
 38 |     private static cl_context context;
 39 |     
 40 |     /**
 41 |      * The OpenCL command queue
 42 |      */
 43 |     private static cl_command_queue commandQueue;
 44 | 
 45 |     /**
 46 |      * The host memory modes that will be tested
 47 |      */
 48 |     enum MemoryMode 
 49 |     { 
 50 |         PAGEABLE, // the host data is stored in a ByteBuffer from ByteBuffer.allocateDirect
 51 |         PINNED // the host data is stored in a mapped CL_MEM_ALLOC_HOST_PTR buffer
 52 |     }
 53 |     
 54 |     /**
 55 |      * The memory access modes that will be tested
 56 |      */
 57 |     enum AccessMode 
 58 |     { 
 59 |         MAPPED, // the data is copied with ByteBuffer.put into the mapped device buffer
 60 |         DIRECT // the data is copied to the device buffer with clEnqueueWriteBuffer
 61 |     }
 62 | 
 63 |     /**
 64 |      * The number of memcopy operations to perform for each size
 65 |      */
 66 |     private static final long MEMCOPY_ITERATIONS = 100;    
 67 |     
 68 |     /**
 69 |      * The entry point of this sample
 70 |      * 
 71 |      * @param args Not used
 72 |      */
 73 |     public static void main(String args[])
 74 |     {
 75 |         // Set up the context and the command queue that all tests share
 76 |         initialize();
 77 | 
 78 |         // Test each access mode for each memory mode, in declaration order
 79 |         MemoryMode memoryModes[] = MemoryMode.values();
 80 |         AccessMode accessModes[] = AccessMode.values();
 81 |         for (int m = 0; m < memoryModes.length; m++)
 82 |             for (int a = 0; a < accessModes.length; a++)
 83 |                 runTest(memoryModes[m], accessModes[a]);
 84 | 
 85 |         shutdown();
 86 |     }
 87 |     
 88 |     /**
 89 |      * Run a bandwidth test with the given memory mode and access mode
 90 |      * 
 91 |      * @param memoryMode The memory mode
 92 |      * @param accessMode The access mode
 93 |      */
 94 |     private static void runTest(MemoryMode memoryMode, AccessMode accessMode)
 95 |     {
 96 |         int minExponent = 10;
 97 |         int maxExponent = 26;
 98 |         int count = maxExponent - minExponent;
 99 |         int memorySizes[] = new int[count];
100 |         double bandwidths[] = new double[memorySizes.length];
101 | 
102 |         System.out.print("Running");
103 |         for (int i=0; i<count; i++)
104 |         {
105 |             System.out.print(".");
106 |             memorySizes[i] = (1 << minExponent + i);
107 |             double bandwidth = computeBandwidth(
108 |                 memorySizes[i], memoryMode, accessMode);
109 |             bandwidths[i] = bandwidth;
110 |         }
111 |         System.out.println();
112 | 
113 |         System.out.println("Bandwidths for "+memoryMode+" and "+accessMode);
114 |         for (int i=0; i<memorySizes.length; i++)
115 |         {
116 |             String s = String.format("%10d", memorySizes[i]);
117 |             String b = String.format(Locale.ENGLISH, "%5.3f", bandwidths[i]);
118 |             System.out.println(s+" bytes : "+b+" MB/s");
119 |         }
120 |         System.out.println("\n");
121 |     }
122 | 
123 | 
124 |     /**
125 |      * Compute the bandwidth in MB/s for copying a chunk of memory of 
126 |      * the given size from the host to the device with the given 
127 |      * memory- and access mode
128 |      * 
129 |      * @param memorySize The memory size, in bytes
130 |      * @param memoryMode The memory mode
131 |      * @param accessMode The access mode
132 |      * @return The bandwidth, in MB/s
133 |      */
134 |     static double computeBandwidth(
135 |         int memorySize, MemoryMode memoryMode, AccessMode accessMode)
136 |     {
137 |         ByteBuffer hostData = null;
138 |         cl_mem pinnedHostData = null;
139 |         cl_mem deviceData = null;
140 | 
141 |         if(memoryMode == MemoryMode.PINNED)
142 |         {
143 |             // Allocate the host memory as a CL_MEM_ALLOC_HOST_PTR buffer
144 |             pinnedHostData = clCreateBuffer(
145 |                 context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, 
146 |                 memorySize, null, null);
147 | 
148 |             // Map the buffer into the host address space
149 |             hostData = clEnqueueMapBuffer(
150 |                 commandQueue, pinnedHostData, CL_TRUE, CL_MAP_WRITE, 
151 |                 0, memorySize, 0, null, null, null);
152 | 
153 |             // Write some data (the low byte of each index) into the host buffer
154 |             for(int i = 0; i < memorySize; i++)
155 |             {
156 |                 hostData.put(i, (byte)i);
157 |             }
158 | 
159 |             // Unmap the buffer, writing the data back to the
160 |             // pinned host buffer
161 |             clEnqueueUnmapMemObject(commandQueue, pinnedHostData, 
162 |                 hostData, 0, null, null);
163 |         }
164 |         else
165 |         {
166 |             // Standard (pageable, non-pinned) allocation
167 |             hostData = ByteBuffer.allocateDirect(memorySize);
168 | 
169 |             // Write some data (the low byte of each index) into the host buffer
170 |             for(int i = 0; i < memorySize; i++)
171 |             {
172 |                 hostData.put(i, (byte)i);
173 |             }
174 |         }
175 | 
176 |         // Allocate device memory
177 |         deviceData = clCreateBuffer(
178 |             context, CL_MEM_READ_WRITE, memorySize, null, null);
179 |         // Finish all pending commands before the time measurement starts
180 |         clFinish(commandQueue);
181 |         long before = System.nanoTime();
182 | 
183 |         if(accessMode == AccessMode.DIRECT)
184 |         {
185 |             if(memoryMode == MemoryMode.PINNED)
186 |             {
187 |                 hostData = clEnqueueMapBuffer(
188 |                     commandQueue, pinnedHostData, CL_TRUE, CL_MAP_READ, 
189 |                     0, memorySize, 0, null, null, null);
190 |             }
191 | 
192 |             // Enqueue the non-blocking copy from the host buffer to the
193 |             // device a few times, and wait until all copies are finished
194 |             for(int i = 0; i < MEMCOPY_ITERATIONS; i++)
195 |             {
196 |                 clEnqueueWriteBuffer(commandQueue, deviceData, CL_FALSE, 
197 |                     0, memorySize, Pointer.to(hostData), 0, null, null);
198 |             }
199 |             clFinish(commandQueue);
200 |         }
201 |         else
202 |         {
203 |             // Map the data from the device to the host address space
204 |             ByteBuffer mappedDeviceData = clEnqueueMapBuffer(
205 |                 commandQueue, deviceData, CL_TRUE, CL_MAP_WRITE, 
206 |                 0, memorySize, 0, null, null, null);
207 |             if(memoryMode == MemoryMode.PINNED )
208 |             {
209 |                 hostData = clEnqueueMapBuffer(commandQueue, 
210 |                     pinnedHostData, CL_TRUE, CL_MAP_READ, 0, 
211 |                     memorySize, 0, null, null, null);
212 |             }
213 |             // Copy the data from the host buffer to the mapped device buffer
214 |             // a few times, resetting both positions after each bulk put
215 |             for(int i = 0; i < MEMCOPY_ITERATIONS; i++)
216 |             {
217 |                 mappedDeviceData.put(hostData);
218 |                 hostData.position(0);
219 |                 mappedDeviceData.position(0);
220 |             }
221 |             clEnqueueUnmapMemObject(commandQueue, deviceData, 
222 |                 mappedDeviceData, 0, null, null);
223 |         }
224 |         // NOTE(review): in MAPPED mode the unmap above is only enqueued, there is no clFinish before the time is taken
225 |         // Compute the bandwidth in MB/s, with 1 MB being (1 << 20) bytes
226 |         long after = System.nanoTime();
227 |         double durationS = (after - before) / 1e9;
228 |         double bandwidthInMBs = 
229 |             (memorySize * MEMCOPY_ITERATIONS)/(durationS * (1 << 20));
230 | 
231 |         // Clean up. In PINNED mode, hostData is the mapping from the timed section above
232 |         if(deviceData != null)
233 |         {
234 |             clReleaseMemObject(deviceData);
235 |         }
236 |         if(pinnedHostData != null)
237 |         {
238 |             clEnqueueUnmapMemObject(commandQueue, pinnedHostData, 
239 |                 hostData, 0, null, null);
240 |             clReleaseMemObject(pinnedHostData);
241 |         }
242 | 
243 |         return bandwidthInMBs;
244 |     }    
245 |     
246 |     
247 |     
248 |     /**
249 |      * Perform a default initialization by creating a context 
250 |      * and a command queue
251 |      */
252 |     private static void initialize()
253 |     {
254 |         // With exceptions enabled, failing CL calls throw, so the
255 |         // return codes are not checked in this sample
256 |         CL.setExceptionsEnabled(true);
257 | 
258 |         // Query how many platforms exist, then fetch all of them
259 |         // and select the one at platformIndex
260 |         int platformCount[] = { 0 };
261 |         clGetPlatformIDs(0, null, platformCount);
262 |         cl_platform_id allPlatforms[] = new cl_platform_id[platformCount[0]];
263 |         clGetPlatformIDs(allPlatforms.length, allPlatforms, null);
264 |         cl_platform_id selectedPlatform = allPlatforms[platformIndex];
265 | 
266 |         // Do the same for the devices of that platform which match
267 |         // deviceType, and select the one at deviceIndex
268 |         int deviceCount[] = { 0 };
269 |         clGetDeviceIDs(selectedPlatform, deviceType, 0, null, deviceCount);
270 |         cl_device_id allDevices[] = new cl_device_id[deviceCount[0]];
271 |         clGetDeviceIDs(
272 |             selectedPlatform, deviceType, allDevices.length, allDevices, null);
273 |         cl_device_id selectedDevice = allDevices[deviceIndex];
274 | 
275 |         // The context is created for the selected device only, with
276 |         // properties that refer to the selected platform
277 |         cl_context_properties platformProperties =
278 |             new cl_context_properties();
279 |         platformProperties.addProperty(
280 |             CL_CONTEXT_PLATFORM, selectedPlatform);
281 |         cl_device_id contextDevices[] = { selectedDevice };
282 |         context = clCreateContext(
283 |             platformProperties, contextDevices.length, contextDevices,
284 |             null, null, null);
285 | 
286 |         // The command queue is created with empty queue properties
287 |         cl_queue_properties queueProperties = new cl_queue_properties();
288 |         commandQueue = clCreateCommandQueueWithProperties(
289 |             context, selectedDevice, queueProperties, null);
290 |     }
291 |     
292 |     /**
293 |      * Shut down and release all resources that have been allocated
294 |      * in {@link #initialize()}
295 |      */
296 |     private static void shutdown()
297 |     {
298 |         clReleaseCommandQueue(commandQueue); // the queue was created for the context, so it is released first
299 |         clReleaseContext(context);
300 |     }
301 | }


--------------------------------------------------------------------------------
/src/main/java/org/jocl/samples/JOCLDeviceQuery.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * JOCL - Java bindings for OpenCL
  3 |  * 
  4 |  * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
  5 |  */
  6 | package org.jocl.samples;
  7 | 
  8 | import static org.jocl.CL.*;
  9 | 
 10 | import java.nio.*;
 11 | import java.util.*;
 12 | 
 13 | import org.jocl.*;
 14 | 
 15 | /**
 16 |  * A JOCL program that queries and prints information about all
 17 |  * available devices.
 18 |  */
 19 | public class JOCLDeviceQuery
 20 | {
 21 |     /**
 22 |      * The entry point of this program
 23 |      *
 24 |      * @param args Not used
 25 |      */
 26 |     public static void main(String args[])
 27 |     {
 28 |         // Obtain the number of platforms
 29 |         int numPlatforms[] = new int[1];
 30 |         clGetPlatformIDs(0, null, numPlatforms);
 31 | 
 32 |         System.out.println("Number of platforms: "+numPlatforms[0]);
 33 | 
 34 |         // Obtain the platform IDs
 35 |         cl_platform_id platforms[] = new cl_platform_id[numPlatforms[0]];
 36 |         clGetPlatformIDs(platforms.length, platforms, null);
 37 | 
 38 |         // Collect all devices of all platforms
 39 |         List<cl_device_id> devices = new ArrayList<cl_device_id>();
 40 |         for (int i=0; i<platforms.length; i++)
 41 |         {
 42 |             String platformName = getString(platforms[i], CL_PLATFORM_NAME);
 43 | 
 44 |             // Obtain the number of devices for the current platform
 45 |             int numDevices[] = new int[1];
 46 |             clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, null, numDevices);
 47 | 
 48 |             System.out.println("Number of devices in platform "+platformName+": "+numDevices[0]);
 49 |             
 50 |             // CL_PLATFORM_VERSION
 51 |             String platformVersion = getString(platforms[i], CL_PLATFORM_VERSION);
 52 |             System.out.printf("CL_PLATFORM_VERSION: \t\t\t%s\n", platformVersion);
 53 | 
 54 |             cl_device_id devicesArray[] = new cl_device_id[numDevices[0]];
 55 |             clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, numDevices[0], devicesArray, null);
 56 | 
 57 |             devices.addAll(Arrays.asList(devicesArray));
 58 |         }
 59 | 
 60 |         // Print the infos about all devices
 61 |         for (cl_device_id device : devices)
 62 |         {
 63 |             // CL_DEVICE_NAME
 64 |             String deviceName = getString(device, CL_DEVICE_NAME);
 65 |             System.out.println("--- Info for device "+deviceName+": ---");
 66 |             System.out.printf("CL_DEVICE_NAME: \t\t\t%s\n", deviceName);
 67 | 
 68 |             // CL_DEVICE_VENDOR
 69 |             String deviceVendor = getString(device, CL_DEVICE_VENDOR);
 70 |             System.out.printf("CL_DEVICE_VENDOR: \t\t\t%s\n", deviceVendor);
 71 | 
 72 |             // CL_DRIVER_VERSION
 73 |             String driverVersion = getString(device, CL_DRIVER_VERSION);
 74 |             System.out.printf("CL_DRIVER_VERSION: \t\t\t%s\n", driverVersion);
 75 | 
 76 |             // CL_DEVICE_TYPE
 77 |             long deviceType = getLong(device, CL_DEVICE_TYPE);
 78 |             if( (deviceType & CL_DEVICE_TYPE_CPU) != 0)
 79 |                 System.out.printf("CL_DEVICE_TYPE:\t\t\t\t%s\n", "CL_DEVICE_TYPE_CPU");
 80 |             if( (deviceType & CL_DEVICE_TYPE_GPU) != 0)
 81 |                 System.out.printf("CL_DEVICE_TYPE:\t\t\t\t%s\n", "CL_DEVICE_TYPE_GPU");
 82 |             if( (deviceType & CL_DEVICE_TYPE_ACCELERATOR) != 0)
 83 |                 System.out.printf("CL_DEVICE_TYPE:\t\t\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR");
 84 |             if( (deviceType & CL_DEVICE_TYPE_DEFAULT) != 0)
 85 |                 System.out.printf("CL_DEVICE_TYPE:\t\t\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT");
 86 | 
 87 |             // CL_DEVICE_MAX_COMPUTE_UNITS
 88 |             int maxComputeUnits = getInt(device, CL_DEVICE_MAX_COMPUTE_UNITS);
 89 |             System.out.printf("CL_DEVICE_MAX_COMPUTE_UNITS:\t\t%d\n", maxComputeUnits);
 90 | 
 91 |             // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS
 92 |             long maxWorkItemDimensions = getLong(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS);
 93 |             System.out.printf("CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:\t%d\n", maxWorkItemDimensions);
 94 | 
 95 |             // CL_DEVICE_MAX_WORK_ITEM_SIZES
 96 |             long maxWorkItemSizes[] = getSizes(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, 3);
 97 |             System.out.printf("CL_DEVICE_MAX_WORK_ITEM_SIZES:\t\t%d / %d / %d \n",
 98 |                 maxWorkItemSizes[0], maxWorkItemSizes[1], maxWorkItemSizes[2]);
 99 | 
100 |             // CL_DEVICE_MAX_WORK_GROUP_SIZE
101 |             long maxWorkGroupSize = getSize(device, CL_DEVICE_MAX_WORK_GROUP_SIZE);
102 |             System.out.printf("CL_DEVICE_MAX_WORK_GROUP_SIZE:\t\t%d\n", maxWorkGroupSize);
103 | 
104 |             // CL_DEVICE_MAX_CLOCK_FREQUENCY
105 |             long maxClockFrequency = getLong(device, CL_DEVICE_MAX_CLOCK_FREQUENCY);
106 |             System.out.printf("CL_DEVICE_MAX_CLOCK_FREQUENCY:\t\t%d MHz\n", maxClockFrequency);
107 | 
108 |             // CL_DEVICE_ADDRESS_BITS
109 |             int addressBits = getInt(device, CL_DEVICE_ADDRESS_BITS);
110 |             System.out.printf("CL_DEVICE_ADDRESS_BITS:\t\t\t%d\n", addressBits);
111 | 
112 |             // CL_DEVICE_MAX_MEM_ALLOC_SIZE
113 |             long maxMemAllocSize = getLong(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE);
114 |             System.out.printf("CL_DEVICE_MAX_MEM_ALLOC_SIZE:\t\t%d MByte\n", (int)(maxMemAllocSize / (1024 * 1024)));
115 | 
116 |             // CL_DEVICE_GLOBAL_MEM_SIZE
117 |             long globalMemSize = getLong(device, CL_DEVICE_GLOBAL_MEM_SIZE);
118 |             System.out.printf("CL_DEVICE_GLOBAL_MEM_SIZE:\t\t%d MByte\n", (int)(globalMemSize / (1024 * 1024)));
119 | 
120 |             // CL_DEVICE_ERROR_CORRECTION_SUPPORT
121 |             int errorCorrectionSupport = getInt(device, CL_DEVICE_ERROR_CORRECTION_SUPPORT);
122 |             System.out.printf("CL_DEVICE_ERROR_CORRECTION_SUPPORT:\t%s\n", errorCorrectionSupport != 0 ? "yes" : "no");
123 | 
124 |             // CL_DEVICE_LOCAL_MEM_TYPE
125 |             int localMemType = getInt(device, CL_DEVICE_LOCAL_MEM_TYPE);
126 |             System.out.printf("CL_DEVICE_LOCAL_MEM_TYPE:\t\t%s\n", localMemType == 1 ? "local" : "global");
127 | 
128 |             // CL_DEVICE_LOCAL_MEM_SIZE
129 |             long localMemSize = getLong(device, CL_DEVICE_LOCAL_MEM_SIZE);
130 |             System.out.printf("CL_DEVICE_LOCAL_MEM_SIZE:\t\t%d KByte\n", (int)(localMemSize / 1024));
131 | 
132 |             // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
133 |             long maxConstantBufferSize = getLong(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE);
134 |             System.out.printf("CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:\t%d KByte\n", (int)(maxConstantBufferSize / 1024));
135 | 
136 |             // CL_DEVICE_QUEUE_PROPERTIES
137 |             long queueProperties = getLong(device, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES);
138 |             if(( queueProperties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE ) != 0)
139 |                 System.out.printf("CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE");
140 |             if(( queueProperties & CL_QUEUE_PROFILING_ENABLE ) != 0)
141 |                 System.out.printf("CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_PROFILING_ENABLE");
142 | 
143 |             // CL_DEVICE_IMAGE_SUPPORT
144 |             int imageSupport = getInt(device, CL_DEVICE_IMAGE_SUPPORT);
145 |             System.out.printf("CL_DEVICE_IMAGE_SUPPORT:\t\t%d\n", imageSupport);
146 | 
147 |             // CL_DEVICE_MAX_READ_IMAGE_ARGS
148 |             int maxReadImageArgs = getInt(device, CL_DEVICE_MAX_READ_IMAGE_ARGS);
149 |             System.out.printf("CL_DEVICE_MAX_READ_IMAGE_ARGS:\t\t%d\n", maxReadImageArgs);
150 | 
151 |             // CL_DEVICE_MAX_WRITE_IMAGE_ARGS
152 |             int maxWriteImageArgs = getInt(device, CL_DEVICE_MAX_WRITE_IMAGE_ARGS);
153 |             System.out.printf("CL_DEVICE_MAX_WRITE_IMAGE_ARGS:\t\t%d\n", maxWriteImageArgs);
154 | 
155 |             // CL_DEVICE_SINGLE_FP_CONFIG
156 |             long singleFpConfig = getLong(device, CL_DEVICE_SINGLE_FP_CONFIG);
157 |             System.out.printf("CL_DEVICE_SINGLE_FP_CONFIG:\t\t%s\n",
158 |                 stringFor_cl_device_fp_config(singleFpConfig));
159 | 
160 |             // CL_DEVICE_IMAGE2D_MAX_WIDTH
161 |             long image2dMaxWidth = getSize(device, CL_DEVICE_IMAGE2D_MAX_WIDTH);
162 |             System.out.printf("CL_DEVICE_2D_MAX_WIDTH\t\t\t%d\n", image2dMaxWidth);
163 | 
164 |             // CL_DEVICE_IMAGE2D_MAX_HEIGHT
165 |             long image2dMaxHeight = getSize(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT);
166 |             System.out.printf("CL_DEVICE_2D_MAX_HEIGHT\t\t\t%d\n", image2dMaxHeight);
167 | 
168 |             // CL_DEVICE_IMAGE3D_MAX_WIDTH
169 |             long image3dMaxWidth = getSize(device, CL_DEVICE_IMAGE3D_MAX_WIDTH);
170 |             System.out.printf("CL_DEVICE_3D_MAX_WIDTH\t\t\t%d\n", image3dMaxWidth);
171 | 
172 |             // CL_DEVICE_IMAGE3D_MAX_HEIGHT
173 |             long image3dMaxHeight = getSize(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT);
174 |             System.out.printf("CL_DEVICE_3D_MAX_HEIGHT\t\t\t%d\n", image3dMaxHeight);
175 | 
176 |             // CL_DEVICE_IMAGE3D_MAX_DEPTH
177 |             long image3dMaxDepth = getSize(device, CL_DEVICE_IMAGE3D_MAX_DEPTH);
178 |             System.out.printf("CL_DEVICE_3D_MAX_DEPTH\t\t\t%d\n", image3dMaxDepth);
179 | 
180 |             // CL_DEVICE_PREFERRED_VECTOR_WIDTH_<type>
181 |             System.out.printf("CL_DEVICE_PREFERRED_VECTOR_WIDTH_<t>\t");
182 |             int preferredVectorWidthChar = getInt(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR);
183 |             int preferredVectorWidthShort = getInt(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT);
184 |             int preferredVectorWidthInt = getInt(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT);
185 |             int preferredVectorWidthLong = getInt(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG);
186 |             int preferredVectorWidthFloat = getInt(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT);
187 |             int preferredVectorWidthDouble = getInt(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE);
188 |             System.out.printf("CHAR %d, SHORT %d, INT %d, LONG %d, FLOAT %d, DOUBLE %d\n\n\n",
189 |                    preferredVectorWidthChar, preferredVectorWidthShort,
190 |                    preferredVectorWidthInt, preferredVectorWidthLong,
191 |                    preferredVectorWidthFloat, preferredVectorWidthDouble);
192 |         }
193 |     }
194 | 
195 |     /**
196 |      * Queries a single int-valued info parameter of the given device.
197 |      * @param device The device to query
198 |      * @param paramName The parameter name
199 |      * @return The value
200 |      */
201 |     private static int getInt(cl_device_id device, int paramName)
202 |     {
203 |         int values[] = getInts(device, paramName, 1);
204 |         return values[0];
205 |     }
206 | 
207 |     /**
208 |      * Queries an info parameter of the given device as an array of ints.
209 |      * @param device The device to query
210 |      * @param paramName The parameter name
211 |      * @param numValues The number of int values to read
212 |      * @return The values, as an array of length numValues
213 |      */
214 |     private static int[] getInts(cl_device_id device, int paramName, int numValues)
215 |     {
216 |         int result[] = new int[numValues];
217 |         long byteSize = Sizeof.cl_int * numValues;
218 |         clGetDeviceInfo(device, paramName, byteSize, Pointer.to(result), null);
219 |         return result;
220 |     }
221 | 
222 |     /**
223 |      * Queries a single long-valued info parameter of the given device.
224 |      * @param device The device to query
225 |      * @param paramName The parameter name
226 |      * @return The value
227 |      */
228 |     private static long getLong(cl_device_id device, int paramName)
229 |     {
230 |         long values[] = getLongs(device, paramName, 1);
231 |         return values[0];
232 |     }
233 | 
234 |     /**
235 |      * Queries an info parameter of the given device as an array of longs.
236 |      * @param device The device to query
237 |      * @param paramName The parameter name
238 |      * @param numValues The number of long values to read
239 |      * @return The values, as an array of length numValues
240 |      */
241 |     private static long[] getLongs(cl_device_id device, int paramName, int numValues)
242 |     {
243 |         long result[] = new long[numValues];
244 |         long byteSize = Sizeof.cl_long * numValues;
245 |         clGetDeviceInfo(device, paramName, byteSize, Pointer.to(result), null);
246 |         return result;
247 |     }
248 | 
249 |     /**
250 |      * Returns the string value of the device info parameter with the given
251 |      * name, or an empty string when no data is reported for the parameter
252 |      * @param device The device
253 |      * @param paramName The parameter name
254 |      * @return The value, without the trailing \0 byte
255 |      */
256 |     private static String getString(cl_device_id device, int paramName)
257 |     {
258 |         // Obtain the length of the string that will be queried
259 |         long size[] = new long[1];
260 |         clGetDeviceInfo(device, paramName, 0, null, size);
261 |         if (size[0] <= 1)
262 |             return ""; // Nothing besides (at most) the trailing \0 byte
263 |         // Create a buffer of the appropriate size and fill it with the info
264 |         byte buffer[] = new byte[(int)size[0]];
265 |         clGetDeviceInfo(device, paramName, buffer.length, Pointer.to(buffer), null);
266 |         // Create a string from the buffer (excluding the trailing \0 byte)
267 |         return new String(buffer, 0, buffer.length-1);
268 |     }
269 | 
270 |     /**
271 |      * Returns the string value of the platform info parameter with the given
272 |      * name, or an empty string when no data is reported for the parameter
273 |      * @param platform The platform
274 |      * @param paramName The parameter name
275 |      * @return The value, without the trailing \0 byte
276 |      */
277 |     private static String getString(cl_platform_id platform, int paramName)
278 |     {
279 |         // Obtain the length of the string that will be queried
280 |         long size[] = new long[1];
281 |         clGetPlatformInfo(platform, paramName, 0, null, size);
282 |         if (size[0] <= 1)
283 |             return ""; // Nothing besides (at most) the trailing \0 byte
284 |         // Create a buffer of the appropriate size and fill it with the info
285 |         byte buffer[] = new byte[(int)size[0]];
286 |         clGetPlatformInfo(platform, paramName, buffer.length, Pointer.to(buffer), null);
287 |         // Create a string from the buffer (excluding the trailing \0 byte)
288 |         return new String(buffer, 0, buffer.length-1);
289 |     }
290 |     
291 |     /**
292 |      * Queries a single size_t-valued info parameter of the given device.
293 |      * @param device The device to query
294 |      * @param paramName The parameter name
295 |      * @return The value
296 |      */
297 |     private static long getSize(cl_device_id device, int paramName)
298 |     {
299 |         long sizes[] = getSizes(device, paramName, 1);
300 |         return sizes[0];
301 |     }
302 |     
303 |     /**
304 |      * Returns the size_t values of the device info parameter with the given name
305 |      *
306 |      * @param device The device
307 |      * @param paramName The parameter name
308 |      * @param numValues The number of values
309 |      * @return The values, each widened to a long
310 |      */
311 |     static long[] getSizes(cl_device_id device, int paramName, int numValues)
312 |     {
313 |         // The size of the returned data has to depend on 
314 |         // the size of a size_t, which is handled here
315 |         ByteBuffer buffer = ByteBuffer.allocate(
316 |             numValues * Sizeof.size_t).order(ByteOrder.nativeOrder());
317 |         clGetDeviceInfo(device, paramName, Sizeof.size_t * numValues, Pointer.to(buffer), null);
318 |         long values[] = new long[numValues];
319 |         if (Sizeof.size_t == 4)
320 |         {
321 |             for (int i=0; i<numValues; i++)
322 |             {
323 |                 // size_t is unsigned: mask the int to avoid sign extension
324 |                 values[i] = buffer.getInt(i * Sizeof.size_t) & 0xFFFFFFFFL;
325 |             }
326 |         }
327 |         else
328 |         {
329 |             for (int i=0; i<numValues; i++)
330 |             {
331 |                 values[i] = buffer.getLong(i * Sizeof.size_t);
332 |             }
333 |         }
334 |         return values;
335 |     }
336 |     
337 | }
338 | 


--------------------------------------------------------------------------------
/src/main/java/org/jocl/samples/JOCLEventSample.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * JOCL - Java bindings for OpenCL
  3 |  * 
  4 |  * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
  5 |  */
  6 | package org.jocl.samples;
  7 | 
  8 | import static org.jocl.CL.*;
  9 | 
 10 | import java.util.*;
 11 | 
 12 | import org.jocl.*;
 13 | 
 14 | /**
 15 |  * A small sample demonstrating basic event handling and how to
 16 |  * obtain profiling information for a command queue.
 17 |  */
 18 | public class JOCLEventSample
 19 | {
 20 |     /**
 21 |      * Source code of a kernel that adds the
 22 |      * components of two vectors and stores
 23 |      * the result in a third vector
 24 |      */
 25 |     private static final String programSource0 =
 26 |         "__kernel void vectorAdd(" +
 27 |         "     __global const float *a,"+
 28 |         "     __global const float *b, " +
 29 |         "     __global float *c)"+
 30 |         "{"+
 31 |         "    int gid = get_global_id(0);"+
 32 |         "    c[gid] = a[gid]+b[gid];"+
 33 |         "}";
 34 | 
 35 |     /**
 36 |      * Source code of a kernel that multiplies the
 37 |      * components of two vectors and stores
 38 |      * the result in a third vector
 39 |      */
 40 |     private static final String programSource1 =
 41 |         "__kernel void vectorMul(" +
 42 |         "     __global const float *a,"+
 43 |         "     __global const float *b, " +
 44 |         "     __global float *c)"+
 45 |         "{"+
 46 |         "    int gid = get_global_id(0);"+
 47 |         "    c[gid] = a[gid]*b[gid];"+
 48 |         "}";
 49 | 
 50 | 
 51 |     /**
 52 |      * The entry point of this sample
 53 |      *
 54 |      * @param args Not used
 55 |      */
 56 |     public static void main(String args[])
 57 |     {
 58 |         // Initialize the input data
 59 |         int n = 50000000;
 60 |         float srcArrayA[] = new float[n];
 61 |         float srcArrayB[] = new float[n];
 62 |         float dstArray0[] = new float[n];
 63 |         float dstArray1[] = new float[n];
 64 |         for (int i=0; i<srcArrayA.length; i++)
 65 |         {
 66 |             srcArrayA[i] = i;
 67 |             srcArrayB[i] = i;
 68 |         }
 69 |         Pointer srcA = Pointer.to(srcArrayA);
 70 |         Pointer srcB = Pointer.to(srcArrayB);
 71 |         Pointer dst0 = Pointer.to(dstArray0);
 72 |         Pointer dst1 = Pointer.to(dstArray1);
 73 | 
 74 |         // The platform, device type and device number
 75 |         // that will be used
 76 |         final int platformIndex = 0;
 77 |         final long deviceType = CL_DEVICE_TYPE_ALL;
 78 |         final int deviceIndex = 0;
 79 | 
 80 |         // Enable exceptions and subsequently omit error checks in this sample
 81 |         CL.setExceptionsEnabled(true);
 82 | 
 83 |         // Obtain the number of platforms
 84 |         int numPlatformsArray[] = new int[1];
 85 |         clGetPlatformIDs(0, null, numPlatformsArray);
 86 |         int numPlatforms = numPlatformsArray[0];
 87 | 
 88 |         // Obtain a platform ID
 89 |         cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
 90 |         clGetPlatformIDs(platforms.length, platforms, null);
 91 |         cl_platform_id platform = platforms[platformIndex];
 92 | 
 93 |         // Initialize the context properties
 94 |         cl_context_properties contextProperties = new cl_context_properties();
 95 |         contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
 96 |         
 97 |         // Obtain the number of devices for the platform
 98 |         int numDevicesArray[] = new int[1];
 99 |         clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
100 |         int numDevices = numDevicesArray[0];
101 |         
102 |         // Obtain a device ID 
103 |         cl_device_id devices[] = new cl_device_id[numDevices];
104 |         clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
105 |         cl_device_id device = devices[deviceIndex];
106 | 
107 |         // Create a context for the selected device
108 |         cl_context context = clCreateContext(
109 |             contextProperties, 1, new cl_device_id[]{device}, 
110 |             null, null, null);
111 |         
112 |         // Create a command-queue, with profiling info enabled
113 |         cl_queue_properties properties = new cl_queue_properties();
114 |         properties.addProperty(CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE);
115 |         System.out.println(properties);
116 |         cl_command_queue commandQueue = clCreateCommandQueueWithProperties(
117 |             context, device, properties, null);
118 | 
119 |         // Allocate the buffer memory objects
120 |         cl_mem srcMemA = clCreateBuffer(context,
121 |             CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
122 |             Sizeof.cl_float * n, srcA, null);
123 | 
124 |         cl_mem srcMemB = clCreateBuffer(context,
125 |             CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
126 |             Sizeof.cl_float * n, srcB, null);
127 | 
128 |         cl_mem dstMem0 = clCreateBuffer(context,
129 |             CL_MEM_READ_WRITE,
130 |             Sizeof.cl_float * n, null, null);
131 | 
132 |         cl_mem dstMem1 = clCreateBuffer(context,
133 |             CL_MEM_READ_WRITE,
134 |             Sizeof.cl_float * n, null, null);
135 | 
136 |         // Create the programs from the kernel sources
137 |         cl_program program0 = clCreateProgramWithSource(context,
138 |             1, new String[]{ programSource0 }, null, null);
139 |         cl_program program1 = clCreateProgramWithSource(context,
140 |             1, new String[]{ programSource1 }, null, null);
141 | 
142 |         // Build the programs
143 |         clBuildProgram(program0, 0, null, null, null, null);
144 |         clBuildProgram(program1, 0, null, null, null, null);
145 | 
146 |         // Create the kernels
147 |         cl_kernel kernel0 = clCreateKernel(program0, "vectorAdd", null);
148 |         cl_kernel kernel1 = clCreateKernel(program1, "vectorMul", null);
149 | 
150 |         // Set the arguments
151 |         clSetKernelArg(kernel0, 0, Sizeof.cl_mem, Pointer.to(srcMemA));
152 |         clSetKernelArg(kernel0, 1, Sizeof.cl_mem, Pointer.to(srcMemB));
153 |         clSetKernelArg(kernel0, 2, Sizeof.cl_mem, Pointer.to(dstMem0));
154 | 
155 |         clSetKernelArg(kernel1, 0, Sizeof.cl_mem, Pointer.to(srcMemA));
156 |         clSetKernelArg(kernel1, 1, Sizeof.cl_mem, Pointer.to(srcMemB));
157 |         clSetKernelArg(kernel1, 2, Sizeof.cl_mem, Pointer.to(dstMem1));
158 | 
159 |         // Set work-item dimensions and execute the kernels
160 |         long globalWorkSize[] = new long[]{n};
161 | 
162 |         System.out.println("Enqueueing kernels...");
163 |         cl_event kernelEvent0 = new cl_event();
164 |         clEnqueueNDRangeKernel(commandQueue, kernel0, 1, null,
165 |             globalWorkSize, null, 0, null, kernelEvent0);
166 | 
167 |         cl_event kernelEvent1 = new cl_event();
168 |         clEnqueueNDRangeKernel(commandQueue, kernel1, 1, null,
169 |             globalWorkSize, null, 0, null, kernelEvent1);
170 | 
171 |         // Wait for the events, i.e. until the kernels have completed
172 |         System.out.println("Waiting for events...");
173 |         clWaitForEvents(2, new cl_event[]{kernelEvent0, kernelEvent1});
174 | 
175 |         // Read the results
176 |         System.out.println("Enqueueing output reads...");
177 |         cl_event readEvent0 = new cl_event();
178 |         clEnqueueReadBuffer(commandQueue, dstMem0, CL_TRUE, 0,
179 |             n * Sizeof.cl_float, dst0, 0, null, readEvent0);
180 | 
181 |         cl_event readEvent1 = new cl_event();
182 |         clEnqueueReadBuffer(commandQueue, dstMem1, CL_TRUE, 0,
183 |             n * Sizeof.cl_float, dst1, 0, null, readEvent1);
184 | 
185 |         // Wait for the events, i.e. until the results are read
186 |         System.out.println("Waiting for events...");
187 |         clWaitForEvents(2, new cl_event[]{readEvent0, readEvent1});
188 | 
189 |         // Print the results
190 |         printResult(dstArray0, 10);
191 |         printResult(dstArray1, 10);
192 | 
193 |         // Print the timing information for the commands
194 |         ExecutionStatistics executionStatistics = new ExecutionStatistics();
195 |         executionStatistics.addEntry("kernel0", kernelEvent0);
196 |         executionStatistics.addEntry("kernel1", kernelEvent1);
197 |         executionStatistics.addEntry("  read0", readEvent0);
198 |         executionStatistics.addEntry("  read1", readEvent1);
199 |         executionStatistics.print();
200 | 
201 |         // Release all OpenCL objects. The profiling info has already
202 |         // been read from the events when the entries were added above
203 |         clReleaseEvent(kernelEvent0);
204 |         clReleaseEvent(kernelEvent1);
205 |         clReleaseEvent(readEvent0);
206 |         clReleaseEvent(readEvent1);
207 |         clReleaseMemObject(srcMemA);
208 |         clReleaseMemObject(srcMemB);
209 |         clReleaseMemObject(dstMem0);
210 |         clReleaseMemObject(dstMem1);
211 |         clReleaseKernel(kernel0);
212 |         clReleaseKernel(kernel1);
213 |         clReleaseProgram(program0);
214 |         clReleaseProgram(program1);
215 |         clReleaseCommandQueue(commandQueue);
216 |         clReleaseContext(context);
217 |     }
218 | 
219 |     /**
220 |      * Print up to 'max' entries of the given array
221 |      *
222 |      * @param result The array containing the result
223 |      * @param max The maximum number of entries to print
224 |      */
225 |     private static void printResult(float result[], int max)
226 |     {
227 |         System.out.print("Result: ");
228 |         max = Math.min(result.length, max);
229 |         for (int i=0; i<max; i++)
230 |         {
231 |             System.out.print(result[i]);
232 |             if (i < max-1)
233 |             {
234 |                 System.out.print(", ");
235 |             }
236 |             else if (result.length > max)
237 |             {
238 |                 System.out.print(" ...");
239 |             }
240 |         }
241 |         System.out.println("");
242 |     }
243 | 
244 |     /**
245 |      * A simple helper class for tracking cl_events and printing
246 |      * timing information for the execution of the commands that
247 |      * are associated with the events.
248 |      */
249 |     static class ExecutionStatistics
250 |     {
251 |         /**
252 |          * A single entry of the ExecutionStatistics, holding the
253 |          * profiling timestamps that have been queried for one event
254 |          */
255 |         private static class Entry
256 |         {
257 |             private final String name;
258 |             private final long queuedTime;
259 |             private final long submitTime;
260 |             private final long startTime;
261 |             private final long endTime;
262 | 
263 |             Entry(String name, cl_event event)
264 |             {
265 |                 this.name = name;
266 |                 this.queuedTime = query(event, CL_PROFILING_COMMAND_QUEUED);
267 |                 this.submitTime = query(event, CL_PROFILING_COMMAND_SUBMIT);
268 |                 this.startTime = query(event, CL_PROFILING_COMMAND_START);
269 |                 this.endTime = query(event, CL_PROFILING_COMMAND_END);
270 |             }
271 | 
272 |             /**
273 |              * Queries a single profiling timestamp of the given event
274 |              */
275 |             private static long query(cl_event event, int paramName)
276 |             {
277 |                 long value[] = new long[1];
278 |                 clGetEventProfilingInfo(event, paramName,
279 |                     Sizeof.cl_ulong, Pointer.to(value), null);
280 |                 return value[0];
281 |             }
282 | 
283 |             long getQueuedTime()
284 |             {
285 |                 return queuedTime;
286 |             }
287 | 
288 |             /**
289 |              * Prints the timestamps of this entry, relative to the
290 |              * given base time, and the execution time of the command
291 |              */
292 |             void print(long baseTime)
293 |             {
294 |                 System.out.println("Event "+name+": ");
295 |                 System.out.println("Queued : "+
296 |                     String.format("%8.3f", (queuedTime-baseTime)/1e6)+" ms");
297 |                 System.out.println("Submit : "+
298 |                     String.format("%8.3f", (submitTime-baseTime)/1e6)+" ms");
299 |                 System.out.println("Start  : "+
300 |                     String.format("%8.3f", (startTime-baseTime)/1e6)+" ms");
301 |                 System.out.println("End    : "+
302 |                     String.format("%8.3f", (endTime-baseTime)/1e6)+" ms");
303 | 
304 |                 long duration = endTime-startTime;
305 |                 System.out.println("Time   : "+
306 |                     String.format("%8.3f", duration / 1e6)+" ms");
307 |             }
308 |         }
309 | 
310 |         /**
311 |          * The list of entries in this instance
312 |          */
313 |         private final List<Entry> entries = new ArrayList<Entry>();
314 | 
315 |         /**
316 |          * Adds the specified entry to this instance. The profiling
317 |          * info of the event is queried immediately.
318 |          *
319 |          * @param name A name for the event
320 |          * @param event The event
321 |          */
322 |         public void addEntry(String name, cl_event event)
323 |         {
324 |             entries.add(new Entry(name, event));
325 |         }
326 | 
327 |         /**
328 |          * Removes all entries
329 |          */
330 |         public void clear()
331 |         {
332 |             entries.clear();
333 |         }
334 | 
335 |         /**
336 |          * Returns the smallest queued time of all entries, which is
337 |          * the base time that the printed times are relative to
338 |          */
339 |         private long getMinQueuedTime()
340 |         {
341 |             long minQueuedTime = Long.MAX_VALUE;
342 |             for (Entry entry : entries)
343 |             {
344 |                 minQueuedTime = Math.min(minQueuedTime, entry.getQueuedTime());
345 |             }
346 |             return minQueuedTime;
347 |         }
348 | 
349 |         /**
350 |          * Print the statistics, with all times being relative to the
351 |          * time when the first event was queued. The entries are not
352 |          * modified, so further entries may be added and the
353 |          * statistics may be printed again afterwards.
354 |          */
355 |         public void print()
356 |         {
357 |             long baseTime = getMinQueuedTime();
358 |             for (Entry entry : entries)
359 |             {
360 |                 entry.print(baseTime);
361 |             }
362 |         }
363 |     }
364 | }
365 | 


--------------------------------------------------------------------------------
/src/main/java/org/jocl/samples/JOCLMappedBufferSample.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * JOCL - Java bindings for OpenCL
  3 |  * 
  4 |  * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
  5 |  */
  6 | package org.jocl.samples;
  7 | 
  8 | import static org.jocl.CL.*;
  9 | 
 10 | import java.nio.*;
 11 | 
 12 | import org.jocl.*;
 13 | 
 14 | /**
 15 |  * A small JOCL sample, similar to the minimal JOCLSample, but
 16 |  * demonstrating how to map a cl_mem to a Java ByteBuffer
 17 |  */
 18 | public class JOCLMappedBufferSample
 19 | {
 20 |     /**
 21 |      * The source code of the OpenCL program: a kernel storing a[gid] * b[gid] in c[gid]
 22 |      */
 23 |     private static String programSource =
 24 |         "__kernel void "+
 25 |         "sampleKernel(__global const float *a,"+
 26 |         "             __global const float *b,"+
 27 |         "             __global float *c)"+
 28 |         "{"+
 29 |         "    int gid = get_global_id(0);"+
 30 |         "    c[gid] = a[gid] * b[gid];"+
 31 |         "}";
 32 |     
 33 |     /**
 34 |      * The name of the kernel to execute, matching the kernel in the program source
 35 |      */
 36 |     private static final String kernelName = "sampleKernel";
 37 | 
 38 |     /**
 39 |      * The index of the OpenCL platform that this sample should run on
 40 |      */
 41 |     private static final int platformIndex = 0;
 42 |     
 43 |     /**
 44 |      * The OpenCL device type that will be used (here: any type of device)
 45 |      */
 46 |     private static final long deviceType = CL_DEVICE_TYPE_ALL;
 47 |     
 48 |     /**
 49 |      * The index of the OpenCL device that will be used
 50 |      */
 51 |     private static final int deviceIndex = 0;
 52 |     
 53 |     /**
 54 |      * The OpenCL context that the memory objects of this sample are created in
 55 |      */
 56 |     private static cl_context context;
 57 |     
 58 |     /**
 59 |      * The OpenCL command queue that all commands of this sample are enqueued in
 60 |      */
 61 |     private static cl_command_queue commandQueue;
 62 |     
 63 |     /**
 64 |      * The OpenCL program that contains the kernel
 65 |      */
 66 |     private static cl_program program;
 67 |     
 68 |     /**
 69 |      * The OpenCL kernel from the program
 70 |      */
 71 |     private static cl_kernel kernel;
 72 |     
 73 |     /**
 74 |      * The entry point of this sample
 75 |      * 
 76 |      * @param args Not used
 77 |      */
 78 |     public static void main(String args[])
 79 |     {
 80 |         initialize();
 81 |         
 82 |         // Create input- and output data 
 83 |         int n = 10;
 84 |         float srcArrayA[] = new float[n];
 85 |         float srcArrayB[] = new float[n];
 86 |         float dstArray[] = new float[n];
 87 |         for (int i=0; i<n; i++)
 88 |         {
 89 |             srcArrayA[i] = i;
 90 |             srcArrayB[i] = i;
 91 |         }
 92 |         
 93 |         // Allocate the memory objects for the input- and output data. The
 94 |         // inputs are copied (COPY_HOST_PTR), not shared via USE_HOST_PTR: a
 95 |         // Java array is no address that OpenCL may keep using after the call
 96 |         cl_mem srcMemA = clCreateBuffer(context, 
 97 |             CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
 98 |             Sizeof.cl_float * n, Pointer.to(srcArrayA), null);
 99 |         cl_mem srcMemB = clCreateBuffer(context, 
100 |             CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
101 |             Sizeof.cl_float * n, Pointer.to(srcArrayB), null);
102 |         cl_mem dstMem = clCreateBuffer(context, 
103 |             CL_MEM_READ_WRITE, Sizeof.cl_float * n, null, null);
104 |         
105 |         // Map the buffer (blocking, for writing), which allows direct access
106 |         // to the cl_mem contents that were initialized from srcArrayA 
107 |         ByteBuffer mapped = clEnqueueMapBuffer(commandQueue, srcMemA, 
108 |             true, CL_MAP_WRITE, 0, n * Sizeof.cl_float, 0, null, null, null);
109 |         FloatBuffer floatBuffer = 
110 |             mapped.order(ByteOrder.nativeOrder()).asFloatBuffer();
111 |         
112 |         // Modify the contents of the cl_mem by changing some values
113 |         // in the mapped buffer
114 |         floatBuffer.put(4, 40);
115 |         floatBuffer.put(5, 50);
116 |         floatBuffer.put(6, 60);
117 |         
118 |         // Unmap the buffer, before the kernel uses the memory object
119 |         clEnqueueUnmapMemObject(commandQueue, srcMemA, mapped, 0, null, null);
120 |         
121 |         // Set the arguments for the kernel
122 |         int a = 0;
123 |         clSetKernelArg(kernel, a++, Sizeof.cl_mem, Pointer.to(srcMemA));
124 |         clSetKernelArg(kernel, a++, Sizeof.cl_mem, Pointer.to(srcMemB));
125 |         clSetKernelArg(kernel, a++, Sizeof.cl_mem, Pointer.to(dstMem));
126 |         
127 |         // Execute the kernel, with one work item per array element
128 |         clEnqueueNDRangeKernel(commandQueue, kernel, 1, null,
129 |             new long[]{n}, null, 0, null, null);
130 |         
131 |         // Read the output data (blocking) into the destination array
132 |         clEnqueueReadBuffer(commandQueue, dstMem, CL_TRUE, 0,
133 |             n * Sizeof.cl_float, Pointer.to(dstArray), 0, null, null);
134 |         
135 |         // Release the memory objects
136 |         clReleaseMemObject(srcMemA);
137 |         clReleaseMemObject(srcMemB);
138 |         clReleaseMemObject(dstMem);
139 |         
140 |         // Verify the result. First, repeat the changes on srcArrayA that
141 |         // were made to its cl_mem copy via the mapped buffer.
142 |         srcArrayA[4] = 40;
143 |         srcArrayA[5] = 50;
144 |         srcArrayA[6] = 60;
145 |         boolean passed = true;
146 |         final float epsilon = 1e-7f;
147 |         for (int i=0; i<n; i++)
148 |         {
149 |             float x = dstArray[i];
150 |             float y = srcArrayA[i] * srcArrayB[i];
151 |             boolean epsilonEqual = Math.abs(x - y) <= epsilon * Math.abs(x);
152 |             if (!epsilonEqual)
153 |             {
154 |                 passed = false;
155 |                 break;
156 |             }
157 |         }
158 |         System.out.println("Test "+(passed?"PASSED":"FAILED"));
159 |         if (n <= 10)
160 |         {
161 |             System.out.println("Result: "+java.util.Arrays.toString(dstArray));
162 |         }
163 |         
164 |         shutdown();
165 |     }
166 |     
167 |     /**
168 |      * Perform a default initialization by creating a context 
169 |      * and a command queue, building the program and obtaining
170 |      * the kernel. 
171 |      */
172 |     private static void initialize()
173 |     {
174 |         // Let JOCL throw exceptions on errors, so no status codes are checked
175 |         CL.setExceptionsEnabled(true);
176 | 
177 |         // Query how many platforms are available
178 |         int platformCount[] = new int[1];
179 |         clGetPlatformIDs(0, null, platformCount);
180 | 
181 |         // Fetch all platforms and pick the configured one
182 |         cl_platform_id allPlatforms[] = new cl_platform_id[platformCount[0]];
183 |         clGetPlatformIDs(allPlatforms.length, allPlatforms, null);
184 |         cl_platform_id selectedPlatform = allPlatforms[platformIndex];
185 | 
186 |         // The context properties only name the selected platform
187 |         cl_context_properties contextProps = new cl_context_properties();
188 |         contextProps.addProperty(CL_CONTEXT_PLATFORM, selectedPlatform);
189 |         
190 |         // Query the number of devices of the configured type, then fetch
191 |         // these devices and pick the configured one
192 |         int deviceCount[] = new int[1];
193 |         clGetDeviceIDs(selectedPlatform, deviceType, 0, null, deviceCount);
194 |         cl_device_id allDevices[] = new cl_device_id[deviceCount[0]];
195 |         clGetDeviceIDs(selectedPlatform, deviceType, 
196 |             allDevices.length, allDevices, null);
197 |         cl_device_id selectedDevice = allDevices[deviceIndex];
198 | 
199 |         // Create a context that contains only the selected device
200 |         cl_device_id contextDevices[] = { selectedDevice };
201 |         context = clCreateContext(
202 |             contextProps, contextDevices.length, contextDevices, 
203 |             null, null, null);
204 |         
205 |         // Create the command queue, without any additional properties
206 |         cl_queue_properties queueProps = new cl_queue_properties();
207 |         commandQueue = clCreateCommandQueueWithProperties(
208 |             context, selectedDevice, queueProps, null);
209 |         
210 |         // Create the program from the source code
211 |         String sources[] = { programSource };
212 |         program = clCreateProgramWithSource(
213 |             context, sources.length, sources, null, null);
214 |         
215 |         // Build the program
216 |         clBuildProgram(program, 0, null, null, null, null);
217 |         
218 |         // Obtain the kernel from the program
219 |         kernel = clCreateKernel(program, kernelName, null);
220 |     }
221 |     
222 |     /**
223 |      * Shut down and release all resources that have been allocated
224 |      * in {@link #initialize()}
225 |      */
226 |     private static void shutdown()
227 |     {
228 |         clReleaseKernel(kernel);             // from clCreateKernel
229 |         clReleaseProgram(program);           // from clCreateProgramWithSource
230 |         clReleaseCommandQueue(commandQueue); // the queue used by main()
231 |         clReleaseContext(context);           // the context is released last
232 |     }
233 | }
234 | 


--------------------------------------------------------------------------------
/src/main/java/org/jocl/samples/JOCLMultiDeviceSample.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * JOCL - Java bindings for OpenCL
  3 |  * 
  4 |  * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
  5 |  */
  6 | package org.jocl.samples;
  7 | 
  8 | import static org.jocl.CL.*;
  9 | 
 10 | import java.util.Arrays;
 11 | 
 12 | import org.jocl.*;
 13 | 
 14 | /**
 15 |  * A small JOCL sample that uses multiple devices. <br>
 16 |  * <br>
 17 |  * Note: This is just a basic demo, showing the possibility to use multiple 
 18 |  * devices simultaneously. Each device receives its own copy of the memory 
 19 |  * objects to work on. In real applications, there may be a more complex
 20 |  * management of the buffers and the synchronization between the different 
 21 |  * devices, which is beyond the scope of this sample.
 22 |  */
 23 | public class JOCLMultiDeviceSample
 24 | {
 25 |     /**
 26 |      * The source code of the OpenCL program to execute, containing 
 27 |      * some artificial workload to compute
 28 |      */
 29 |     private static String programSource = 
 30 |         "__kernel void sampleKernel(__global const float *input,"+
 31 |         "                           __global float *output, " +
 32 |         "                           int size)"+
 33 |         "{"+
 34 |         "    int gid = get_global_id(0);"+
 35 |         "    output[gid] = 0;" +
 36 |         "    for (int i=0; i<size; i++) " +
 37 |         "        output[gid] += input[i];" +
 38 |         "}";
 39 |     
 40 | 
 41 |     /**
 42 |      * The entry point of this sample
 43 |      * 
 44 |      * @param args Not used
 45 |      */
 46 |     public static void main(String args[])
 47 |     {
 48 |         // Create input- and output data 
 49 |         int n = 10000;
 50 |         float input[] = new float[n];
 51 |         float output[] = new float[n];
 52 |         Arrays.fill(input, 1.0f);
 53 | 
 54 |         // The platform and device type that will be used
 55 |         final int platformIndex = 0;
 56 |         final long deviceType = CL_DEVICE_TYPE_ALL;
 57 | 
 58 |         // Enable exceptions and subsequently omit error checks in this sample
 59 |         CL.setExceptionsEnabled(true);
 60 | 
 61 |         // Obtain the number of platforms
 62 |         int numPlatformsArray[] = new int[1];
 63 |         clGetPlatformIDs(0, null, numPlatformsArray);
 64 |         int numPlatforms = numPlatformsArray[0];
 65 | 
 66 |         // Obtain a platform ID
 67 |         cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
 68 |         clGetPlatformIDs(platforms.length, platforms, null);
 69 |         cl_platform_id platform = platforms[platformIndex];
 70 |         String platformName = getString(platform, CL_PLATFORM_NAME);
 71 |         System.out.println("Using platform "+platformIndex+" of "+
 72 |             numPlatforms+": "+platformName);
 73 | 
 74 |         // Initialize the context properties
 75 |         cl_context_properties contextProperties = new cl_context_properties();
 76 |         contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
 77 |         
 78 |         // Obtain the number of devices for the platform
 79 |         int numDevicesArray[] = new int[1];
 80 |         clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
 81 |         int numDevices = numDevicesArray[0];
 82 |         
 83 |         // Obtain a device IDs 
 84 |         cl_device_id devices[] = new cl_device_id[numDevices];
 85 |         clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
 86 |         for (int i=0; i<numDevices; i++)
 87 |         {
 88 |             String deviceName = getString(devices[i], CL_DEVICE_NAME);
 89 |             System.out.println("Device "+i+" of "+numDevices+": "+deviceName);
 90 |         }
 91 | 
 92 |         // Create a context for the devices
 93 |         cl_context context = clCreateContext(
 94 |             contextProperties, devices.length, devices, null, null, null);
 95 | 
 96 |         // Create and build the program and the kernel
 97 |         cl_program program = clCreateProgramWithSource(context,
 98 |             1, new String[]{ programSource }, null, null);
 99 |         clBuildProgram(program, 0, null, null, null, null);
100 |         cl_kernel kernel = clCreateKernel(program, "sampleKernel", null);
101 | 
102 |         // Allocate the memory objects for the input- and output data
103 |         cl_mem inputMems[] = new cl_mem[numDevices];
104 |         cl_mem outputMems[] = new cl_mem[numDevices];
105 |         for (int i=0; i<numDevices; i++)
106 |         {
107 |             
108 |             inputMems[i] = clCreateBuffer(context, 
109 |                 CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
110 |                 Sizeof.cl_float * n, Pointer.to(input), null);
111 |             outputMems[i] = clCreateBuffer(context, 
112 |                 CL_MEM_READ_WRITE, 
113 |                 Sizeof.cl_float * n, null, null);
114 |         }
115 | 
116 |         // Create one command-queue for each device
117 |         cl_command_queue commandQueues[] = new cl_command_queue[numDevices];
118 |         for (int i=0; i<numDevices; i++)
119 |         {
120 |             // Create the command queue
121 |             cl_queue_properties properties = new cl_queue_properties();
122 |             properties.addProperty(CL_QUEUE_PROPERTIES, 
123 |                 CL_QUEUE_PROFILING_ENABLE);
124 |             commandQueues[i] = clCreateCommandQueueWithProperties(
125 |                 context, devices[i], properties, null);
126 |         }
127 |         
128 |         // Execute the kernel on each command queue, and 
129 |         // create events for each kernel launch
130 |         long before = System.nanoTime();
131 |         System.out.println("Enqueueing kernels");
132 |         cl_event events[] = new cl_event[numDevices];
133 |         for (int i=0; i<numDevices; i++)
134 |         {
135 |             clSetKernelArg(kernel, 0, Sizeof.cl_mem, Pointer.to(inputMems[i]));
136 |             clSetKernelArg(kernel, 1, Sizeof.cl_mem, Pointer.to(outputMems[i]));
137 |             clSetKernelArg(kernel, 2, Sizeof.cl_int, Pointer.to(new int[]{n}));
138 |             
139 |             events[i] = new cl_event();
140 |             clEnqueueNDRangeKernel(commandQueues[i], kernel, 1, null,
141 |                 new long[]{n}, null, 0, null, events[i]);
142 |         }
143 |         
144 |         // Wait until the work is finished on all command queues
145 |         System.out.println("Waiting for kernels");
146 |         clWaitForEvents(events.length, events);
147 |         long after = System.nanoTime();
148 |         
149 |         // Print the duration for each device
150 |         System.out.println("Waiting for kernels DONE");
151 |         for (int i=0; i<numDevices; i++)
152 |         {
153 |             float durationMs = computeDurationMs(events[i]);
154 |             System.out.println("Duration on device "+i+" of "+
155 |                 numDevices+": "+durationMs+"ms");
156 |         }
157 |         float totalDurationMs = (after-before)/1e6f;
158 |         System.out.println("Total duration: "+totalDurationMs+"ms");
159 |         
160 |         
161 |         // Read the output data of the first device
162 |         clEnqueueReadBuffer(commandQueues[0], outputMems[0], CL_TRUE, 0,
163 |             n * Sizeof.cl_float, Pointer.to(output), 0, null, null);
164 |         
165 |         // Release kernel, program, and memory objects
166 |         clReleaseKernel(kernel);
167 |         clReleaseProgram(program);
168 |         for (int i=0; i<numDevices; i++)
169 |         {
170 |             clReleaseMemObject(inputMems[i]);
171 |             clReleaseMemObject(outputMems[i]);
172 |             clReleaseEvent(events[i]);
173 |             clReleaseCommandQueue(commandQueues[i]);
174 |         }
175 |         clReleaseContext(context);
176 |         
177 |         // Print the first few elements of the result
178 |         for (int i=0; i<10; i++)
179 |         {
180 |             float x = output[i];
181 |             System.out.print(x+", ");
182 |         }
183 |         System.out.println("...");
184 |         System.out.println("Done");
185 |     }
186 |     
187 |     /**
188 |      * Compute the execution duration of the given event, in milliseconds
189 |      * 
190 |      * @param event The event
191 |      * @return The execution duration, in milliseconds
192 |      */
193 |     private static float computeDurationMs(cl_event event)
194 |     {
195 |         long startTime[] = {0};
196 |         long endTime[] = {0};
197 |         clGetEventProfilingInfo(
198 |             event, CL_PROFILING_COMMAND_START,
199 |             Sizeof.cl_ulong, Pointer.to(startTime), null);
200 |         clGetEventProfilingInfo(
201 |             event, CL_PROFILING_COMMAND_END,
202 |             Sizeof.cl_ulong, Pointer.to(endTime), null);
203 |         long durationNs = endTime[0]-startTime[0];
204 |         return durationNs / 1e6f;
205 |     }
206 |     
207 |     
208 |     /**
209 |      * Returns the value of the platform info parameter with the given name
210 |      *
211 |      * @param platform The platform
212 |      * @param paramName The parameter name
213 |      * @return The value
214 |      */
215 |     private static String getString(cl_platform_id platform, int paramName)
216 |     {
217 |         long size[] = new long[1];
218 |         clGetPlatformInfo(platform, paramName, 0, null, size);
219 |         byte buffer[] = new byte[(int)size[0]];
220 |         clGetPlatformInfo(platform, paramName, 
221 |             buffer.length, Pointer.to(buffer), null);
222 |         return new String(buffer, 0, buffer.length-1);
223 |     }
224 |     
225 |     /**
226 |      * Returns the value of the device info parameter with the given name
227 |      *
228 |      * @param device The device
229 |      * @param paramName The parameter name
230 |      * @return The value
231 |      */
232 |     private static String getString(cl_device_id device, int paramName)
233 |     {
234 |         long size[] = new long[1];
235 |         clGetDeviceInfo(device, paramName, 0, null, size);
236 |         byte buffer[] = new byte[(int)size[0]];
237 |         clGetDeviceInfo(device, paramName, 
238 |             buffer.length, Pointer.to(buffer), null);
239 |         return new String(buffer, 0, buffer.length-1);
240 |     }
241 |     
242 |     
243 | }
244 | 


--------------------------------------------------------------------------------
/src/main/java/org/jocl/samples/JOCLReduction.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * JOCL - Java bindings for OpenCL
  3 |  * 
  4 |  * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
  5 |  */
  6 | package org.jocl.samples;
  7 | 
  8 | import static org.jocl.CL.*;
  9 | 
 10 | import java.io.BufferedReader;
 11 | import java.io.FileReader;
 12 | import java.io.IOException;
 13 | 
 14 | import org.jocl.*;
 15 | 
 16 | /**
 17 |  * A sample showing a simple reduction with JOCL
 18 |  */
 19 | public class JOCLReduction
 20 | {
 21 |     /**
 22 |      * The OpenCL context
 23 |      */
 24 |     private static cl_context context;
 25 |     
 26 |     /**
 27 |      * The OpenCL command queue to which all work will be dispatched
 28 |      */
 29 |     private static cl_command_queue commandQueue;
 30 |     
 31 |     /**
 32 |      * The OpenCL program containing the reduction kernel
 33 |      */
 34 |     private static cl_program program;
 35 |     
 36 |     /**
 37 |      * The OpenCL kernel that performs the reduction
 38 |      */
 39 |     private static cl_kernel kernel;
 40 | 
 41 |     /**
 42 |      * The entry point of this sample
 43 |      * 
 44 |      * @param args Not used
 45 |      */
 46 |     public static void main(String args[])
 47 |     {
 48 |         initialize();
 49 |         
 50 |         // Create input array that will be reduced
 51 |         int n = 100000;
 52 |         float inputArray[] = new float[n];
 53 |         for (int i=0; i<n; i++)
 54 |         {
 55 |             inputArray[i] = i;
 56 |         }
 57 |         
 58 |         // Compute the reduction on the GPU and the CPU and print the results
 59 |         float resultGPU = reduce(inputArray);
 60 |         float resultCPU = reduceHost(inputArray);
 61 |         System.out.println("GPU "+resultGPU);
 62 |         System.out.println("CPU "+resultCPU);
 63 |         
 64 |         shutdown();
 65 |     }
 66 |     
 67 |     /**
 68 |      * Perform a reduction of the given input array on the GPU and return
 69 |      * the result. <br>
 70 |      * <br>
 71 |      * The reduction is performed in two phases: In the first phase, each
 72 |      * work group of the GPU computes the reduction of a part of the 
 73 |      * input array. The size of this part is exactly the number of work
 74 |      * items in the group, and the reduction will be performed in local
 75 |      * memory. The results of these reductions will be written into
 76 |      * an output array. This output array is then reduced on the CPU. 
 77 |      * 
 78 |      * @param inputArray The array on which the reduction will be performed
 79 |      * @return The result of the reduction
 80 |      */
 81 |     private static float reduce(float inputArray[])
 82 |     {
 83 |         int localWorkSize = 128;
 84 |         int numWorkGroups = 64;
 85 |         float outputArray[] = new float[numWorkGroups];
 86 | 
 87 |         // Allocate the memory objects for the input- and output data. The 
 88 |         // kernel writes its results into the output memory, so that one 
 89 |         // has to be writable for the kernel (CL_MEM_READ_ONLY is not valid)
 90 |         cl_mem inputMem = clCreateBuffer(context, 
 91 |             CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
 92 |             Sizeof.cl_float * inputArray.length, Pointer.to(inputArray), null);
 93 |         cl_mem outputMem = clCreateBuffer(context, 
 94 |             CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
 95 |             Sizeof.cl_float * numWorkGroups, Pointer.to(outputArray), null);
 96 | 
 97 |         // Perform the reduction on the GPU: Each work group will 
 98 |         // perform the reduction of 'localWorkSize' elements, and
 99 |         // the results will be written into the output memory
100 |         reduce(
101 |             inputMem, inputArray.length, 
102 |             outputMem, numWorkGroups,
103 |             localWorkSize);
104 |         
105 |         // Read the output data
106 |         clEnqueueReadBuffer(commandQueue, outputMem, CL_TRUE, 0,
107 |             numWorkGroups * Sizeof.cl_float, Pointer.to(outputArray), 
108 |             0, null, null);
109 | 
110 |         // Perform the final reduction, by reducing the results 
111 |         // from the work groups on the CPU
112 |         float result = reduceHost(outputArray);
113 |         
114 |         // Release memory objects
115 |         clReleaseMemObject(inputMem);
116 |         clReleaseMemObject(outputMem);
117 |         
118 |         return result;
119 |     }
120 |     
121 |     /**
122 |      * Perform a reduction of the float elements in the given input memory.
123 |      * Each work group will reduce 'localWorkSize' elements, and write the
124 |      * result into the given output memory. 
125 |      *  
126 |      * @param inputMem The input memory containing the float values to reduce 
127 |      * @param n The number of values in the input memory
128 |      * @param outputMem The output memory that will store the reduction
129 |      * result for each work group
130 |      * @param numWorkGroups The number of work groups
131 |      * @param localWorkSize The local work size, that is, the number of
132 |      * work items in each work group 
133 |      */
134 |     private static void reduce(
135 |         cl_mem inputMem, int n, 
136 |         cl_mem outputMem, int numWorkGroups,
137 |         int localWorkSize)
138 |     {
139 |         // Set the arguments for the kernel. The second argument has a size
140 |         // but a null pointer, which is how local memory is requested
141 |         int a = 0;
142 |         clSetKernelArg(kernel, a++, Sizeof.cl_mem, Pointer.to(inputMem));
143 |         clSetKernelArg(kernel, a++, Sizeof.cl_float * localWorkSize, null);
144 |         clSetKernelArg(kernel, a++, Sizeof.cl_int, Pointer.to(new int[]{n}));
145 |         clSetKernelArg(kernel, a++, Sizeof.cl_mem, Pointer.to(outputMem));
146 |         
147 |         // Compute the global work size. The multiplication is done with 
148 |         // long values, so that it cannot overflow the int range
149 |         long globalWorkSize = (long)numWorkGroups * localWorkSize;
150 |         
151 |         // Execute the kernel
152 |         clEnqueueNDRangeKernel(commandQueue, kernel, 1, null,
153 |             new long[]{ globalWorkSize }, new long[]{ localWorkSize}, 
154 |             0, null, null);
155 |     }
156 |     
157 |     /**
158 |      * Implementation of a Kahan summation reduction in plain Java
159 |      * 
160 |      * @param array The input 
161 |      * @return The reduction result, which is 0 for an empty input
162 |      */
163 |     private static float reduceHost(float array[])
164 |     {
165 |         if (array.length == 0)
166 |         {
167 |             return 0.0f;
168 |         }
169 |         float sum = array[0];
170 |         float c = 0.0f;              // The running compensation
171 |         for (int i = 1; i < array.length; i++)
172 |         {
173 |             float y = array[i] - c;  // The next value, corrected by c
174 |             float t = sum + y;       // Low-order bits of y may be lost here
175 |             c = (t - sum) - y;       // Recover what was lost (negated)
176 |             sum = t;            
177 |         }
178 |         return sum;
179 |     }
180 |     
181 |     /**
182 |      * Initialize a default OpenCL context, command queue, program and kernel
183 |      */
184 |     private static void initialize()
185 |     {
186 |         // The platform, device type and device number
187 |         // that will be used
188 |         final int platformIndex = 0;
189 |         final long deviceType = CL_DEVICE_TYPE_ALL;
190 |         final int deviceIndex = 0;
191 | 
192 |         // Enable exceptions and subsequently omit error checks in this sample
193 |         CL.setExceptionsEnabled(true);
194 | 
195 |         // Obtain the number of platforms
196 |         int numPlatformsArray[] = new int[1];
197 |         clGetPlatformIDs(0, null, numPlatformsArray);
198 |         int numPlatforms = numPlatformsArray[0];
199 | 
200 |         // Obtain a platform ID
201 |         cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
202 |         clGetPlatformIDs(platforms.length, platforms, null);
203 |         cl_platform_id platform = platforms[platformIndex];
204 | 
205 |         // Initialize the context properties
206 |         cl_context_properties contextProperties = new cl_context_properties();
207 |         contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
208 |         
209 |         // Obtain the number of devices for the platform
210 |         int numDevicesArray[] = new int[1];
211 |         clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
212 |         int numDevices = numDevicesArray[0];
213 |         
214 |         // Obtain a device ID 
215 |         cl_device_id devices[] = new cl_device_id[numDevices];
216 |         clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
217 |         cl_device_id device = devices[deviceIndex];
218 | 
219 |         // Create a context for the selected device
220 |         context = clCreateContext(
221 |             contextProperties, 1, new cl_device_id[]{device}, 
222 |             null, null, null);
223 |         
224 |         // Create a command-queue for the selected device
225 |         cl_queue_properties properties = new cl_queue_properties();
226 |         commandQueue = clCreateCommandQueueWithProperties(
227 |             context, device, properties, null);
228 |         
229 |         // Create the program from the source code
230 |         String programSource = readFile("src/main/resources/kernels/reduction.cl");
231 |         program = clCreateProgramWithSource(context,
232 |             1, new String[]{ programSource }, null, null);
233 |         
234 |         // Build the program
235 |         clBuildProgram(program, 0, null, null, null, null);
236 |         
237 |         // Create the kernel
238 |         kernel = clCreateKernel(program, "reduce", null);
239 |     }
240 |     
241 |     /**
242 |      * Shut down and release all resources that have been allocated
243 |      * in {@link #initialize()}
244 |      */
245 |     private static void shutdown()
246 |     {
247 |         clReleaseKernel(kernel);
248 |         clReleaseProgram(program);
249 |         clReleaseCommandQueue(commandQueue);
250 |         clReleaseContext(context);
251 |     }
252 |     
253 |     /**
254 |      * Read the contents of the file with the given name, and return
255 |      * it as a string, with each line being terminated by a line feed
256 |      * 
257 |      * @param fileName The name of the file to read
258 |      * @return The contents of the file, or an empty string on read errors
259 |      */
260 |     private static String readFile(String fileName)
261 |     {
262 |         BufferedReader br = null;
263 |         try
264 |         {
265 |             br = new BufferedReader(new FileReader(fileName));
266 |             StringBuilder sb = new StringBuilder();
267 |             String line = null;
268 |             while ((line = br.readLine()) != null)
269 |             {
270 |                 sb.append(line).append("\n");
271 |             }
272 |             return sb.toString();
273 |         }
274 |         catch (IOException e)
275 |         {
276 |             e.printStackTrace();
277 |             return "";
278 |         }
279 |         finally
280 |         {
281 |             if (br != null)
282 |             {
283 |                 try
284 |                 {
285 |                     br.close();
286 |                 }
287 |                 catch (IOException ex)
288 |                 {
289 |                     ex.printStackTrace();
290 |                 }
291 |             }
292 |         }
293 |     }
294 | }
295 | 


--------------------------------------------------------------------------------
/src/main/java/org/jocl/samples/JOCLSample.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * JOCL - Java bindings for OpenCL
  3 |  * 
  4 |  * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
  5 |  */
  6 | package org.jocl.samples;
  7 | 
  8 | import static org.jocl.CL.*;
  9 | 
 10 | import java.util.Arrays;
 11 | 
 12 | import org.jocl.*;
 13 | 
 14 | /**
 15 |  * A small JOCL sample.
 16 |  */
 17 | public class JOCLSample
 18 | {
 19 |     /**
 20 |      * The source code of the OpenCL program to execute
 21 |      */
 22 |     private static String programSource =
 23 |         "__kernel void "+
 24 |         "sampleKernel(__global const float *a,"+
 25 |         "             __global const float *b,"+
 26 |         "             __global float *c)"+
 27 |         "{"+
 28 |         "    int gid = get_global_id(0);"+
 29 |         "    c[gid] = a[gid] * b[gid];"+
 30 |         "}";
 31 |     
 32 | 
 33 |     /**
 34 |      * The entry point of this sample
 35 |      * 
 36 |      * @param args Not used
 37 |      */
 38 |     public static void main(String args[])
 39 |     {
 40 |         // Create input- and output data 
 41 |         int n = 10;
 42 |         float srcArrayA[] = new float[n];
 43 |         float srcArrayB[] = new float[n];
 44 |         float dstArray[] = new float[n];
 45 |         for (int i=0; i<n; i++)
 46 |         {
 47 |             srcArrayA[i] = i;
 48 |             srcArrayB[i] = i;
 49 |         }
 50 |         Pointer srcA = Pointer.to(srcArrayA);
 51 |         Pointer srcB = Pointer.to(srcArrayB);
 52 |         Pointer dst = Pointer.to(dstArray);
 53 | 
 54 |         // The platform, device type and device number
 55 |         // that will be used
 56 |         final int platformIndex = 0;
 57 |         final long deviceType = CL_DEVICE_TYPE_ALL;
 58 |         final int deviceIndex = 0;
 59 | 
 60 |         // Enable exceptions and subsequently omit error checks in this sample
 61 |         CL.setExceptionsEnabled(true);
 62 | 
 63 |         // Obtain the number of platforms
 64 |         int numPlatformsArray[] = new int[1];
 65 |         clGetPlatformIDs(0, null, numPlatformsArray);
 66 |         int numPlatforms = numPlatformsArray[0];
 67 | 
 68 |         // Obtain a platform ID
 69 |         cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
 70 |         clGetPlatformIDs(platforms.length, platforms, null);
 71 |         cl_platform_id platform = platforms[platformIndex];
 72 | 
 73 |         // Initialize the context properties
 74 |         cl_context_properties contextProperties = new cl_context_properties();
 75 |         contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
 76 |         
 77 |         // Obtain the number of devices for the platform
 78 |         int numDevicesArray[] = new int[1];
 79 |         clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
 80 |         int numDevices = numDevicesArray[0];
 81 |         
 82 |         // Obtain a device ID 
 83 |         cl_device_id devices[] = new cl_device_id[numDevices];
 84 |         clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
 85 |         cl_device_id device = devices[deviceIndex];
 86 | 
 87 |         // Create a context for the selected device
 88 |         cl_context context = clCreateContext(
 89 |             contextProperties, 1, new cl_device_id[]{device}, 
 90 |             null, null, null);
 91 |         
 92 |         // Create a command-queue for the selected device
 93 |         cl_queue_properties properties = new cl_queue_properties();
 94 |         cl_command_queue commandQueue = clCreateCommandQueueWithProperties(
 95 |             context, device, properties, null);
 96 | 
 97 |         // Allocate the memory objects for the input- and output data
 98 |         cl_mem srcMemA = clCreateBuffer(context, 
 99 |             CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
100 |             Sizeof.cl_float * n, srcA, null);
101 |         cl_mem srcMemB = clCreateBuffer(context, 
102 |             CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
103 |             Sizeof.cl_float * n, srcB, null);
104 |         cl_mem dstMem = clCreateBuffer(context, 
105 |             CL_MEM_READ_WRITE, 
106 |             Sizeof.cl_float * n, null, null);
107 |         
108 |         // Create the program from the source code
109 |         cl_program program = clCreateProgramWithSource(context,
110 |             1, new String[]{ programSource }, null, null);
111 |         
112 |         // Build the program
113 |         clBuildProgram(program, 0, null, null, null, null);
114 |         
115 |         // Create the kernel
116 |         cl_kernel kernel = clCreateKernel(program, "sampleKernel", null);
117 |         
118 |         // Set the arguments for the kernel
119 |         int a = 0;
120 |         clSetKernelArg(kernel, a++, Sizeof.cl_mem, Pointer.to(srcMemA));
121 |         clSetKernelArg(kernel, a++, Sizeof.cl_mem, Pointer.to(srcMemB));
122 |         clSetKernelArg(kernel, a++, Sizeof.cl_mem, Pointer.to(dstMem));
123 |         
124 |         // Set the work-item dimensions
125 |         long global_work_size[] = new long[]{n};
126 |         
127 |         // Execute the kernel
128 |         clEnqueueNDRangeKernel(commandQueue, kernel, 1, null,
129 |             global_work_size, null, 0, null, null);
130 |         
131 |         // Read the output data
132 |         clEnqueueReadBuffer(commandQueue, dstMem, CL_TRUE, 0,
133 |             n * Sizeof.cl_float, dst, 0, null, null);
134 |         
135 |         // Release kernel, program, and memory objects
136 |         clReleaseMemObject(srcMemA);
137 |         clReleaseMemObject(srcMemB);
138 |         clReleaseMemObject(dstMem);
139 |         clReleaseKernel(kernel);
140 |         clReleaseProgram(program);
141 |         clReleaseCommandQueue(commandQueue);
142 |         clReleaseContext(context);
143 |         
144 |         // Verify the result
145 |         boolean passed = true;
146 |         final float epsilon = 1e-7f;
147 |         for (int i=0; i<n; i++)
148 |         {
149 |             float x = dstArray[i];
150 |             float y = srcArrayA[i] * srcArrayB[i];
151 |             boolean epsilonEqual = Math.abs(x - y) <= epsilon * Math.abs(x);
152 |             if (!epsilonEqual)
153 |             {
154 |                 passed = false;
155 |                 break;
156 |             }
157 |         }
158 |         System.out.println("Test "+(passed?"PASSED":"FAILED"));
159 |         if (n <= 10)
160 |         {
161 |             System.out.println("Result: "+Arrays.toString(dstArray));
162 |         }
163 |     }
164 | }
165 | 


--------------------------------------------------------------------------------
/src/main/java/org/jocl/samples/JOCLSample_1_1.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * JOCL - Java bindings for OpenCL
  3 |  * 
  4 |  * Copyright 2010 Marco Hutter - http://www.jocl.org/
  5 |  */
  6 | package org.jocl.samples;
  7 | 
  8 | import static org.jocl.CL.*;
  9 | 
 10 | import java.nio.*;
 11 | import java.util.*;
 12 | 
 13 | import org.jocl.*;
 14 | 
 15 | 
 16 | /**
 17 |  * A small JOCL sample, demonstrating some of the new features
 18 |  * that have been introduced with OpenCL 1.1.
 19 |  */
 20 | public class JOCLSample_1_1
 21 | {
 22 |     /**
 23 |      * The source code of the OpenCL program to execute
 24 |      */
 25 |     private static String programSource =
 26 |         "__kernel void "+
 27 |         "sampleKernel(__global const float *a,"+
 28 |         "             __global const float *b,"+
 29 |         "             __global float *c)"+
 30 |         "{"+
 31 |         "    int gid = get_global_id(0);"+
 32 |         "    c[gid] = a[gid] + b[gid];"+
 33 |         "}";
 34 | 
 35 |     private static cl_context context;
 36 |     private static cl_command_queue commandQueue;
 37 |     private static cl_kernel kernel;
 38 |     private static cl_program program;
 39 |     
 40 | 
 41 |     /**
 42 |      * The entry point of this sample: runs the kernel, reads the center 2x2
 43 |      * region of the 4x4 result (gated by a user event) and verifies it
 44 |      * @param args Not used
 45 |      */
 46 |     public static void main(String args[])
 47 |     {
 48 |         defaultInitialization();
 49 |         
 50 |         // Create input- and output data
 51 |         int sizeX = 4;
 52 |         int sizeY = 4;
 53 |         int n = sizeX * sizeY;
 54 |         float srcArrayA[] = new float[n];
 55 |         float srcArrayB[] = new float[n];
 56 |         for (int i=0; i<n; i++)
 57 |         {
 58 |             srcArrayA[i] = i;
 59 |             srcArrayB[i] = i;
 60 |         }
 61 |         final Pointer srcA = Pointer.to(srcArrayA);
 62 |         final Pointer srcB = Pointer.to(srcArrayB);
 63 | 
 64 |         // Allocate the memory objects for the input- and output data
 65 |         cl_mem srcMemA = clCreateBuffer(context, 
 66 |             CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
 67 |             Sizeof.cl_float * n, srcA, null);
 68 |         cl_mem srcMemB = clCreateBuffer(context, 
 69 |             CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
 70 |             Sizeof.cl_float * n, srcB, null);
 71 |         cl_mem dstMem = clCreateBuffer(context, 
 72 |             CL_MEM_READ_WRITE, 
 73 |             Sizeof.cl_float * n, null, null);
 74 |         
 75 |         // Set the arguments for the kernel
 76 |         clSetKernelArg(kernel, 0, Sizeof.cl_mem, Pointer.to(srcMemA));
 77 |         clSetKernelArg(kernel, 1, Sizeof.cl_mem, Pointer.to(srcMemB));
 78 |         clSetKernelArg(kernel, 2, Sizeof.cl_mem, Pointer.to(dstMem));
 79 |         
 80 |         // Execute the kernel
 81 |         clEnqueueNDRangeKernel(commandQueue, kernel, 1, null,
 82 |             new long[]{n}, null, 0, null, null);
 83 | 
 84 |         
 85 |         
 86 |         // New features of OpenCL 1.1 demonstrated here:
 87 |         // - User events
 88 |         // - Functions for handling memory regions
 89 |         // - Event callbacks
 90 |         // - Memory object destructor callbacks
 91 |         
 92 |         // The output buffer has 16 float elements, interpreted here as a
 93 |         // 4x4 matrix. The following setup enqueues a command that reads
 94 |         // the center 2x2 block: a region starting at (1,1) with a size
 95 |         // of (2,2) elements. The x-offset, the x-extent and all row and
 96 |         // slice pitches are given in bytes, the y/z values in rows/slices.
 97 |         final int regionSizeX = 2;
 98 |         final int regionSizeY = 2;
 99 |         long bufferOffset[] = new long[] { 1 * Sizeof.cl_float, 1, 0 };
100 |         long hostOffset[] = new long[] { 0, 0, 0 };
101 |         long region[] = new long[] { regionSizeX * Sizeof.cl_float, regionSizeY, 1 };
102 |         long bufferRowPitch = sizeX * Sizeof.cl_float;
103 |         long bufferSlicePitch = sizeX * sizeY * Sizeof.cl_float;
104 |         long hostRowPitch = regionSizeX * Sizeof.cl_float;
105 |         long hostSlicePitch = regionSizeX * regionSizeY * Sizeof.cl_float;
106 |         final FloatBuffer regionData = 
107 |             ByteBuffer.allocateDirect(regionSizeX * regionSizeY * Sizeof.cl_float).
108 |                 order(ByteOrder.nativeOrder()).asFloatBuffer();
109 | 
110 |         // Create a user event. Later, the command that reads the
111 |         // result buffer will wait for this event to be completed.
112 |         final cl_event userEvent = clCreateUserEvent(context, null);
113 | 
114 |         // The command to read the memory region will be non-blocking, but
115 |         // waiting for the user event that was created above. Additionally,
116 |         // the command will have an associated event, for which a callback
117 |         // will be registered. This callback will be called when the read
118 |         // command has completed.
119 |         System.out.println("Enqueue buffer region read, waiting for user event");
120 |         cl_event readEvent = new cl_event();
121 |         clEnqueueReadBufferRect(
122 |             commandQueue, dstMem, false, bufferOffset, hostOffset, 
123 |             region, bufferRowPitch, bufferSlicePitch, hostRowPitch, 
124 |             hostSlicePitch, Pointer.to(regionData), 1, 
125 |             new cl_event[]{userEvent}, readEvent);
126 | 
127 |         // Create a callback function which will be called when
128 |         // the read event reaches the status CL_COMPLETE
129 |         EventCallbackFunction eventCallbackFunction = new EventCallbackFunction()
130 |         {
131 |             @Override
132 |             public void function(cl_event event, int type, Object user_data)
133 |             {
134 |                 System.out.println("Event "+event+" reached status "+
135 |                     CL.stringFor_command_execution_status(type)+
136 |                     ", user data: "+user_data);
137 |                 
138 |                 // Print the output that was read
139 |                 System.out.println("Buffer region was read:");
140 |                 print2D(regionData, regionSizeX);
141 |             }
142 |         };
143 |         clSetEventCallback(readEvent, CL.CL_COMPLETE, 
144 |             eventCallbackFunction, "Event callback user data");
145 |         
146 |         // Create a thread that will set the user event status
147 |         // to "CL_COMPLETE" after a few seconds. This will 
148 |         // allow the read command to be completed. When the
149 |         // read command is completed, the event callback 
150 |         // function will be called.
151 |         Thread thread = new Thread(new Runnable()
152 |         {
153 |             public void run()
154 |             {
155 |                 System.out.println("Waiting before setting " +
156 |                     "event status to CL_COMPLETE");
157 |                 for (int i=3; i>=1; i--)
158 |                 {
159 |                     System.out.println("Seconds left: "+i);
160 |                     try
161 |                     {
162 |                         Thread.sleep(1000);
163 |                     }
164 |                     catch (InterruptedException e)
165 |                     {
166 |                         Thread.currentThread().interrupt();
167 |                     }
168 |                 }
169 |                 System.out.println("Setting event status to CL_COMPLETE");
170 |                 clSetUserEventStatus(userEvent, CL.CL_COMPLETE);
171 |             }
172 |         });
173 |         thread.start();
174 |         
175 |         
176 |         // Create the destructor callback which will be called
177 |         // when the output memory object is destroyed
178 |         MemObjectDestructorCallbackFunction 
179 |             memObjectDestructorCallbackFunction = 
180 |                 new MemObjectDestructorCallbackFunction()
181 |         {
182 |             @Override
183 |             public void function(cl_mem memobj, Object user_data)
184 |             {
185 |                 System.out.println("Memory object "+memobj+
186 |                     " was destroyed, user data: "+user_data);
187 |             }
188 |         };
189 |         clSetMemObjectDestructorCallback(dstMem, 
190 |             memObjectDestructorCallbackFunction, 
191 |             "Memory object destructor callback user data");
192 |         
193 |         // Wait until all commands have completed
194 |         clFinish(commandQueue);
195 |         
196 |         // Release kernel, program, and memory objects. 
197 |         clReleaseKernel(kernel);
198 |         clReleaseProgram(program);
199 |         clReleaseCommandQueue(commandQueue);
200 |         clReleaseContext(context);
201 |         clReleaseMemObject(srcMemA);
202 |         clReleaseMemObject(srcMemB);
203 |         
204 |         // Releasing the output memory object will cause  
205 |         // the destructor callback to be called.        
206 |         clReleaseMemObject(dstMem);
207 | 
208 |         // Verify the result
209 |         float reference[] = new float[]{10,12,18,20};
210 |         float result[] = new float[regionSizeX*regionSizeY];
211 |         regionData.get(result);
212 |         boolean passed = Arrays.equals(result, reference);
213 |         System.out.println(passed ? "PASSED" : "FAILED");
214 |     }
215 |     
216 |     /**
217 |      * Default OpenCL initialization: fills the static context, command queue,
218 |      * program and kernel fields and prints the name of the selected device
219 |      */
220 |     private static void defaultInitialization()
221 |     {
222 |         // The platform, device type and device number
223 |         // that will be used
224 |         final int platformIndex = 0;
225 |         final long deviceType = CL_DEVICE_TYPE_ALL;
226 |         final int deviceIndex = 0;
227 | 
228 |         // Enable exceptions and subsequently omit error checks in this sample
229 |         CL.setExceptionsEnabled(true);
230 | 
231 |         // Obtain the number of platforms
232 |         int numPlatformsArray[] = new int[1];
233 |         clGetPlatformIDs(0, null, numPlatformsArray);
234 |         int numPlatforms = numPlatformsArray[0];
235 | 
236 |         // Obtain a platform ID
237 |         cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
238 |         clGetPlatformIDs(platforms.length, platforms, null);
239 |         cl_platform_id platform = platforms[platformIndex];
240 | 
241 |         // Initialize the context properties
242 |         cl_context_properties contextProperties = new cl_context_properties();
243 |         contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
244 |         
245 |         // Obtain the number of devices for the platform
246 |         int numDevicesArray[] = new int[1];
247 |         clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
248 |         int numDevices = numDevicesArray[0];
249 |         
250 |         // Obtain a device ID 
251 |         cl_device_id devices[] = new cl_device_id[numDevices];
252 |         clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
253 |         cl_device_id device = devices[deviceIndex];
254 | 
255 |         // Create a context for the selected device
256 |         context = clCreateContext(
257 |             contextProperties, 1, new cl_device_id[]{device}, 
258 |             null, null, null);
259 |         
260 |         String deviceName = getString(device, CL_DEVICE_NAME);
261 |         System.out.printf("CL_DEVICE_NAME: %s\n", deviceName);
262 |         
263 |         // Create a command-queue for the selected device
264 |         cl_queue_properties properties = new cl_queue_properties();
265 |         commandQueue = clCreateCommandQueueWithProperties(
266 |             context, device, properties, null);
267 | 
268 |         // Create the program from the source code
269 |         program = clCreateProgramWithSource(context,
270 |             1, new String[]{ programSource }, null, null);
271 |         
272 |         // Build the program
273 |         clBuildProgram(program, 0, null, null, null, null);
274 |         
275 |         // Create the kernel
276 |         kernel = clCreateKernel(program, "sampleKernel", null);
277 |     }
278 |     
279 |     /**
280 |      * Writes the contents of the buffer to standard output as rows of values
281 |      * 
282 |      * @param data The buffer, printed from index 0 up to its capacity
283 |      * @param columns The number of values after which a row is ended
284 |      */
285 |     private static void print2D(FloatBuffer data, int columns)
286 |     {
287 |         StringBuilder text = new StringBuilder();
288 |         for (int index=0; index<data.capacity(); index++)
289 |         {
290 |             text.append(String.format(Locale.ENGLISH, "%5.1f ", data.get(index)));
291 |             if ((index+1) % columns == 0)
292 |             {
293 |                 text.append("\n");
294 |             }
295 |         }
296 |         System.out.print(text.toString());
297 |     }
298 |     
299 |     private static String getString(cl_device_id device, int paramName)
300 |     {
301 |         // Device info as a string. First query: ask only for the value size
302 |         long[] byteCount = new long[1];
303 |         clGetDeviceInfo(device, paramName, 0, null, byteCount);
304 | 
305 |         // Second query: fetch the value into an array of exactly that size
306 |         byte[] bytes = new byte[(int)byteCount[0]];
307 |         clGetDeviceInfo(device, paramName, bytes.length, Pointer.to(bytes), null);
308 | 
309 |         // Convert to a string, leaving out the last byte (the trailing \0)
310 |         return new String(bytes, 0, bytes.length-1);
311 |     }
312 |     
313 |     
314 | }
315 | 


--------------------------------------------------------------------------------
/src/main/java/org/jocl/samples/JOCLSample_1_2_KernelArgs.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * JOCL - Java bindings for OpenCL
  3 |  * 
  4 |  * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
  5 |  */
  6 | package org.jocl.samples;
  7 | 
  8 | import static org.jocl.CL.*;
  9 | 
 10 | import org.jocl.*;
 11 | 
 12 | /**
 13 |  * A small JOCL sample, demonstrating some of the new features
 14 |  * that have been introduced with OpenCL 1.2.
 15 |  */
 16 | public class JOCLSample_1_2_KernelArgs
 17 | {
 18 |     // The platform, device type and device number that will be used
 19 |     private static final int platformIndex = 0;
 20 |     private static final long deviceType = CL_DEVICE_TYPE_ALL;
 21 |     private static final int deviceIndex = 0;
 22 |     
 23 |     /**
 24 |      * The source code of the OpenCL program to execute
 25 |      */
 26 |     private static String programSource =
 27 |         "__kernel void "+"\n"+
 28 |         "sampleKernel(__global const volatile float *first,"+"\n"+
 29 |         "             __constant char *second,"+"\n"+
 30 |         "             __local unsigned int *third,"+"\n"+
 31 |         "             unsigned short fourth,"+"\n"+
 32 |         "             __write_only image2d_t fifth)"+"\n"+
 33 |         "{"+"\n"+
 34 |         "}"+"\n";
 35 | 
 36 |     private static cl_context context;
 37 |     private static cl_device_id device;
 38 | 
 39 |     /**
 40 |      * The entry point of this sample: builds the kernel with argument
 41 |      * information enabled and prints the metadata of each kernel argument
 42 |      * @param args Not used
 43 |      */
 44 |     public static void main(String args[])
 45 |     {
 46 |         // Initialize the context and a device
 47 |         defaultInitialization();
 48 |         
 49 |         // Create the program from the source code
 50 |         cl_program program = clCreateProgramWithSource(context,
 51 |             1, new String[]{ programSource }, null, null);
 52 |         
 53 |         // Build the program. The "-cl-kernel-arg-info" option must be given
 54 |         // to keep the kernel argument information that is queried below
 55 |         clBuildProgram(program, 0, null, "-cl-kernel-arg-info", null, null);
 56 |         
 57 |         // Create the kernel
 58 |         cl_kernel kernel = clCreateKernel(program, "sampleKernel", null);
 59 | 
 60 |         // Arrays that will store the parameter values
 61 |         int paramValueInt[] = { 0 };
 62 |         long paramValueLong[] = { 0 };
 63 |         long sizeArray[] = { 0 };
 64 |         byte paramValueCharArray[] = new byte[1024];
 65 |         
 66 |         // Obtain the number of arguments that the kernel has
 67 |         clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, 
 68 |             Sizeof.cl_uint, Pointer.to(paramValueInt), null);
 69 |         int numArgs = paramValueInt[0];
 70 |         
 71 |         // Obtain information about each argument
 72 |         for (int a=0; a<numArgs; a++)
 73 |         {
 74 |             // The argument name: query its size, then read the bytes
 75 |             clGetKernelArgInfo(kernel, a, CL_KERNEL_ARG_NAME, 0, null, sizeArray);
 76 |             clGetKernelArgInfo(kernel, a, CL_KERNEL_ARG_NAME, 
 77 |                 sizeArray[0], Pointer.to(paramValueCharArray), null);
 78 |             String argName = new String(paramValueCharArray, 0, (int)sizeArray[0]-1);
 79 |             
 80 |             // The address qualifier (global/local/constant/private)
 81 |             clGetKernelArgInfo(kernel, a, CL_KERNEL_ARG_ADDRESS_QUALIFIER,
 82 |                 Sizeof.cl_int, Pointer.to(paramValueInt), null);
 83 |             int addressQualifier = paramValueInt[0];
 84 |             
 85 |             // The access qualifier (readOnly/writeOnly/readWrite/none)
 86 |             clGetKernelArgInfo(kernel, a, CL_KERNEL_ARG_ACCESS_QUALIFIER,
 87 |                 Sizeof.cl_int, Pointer.to(paramValueInt), null);
 88 |             int accessQualifier = paramValueInt[0];
 89 |             
 90 |             // The type qualifier bitfield (const/restrict/volatile/none)
 91 |             clGetKernelArgInfo(kernel, a, CL_KERNEL_ARG_TYPE_QUALIFIER,
 92 |                 Sizeof.cl_long, Pointer.to(paramValueLong), null);
 93 |             long typeQualifier = paramValueLong[0];
 94 |             
 95 |             // The type name: query its size, then read the bytes
 96 |             clGetKernelArgInfo(kernel, a, CL_KERNEL_ARG_TYPE_NAME, 0, null, sizeArray);
 97 |             clGetKernelArgInfo(kernel, a, CL_KERNEL_ARG_TYPE_NAME, 
 98 |                 sizeArray[0], Pointer.to(paramValueCharArray), null);
 99 |             String typeName = new String(paramValueCharArray, 0, (int)sizeArray[0]-1);
100 |             
101 |             // Print the results:
102 |             System.out.println("Argument "+a+":");
103 |             System.out.println("    Name: "+argName);
104 |             System.out.println("    Address qualifier: "+
105 |                 CL.stringFor_cl_kernel_arg_address_qualifier(addressQualifier));
106 |             System.out.println("    Access qualifier : "+
107 |                 CL.stringFor_cl_kernel_arg_access_qualifier(accessQualifier));
108 |             System.out.println("    Type qualifier   : "+
109 |                 CL.stringFor_cl_kernel_arg_type_qualifer(typeQualifier));
110 |             System.out.println("    Type name        : "+typeName);
111 |         }
112 |         
113 |         // Release the kernel, the program and the context used for the query
114 |         clReleaseKernel(kernel);
115 |         clReleaseProgram(program);
116 |         clReleaseContext(context);
117 |     }
118 |     
119 |     /**
120 |      * Selects the platform and device, exits if the platform reports an
121 |      * OpenCL 1.x version below 1.2, and creates the context for the device
122 |      */
123 |     private static void defaultInitialization()
124 |     {
125 |         // Let failing CL calls throw, so no error codes are checked below
126 |         CL.setExceptionsEnabled(true);
127 | 
128 |         // Query how many platforms are available
129 |         int[] platformCount = new int[1];
130 |         clGetPlatformIDs(0, null, platformCount);
131 |         int numPlatforms = platformCount[0];
132 | 
133 |         // Fetch all platform IDs and pick the configured one
134 |         cl_platform_id[] allPlatforms = new cl_platform_id[numPlatforms];
135 |         clGetPlatformIDs(allPlatforms.length, allPlatforms, null);
136 |         cl_platform_id platform = allPlatforms[platformIndex];
137 | 
138 |         // Read the platform version string (size query, then content)
139 |         long[] versionSize = { 0 };
140 |         clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, null, versionSize);
141 |         byte[] versionRaw = new byte[(int)versionSize[0]];
142 |         clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 
143 |             versionRaw.length, Pointer.to(versionRaw), null);
144 |         String versionString = new String(versionRaw, 0, versionRaw.length-1);
145 |         System.out.println("Platform version: "+versionString);
146 |         // Characters 7 to 9 of that string are read as "<major>.<minor>";
147 |         // the sample exits when they denote a 1.x version with x < 2
148 |         String versionNumberString = versionString.substring(7, 10);
149 |         try
150 |         {
151 |             int major = Integer.parseInt(versionNumberString.substring(0, 1));
152 |             int minor = Integer.parseInt(versionNumberString.substring(2, 3));
153 |             if ((major == 1) && (minor < 2))
154 |             {
155 |                 System.err.println(
156 |                     "Platform only supports OpenCL "+versionNumberString);
157 |                 System.exit(1);
158 |             }
159 |         }
160 |         catch (NumberFormatException e)
161 |         {
162 |             System.err.println(
163 |                 "Invalid version number: "+versionNumberString);
164 |             System.exit(1);
165 |         }
166 |         
167 |         // Set up context properties that refer to the chosen platform
168 |         cl_context_properties contextProperties = new cl_context_properties();
169 |         contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
170 |         
171 |         // Query how many devices of the requested type the platform has
172 |         int[] deviceCount = new int[1];
173 |         clGetDeviceIDs(platform, deviceType, 0, null, deviceCount);
174 |         int numDevices = deviceCount[0];
175 |         
176 |         // Fetch the device IDs and remember the configured one
177 |         cl_device_id[] allDevices = new cl_device_id[numDevices];
178 |         clGetDeviceIDs(platform, deviceType, numDevices, allDevices, null);
179 |         device = allDevices[deviceIndex];
180 | 
181 |         // Create a context that contains only the selected device
182 |         context = clCreateContext(
183 |             contextProperties, 1, new cl_device_id[]{device}, 
184 |             null, null, null);
185 |     }
186 | }
187 | 


--------------------------------------------------------------------------------
/src/main/java/org/jocl/samples/JOCLSample_2_0_SVM.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * JOCL - Java bindings for OpenCL
  3 |  * 
  4 |  * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
  5 |  */
  6 | package org.jocl.samples;
  7 | 
  8 | import static org.jocl.CL.*;
  9 | 
 10 | import java.nio.*;
 11 | import java.util.Arrays;
 12 | 
 13 | import org.jocl.*;
 14 | 
 15 | /**
 16 |  * A JOCL sample demonstrating the SVM (shared virtual memory)
 17 |  * functionality that has been introduced with OpenCL 2.0
 18 |  */
 19 | public class JOCLSample_2_0_SVM
 20 | {
 21 |     /**
 22 |      * The source code of the OpenCL program to execute
 23 |      */
 24 |     private static String programSource =
 25 |         "__kernel void "+
 26 |         "sampleKernel(__global const float *a,"+
 27 |         "             __global const float *b,"+
 28 |         "             __global float *c)"+
 29 |         "{"+
 30 |         "    int gid = get_global_id(0);"+
 31 |         "    c[gid] = a[gid] * b[gid];"+
 32 |         "}";
 33 |     
 34 |     private static cl_context context;
 35 |     private static cl_device_id device;
 36 |     private static cl_command_queue commandQueue;
 37 |     private static cl_kernel kernel;
 38 |     // Entry point: run the kernel into SVM, edit the SVM on the host, rerun
 39 |     public static void main(String[] args)
 40 |     {
 41 |         initCL();
 42 |         
 43 |         // Create input- and output data 
 44 |         int n = 10;
 45 |         float srcArrayA[] = new float[n];
 46 |         float srcArrayB[] = new float[n];
 47 |         for (int i=0; i<n; i++)
 48 |         {
 49 |             srcArrayA[i] = i;
 50 |             srcArrayB[i] = i;
 51 |         }
 52 |         Pointer srcA = Pointer.to(srcArrayA);
 53 |         Pointer srcB = Pointer.to(srcArrayB);
 54 | 
 55 |         // Allocate the memory objects for the input data
 56 |         cl_mem srcMemA = clCreateBuffer(context, 
 57 |             CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
 58 |             Sizeof.cl_float * n, srcA, null);
 59 |         cl_mem srcMemB = clCreateBuffer(context, 
 60 |             CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
 61 |             Sizeof.cl_float * n, srcB, null);
 62 |         
 63 |         // Allocate shared virtual memory (SVM) with room for n floats
 64 |         Pointer svm = clSVMAlloc(context, 
 65 |             CL_MEM_READ_WRITE, Sizeof.cl_float * n, 0);
 66 |        
 67 |         // Set the arguments for the kernel: two buffers in, the SVM out
 68 |         int a = 0;
 69 |         clSetKernelArg(kernel, a++, Sizeof.cl_mem, Pointer.to(srcMemA));
 70 |         clSetKernelArg(kernel, a++, Sizeof.cl_mem, Pointer.to(srcMemB));
 71 |         clSetKernelArgSVMPointer(kernel, a++, svm);
 72 |         
 73 |         // Execute the kernel, which writes its results into the SVM
 74 |         clEnqueueNDRangeKernel(commandQueue, kernel, 1, 
 75 |             null, new long[]{n}, null, 0, null, null);
 76 |         
 77 |         // Map the SVM into the host memory space. NOTE(review): only
 78 |         // CL_MAP_WRITE is requested, although the loop below also reads
 79 |         clEnqueueSVMMap(commandQueue, true, CL_MAP_WRITE, 
 80 |             svm, Sizeof.cl_float * n, 0, null, null);
 81 |         
 82 |         // Obtain the contents of the SVM as a FloatBuffer
 83 |         FloatBuffer fb = svm.getByteBuffer(0, Sizeof.cl_float * n).
 84 |             order(ByteOrder.nativeOrder()).asFloatBuffer();
 85 | 
 86 |         // Print each value found in the SVM, then overwrite it with i+i
 87 |         for (int i=0; i<n; i++)
 88 |         {
 89 |             float fOld = fb.get(i);
 90 |             float fNew = i+i;
 91 |             System.out.println("At "+i+" got "+fOld+", setting "+fNew);
 92 |             fb.put(i, fNew);
 93 |         }
 94 |         
 95 |         // Enqueue the command to un-map the SVM
 96 |         clEnqueueSVMUnmap(commandQueue, svm, 0, null, null);
 97 |         
 98 |         // Create output memory
 99 |         cl_mem dstMem = clCreateBuffer(context, 
100 |             CL_MEM_READ_WRITE, 
101 |             Sizeof.cl_float * n, null, null);
102 |         
103 |         // Execute the kernel again, using the SVM for both inputs
104 |         // and writing into the output memory this time
105 |         a = 0;
106 |         clSetKernelArgSVMPointer(kernel, a++, svm);
107 |         clSetKernelArgSVMPointer(kernel, a++, svm);
108 |         clSetKernelArg(kernel, a++, Sizeof.cl_mem, Pointer.to(dstMem));
109 |         clEnqueueNDRangeKernel(commandQueue, kernel, 1, 
110 |             null, new long[]{n}, null, 0, null, null);
111 |         
112 |         // Read the output data into a host array
113 |         float dstArray[] = new float[n];
114 |         Pointer dst = Pointer.to(dstArray);
115 |         clEnqueueReadBuffer(commandQueue, dstMem, CL_TRUE, 0,
116 |             n * Sizeof.cl_float, dst, 0, null, null);
117 |         
118 |         // Enqueue the command to free the SVM data,
119 |         // registering an example callback
120 |         SVMFreeFunction callback = new SVMFreeFunction()
121 |         {
122 |             @Override
123 |             public void function(cl_command_queue queue, 
124 |                 int num_svm_pointers, Pointer[] svm_pointers, Object user_data)
125 |             {
126 |                 System.out.println(
127 |                     "Callback for freeing "+ num_svm_pointers+
128 |                     " SVM pointers, user data is "+user_data);
129 |             }
130 |         };
131 |         Object userData = "SampleUserData";
132 |         clEnqueueSVMFree(commandQueue, 1, new Pointer[]{svm}, 
133 |             callback, userData, 0, null, null);
134 | 
135 |         clFinish(commandQueue);
136 |         
137 |         // Release the memory objects, kernel, command queue and context
138 |         clReleaseMemObject(srcMemA);
139 |         clReleaseMemObject(srcMemB);
140 |         clReleaseMemObject(dstMem);
141 |         clReleaseKernel(kernel);
142 |         clReleaseCommandQueue(commandQueue);
143 |         clReleaseContext(context);
144 |         
145 |         // Verify the result against the products of the doubled inputs
146 |         boolean passed = true;
147 |         final float epsilon = 1e-7f;
148 |         for (int i=0; i<n; i++)
149 |         {
150 |             float x = dstArray[i];
151 |             float y = 
152 |                 (srcArrayA[i]+srcArrayA[i]) * 
153 |                 (srcArrayB[i]+srcArrayB[i]);
154 |             boolean epsilonEqual = Math.abs(x - y) <= epsilon * Math.abs(x);
155 |             if (!epsilonEqual)
156 |             {
157 |                 passed = false;
158 |                 break;
159 |             }
160 |         }
161 |         System.out.println("Test "+(passed?"PASSED":"FAILED"));
162 |         if (n <= 10)
163 |         {
164 |             System.out.println("Result: "+Arrays.toString(dstArray));
165 |         }
166 |         
167 |     }
168 |     
169 |     
170 |     /**
171 |      * Default OpenCL initialization: selects the first device of the
172 |      * first platform that reports OpenCL 2.0 or newer, and creates the
173 |      * context, the command queue and the kernel for this device.
174 |      */
175 |     private static void initCL()
176 |     {
177 |         // The platform and device type that will be used
178 |         final int platformIndex = 0;
179 |         final long deviceType = CL_DEVICE_TYPE_ALL;
180 | 
181 |         // Let JOCL throw exceptions on errors, so that the return
182 |         // codes do not have to be checked in this sample
183 |         CL.setExceptionsEnabled(true);
184 | 
185 |         // Obtain the number of platforms
186 |         int platformCount[] = new int[1];
187 |         clGetPlatformIDs(0, null, platformCount);
188 | 
189 |         // Obtain all platform IDs, and pick the one to use
190 |         cl_platform_id availablePlatforms[] =
191 |             new cl_platform_id[platformCount[0]];
192 |         clGetPlatformIDs(availablePlatforms.length, availablePlatforms, null);
193 |         cl_platform_id platform = availablePlatforms[platformIndex];
194 | 
195 |         // Initialize the context properties
196 |         cl_context_properties contextProperties = new cl_context_properties();
197 |         contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
198 | 
199 |         // Obtain the number of devices for the platform
200 |         int deviceCount[] = new int[1];
201 |         clGetDeviceIDs(platform, deviceType, 0, null, deviceCount);
202 | 
203 |         // Obtain all device IDs
204 |         cl_device_id allDevices[] = new cl_device_id[deviceCount[0]];
205 |         clGetDeviceIDs(
206 |             platform, deviceType, allDevices.length, allDevices, null);
207 | 
208 |         // Use the first device that supports OpenCL 2.0, skip all others
209 |         for (cl_device_id candidate : allDevices)
210 |         {
211 |             String name = getString(candidate, CL_DEVICE_NAME);
212 |             float version = getOpenCLVersion(candidate);
213 |             if (!(version >= 2.0))
214 |             {
215 |                 System.out.println("Skipping device "+
216 |                     name+", version "+version);
217 |                 continue;
218 |             }
219 |             System.out.println("Using device "+
220 |                 name+", version "+version);
221 |             device = candidate;
222 |             break;
223 |         }
224 |         if (device == null)
225 |         {
226 |             System.out.println("No OpenCL 2.0 capable device found");
227 |             System.exit(1);
228 |         }
229 | 
230 |         // Create a context that contains only the selected device
231 |         cl_device_id contextDevices[] = { device };
232 |         context = clCreateContext(
233 |             contextProperties, 1, contextDevices, null, null, null);
234 | 
235 |         // Create the command queue, without any queue properties
236 |         cl_queue_properties queueProperties = new cl_queue_properties();
237 |         commandQueue = clCreateCommandQueueWithProperties(
238 |             context, device, queueProperties, null);
239 | 
240 |         // Create the program from the source code
241 |         String sources[] = { programSource };
242 |         cl_program program = clCreateProgramWithSource(
243 |             context, 1, sources, null, null);
244 | 
245 |         // Build the program. It's important to specify the
246 |         // -cl-std=CL2.0
247 |         // build parameter here!
248 |         clBuildProgram(program, 0, null, "-cl-std=CL2.0", null, null);
249 | 
250 |         // Create the kernel. The program handle is not used
251 |         // afterwards, so it is released right away
252 |         kernel = clCreateKernel(program, "sampleKernel", null);
253 |         clReleaseProgram(program);
254 |     }
255 |     
256 |     /**
257 |      * Queries the CL_DEVICE_VERSION string of the given device and
258 |      * converts the version number that it contains into a float
259 |      * 
260 |      * @param device The device
261 |      * @return The OpenCL version
262 |      */
263 |     private static float getOpenCLVersion(cl_device_id device)
264 |     {
265 |         // The characters 7 to 9 are parsed as the number; presumably
266 |         // the string always starts with "OpenCL X.Y" - TODO confirm
267 |         String number = getString(device, CL_DEVICE_VERSION).substring(7, 10);
268 |         return Float.parseFloat(number);
269 |     }
270 |     
271 |     /**
272 |      * Queries a device info parameter and returns its value as a String
273 |      *  
274 |      * @param device The device
275 |      * @param paramName The parameter name
276 |      * @return The value
277 |      */
278 |     private static String getString(cl_device_id device, int paramName)
279 |     {
280 |         // A first query without a target only reports the required size
281 |         long requiredSize[] = new long[1];
282 |         clGetDeviceInfo(device, paramName, 0, null, requiredSize);
283 | 
284 |         // The second query fills a byte array of exactly this size
285 |         byte chars[] = new byte[(int)requiredSize[0]];
286 |         Pointer target = Pointer.to(chars);
287 |         clGetDeviceInfo(device, paramName, chars.length, target, null);
288 |         // The last byte (the trailing \0) does not belong to the string
289 |         return new String(chars, 0, chars.length-1);
290 |     }
291 |     
292 |     
293 | }
294 | 


--------------------------------------------------------------------------------
/src/main/java/org/jocl/samples/JOCLSimpleImage.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * JOCL - Java bindings for OpenCL
  3 |  * 
  4 |  * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
  5 |  */
  6 | package org.jocl.samples;
  7 | 
  8 | import static org.jocl.CL.*;
  9 | 
 10 | import java.awt.*;
 11 | import java.awt.image.*;
 12 | import java.io.*;
 13 | 
 14 | import javax.imageio.ImageIO;
 15 | import javax.swing.*;
 16 | 
 17 | import org.jocl.*;
 18 | 
 19 | /**
 20 |  * A simple example demonstrating image handling between JOCL
 21 |  * and Swing. It shows an animation of a rotating image,
 22 |  * which is rotated using an OpenCL kernel involving some
 23 |  * basic image operations.
 24 |  */
 25 | public class JOCLSimpleImage
 26 | {
 27 |     /**
 28 |      * Entry point for this sample.
 29 |      *
 30 |      * @param args not used
 31 |      */
 32 |     public static void main(String args[])
 33 |     {
 34 |         SwingUtilities.invokeLater(new Runnable()
 35 |         {
 36 |             public void run()
 37 |             {
 38 |                 new JOCLSimpleImage();
 39 |             }
 40 |         });
 41 |     }
 42 | 
 43 |     /**
 44 |      * The source code of the kernel to execute. It will rotate the
 45 |      * input image by the given angle and write the result into the
 46 |      * output image.
 47 |      */
 48 |     private static String programSource =
 49 |         ""+ "\n" +
 50 |         "const sampler_t samplerIn = "+ "\n" +
 51 |         "    CLK_NORMALIZED_COORDS_FALSE | "+ "\n" +
 52 |         "    CLK_ADDRESS_CLAMP |"+ "\n" +
 53 |         "    CLK_FILTER_NEAREST;"+ "\n" +
 54 |         ""+ "\n" +
 55 |         "const sampler_t samplerOut = "+ "\n" +
 56 |         "    CLK_NORMALIZED_COORDS_FALSE |"+ "\n" +
 57 |         "    CLK_ADDRESS_CLAMP |"+ "\n" +
 58 |         "    CLK_FILTER_NEAREST;"+ "\n" +
 59 |         ""+ "\n" +
 60 |         "__kernel void rotateImage("+ "\n" +
 61 |         "    __read_only  image2d_t sourceImage, "+ "\n" +
 62 |         "    __write_only image2d_t targetImage, "+ "\n" +
 63 |         "    float angle)"+ "\n" +
 64 |         "{"+ "\n" +
 65 |         "    int gidX = get_global_id(0);"+ "\n" +
 66 |         "    int gidY = get_global_id(1);"+ "\n" +
 67 |         "    int w = get_image_width(sourceImage);"+ "\n" +
 68 |         "    int h = get_image_height(sourceImage);"+ "\n" +
 69 |         "    int cx = w/2;"+ "\n" +
 70 |         "    int cy = h/2;"+ "\n" +
 71 |         "    int dx = gidX-cx;"+ "\n" +
 72 |         "    int dy = gidY-cy;"+ "\n" +
 73 |         "    float ca = cos(angle);"+ "\n" +
 74 |         "    float sa = sin(angle);"+ "\n" +
 75 |         "    int inX = (int)(cx+ca*dx-sa*dy);"+ "\n" +
 76 |         "    int inY = (int)(cy+sa*dx+ca*dy);"+ "\n" +
 77 |         "    int2 posIn = {inX, inY};"+ "\n" +
 78 |         "    int2 posOut = {gidX, gidY};"+ "\n" +
 79 |         "    uint4 pixel = read_imageui(sourceImage, samplerIn, posIn);"+ "\n" +
 80 |         "    write_imageui(targetImage, posOut, pixel);"+ "\n" +
 81 |         "}";
 82 | 
 83 | 
 84 |     /**
 85 |      * Creates a BufferedImage with type TYPE_INT_RGB from the
 86 |      * file with the given name.
 87 |      *
 88 |      * @param fileName The file name
 89 |      * @return The image, or null if the file could not be read or decoded
 90 |      */
 91 |     private static BufferedImage createBufferedImage(String fileName)
 92 |     {
 93 |         BufferedImage image = null;
 94 |         try
 95 |         {
 96 |             image = ImageIO.read(new File(fileName));
 97 |         }
 98 |         catch (IOException e)
 99 |         {
100 |             e.printStackTrace();
101 |         }
102 |         if (image == null)
103 |         {
104 |             // Either reading failed, or ImageIO found no reader for the file
105 |             return null;
106 |         }
107 |         BufferedImage result = new BufferedImage(
108 |             image.getWidth(), image.getHeight(), BufferedImage.TYPE_INT_RGB);
109 |         Graphics g = result.createGraphics();
110 |         g.drawImage(image, 0, 0, null);
111 |         g.dispose();
112 |         return result;
113 |     }
114 | 
115 |     /**
116 |      * The input image
117 |      */
118 |     private BufferedImage inputImage;
119 | 
120 |     /**
121 |      * The output image
122 |      */
123 |     private BufferedImage outputImage;
124 | 
125 |     /**
126 |      * The OpenCL context
127 |      */
128 |     private cl_context context;
129 | 
130 |     /**
131 |      * The OpenCL command queue
132 |      */
133 |     private cl_command_queue commandQueue;
134 | 
135 |     /**
136 |      * The OpenCL kernel
137 |      */
138 |     private cl_kernel kernel;
139 | 
140 |     /**
141 |      * The memory object for the input image
142 |      */
143 |     private cl_mem inputImageMem;
144 | 
145 |     /**
146 |      * The memory object for the output image
147 |      */
148 |     private cl_mem outputImageMem;
149 | 
150 |     /**
151 |      * The width of the image
152 |      */
153 |     private int imageSizeX;
154 | 
155 |     /**
156 |      * The height of the image
157 |      */
158 |     private int imageSizeY;
159 | 
160 |     /**
161 |      * Creates the JOCLSimpleImage sample. Exits if the image is not readable
162 |      */
163 |     public JOCLSimpleImage()
164 |     {
165 |         // Read the input image file and create the output image
166 |         String fileName = "src/main/resources/data/lena512color.png";
167 |         inputImage = createBufferedImage(fileName);
168 |         if (inputImage == null)
169 |         {
170 |             System.out.println("Could not read image file "+fileName);
171 |             System.exit(1);
172 |         }
173 |         imageSizeX = inputImage.getWidth();
174 |         imageSizeY = inputImage.getHeight();
175 |         outputImage = new BufferedImage(
176 |             imageSizeX, imageSizeY, BufferedImage.TYPE_INT_RGB);
177 | 
178 |         // Create the panel showing the input and output images
179 |         JPanel mainPanel = new JPanel(new GridLayout(1,0));
180 |         mainPanel.add(new JLabel(new ImageIcon(inputImage)));
181 |         JLabel outputLabel = new JLabel(new ImageIcon(outputImage));
182 |         mainPanel.add(outputLabel);
183 | 
184 |         // Create the main frame
185 |         JFrame frame = new JFrame("JOCL Simple Image Sample");
186 |         frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
187 |         frame.setLayout(new BorderLayout());
188 |         frame.add(mainPanel, BorderLayout.CENTER);
189 |         frame.pack();
190 |         frame.setVisible(true);
191 | 
192 |         initCL();
193 |         initImageMem();
194 |         startAnimation(outputLabel);
195 |     }
196 | 
197 |     /**
198 |      * Starts the thread which will advance the animation state
199 |      * and call the animation method.
200 |      *
201 |      * @param outputComponent The component to repaint after each step
202 |      */
203 |     private void startAnimation(final Component outputComponent)
204 |     {
205 |         System.out.println("Starting animation...");
206 |         Thread thread = new Thread(new Runnable()
207 |         {
208 |             float angle = 0.0f;
209 |             public void run()
210 |             {
211 |                 while (true)
212 |                 {
213 |                     rotateImage(angle);
214 |                     angle += 0.1f;
215 |                     outputComponent.repaint();
216 |                     try
217 |                     {
218 |                         Thread.sleep(20);
219 |                     }
220 |                     catch (InterruptedException e)
221 |                     {
222 |                         Thread.currentThread().interrupt();
223 |                         return;
224 |                     }
225 |                 }
226 |             }
227 |         });
228 |         thread.setDaemon(true);
229 |         thread.start();
230 |     }
231 | 
232 | 
233 |     /**
234 |      * Initialize the OpenCL context, command queue and kernel
235 |      */
236 |     void initCL()
237 |     {
238 |         final int platformIndex = 0;
239 |         final long deviceType = CL_DEVICE_TYPE_ALL;
240 |         final int deviceIndex = 0;
241 | 
242 |         // Enable exceptions and subsequently omit error checks in this sample
243 |         CL.setExceptionsEnabled(true);
244 | 
245 |         // Obtain the number of platforms
246 |         int numPlatformsArray[] = new int[1];
247 |         clGetPlatformIDs(0, null, numPlatformsArray);
248 |         int numPlatforms = numPlatformsArray[0];
249 | 
250 |         // Obtain a platform ID
251 |         cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
252 |         clGetPlatformIDs(platforms.length, platforms, null);
253 |         cl_platform_id platform = platforms[platformIndex];
254 | 
255 |         // Initialize the context properties
256 |         cl_context_properties contextProperties = new cl_context_properties();
257 |         contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
258 | 
259 |         // Obtain the number of devices for the platform
260 |         int numDevicesArray[] = new int[1];
261 |         clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
262 |         int numDevices = numDevicesArray[0];
263 | 
264 |         // Obtain a device ID 
265 |         cl_device_id devices[] = new cl_device_id[numDevices];
266 |         clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
267 |         cl_device_id device = devices[deviceIndex];
268 | 
269 |         // Create a context for the selected device
270 |         context = clCreateContext(
271 |             contextProperties, 1, new cl_device_id[]{device}, 
272 |             null, null, null);
273 | 
274 |         // Check if images are supported
275 |         int imageSupport[] = new int[1];
276 |         clGetDeviceInfo (device, CL.CL_DEVICE_IMAGE_SUPPORT,
277 |             Sizeof.cl_int, Pointer.to(imageSupport), null);
278 |         System.out.println("Images supported: "+(imageSupport[0]==1));
279 |         if (imageSupport[0]==0)
280 |         {
281 |             System.out.println("Images are not supported");
282 |             System.exit(1);
283 |             return;
284 |         }
285 | 
286 |         // Create a command-queue for the selected device
287 |         // NOTE(review): the OpenCL specification names CL_QUEUE_PROPERTIES
288 |         // as the key for these flags - confirm the keys that are used here
289 |         cl_queue_properties properties = new cl_queue_properties();
290 |         properties.addProperty(CL_QUEUE_PROFILING_ENABLE, 1);
291 |         properties.addProperty(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 1);
292 |         commandQueue = clCreateCommandQueueWithProperties(
293 |             context, device, properties, null);
294 | 
295 |         // Create the program
296 |         System.out.println("Creating program...");
297 |         cl_program program = clCreateProgramWithSource(context,
298 |             1, new String[]{ programSource }, null, null);
299 | 
300 |         // Build the program
301 |         System.out.println("Building program...");
302 |         clBuildProgram(program, 0, null, null, null, null);
303 | 
304 |         // Create the kernel. The program handle is not used afterwards
305 |         System.out.println("Creating kernel...");
306 |         kernel = clCreateKernel(program, "rotateImage", null);
307 |         clReleaseProgram(program);
308 |     }
309 | 
310 |     /**
311 |      * Initialize the memory objects for the input and output images
312 |      */
313 |     private void initImageMem()
314 |     {
315 |         // Create the memory object for the input- and output image
316 |         DataBufferInt dataBufferSrc =
317 |             (DataBufferInt)inputImage.getRaster().getDataBuffer();
318 |         int dataSrc[] = dataBufferSrc.getData();
319 | 
320 |         cl_image_format imageFormat = new cl_image_format();
321 |         imageFormat.image_channel_order = CL_RGBA;
322 |         imageFormat.image_channel_data_type = CL_UNSIGNED_INT8;
323 | 
324 |         inputImageMem = clCreateImage2D(
325 |             context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
326 |             new cl_image_format[]{imageFormat}, imageSizeX, imageSizeY,
327 |             imageSizeX * Sizeof.cl_uint, Pointer.to(dataSrc), null);
328 | 
329 |         outputImageMem = clCreateImage2D(
330 |             context, CL_MEM_WRITE_ONLY,
331 |             new cl_image_format[]{imageFormat}, imageSizeX, imageSizeY,
332 |             0, null, null);
333 |     }
334 | 
335 | 
336 |     /**
337 |      * Rotate the input image by the given angle, and write it into
338 |      * the output image
339 |      *
340 |      * @param angle The rotation angle
341 |      */
342 |     void rotateImage(float angle)
343 |     {
344 |         // Set up the work size and arguments, and execute the kernel
345 |         long globalWorkSize[] = { imageSizeX, imageSizeY };
346 |         clSetKernelArg(kernel, 0, Sizeof.cl_mem, Pointer.to(inputImageMem));
347 |         clSetKernelArg(kernel, 1, Sizeof.cl_mem, Pointer.to(outputImageMem));
348 |         clSetKernelArg(kernel, 2, Sizeof.cl_float,
349 |             Pointer.to(new float[]{angle}));
350 |         clEnqueueNDRangeKernel(commandQueue, kernel, 2, null,
351 |             globalWorkSize, null, 0, null, null);
352 | 
353 |         // Read the pixel data into the output image
354 |         DataBufferInt dataBufferDst =
355 |             (DataBufferInt)outputImage.getRaster().getDataBuffer();
356 |         int dataDst[] = dataBufferDst.getData();
357 |         clEnqueueReadImage(
358 |             commandQueue, outputImageMem, true, new long[3],
359 |             new long[]{imageSizeX, imageSizeY, 1},
360 |             imageSizeX * Sizeof.cl_uint, 0,
361 |             Pointer.to(dataDst), 0, null, null);
362 |     }
363 | }
364 | 
365 | 


--------------------------------------------------------------------------------
/src/main/java/org/jocl/samples/JOCLSimpleMandelbrot.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * JOCL - Java bindings for OpenCL
  3 |  * 
  4 |  * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
  5 |  */
  6 | package org.jocl.samples;
  7 | 
  8 | import static org.jocl.CL.*;
  9 | 
 10 | import java.awt.*;
 11 | import java.awt.event.*;
 12 | import java.awt.image.*;
 13 | import java.io.*;
 14 | 
 15 | import javax.swing.*;
 16 | 
 17 | import org.jocl.*;
 18 | 
 19 | /**
 20 |  * A class that uses a simple OpenCL kernel to compute the
 21 |  * Mandelbrot set and displays it in an image
 22 |  */
 23 | public class JOCLSimpleMandelbrot
 24 | {
 25 |     /**
 26 |      * Entry point: creates the sample on the event dispatch thread.
 27 |      * @param args not used
 28 |      */
 29 |     public static void main(String args[])
 30 |     {
 31 |         Runnable launcher = new Runnable()
 32 |         {
 33 |             public void run()
 34 |             {
 35 |                 new JOCLSimpleMandelbrot(500,500);
 36 |             }
 37 |         };
 38 |         SwingUtilities.invokeLater(launcher);
 39 |     }
 40 | 
 41 |     /**
 42 |      * The image which will contain the Mandelbrot pixel data
 43 |      */
 44 |     private BufferedImage image;
 45 | 
 46 |     /**
 47 |      * The width of the image
 48 |      */
 49 |     private int sizeX = 0;
 50 | 
 51 |     /**
 52 |      * The height of the image
 53 |      */
 54 |     private int sizeY = 0;
 55 | 
 56 |     /**
 57 |      * The component which is used for rendering the image
 58 |      */
 59 |     private JComponent imageComponent;
 60 |     
 61 |     /** 
 62 |      * The OpenCL context
 63 |      */
 64 |     private cl_context context;
 65 | 
 66 |     /**
 67 |      * The OpenCL command queue
 68 |      */
 69 |     private cl_command_queue commandQueue;
 70 | 
 71 |     /**
 72 |      * The OpenCL kernel which will actually compute the Mandelbrot
 73 |      * set and store the pixel data in a CL memory object
 74 |      */
 75 |     private cl_kernel kernel;
 76 | 
 77 |     /**
 78 |      * The OpenCL memory object which stores the pixel data
 79 |      */
 80 |     private cl_mem pixelMem;
 81 |     
 82 |     /**
 83 |      * An OpenCL memory object which stores a nifty color map,
 84 |      * encoded as integers combining the RGB components of
 85 |      * the colors.
 86 |      */
 87 |     private cl_mem colorMapMem;
 88 | 
 89 |     /**
 90 |      * The color map which will be copied to the OpenCL memory
 91 |      * object for the color map.
 92 |      */
 93 |     private int colorMap[];
 94 |     
 95 |     /**
 96 |      * The minimum x-value of the area in which the Mandelbrot 
 97 |      * set should be computed
 98 |      */
 99 |     private float x0 = -2f;
100 | 
101 |     /**
102 |      * The minimum y-value of the area in which the Mandelbrot 
103 |      * set should be computed
104 |      */
105 |     private float y0 = -1.3f;
106 | 
107 |     /**
108 |      * The maximum x-value of the area in which the Mandelbrot 
109 |      * set should be computed
110 |      */
111 |     private float x1 = 0.6f;
112 | 
113 |     /**
114 |      * The maximum y-value of the area in which the Mandelbrot 
115 |      * set should be computed
116 |      */
117 |     private float y1 = 1.3f;
118 | 
119 |     
120 |     /**
121 |      * Creates the JOCLSimpleMandelbrot sample and shows it in a frame
122 |      * 
123 |      * @param width The width of the image
124 |      * @param height The height of the image
125 |      */
126 |     public JOCLSimpleMandelbrot(int width, int height)
127 |     {
128 |         sizeX = width;
129 |         sizeY = height;
130 | 
131 |         // Create the image, and a panel that paints this image
132 |         image = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
133 |         imageComponent = new JPanel()
134 |         {
135 |             private static final long serialVersionUID = 1L;
136 |             public void paintComponent(Graphics graphics)
137 |             {
138 |                 super.paintComponent(graphics);
139 |                 graphics.drawImage(image, 0, 0, this);
140 |             }
141 |         };
142 | 
143 |         // Initialize the mouse interaction
144 |         initInteraction();
145 | 
146 |         // Initialize OpenCL and trigger the initial image update
147 |         initCL();
148 |         updateImage();
149 | 
150 |         // Create the main frame, let the preferred size of the
151 |         // image component determine its size, and show it
152 |         JFrame window = new JFrame("JOCL Simple Mandelbrot");
153 |         window.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
154 |         window.setLayout(new BorderLayout());
155 |         imageComponent.setPreferredSize(new Dimension(width, height));
156 |         window.add(imageComponent, BorderLayout.CENTER);
157 |         window.pack();
158 |         window.setVisible(true);
159 |     }
160 |     
161 |     /**
162 |      * Initialize OpenCL: Create the context, the command queue, the
163 |      * kernel, and the memory objects for the pixel data and the color map.
164 |      */
165 |     private void initCL()
166 |     {
167 |         final int platformIndex = 0;
168 |         final long deviceType = CL_DEVICE_TYPE_ALL;
169 |         final int deviceIndex = 0;
170 | 
171 |         // Enable exceptions and subsequently omit error checks in this sample
172 |         CL.setExceptionsEnabled(true);
173 | 
174 |         // Obtain the number of platforms
175 |         int numPlatformsArray[] = new int[1];
176 |         clGetPlatformIDs(0, null, numPlatformsArray);
177 |         int numPlatforms = numPlatformsArray[0];
178 | 
179 |         // Obtain a platform ID
180 |         cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
181 |         clGetPlatformIDs(platforms.length, platforms, null);
182 |         cl_platform_id platform = platforms[platformIndex];
183 | 
184 |         // Initialize the context properties
185 |         cl_context_properties contextProperties = new cl_context_properties();
186 |         contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
187 | 
188 |         // Obtain the number of devices for the platform
189 |         int numDevicesArray[] = new int[1];
190 |         clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
191 |         int numDevices = numDevicesArray[0];
192 | 
193 |         // Obtain a device ID 
194 |         cl_device_id devices[] = new cl_device_id[numDevices];
195 |         clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
196 |         cl_device_id device = devices[deviceIndex];
197 | 
198 |         // Create a context for the selected device
199 |         context = clCreateContext(
200 |             contextProperties, 1, new cl_device_id[]{device}, 
201 |             null, null, null);
202 | 
203 |         // Create a command-queue for the selected device
204 |         cl_queue_properties properties = new cl_queue_properties();
205 |         commandQueue = clCreateCommandQueueWithProperties(
206 |             context, device, properties, null);
207 | 
208 |         // Program Setup
209 |         String source = 
210 |             readFile("src/main/resources/kernels/SimpleMandelbrot.cl");
211 | 
212 |         // Create the program
213 |         cl_program cpProgram = clCreateProgramWithSource(context, 1, 
214 |             new String[]{ source }, null, null);
215 | 
216 |         // Build the program
217 |         clBuildProgram(cpProgram, 0, null, "-cl-mad-enable", null, null);
218 | 
219 |         // Create the kernel. The program handle is not used afterwards
220 |         kernel = clCreateKernel(cpProgram, "computeMandelbrot", null);
221 |         clReleaseProgram(cpProgram);
222 | 
223 |         // Create the memory object which will be filled with the pixel data
224 |         pixelMem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
225 |             sizeX * sizeY * Sizeof.cl_uint, null, null);
226 | 
227 |         // Create and fill the memory object containing the color map
228 |         initColorMap(32, Color.RED, Color.GREEN, Color.BLUE);
229 |         colorMapMem = clCreateBuffer(context, CL_MEM_READ_WRITE, 
230 |             colorMap.length * Sizeof.cl_uint, null, null);
231 |         clEnqueueWriteBuffer(commandQueue, colorMapMem, true, 0, 
232 |             colorMap.length * Sizeof.cl_uint, Pointer.to(colorMap), 0, null, null);
233 |     }
234 |     
235 |     /**
236 |      * Reads the file with the given name line by line, and returns the
237 |      * lines as a single String, each of them terminated with "\n".
238 |      * If the file can not be read, the stack trace is printed and
239 |      * the application exits.
240 |      * 
241 |      * @param fileName The name of the file to read.
242 |      * @return The contents of the file
243 |      */
244 |     private String readFile(String fileName)
245 |     {
246 |         BufferedReader reader = null;
247 |         try
248 |         {
249 |             InputStream in = new FileInputStream(fileName);
250 |             reader = new BufferedReader(new InputStreamReader(in));
251 | 
252 |             // Collect the lines until readLine signals the end with null
253 |             StringBuilder contents = new StringBuilder();
254 |             for (String line = reader.readLine(); line != null;
255 |                 line = reader.readLine())
256 |             {
257 |                 contents.append(line);
258 |                 contents.append("\n");
259 |             }
260 |             return contents.toString();
261 |         }
262 |         catch (IOException e)
263 |         {
264 |             e.printStackTrace();
265 |             System.exit(1);
266 |             return null;
267 |         }
268 |         finally
269 |         {
270 |             // Close the reader, if it has been opened successfully
271 |             if (reader != null)
272 |             {
273 |                 try
274 |                 {
275 |                     reader.close();
276 |                 }
277 |                 catch (IOException e)
278 |                 {
279 |                     e.printStackTrace();
280 |                 }
281 |             }
282 |         }
283 |     }
284 |     
285 |     /**
286 |      * Creates the colorMap array which contains RGB colors as integers.
287 |      * For each pair of consecutive colors, stepSize entries are written,
288 |      * interpolating linearly from the first to the second color. The
289 |      * array is allocated with stepSize * colors.length entries, but only
290 |      * colors.length - 1 pairs exist, so the trailing entries of the
291 |      * array are not written and remain 0.
292 |      * 
293 |      * @param stepSize The number of interpolation steps between two colors
294 |      * @param colors The colors for the map
295 |      */
296 |     private void initColorMap(int stepSize, Color ... colors)
297 |     {
298 |         colorMap = new int[stepSize*colors.length];
299 |         for (int segment=0; segment<colors.length-1; segment++)
300 |         {
301 |             // The start and end color of this segment
302 |             Color from = colors[segment];
303 |             Color to = colors[segment+1];
304 | 
305 |             // The differences of the color components
306 |             int deltaR = to.getRed() - from.getRed();
307 |             int deltaG = to.getGreen() - from.getGreen();
308 |             int deltaB = to.getBlue() - from.getBlue();
309 | 
310 |             // The entries of one segment are stored consecutively
311 |             int offset = segment * stepSize;
312 |             for (int step=0; step<stepSize; step++)
313 |             {
314 |                 // The interpolation weight of this step
315 |                 float alpha = (float)step / (stepSize-1);
316 |                 int r = (int)(from.getRed() + alpha * deltaR);
317 |                 int g = (int)(from.getGreen() + alpha * deltaG);
318 |                 int b = (int)(from.getBlue() + alpha * deltaB);
319 | 
320 |                 // Red is shifted by 16 bits, green by 8 bits, and
321 |                 // blue is stored in the lowest bits
322 |                 int rgb = (r << 16) | (g << 8) | b;
323 |                 colorMap[offset + step] = rgb;
324 |             }
325 |         }
326 |     }
327 |     
328 |     
329 |     /**
330 |      * Registers the listeners on the imageComponent that allow panning
331 |      * (mouse drag) and zooming (mouse wheel) of the displayed region
332 |      */
333 |     private void initInteraction()
334 |     {
335 |         // The mouse position of the most recent move or drag event
336 |         final Point lastPosition = new Point();
337 | 
338 |         imageComponent.addMouseMotionListener(new MouseMotionListener()
339 |         {
340 |             @Override
341 |             public void mouseMoved(MouseEvent e)
342 |             {
343 |                 lastPosition.setLocation(e.getX(), e.getY());
344 |             }
345 | 
346 |             @Override
347 |             public void mouseDragged(MouseEvent e)
348 |             {
349 |                 // Shift both corners of the region by the drag distance,
350 |                 // scaled by the current extent of the region
351 |                 float extentX = x1-x0;
352 |                 float extentY = y1-y0;
353 |                 float shiftX = ((lastPosition.x - e.getX()) / 150.0f) * extentX;
354 |                 float shiftY = ((lastPosition.y - e.getY()) / 150.0f) * extentY;
355 |                 x0 += shiftX;
356 |                 x1 += shiftX;
357 |                 y0 += shiftY;
358 |                 y1 += shiftY;
359 | 
360 |                 lastPosition.setLocation(e.getX(), e.getY());
361 |                 updateImage();
362 |             }
363 |         });
364 | 
365 |         imageComponent.addMouseWheelListener(new MouseWheelListener()
366 |         {
367 |             @Override
368 |             public void mouseWheelMoved(MouseWheelEvent e)
369 |             {
370 |                 // Shrink or grow the region symmetrically by a fraction
371 |                 // of its current extent, depending on the wheel rotation
372 |                 float factor = e.getWheelRotation() / 20.0f;
373 |                 float insetX = factor * (x1-x0);
374 |                 float insetY = factor * (y1-y0);
375 |                 x0 += insetX;
376 |                 x1 -= insetX;
377 |                 y0 += insetY;
378 |                 y1 -= insetY;
379 | 
380 |                 updateImage();
381 |             }
382 |         });
383 |     }
384 |     
385 | 
386 |     /**
387 |      * Execute the kernel function and read the resulting pixel data
388 |      * into the BufferedImage
389 |      */
390 |     private void updateImage()
391 |     {
392 |         // Set work size and execute the kernel
393 |         long globalWorkSize[] = new long[2];
394 |         globalWorkSize[0] = sizeX;
395 |         globalWorkSize[1] = sizeY;
396 | 
397 |         int maxIterations = 250;
398 |         clSetKernelArg(kernel, 0, Sizeof.cl_mem, Pointer.to(pixelMem));
399 |         clSetKernelArg(kernel, 1, Sizeof.cl_uint, Pointer.to(new int[]{sizeX}));
400 |         clSetKernelArg(kernel, 2, Sizeof.cl_uint, Pointer.to(new int[]{sizeY}));
401 |         clSetKernelArg(kernel, 3, Sizeof.cl_float, Pointer.to(new float[]{ x0 }));
402 |         clSetKernelArg(kernel, 4, Sizeof.cl_float, Pointer.to(new float[]{ y0 }));
403 |         clSetKernelArg(kernel, 5, Sizeof.cl_float, Pointer.to(new float[]{ x1 }));
404 |         clSetKernelArg(kernel, 6, Sizeof.cl_float, Pointer.to(new float[]{ y1 }));
405 |         clSetKernelArg(kernel, 7, Sizeof.cl_int, Pointer.to(new int[]{ maxIterations }));
406 |         clSetKernelArg(kernel, 8, Sizeof.cl_mem, Pointer.to(colorMapMem));
407 |         clSetKernelArg(kernel, 9, Sizeof.cl_int, Pointer.to(new int[]{ colorMap.length }));
408 | 
409 |         clEnqueueNDRangeKernel(commandQueue, kernel, 2, null, 
410 |             globalWorkSize, null, 0, null, null);
411 |         
412 |         // Read the pixel data into the BufferedImage
413 |         DataBufferInt dataBuffer = (DataBufferInt)image.getRaster().getDataBuffer();
414 |         int data[] = dataBuffer.getData();
415 |         clEnqueueReadBuffer(commandQueue, pixelMem, CL_TRUE, 0, 
416 |             Sizeof.cl_int * sizeY * sizeX, Pointer.to(data), 0, null, null);
417 |         
418 |         imageComponent.repaint();
419 |     }
420 |     
421 |     
422 | }
423 | 


--------------------------------------------------------------------------------
/src/main/java/org/jocl/samples/JOCLSubBufferSample.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * JOCL - Java bindings for OpenCL
  3 |  * 
  4 |  * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
  5 |  */
  6 | package org.jocl.samples;
  7 | 
  8 | import static org.jocl.CL.*;
  9 | 
 10 | import java.util.Arrays;
 11 | 
 12 | import org.jocl.*;
 13 | 
 14 | 
 15 | /**
 16 |  * A sample demonstrating how to create sub-buffers
 17 |  * that have been introduced with OpenCL 1.1.
 18 |  */
 19 | public class JOCLSubBufferSample
 20 | {
 21 |     private static cl_context context;
 22 |     private static cl_command_queue commandQueue;
 23 | 
 24 |     /**
 25 |      * The entry point of this sample. Creates a buffer, reads and
 26 |      * modifies a part of it via a sub-buffer, prints the resulting
 27 |      * contents of the full buffer, and releases all CL objects.
 28 |      * 
 29 |      * @param args Not used
 30 |      */
 31 |     public static void main(String args[])
 32 |     {
 33 |         simpleInitialization();
 34 | 
 35 |         // Create an array with 8 elements and consecutive values
 36 |         int fullSize = 8;
 37 |         float fullArray[] = new float[fullSize];
 38 |         for (int i=0; i<fullSize; i++)
 39 |         {
 40 |             fullArray[i] = i;
 41 |         }
 42 |         System.out.println("Full input array  : "+Arrays.toString(fullArray));
 43 | 
 44 |         // Create a buffer for the full array
 45 |         cl_mem fullMem = clCreateBuffer(context, 
 46 |             CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 
 47 |             Sizeof.cl_float * fullSize, Pointer.to(fullArray), null);
 48 | 
 49 |         // Create a sub-buffer. Origin and size are given in bytes.
 50 |         int subOffset = 2;
 51 |         int subSize = 4;
 52 |         cl_buffer_region region = new cl_buffer_region(
 53 |             subOffset*Sizeof.cl_float, 
 54 |             subSize*Sizeof.cl_float);
 55 |         cl_mem subMem = clCreateSubBuffer(fullMem, 
 56 |             (int)CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, 
 57 |             region, null);
 58 | 
 59 |         // Create an array for the sub-buffer, and copy the data
 60 |         // from the sub-buffer to the array
 61 |         float subArray[] = new float[subSize];
 62 |         clEnqueueReadBuffer(commandQueue, subMem, true, 
 63 |             0, subSize * Sizeof.cl_float, Pointer.to(subArray), 
 64 |             0, null, null);
 65 |         System.out.println("Read sub-array    : "+Arrays.toString(subArray));
 66 | 
 67 |         // Modify the data in the sub-array, and copy it back
 68 |         // into the sub-buffer
 69 |         subArray[0] = -5;
 70 |         subArray[1] = -4;
 71 |         subArray[2] = -3;
 72 |         subArray[3] = -2;
 73 |         clEnqueueWriteBuffer(commandQueue, subMem, true, 
 74 |             0, subSize * Sizeof.cl_float, Pointer.to(subArray), 
 75 |             0, null, null);
 76 |         System.out.println("Modified sub-array: "+Arrays.toString(subArray));
 77 | 
 78 |         // Read the full buffer back into the array 
 79 |         clEnqueueReadBuffer(commandQueue, fullMem, true, 
 80 |             0, fullSize * Sizeof.cl_float, Pointer.to(fullArray), 
 81 |             0, null, null);
 82 |         System.out.println("Full result array : "+Arrays.toString(fullArray));
 83 | 
 84 |         // Clean up. The sub-buffer is released before its parent buffer.
 85 |         clReleaseMemObject(subMem);
 86 |         clReleaseMemObject(fullMem);
 87 |         clReleaseCommandQueue(commandQueue);
 88 |         clReleaseContext(context);
 89 |     }
 90 | 
 91 |     /**
 92 |      * Simple OpenCL initialization of the context and command queue
 93 |      */
 94 |     private static void simpleInitialization()
 95 |     {
 96 |         // The platform, device type and device number
 97 |         // that will be used
 98 |         final int platformIndex = 0;
 99 |         final long deviceType = CL_DEVICE_TYPE_ALL;
100 |         final int deviceIndex = 0;
101 | 
102 |         // Enable exceptions and subsequently omit error checks in this sample
103 |         CL.setExceptionsEnabled(true);
104 | 
105 |         // Obtain the number of platforms
106 |         int numPlatformsArray[] = new int[1];
107 |         clGetPlatformIDs(0, null, numPlatformsArray);
108 |         int numPlatforms = numPlatformsArray[0];
109 | 
110 |         // Obtain a platform ID
111 |         cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
112 |         clGetPlatformIDs(platforms.length, platforms, null);
113 |         cl_platform_id platform = platforms[platformIndex];
114 | 
115 |         // Initialize the context properties
116 |         cl_context_properties contextProperties = new cl_context_properties();
117 |         contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
118 | 
119 |         // Obtain the number of devices for the platform
120 |         int numDevicesArray[] = new int[1];
121 |         clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
122 |         int numDevices = numDevicesArray[0];
123 | 
124 |         // Obtain a device ID 
125 |         cl_device_id devices[] = new cl_device_id[numDevices];
126 |         clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
127 |         cl_device_id device = devices[deviceIndex];
128 | 
129 |         // Create a context for the selected device
130 |         context = clCreateContext(
131 |             contextProperties, 1, new cl_device_id[]{device}, 
132 |             null, null, null);
133 | 
134 |         // Create a command-queue for the selected device
135 |         cl_queue_properties properties = new cl_queue_properties();
136 |         commandQueue = clCreateCommandQueueWithProperties(
137 |             context, device, properties, null);
138 |     }
139 | }
140 | 


--------------------------------------------------------------------------------
/src/main/java/org/jocl/samples/blast/JOCLBlastCaxpyBatchedSample.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * JOCL - Java bindings for OpenCL
  3 |  * 
  4 |  * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
  5 |  */
  6 | package org.jocl.samples.blast;
  7 |  
  8 | import static org.jocl.CL.*;
  9 | import static org.jocl.blast.CLBlast.CLBlastCaxpyBatched;
 10 | 
 11 | import java.nio.FloatBuffer;
 12 | import java.util.Locale;
 13 | 
 14 | import org.jocl.*;
 15 | import org.jocl.blast.CLBlast;
 16 | 
 17 | /**
 18 |  * An example for using the batched CAXPY function from CLBlast to compute
 19 |  * Y = a * X + Y
 20 |  * for several single-precision complex number vectors
 21 |  */
 22 | public class JOCLBlastCaxpyBatchedSample
 23 | {
 24 |     private static cl_context context;
 25 |     private static cl_command_queue commandQueue;
 26 | 
 27 |     /**
 28 |      * The entry point of this sample
 29 |      *
 30 |      * @param args Not used
 31 |      */
 32 |     public static void main(String args[])
 33 |     {
 34 |         CL.setExceptionsEnabled(true);
 35 |         CLBlast.setExceptionsEnabled(true);
 36 | 
 37 |         defaultInitialization();
 38 | 
 39 |         // Create the host input data. Each entry of these vectors consists 
 40 |         // of TWO values, which are the real- and imaginary part of the 
 41 |         // complex number
 42 |         int numVectors = 3;
 43 |         int vectorSize = 5;
 44 | 
 45 |         // 3 vectors, each with 5 dimensions (*2, for real- and imaginary part)
 46 |         float X[] =  
 47 |         {
 48 |             1,1, 1,2, 1,3, 1,4, 1,5,
 49 |             2,1, 2,2, 2,3, 2,4, 2,5,
 50 |             3,1, 3,2, 3,3, 3,4, 3,5,
 51 |         };
 52 |         // 3 vectors, each with 5 dimensions (*2, for real- and imaginary part)
 53 |         float Y[] =
 54 |         {
 55 |             4,1, 4,2, 4,3, 4,4, 4,5,
 56 |             5,1, 5,2, 5,3, 5,4, 5,5,
 57 |             6,1, 6,2, 6,3, 6,4, 6,5,
 58 |         };
 59 | 
 60 |         // Create the device buffers. Y is both an input and the output
 61 |         // of the computation, so it must be writable by the device.
 62 |         long numBytes = (long)vectorSize * numVectors * Sizeof.cl_float2;
 63 |         cl_mem memX = clCreateBuffer(context, CL_MEM_READ_ONLY,
 64 |             numBytes, null, null);
 65 |         cl_mem memY = clCreateBuffer(context, CL_MEM_READ_WRITE,
 66 |             numBytes, null, null);
 67 | 
 68 |         // Copy the host data to the device
 69 |         clEnqueueWriteBuffer(commandQueue, memX, CL_TRUE, 0,
 70 |             numBytes, Pointer.to(X), 0, null, null);
 71 |         clEnqueueWriteBuffer(commandQueue, memY, CL_TRUE, 0,
 72 |             numBytes, Pointer.to(Y), 0, null, null);
 73 | 
 74 |         // 3 factors to be multiplied with X (*2, for real- and imaginary part)
 75 |         float alphas[] = { 1,2, 2,3, 3,4 };
 76 | 
 77 |         // The offset of each vector inside the buffers, counted in
 78 |         // complex elements: The vectors are stored consecutively
 79 |         long offsetsX[] = new long[numVectors];
 80 |         long offsetsY[] = new long[numVectors];
 81 |         for (int i=0; i<numVectors; i++)
 82 |         {
 83 |             offsetsX[i] = (long)i * vectorSize;
 84 |             offsetsY[i] = (long)i * vectorSize;
 85 |         }
 86 | 
 87 |         // Execute batched CAXPY: Y = alpha * X + Y
 88 |         cl_event event = new cl_event();
 89 |         CLBlastCaxpyBatched(vectorSize, alphas, 
 90 |             memX, offsetsX, 1, 
 91 |             memY, offsetsY, 1,  
 92 |             numVectors, commandQueue, event);
 93 | 
 94 |         // Wait for the computation to be finished
 95 |         clWaitForEvents( 1, new cl_event[] { event });
 96 | 
 97 |         // Copy the result data back to the host
 98 |         float resultY[] = new float[vectorSize * numVectors * 2];
 99 |         clEnqueueReadBuffer(commandQueue, memY, CL_TRUE, 0,
100 |             numBytes, Pointer.to(resultY), 0, null, null);
101 | 
102 |         // Print the inputs and the result
103 |         System.out.println("a:");
104 |         printComplex2D(FloatBuffer.wrap(alphas), 1);
105 | 
106 |         System.out.println("X:");
107 |         printComplex2D(FloatBuffer.wrap(X), vectorSize);
108 | 
109 |         System.out.println("Y:");
110 |         printComplex2D(FloatBuffer.wrap(Y), vectorSize);
111 | 
112 |         System.out.println("Result:");
113 |         printComplex2D(FloatBuffer.wrap(resultY), vectorSize);
114 | 
115 |         // Clean up
116 |         clReleaseEvent(event);
117 |         clReleaseMemObject(memX);
118 |         clReleaseMemObject(memY);
119 |         clReleaseCommandQueue(commandQueue);
120 |         clReleaseContext(context);
121 |     }
122 | 
123 |     /**
124 |      * Default OpenCL initialization of the context and command queue
125 |      */
126 |     private static void defaultInitialization()
127 |     {
128 |         // The platform, device type and device number
129 |         // that will be used
130 |         final int platformIndex = 0;
131 |         final long deviceType = CL_DEVICE_TYPE_ALL;
132 |         final int deviceIndex = 0;
133 | 
134 |         // Enable exceptions and subsequently omit error checks in this sample
135 |         CL.setExceptionsEnabled(true);
136 | 
137 |         // Obtain the number of platforms
138 |         int numPlatformsArray[] = new int[1];
139 |         clGetPlatformIDs(0, null, numPlatformsArray);
140 |         int numPlatforms = numPlatformsArray[0];
141 | 
142 |         // Obtain a platform ID
143 |         cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
144 |         clGetPlatformIDs(platforms.length, platforms, null);
145 |         cl_platform_id platform = platforms[platformIndex];
146 | 
147 |         // Initialize the context properties
148 |         cl_context_properties contextProperties = new cl_context_properties();
149 |         contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
150 | 
151 |         // Obtain the number of devices for the platform
152 |         int numDevicesArray[] = new int[1];
153 |         clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
154 |         int numDevices = numDevicesArray[0];
155 | 
156 |         // Obtain a device ID
157 |         cl_device_id devices[] = new cl_device_id[numDevices];
158 |         clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
159 |         cl_device_id device = devices[deviceIndex];
160 | 
161 |         // Create a context for the selected device
162 |         context = clCreateContext(
163 |             contextProperties, 1, new cl_device_id[]{device},
164 |             null, null, null);
165 | 
166 |         // Print the name of the device that is actually used
167 |         String deviceName = getString(device, CL_DEVICE_NAME);
168 |         System.out.printf("CL_DEVICE_NAME: %s\n", deviceName);
169 | 
170 |         // Create a command-queue for the selected device
171 |         cl_queue_properties properties = new cl_queue_properties();
172 |         commandQueue = clCreateCommandQueueWithProperties(
173 |             context, device, properties, null);
174 |     }
175 | 
176 |     /**
177 |      * Print the given buffer as a matrix with the given number of columns.
178 |      * This assumes that the elements of this buffer are complex 
179 |      * numbers, consisting of a real- and an imaginary part.
180 |      *
181 |      * @param data The buffer
182 |      * @param columns The number of columns
183 |      */
184 |     private static void printComplex2D(FloatBuffer data, int columns)
185 |     {
186 |         StringBuilder sb = new StringBuilder();
187 |         for (int i=0; i<data.capacity() / 2; i++)
188 |         {
189 |             sb.append(String.format(Locale.ENGLISH, "(%5.1f, %5.1fi) ",
190 |                 data.get(i * 2 + 0), data.get(i * 2 + 1)));
191 |             if (((i + 1) % columns) == 0)
192 |             {
193 |                 sb.append("\n");
194 |             }
195 |         }
196 |         System.out.print(sb.toString());
197 |     }
198 | 
199 |     /**
200 |      * Returns the value of the device info parameter with the given name
201 |      *
202 |      * @param device The device
203 |      * @param paramName The parameter name
204 |      * @return The value
205 |      */
206 |     private static String getString(cl_device_id device, int paramName)
207 |     {
208 |         // Obtain the length of the string that will be queried
209 |         long size[] = new long[1];
210 |         clGetDeviceInfo(device, paramName, 0, null, size);
211 | 
212 |         // Create a buffer of the appropriate size and fill it with the info
213 |         byte buffer[] = new byte[(int)size[0]];
214 |         clGetDeviceInfo(device, paramName, buffer.length, 
215 |             Pointer.to(buffer), null);
216 | 
217 |         // Create a string from the buffer (excluding the trailing \0 byte)
218 |         return new String(buffer, 0, buffer.length-1);
219 |     }
220 | }


--------------------------------------------------------------------------------
/src/main/java/org/jocl/samples/blast/JOCLBlastDgemmSample.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * JOCL - Java bindings for OpenCL
  3 |  * 
  4 |  * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
  5 |  */
  6 | package org.jocl.samples.blast;
  7 |  
  8 | import static org.jocl.CL.*;
  9 | import static org.jocl.blast.CLBlast.*;
 10 | import static org.jocl.blast.CLBlastLayout.CLBlastLayoutRowMajor;
 11 | import static org.jocl.blast.CLBlastTranspose.CLBlastTransposeNo;
 12 |  
 13 | import java.nio.DoubleBuffer;
 14 | import java.util.Locale;
 15 |  
 16 | import org.jocl.*;
 17 | import org.jocl.blast.CLBlast;
 18 |  
 19 | /**
 20 |  * A basic sample showing how to use JOCLBlast to perform a DGEMM
 21 |  */
 22 | public class JOCLBlastDgemmSample
 23 | {
 24 |     private static cl_context context;
 25 |     private static cl_command_queue commandQueue;
 26 | 
 27 |     /**
 28 |      * The entry point of this sample
 29 |      *
 30 |      * @param args Not used
 31 |      */
 32 |     public static void main(String args[])
 33 |     {
 34 |         CL.setExceptionsEnabled(true);
 35 |         CLBlast.setExceptionsEnabled(true);
 36 | 
 37 |         defaultInitialization();
 38 | 
 39 |         // Create the host input data:
 40 |         // Matrix A with size MxK
 41 |         // Matrix B with size   KxN
 42 |         // Matrix C with size M x N
 43 |         int M = 4;
 44 |         int N = 3;
 45 |         int K = 5;
 46 |         double A[] =  
 47 |         {
 48 |             11, 12, 13, 14, 15,
 49 |             21, 22, 23, 24, 25,
 50 |             31, 32, 33, 34, 35,
 51 |             41, 42, 43, 44, 45,
 52 |         };
 53 |         double B[] =
 54 |         {
 55 |             11, 12, 13,
 56 |             21, 22, 23,
 57 |             31, 32, 33,
 58 |             41, 42, 43,
 59 |             51, 52, 53,
 60 |         };
 61 |         double C[] =
 62 |         {
 63 |             11, 12, 13,
 64 |             21, 22, 23,
 65 |             31, 32, 33,
 66 |             41, 42, 43,
 67 |         };
 68 | 
 69 |         // Create the device input buffers
 70 |         cl_mem memA = clCreateBuffer(context, CL_MEM_READ_ONLY,
 71 |             M * K * Sizeof.cl_double, null, null);
 72 |         cl_mem memB = clCreateBuffer(context, CL_MEM_READ_ONLY,
 73 |             K * N * Sizeof.cl_double, null, null);
 74 |         cl_mem memC = clCreateBuffer(context, CL_MEM_READ_WRITE,
 75 |             M * N * Sizeof.cl_double, null, null);
 76 | 
 77 |         // Copy the host data to the device
 78 |         clEnqueueWriteBuffer(commandQueue, memA, CL_TRUE, 0,
 79 |             M * K * Sizeof.cl_double, Pointer.to(A), 0, null, null);
 80 |         clEnqueueWriteBuffer(commandQueue, memB, CL_TRUE, 0,
 81 |             K * N * Sizeof.cl_double, Pointer.to(B), 0, null, null);
 82 |         clEnqueueWriteBuffer(commandQueue, memC, CL_TRUE, 0,
 83 |             M * N * Sizeof.cl_double, Pointer.to(C), 0, null, null);
 84 | 
 85 |         // Execute GEMM:
 86 |         // C = alpha * A * B + beta * C
 87 |         double alpha = 10;
 88 |         double beta = 20;
 89 |         cl_event event = new cl_event();
 90 |         CLBlastDgemm(
 91 |             CLBlastLayoutRowMajor, CLBlastTransposeNo, CLBlastTransposeNo, 
 92 |             M, N, K, alpha,
 93 |             memA, 0, K,
 94 |             memB, 0, N, beta,
 95 |             memC, 0, N,
 96 |             commandQueue, event);
 97 | 
 98 |         // Wait for the computation to be finished
 99 |         clWaitForEvents( 1, new cl_event[] { event });
100 | 
101 |         // Copy the result data back to the host
102 |         double result[] = new double[M*N];
103 |         clEnqueueReadBuffer(commandQueue, memC, CL_TRUE, 0,
104 |             M * N * Sizeof.cl_double, Pointer.to(result), 0, null, null);
105 | 
106 |         // Print the inputs and the result
107 |         System.out.println("A:");
108 |         print2D(DoubleBuffer.wrap(A), K);
109 | 
110 |         System.out.println("B:");
111 |         print2D(DoubleBuffer.wrap(B), N);
112 | 
113 |         System.out.println("C:");
114 |         print2D(DoubleBuffer.wrap(C), N);
115 | 
116 |         System.out.println(
117 |             "Result of C = " + alpha + " * A * B + " + beta + " * C:");
118 |         print2D(DoubleBuffer.wrap(result), N);
119 | 
120 |         // Clean up
121 |         clReleaseEvent(event);
122 |         clReleaseMemObject(memA);
123 |         clReleaseMemObject(memB);
124 |         clReleaseMemObject(memC);
125 |         clReleaseCommandQueue(commandQueue);
126 |         clReleaseContext(context);
127 |     }
128 | 
129 |     /**
130 |      * Default OpenCL initialization of the context and command queue
131 |      */
132 |     private static void defaultInitialization()
133 |     {
134 |         // The platform, device type and device number
135 |         // that will be used
136 |         final int platformIndex = 0;
137 |         final long deviceType = CL_DEVICE_TYPE_ALL;
138 |         final int deviceIndex = 0;
139 | 
140 |         // Enable exceptions and subsequently omit error checks in this sample
141 |         CL.setExceptionsEnabled(true);
142 | 
143 |         // Obtain the number of platforms
144 |         int numPlatformsArray[] = new int[1];
145 |         clGetPlatformIDs(0, null, numPlatformsArray);
146 |         int numPlatforms = numPlatformsArray[0];
147 | 
148 |         // Obtain a platform ID
149 |         cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
150 |         clGetPlatformIDs(platforms.length, platforms, null);
151 |         cl_platform_id platform = platforms[platformIndex];
152 | 
153 |         // Initialize the context properties
154 |         cl_context_properties contextProperties = new cl_context_properties();
155 |         contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
156 | 
157 |         // Obtain the number of devices for the platform
158 |         int numDevicesArray[] = new int[1];
159 |         clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
160 |         int numDevices = numDevicesArray[0];
161 | 
162 |         // Obtain a device ID
163 |         cl_device_id devices[] = new cl_device_id[numDevices];
164 |         clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
165 |         cl_device_id device = devices[deviceIndex];
166 | 
167 |         // Create a context for the selected device
168 |         context = clCreateContext(
169 |             contextProperties, 1, new cl_device_id[]{device},
170 |             null, null, null);
171 | 
172 |         // Print the name of the device that is actually used
173 |         String deviceName = getString(device, CL_DEVICE_NAME);
174 |         System.out.printf("CL_DEVICE_NAME: %s\n", deviceName);
175 | 
176 |         // Create a command-queue for the selected device
177 |         cl_queue_properties properties = new cl_queue_properties();
178 |         commandQueue = clCreateCommandQueueWithProperties(
179 |             context, device, properties, null);
180 |     }
181 | 
182 |     /**
183 |      * Print the given buffer as a matrix with the given number of columns
184 |      *
185 |      * @param data The buffer
186 |      * @param columns The number of columns
187 |      */
188 |     private static void print2D(DoubleBuffer data, int columns)
189 |     {
190 |         StringBuilder sb = new StringBuilder();
191 |         for (int i=0; i<data.capacity(); i++)
192 |         {
193 |             sb.append(String.format(Locale.ENGLISH, "%5.1f ", data.get(i)));
194 |             if (((i+1)%columns)==0)
195 |             {
196 |                 sb.append("\n");
197 |             }
198 |         }
199 |         System.out.print(sb.toString());
200 |     }
201 | 
202 |     /**
203 |      * Returns the value of the device info parameter with the given name
204 |      *
205 |      * @param device The device
206 |      * @param paramName The parameter name
207 |      * @return The value
208 |      */
209 |     private static String getString(cl_device_id device, int paramName)
210 |     {
211 |         // Obtain the length of the string that will be queried
212 |         long size[] = new long[1];
213 |         clGetDeviceInfo(device, paramName, 0, null, size);
214 | 
215 |         // Create a buffer of the appropriate size and fill it with the info
216 |         byte buffer[] = new byte[(int)size[0]];
217 |         clGetDeviceInfo(device, paramName, buffer.length, 
218 |             Pointer.to(buffer), null);
219 | 
220 |         // Create a string from the buffer (excluding the trailing \0 byte)
221 |         return new String(buffer, 0, buffer.length-1);
222 |     }
223 | }


--------------------------------------------------------------------------------
/src/main/java/org/jocl/samples/blast/JOCLBlastSample.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * JOCL - Java bindings for OpenCL
  3 |  * 
  4 |  * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
  5 |  */
  6 | package org.jocl.samples.blast;
  7 | 
  8 | import static org.jocl.CL.*;
  9 | import static org.jocl.blast.CLBlast.CLBlastSgemm;
 10 | import static org.jocl.blast.CLBlastLayout.CLBlastLayoutRowMajor;
 11 | import static org.jocl.blast.CLBlastTranspose.CLBlastTransposeNo;
 12 | 
 13 | import java.nio.FloatBuffer;
 14 | import java.util.Locale;
 15 | 
 16 | import org.jocl.*;
 17 | import org.jocl.blast.CLBlast;
 18 | 
 19 | /**
 20 |  * A basic sample showing how to use JOCLBlast to perform a SGEMM
 21 |  */
 22 | public class JOCLBlastSample
 23 | {
 24 |     private static cl_context context;
 25 |     private static cl_command_queue commandQueue;
 26 | 
 27 |     /**
 28 |      * The entry point of this sample. Computes
 29 |      * C = alpha * A * B + beta * C with CLBlast for small example
 30 |      * matrices and prints the inputs and the result.
 31 |      * 
 32 |      * @param args Not used
 33 |      */
 34 |     public static void main(String args[])
 35 |     {
 36 |         CL.setExceptionsEnabled(true);
 37 |         CLBlast.setExceptionsEnabled(true);
 38 | 
 39 |         defaultInitialization();
 40 | 
 41 |         // Create the host input data:
 42 |         // Matrix A with size MxK
 43 |         // Matrix B with size   KxN
 44 |         // Matrix C with size M x N
 45 |         int M = 4;
 46 |         int N = 3;
 47 |         int K = 5;
 48 |         float A[] =  
 49 |         {
 50 |             11, 12, 13, 14, 15,
 51 |             21, 22, 23, 24, 25,
 52 |             31, 32, 33, 34, 35,
 53 |             41, 42, 43, 44, 45,
 54 |         };
 55 |         float B[] = 
 56 |         { 
 57 |             11, 12, 13,
 58 |             21, 22, 23,
 59 |             31, 32, 33,
 60 |             41, 42, 43,
 61 |             51, 52, 53,
 62 |         };
 63 |         float C[] = 
 64 |         {
 65 |             11, 12, 13,
 66 |             21, 22, 23,
 67 |             31, 32, 33,
 68 |             41, 42, 43, 
 69 |         };
 70 | 
 71 |         // Create the device input buffers
 72 |         cl_mem memA = clCreateBuffer(context, CL_MEM_READ_ONLY, 
 73 |             M * K * Sizeof.cl_float, null, null);
 74 |         cl_mem memB = clCreateBuffer(context, CL_MEM_READ_ONLY, 
 75 |             K * N * Sizeof.cl_float, null, null);
 76 |         cl_mem memC = clCreateBuffer(context, CL_MEM_READ_WRITE, 
 77 |             M * N * Sizeof.cl_float, null, null);
 78 | 
 79 |         // Copy the host data to the device
 80 |         clEnqueueWriteBuffer(commandQueue, memA, CL_TRUE, 0, 
 81 |             M * K * Sizeof.cl_float, Pointer.to(A), 0, null, null);
 82 |         clEnqueueWriteBuffer(commandQueue, memB, CL_TRUE, 0, 
 83 |             K * N * Sizeof.cl_float, Pointer.to(B), 0, null, null);
 84 |         clEnqueueWriteBuffer(commandQueue, memC, CL_TRUE, 0, 
 85 |             M * N * Sizeof.cl_float, Pointer.to(C), 0, null, null);
 86 | 
 87 |         // Execute GEMM:
 88 |         // C = alpha * A * B + beta * C
 89 |         float alpha = 10;
 90 |         float beta = 20;
 91 |         cl_event event = new cl_event();
 92 |         CLBlastSgemm(
 93 |             CLBlastLayoutRowMajor, CLBlastTransposeNo, CLBlastTransposeNo, 
 94 |             M, N, K, alpha, 
 95 |             memA, 0, K, 
 96 |             memB, 0, N, beta, 
 97 |             memC, 0, N, 
 98 |             commandQueue, event);
 99 | 
100 |         System.out.println("Event is "+event);
101 | 
102 |         // Wait for the computation to be finished
103 |         clWaitForEvents( 1, new cl_event[] { event });
104 | 
105 |         // Copy the result data back to the host
106 |         float result[] = new float[M*N];
107 |         clEnqueueReadBuffer(commandQueue, memC, CL_TRUE, 0, 
108 |             M * N * Sizeof.cl_float, Pointer.to(result), 0, null, null);
109 | 
110 |         // Print the inputs and the result
111 |         System.out.println("A:");
112 |         print2D(FloatBuffer.wrap(A), K);
113 | 
114 |         System.out.println("B:");
115 |         print2D(FloatBuffer.wrap(B), N);
116 | 
117 |         System.out.println("C:");
118 |         print2D(FloatBuffer.wrap(C), N);
119 | 
120 |         System.out.println(
121 |             "Result of C = " + alpha + " * A * B + " + beta + " * C:");
122 |         print2D(FloatBuffer.wrap(result), N);
123 | 
124 |         // Clean up. The event is released as well, because
125 |         // it is not needed after the computation has finished
126 |         clReleaseEvent(event);
127 |         clReleaseMemObject(memA);
128 |         clReleaseMemObject(memB);
129 |         clReleaseMemObject(memC);
130 |         clReleaseCommandQueue(commandQueue);
131 |         clReleaseContext(context);
132 |     }
133 |     
134 |     /**
135 |      * Default OpenCL initialization: selects the platform and device given
136 |      * by the index constants below and creates the context and command queue.
137 |      */
138 |     private static void defaultInitialization()
139 |     {
140 |         // The platform, device type and device number that will be used
141 |         final int platformIndex = 0;
142 |         final long deviceType = CL_DEVICE_TYPE_ALL;
143 |         final int deviceIndex = 0;
144 | 
145 |         // Enable exceptions and subsequently omit error checks in this sample
146 |         CL.setExceptionsEnabled(true);
147 | 
148 |         // Obtain the number of platforms
149 |         int numPlatformsArray[] = new int[1];
150 |         clGetPlatformIDs(0, null, numPlatformsArray);
151 |         int numPlatforms = numPlatformsArray[0];
152 | 
153 |         // Obtain a platform ID
154 |         cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
155 |         clGetPlatformIDs(platforms.length, platforms, null);
156 |         cl_platform_id platform = platforms[platformIndex];
157 | 
158 |         // Initialize the context properties
159 |         cl_context_properties contextProperties = new cl_context_properties();
160 |         contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
161 |         
162 |         // Obtain the number of devices for the platform
163 |         int numDevicesArray[] = new int[1];
164 |         clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
165 |         int numDevices = numDevicesArray[0];
166 |         
167 |         // Obtain a device ID 
168 |         cl_device_id devices[] = new cl_device_id[numDevices];
169 |         clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
170 |         cl_device_id device = devices[deviceIndex];
171 | 
172 |         // Create a context for the selected device
173 |         context = clCreateContext(
174 |             contextProperties, 1, new cl_device_id[]{device}, 
175 |             null, null, null);
176 |         // Report the device the context was created for (not always devices[0])
177 |         String deviceName = getString(device, CL_DEVICE_NAME);
178 |         System.out.printf("CL_DEVICE_NAME: %s\n", deviceName);
179 |         
180 |         // Create a command-queue for the selected device
181 |         cl_queue_properties properties = new cl_queue_properties();
182 |         commandQueue = clCreateCommandQueueWithProperties(
183 |             context, device, properties, null);
184 |     }
185 |     
186 |     /**
187 |      * Print the given buffer as a matrix with the given number of columns
188 |      * 
189 |      * @param data The buffer
190 |      * @param columns The number of columns
191 |      */
192 |     private static void print2D(FloatBuffer data, int columns)
193 |     {
194 |         StringBuffer sb = new StringBuffer();
195 |         for (int i=0; i<data.capacity(); i++)
196 |         {
197 |             sb.append(String.format(Locale.ENGLISH, "%5.1f ", data.get(i)));
198 |             if (((i+1)%columns)==0)
199 |             {
200 |                 sb.append("\n");
201 |             }
202 |         }
203 |         System.out.print(sb.toString());
204 |     }
205 |     
206 |     /** Returns the value of a string-typed device info parameter. */
207 |     private static String getString(cl_device_id device, int paramName)
208 |     {
209 |         // First call: ask only for the size (in bytes) of the value
210 |         long byteCount[] = { 0 };
211 |         clGetDeviceInfo(device, paramName, 0, null, byteCount);
212 | 
213 |         // Second call: read the value into an array of exactly that size
214 |         byte raw[] = new byte[(int)byteCount[0]];
215 |         clGetDeviceInfo(device, paramName, raw.length, Pointer.to(raw), null);
216 |         // The trailing \0 byte is not part of the resulting string
217 |         return new String(raw, 0, raw.length - 1);
218 |     }
219 | 
220 | }
221 | 


--------------------------------------------------------------------------------
/src/main/resources/data/lena512color.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gpu/JOCLSamples/31114e3efd3a67e8d94f6ccdb676af192ab18c28/src/main/resources/data/lena512color.png


--------------------------------------------------------------------------------
/src/main/resources/kernels/Histogram256.cl:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 1993-2009 NVIDIA Corporation.  All rights reserved.
  3 |  *
  4 |  * NVIDIA Corporation and its licensors retain all intellectual property and 
  5 |  * proprietary rights in and to this software and related documentation. 
  6 |  * Any use, reproduction, disclosure, or distribution of this software 
  7 |  * and related documentation without an express license agreement from
  8 |  * NVIDIA Corporation is strictly prohibited.
  9 |  *
 10 |  * Please refer to the applicable NVIDIA end user license agreement (EULA) 
 11 |  * associated with this source code for terms and conditions that govern 
 12 |  * your use of this NVIDIA software.
 13 |  * 
 14 |  */
 15 | 
 16 | 
 17 | 
 18 | ////////////////////////////////////////////////////////////////////////////////
 19 | // Common definition
 20 | ////////////////////////////////////////////////////////////////////////////////
 21 | #define HISTOGRAM256_BIN_COUNT 256
 22 | 
 23 | #define      UINT_BITS 32U
 24 | #define LOG2_WARP_SIZE 5U
 25 | #define      WARP_SIZE (1U << LOG2_WARP_SIZE)
 26 | 
 27 | //Warps == sub-histograms per work-group
 28 | #define WARP_COUNT 6
 29 | 
 30 | //Workgroup size
 31 | #define HISTOGRAM256_WORKGROUP_SIZE (WARP_COUNT * WARP_SIZE)
 32 | 
 33 | //Local memory per workgroup
 34 | #define HISTOGRAM256_WORKGROUP_MEMORY (WARP_COUNT * HISTOGRAM256_BIN_COUNT)
 35 | 
 36 | 
 37 | 
 38 | ////////////////////////////////////////////////////////////////////////////////
 39 | // Main computation pass: compute per-workgroup partial histograms
 40 | ////////////////////////////////////////////////////////////////////////////////
 41 | #define TAG_MASK ( (1U << (UINT_BITS - LOG2_WARP_SIZE)) - 1U )
 42 | 
 43 | // Adds one to bin 'data'; repeats until the value read back is the one written here.
 44 | inline void addByte(volatile __local uint *l_WarpHist, uint data, uint tag){
 45 |     uint tagged;
 46 |     do{
 47 |         tagged = tag | ((l_WarpHist[data] & TAG_MASK) + 1); // count in TAG_MASK bits, tag OR-ed on top
 48 |         l_WarpHist[data] = tagged;
 49 |     }while(l_WarpHist[data] != tagged);
 50 | }
 51 | 
 52 | // Passes the four bytes of 'data' to addByte, least significant byte first.
 53 | inline void addWord(volatile __local uint *l_WarpHist, uint data, uint tag){
 54 |     for(uint shift = 0; shift < UINT_BITS; shift += 8U){
 55 |         addByte(l_WarpHist, (data >> shift) & 0xFFU, tag);
 56 |     }
 57 | }
 58 | 
 59 | __kernel __attribute__((reqd_work_group_size(HISTOGRAM256_WORKGROUP_SIZE, 1, 1)))
 60 | void histogram256(
 61 |     __global uint *d_PartialHistograms,
 62 |     __global uint *d_Data,
 63 |     uint dataCount
 64 | ){
 65 |     const uint lid = get_local_id(0);
 66 | 
 67 |     //One sub-histogram per group of WARP_SIZE work-items; select the one for this work-item
 68 |     __local uint l_Hist[WARP_COUNT * HISTOGRAM256_BIN_COUNT];
 69 |     __local uint *l_WarpHist = l_Hist + (lid >> LOG2_WARP_SIZE) * HISTOGRAM256_BIN_COUNT;
 70 | 
 71 |     //Zero l_Hist cooperatively: each work-item clears one element per work-group-sized stride
 72 |     for(uint chunk = 0; chunk < (HISTOGRAM256_BIN_COUNT / WARP_SIZE); chunk++)
 73 |         l_Hist[lid + chunk * (WARP_COUNT * WARP_SIZE)] = 0;
 74 | 
 75 |     //Tag = local id shifted into the topmost LOG2_WARP_SIZE bits (the bits TAG_MASK clears)
 76 |     const uint tag = lid << (32 - LOG2_WARP_SIZE);
 77 | 
 78 |     //Walk the input with a stride of the global size and count the four bytes of every word
 79 |     barrier(CLK_LOCAL_MEM_FENCE);
 80 |     for(uint pos = get_global_id(0); pos < dataCount; pos += get_global_size(0))
 81 |         addWord(l_WarpHist, d_Data[pos], tag);
 82 | 
 83 |     //Add up each bin across all sub-histograms (tag bits masked off) and
 84 |     //store the totals as the partial histogram of this work-group
 85 |     barrier(CLK_LOCAL_MEM_FENCE);
 86 |     for(uint bin = lid; bin < HISTOGRAM256_BIN_COUNT; bin += HISTOGRAM256_WORKGROUP_SIZE){
 87 |         uint total = 0;
 88 |         for(uint w = 0; w < WARP_COUNT; w++)
 89 |             total += l_Hist[bin + w * HISTOGRAM256_BIN_COUNT] & TAG_MASK;
 90 |         d_PartialHistograms[get_group_id(0) * HISTOGRAM256_BIN_COUNT + bin] = total;
 91 |     }
 92 | }
 93 | 
 94 | 
 95 | 
 96 | ////////////////////////////////////////////////////////////////////////////////
 97 | // Merge histogram256() output
 98 | // Run one workgroup per bin; each workgroup adds up the same bin counter 
 99 | // from every partial histogram. Reads are uncoalesced, but mergeHistogram256
100 | // takes only a fraction of total processing time
101 | ////////////////////////////////////////////////////////////////////////////////
102 | #define MERGE_WORKGROUP_SIZE 256
103 | 
104 | __kernel void mergeHistogram256(
105 |     __global uint *d_Histogram,
106 |     __global uint *d_PartialHistograms,
107 |     uint histogramCount
108 | ){
109 |     __local uint l_Data[MERGE_WORKGROUP_SIZE];
110 |     const size_t lid = get_local_id(0), bin = get_group_id(0);
111 |     //Each work-item adds up bin 'bin' over a strided subset of the partial histograms
112 |     uint partial = 0;
113 |     for(uint h = lid; h < histogramCount; h += MERGE_WORKGROUP_SIZE)
114 |         partial += d_PartialHistograms[bin + h * HISTOGRAM256_BIN_COUNT];
115 |     l_Data[lid] = partial;
116 |     //Pairwise reduction in local memory; a barrier precedes every halving step
117 |     for(uint stride = MERGE_WORKGROUP_SIZE / 2; stride > 0; stride >>= 1){
118 |         barrier(CLK_LOCAL_MEM_FENCE);
119 |         if(lid < stride)
120 |             l_Data[lid] += l_Data[lid + stride];
121 |     }
122 |     //The first work-item of the group stores the final count of this bin
123 |     if(lid == 0) d_Histogram[bin] = l_Data[0];
124 | }
125 | 


--------------------------------------------------------------------------------
/src/main/resources/kernels/Histogram_Kernels.cl:
--------------------------------------------------------------------------------
  1 | /* ============================================================
  2 | Copyright (c) 2009-2010 Advanced Micro Devices, Inc.  All rights reserved.
  3 |  
  4 | Redistribution and use of this material is permitted under the following 
  5 | conditions:
  6 |  
  7 | Redistributions must retain the above copyright notice and all terms of this 
  8 | license.
  9 |  
 10 | In no event shall anyone redistributing or accessing or using this material 
 11 | commence or participate in any arbitration or legal action relating to this 
 12 | material against Advanced Micro Devices, Inc. or any copyright holders or 
 13 | contributors. The foregoing shall survive any expiration or termination of 
 14 | this license or any agreement or access or use related to this material. 
 15 | ANY BREACH OF ANY TERM OF THIS LICENSE SHALL RESULT IN THE IMMEDIATE REVOCATION 
 16 | OF ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE THIS MATERIAL.
 17 | THIS MATERIAL IS PROVIDED BY ADVANCED MICRO DEVICES, INC. AND ANY COPYRIGHT 
 18 | HOLDERS AND CONTRIBUTORS "AS IS" IN ITS CURRENT CONDITION AND WITHOUT ANY 
 19 | REPRESENTATIONS, GUARANTEE, OR WARRANTY OF ANY KIND OR IN ANY WAY RELATED TO 
 20 | SUPPORT, INDEMNITY, ERROR FREE OR UNINTERRUPTED OPERA TION, OR THAT IT IS FREE 
 21 | FROM DEFECTS OR VIRUSES.  ALL OBLIGATIONS ARE HEREBY DISCLAIMED - WHETHER 
 22 | EXPRESS, IMPLIED, OR STATUTORY - INCLUDING, BUT NOT LIMITED TO, ANY IMPLIED 
 23 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, 
 24 | ACCURACY, COMPLETENESS, OPERABILITY, QUALITY OF SERVICE, OR NON-INFRINGEMENT. 
 25 | IN NO EVENT SHALL ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR 
 26 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, PUNITIVE,
 27 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
 28 | OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, REVENUE, DATA, OR PROFITS; OR 
 29 | BUSINESS INTERRUPTION) HOWEVER CAUSED OR BASED ON ANY THEORY OF LIABILITY 
 30 | ARISING IN ANY WAY RELATED TO THIS MATERIAL, EVEN IF ADVISED OF THE POSSIBILITY 
 31 | OF SUCH DAMAGE. THE ENTIRE AND AGGREGATE LIABILITY OF ADVANCED MICRO DEVICES, 
 32 | INC. AND ANY COPYRIGHT HOLDERS AND CONTRIBUTORS SHALL NOT EXCEED TEN DOLLARS 
 33 | (US $10.00). ANYONE REDISTRIBUTING OR ACCESSING OR USING THIS MATERIAL ACCEPTS 
 34 | THIS ALLOCATION OF RISK AND AGREES TO RELEASE ADVANCED MICRO DEVICES, INC. AND 
 35 | ANY COPYRIGHT HOLDERS AND CONTRIBUTORS FROM ANY AND ALL LIABILITIES, 
 36 | OBLIGATIONS, CLAIMS, OR DEMANDS IN EXCESS OF TEN DOLLARS (US $10.00). THE 
 37 | FOREGOING ARE ESSENTIAL TERMS OF THIS LICENSE AND, IF ANY OF THESE TERMS ARE 
 38 | CONSTRUED AS UNENFORCEABLE, FAIL IN ESSENTIAL PURPOSE, OR BECOME VOID OR 
 39 | DETRIMENTAL TO ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR 
 40 | CONTRIBUTORS FOR ANY REASON, THEN ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE 
 41 | THIS MATERIAL SHALL TERMINATE IMMEDIATELY. MOREOVER, THE FOREGOING SHALL 
 42 | SURVIVE ANY EXPIRATION OR TERMINATION OF THIS LICENSE OR ANY AGREEMENT OR 
 43 | ACCESS OR USE RELATED TO THIS MATERIAL.
 44 | NOTICE IS HEREBY PROVIDED, AND BY REDISTRIBUTING OR ACCESSING OR USING THIS 
 45 | MATERIAL SUCH NOTICE IS ACKNOWLEDGED, THAT THIS MATERIAL MAY BE SUBJECT TO 
 46 | RESTRICTIONS UNDER THE LAWS AND REGULATIONS OF THE UNITED STATES OR OTHER 
 47 | COUNTRIES, WHICH INCLUDE BUT ARE NOT LIMITED TO, U.S. EXPORT CONTROL LAWS SUCH 
 48 | AS THE EXPORT ADMINISTRATION REGULATIONS AND NATIONAL SECURITY CONTROLS AS 
 49 | DEFINED THEREUNDER, AS WELL AS STATE DEPARTMENT CONTROLS UNDER THE U.S. 
 50 | MUNITIONS LIST. THIS MATERIAL MAY NOT BE USED, RELEASED, TRANSFERRED, IMPORTED,
 51 | EXPORTED AND/OR RE-EXPORTED IN ANY MANNER PROHIBITED UNDER ANY APPLICABLE LAWS, 
 52 | INCLUDING U.S. EXPORT CONTROL LAWS REGARDING SPECIFICALLY DESIGNATED PERSONS, 
 53 | COUNTRIES AND NATIONALS OF COUNTRIES SUBJECT TO NATIONAL SECURITY CONTROLS. 
 54 | MOREOVER, THE FOREGOING SHALL SURVIVE ANY EXPIRATION OR TERMINATION OF ANY 
 55 | LICENSE OR AGREEMENT OR ACCESS OR USE RELATED TO THIS MATERIAL.
 56 | NOTICE REGARDING THE U.S. GOVERNMENT AND DOD AGENCIES: This material is 
 57 | provided with "RESTRICTED RIGHTS" and/or "LIMITED RIGHTS" as applicable to 
 58 | computer software and technical data, respectively. Use, duplication, 
 59 | distribution or disclosure by the U.S. Government and/or DOD agencies is 
 60 | subject to the full extent of restrictions in all applicable regulations, 
 61 | including those found at FAR52.227 and DFARS252.227 et seq. and any successor 
 62 | regulations thereof. Use of this material by the U.S. Government and/or DOD 
 63 | agencies is acknowledgment of the proprietary rights of any copyright holders 
 64 | and contributors, including those of Advanced Micro Devices, Inc., as well as 
 65 | the provisions of FAR52.227-14 through 23 regarding privately developed and/or 
 66 | commercial computer software.
 67 | This license forms the entire agreement regarding the subject matter hereof and 
 68 | supersedes all proposals and prior discussions and writings between the parties 
 69 | with respect thereto. This license does not affect any ownership, rights, title,
 70 | or interest in, or relating to, this material. No terms of this license can be 
 71 | modified or waived, and no breach of this license can be excused, unless done 
 72 | so in a writing signed by all affected parties. Each term of this license is 
 73 | separately enforceable. If any term of this license is determined to be or 
 74 | becomes unenforceable or illegal, such term shall be reformed to the minimum 
 75 | extent necessary in order for this license to remain in effect in accordance 
 76 | with its terms as modified by such reformation. This license shall be governed 
 77 | by and construed in accordance with the laws of the State of Texas without 
 78 | regard to rules on conflicts of law of any state or jurisdiction or the United 
 79 | Nations Convention on the International Sale of Goods. All disputes arising out 
 80 | of this license shall be subject to the jurisdiction of the federal and state 
 81 | courts in Austin, Texas, and all defenses are hereby waived concerning personal 
 82 | jurisdiction and venue of these courts.
 83 | ============================================================ */
 84 | 
 85 | 
 86 | /*
 87 |  * For a description of the algorithm and the terms used, please see the
 88 |  * documentation for this sample.
 89 |  *
 90 |  * On invocation of kernel histogram256, each work-item calculates its
 91 |  * thread-histogram bins, and finally all thread-histograms are merged into
 92 |  * the block-histogram bins. Outside the kernel, all block-histograms are
 93 |  * merged into the final histogram.
 94 |  */
 95 | 
 96 | #pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable 
 97 | 
 98 | #define BIN_SIZE 256
 99 | #define GROUP_SIZE 16
100 | 
101 | /**
102 |  * @brief   Builds one histogram with BIN_SIZE (256) bins per work-group
103 |  * @param   data  input values; every work-item reads BIN_SIZE consecutive elements
104 |  * @param   sharedArray local scratch, one BIN_SIZE-entry uchar histogram per work-item
105 |  * @param   binResult output, BIN_SIZE counters per work-group
106 |  */
107 | __kernel
108 | void histogram256(__global const uint* data,
109 |                   __local uchar* sharedArray,
110 |                   __global uint* binResult)
111 | {
112 |     const size_t localId = get_local_id(0);
113 |     __local uchar* ownBins = sharedArray + localId * BIN_SIZE;
114 |     __global const uint* ownData = data + get_global_id(0) * BIN_SIZE;
115 |     __global uint* groupBins = binResult + get_group_id(0) * BIN_SIZE;
116 | 
117 |     /* clear the histogram that belongs to this work-item */
118 |     for(int bin = 0; bin < BIN_SIZE; ++bin)
119 |         ownBins[bin] = 0;
120 | 
121 |     barrier(CLK_LOCAL_MEM_FENCE);
122 | 
123 |     /* count; NOTE(review): a uchar counter wraps to 0 if all BIN_SIZE inputs are equal */
124 |     for(int i = 0; i < BIN_SIZE; ++i)
125 |         ownBins[ownData[i]]++;
126 | 
127 |     barrier(CLK_LOCAL_MEM_FENCE);
128 | 
129 |     /* merge: this work-item sums bins localId, localId + GROUP_SIZE, ... over the
130 |        first GROUP_SIZE per-work-item histograms (presumably all of them - confirm) */
131 |     for(int chunk = 0; chunk < BIN_SIZE / GROUP_SIZE; ++chunk)
132 |     {
133 |         const size_t bin = chunk * GROUP_SIZE + localId;
134 |         uint binCount = 0;
135 |         for(int j = 0; j < GROUP_SIZE; ++j)
136 |             binCount += sharedArray[j * BIN_SIZE + bin];
137 | 
138 |         groupBins[bin] = binCount;
139 |     }
140 | }
141 | 
142 | 
143 | 


--------------------------------------------------------------------------------
/src/main/resources/kernels/QuadFloat.cl:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * JOCL - Java bindings for OpenCL
  3 |  *
  4 |  * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
  5 |  */
  6 | 
  7 | // Quad-Float functions for OpenCL float4 type.
  8 | // Ported from quad-double (QD) package:
  9 | // http://crd.lbl.gov/~dhbailey/mpdist/index.html
 10 | 
 11 | // Converts a single float to a quad-float: the value goes to .x, the rest is zero.
 12 | inline float4 qfAssign(float value) {
 13 |     return (float4)((float2)(value, 0.0f), (float2)(0.0f));
 14 | }
 15 | 
 16 | // Converts a float pair to a quad-float: .xy are taken from value, .zw are zero.
 17 | inline float4 qfAssign2(float2 value) {
 18 |     return (float4)(value, (float2)(0.0f));
 19 | }
 20 | 
 21 | // Returns the quad-float with the sign of every component flipped.
 22 | inline float4 qfNegate(float4 value) {
 23 |     return -value;
 24 | }
 25 | 
 26 | // Returns the rounded sum a + b and stores the correction term of that sum in *err.
 27 | inline float two_sum(float a, float b, float *err) {
 28 |     const float sum = a + b;
 29 |     const float bPart = sum - a;    // what was effectively added to a
 30 |     *err = (a - (sum - bPart)) + (b - bPart);
 31 |     return sum;
 32 | }
 33 | 
 34 | // Sums *a, *b and *c with two_sum; the result is left spread over *a, *b and *c.
 35 | inline void three_sum(float *a, float *b, float *c) {
 36 |     float errAB, errC;
 37 |     const float sumAB = two_sum(*a, *b, &errAB);
 38 |     *a = two_sum(*c, sumAB, &errC);
 39 |     *b = two_sum(errAB, errC, c);
 40 | }
 41 | 
 42 | // Like three_sum, but the two error terms are simply added into *b; *c is not modified.
 43 | inline void three_sum2(float *a, float *b, float *c) {
 44 |     float errAB, errC;
 45 |     const float sumAB = two_sum(*a, *b, &errAB);
 46 |     *a = two_sum(*c, sumAB, &errC);
 47 |     *b = errAB + errC;
 48 | }
 49 | 
 50 | 
 51 | // Shorter two_sum variant; presumably expects |a| >= |b| as in the QD original - confirm.
 52 | inline float quick_two_sum(float a, float b, float *err) {
 53 |     const float sum = a + b;
 54 |     *err = b - (sum - a);
 55 |     return sum;
 56 | }
 57 | 
 58 | // Renormalizes the five terms c0..c4 into the four terms c0..c3.
 59 | // On return *c4 still holds the error term that the first pass stored there.
 60 | inline void renorm(float *c0, float *c1, float *c2, float *c3, float *c4) {
 61 |     float r0, r1, r2 = 0.0f, r3 = 0.0f;
 62 |     // Pass 1: sum from the last term upwards; c1..c4 receive the error terms
 63 |     r0 = quick_two_sum(*c3, *c4, c4);
 64 |     r0 = quick_two_sum(*c2, r0, c3);
 65 |     r0 = quick_two_sum(*c1, r0, c2);
 66 |     *c0 = quick_two_sum(*c0, r0, c1);
 67 | 
 68 |     // Pass 2: accumulate downwards again. The next output slot (r1, r2, r3)
 69 |     // is only started when the error term of the previous sum is non-zero;
 70 |     // otherwise the following input is folded into the current slot.
 71 |     r0 = quick_two_sum(*c0, *c1, &r1);
 72 |     if (r1 != 0.0f)
 73 |     {
 74 |         r1 = quick_two_sum(r1, *c2, &r2);
 75 |         if (r2 != 0.0f)
 76 |         {
 77 |             r2 = quick_two_sum(r2, *c3, &r3);
 78 |             if (r3 != 0.0f)
 79 |             {
 80 |                 r3 += *c4;
 81 |             }
 82 |             else
 83 |             {
 84 |                 r2 += *c4;
 85 |             }
 86 |         }
 87 |         else
 88 |         {
 89 |             r1 = quick_two_sum(r1, *c3, &r2);
 90 |             if (r2 != 0.0f)
 91 |             {
 92 |                 r2 = quick_two_sum(r2, *c4, &r3);
 93 |             }
 94 |             else
 95 |             {
 96 |                 r1 = quick_two_sum(r1, *c4, &r2);
 97 |             }
 98 |         }
 99 |     }
100 |     else
101 |     {
102 |         r0 = quick_two_sum(r0, *c2, &r1);
103 |         if (r1 != 0.0f)
104 |         {
105 |             r1 = quick_two_sum(r1, *c3, &r2);
106 |             if (r2 != 0.0f)
107 |             {
108 |                 r2 = quick_two_sum(r2, *c4, &r3);
109 |             }
110 |             else
111 |             {
112 |                 r1 = quick_two_sum(r1, *c4, &r2);
113 |             }
114 |         }
115 |         else
116 |         {
117 |             r0 = quick_two_sum(r0, *c3, &r1);
118 |             if (r1 != 0.0f)
119 |             {
120 |                 r1 = quick_two_sum(r1, *c4, &r2);
121 |             }
122 |             else
123 |             {
124 |                 r0 = quick_two_sum(r0, *c4, &r1);
125 |             }
126 |         }
127 |     }
128 | 
129 |     *c0 = r0;
130 |     *c1 = r1;
131 |     *c2 = r2;
132 |     *c3 = r3;
133 | }
134 | 
135 | 
136 | 
137 | // Quad-float addition: *sum = a + b. Both operands are passed by value,
138 | // so sum may point at the variable the caller also passes as a or b.
139 | inline void qfAdd(float4 *sum, const float4 a, const float4 b) {
140 |     float h0, h1, h2, h3;   // component-wise sums
141 |     float e0, e1, e2, e3;   // error terms of those sums (reused as scratch)
142 | 
143 |     h0 = two_sum(a.x, b.x, &e0);
144 |     h1 = two_sum(a.y, b.y, &e1);
145 |     h2 = two_sum(a.z, b.z, &e2);
146 |     h3 = two_sum(a.w, b.w, &e3);
147 | 
148 |     // Feed every error term into the next lower component
149 |     h1 = two_sum(h1, e0, &e0);
150 |     three_sum(&h2, &e0, &e1);
151 |     three_sum2(&h3, &e0, &e2);
152 |     e0 = e0 + e1 + e3;
153 | 
154 |     // Reduce the five terms h0..h3, e0 to four and store them
155 |     renorm(&h0, &h1, &h2, &h3, &e0);
156 |     *sum = (float4)(h0, h1, h2, h3);
157 | }
158 | 
159 | // Splits a into *hi + *lo, using the constant 2^12 + 1 = 4097 as the splitting factor.
160 | inline void split(float a, float *hi, float *lo) {
161 |     const float scaled = 4097.0f * a;
162 |     *hi = scaled - (scaled - a);
163 |     *lo = a - *hi;
164 | }
165 | 
166 | 
167 | // Returns the rounded product a * b; *err gets the correction term built from the split halves.
168 | inline float two_prod(float a, float b, float *err) {
169 |     float aHi, aLo, bHi, bLo;
170 |     const float product = a * b;
171 |     split(a, &aHi, &aLo);
172 |     split(b, &bHi, &bLo);
173 |     *err = ((aHi * bHi - product) + aHi * bLo + aLo * bHi) + aLo * bLo;
174 |     return product;
175 | }
176 | 
177 | 
178 | // Quad-float product *prod = a * b (follows sloppy_mul of the QD package).
179 | // Both operands are passed by value, so prod may point at the caller's a or b.
180 | inline void qfMul(float4 *prod, const float4 a, const float4 b) {
181 |     float p0, p1, p2, p3, p4, p5;
182 |     float q0, q1, q2, q3, q4, q5;
183 |     float t0, t1;
184 |     float s0, s1, s2;
185 | 
186 |     // Partial products (p) and their error terms (q)
187 |     p0 = two_prod(a.x, b.x, &q0);
188 |     p1 = two_prod(a.x, b.y, &q1);
189 |     p2 = two_prod(a.y, b.x, &q2);
190 |     p3 = two_prod(a.x, b.z, &q3);
191 |     p4 = two_prod(a.y, b.y, &q4);
192 |     p5 = two_prod(a.z, b.x, &q5);
193 | 
194 |     // Accumulate the partial products
195 |     three_sum(&p1, &p2, &q0);
196 |     three_sum(&p2, &q1, &q2);
197 |     three_sum(&p3, &p4, &p5);
198 | 
199 |     // (s0, s1, s2) = (p2, q1, q2) + (p3, p4, p5)
200 |     s0 = two_sum(p2, p3, &t0);
201 |     s1 = two_sum(q1, p4, &t1);
202 |     s2 = q2 + p5;
203 |     s1 = two_sum(s1, t0, &t0);
204 |     s2 += (t0 + t1);
205 |     // Remaining low-order products and error terms, added without error tracking
206 |     s1 += a.x*b.w + a.y*b.z + a.z*b.y + a.w*b.x + q0 + q3 + q4 + q5;
207 |     // renorm leaves the four result terms in p0, p1, s0, s1. p2 and p3 only
208 |     // hold intermediate sums, so they must not be stored as .z and .w.
209 |     renorm(&p0, &p1, &s0, &s1, &s2);
210 |     *prod = (float4)(p0, p1, s0, s1);
211 | }
212 | 
213 | 
214 | // Quad-float times scalar: *prod = a * b.
215 | // 'a' is passed by value, so prod may point at the variable the caller
216 | // also passes as a.
217 | inline void qfMulFloat(float4 *prod, const float4 a, const float b) {
218 |     float m0, m1, m2, m3;      // products of the components of a with b
219 |     float e0, e1, e2;          // error terms of m0..m2 (reused as scratch)
220 |     float r0, r1, r2, r3, r4;  // terms handed to renorm
221 | 
222 |     m0 = two_prod(a.x, b, &e0);
223 |     m1 = two_prod(a.y, b, &e1);
224 |     m2 = two_prod(a.z, b, &e2);
225 |     m3 = a.w * b;              // lowest component: plain product, no error term
226 | 
227 |     // Combine products and error terms into the five terms r0..r4
228 |     r0 = m0;
229 |     r1 = two_sum(e0, m1, &r2);
230 | 
231 |     three_sum(&r2, &e1, &m2);
232 | 
233 |     three_sum2(&e1, &e2, &m3);
234 |     r3 = e1;
235 | 
236 |     r4 = e2 + m2;
237 | 
238 |     // Reduce to four terms and store them
239 |     renorm(&r0, &r1, &r2, &r3, &r4);
240 |     *prod = (float4)(r0, r1, r2, r3);
241 | }
242 | 
243 | 
244 | // True when (*a).x < b, or when (*a).x equals b and (*a).y is negative.
245 | inline bool qfLessThan(float4 *a, float b) {
246 |     return ((*a).x != b) ? ((*a).x < b) : ((*a).y < 0.0f);
247 | }
248 | 
249 | // Four-term counterpart of renorm: renormalizes c0..c3 in place.
250 | // Terms that are not reached by the second pass are written as zero.
251 | inline void renorm4(float *c0, float *c1, float *c2, float *c3) {
252 |     float r0, r1, r2 = 0.0f, r3 = 0.0f;
253 |     // Pass 1: sum from the last term upwards; c1..c3 receive the error terms
254 |     r0 = quick_two_sum(*c2, *c3, c3);
255 |     r0 = quick_two_sum(*c1, r0, c2);
256 |     *c0 = quick_two_sum(*c0, r0, c1);
257 |     // Pass 2: accumulate downwards, starting a new slot only after a non-zero term
258 |     r0 = *c0;
259 |     r1 = *c1;
260 |     if (r1 != 0.0f)
261 |     {
262 |         r1 = quick_two_sum(r1, *c2, &r2);
263 |         if (r2 != 0.0f)
264 |         {
265 |             r2 = quick_two_sum(r2, *c3, &r3);
266 |         }
267 |         else
268 |         {
269 |             r1 = quick_two_sum(r1, *c3, &r2);
270 |         }
271 |     }
272 |     else
273 |     {
274 |         r0 = quick_two_sum(r0, *c2, &r1);
275 |         if (r1 != 0.0f)
276 |         {
277 |             r1 = quick_two_sum(r1, *c3, &r2);
278 |         }
279 |         else
280 |         {
281 |             r0 = quick_two_sum(r0, *c3, &r1);
282 |         }
283 |     }
284 |     *c0 = r0;
285 |     *c1 = r1;
286 |     *c2 = r2;
287 |     *c3 = r3;
288 | }
289 | 
290 | // Quad-float division a / b by repeated correction: each quotient term is
291 | // the leading component of the current remainder divided by b.x, and the
292 | // remainder is then reduced by b times that term. The four terms are
293 | // renormalized with renorm4 and returned as one float4.
294 | // No check is made for b.x being zero.
295 | float4 qfDiv(const float4 a, const float4 b) {
296 |     float q0, q1, q2, q3;   // quotient terms
297 |     float4 rem;             // running remainder
298 |     float4 scaled;          // b times the latest quotient term
299 | 
300 |     // rem = a - b * q0
301 |     q0 = a.x / b.x;
302 |     qfMulFloat(&scaled, b, q0);
303 |     qfAdd(&rem, a, -scaled);
304 | 
305 |     // rem -= b * q1
306 |     q1 = rem.x / b.x;
307 |     qfMulFloat(&scaled, b, q1);
308 |     qfAdd(&rem, rem, -scaled);
309 | 
310 |     // rem -= b * q2
311 |     q2 = rem.x / b.x;
312 |     qfMulFloat(&scaled, b, q2);
313 |     qfAdd(&rem, rem, -scaled);
314 | 
315 |     // The last term is taken without updating the remainder again
316 |     q3 = rem.x / b.x;
317 | 
318 |     renorm4(&q0, &q1, &q2, &q3);
319 | 
320 |     return (float4)(q0, q1, q2, q3);
321 | }
322 | 


--------------------------------------------------------------------------------
/src/main/resources/kernels/QuadFloatMandelbrot.cl:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * JOCL - Java bindings for OpenCL
 3 |  *
 4 |  * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
 5 |  */
 6 | 
 7 | // A mandelbrot kernel using QuadFloat functions
 8 | 
 9 | // Counts Mandelbrot iterations for the point
10 | //     c = (x0 + relX * dx,  y0 + relY * dy)
11 | // using the quad-float helpers. x0, y0, dx and dy are float pairs that are
12 | // widened with qfAssign2. The loop runs while the iteration count is below
13 | // maxIterations and the squared magnitude is below 4 (per qfLessThan).
14 | // Returns the number of iterations that were executed.
15 | inline int iterate(
16 |     float2 x0, float2 y0,
17 |     float2 dx, float2 dy,
18 |     float relX, float relY,
19 |     int maxIterations)
20 | {
21 |     const float4 originRe = qfAssign2(x0);
22 |     const float4 originIm = qfAssign2(y0);
23 |     const float4 extentRe = qfAssign2(dx);
24 |     const float4 extentIm = qfAssign2(dy);
25 | 
26 |     float4 tmp = qfAssign(0);
27 | 
28 |     // c = (cRe, cIm): the sample position
29 |     float4 cRe = qfAssign(0);
30 |     float4 cIm = qfAssign(0);
31 |     qfMulFloat(&tmp, extentRe, relX);
32 |     qfAdd(&cRe, originRe, tmp);
33 |     qfMulFloat(&tmp, extentIm, relY);
34 |     qfAdd(&cIm, originIm, tmp);
35 | 
36 |     // z = (zRe, zIm) starts at zero; zRe2 and zIm2 hold the squares
37 |     float4 zRe = qfAssign(0);
38 |     float4 zIm = qfAssign(0);
39 |     float4 zRe2 = qfAssign(0);
40 |     float4 zIm2 = qfAssign(0);
41 |     float4 norm2 = qfAssign(0);
42 | 
43 |     int iteration;
44 |     for (iteration = 0;
45 |          iteration < maxIterations && qfLessThan(&norm2, 4);
46 |          iteration++)
47 |     {
48 |         // Squares of the current z components
49 |         qfMul(&zRe2, zRe, zRe);
50 |         qfMul(&zIm2, zIm, zIm);
51 | 
52 |         // zIm = 2 * zRe * zIm + cIm  (zRe is still the old value here)
53 |         qfMulFloat(&tmp, zRe, 2);
54 |         qfMul(&tmp, tmp, zIm);
55 |         qfAdd(&zIm, tmp, cIm);
56 | 
57 |         // zRe = zRe2 - zIm2 + cRe
58 |         tmp = -zIm2;
59 |         qfAdd(&tmp, zRe2, tmp);
60 |         qfAdd(&zRe, tmp, cRe);
61 | 
62 |         // Squared magnitude, from the squares computed at the top of
63 |         // this iteration (i.e. of z before it was updated)
64 |         qfAdd(&norm2, zRe2, zIm2);
65 |     }
66 | 
67 |     return iteration;
68 | 
69 | }
70 | 
71 | 
72 | 
73 | __kernel void computeMandelbrot(
74 |     __global uint *output,
75 |     int sizeX, int sizeY,
76 |     int tileX, int tileY,
77 |     int tileSizeX, int tileSizeY,
78 |     float2 x0, float2 y0,
79 |     float2 dx, float2 dy,
80 |     int maxIterations)
81 | {
82 |     // Position of this work-item inside the tile
83 |     const unsigned int inTileX = get_global_id(0);
84 |     const unsigned int inTileY = get_global_id(1);
85 |     // Position inside the whole image, as a fraction of sizeX / sizeY
86 |     const int pixelX = inTileX + tileX * tileSizeX;
87 |     const int pixelY = inTileY + tileY * tileSizeY;
88 |     const float relX = (float)pixelX / sizeX;
89 |     const float relY = (float)pixelY / sizeY;
90 |     // The output is indexed within the tile, with tileSizeX entries per row
91 |     const unsigned int slot = mul24((int)inTileY, tileSizeX) + inTileX;
92 |     output[slot] = iterate(x0, y0, dx, dy, relX, relY, maxIterations);
93 | }
94 | 
95 | 


--------------------------------------------------------------------------------
/src/main/resources/kernels/SimpleConvolution.cl:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * JOCL - Java bindings for OpenCL
 3 |  *
 4 |  * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
 5 |  */
 6 | 
 7 | // A simple image convolution kernel
 8 | 
 9 | __kernel void convolution(
10 |     __global uchar4 *input,
11 |     __global float *mask,
12 |     __global uchar4 *output,
13 |     const int2 imageSize,
14 |     const int2 maskSize,
15 |     const int2 maskOrigin)
16 | {
17 |     int gx = get_global_id(0);
18 |     int gy = get_global_id(1);
19 | 
20 |     if (gx >= maskOrigin.x &&
21 |         gy >= maskOrigin.y &&
22 |         gx < imageSize.x - (maskSize.x-maskOrigin.x-1) &&
23 |         gy < imageSize.y - (maskSize.y-maskOrigin.y-1))
24 |     {
25 |         float4 sum = (float4)0;
26 |         for(int mx=0; mx<maskSize.x; mx++)
27 |         {
28 |             for(int my=0; my<maskSize.x; my++)
29 |             {
30 |                 int mi = mul24(my, maskSize.x) + mx;
31 |                 int ix = gx - maskOrigin.x + mx;
32 |                 int iy = gy - maskOrigin.y + my;
33 |                 int i = mul24(iy, imageSize.x) + ix;
34 |                 sum += convert_float4(input[i]) * mask[mi];
35 |             }
36 |         }
37 |         uchar4 result = convert_uchar4_sat(sum);
38 |         output[mul24(gy, imageSize.x)+gx] = result;
39 |     }
40 |     else
41 |     {
42 |         if (gx >= 0 && gx < imageSize.x &&
43 |             gy >= 0 && gy < imageSize.y)
44 |         {
45 |             output[mul24(gy, imageSize.x)+gx] = (uchar4)0;
46 |         }
47 |     }
48 | 
49 | }
50 | 
51 | 
52 | 


--------------------------------------------------------------------------------
/src/main/resources/kernels/SimpleMandelbrot.cl:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * JOCL - Java bindings for OpenCL
 3 |  *
 4 |  * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
 5 |  */
 6 | 
 7 | // A very simple OpenCL kernel for computing the mandelbrot set
 8 | //
 9 | // output        : A buffer with sizeX*sizeY elements, storing
10 | //                 the colors as RGB ints
11 | // sizeX, sizeY  : The width and height of the buffer
12 | // x0,y0,x1,y1   : The rectangle in which the mandelbrot
13 | //                 set will be computed
14 | // maxIterations : The maximum number of iterations
15 | // colorMap      : A buffer with colorMapSize elements,
16 | //                 containing the pixel colors
17 | 
// Computes one pixel of the Mandelbrot set per work-item.
//
// Pixels that do not escape within maxIterations are written as 0.
// All other pixels receive the colorMap entry selected by the ratio
// of the escape iteration to maxIterations.
__kernel void computeMandelbrot(
    __global uint *output,
    int sizeX, int sizeY,
    float x0, float y0,
    float x1, float y1,
    int maxIterations,
    __global uint *colorMap,
    int colorMapSize
    )
{
    unsigned int ix = get_global_id(0);
    unsigned int iy = get_global_id(1);

    // The point of the complex plane that belongs to this pixel
    float r = x0 + ix * (x1-x0) / sizeX;
    float i = y0 + iy * (y1-y0) / sizeY;

    float x = 0;
    float y = 0;

    float magnitudeSquared = 0;
    int iteration = 0;
    while (iteration<maxIterations && magnitudeSquared<4)
    {
        float xx = x*x;
        float yy = y*y;
        y = 2*x*y+i;
        x = xx-yy+r;
        magnitudeSquared=xx+yy;
        iteration++;
    }
    if (iteration == maxIterations)
    {
        output[iy*sizeX+ix] = 0;
    }
    else
    {
        float alpha = (float)iteration/maxIterations;
        int colorIndex = (int)(alpha * colorMapSize);

        // Floating point rounding may let alpha reach 1.0 for very large
        // iteration counts. Keep the index inside the color map, and
        // leave it at 0 when colorMapSize is not positive.
        colorIndex = min(colorIndex, max(colorMapSize - 1, 0));

        output[iy*sizeX+ix] = colorMap[colorIndex];
    }
}


--------------------------------------------------------------------------------
/src/main/resources/kernels/reduction.cl:
--------------------------------------------------------------------------------
 1 | 
 2 | // The reduction kernel that is described as "Two-stage reduction" at
 3 | // http://developer.amd.com/resources/documentation-articles/
 4 | //   articles-whitepapers/opencl-optimization-case-study-simple-reductions/
 5 | // adjusted to perform an ADD-reduction instead of a MIN-reduction
 6 |  
 7 | __kernel void reduce(
 8 |     __global float* buffer,
 9 |     __local float* scratch,
10 |     __const int length,
11 |     __global float* result) 
12 | {
13 |     int globalIndex = get_global_id(0);
14 |     float accumulator = 0;
15 | 
16 |     // Loop sequentially over chunks of input vector
17 |     while (globalIndex < length) 
18 |     {
19 |         float element = buffer[globalIndex];
20 |         accumulator += element;
21 |         globalIndex += get_global_size(0);
22 |     }
23 | 
24 |     // Perform parallel reduction
25 |     int lid = get_local_id(0);
26 |     scratch[lid] = accumulator;
27 |     barrier(CLK_LOCAL_MEM_FENCE);
28 |     for(int offset = get_local_size(0) / 2; offset > 0; offset = offset / 2) 
29 |     {
30 |         if (lid < offset) 
31 |         {
32 |             float other = scratch[lid + offset];
33 |             float mine = scratch[lid];
34 |             scratch[lid] = mine + other;
35 |         }
36 |         barrier(CLK_LOCAL_MEM_FENCE);
37 |     }
38 |     if (lid == 0) 
39 |     {
40 |         result[get_group_id(0)] = scratch[0];
41 |     }
42 | }


--------------------------------------------------------------------------------
/src/main/resources/kernels/simpleGL.cl:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 1993-2009 NVIDIA Corporation.  All rights reserved.
 3 |  *
 4 |  * NVIDIA Corporation and its licensors retain all intellectual property and 
 5 |  * proprietary rights in and to this software and related documentation. 
 6 |  * Any use, reproduction, disclosure, or distribution of this software 
 7 |  * and related documentation without an express license agreement from
 8 |  * NVIDIA Corporation is strictly prohibited.
 9 |  *
10 |  * Please refer to the applicable NVIDIA end user license agreement (EULA) 
11 |  * associated with this source code for terms and conditions that govern 
12 |  * your use of this NVIDIA software.
13 |  * 
14 |  */
15 | 
16 |  /* This example demonstrates how to use the OpenCL/OpenGL bindings  */
17 | 
///////////////////////////////////////////////////////////////////////////////
//! Simple kernel to modify vertex positions in sine wave pattern
//! @param pos     vertices in global memory, one float4 per grid point,
//!                stored row by row with a row length of width
//! @param width   divisor that normalizes the first work-item index
//! @param height  divisor that normalizes the second work-item index
//! @param time    offset that is added to the sine and cosine arguments
///////////////////////////////////////////////////////////////////////////////
__kernel void sine_wave(__global float4* pos, unsigned int width, unsigned int height, float time)
{
    const unsigned int column = get_global_id(0);
    const unsigned int row = get_global_id(1);

    // normalize the grid position and remap it from 0..1 to -1..1
    const float u = (column / (float) width) * 2.0f - 1.0f;
    const float v = (row / (float) height) * 2.0f - 1.0f;

    // elevation of the vertex: product of a sine wave along u and a
    // cosine wave along v, scaled by 0.5
    const float frequency = 4.0f;
    const float elevation = sin(u*frequency + time) * cos(v*frequency + time) * 0.5f;

    // store the vertex, with the elevation as its second component
    pos[row*width+column] = (float4)(u, elevation, v, 1.0f);
}
40 | 
41 | 


--------------------------------------------------------------------------------