├── .gitignore
├── LICENSE
├── README.md
├── pom.xml
└── src
└── main
├── java
└── org
│ └── jocl
│ └── samples
│ ├── HistogramAMD.java
│ ├── HistogramNVIDIA.java
│ ├── JOCLBandwidthTest.java
│ ├── JOCLDeviceQuery.java
│ ├── JOCLEventSample.java
│ ├── JOCLMandelbrot.java
│ ├── JOCLMappedBufferSample.java
│ ├── JOCLMultiDeviceSample.java
│ ├── JOCLReduction.java
│ ├── JOCLSample.java
│ ├── JOCLSample_1_1.java
│ ├── JOCLSample_1_2_KernelArgs.java
│ ├── JOCLSample_2_0_SVM.java
│ ├── JOCLSimpleConvolution.java
│ ├── JOCLSimpleGL3.java
│ ├── JOCLSimpleImage.java
│ ├── JOCLSimpleLWJGL.java
│ ├── JOCLSimpleMandelbrot.java
│ ├── JOCLSubBufferSample.java
│ └── blast
│ ├── JOCLBlastCaxpyBatchedSample.java
│ ├── JOCLBlastDgemmSample.java
│ └── JOCLBlastSample.java
└── resources
├── data
└── lena512color.png
└── kernels
├── Histogram256.cl
├── Histogram_Kernels.cl
├── QuadFloat.cl
├── QuadFloatMandelbrot.cl
├── SimpleConvolution.cl
├── SimpleMandelbrot.cl
├── reduction.cl
└── simpleGL.cl
/.gitignore:
--------------------------------------------------------------------------------
1 | /.settings
2 | /target
3 | /.classpath
4 | /.project
5 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Marco Hutter
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # JOCLSamples
2 |
3 | Samples for JOCL - http://jocl.org
4 |
5 | **Note:** These samples have been moved here from the original samples page
6 | of the JOCL website, [http://www.jocl.org/samples/samples.html](http://www.jocl.org/samples/samples.html).
7 | These are mainly *standalone* samples, which means that each class contains
8 | the whole code that is required for the sample, although some of them refer
9 | to kernels that are stored in `src/main/resources/kernels`. Several methods
10 | (e.g. for the basic OpenCL initialization) appear in each of these samples.
11 | They may be moved to a utility class in the future.
12 |
13 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
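1 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2 |   xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3 |   <modelVersion>4.0.0</modelVersion>
4 |
5 |   <groupId>org.jocl</groupId>
6 |   <artifactId>jocl-samples</artifactId>
7 |   <version>0.0.1-SNAPSHOT</version>
8 |
9 |   <build>
10 |     <plugins>
11 |       <plugin>
12 |         <groupId>org.apache.maven.plugins</groupId>
13 |         <artifactId>maven-compiler-plugin</artifactId>
14 |         <version>2.3.2</version>
15 |         <configuration>
16 |           <source>1.7</source>
17 |           <target>1.7</target>
18 |         </configuration>
19 |       </plugin>
20 |     </plugins>
21 |   </build>
22 |
23 |   <dependencies>
24 |     <dependency>
25 |       <groupId>org.jocl</groupId>
26 |       <artifactId>jocl</artifactId>
27 |       <version>2.0.4</version>
28 |     </dependency>
29 |     <dependency>
30 |       <groupId>org.jocl</groupId>
31 |       <artifactId>jocl-blast</artifactId>
32 |       <version>1.5.0</version>
33 |     </dependency>
34 |     <dependency>
35 |       <groupId>org.jogamp.gluegen</groupId>
36 |       <artifactId>gluegen-rt-main</artifactId>
37 |       <version>2.3.1</version>
38 |     </dependency>
39 |     <dependency>
40 |       <groupId>org.jogamp.jogl</groupId>
41 |       <artifactId>jogl-all-main</artifactId>
42 |       <version>2.3.1</version>
43 |     </dependency>
44 |     <dependency>
45 |       <groupId>org.lwjgl.lwjgl</groupId>
46 |       <artifactId>lwjgl</artifactId>
47 |       <version>2.9.3</version>
48 |     </dependency>
49 |   </dependencies>
50 |
51 | </project>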
--------------------------------------------------------------------------------
/src/main/java/org/jocl/samples/HistogramNVIDIA.java:
--------------------------------------------------------------------------------
1 | /*
2 | * JOCL - Java bindings for OpenCL
3 | *
4 | * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
5 | */
6 | package org.jocl.samples;
7 |
8 | import static org.jocl.CL.*;
9 |
10 | import java.io.*;
11 | import java.util.Random;
12 |
13 | import org.jocl.*;
14 |
15 | /**
16 | * This class is a port of the NVIDIA OpenCL SDK "Histogram" sample.
17 | * The structure of the code has intentionally been kept similar
18 | * to the original sample.
19 | */
20 | public class HistogramNVIDIA
21 | {
22 | public static final int HISTOGRAM256_BIN_COUNT = 256;
23 |
24 | //OpenCL histogram256 program
25 | static cl_program cpHistogram256;
26 |
27 | //OpenCL histogram256 kernels
28 | static cl_kernel ckHistogram256, ckMergeHistogram256;
29 |
30 | //histogram256() intermediate results buffer
31 | static int PARTIAL_HISTOGRAM256_COUNT = 240;
32 | static cl_mem d_PartialHistograms;
33 |
34 | //Default command queue for histogram256 kernels
35 | static cl_command_queue cqDefaultCommandQue;
36 |
37 |
38 |
39 | ////////////////////////////////////////////////////////////////////////////////
40 | //Test driver
41 | ////////////////////////////////////////////////////////////////////////////////
42 | public static void main(String args[])
43 | {
44 | cl_context cxGPUContext; //OpenCL context
45 | cl_command_queue cqCommandQue; //OpenCL command que
46 | cl_mem d_Data, d_Histogram; //OpenCL memory buffer objects
47 |
48 | long dataBytes[] = new long[1];
49 | int ciErrNum[] = new int[1];
50 | int PassFailFlag = 1;
51 |
52 | byte h_Data[];
53 | int h_HistogramCPU[], h_HistogramGPU[];
54 |
55 | int byteCount = 128 * 8192;
56 |
57 | // start logs
58 | System.out.println("Starting...\n");
59 |
60 | System.out.println("Initializing data...");
61 | h_Data = new byte[byteCount];
62 | h_HistogramCPU = new int[HISTOGRAM256_BIN_COUNT];
63 | h_HistogramGPU = new int[HISTOGRAM256_BIN_COUNT];
64 |
65 | Random random = new Random(2009);
66 | for(int i = 0; i < byteCount; i++)
67 | h_Data[i] = (byte)(random.nextInt() & 0xFF);
68 |
69 | // This will allow us to subsequently omit the "shrCheckError" calls for this sample
70 | CL.setExceptionsEnabled(true);
71 |
72 | System.out.println("Initializing OpenCL...");
73 |
74 | // Obtain the platform IDs and initialize the context properties
75 | cl_platform_id platforms[] = new cl_platform_id[1];
76 | clGetPlatformIDs(platforms.length, platforms, null);
77 | cl_context_properties contextProperties = new cl_context_properties();
78 | contextProperties.addProperty(CL_CONTEXT_PLATFORM, platforms[0]);
79 | cxGPUContext = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, null, null, ciErrNum);
80 |
81 | // get the list of GPU devices associated with context
82 | clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, null, dataBytes);
83 | cl_device_id cdDevices[] = new cl_device_id[(int)dataBytes[0] / Sizeof.cl_device_id];
84 |
85 | clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, dataBytes[0], Pointer.to(cdDevices), null);
86 |
87 | //Create a command-queue
88 | cl_queue_properties properties = new cl_queue_properties();
89 | cqCommandQue = clCreateCommandQueueWithProperties(
90 | cxGPUContext, cdDevices[0], properties, ciErrNum);
91 |
92 | System.out.println("Allocating OpenCL memory...\n");
93 | d_Data = clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, byteCount * Sizeof.cl_char, Pointer.to(h_Data), ciErrNum);
94 | d_Histogram = clCreateBuffer(cxGPUContext, CL_MEM_READ_WRITE, HISTOGRAM256_BIN_COUNT * Sizeof.cl_int, null, ciErrNum);
95 |
96 | System.out.println("Initializing 256-bin OpenCL histogram...");
97 | initHistogram256(cxGPUContext, cqCommandQue);
98 |
99 | System.out.printf("Running 256-bin OpenCL histogram for %d bytes...\n", byteCount);
100 | histogram256(null, d_Histogram, d_Data, byteCount);
101 |
102 | System.out.println("Validating OpenCL results...");
103 | System.out.println("...reading back OpenCL results");
104 | clEnqueueReadBuffer(cqCommandQue, d_Histogram, CL_TRUE, 0, HISTOGRAM256_BIN_COUNT * Sizeof.cl_int, Pointer.to(h_HistogramGPU), 0, null, null);
105 |
106 | System.out.println("...histogram256CPU()");
107 |
108 | histogram256CPU(h_HistogramCPU, h_Data, byteCount);
109 |
110 | for(int i = 0; i < HISTOGRAM256_BIN_COUNT; i++)
111 | {
112 | if(h_HistogramGPU[i] != h_HistogramCPU[i])
113 | {
114 | PassFailFlag = 0;
115 | }
116 | }
117 | System.out.println(PassFailFlag != 0 ? "256-bin histograms match\n" : "***256-bin histograms do not match!!!***\n" );
118 |
119 | System.out.println("Shutting down 256-bin OpenCL histogram...\n\n");
120 |
121 | //Release kernels and program
122 | closeHistogram256();
123 |
124 | // pass or fail
125 | System.out.printf("TEST %s\n", PassFailFlag != 0 ? "PASSED" : "FAILED !!!");
126 |
127 | System.out.println("Shutting down...");
128 |
129 | //Release other OpenCL Objects
130 | ciErrNum[0] = clReleaseMemObject(d_Histogram);
131 | ciErrNum[0] |= clReleaseMemObject(d_Data);
132 | ciErrNum[0] |= clReleaseCommandQueue(cqCommandQue);
133 | ciErrNum[0] |= clReleaseContext(cxGPUContext);
134 | }
135 |
136 |
137 | static void histogram256CPU(int h_Histogram[], byte h_Data[], int byteCount)
138 | {
139 | for(int i = 0; i < HISTOGRAM256_BIN_COUNT; i++)
140 | h_Histogram[i] = 0;
141 |
142 | for(int i = 0; i < byteCount; i++){
143 | int data = h_Data[i];
144 | if (data < 0)
145 | {
146 | data+=256;
147 | }
148 | h_Histogram[data]++;
149 | }
150 | }
151 |
152 |
153 | ////////////////////////////////////////////////////////////////////////////////
154 | // OpenCL launchers for histogram256 / mergeHistogram256 kernels
155 | ////////////////////////////////////////////////////////////////////////////////
156 |
157 | static void initHistogram256(cl_context cxGPUContext, cl_command_queue cqParamCommandQue)
158 | {
159 | int ciErrNum[] = new int[1];
160 |
161 | System.out.println("...loading Histogram256.cl");
162 | String cHistogram256 = readFile("src/main/resources/kernels/Histogram256.cl");
163 |
164 | System.out.println("...creating histogram256 program");
165 | cpHistogram256 = clCreateProgramWithSource(cxGPUContext, 1, new String[]{cHistogram256}, new long[]{cHistogram256.length()}, ciErrNum);
166 |
167 | System.out.println("...building histogram256 program");
168 | ciErrNum[0] = clBuildProgram(cpHistogram256, 0, null, null, null, null);
169 |
170 | System.out.println("...creating histogram256 kernels");
171 | ckHistogram256 = clCreateKernel(cpHistogram256, "histogram256", ciErrNum);
172 | ckMergeHistogram256 = clCreateKernel(cpHistogram256, "mergeHistogram256", ciErrNum);
173 |
174 | System.out.println("...allocating internal histogram256 buffer");
175 | d_PartialHistograms = clCreateBuffer(cxGPUContext, CL_MEM_READ_WRITE, PARTIAL_HISTOGRAM256_COUNT * HISTOGRAM256_BIN_COUNT * Sizeof.cl_uint, null, ciErrNum);
176 |
177 | //Save default command queue
178 | cqDefaultCommandQue = cqParamCommandQue;
179 | }
180 |
181 | static void closeHistogram256()
182 | {
183 | clReleaseMemObject(d_PartialHistograms);
184 | clReleaseKernel(ckMergeHistogram256);
185 | clReleaseKernel(ckHistogram256);
186 | clReleaseProgram(cpHistogram256);
187 | }
188 |
189 | static void histogram256(cl_command_queue cqCommandQue, cl_mem d_Histogram, cl_mem d_Data, int byteCount)
190 | {
191 | long localWorkSize[] = new long[1];
192 | long globalWorkSize[] = new long[1];
193 |
194 | if(cqCommandQue == null)
195 | cqCommandQue = cqDefaultCommandQue;
196 |
197 | int WARP_SIZE = 32;
198 | int WARP_COUNT = 6;
199 |
200 | int dataCount = byteCount / 4;
201 | clSetKernelArg(ckHistogram256, 0, Sizeof.cl_mem, Pointer.to(d_PartialHistograms));
202 | clSetKernelArg(ckHistogram256, 1, Sizeof.cl_mem, Pointer.to(d_Data));
203 | clSetKernelArg(ckHistogram256, 2, Sizeof.cl_uint, Pointer.to(new int[]{dataCount}));
204 |
205 | localWorkSize[0] = WARP_SIZE * WARP_COUNT;
206 | globalWorkSize[0] = PARTIAL_HISTOGRAM256_COUNT * localWorkSize[0];
207 |
208 | clEnqueueNDRangeKernel(cqCommandQue, ckHistogram256, 1, null, globalWorkSize, localWorkSize, 0, null, null);
209 |
210 | int MERGE_WORKGROUP_SIZE = 256;
211 | clSetKernelArg(ckMergeHistogram256, 0, Sizeof.cl_mem, Pointer.to(d_Histogram));
212 | clSetKernelArg(ckMergeHistogram256, 1, Sizeof.cl_mem, Pointer.to(d_PartialHistograms));
213 | clSetKernelArg(ckMergeHistogram256, 2, Sizeof.cl_uint, Pointer.to(new int[]{PARTIAL_HISTOGRAM256_COUNT}));
214 |
215 | localWorkSize[0] = MERGE_WORKGROUP_SIZE;
216 | globalWorkSize[0] = HISTOGRAM256_BIN_COUNT * localWorkSize[0];
217 |
218 | clEnqueueNDRangeKernel(cqCommandQue, ckMergeHistogram256, 1, null, globalWorkSize, localWorkSize, 0, null, null);
219 | }
220 |
221 |
222 | private static String readFile(String fileName)
223 | {
224 | BufferedReader br = null;
225 | try
226 | {
227 | br = new BufferedReader(new FileReader(fileName));
228 | StringBuilder sb = new StringBuilder();
229 | String line = null;
230 | while (true)
231 | {
232 | line = br.readLine();
233 | if (line == null)
234 | {
235 | break;
236 | }
237 | sb.append(line+"\n");
238 | }
239 | return sb.toString();
240 | }
241 | catch (IOException e)
242 | {
243 | e.printStackTrace();
244 | return "";
245 | }
246 | finally
247 | {
248 | if (br != null)
249 | {
250 | try
251 | {
252 | br.close();
253 | }
254 | catch (IOException e)
255 | {
256 | e.printStackTrace();
257 | }
258 | }
259 | }
260 | }
261 |
262 | }
263 |
--------------------------------------------------------------------------------
/src/main/java/org/jocl/samples/JOCLBandwidthTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * JOCL - Java bindings for OpenCL
3 | *
4 | * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
5 | */
6 | package org.jocl.samples;
7 | import static org.jocl.CL.*;
8 |
9 | import java.nio.ByteBuffer;
10 | import java.util.Locale;
11 |
12 | import org.jocl.*;
13 |
14 | /**
15 | * A test for the bandwidth of the data transfer from the host
16 | * to the device.
17 | */
18 | public class JOCLBandwidthTest
19 | {
20 | /**
21 | * The index of the OpenCL platform that this sample should run on
22 | */
23 | private static final int platformIndex = 0;
24 |
25 | /**
26 | * The OpenCL device type that will be used
27 | */
28 | private static final long deviceType = CL_DEVICE_TYPE_ALL;
29 |
30 | /**
31 | * The index of the OpenCL device that will be used
32 | */
33 | private static final int deviceIndex = 0;
34 |
35 | /**
36 | * The OpenCL context
37 | */
38 | private static cl_context context;
39 |
40 | /**
41 | * The OpenCL command queue
42 | */
43 | private static cl_command_queue commandQueue;
44 |
/**
 * Enumeration of the host memory modes. The main method runs one
 * bandwidth test for each combination of a memory mode and an
 * access mode.
 */
enum MemoryMode
{
    PAGEABLE, PINNED
}
53 |
/**
 * Enumeration of the memory access modes. The main method runs one
 * bandwidth test for each combination of a memory mode and an
 * access mode.
 */
enum AccessMode
{
    MAPPED, DIRECT
}
62 |
63 | /**
64 | * The number of memcopy operations to perform for each size
65 | */
66 | private static final long MEMCOPY_ITERATIONS = 100;
67 |
68 | /**
69 | * The entry point of this sample
70 | *
71 | * @param args Not used
72 | */
73 | public static void main(String args[])
74 | {
75 | initialize();
76 |
77 | for (MemoryMode memoryMode : MemoryMode.values())
78 | {
79 | for (AccessMode accessMode : AccessMode.values())
80 | {
81 | runTest(memoryMode, accessMode);
82 | }
83 | }
84 |
85 | shutdown();
86 | }
87 |
88 | /**
89 | * Run a bandwidth test with the given memory mode and access mode
90 | *
91 | * @param memoryMode The memory mode
92 | * @param accessMode The access mode
93 | */
94 | private static void runTest(MemoryMode memoryMode, AccessMode accessMode)
95 | {
96 | int minExponent = 10;
97 | int maxExponent = 26;
98 | int count = maxExponent - minExponent;
99 | int memorySizes[] = new int[count];
100 | double bandwidths[] = new double[memorySizes.length];
101 |
102 | System.out.print("Running");
103 | for (int i=0; i devices = new ArrayList();
40 | for (int i=0; i
181 | System.out.printf("CL_DEVICE_PREFERRED_VECTOR_WIDTH_\t");
182 | int preferredVectorWidthChar = getInt(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR);
183 | int preferredVectorWidthShort = getInt(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT);
184 | int preferredVectorWidthInt = getInt(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT);
185 | int preferredVectorWidthLong = getInt(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG);
186 | int preferredVectorWidthFloat = getInt(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT);
187 | int preferredVectorWidthDouble = getInt(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE);
188 | System.out.printf("CHAR %d, SHORT %d, INT %d, LONG %d, FLOAT %d, DOUBLE %d\n\n\n",
189 | preferredVectorWidthChar, preferredVectorWidthShort,
190 | preferredVectorWidthInt, preferredVectorWidthLong,
191 | preferredVectorWidthFloat, preferredVectorWidthDouble);
192 | }
193 | }
194 |
195 | /**
196 | * Returns the value of the device info parameter with the given name
197 | *
198 | * @param device The device
199 | * @param paramName The parameter name
200 | * @return The value
201 | */
202 | private static int getInt(cl_device_id device, int paramName)
203 | {
204 | return getInts(device, paramName, 1)[0];
205 | }
206 |
207 | /**
208 | * Returns the values of the device info parameter with the given name
209 | *
210 | * @param device The device
211 | * @param paramName The parameter name
212 | * @param numValues The number of values
213 | * @return The value
214 | */
215 | private static int[] getInts(cl_device_id device, int paramName, int numValues)
216 | {
217 | int values[] = new int[numValues];
218 | clGetDeviceInfo(device, paramName, Sizeof.cl_int * numValues, Pointer.to(values), null);
219 | return values;
220 | }
221 |
222 | /**
223 | * Returns the value of the device info parameter with the given name
224 | *
225 | * @param device The device
226 | * @param paramName The parameter name
227 | * @return The value
228 | */
229 | private static long getLong(cl_device_id device, int paramName)
230 | {
231 | return getLongs(device, paramName, 1)[0];
232 | }
233 |
234 | /**
235 | * Returns the values of the device info parameter with the given name
236 | *
237 | * @param device The device
238 | * @param paramName The parameter name
239 | * @param numValues The number of values
240 | * @return The value
241 | */
242 | private static long[] getLongs(cl_device_id device, int paramName, int numValues)
243 | {
244 | long values[] = new long[numValues];
245 | clGetDeviceInfo(device, paramName, Sizeof.cl_long * numValues, Pointer.to(values), null);
246 | return values;
247 | }
248 |
249 | /**
250 | * Returns the value of the device info parameter with the given name
251 | *
252 | * @param device The device
253 | * @param paramName The parameter name
254 | * @return The value
255 | */
256 | private static String getString(cl_device_id device, int paramName)
257 | {
258 | // Obtain the length of the string that will be queried
259 | long size[] = new long[1];
260 | clGetDeviceInfo(device, paramName, 0, null, size);
261 |
262 | // Create a buffer of the appropriate size and fill it with the info
263 | byte buffer[] = new byte[(int)size[0]];
264 | clGetDeviceInfo(device, paramName, buffer.length, Pointer.to(buffer), null);
265 |
266 | // Create a string from the buffer (excluding the trailing \0 byte)
267 | return new String(buffer, 0, buffer.length-1);
268 | }
269 |
270 | /**
271 | * Returns the value of the platform info parameter with the given name
272 | *
273 | * @param platform The platform
274 | * @param paramName The parameter name
275 | * @return The value
276 | */
277 | private static String getString(cl_platform_id platform, int paramName)
278 | {
279 | // Obtain the length of the string that will be queried
280 | long size[] = new long[1];
281 | clGetPlatformInfo(platform, paramName, 0, null, size);
282 |
283 | // Create a buffer of the appropriate size and fill it with the info
284 | byte buffer[] = new byte[(int)size[0]];
285 | clGetPlatformInfo(platform, paramName, buffer.length, Pointer.to(buffer), null);
286 |
287 | // Create a string from the buffer (excluding the trailing \0 byte)
288 | return new String(buffer, 0, buffer.length-1);
289 | }
290 |
291 | /**
292 | * Returns the value of the device info parameter with the given name
293 | *
294 | * @param device The device
295 | * @param paramName The parameter name
296 | * @return The value
297 | */
298 | private static long getSize(cl_device_id device, int paramName)
299 | {
300 | return getSizes(device, paramName, 1)[0];
301 | }
302 |
303 | /**
304 | * Returns the values of the device info parameter with the given name
305 | *
306 | * @param device The device
307 | * @param paramName The parameter name
308 | * @param numValues The number of values
309 | * @return The value
310 | */
311 | static long[] getSizes(cl_device_id device, int paramName, int numValues)
312 | {
313 | // The size of the returned data has to depend on
314 | // the size of a size_t, which is handled here
315 | ByteBuffer buffer = ByteBuffer.allocate(
316 | numValues * Sizeof.size_t).order(ByteOrder.nativeOrder());
317 | clGetDeviceInfo(device, paramName, Sizeof.size_t * numValues,
318 | Pointer.to(buffer), null);
319 | long values[] = new long[numValues];
320 | if (Sizeof.size_t == 4)
321 | {
322 | for (int i=0; i max)
221 | {
222 | System.out.print(" ...");
223 | }
224 | }
225 | System.out.println("");
226 | }
227 |
228 | /**
229 | * A simple helper class for tracking cl_events and printing
230 | * timing information for the execution of the commands that
231 | * are associated with the events.
232 | */
233 | static class ExecutionStatistics
234 | {
235 | /**
236 | * A single entry of the ExecutionStatistics
237 | */
238 | private static class Entry
239 | {
240 | private String name;
241 | private long submitTime[] = new long[1];
242 | private long queuedTime[] = new long[1];
243 | private long startTime[] = new long[1];
244 | private long endTime[] = new long[1];
245 |
246 | Entry(String name, cl_event event)
247 | {
248 | this.name = name;
249 | clGetEventProfilingInfo(
250 | event, CL_PROFILING_COMMAND_QUEUED,
251 | Sizeof.cl_ulong, Pointer.to(queuedTime), null);
252 | clGetEventProfilingInfo(
253 | event, CL_PROFILING_COMMAND_SUBMIT,
254 | Sizeof.cl_ulong, Pointer.to(submitTime), null);
255 | clGetEventProfilingInfo(
256 | event, CL_PROFILING_COMMAND_START,
257 | Sizeof.cl_ulong, Pointer.to(startTime), null);
258 | clGetEventProfilingInfo(
259 | event, CL_PROFILING_COMMAND_END,
260 | Sizeof.cl_ulong, Pointer.to(endTime), null);
261 | }
262 |
263 | void normalize(long baseTime)
264 | {
265 | submitTime[0] -= baseTime;
266 | queuedTime[0] -= baseTime;
267 | startTime[0] -= baseTime;
268 | endTime[0] -= baseTime;
269 | }
270 |
271 | long getQueuedTime()
272 | {
273 | return queuedTime[0];
274 | }
275 |
276 | void print()
277 | {
278 | System.out.println("Event "+name+": ");
279 | System.out.println("Queued : "+
280 | String.format("%8.3f", queuedTime[0]/1e6)+" ms");
281 | System.out.println("Submit : "+
282 | String.format("%8.3f", submitTime[0]/1e6)+" ms");
283 | System.out.println("Start : "+
284 | String.format("%8.3f", startTime[0]/1e6)+" ms");
285 | System.out.println("End : "+
286 | String.format("%8.3f", endTime[0]/1e6)+" ms");
287 |
288 | long duration = endTime[0]-startTime[0];
289 | System.out.println("Time : "+
290 | String.format("%8.3f", duration / 1e6)+" ms");
291 | }
292 | }
293 |
294 | /**
295 | * The list of entries in this instance
296 | */
297 | private List entries = new ArrayList();
298 |
299 | /**
300 | * Adds the specified entry to this instance
301 | *
302 | * @param name A name for the event
303 | * @param event The event
304 | */
305 | public void addEntry(String name, cl_event event)
306 | {
307 | entries.add(new Entry(name, event));
308 | }
309 |
310 | /**
311 | * Removes all entries
312 | */
313 | public void clear()
314 | {
315 | entries.clear();
316 | }
317 |
318 | /**
319 | * Normalize the entries, so that the times are relative
320 | * to the time when the first event was queued
321 | */
322 | private void normalize()
323 | {
324 | long minQueuedTime = Long.MAX_VALUE;
325 | for (Entry entry : entries)
326 | {
327 | minQueuedTime = Math.min(minQueuedTime, entry.getQueuedTime());
328 | }
329 | for (Entry entry : entries)
330 | {
331 | entry.normalize(minQueuedTime);
332 | }
333 | }
334 |
335 | /**
336 | * Print the statistics
337 | */
338 | public void print()
339 | {
340 | normalize();
341 | for (Entry entry : entries)
342 | {
343 | entry.print();
344 | }
345 | }
346 |
347 |
348 | }
349 | }
350 |
--------------------------------------------------------------------------------
/src/main/java/org/jocl/samples/JOCLMappedBufferSample.java:
--------------------------------------------------------------------------------
1 | /*
2 | * JOCL - Java bindings for OpenCL
3 | *
4 | * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
5 | */
6 | package org.jocl.samples;
7 |
8 | import static org.jocl.CL.*;
9 |
10 | import java.nio.*;
11 |
12 | import org.jocl.*;
13 |
14 | /**
15 | * A small JOCL sample, similar to the minimal JOCLSample, but
16 | * demonstrating how to map a cl_mem to a Java ByteBuffer
17 | */
18 | public class JOCLMappedBufferSample
19 | {
20 | /**
21 | * The source code of the OpenCL program to execute
22 | */
23 | private static String programSource =
24 | "__kernel void "+
25 | "sampleKernel(__global const float *a,"+
26 | " __global const float *b,"+
27 | " __global float *c)"+
28 | "{"+
29 | " int gid = get_global_id(0);"+
30 | " c[gid] = a[gid] * b[gid];"+
31 | "}";
32 |
33 | /**
34 | * The name of the kernel to execute
35 | */
36 | private static final String kernelName = "sampleKernel";
37 |
38 | /**
39 | * The index of the OpenCL platform that this sample should run on
40 | */
41 | private static final int platformIndex = 0;
42 |
43 | /**
44 | * The OpenCL device type that will be used
45 | */
46 | private static final long deviceType = CL_DEVICE_TYPE_ALL;
47 |
48 | /**
49 | * The index of the OpenCL device that will be used
50 | */
51 | private static final int deviceIndex = 0;
52 |
53 | /**
54 | * The OpenCL context
55 | */
56 | private static cl_context context;
57 |
58 | /**
59 | * The OpenCL command queue
60 | */
61 | private static cl_command_queue commandQueue;
62 |
63 | /**
64 | * The OpenCL program that contains the kernel
65 | */
66 | private static cl_program program;
67 |
68 | /**
69 | * The OpenCL kernel from the program
70 | */
71 | private static cl_kernel kernel;
72 |
73 | /**
74 | * The entry point of this sample
75 | *
76 | * @param args Not used
77 | */
78 | public static void main(String args[])
79 | {
80 | initialize();
81 |
82 | // Create input- and output data
83 | int n = 10;
84 | float srcArrayA[] = new float[n];
85 | float srcArrayB[] = new float[n];
86 | float dstArray[] = new float[n];
87 | for (int i=0; i
16 | *
17 | * Note: This is just a basic demo, showing the possibility to use multiple
18 | * devices simultaneously. Each device receives its own copy of the memory
19 | * objects to work on. In real applications, there may be a more complex
20 | * management of the buffers and the synchronization between the different
21 | * devices, which is beyond the scope of this sample.
22 | */
23 | public class JOCLMultiDeviceSample
24 | {
25 | /**
26 | * The source code of the OpenCL program to execute, containing
27 | * some artificial workload to compute
28 | */
29 | private static String programSource =
30 | "__kernel void sampleKernel(__global const float *input,"+
31 | " __global float *output, " +
32 | " int size)"+
33 | "{"+
34 | " int gid = get_global_id(0);"+
35 | " output[gid] = 0;" +
36 | " for (int i=0; i
71 | *
72 | * The reduction is performed in two phases: In the first phase, each
73 | * work group of the GPU computes the reduction of a part of the
74 | * input array. The size of this part is exactly the number of work
75 | * items in the group, and the reduction will be performed in local
76 | * memory. The results of these reductions will be written into
77 | * an output array. This output array is then reduced on the CPU.
78 | *
79 | * @param inputArray The array on which the reduction will be performed
80 | * @return The result of the reduction
81 | */
82 | private static float reduce(float inputArray[])
83 | {
84 | int localWorkSize = 128;
85 | int numWorkGroups = 64;
86 | float outputArray[] = new float[numWorkGroups];
87 |
88 | // Allocate the memory objects for the input- and output data
89 | cl_mem inputMem = clCreateBuffer(context,
90 | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
91 | Sizeof.cl_float * inputArray.length, Pointer.to(inputArray), null);
92 | cl_mem outputMem = clCreateBuffer(context,
93 | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
94 | Sizeof.cl_float * numWorkGroups, Pointer.to(outputArray), null);
95 |
96 | // Perform the reduction on the GPU: Each work group will
97 | // perform the reduction of 'localWorkSize' elements, and
98 | // the results will be written into the output memory
99 | reduce(
100 | inputMem, inputArray.length,
101 | outputMem, numWorkGroups,
102 | localWorkSize);
103 |
104 | // Read the output data
105 | clEnqueueReadBuffer(commandQueue, outputMem, CL_TRUE, 0,
106 | numWorkGroups * Sizeof.cl_float, Pointer.to(outputArray),
107 | 0, null, null);
108 |
109 | // Perform the final reduction, by reducing the results
110 | // from the work groups on the CPU
111 | float result = reduceHost(outputArray);
112 |
113 | // Release memory objects
114 | clReleaseMemObject(inputMem);
115 | clReleaseMemObject(outputMem);
116 |
117 | return result;
118 | }
119 |
120 |
121 | /**
122 | * Perform a reduction of the float elements in the given input memory.
123 | * Each work group will reduce 'localWorkSize' elements, and write the
124 | * result into the given output memory.
125 | *
126 | * @param inputMem The input memory containing the float values to reduce
127 | * @param n The number of values in the input memory
128 | * @param outputMem The output memory that will store the reduction
129 | * result for each work group
130 | * @param numWorkGroups The number of work groups
131 | * @param localWorkSize The local work size, that is, the number of
132 | * work items in each work group
133 | */
134 | private static void reduce(
135 | cl_mem inputMem, int n,
136 | cl_mem outputMem, int numWorkGroups,
137 | int localWorkSize)
138 | {
139 | // Set the arguments for the kernel
140 | int a = 0;
141 | clSetKernelArg(kernel, a++, Sizeof.cl_mem, Pointer.to(inputMem));
142 | clSetKernelArg(kernel, a++, Sizeof.cl_float * localWorkSize, null);
143 | clSetKernelArg(kernel, a++, Sizeof.cl_int, Pointer.to(new int[]{n}));
144 | clSetKernelArg(kernel, a++, Sizeof.cl_mem, Pointer.to(outputMem));
145 |
146 | // Compute the number of work groups and the global work size
147 | long globalWorkSize = numWorkGroups * localWorkSize;
148 |
149 | // Execute the kernel
150 | clEnqueueNDRangeKernel(commandQueue, kernel, 1, null,
151 | new long[]{ globalWorkSize }, new long[]{ localWorkSize},
152 | 0, null, null);
153 | }
154 |
155 | /**
156 | * Implementation of a Kahan summation reduction in plain Java
157 | *
158 | * @param array The input
159 | * @return The reduction result
160 | */
161 | private static float reduceHost(float array[])
162 | {
163 | float sum = array[0];
164 | float c = 0.0f;
165 | for (int i = 1; i < array.length; i++)
166 | {
167 | float y = array[i] - c;
168 | float t = sum + y;
169 | c = (t - sum) - y;
170 | sum = t;
171 | }
172 | return sum;
173 | }
174 |
175 | /**
176 | * Initialize a default OpenCL context, command queue, program and kernel
177 | */
178 | private static void initialize()
179 | {
180 | // The platform, device type and device number
181 | // that will be used
182 | final int platformIndex = 0;
183 | final long deviceType = CL_DEVICE_TYPE_ALL;
184 | final int deviceIndex = 0;
185 |
186 | // Enable exceptions and subsequently omit error checks in this sample
187 | CL.setExceptionsEnabled(true);
188 |
189 | // Obtain the number of platforms
190 | int numPlatformsArray[] = new int[1];
191 | clGetPlatformIDs(0, null, numPlatformsArray);
192 | int numPlatforms = numPlatformsArray[0];
193 |
194 | // Obtain a platform ID
195 | cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
196 | clGetPlatformIDs(platforms.length, platforms, null);
197 | cl_platform_id platform = platforms[platformIndex];
198 |
199 | // Initialize the context properties
200 | cl_context_properties contextProperties = new cl_context_properties();
201 | contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
202 |
203 | // Obtain the number of devices for the platform
204 | int numDevicesArray[] = new int[1];
205 | clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
206 | int numDevices = numDevicesArray[0];
207 |
208 | // Obtain a device ID
209 | cl_device_id devices[] = new cl_device_id[numDevices];
210 | clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
211 | cl_device_id device = devices[deviceIndex];
212 |
213 | // Create a context for the selected device
214 | context = clCreateContext(
215 | contextProperties, 1, new cl_device_id[]{device},
216 | null, null, null);
217 |
218 | // Create a command-queue for the selected device
219 | cl_queue_properties properties = new cl_queue_properties();
220 | commandQueue = clCreateCommandQueueWithProperties(
221 | context, device, properties, null);
222 |
223 | // Create the program from the source code
224 | String programSource = readFile("src/main/resources/kernels/reduction.cl");
225 | program = clCreateProgramWithSource(context,
226 | 1, new String[]{ programSource }, null, null);
227 |
228 | // Build the program
229 | clBuildProgram(program, 0, null, null, null, null);
230 |
231 | // Create the kernel
232 | kernel = clCreateKernel(program, "reduce", null);
233 | }
234 |
235 | /**
236 | * Shut down and release all resources that have been allocated
237 | * in {@link #initialize()}
238 | */
239 | private static void shutdown()
240 | {
241 | clReleaseKernel(kernel);
242 | clReleaseProgram(program);
243 | clReleaseCommandQueue(commandQueue);
244 | clReleaseContext(context);
245 | }
246 |
247 | /**
248 | * Read the contents of the file with the given name, and return
249 | * it as a string
250 | *
251 | * @param fileName The name of the file to read
252 | * @return The contents of the file
253 | */
254 | private static String readFile(String fileName)
255 | {
256 | BufferedReader br = null;
257 | try
258 | {
259 | br = new BufferedReader(new FileReader(fileName));
260 | StringBuilder sb = new StringBuilder();
261 | String line = null;
262 | while (true)
263 | {
264 | line = br.readLine();
265 | if (line == null)
266 | {
267 | break;
268 | }
269 | sb.append(line+"\n");
270 | }
271 | return sb.toString();
272 | }
273 | catch (IOException e)
274 | {
275 | e.printStackTrace();
276 | return "";
277 | }
278 | finally
279 | {
280 | if (br != null)
281 | {
282 | try
283 | {
284 | br.close();
285 | }
286 | catch (IOException ex)
287 | {
288 | ex.printStackTrace();
289 | }
290 | }
291 | }
292 | }
293 |
294 | }
295 |
--------------------------------------------------------------------------------
/src/main/java/org/jocl/samples/JOCLSample.java:
--------------------------------------------------------------------------------
1 | /*
2 | * JOCL - Java bindings for OpenCL
3 | *
4 | * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
5 | */
6 | package org.jocl.samples;
7 |
8 | import static org.jocl.CL.*;
9 |
10 | import java.util.Arrays;
11 |
12 | import org.jocl.*;
13 |
14 | /**
15 | * A small JOCL sample.
16 | */
17 | public class JOCLSample
18 | {
19 | /**
20 | * The source code of the OpenCL program to execute
21 | */
22 | private static String programSource =
23 | "__kernel void "+
24 | "sampleKernel(__global const float *a,"+
25 | " __global const float *b,"+
26 | " __global float *c)"+
27 | "{"+
28 | " int gid = get_global_id(0);"+
29 | " c[gid] = a[gid] * b[gid];"+
30 | "}";
31 |
32 |
33 | /**
34 | * The entry point of this sample
35 | *
36 | * @param args Not used
37 | */
38 | public static void main(String args[])
39 | {
40 | // Create input- and output data
41 | int n = 10;
42 | float srcArrayA[] = new float[n];
43 | float srcArrayB[] = new float[n];
44 | float dstArray[] = new float[n];
45 | for (int i=0; i=1; i--)
158 | {
159 | System.out.println("Seconds left: "+i);
160 | try
161 | {
162 | Thread.sleep(1000);
163 | }
164 | catch (InterruptedException e)
165 | {
166 | Thread.currentThread().interrupt();
167 | }
168 | }
169 | System.out.println("Setting event status to CL_COMPLETE");
170 | clSetUserEventStatus(userEvent, CL.CL_COMPLETE);
171 | }
172 | });
173 | thread.start();
174 |
175 |
176 | // Create the destructor callback which will be called
177 | // when the output memory object is destroyed
178 | MemObjectDestructorCallbackFunction
179 | memObjectDestructorCallbackFunction =
180 | new MemObjectDestructorCallbackFunction()
181 | {
182 | @Override
183 | public void function(cl_mem memobj, Object user_data)
184 | {
185 | System.out.println("Memory object "+memobj+
186 | " was destroyed, user data: "+user_data);
187 | }
188 | };
189 | clSetMemObjectDestructorCallback(dstMem,
190 | memObjectDestructorCallbackFunction,
191 | "Memory object destructor callback user data");
192 |
193 | // Wait until all commands have completed
194 | clFinish(commandQueue);
195 |
196 | // Release kernel, program, and memory objects.
197 | clReleaseKernel(kernel);
198 | clReleaseProgram(program);
199 | clReleaseCommandQueue(commandQueue);
200 | clReleaseContext(context);
201 | clReleaseMemObject(srcMemA);
202 | clReleaseMemObject(srcMemB);
203 |
204 | // Releasing the output memory object will cause
205 | // the destructor callback to be called.
206 | clReleaseMemObject(dstMem);
207 |
208 | // Verify the result
209 | float reference[] = new float[]{10,12,18,20};
210 | float result[] = new float[regionSizeX*regionSizeY];
211 | regionData.get(result);
212 | boolean passed = Arrays.equals(result, reference);
213 | System.out.println(passed ? "PASSED" : "FAILED");
214 | }
215 |
216 | /**
217 | * Default OpenCL initialization of the context, command queue,
218 | * program and kernel
219 | */
220 | private static void defaultInitialization()
221 | {
222 | // The platform, device type and device number
223 | // that will be used
224 | final int platformIndex = 0;
225 | final long deviceType = CL_DEVICE_TYPE_ALL;
226 | final int deviceIndex = 0;
227 |
228 | // Enable exceptions and subsequently omit error checks in this sample
229 | CL.setExceptionsEnabled(true);
230 |
231 | // Obtain the number of platforms
232 | int numPlatformsArray[] = new int[1];
233 | clGetPlatformIDs(0, null, numPlatformsArray);
234 | int numPlatforms = numPlatformsArray[0];
235 |
236 | // Obtain a platform ID
237 | cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
238 | clGetPlatformIDs(platforms.length, platforms, null);
239 | cl_platform_id platform = platforms[platformIndex];
240 |
241 | // Initialize the context properties
242 | cl_context_properties contextProperties = new cl_context_properties();
243 | contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
244 |
245 | // Obtain the number of devices for the platform
246 | int numDevicesArray[] = new int[1];
247 | clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
248 | int numDevices = numDevicesArray[0];
249 |
250 | // Obtain a device ID
251 | cl_device_id devices[] = new cl_device_id[numDevices];
252 | clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
253 | cl_device_id device = devices[deviceIndex];
254 |
255 | // Create a context for the selected device
256 | context = clCreateContext(
257 | contextProperties, 1, new cl_device_id[]{device},
258 | null, null, null);
259 |
260 | String deviceName = getString(devices[0], CL_DEVICE_NAME);
261 | System.out.printf("CL_DEVICE_NAME: %s\n", deviceName);
262 |
263 | // Create a command-queue for the selected device
264 | cl_queue_properties properties = new cl_queue_properties();
265 | commandQueue = clCreateCommandQueueWithProperties(
266 | context, device, properties, null);
267 |
268 | // Create the program from the source code
269 | program = clCreateProgramWithSource(context,
270 | 1, new String[]{ programSource }, null, null);
271 |
272 | // Build the program
273 | clBuildProgram(program, 0, null, null, null, null);
274 |
275 | // Create the kernel
276 | kernel = clCreateKernel(program, "sampleKernel", null);
277 | }
278 |
279 | /**
280 | * Print the given buffer as a matrix with the given number of columns
281 | *
282 | * @param data The buffer
283 | * @param columns The number of columns
284 | */
285 | private static void print2D(FloatBuffer data, int columns)
286 | {
287 | StringBuffer sb = new StringBuffer();
288 | for (int i=0; i= 2.0)
213 | {
214 | System.out.println("Using device "+
215 | deviceName+", version "+version);
216 | device = currentDevice;
217 | break;
218 | }
219 | else
220 | {
221 | System.out.println("Skipping device "+
222 | deviceName+", version "+version);
223 | }
224 | }
225 | if (device == null)
226 | {
227 | System.out.println("No OpenCL 2.0 capable device found");
228 | System.exit(1);
229 | }
230 |
231 | // Create a context
232 | context = clCreateContext(
233 | contextProperties, 1, new cl_device_id[]{ device },
234 | null, null, null);
235 |
236 | // Create the command queue
237 | cl_queue_properties properties = new cl_queue_properties();
238 | commandQueue = clCreateCommandQueueWithProperties(
239 | context, device, properties, null);
240 |
241 | // Create the program from the source code
242 | cl_program program = clCreateProgramWithSource(context,
243 | 1, new String[]{ programSource }, null, null);
244 |
245 | // Build the program. It's important to specify the
246 | // -cl-std=CL2.0
247 | // build parameter here!
248 | clBuildProgram(program, 0, null, "-cl-std=CL2.0", null, null);
249 |
250 | // Create the kernel
251 | kernel = clCreateKernel(program, "sampleKernel", null);
252 |
253 | clReleaseProgram(program);
254 | }
255 |
256 | /**
257 | * Returns the OpenCL version of the given device, as a float
258 | * value
259 | *
260 | * @param device The device
261 | * @return The OpenCL version
262 | */
263 | private static float getOpenCLVersion(cl_device_id device)
264 | {
265 | String deviceVersion = getString(device, CL_DEVICE_VERSION);
266 | String versionString = deviceVersion.substring(7, 10);
267 | float version = Float.parseFloat(versionString);
268 | return version;
269 | }
270 |
271 | /**
272 | * Returns the value of the device info parameter with the given name
273 | *
274 | * @param device The device
275 | * @param paramName The parameter name
276 | * @return The value
277 | */
278 | private static String getString(cl_device_id device, int paramName)
279 | {
280 | // Obtain the length of the string that will be queried
281 | long size[] = new long[1];
282 | clGetDeviceInfo(device, paramName, 0, null, size);
283 |
284 | // Create a buffer of the appropriate size and fill it with the info
285 | byte buffer[] = new byte[(int)size[0]];
286 | clGetDeviceInfo(device, paramName, buffer.length, Pointer.to(buffer), null);
287 |
288 | // Create a string from the buffer (excluding the trailing \0 byte)
289 | return new String(buffer, 0, buffer.length-1);
290 | }
291 |
292 |
293 | }
294 |
--------------------------------------------------------------------------------
/src/main/java/org/jocl/samples/JOCLSimpleImage.java:
--------------------------------------------------------------------------------
1 | /*
2 | * JOCL - Java bindings for OpenCL
3 | *
4 | * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
5 | */
6 | package org.jocl.samples;
7 |
8 | import static org.jocl.CL.*;
9 |
10 | import java.awt.*;
11 | import java.awt.image.*;
12 | import java.io.*;
13 |
14 | import javax.imageio.ImageIO;
15 | import javax.swing.*;
16 |
17 | import org.jocl.*;
18 |
19 | /**
20 | * A simple example demonstrating image handling between JOCL
21 | * and Swing. It shows an animation of a rotating image,
22 | * which is rotated using an OpenCL kernel involving some
23 | * basic image operations.
24 | */
25 | public class JOCLSimpleImage
26 | {
27 | /**
28 | * Entry point for this sample.
29 | *
30 | * @param args not used
31 | */
32 | public static void main(String args[])
33 | {
34 | SwingUtilities.invokeLater(new Runnable()
35 | {
36 | public void run()
37 | {
38 | new JOCLSimpleImage();
39 | }
40 | });
41 | }
42 |
43 | /**
44 | * The source code of the kernel to execute. It will rotate the
45 | * input image by the given angle and write the result into the
46 | * output image.
47 | */
48 | private static String programSource =
49 | ""+ "\n" +
50 | "const sampler_t samplerIn = "+ "\n" +
51 | " CLK_NORMALIZED_COORDS_FALSE | "+ "\n" +
52 | " CLK_ADDRESS_CLAMP |"+ "\n" +
53 | " CLK_FILTER_NEAREST;"+ "\n" +
54 | ""+ "\n" +
55 | "const sampler_t samplerOut = "+ "\n" +
56 | " CLK_NORMALIZED_COORDS_FALSE |"+ "\n" +
57 | " CLK_ADDRESS_CLAMP |"+ "\n" +
58 | " CLK_FILTER_NEAREST;"+ "\n" +
59 | ""+ "\n" +
60 | "__kernel void rotateImage("+ "\n" +
61 | " __read_only image2d_t sourceImage, "+ "\n" +
62 | " __write_only image2d_t targetImage, "+ "\n" +
63 | " float angle)"+ "\n" +
64 | "{"+ "\n" +
65 | " int gidX = get_global_id(0);"+ "\n" +
66 | " int gidY = get_global_id(1);"+ "\n" +
67 | " int w = get_image_width(sourceImage);"+ "\n" +
68 | " int h = get_image_height(sourceImage);"+ "\n" +
69 | " int cx = w/2;"+ "\n" +
70 | " int cy = h/2;"+ "\n" +
71 | " int dx = gidX-cx;"+ "\n" +
72 | " int dy = gidY-cy;"+ "\n" +
73 | " float ca = cos(angle);"+ "\n" +
74 | " float sa = sin(angle);"+ "\n" +
75 | " int inX = (int)(cx+ca*dx-sa*dy);"+ "\n" +
76 | " int inY = (int)(cy+sa*dx+ca*dy);"+ "\n" +
77 | " int2 posIn = {inX, inY};"+ "\n" +
78 | " int2 posOut = {gidX, gidY};"+ "\n" +
79 | " uint4 pixel = read_imageui(sourceImage, samplerIn, posIn);"+ "\n" +
80 | " write_imageui(targetImage, posOut, pixel);"+ "\n" +
81 | "}";
82 |
83 |
84 | /**
85 | * Creates a BufferedImage of with type TYPE_INT_RGB from the
86 | * file with the given name.
87 | *
88 | * @param fileName The file name
89 | * @return The image, or null if the file may not be read
90 | */
91 | private static BufferedImage createBufferedImage(String fileName)
92 | {
93 | BufferedImage image = null;
94 | try
95 | {
96 | image = ImageIO.read(new File(fileName));
97 | }
98 | catch (IOException e)
99 | {
100 | e.printStackTrace();
101 | return null;
102 | }
103 |
104 | int sizeX = image.getWidth();
105 | int sizeY = image.getHeight();
106 |
107 | BufferedImage result = new BufferedImage(
108 | sizeX, sizeY, BufferedImage.TYPE_INT_RGB);
109 | Graphics g = result.createGraphics();
110 | g.drawImage(image, 0, 0, null);
111 | g.dispose();
112 | return result;
113 | }
114 |
115 | /**
116 | * The input image
117 | */
118 | private BufferedImage inputImage;
119 |
120 | /**
121 | * The output image
122 | */
123 | private BufferedImage outputImage;
124 |
125 | /**
126 | * The OpenCL context
127 | */
128 | private cl_context context;
129 |
130 | /**
131 | * The OpenCL command queue
132 | */
133 | private cl_command_queue commandQueue;
134 |
135 | /**
136 | * The OpenCL kernel
137 | */
138 | private cl_kernel kernel;
139 |
140 | /**
141 | * The memory object for the input image
142 | */
143 | private cl_mem inputImageMem;
144 |
145 | /**
146 | * The memory object for the output image
147 | */
148 | private cl_mem outputImageMem;
149 |
150 | /**
151 | * The width of the image
152 | */
153 | private int imageSizeX;
154 |
155 | /**
156 | * The height of the image
157 | */
158 | private int imageSizeY;
159 |
160 | /**
161 | * Creates the JOCLSimpleImage sample
162 | */
163 | public JOCLSimpleImage()
164 | {
165 | // Read the input image file and create the output images
166 | String fileName = "src/main/resources/data/lena512color.png";
167 |
168 | inputImage = createBufferedImage(fileName);
169 | imageSizeX = inputImage.getWidth();
170 | imageSizeY = inputImage.getHeight();
171 |
172 | outputImage = new BufferedImage(
173 | imageSizeX, imageSizeY, BufferedImage.TYPE_INT_RGB);
174 |
175 | // Create the panel showing the input and output images
176 | JPanel mainPanel = new JPanel(new GridLayout(1,0));
177 | JLabel inputLabel = new JLabel(new ImageIcon(inputImage));
178 | mainPanel.add(inputLabel, BorderLayout.CENTER);
179 | JLabel outputLabel = new JLabel(new ImageIcon(outputImage));
180 | mainPanel.add(outputLabel, BorderLayout.CENTER);
181 |
182 | // Create the main frame
183 | JFrame frame = new JFrame("JOCL Simple Image Sample");
184 | frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
185 | frame.setLayout(new BorderLayout());
186 | frame.add(mainPanel, BorderLayout.CENTER);
187 | frame.pack();
188 | frame.setVisible(true);
189 |
190 | initCL();
191 | initImageMem();
192 | startAnimation(outputLabel);
193 | }
194 |
195 |
196 | /**
197 | * Starts the thread which will advance the animation state
198 | * and call call the animation method.
199 | *
200 | * @param outputComponent The component to repaint after each step
201 | */
202 | private void startAnimation(final Component outputComponent)
203 | {
204 | System.out.println("Starting animation...");
205 | Thread thread = new Thread(new Runnable()
206 | {
207 | float angle = 0.0f;
208 | public void run()
209 | {
210 | while (true)
211 | {
212 | rotateImage(angle);
213 | angle += 0.1f;
214 | outputComponent.repaint();
215 |
216 | try
217 | {
218 | Thread.sleep(20);
219 | }
220 | catch (InterruptedException e)
221 | {
222 | Thread.currentThread().interrupt();
223 | return;
224 | }
225 | }
226 | }
227 | });
228 | thread.setDaemon(true);
229 | thread.start();
230 | }
231 |
232 |
233 | /**
234 | * Initialize the OpenCL context, command queue and kernel
235 | */
236 | void initCL()
237 | {
238 | final int platformIndex = 0;
239 | final long deviceType = CL_DEVICE_TYPE_ALL;
240 | final int deviceIndex = 0;
241 |
242 | // Enable exceptions and subsequently omit error checks in this sample
243 | CL.setExceptionsEnabled(true);
244 |
245 | // Obtain the number of platforms
246 | int numPlatformsArray[] = new int[1];
247 | clGetPlatformIDs(0, null, numPlatformsArray);
248 | int numPlatforms = numPlatformsArray[0];
249 |
250 | // Obtain a platform ID
251 | cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
252 | clGetPlatformIDs(platforms.length, platforms, null);
253 | cl_platform_id platform = platforms[platformIndex];
254 |
255 | // Initialize the context properties
256 | cl_context_properties contextProperties = new cl_context_properties();
257 | contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
258 |
259 | // Obtain the number of devices for the platform
260 | int numDevicesArray[] = new int[1];
261 | clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
262 | int numDevices = numDevicesArray[0];
263 |
264 | // Obtain a device ID
265 | cl_device_id devices[] = new cl_device_id[numDevices];
266 | clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
267 | cl_device_id device = devices[deviceIndex];
268 |
269 | // Create a context for the selected device
270 | context = clCreateContext(
271 | contextProperties, 1, new cl_device_id[]{device},
272 | null, null, null);
273 |
274 | // Check if images are supported
275 | int imageSupport[] = new int[1];
276 | clGetDeviceInfo (device, CL.CL_DEVICE_IMAGE_SUPPORT,
277 | Sizeof.cl_int, Pointer.to(imageSupport), null);
278 | System.out.println("Images supported: "+(imageSupport[0]==1));
279 | if (imageSupport[0]==0)
280 | {
281 | System.out.println("Images are not supported");
282 | System.exit(1);
283 | return;
284 | }
285 |
286 | // Create a command-queue for the selected device
287 | cl_queue_properties properties = new cl_queue_properties();
288 | properties.addProperty(CL_QUEUE_PROFILING_ENABLE, 1);
289 | properties.addProperty(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 1);
290 | commandQueue = clCreateCommandQueueWithProperties(
291 | context, device, properties, null);
292 |
293 | // Create the program
294 | System.out.println("Creating program...");
295 | cl_program program = clCreateProgramWithSource(context,
296 | 1, new String[]{ programSource }, null, null);
297 |
298 | // Build the program
299 | System.out.println("Building program...");
300 | clBuildProgram(program, 0, null, null, null, null);
301 |
302 | // Create the kernel
303 | System.out.println("Creating kernel...");
304 | kernel = clCreateKernel(program, "rotateImage", null);
305 |
306 | }
307 |
308 | /**
309 | * Initialize the memory objects for the input and output images
310 | */
311 | private void initImageMem()
312 | {
313 | // Create the memory object for the input- and output image
314 | DataBufferInt dataBufferSrc =
315 | (DataBufferInt)inputImage.getRaster().getDataBuffer();
316 | int dataSrc[] = dataBufferSrc.getData();
317 |
318 | cl_image_format imageFormat = new cl_image_format();
319 | imageFormat.image_channel_order = CL_RGBA;
320 | imageFormat.image_channel_data_type = CL_UNSIGNED_INT8;
321 |
322 | inputImageMem = clCreateImage2D(
323 | context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
324 | new cl_image_format[]{imageFormat}, imageSizeX, imageSizeY,
325 | imageSizeX * Sizeof.cl_uint, Pointer.to(dataSrc), null);
326 |
327 | outputImageMem = clCreateImage2D(
328 | context, CL_MEM_WRITE_ONLY,
329 | new cl_image_format[]{imageFormat}, imageSizeX, imageSizeY,
330 | 0, null, null);
331 | }
332 |
333 |
334 | /**
335 | * Rotate the input image by the given angle, and write it into
336 | * the output image
337 | *
338 | * @param angle The rotation angle
339 | */
340 | void rotateImage(float angle)
341 | {
342 | // Set up the work size and arguments, and execute the kernel
343 | long globalWorkSize[] = new long[2];
344 | globalWorkSize[0] = imageSizeX;
345 | globalWorkSize[1] = imageSizeY;
346 | clSetKernelArg(kernel, 0, Sizeof.cl_mem, Pointer.to(inputImageMem));
347 | clSetKernelArg(kernel, 1, Sizeof.cl_mem, Pointer.to(outputImageMem));
348 | clSetKernelArg(kernel, 2, Sizeof.cl_float,
349 | Pointer.to(new float[]{angle}));
350 | clEnqueueNDRangeKernel(commandQueue, kernel, 2, null,
351 | globalWorkSize, null, 0, null, null);
352 |
353 | // Read the pixel data into the output image
354 | DataBufferInt dataBufferDst =
355 | (DataBufferInt)outputImage.getRaster().getDataBuffer();
356 | int dataDst[] = dataBufferDst.getData();
357 | clEnqueueReadImage(
358 | commandQueue, outputImageMem, true, new long[3],
359 | new long[]{imageSizeX, imageSizeY, 1},
360 | imageSizeX * Sizeof.cl_uint, 0,
361 | Pointer.to(dataDst), 0, null, null);
362 | }
363 | }
364 |
365 |
--------------------------------------------------------------------------------
/src/main/java/org/jocl/samples/JOCLSimpleMandelbrot.java:
--------------------------------------------------------------------------------
1 | /*
2 | * JOCL - Java bindings for OpenCL
3 | *
4 | * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
5 | */
6 | package org.jocl.samples;
7 |
8 | import static org.jocl.CL.*;
9 |
10 | import java.awt.*;
11 | import java.awt.event.*;
12 | import java.awt.image.*;
13 | import java.io.*;
14 |
15 | import javax.swing.*;
16 |
17 | import org.jocl.*;
18 |
19 | /**
20 | * A class that uses a simple OpenCL kernel to compute the
21 | * Mandelbrot set and displays it in an image
22 | */
23 | public class JOCLSimpleMandelbrot
24 | {
25 | /**
26 | * Entry point for this sample.
27 | *
28 | * @param args not used
29 | */
30 | public static void main(String args[])
31 | {
32 | SwingUtilities.invokeLater(new Runnable()
33 | {
34 | public void run()
35 | {
36 | new JOCLSimpleMandelbrot(500,500);
37 | }
38 | });
39 | }
40 |
41 | /**
42 | * The image which will contain the Mandelbrot pixel data
43 | */
44 | private BufferedImage image;
45 |
46 | /**
47 | * The width of the image
48 | */
49 | private int sizeX = 0;
50 |
51 | /**
52 | * The height of the image
53 | */
54 | private int sizeY = 0;
55 |
56 | /**
57 | * The component which is used for rendering the image
58 | */
59 | private JComponent imageComponent;
60 |
61 | /**
62 | * The OpenCL context
63 | */
64 | private cl_context context;
65 |
66 | /**
67 | * The OpenCL command queue
68 | */
69 | private cl_command_queue commandQueue;
70 |
71 | /**
72 | * The OpenCL kernel which will actually compute the Mandelbrot
73 | * set and store the pixel data in a CL memory object
74 | */
75 | private cl_kernel kernel;
76 |
77 | /**
78 | * The OpenCL memory object which stores the pixel data
79 | */
80 | private cl_mem pixelMem;
81 |
82 | /**
83 | * An OpenCL memory object which stores a nifty color map,
84 | * encoded as integers combining the RGB components of
85 | * the colors.
86 | */
87 | private cl_mem colorMapMem;
88 |
89 | /**
90 | * The color map which will be copied into the OpenCL
91 | * memory object that stores the color map.
92 | */
93 | private int colorMap[];
94 |
95 | /**
96 | * The minimum x-value of the area in which the Mandelbrot
97 | * set should be computed
98 | */
99 | private float x0 = -2f;
100 |
101 | /**
102 | * The minimum y-value of the area in which the Mandelbrot
103 | * set should be computed
104 | */
105 | private float y0 = -1.3f;
106 |
107 | /**
108 | * The maximum x-value of the area in which the Mandelbrot
109 | * set should be computed
110 | */
111 | private float x1 = 0.6f;
112 |
113 | /**
114 | * The maximum y-value of the area in which the Mandelbrot
115 | * set should be computed
116 | */
117 | private float y1 = 1.3f;
118 |
119 |
/**
 * Creates the JOCLSimpleMandelbrot sample with the given
 * width and height: Creates the image and the component that
 * paints it, initializes the interaction and OpenCL, computes
 * the initial image, and shows the component in a frame.
 *
 * @param width The width of the image and of the component
 * @param height The height of the image and of the component
 */
public JOCLSimpleMandelbrot(int width, int height)
{
    sizeX = width;
    sizeY = height;

    // The image, and a panel that paints this image at its origin
    image = new BufferedImage(sizeX, sizeY, BufferedImage.TYPE_INT_RGB);
    imageComponent = new JPanel()
    {
        private static final long serialVersionUID = 1L;

        @Override
        public void paintComponent(Graphics g)
        {
            super.paintComponent(g);
            g.drawImage(image, 0, 0, this);
        }
    };

    // Mouse interaction, OpenCL, and the first image
    initInteraction();
    initCL();
    updateImage();

    // The main frame, which is sized to fit the image component
    imageComponent.setPreferredSize(new Dimension(width, height));
    JFrame window = new JFrame("JOCL Simple Mandelbrot");
    window.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
    window.setLayout(new BorderLayout());
    window.add(imageComponent, BorderLayout.CENTER);
    window.pack();
    window.setVisible(true);
}
160 |
161 | /**
162 | * Initialize OpenCL: Create the context, the command queue
163 | * and the kernel.
164 | */
165 | private void initCL()
166 | {
167 | final int platformIndex = 0;
168 | final long deviceType = CL_DEVICE_TYPE_ALL;
169 | final int deviceIndex = 0;
170 |
171 | // Enable exceptions and subsequently omit error checks in this sample
172 | CL.setExceptionsEnabled(true);
173 |
174 | // Obtain the number of platforms
175 | int numPlatformsArray[] = new int[1];
176 | clGetPlatformIDs(0, null, numPlatformsArray);
177 | int numPlatforms = numPlatformsArray[0];
178 |
179 | // Obtain a platform ID
180 | cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
181 | clGetPlatformIDs(platforms.length, platforms, null);
182 | cl_platform_id platform = platforms[platformIndex];
183 |
184 | // Initialize the context properties
185 | cl_context_properties contextProperties = new cl_context_properties();
186 | contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
187 |
188 | // Obtain the number of devices for the platform
189 | int numDevicesArray[] = new int[1];
190 | clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
191 | int numDevices = numDevicesArray[0];
192 |
193 | // Obtain a device ID
194 | cl_device_id devices[] = new cl_device_id[numDevices];
195 | clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
196 | cl_device_id device = devices[deviceIndex];
197 |
198 | // Create a context for the selected device
199 | context = clCreateContext(
200 | contextProperties, 1, new cl_device_id[]{device},
201 | null, null, null);
202 |
203 | // Create a command-queue for the selected device
204 | cl_queue_properties properties = new cl_queue_properties();
205 | commandQueue = clCreateCommandQueueWithProperties(
206 | context, device, properties, null);
207 |
208 | // Program Setup
209 | String source =
210 | readFile("src/main/resources/kernels/SimpleMandelbrot.cl");
211 |
212 | // Create the program
213 | cl_program cpProgram = clCreateProgramWithSource(context, 1,
214 | new String[]{ source }, null, null);
215 |
216 | // Build the program
217 | clBuildProgram(cpProgram, 0, null, "-cl-mad-enable", null, null);
218 |
219 | // Create the kernel
220 | kernel = clCreateKernel(cpProgram, "computeMandelbrot", null);
221 |
222 | // Create the memory object which will be filled with the
223 | // pixel data
224 | pixelMem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
225 | sizeX * sizeY * Sizeof.cl_uint, null, null);
226 |
227 | // Create and fill the memory object containing the color map
228 | initColorMap(32, Color.RED, Color.GREEN, Color.BLUE);
229 | colorMapMem = clCreateBuffer(context, CL_MEM_READ_WRITE,
230 | colorMap.length * Sizeof.cl_uint, null, null);
231 | clEnqueueWriteBuffer(commandQueue, colorMapMem, true, 0,
232 | colorMap.length * Sizeof.cl_uint, Pointer.to(colorMap), 0, null, null);
233 | }
234 |
235 | /**
236 | * Helper function which reads the file with the given name and returns
237 | * the contents of this file as a String. Will exit the application
238 | * if the file can not be read.
239 | *
240 | * @param fileName The name of the file to read.
241 | * @return The contents of the file
242 | */
243 | private String readFile(String fileName)
244 | {
245 | BufferedReader br = null;
246 | try
247 | {
248 | br = new BufferedReader(
249 | new InputStreamReader(new FileInputStream(fileName)));
250 | StringBuffer sb = new StringBuffer();
251 | String line = null;
252 | while (true)
253 | {
254 | line = br.readLine();
255 | if (line == null)
256 | {
257 | break;
258 | }
259 | sb.append(line).append("\n");
260 | }
261 | return sb.toString();
262 | }
263 | catch (IOException e)
264 | {
265 | e.printStackTrace();
266 | System.exit(1);
267 | return null;
268 | }
269 | finally
270 | {
271 | if (br != null)
272 | {
273 | try
274 | {
275 | br.close();
276 | }
277 | catch (IOException e)
278 | {
279 | e.printStackTrace();
280 | }
281 | }
282 | }
283 | }
284 |
285 | /**
286 | * Creates the colorMap array which contains RGB colors as integers,
287 | * interpolated through the given colors with colors.length * stepSize
288 | * steps
289 | *
290 | * @param stepSize The number of interpolation steps between two colors
291 | * @param colors The colors for the map
292 | */
293 | private void initColorMap(int stepSize, Color ... colors)
294 | {
295 | colorMap = new int[stepSize*colors.length];
296 | int index = 0;
297 | for (int i=0; i> 0) & 0xFFU, tag);
54 | addByte(l_WarpHist, (data >> 8) & 0xFFU, tag);
55 | addByte(l_WarpHist, (data >> 16) & 0xFFU, tag);
56 | addByte(l_WarpHist, (data >> 24) & 0xFFU, tag);
57 | }
58 |
// Builds one partial 256-bin histogram per work-group.
//
// d_PartialHistograms : output, HISTOGRAM256_BIN_COUNT counters per work-group,
//                       written at get_group_id(0) * HISTOGRAM256_BIN_COUNT
// d_Data              : input words; each word is passed to addWord()
// dataCount           : number of uint elements in d_Data
__kernel __attribute__((reqd_work_group_size(HISTOGRAM256_WORKGROUP_SIZE, 1, 1)))
void histogram256(
    __global uint *d_PartialHistograms,
    __global uint *d_Data,
    uint dataCount
){
    const size_t lid = get_local_id(0);

    // Local storage: one sub-histogram per warp
    __local uint l_Hist[WARP_COUNT * HISTOGRAM256_BIN_COUNT];

    // Sub-histogram belonging to the warp of this work-item
    const size_t warpIndex = lid >> LOG2_WARP_SIZE;
    __local uint *l_WarpHist = l_Hist + warpIndex * HISTOGRAM256_BIN_COUNT;

    // Value handed to addWord(): the local id moved into the topmost
    // LOG2_WARP_SIZE bits of a 32 bit word
    const uint tag = (uint)(lid << (32 - LOG2_WARP_SIZE));

    // Zero the local storage of this work-group before processing;
    // every work-item clears one counter per pass
    uint pass = 0;
    while (pass < (HISTOGRAM256_BIN_COUNT / WARP_SIZE)) {
        l_Hist[lid + pass * (WARP_COUNT * WARP_SIZE)] = 0;
        pass++;
    }

    // All counters must be cleared before any work-item starts counting
    barrier(CLK_LOCAL_MEM_FENCE);

    // Walk the whole input buffer, stepping by the global size, and
    // accumulate into the per-warp histogram
    uint pos = get_global_id(0);
    while (pos < dataCount) {
        addWord(l_WarpHist, d_Data[pos], tag);
        pos += get_global_size(0);
    }

    // All per-warp histograms must be complete before they are merged
    barrier(CLK_LOCAL_MEM_FENCE);

    // Per-work-group reduction: add up the same bin of every warp
    // (the tag bits are masked out) and store the partial histogram
    const size_t groupBase = get_group_id(0) * HISTOGRAM256_BIN_COUNT;
    for (uint bin = lid; bin < HISTOGRAM256_BIN_COUNT; bin += HISTOGRAM256_WORKGROUP_SIZE) {
        uint total = 0;
        for (uint w = 0; w < WARP_COUNT; w++) {
            total += l_Hist[bin + w * HISTOGRAM256_BIN_COUNT] & TAG_MASK;
        }
        d_PartialHistograms[groupBase + bin] = total;
    }
}
93 |
94 |
95 |
96 | ////////////////////////////////////////////////////////////////////////////////
97 | // Merge histogram256() output
98 | // Run one workgroup per bin; each workgroup adds up the same bin counter
99 | // from every partial histogram. Reads are uncoalesced, but mergeHistogram256
100 | // takes only a fraction of total processing time
101 | ////////////////////////////////////////////////////////////////////////////////
102 | #define MERGE_WORKGROUP_SIZE 256
103 |
// Sums one bin over all partial histograms; the bin is selected by the
// work-group id.
//
// d_Histogram         : output, one counter per work-group (bin)
// d_PartialHistograms : input, histogramCount histograms of
//                       HISTOGRAM256_BIN_COUNT counters each
// histogramCount      : number of partial histograms
//
// NOTE(review): the local reduction indexes l_Data by the local id and starts
// at MERGE_WORKGROUP_SIZE / 2, so it presumably expects to be launched with a
// local size of exactly MERGE_WORKGROUP_SIZE - confirm against the host code.
__kernel void mergeHistogram256(
    __global uint *d_Histogram,
    __global uint *d_PartialHistograms,
    uint histogramCount
){
    __local uint l_Data[MERGE_WORKGROUP_SIZE];

    const size_t lid = get_local_id(0);
    const size_t bin = get_group_id(0);

    // Each work-item adds up every MERGE_WORKGROUP_SIZE-th partial histogram
    uint partial = 0;
    uint h = (uint)lid;
    while (h < histogramCount) {
        partial += d_PartialHistograms[bin + h * HISTOGRAM256_BIN_COUNT];
        h += MERGE_WORKGROUP_SIZE;
    }
    l_Data[lid] = partial;

    // Tree reduction in local memory; the barrier at the top of every step
    // makes the writes of the previous step (or of the store above) visible
    uint stride = MERGE_WORKGROUP_SIZE / 2;
    while (stride > 0) {
        barrier(CLK_LOCAL_MEM_FENCE);
        if (lid < stride) {
            l_Data[lid] += l_Data[lid + stride];
        }
        stride >>= 1;
    }

    // Work-item 0 performed the last addition itself, so it can store
    // the total without another barrier
    if (lid == 0) {
        d_Histogram[bin] = l_Data[0];
    }
}
125 |
--------------------------------------------------------------------------------
/src/main/resources/kernels/Histogram_Kernels.cl:
--------------------------------------------------------------------------------
1 | /* ============================================================
2 | Copyright (c) 2009-2010 Advanced Micro Devices, Inc. All rights reserved.
3 |
4 | Redistribution and use of this material is permitted under the following
5 | conditions:
6 |
7 | Redistributions must retain the above copyright notice and all terms of this
8 | license.
9 |
10 | In no event shall anyone redistributing or accessing or using this material
11 | commence or participate in any arbitration or legal action relating to this
12 | material against Advanced Micro Devices, Inc. or any copyright holders or
13 | contributors. The foregoing shall survive any expiration or termination of
14 | this license or any agreement or access or use related to this material.
15 | ANY BREACH OF ANY TERM OF THIS LICENSE SHALL RESULT IN THE IMMEDIATE REVOCATION
16 | OF ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE THIS MATERIAL.
17 | THIS MATERIAL IS PROVIDED BY ADVANCED MICRO DEVICES, INC. AND ANY COPYRIGHT
18 | HOLDERS AND CONTRIBUTORS "AS IS" IN ITS CURRENT CONDITION AND WITHOUT ANY
19 | REPRESENTATIONS, GUARANTEE, OR WARRANTY OF ANY KIND OR IN ANY WAY RELATED TO
20 | SUPPORT, INDEMNITY, ERROR FREE OR UNINTERRUPTED OPERA TION, OR THAT IT IS FREE
21 | FROM DEFECTS OR VIRUSES. ALL OBLIGATIONS ARE HEREBY DISCLAIMED - WHETHER
22 | EXPRESS, IMPLIED, OR STATUTORY - INCLUDING, BUT NOT LIMITED TO, ANY IMPLIED
23 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE,
24 | ACCURACY, COMPLETENESS, OPERABILITY, QUALITY OF SERVICE, OR NON-INFRINGEMENT.
25 | IN NO EVENT SHALL ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR
26 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, PUNITIVE,
27 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
28 | OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, REVENUE, DATA, OR PROFITS; OR
29 | BUSINESS INTERRUPTION) HOWEVER CAUSED OR BASED ON ANY THEORY OF LIABILITY
30 | ARISING IN ANY WAY RELATED TO THIS MATERIAL, EVEN IF ADVISED OF THE POSSIBILITY
31 | OF SUCH DAMAGE. THE ENTIRE AND AGGREGATE LIABILITY OF ADVANCED MICRO DEVICES,
32 | INC. AND ANY COPYRIGHT HOLDERS AND CONTRIBUTORS SHALL NOT EXCEED TEN DOLLARS
33 | (US $10.00). ANYONE REDISTRIBUTING OR ACCESSING OR USING THIS MATERIAL ACCEPTS
34 | THIS ALLOCATION OF RISK AND AGREES TO RELEASE ADVANCED MICRO DEVICES, INC. AND
35 | ANY COPYRIGHT HOLDERS AND CONTRIBUTORS FROM ANY AND ALL LIABILITIES,
36 | OBLIGATIONS, CLAIMS, OR DEMANDS IN EXCESS OF TEN DOLLARS (US $10.00). THE
37 | FOREGOING ARE ESSENTIAL TERMS OF THIS LICENSE AND, IF ANY OF THESE TERMS ARE
38 | CONSTRUED AS UNENFORCEABLE, FAIL IN ESSENTIAL PURPOSE, OR BECOME VOID OR
39 | DETRIMENTAL TO ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR
40 | CONTRIBUTORS FOR ANY REASON, THEN ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE
41 | THIS MATERIAL SHALL TERMINATE IMMEDIATELY. MOREOVER, THE FOREGOING SHALL
42 | SURVIVE ANY EXPIRATION OR TERMINATION OF THIS LICENSE OR ANY AGREEMENT OR
43 | ACCESS OR USE RELATED TO THIS MATERIAL.
44 | NOTICE IS HEREBY PROVIDED, AND BY REDISTRIBUTING OR ACCESSING OR USING THIS
45 | MATERIAL SUCH NOTICE IS ACKNOWLEDGED, THAT THIS MATERIAL MAY BE SUBJECT TO
46 | RESTRICTIONS UNDER THE LAWS AND REGULATIONS OF THE UNITED STATES OR OTHER
47 | COUNTRIES, WHICH INCLUDE BUT ARE NOT LIMITED TO, U.S. EXPORT CONTROL LAWS SUCH
48 | AS THE EXPORT ADMINISTRATION REGULATIONS AND NATIONAL SECURITY CONTROLS AS
49 | DEFINED THEREUNDER, AS WELL AS STATE DEPARTMENT CONTROLS UNDER THE U.S.
50 | MUNITIONS LIST. THIS MATERIAL MAY NOT BE USED, RELEASED, TRANSFERRED, IMPORTED,
51 | EXPORTED AND/OR RE-EXPORTED IN ANY MANNER PROHIBITED UNDER ANY APPLICABLE LAWS,
52 | INCLUDING U.S. EXPORT CONTROL LAWS REGARDING SPECIFICALLY DESIGNATED PERSONS,
53 | COUNTRIES AND NATIONALS OF COUNTRIES SUBJECT TO NATIONAL SECURITY CONTROLS.
54 | MOREOVER, THE FOREGOING SHALL SURVIVE ANY EXPIRATION OR TERMINATION OF ANY
55 | LICENSE OR AGREEMENT OR ACCESS OR USE RELATED TO THIS MATERIAL.
56 | NOTICE REGARDING THE U.S. GOVERNMENT AND DOD AGENCIES: This material is
57 | provided with "RESTRICTED RIGHTS" and/or "LIMITED RIGHTS" as applicable to
58 | computer software and technical data, respectively. Use, duplication,
59 | distribution or disclosure by the U.S. Government and/or DOD agencies is
60 | subject to the full extent of restrictions in all applicable regulations,
61 | including those found at FAR52.227 and DFARS252.227 et seq. and any successor
62 | regulations thereof. Use of this material by the U.S. Government and/or DOD
63 | agencies is acknowledgment of the proprietary rights of any copyright holders
64 | and contributors, including those of Advanced Micro Devices, Inc., as well as
65 | the provisions of FAR52.227-14 through 23 regarding privately developed and/or
66 | commercial computer software.
67 | This license forms the entire agreement regarding the subject matter hereof and
68 | supersedes all proposals and prior discussions and writings between the parties
69 | with respect thereto. This license does not affect any ownership, rights, title,
70 | or interest in, or relating to, this material. No terms of this license can be
71 | modified or waived, and no breach of this license can be excused, unless done
72 | so in a writing signed by all affected parties. Each term of this license is
73 | separately enforceable. If any term of this license is determined to be or
74 | becomes unenforceable or illegal, such term shall be reformed to the minimum
75 | extent necessary in order for this license to remain in effect in accordance
76 | with its terms as modified by such reformation. This license shall be governed
77 | by and construed in accordance with the laws of the State of Texas without
78 | regard to rules on conflicts of law of any state or jurisdiction or the United
79 | Nations Convention on the International Sale of Goods. All disputes arising out
80 | of this license shall be subject to the jurisdiction of the federal and state
81 | courts in Austin, Texas, and all defenses are hereby waived concerning personal
82 | jurisdiction and venue of these courts.
83 | ============================================================ */
84 |
85 |
86 | /*
87 | * For a description of the algorithm and the terms used, please see the
88 | * documentation for this sample.
89 | *
90 | * On invocation of kernel histogram256, each work-item calculates its
91 | * thread-histogram bins, and finally all thread-histograms are merged into
92 | * the block-histogram bins. Outside the kernel, all block-histograms are
93 | * merged into the final histogram.
94 | */
95 |
96 | #pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
97 |
98 | #define BIN_SIZE 256
99 | #define GROUP_SIZE 16
100 |
/**
 * @brief   Calculates the 256-bin block-histogram of one work-group
 *
 * Every work-item first counts BIN_SIZE consecutive input values into its own
 * thread-histogram, then the thread-histograms of the group are summed.
 *
 * NOTE(review): the thread-histogram counters are uchar, so a counter wraps
 * around if one work-item sees the same value 256 times; input values are
 * used as bin indices without a range check. The merge step covers all bins
 * and all thread-histograms only for a local size of GROUP_SIZE - confirm
 * that the host code guarantees these conditions.
 *
 * @param   data         input data pointer, BIN_SIZE values per work-item
 * @param   sharedArray  local memory for the thread-histograms,
 *                       BIN_SIZE bytes per work-item of the group
 * @param   binResult    block-histogram array, BIN_SIZE counters per group
 */
__kernel
void histogram256(__global const uint* data,
                  __local uchar* sharedArray,
                  __global uint* binResult)
{
    const size_t localId = get_local_id(0);
    const size_t globalId = get_global_id(0);
    const size_t groupId = get_group_id(0);

    /* thread-histogram and input slice owned by this work-item */
    __local uchar* threadBins = sharedArray + localId * BIN_SIZE;
    __global const uint* threadData = data + globalId * BIN_SIZE;

    /* clear the thread-histogram */
    for (int bin = 0; bin < BIN_SIZE; ++bin)
    {
        threadBins[bin] = 0;
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    /* count the input values of this work-item */
    for (int k = 0; k < BIN_SIZE; ++k)
    {
        const uint value = threadData[k];
        threadBins[value]++;
    }

    /* all thread-histograms must be complete before they are merged */
    barrier(CLK_LOCAL_MEM_FENCE);

    /* merge: this work-item sums bins localId, localId + GROUP_SIZE, ... */
    for (int chunk = 0; chunk < BIN_SIZE / GROUP_SIZE; ++chunk)
    {
        const size_t bin = chunk * GROUP_SIZE + localId;

        uint binCount = 0;
        for (int thread = 0; thread < GROUP_SIZE; ++thread)
        {
            binCount += sharedArray[thread * BIN_SIZE + bin];
        }

        binResult[groupId * BIN_SIZE + bin] = binCount;
    }
}
141 |
142 |
143 |
--------------------------------------------------------------------------------
/src/main/resources/kernels/QuadFloat.cl:
--------------------------------------------------------------------------------
1 | /*
2 | * JOCL - Java bindings for OpenCL
3 | *
4 | * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
5 | */
6 |
7 | // Quad-Float functions for OpenCL float4 type.
8 | // Ported from quad-double (QD) package:
9 | // http://crd.lbl.gov/~dhbailey/mpdist/index.html
10 |
// Converts a float to a quad-float: the value becomes the leading
// component, the three remaining components are zero.
inline float4 qfAssign(float value)
{
    float4 result = (float4)(0.0f);
    result.x = value;
    return result;
}
15 |
// Converts a two-component value to a quad-float: both components are
// copied to the two leading slots, the two trailing slots are zero.
inline float4 qfAssign2(float2 value)
{
    return (float4)(value, 0.0f, 0.0f);
}
20 |
// Returns the negated quad-float; every component changes its sign.
inline float4 qfNegate(float4 value)
{
    return -value;
}
25 |
// Adds a and b. Returns the rounded sum and stores the rounding error
// of that addition in *err.
inline float two_sum(float a, float b, float *err)
{
    const float total = a + b;
    // Portion of the total that is attributed to b, and then to a
    const float bVirtual = total - a;
    const float aVirtual = total - bVirtual;
    // What was lost from each operand
    const float aRoundoff = a - aVirtual;
    const float bRoundoff = b - bVirtual;
    *err = aRoundoff + bRoundoff;
    return total;
}
33 |
// Adds the three values in place using two_sum: afterwards *a holds the
// leading sum, while *b and *c hold the sum and the error of the two
// intermediate rounding errors.
inline void three_sum(float *a, float *b, float *c)
{
    float errAB, errC;
    const float sumAB = two_sum(*a, *b, &errAB);
    *a = two_sum(*c, sumAB, &errC);
    *b = two_sum(errAB, errC, c);
}
41 |
// Variant of three_sum that produces only two outputs: *a receives the
// leading sum, *b the plain sum of the two intermediate rounding errors.
// *c is read but not modified.
inline void three_sum2(float *a, float *b, float *c)
{
    float errAB, errC;
    const float sumAB = two_sum(*a, *b, &errAB);
    *a = two_sum(*c, sumAB, &errC);
    *b = errAB + errC;
}
49 |
50 |
// Adds a and b. Returns the rounded sum and stores b minus the part of
// the sum attributed to b in *err.
// NOTE(review): the classic form of this routine yields the exact rounding
// error only when |a| >= |b|; callers are presumed to ensure that - confirm.
inline float quick_two_sum(float a, float b, float *err)
{
    const float total = a + b;
    const float bVirtual = total - a;
    *err = b - bVirtual;
    return total;
}
57 |
// Renormalizes the five-term expansion (c0, ..., c4) into four terms,
// which are written back to *c0 ... *c3. *c4 is left holding the error
// term produced by the first addition of the bottom-up pass.
inline void renorm(float *c0, float *c1,
    float *c2, float *c3, float *c4)
{
    // Pass 1: sum from the smallest term upwards; every step stores
    // its rounding error in the slot below the one it just consumed
    float carry = quick_two_sum(*c3, *c4, c4);
    carry = quick_two_sum(*c2, carry, c3);
    carry = quick_two_sum(*c1, carry, c2);
    *c0 = quick_two_sum(*c0, carry, c1);

    // Pass 2: walk back down. A zero error term means that the current
    // output slot is still free, so the next input is folded into the
    // same slot instead of advancing to the next one.
    float r0, r1, r2 = 0.0f, r3 = 0.0f;
    r0 = quick_two_sum(*c0, *c1, &r1);
    if (r1 == 0.0f)
    {
        r0 = quick_two_sum(r0, *c2, &r1);
        if (r1 == 0.0f)
        {
            r0 = quick_two_sum(r0, *c3, &r1);
            if (r1 == 0.0f)
            {
                r0 = quick_two_sum(r0, *c4, &r1);
            }
            else
            {
                r1 = quick_two_sum(r1, *c4, &r2);
            }
        }
        else
        {
            r1 = quick_two_sum(r1, *c3, &r2);
            if (r2 == 0.0f)
            {
                r1 = quick_two_sum(r1, *c4, &r2);
            }
            else
            {
                r2 = quick_two_sum(r2, *c4, &r3);
            }
        }
    }
    else
    {
        r1 = quick_two_sum(r1, *c2, &r2);
        if (r2 == 0.0f)
        {
            r1 = quick_two_sum(r1, *c3, &r2);
            if (r2 == 0.0f)
            {
                r1 = quick_two_sum(r1, *c4, &r2);
            }
            else
            {
                r2 = quick_two_sum(r2, *c4, &r3);
            }
        }
        else
        {
            r2 = quick_two_sum(r2, *c3, &r3);
            // All slots are in use: the last term is simply added
            // to the lowest slot that is occupied
            if (r3 == 0.0f)
            {
                r2 += *c4;
            }
            else
            {
                r3 += *c4;
            }
        }
    }

    *c0 = r0;
    *c1 = r1;
    *c2 = r2;
    *c3 = r3;
}
134 |
135 |
136 |
// Quad-float addition: stores a + b in *sum.
inline void qfAdd(float4 *sum, const float4 a, const float4 b)
{
    // Component-wise sums (hi*) and their rounding errors (lo*)
    float lo0, lo1, lo2, lo3;
    float hi0 = two_sum(a.x, b.x, &lo0);
    float hi1 = two_sum(a.y, b.y, &lo1);
    float hi2 = two_sum(a.z, b.z, &lo2);
    float hi3 = two_sum(a.w, b.w, &lo3);

    // Propagate the rounding errors into the lower components
    hi1 = two_sum(hi1, lo0, &lo0);
    three_sum(&hi2, &lo0, &lo1);
    three_sum2(&hi3, &lo0, &lo2);
    lo0 = lo0 + lo1 + lo3;

    // Compress the five terms into the four result components
    renorm(&hi0, &hi1, &hi2, &hi3, &lo0);
    *sum = (float4)(hi0, hi1, hi2, hi3);
}
158 |
// Splits a into two parts with *hi + *lo == a, using the
// multiplier 2^12 + 1.
inline void split(float a, float *hi, float *lo)
{
    const float splitter = (float)((1 << 12) + 1);
    const float scaled = splitter * a;
    const float upper = scaled - (scaled - a);
    *hi = upper;
    *lo = a - upper;
}
165 |
166 |
// Multiplies a and b. Returns the rounded product and stores the
// rounding error, computed from the split halves of both operands,
// in *err.
inline float two_prod(float a, float b, float *err)
{
    const float product = a * b;

    float aHi, aLo;
    float bHi, bLo;
    split(a, &aHi, &aLo);
    split(b, &bHi, &bLo);

    // The order of the terms is significant and must not be changed
    *err = ((aHi * bHi - product) + aHi * bLo + aLo * bHi) + aLo * bLo;
    return product;
}
176 |
177 |
// Quad-float multiplication: stores a * b in *prod.
//
// Port of the "sloppy" multiplication of the QD package: the partial
// products are computed exactly up to the third order, the remaining
// terms are only added approximately.
inline void qfMul(float4 *prod, const float4 a, const float4 b)
{
    float p0, p1, p2, p3, p4, p5;
    float q0, q1, q2, q3, q4, q5;
    float t0, t1;
    float s0, s1, s2;

    // Partial products (p) and their rounding errors (q), by order
    p0 = two_prod(a.x, b.x, &q0);

    p1 = two_prod(a.x, b.y, &q1);
    p2 = two_prod(a.y, b.x, &q2);

    p3 = two_prod(a.x, b.z, &q3);
    p4 = two_prod(a.y, b.y, &q4);
    p5 = two_prod(a.z, b.x, &q5);

    // Start accumulation
    three_sum(&p1, &p2, &q0);

    // Six-three sum of p2, q1, q2, p3, p4, p5
    three_sum(&p2, &q1, &q2);
    three_sum(&p3, &p4, &p5);

    // (s0, s1, s2) = (p2, q1, q2) + (p3, p4, p5)
    s0 = two_sum(p2, p3, &t0);
    s1 = two_sum(q1, p4, &t1);
    s2 = q2 + p5;
    s1 = two_sum(s1, t0, &t0);
    s2 += (t0 + t1);

    // Remaining lower order terms
    s1 += a.x*b.w + a.y*b.z + a.z*b.y + a.w*b.x + q0 + q3 + q4 + q5;

    // The renormalized expansion is (p0, p1, s0, s1). p2 and p3 are only
    // intermediate values of the accumulation at this point and must not
    // be used for the result.
    renorm(&p0, &p1, &s0, &s1, &s2);
    (*prod).x = p0;
    (*prod).y = p1;
    (*prod).z = s0;
    (*prod).w = s1;
}
212 |
213 |
// Multiplies the quad-float a with the plain float b and stores the
// result in *prod.
inline void qfMulFloat(float4 *prod, const float4 a, const float b)
{
    // Products of the three leading components, with rounding errors
    float e0, e1, e2;
    const float m0 = two_prod(a.x, b, &e0);
    float m1 = two_prod(a.y, b, &e1);
    float m2 = two_prod(a.z, b, &e2);
    // The product of the last component is not corrected
    float m3 = a.w * b;

    // Accumulate the five terms of the unnormalized result
    float r0 = m0;
    float r2;
    float r1 = two_sum(e0, m1, &r2);
    three_sum(&r2, &e1, &m2);
    three_sum2(&e1, &e2, &m3);
    float r3 = e1;
    float r4 = e2 + m2;

    renorm(&r0, &r1, &r2, &r3, &r4);
    *prod = (float4)(r0, r1, r2, r3);
}
242 |
243 |
// Returns whether the quad-float *a is smaller than the float b. Only
// the two leading components are inspected: if the first one equals b,
// the sign of the second one decides.
inline bool qfLessThan(float4 *a, float b)
{
    const float lead = (*a).x;
    if (lead < b)
    {
        return true;
    }
    if (lead == b)
    {
        return (*a).y < 0.0f;
    }
    return false;
}
248 |
// Renormalizes the four-term expansion (c0, ..., c3) in place.
inline void renorm4(float *c0, float *c1,
    float *c2, float *c3)
{
    // Pass 1: sum from the smallest term upwards; every step stores
    // its rounding error in the slot below the one it just consumed
    float carry = quick_two_sum(*c2, *c3, c3);
    carry = quick_two_sum(*c1, carry, c2);
    *c0 = quick_two_sum(*c0, carry, c1);

    // Pass 2: walk back down. A zero error term means that the current
    // output slot is still free, so the next input is folded into the
    // same slot instead of advancing to the next one.
    float r0 = *c0;
    float r1 = *c1;
    float r2 = 0.0f;
    float r3 = 0.0f;
    if (r1 == 0.0f)
    {
        r0 = quick_two_sum(r0, *c2, &r1);
        if (r1 == 0.0f)
        {
            r0 = quick_two_sum(r0, *c3, &r1);
        }
        else
        {
            r1 = quick_two_sum(r1, *c3, &r2);
        }
    }
    else
    {
        r1 = quick_two_sum(r1, *c2, &r2);
        if (r2 == 0.0f)
        {
            r1 = quick_two_sum(r1, *c3, &r2);
        }
        else
        {
            r2 = quick_two_sum(r2, *c3, &r3);
        }
    }

    *c0 = r0;
    *c1 = r1;
    *c2 = r2;
    *c3 = r3;
}
289 |
// Quad-float division: returns a / b.
//
// Works like a long division: each quotient term is estimated from the
// leading components of the current remainder and of b, then the
// remainder is reduced by b times that term.
float4 qfDiv(const float4 a, const float4 b)
{
    float q[4];
    float4 remainder = a;
    float4 scaled;

    for (int k = 0; k < 3; k++)
    {
        q[k] = remainder.x / b.x;

        // remainder -= b * q[k]
        qfMulFloat(&scaled, b, q[k]);
        qfAdd(&remainder, remainder, qfNegate(scaled));
    }

    // The last term is only estimated; the remainder is not updated again
    q[3] = remainder.x / b.x;

    renorm4(&q[0], &q[1], &q[2], &q[3]);

    return (float4)(q[0], q[1], q[2], q[3]);
}
322 |
--------------------------------------------------------------------------------
/src/main/resources/kernels/QuadFloatMandelbrot.cl:
--------------------------------------------------------------------------------
1 | /*
2 | * JOCL - Java bindings for OpenCL
3 | *
4 | * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
5 | */
6 |
7 | // A mandelbrot kernel using QuadFloat functions
8 |
9 | inline int iterate(
10 | float2 x0, float2 y0,
11 | float2 dx, float2 dy,
12 | float relX, float relY,
13 | int maxIterations)
14 | {
15 | float4 qx0 = qfAssign2(x0);
16 | float4 qy0 = qfAssign2(y0);
17 | float4 qdx = qfAssign2(dx);
18 | float4 qdy = qfAssign2(dy);
19 |
20 | float4 qr = qfAssign(0);
21 | float4 qi = qfAssign(0);
22 |
23 | float4 qx = qfAssign(0);
24 | float4 qy = qfAssign(0);
25 |
26 | float4 qxx = qfAssign(0);
27 | float4 qyy = qfAssign(0);
28 |
29 | float4 qfTemp = qfAssign(0);
30 | float4 magnitudeSquared = qfAssign(0);
31 |
32 | //float r = x0 + ((float)ix / sizeX) * dx;
33 | //float i = y0 + ((float)iy / sizeY) * dy;
34 | qfMulFloat(&qfTemp, qdx, relX);
35 | qfAdd(&qr, qx0, qfTemp);
36 |
37 | qfMulFloat(&qfTemp, qdy, relY);
38 | qfAdd(&qi, qy0, qfTemp);
39 |
40 | int iteration = 0;
41 | while (iteration= maskOrigin.x &&
21 | gy >= maskOrigin.y &&
22 | gx < imageSize.x - (maskSize.x-maskOrigin.x-1) &&
23 | gy < imageSize.y - (maskSize.y-maskOrigin.y-1))
24 | {
25 | float4 sum = (float4)0;
26 | for(int mx=0; mx= 0 && gx < imageSize.x &&
43 | gy >= 0 && gy < imageSize.y)
44 | {
45 | output[mul24(gy, imageSize.x)+gx] = (uchar4)0;
46 | }
47 | }
48 |
49 | }
50 |
51 |
52 |
--------------------------------------------------------------------------------
/src/main/resources/kernels/SimpleMandelbrot.cl:
--------------------------------------------------------------------------------
1 | /*
2 | * JOCL - Java bindings for OpenCL
3 | *
4 | * Copyright 2009-2019 Marco Hutter - http://www.jocl.org/
5 | */
6 |
7 | // A very simple OpenCL kernel for computing the mandelbrot set
8 | //
9 | // output : A buffer with sizeX*sizeY elements, storing
10 | // the colors as RGB ints
11 | // sizeX, sizeY : The width and height of the buffer
12 | // x0,y0,x1,y1 : The rectangle in which the mandelbrot
13 | // set will be computed
14 | // maxIterations : The maximum number of iterations
15 | // colorMap : A buffer with colorMapSize elements,
16 | // containing the pixel colors
17 |
18 | __kernel void computeMandelbrot(
19 | __global uint *output,
20 | int sizeX, int sizeY,
21 | float x0, float y0,
22 | float x1, float y1,
23 | int maxIterations,
24 | __global uint *colorMap,
25 | int colorMapSize
26 | )
27 | {
28 | unsigned int ix = get_global_id(0);
29 | unsigned int iy = get_global_id(1);
30 |
31 | float r = x0 + ix * (x1-x0) / sizeX;
32 | float i = y0 + iy * (y1-y0) / sizeY;
33 |
34 | float x = 0;
35 | float y = 0;
36 |
37 | float magnitudeSquared = 0;
38 | int iteration = 0;
39 | while (iteration 0; offset = offset / 2)
29 | {
30 | if (lid < offset)
31 | {
32 | float other = scratch[lid + offset];
33 | float mine = scratch[lid];
34 | scratch[lid] = mine + other;
35 | }
36 | barrier(CLK_LOCAL_MEM_FENCE);
37 | }
38 | if (lid == 0)
39 | {
40 | result[get_group_id(0)] = scratch[0];
41 | }
42 | }
--------------------------------------------------------------------------------
/src/main/resources/kernels/simpleGL.cl:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 1993-2009 NVIDIA Corporation. All rights reserved.
3 | *
4 | * NVIDIA Corporation and its licensors retain all intellectual property and
5 | * proprietary rights in and to this software and related documentation.
6 | * Any use, reproduction, disclosure, or distribution of this software
7 | * and related documentation without an express license agreement from
8 | * NVIDIA Corporation is strictly prohibited.
9 | *
10 | * Please refer to the applicable NVIDIA end user license agreement (EULA)
11 | * associated with this source code for terms and conditions that govern
12 | * your use of this NVIDIA software.
13 | *
14 | */
15 |
16 | /* This example demonstrates how to use the OpenCL/OpenGL bindings */
17 |
///////////////////////////////////////////////////////////////////////////////
//! Simple kernel to modify vertex positions in sine wave pattern
//! @param pos     vertex positions in global memory, one float4 per grid
//!                point, stored row by row (index y*width+x)
//! @param width   number of grid points in x direction
//! @param height  number of grid points in y direction
//! @param time    offset that is added to the arguments of the wave functions
///////////////////////////////////////////////////////////////////////////////
__kernel void sine_wave(__global float4* pos, unsigned int width, unsigned int height, float time)
{
    unsigned int x = get_global_id(0);
    unsigned int y = get_global_id(1);

    // The global work size may be larger than the grid (for example when it
    // was rounded up to a multiple of the local work size). Work-items
    // outside the grid must not write, otherwise they would modify
    // neighboring rows or memory behind the end of the buffer.
    if (x >= width || y >= height)
    {
        return;
    }

    // calculate uv coordinates, mapped from [0,1) to [-1,1)
    float u = x / (float) width;
    float v = y / (float) height;
    u = u*2.0f - 1.0f;
    v = v*2.0f - 1.0f;

    // calculate simple sine wave pattern
    float freq = 4.0f;
    float w = sin(u*freq + time) * cos(v*freq + time) * 0.5f;

    // write output vertex; the wave value is the second (y) component
    pos[y*width+x] = (float4)(u, w, v, 1.0f);
}
40 |
41 |
--------------------------------------------------------------------------------