├── samples ├── 04_julia │ ├── julia.png │ ├── CMakeLists.txt │ └── README.md ├── 04_sobel │ ├── sobel.png │ ├── CMakeLists.txt │ └── README.md ├── 03_mandelbrot │ ├── mandelbrot.png │ ├── CMakeLists.txt │ └── README.md ├── python │ ├── 04_julia │ │ ├── README.md │ │ └── CMakeLists.txt │ ├── requirements.txt │ ├── 00_enumopencl │ │ ├── README.md │ │ ├── CMakeLists.txt │ │ └── enumopencl.py │ ├── 01_copybuffer │ │ ├── README.md │ │ ├── CMakeLists.txt │ │ └── copybuffer.py │ ├── 03_mandelbrot │ │ ├── README.md │ │ ├── CMakeLists.txt │ │ └── mandelbrot.py │ ├── 02_copybufferkernel │ │ ├── README.md │ │ ├── CMakeLists.txt │ │ └── copybufferkernel.py │ ├── README.md │ └── CMakeLists.txt ├── vulkan │ ├── 00_juliavk │ │ ├── juliavk.frag.spv │ │ ├── juliavk.vert.spv │ │ ├── juliavk.frag │ │ ├── CMakeLists.txt │ │ └── juliavk.vert │ ├── 01_nbodyvk │ │ ├── nbodyvk.frag.spv │ │ ├── nbodyvk.vert.spv │ │ ├── nbodyvk.frag │ │ ├── nbodyvk.vert │ │ └── CMakeLists.txt │ ├── CMakeLists.txt │ └── README.md ├── 14_ooqcommandbuffers │ ├── ooq_cmdbuf.png │ ├── CMakeLists.txt │ └── README.md ├── 05_kernelfromfile │ ├── sample_kernel.cl │ ├── CMakeLists.txt │ └── README.md ├── 05_spirvkernelfromfile │ ├── sample_kernel.cl │ ├── sample_kernel32.spv │ ├── sample_kernel64.spv │ └── CMakeLists.txt ├── 06_ndrangekernelfromfile │ ├── ndrange_sample_kernel.cl │ └── CMakeLists.txt ├── images │ ├── CMakeLists.txt │ ├── 00_enumimageformats │ │ └── CMakeLists.txt │ └── README.md ├── 00_enumopencl │ ├── CMakeLists.txt │ └── README.md ├── 00_loaderinfo │ ├── CMakeLists.txt │ └── main.cpp ├── 00_newqueries │ ├── CMakeLists.txt │ └── README.md ├── 01_copybuffer │ ├── CMakeLists.txt │ ├── README.md │ └── main.cpp ├── 00_enumopenclpp │ ├── CMakeLists.txt │ ├── README.md │ └── main.cpp ├── 00_newqueriespp │ ├── CMakeLists.txt │ └── README.md ├── 16_floatatomics │ ├── CMakeLists.txt │ └── README.md ├── 00_enumqueuefamilies │ ├── CMakeLists.txt │ └── README.md ├── 02_copybufferkernel │ ├── CMakeLists.txt │ 
├── README.md │ └── main.cpp ├── 00_extendeddevicequeries │ ├── CMakeLists.txt │ └── README.md ├── 11_semaphores │ ├── CMakeLists.txt │ └── README.md ├── svm │ ├── 00_svmqueries │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ └── main.cpp │ ├── 100_cgsvmhelloworld │ │ ├── CMakeLists.txt │ │ └── README.md │ ├── 101_cgsvmlinkedlist │ │ ├── CMakeLists.txt │ │ └── README.md │ ├── 200_fgsvmhelloworld │ │ ├── CMakeLists.txt │ │ └── README.md │ ├── 201_fgsvmlinkedlist │ │ ├── CMakeLists.txt │ │ └── README.md │ └── CMakeLists.txt ├── 12_commandbuffers │ ├── CMakeLists.txt │ └── README.md ├── 12_commandbufferspp │ ├── CMakeLists.txt │ └── README.md ├── 13_mutablecommandbuffers │ ├── CMakeLists.txt │ └── README.md ├── usm │ ├── 00_usmqueries │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ └── main.cpp │ ├── 01_usmmeminfo │ │ ├── CMakeLists.txt │ │ └── README.md │ ├── 100_dmemhelloworld │ │ ├── CMakeLists.txt │ │ └── README.md │ ├── 101_dmemlinkedlist │ │ ├── CMakeLists.txt │ │ └── README.md │ ├── 200_hmemhelloworld │ │ ├── CMakeLists.txt │ │ └── README.md │ ├── 201_hmemlinkedlist │ │ ├── CMakeLists.txt │ │ └── README.md │ ├── 300_smemhelloworld │ │ ├── CMakeLists.txt │ │ └── README.md │ ├── 301_smemlinkedlist │ │ ├── CMakeLists.txt │ │ └── README.md │ ├── 310_usmmigratemem │ │ ├── CMakeLists.txt │ │ └── README.md │ ├── 400_sysmemhelloworld │ │ ├── CMakeLists.txt │ │ └── README.md │ ├── CMakeLists.txt │ └── README.md ├── 00_spirvqueries │ ├── CMakeLists.txt │ └── main.cpp ├── 10_queueexperiments │ └── CMakeLists.txt ├── 15_mutablecommandbufferasserts │ ├── CMakeLists.txt │ └── README.md ├── opengl │ ├── 00_juliagl │ │ ├── CMakeLists.txt │ │ └── README.md │ ├── 01_nbodygl │ │ ├── CMakeLists.txt │ │ └── README.md │ ├── 02_sobelgl │ │ ├── CMakeLists.txt │ │ └── README.md │ ├── CMakeLists.txt │ └── README.md └── CMakeLists.txt ├── tutorials ├── interceptlayer │ ├── sinjulia.png │ ├── CMakeLists.txt │ ├── README.md │ ├── part6.md │ └── part0.md └── CMakeLists.txt ├── .gitignore ├── 
layers ├── 00_example │ ├── CMakeLists.txt │ ├── README.md │ └── main.cpp ├── 11_semaemu │ ├── CMakeLists.txt │ ├── README.md │ └── emulate.h ├── 10_cmdbufemu │ ├── CMakeLists.txt │ └── README.md ├── 12_spirvqueriesemu │ ├── CMakeLists.txt │ └── emulate.h ├── README.md └── CMakeLists.txt ├── .github ├── dependabot.yml └── workflows │ └── build.yml ├── .travis.yml ├── .appveyor.yml ├── LICENSE ├── CMakeLists.txt ├── include ├── util.hpp ├── getenv_util.hpp └── layer_util.hpp ├── docs └── env │ └── ubuntu │ ├── 19.04.md │ ├── 19.10.md │ ├── 18.04.md │ ├── 22.04.md │ ├── 24.04.md │ └── 20.04.md └── README.md /samples/04_julia/julia.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bashbaug/SimpleOpenCLSamples/HEAD/samples/04_julia/julia.png -------------------------------------------------------------------------------- /samples/04_sobel/sobel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bashbaug/SimpleOpenCLSamples/HEAD/samples/04_sobel/sobel.png -------------------------------------------------------------------------------- /samples/03_mandelbrot/mandelbrot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bashbaug/SimpleOpenCLSamples/HEAD/samples/03_mandelbrot/mandelbrot.png -------------------------------------------------------------------------------- /samples/python/04_julia/README.md: -------------------------------------------------------------------------------- 1 | # Julia Set 2 | 3 | Documentation for this sample can be found [here](../../04_julia/README.md). 
4 | -------------------------------------------------------------------------------- /tutorials/interceptlayer/sinjulia.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bashbaug/SimpleOpenCLSamples/HEAD/tutorials/interceptlayer/sinjulia.png -------------------------------------------------------------------------------- /samples/python/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib==3.8.2 2 | numpy==1.26.3 3 | pyopencl==2023.1.4 4 | pillow==10.3.0 5 | 6 | jupyterlab==4.4.8 7 | -------------------------------------------------------------------------------- /samples/python/00_enumopencl/README.md: -------------------------------------------------------------------------------- 1 | # enumopencl 2 | 3 | Documentation for this sample can be found [here](../../00_enumopencl/README.md). 4 | -------------------------------------------------------------------------------- /samples/python/01_copybuffer/README.md: -------------------------------------------------------------------------------- 1 | # copybuffer 2 | 3 | Documentation for this sample can be found [here](../../01_copybuffer/README.md). 
4 | -------------------------------------------------------------------------------- /samples/vulkan/00_juliavk/juliavk.frag.spv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bashbaug/SimpleOpenCLSamples/HEAD/samples/vulkan/00_juliavk/juliavk.frag.spv -------------------------------------------------------------------------------- /samples/vulkan/00_juliavk/juliavk.vert.spv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bashbaug/SimpleOpenCLSamples/HEAD/samples/vulkan/00_juliavk/juliavk.vert.spv -------------------------------------------------------------------------------- /samples/vulkan/01_nbodyvk/nbodyvk.frag.spv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bashbaug/SimpleOpenCLSamples/HEAD/samples/vulkan/01_nbodyvk/nbodyvk.frag.spv -------------------------------------------------------------------------------- /samples/vulkan/01_nbodyvk/nbodyvk.vert.spv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bashbaug/SimpleOpenCLSamples/HEAD/samples/vulkan/01_nbodyvk/nbodyvk.vert.spv -------------------------------------------------------------------------------- /samples/14_ooqcommandbuffers/ooq_cmdbuf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bashbaug/SimpleOpenCLSamples/HEAD/samples/14_ooqcommandbuffers/ooq_cmdbuf.png -------------------------------------------------------------------------------- /samples/python/03_mandelbrot/README.md: -------------------------------------------------------------------------------- 1 | # Mandelbrot Set 2 | 3 | Documentation for this sample can be found [here](../../03_mandelbrot/README.md). 
4 | -------------------------------------------------------------------------------- /samples/05_kernelfromfile/sample_kernel.cl: -------------------------------------------------------------------------------- 1 | kernel void Test( global uint* dst ) 2 | { 3 | uint index = get_global_id(0); 4 | dst[index] = index; 5 | } 6 | -------------------------------------------------------------------------------- /samples/05_spirvkernelfromfile/sample_kernel.cl: -------------------------------------------------------------------------------- 1 | kernel void Test( global uint* dst ) 2 | { 3 | uint index = get_global_id(0); 4 | dst[index] = index; 5 | } 6 | -------------------------------------------------------------------------------- /samples/05_spirvkernelfromfile/sample_kernel32.spv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bashbaug/SimpleOpenCLSamples/HEAD/samples/05_spirvkernelfromfile/sample_kernel32.spv -------------------------------------------------------------------------------- /samples/05_spirvkernelfromfile/sample_kernel64.spv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bashbaug/SimpleOpenCLSamples/HEAD/samples/05_spirvkernelfromfile/sample_kernel64.spv -------------------------------------------------------------------------------- /samples/python/02_copybufferkernel/README.md: -------------------------------------------------------------------------------- 1 | # copybufferkernel 2 | 3 | Documentation for this sample can be found [here](../../02_copybufferkernel/README.md). 
4 | -------------------------------------------------------------------------------- /samples/06_ndrangekernelfromfile/ndrange_sample_kernel.cl: -------------------------------------------------------------------------------- 1 | kernel void Test( global uint* dst ) 2 | { 3 | uint index = get_global_id(0); 4 | dst[index] = index; 5 | } 6 | -------------------------------------------------------------------------------- /samples/python/04_julia/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_python_sample( 6 | NUMBER 04 7 | SOURCES julia.py) 8 | -------------------------------------------------------------------------------- /samples/python/00_enumopencl/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_python_sample( 6 | NUMBER 00 7 | SOURCES enumopencl.py) 8 | -------------------------------------------------------------------------------- /samples/python/01_copybuffer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_python_sample( 6 | NUMBER 01 7 | SOURCES copybuffer.py) 8 | -------------------------------------------------------------------------------- /samples/python/03_mandelbrot/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_python_sample( 6 | NUMBER 03 7 | SOURCES mandelbrot.py) 8 | -------------------------------------------------------------------------------- /samples/python/02_copybufferkernel/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_python_sample( 6 | NUMBER 02 7 | SOURCES copybufferkernel.py) 8 | -------------------------------------------------------------------------------- /tutorials/interceptlayer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_tutorial( 6 | TARGET sinjulia 7 | VERSION 120 8 | SOURCES main.cpp) 9 | -------------------------------------------------------------------------------- /samples/vulkan/01_nbodyvk/nbodyvk.frag: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | layout(location = 0) in vec3 fragColor; 4 | 5 | layout(location = 0) out vec4 outColor; 6 | 7 | void main() { 8 | outColor = vec4(fragColor, 1.0); 9 | } 10 | -------------------------------------------------------------------------------- /samples/python/README.md: -------------------------------------------------------------------------------- 1 | # Python Samples 2 | 3 | This directory contains experimental samples demonstrating use of OpenCL via Python. 4 | 5 | These samples use the [PyOpenCL](https://pypi.org/project/pyopencl/) bindings for OpenCL. 
6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .klocwork/ 2 | .vscode/ 3 | _bin/ 4 | _bin32/ 5 | _bin64/ 6 | build/ 7 | build32/ 8 | build64/ 9 | external/ 10 | external/OpenCL-Headers/ 11 | external/OpenCL-ICD-Loader/ 12 | install/ 13 | *~ 14 | .ipynb_checkpoints 15 | -------------------------------------------------------------------------------- /layers/00_example/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_layer( 6 | NUMBER 00 7 | TARGET example 8 | VERSION 300 9 | SOURCES main.cpp ) 10 | -------------------------------------------------------------------------------- /samples/images/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | # Require OpenCL 2.0, for read-write images: 6 | set(SAMPLES_CL_VERSION 200) 7 | 8 | add_subdirectory( 00_enumimageformats ) 9 | -------------------------------------------------------------------------------- /samples/04_julia/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 04 8 | TARGET julia 9 | VERSION 120 10 | SOURCES main.cpp) 11 | -------------------------------------------------------------------------------- /samples/04_sobel/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 04 8 | TARGET sobel 9 | VERSION 120 10 | SOURCES main.cpp) 11 | 
-------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "monthly" 7 | groups: 8 | github-actions: 9 | patterns: 10 | - "*" 11 | -------------------------------------------------------------------------------- /layers/11_semaemu/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_layer( 6 | NUMBER 11 7 | TARGET SemaEmu 8 | VERSION 300 9 | SOURCES main.cpp emulate.cpp emulate.h) 10 | -------------------------------------------------------------------------------- /samples/00_enumopencl/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 00 8 | TARGET enumopencl 9 | VERSION 120 10 | SOURCES main.cpp) 11 | -------------------------------------------------------------------------------- /samples/00_loaderinfo/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 00 8 | TARGET loaderinfo 9 | VERSION 100 10 | SOURCES main.cpp) 11 | -------------------------------------------------------------------------------- /samples/00_newqueries/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 00 8 | TARGET newqueries 9 | VERSION 300 10 | SOURCES main.cpp) 11 | 
-------------------------------------------------------------------------------- /samples/01_copybuffer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 01 8 | TARGET copybuffer 9 | VERSION 120 10 | SOURCES main.cpp) 11 | -------------------------------------------------------------------------------- /samples/03_mandelbrot/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 03 8 | TARGET mandelbrot 9 | VERSION 120 10 | SOURCES main.cpp) 11 | -------------------------------------------------------------------------------- /layers/10_cmdbufemu/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_layer( 6 | NUMBER 10 7 | TARGET CmdBufEmu 8 | VERSION 300 9 | SOURCES main.cpp emulate.cpp emulate.h) 10 | -------------------------------------------------------------------------------- /samples/00_enumopenclpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 00 8 | TARGET enumopenclpp 9 | VERSION 120 10 | SOURCES main.cpp) 11 | -------------------------------------------------------------------------------- /samples/00_newqueriespp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 00 8 | TARGET newqueriespp 9 | VERSION 300 10 | 
SOURCES main.cpp) 11 | -------------------------------------------------------------------------------- /samples/16_floatatomics/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 16 8 | TARGET floatatomics 9 | VERSION 120 10 | SOURCES main.cpp) 11 | -------------------------------------------------------------------------------- /samples/00_enumqueuefamilies/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 00 8 | TARGET enumqueuefamilies 9 | VERSION 120 10 | SOURCES main.cpp) 11 | -------------------------------------------------------------------------------- /samples/02_copybufferkernel/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 02 8 | TARGET copybufferkernel 9 | VERSION 120 10 | SOURCES main.cpp) 11 | -------------------------------------------------------------------------------- /samples/00_extendeddevicequeries/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 00 8 | TARGET extendeddevicequeries 9 | VERSION 120 10 | SOURCES main.cpp) 11 | -------------------------------------------------------------------------------- /samples/11_semaphores/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 
7 | NUMBER 11 8 | TARGET semaphores 9 | VERSION 120 10 | SOURCES main.cpp 11 | LIBS OpenCLExt) 12 | -------------------------------------------------------------------------------- /samples/svm/00_svmqueries/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 00 8 | TARGET svmqueries 9 | VERSION 200 10 | CATEGORY svm 11 | SOURCES main.cpp) 12 | -------------------------------------------------------------------------------- /samples/12_commandbuffers/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 12 8 | TARGET commandbuffers 9 | VERSION 120 10 | SOURCES main.cpp 11 | LIBS OpenCLExt) 12 | -------------------------------------------------------------------------------- /samples/12_commandbufferspp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 12 8 | TARGET commandbufferspp 9 | VERSION 120 10 | SOURCES main.cpp 11 | LIBS OpenCLExt) 12 | -------------------------------------------------------------------------------- /samples/14_ooqcommandbuffers/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 14 8 | TARGET ooqcommandbuffers 9 | VERSION 120 10 | SOURCES main.cpp 11 | LIBS OpenCLExt) 12 | -------------------------------------------------------------------------------- /samples/svm/100_cgsvmhelloworld/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2024-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 100 8 | TARGET cgsvmhelloworld 9 | VERSION 200 10 | CATEGORY svm 11 | SOURCES main.cpp) 12 | -------------------------------------------------------------------------------- /samples/svm/101_cgsvmlinkedlist/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 101 8 | TARGET cgsvmlinkedlist 9 | VERSION 200 10 | CATEGORY svm 11 | SOURCES main.cpp) 12 | -------------------------------------------------------------------------------- /samples/svm/200_fgsvmhelloworld/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 200 8 | TARGET fgsvmhelloworld 9 | VERSION 200 10 | CATEGORY svm 11 | SOURCES main.cpp) 12 | -------------------------------------------------------------------------------- /samples/svm/201_fgsvmlinkedlist/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 201 8 | TARGET fgsvmlinkedlist 9 | VERSION 200 10 | CATEGORY svm 11 | SOURCES main.cpp) 12 | -------------------------------------------------------------------------------- /samples/05_kernelfromfile/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 05 8 | TARGET kernelfromfile 9 | VERSION 120 10 | 
SOURCES main.cpp 11 | KERNELS sample_kernel.cl) 12 | -------------------------------------------------------------------------------- /samples/13_mutablecommandbuffers/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 13 # matches the 13_ directory prefix 8 | TARGET mutablecommandbuffers 9 | VERSION 120 10 | SOURCES main.cpp 11 | LIBS OpenCLExt) 12 | -------------------------------------------------------------------------------- /samples/vulkan/01_nbodyvk/nbodyvk.vert: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | layout(location = 0) in vec3 inPosition; 4 | layout(location = 0) out vec3 fragColor; 5 | 6 | void main() { 7 | gl_Position = vec4(inPosition, 1.0); 8 | gl_PointSize = 1.0; 9 | fragColor = vec3(1.0, 0.6, 0.0); 10 | } 11 | -------------------------------------------------------------------------------- /samples/usm/00_usmqueries/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 00 8 | TARGET usmqueries 9 | VERSION 120 10 | CATEGORY usm 11 | SOURCES main.cpp 12 | LIBS OpenCLExt) 13 | -------------------------------------------------------------------------------- /samples/usm/01_usmmeminfo/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 01 # matches the 01_ directory prefix 8 | TARGET usmmeminfo 9 | VERSION 120 10 | CATEGORY usm 11 | SOURCES main.cpp 12 | LIBS OpenCLExt) 13 | -------------------------------------------------------------------------------- /samples/00_spirvqueries/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 00 8 | TARGET spirvqueries 9 | VERSION 120 10 | SOURCES main.cpp 11 | INCLUDES ${SPIRV-Headers_SOURCE_DIR}/include) 12 | -------------------------------------------------------------------------------- /samples/10_queueexperiments/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 10 8 | TARGET queueexperiments 9 | VERSION 200 # for clCreateCommandQueueWithProperties 10 | SOURCES main.cpp) 11 | -------------------------------------------------------------------------------- /samples/15_mutablecommandbufferasserts/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 15 # matches the 15_ directory prefix 8 | TARGET mutablecommandbufferasserts 9 | VERSION 120 10 | SOURCES main.cpp 11 | LIBS OpenCLExt) 12 | -------------------------------------------------------------------------------- /samples/usm/100_dmemhelloworld/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 100 8 | TARGET dmemhelloworld 9 | VERSION 120 10 | CATEGORY usm 11 | SOURCES main.cpp 12 | LIBS OpenCLExt) 13 | -------------------------------------------------------------------------------- /samples/usm/101_dmemlinkedlist/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | 
add_opencl_sample( 6 | TEST 7 | NUMBER 101 8 | TARGET dmemlinkedlist 9 | VERSION 200 10 | CATEGORY usm 11 | SOURCES main.cpp 12 | LIBS OpenCLExt) 13 | -------------------------------------------------------------------------------- /samples/usm/200_hmemhelloworld/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 200 8 | TARGET hmemhelloworld 9 | VERSION 120 10 | CATEGORY usm 11 | SOURCES main.cpp 12 | LIBS OpenCLExt) 13 | -------------------------------------------------------------------------------- /samples/usm/201_hmemlinkedlist/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 201 8 | TARGET hmemlinkedlist 9 | VERSION 200 10 | CATEGORY usm 11 | SOURCES main.cpp 12 | LIBS OpenCLExt) 13 | -------------------------------------------------------------------------------- /samples/usm/300_smemhelloworld/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 300 8 | TARGET smemhelloworld 9 | VERSION 120 10 | CATEGORY usm 11 | SOURCES main.cpp 12 | LIBS OpenCLExt) 13 | -------------------------------------------------------------------------------- /samples/usm/301_smemlinkedlist/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 301 8 | TARGET smemlinkedlist 9 | VERSION 200 10 | CATEGORY usm 11 | SOURCES main.cpp 12 | LIBS OpenCLExt) 13 | 
-------------------------------------------------------------------------------- /samples/usm/310_usmmigratemem/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 310 # matches the 310_ directory prefix 8 | TARGET usmmigratemem 9 | VERSION 120 10 | CATEGORY usm 11 | SOURCES main.cpp 12 | LIBS OpenCLExt) 13 | -------------------------------------------------------------------------------- /layers/12_spirvqueriesemu/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_layer( 6 | NUMBER 12 7 | TARGET SpirvQueriesEmu 8 | VERSION 300 9 | SOURCES main.cpp emulate.cpp emulate.h 10 | INCLUDES ${SPIRV-Headers_SOURCE_DIR}/include) 11 | -------------------------------------------------------------------------------- /samples/06_ndrangekernelfromfile/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 06 8 | TARGET ndrangekernelfromfile 9 | VERSION 120 10 | SOURCES main.cpp 11 | KERNELS ndrange_sample_kernel.cl) 12 | -------------------------------------------------------------------------------- /samples/usm/400_sysmemhelloworld/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 400 8 | TARGET sysmemhelloworld 9 | VERSION 120 10 | CATEGORY usm 11 | SOURCES main.cpp 12 | LIBS OpenCLExt) 13 | -------------------------------------------------------------------------------- /samples/05_spirvkernelfromfile/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 05 8 | TARGET spirvkernelfromfile 9 | VERSION 210 10 | SOURCES main.cpp 11 | KERNELS sample_kernel32.spv sample_kernel64.spv) 12 | -------------------------------------------------------------------------------- /samples/images/00_enumimageformats/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | TEST 7 | NUMBER 00 8 | TARGET enumimageformats 9 | VERSION 300 # for OpenCL 2.0+ image formats 10 | CATEGORY images 11 | SOURCES main.cpp) 12 | -------------------------------------------------------------------------------- /samples/svm/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_subdirectory( 00_svmqueries ) 6 | 7 | add_subdirectory( 100_cgsvmhelloworld ) 8 | add_subdirectory( 101_cgsvmlinkedlist ) 9 | 10 | add_subdirectory( 200_fgsvmhelloworld ) 11 | add_subdirectory( 201_fgsvmlinkedlist ) 12 | -------------------------------------------------------------------------------- /samples/opengl/00_juliagl/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | NUMBER 00 7 | TARGET juliagl 8 | VERSION 120 9 | CATEGORY opengl 10 | SOURCES main.cpp 11 | INCLUDES ${OPENGL_INCLUDE_DIRS} 12 | LIBS ${OPENGL_LIBRARIES} glfw) 13 | -------------------------------------------------------------------------------- /samples/opengl/01_nbodygl/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2022-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | NUMBER 01 7 | TARGET nbodygl 8 | VERSION 120 9 | CATEGORY opengl 10 | SOURCES main.cpp 11 | INCLUDES ${OPENGL_INCLUDE_DIRS} 12 | LIBS ${OPENGL_LIBRARIES} glfw) 13 | -------------------------------------------------------------------------------- /samples/opengl/02_sobelgl/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | NUMBER 02 7 | TARGET sobelgl 8 | VERSION 120 9 | CATEGORY opengl 10 | SOURCES main.cpp 11 | INCLUDES ${OPENGL_INCLUDE_DIRS} 12 | LIBS ${OPENGL_LIBRARIES} glfw) 13 | -------------------------------------------------------------------------------- /samples/vulkan/00_juliavk/juliavk.frag: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | layout(binding = 0) uniform sampler2D texSampler; 4 | 5 | layout(location = 0) in vec3 fragColor; 6 | layout(location = 1) in vec2 fragTexCoord; 7 | 8 | layout(location = 0) out vec4 outColor; 9 | 10 | void main() { 11 | outColor = texture(texSampler, fragTexCoord); 12 | } 13 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | 3 | compiler: 4 | - gcc 5 | - clang 6 | 7 | os: 8 | - linux 9 | #- osx 10 | 11 | before_install: 12 | - git clone https://github.com/KhronosGroup/OpenCL-Headers external/OpenCL-Headers 13 | - git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader external/opencl-icd-loader 14 | 15 | script: 16 | - mkdir -p build 17 | - cd build 18 | - cmake .. 
19 | - make 20 | -------------------------------------------------------------------------------- /samples/vulkan/00_juliavk/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | NUMBER 00 7 | TARGET juliavk 8 | VERSION 300 # clCreateImageWithProperties 9 | CATEGORY vulkan 10 | SOURCES main.cpp 11 | KERNELS juliavk.vert.spv juliavk.frag.spv 12 | INCLUDES ${Vulkan_INCLUDE_DIR} 13 | LIBS ${Vulkan_LIBRARY} glfw) 14 | -------------------------------------------------------------------------------- /samples/vulkan/01_nbodyvk/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_opencl_sample( 6 | NUMBER 01 7 | TARGET nbodyvk 8 | VERSION 300 # clCreateBufferWithProperties 9 | CATEGORY vulkan 10 | SOURCES main.cpp 11 | KERNELS nbodyvk.vert.spv nbodyvk.frag.spv 12 | INCLUDES ${Vulkan_INCLUDE_DIR} 13 | LIBS ${Vulkan_LIBRARY} glfw) 14 | -------------------------------------------------------------------------------- /layers/README.md: -------------------------------------------------------------------------------- 1 | # Layer Samples 2 | 3 | This directory contains samples demonstrating installable interception layers. 4 | Layers are a feature supported by newer versions of the [OpenCL ICD Loader](https://github.com/KhronosGroup/OpenCL-ICD-Loader), and may be used to profile, trace, or modify the behavior of an OpenCL implementation at runtime. 5 | For more information, please see the [cl_loader_layers](https://github.com/KhronosGroup/OpenCL-Docs/blob/main/ext/cl_loader_layers.asciidoc) specification. 
6 | -------------------------------------------------------------------------------- /samples/svm/00_svmqueries/README.md: -------------------------------------------------------------------------------- 1 | # svmqueries 2 | 3 | ## Sample Purpose 4 | 5 | This sample queries and prints the Shared Virtual Memory (SVM) capabilities for all devices in the system. 6 | Many SVM samples require specific SVM capabilities and this sample can be used to verify if it will or will not run on a device. 7 | 8 | ## Key APIs and Concepts 9 | 10 | This sample demonstrates the one query for Shared Virtual Memory capabilities. 11 | 12 | ## Command Line Options 13 | 14 | None 15 | -------------------------------------------------------------------------------- /samples/usm/00_usmqueries/README.md: -------------------------------------------------------------------------------- 1 | # usmqueries 2 | 3 | ## Sample Purpose 4 | 5 | This sample queries and prints the Unified Shared Memory (USM) capabilities for all devices in the system. 6 | Many USM samples require specific USM capabilities and this sample can be used to verify if it will or will not run on a device. 7 | 8 | ## Key APIs and Concepts 9 | 10 | This sample demonstrates the new device queries for Unified Shared Memory capabilities. 11 | 12 | ## Command Line Options 13 | 14 | None 15 | -------------------------------------------------------------------------------- /samples/images/README.md: -------------------------------------------------------------------------------- 1 | # Image Samples 2 | 3 | This directory contains samples demonstrating use of OpenCL image memory objects. 4 | Unlike OpenCL buffer memory objects, OpenCL image memory objects have a specified format and dimensionality. 5 | Within kernels, images are represented as opaque handles, and are accessed using specialized read and write functions. 
6 | 7 | ## Summary of Image Samples 8 | 9 | * [enumimageformats](./00_enumimageformats): Queries and prints the supported image formats for a device. 10 | -------------------------------------------------------------------------------- /.appveyor.yml: -------------------------------------------------------------------------------- 1 | os: 2 | - Visual Studio 2017 3 | #- Visual Studio 2015 4 | 5 | platform: 6 | - Win32 7 | - x64 8 | 9 | configuration: 10 | #- Debug 11 | - Release 12 | 13 | skip_commits: 14 | files: 15 | - '**/*.md' 16 | 17 | before_build: 18 | - git clone --depth=1 https://github.com/KhronosGroup/OpenCL-Headers external/OpenCL-Headers 19 | - git clone --depth=1 https://github.com/KhronosGroup/OpenCL-ICD-Loader external/opencl-icd-loader 20 | - cmake -H. -Bbuild -A%PLATFORM% 21 | 22 | build: 23 | project: build\SimpleOpenCLSamples.sln 24 | parallel: true 25 | verbosity: normal 26 | -------------------------------------------------------------------------------- /samples/vulkan/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | find_package(Vulkan) 6 | find_package(glfw3 HINTS "../../external") 7 | 8 | set(BUILD_VULKAN_SAMPLES TRUE) 9 | if(NOT VULKAN_FOUND) 10 | message(STATUS "Skipping Vulkan Samples - Vulkan is not found.") 11 | set(BUILD_VULKAN_SAMPLES FALSE) 12 | endif() 13 | if(NOT glfw3_FOUND) 14 | message(STATUS "Skipping Vulkan Samples - GLFW is not found.") 15 | set(BUILD_VULKAN_SAMPLES FALSE) 16 | endif() 17 | 18 | if(BUILD_VULKAN_SAMPLES) 19 | add_subdirectory( 00_juliavk ) 20 | add_subdirectory( 01_nbodyvk ) 21 | endif() 22 | -------------------------------------------------------------------------------- /samples/12_commandbufferspp/README.md: -------------------------------------------------------------------------------- 1 | # commandbuffers 2 | 3 | ## Sample Purpose 4 | 5 | This is a modified version 
of the commandbuffers sample that demonstrates how to use the OpenCL extension [cl_khr_command_buffer](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_command_buffer) using proof-of-concept C++ bindings for command buffers. 6 | 7 | This sample requires the OpenCL Extension Loader to get the extension APIs for command buffers. 8 | 9 | ## Command Line Options 10 | 11 | | Option | Default Value | Description | 12 | |:--|:-:|:--| 13 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute the sample on. 14 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 15 | -------------------------------------------------------------------------------- /samples/opengl/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | if(NOT DEFINED OpenGL_GL_PREFERENCE) 6 | set(OpenGL_GL_PREFERENCE "LEGACY") 7 | endif() 8 | 9 | find_package(OpenGL) 10 | find_package(glfw3 HINTS "../../external") 11 | 12 | set(BUILD_OPENGL_SAMPLES TRUE) 13 | if(NOT OPENGL_FOUND) 14 | message(STATUS "Skipping OpenGL Samples - OpenGL is not found.") 15 | set(BUILD_OPENGL_SAMPLES FALSE) 16 | endif() 17 | if(NOT glfw3_FOUND) 18 | message(STATUS "Skipping OpenGL Samples - GLFW is not found.") 19 | set(BUILD_OPENGL_SAMPLES FALSE) 20 | endif() 21 | 22 | if(BUILD_OPENGL_SAMPLES) 23 | add_subdirectory( 00_juliagl ) 24 | add_subdirectory( 01_nbodygl ) 25 | add_subdirectory( 02_sobelgl ) 26 | endif() 27 | -------------------------------------------------------------------------------- /samples/vulkan/00_juliavk/juliavk.vert: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | layout(location = 0) out vec3 fragColor; 4 | layout(location = 1) out vec2 fragTexCoord; 5 | 6 | vec2 positions[4] = vec2[]( 7 | vec2(-1.0, 1.0), 8 | vec2(-1.0, -1.0), 
9 | vec2( 1.0, 1.0), 10 | vec2( 1.0, -1.0) 11 | ); 12 | 13 | vec3 colors[4] = vec3[]( 14 | vec3(1.0, 0.0, 0.0), 15 | vec3(0.0, 1.0, 0.0), 16 | vec3(0.0, 0.0, 1.0), 17 | vec3(1.0, 1.0, 1.0) 18 | ); 19 | 20 | vec2 texCoords[4] = vec2[]( 21 | vec2(0.0, 0.0), 22 | vec2(0.0, 1.0), 23 | vec2(1.0, 0.0), 24 | vec2(1.0, 1.0) 25 | ); 26 | 27 | void main() { 28 | gl_Position = vec4(positions[gl_VertexIndex], 0.0, 1.0); 29 | fragColor = colors[gl_VertexIndex]; 30 | fragTexCoord = texCoords[gl_VertexIndex]; 31 | } 32 | -------------------------------------------------------------------------------- /samples/00_newqueriespp/README.md: -------------------------------------------------------------------------------- 1 | # newqueriespp 2 | 3 | ## Sample Purpose 4 | 5 | This is another sample that demonstrates the new platform and device queries that were added in OpenCL 3.0. 6 | 7 | This sample uses the OpenCL C++ API bindings instead of the OpenCL C APIs. 8 | As before, by using the OpenCL C++ API bindings this sample is much shorter than the equivalent sample using the OpenCL C APIs! 9 | 10 | This sample will only execute the new queries for platforms or devices that support OpenCL 3.0, and will skip any platforms or devices that do not support OpenCL 3.0. 11 | 12 | ## Key APIs and Concepts 13 | 14 | This sample demonstrates how to use the OpenCL C++ API bindings to perform the new platform and device queries that were added in OpenCL 3.0. 
15 | 16 | ```c 17 | clGetPlatformInfo 18 | clGetDeviceInfo 19 | ``` 20 | -------------------------------------------------------------------------------- /samples/usm/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | set(BUILD_USM_SAMPLES TRUE) 6 | if(NOT TARGET OpenCLExt) 7 | message(STATUS "Skipping USM Samples - OpenCL Extension Loader is not found.") 8 | set(BUILD_USM_SAMPLES FALSE) 9 | endif() 10 | 11 | if(BUILD_USM_SAMPLES) 12 | add_subdirectory( 00_usmqueries ) 13 | add_subdirectory( 01_usmmeminfo ) 14 | 15 | add_subdirectory( 100_dmemhelloworld ) 16 | add_subdirectory( 101_dmemlinkedlist ) 17 | 18 | add_subdirectory( 200_hmemhelloworld ) 19 | add_subdirectory( 201_hmemlinkedlist ) 20 | 21 | add_subdirectory( 300_smemhelloworld ) 22 | add_subdirectory( 301_smemlinkedlist ) 23 | add_subdirectory( 310_usmmigratemem ) 24 | 25 | add_subdirectory( 400_sysmemhelloworld ) 26 | endif() 27 | -------------------------------------------------------------------------------- /samples/00_enumqueuefamilies/README.md: -------------------------------------------------------------------------------- 1 | # enumqueuefamilies 2 | 3 | ## Sample Purpose 4 | 5 | This is a simple sample that queries and prints the command queue families that are supported by all OpenCL devices that support the command queue families extension. 6 | 7 | Command queue families are described in the OpenCL extension [cl_intel_command_queue_families](https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_command_queue_families.html). 8 | This is an optional extension and some devices may not support command queue families. 9 | 10 | For each command queue family, the sample prints: 11 | 12 | * The name of the command queue family. 13 | * The number of command queues in the command queue family. 
14 | * The command queue properties supported by command queues in the command queue family. 15 | * The supported command queue capabilities for command queues in the command queue family. 16 | 17 | ## Key APIs and Concepts 18 | 19 | This sample demonstrates the new device query `CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL`. 20 | -------------------------------------------------------------------------------- /samples/00_extendeddevicequeries/README.md: -------------------------------------------------------------------------------- 1 | # extendeddevicequeries 2 | 3 | ## Sample Purpose 4 | 5 | This is a simple sample that queries and prints extended device queries. 6 | Currently, the queries added by the following extensions are supported: 7 | 8 | * [cl_intel_device_attribute_query](https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_device_attribute_query.html) 9 | * [cl_khr_device_uuid](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_device_uuid) 10 | * [cl_khr_pci_bus_info](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_pci_bus_info) 11 | 12 | There is also partial support for the extensions: 13 | 14 | * [cl_amd_device_attribute_query](https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_device_attribute_query.txt) 15 | * [cl_nv_device_attribute_query](https://www.khronos.org/registry/OpenCL/extensions/nv/cl_nv_device_attribute_query.txt) 16 | 17 | ## Key APIs and Concepts 18 | 19 | This sample demonstrates device attribute queries added by extensions. 20 | -------------------------------------------------------------------------------- /layers/00_example/README.md: -------------------------------------------------------------------------------- 1 | # example 2 | 3 | ## Layer Purpose 4 | 5 | This is a very simple layer that demonstrates how to write a layer to trace an OpenCL API, in this case `clGetPlatformIDs`. 
6 | This example layer was heavily inspired by Brice Videau's (@Kerilk's) [presentation](https://youtu.be/QUKhspUEh00) and [sample code](https://github.com/Kerilk/OpenCL-Layers-Tutorial) from [IWOCL](https://www.iwocl.org/) 2021. 7 | Because it is so simple, this is a good layer to verify that the correct version of the OpenCL ICD loader is installed and that your environment is correctly set up to build and use layers. 8 | 9 | Please see the presentation for more information about how layers work in general, and how this layer works specifically. 10 | 11 | ## Key APIs and Concepts 12 | 13 | The most important concepts to understand from this sample are the functions to query the properties of a layer and to install the dispatch table for the layer. 14 | The contents of these functions will be similar for all other layers. 15 | 16 | ```c 17 | clGetLayerInfo 18 | clInitLayer 19 | ``` 20 | -------------------------------------------------------------------------------- /samples/python/00_enumopencl/enumopencl.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2019-2025 Ben Ashbaugh 4 | # 5 | # SPDX-License-Identifier: MIT 6 | 7 | import pyopencl as cl 8 | 9 | for p, platform in enumerate(cl.get_platforms()): 10 | print("Platform[{}]:".format(p)) 11 | print(" Name: " + platform.get_info(cl.platform_info.NAME)) 12 | print(" Vendor: " + platform.get_info(cl.platform_info.VENDOR)) 13 | print(" Driver Version: " + platform.get_info(cl.platform_info.VERSION)) 14 | for d, device in enumerate(platform.get_devices()): 15 | print("Device[{}]:".format(d)) 16 | print(" Type: " + cl.device_type.to_string(device.get_info(cl.device_info.TYPE))) 17 | print(" Name: " + device.get_info(cl.device_info.NAME)) 18 | print(" Vendor: " + device.get_info(cl.device_info.VENDOR)) 19 | print(" Device Version: " + device.get_info(cl.device_info.VERSION)) 20 | print(" Driver Version: " + 
device.get_info(cl.device_info.DRIVER_VERSION)) 21 | print() 22 | -------------------------------------------------------------------------------- /samples/00_enumopencl/README.md: -------------------------------------------------------------------------------- 1 | # enumopencl 2 | 3 | ## Sample Purpose 4 | 5 | This is a very simple sample that demonstrates how to enumerate the OpenCL platforms that are installed on a machine, and the OpenCL devices that these platforms expose. 6 | 7 | This is one of the few samples that uses the OpenCL C APIs, as described in the OpenCL specification. 8 | Most of the other samples use the OpenCL C++ API bindings, since they make it a lot easier to write and understand OpenCL code! 9 | 10 | This is a good first sample to run to verify that OpenCL is correctly installed on your machine, and that your build environment is correctly set up. 11 | 12 | ## Key APIs and Concepts 13 | 14 | The most important concepts to understand from this sample are OpenCL platforms and OpenCL devices: 15 | 16 | An OpenCL platform is a container that describes a collection of OpenCL devices supported by an OpenCL implementation. 17 | An OpenCL device will eventually be used to execute OpenCL code, but this sample only lists the devices available on the system. 
18 | 19 | ```c 20 | clGetPlatformIDs 21 | clGetDeviceIDs 22 | clGetPlatformInfo 23 | clGetDeviceInfo 24 | ``` 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019-2025 Ben Ashbaugh 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /layers/12_spirvqueriesemu/emulate.h: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright (c) 2025 Ben Ashbaugh 3 | // 4 | // SPDX-License-Identifier: MIT 5 | */ 6 | 7 | #include 8 | #include 9 | 10 | extern const struct _cl_icd_dispatch* g_pNextDispatch; 11 | 12 | #ifndef cl_khr_spirv_queries 13 | #define cl_khr_spirv_queries 1 14 | #define CL_KHR_SPIRV_QUERIES_EXTENSION_NAME "cl_khr_spirv_queries" 15 | #define CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR 0x12B9 16 | #define CL_DEVICE_SPIRV_EXTENSIONS_KHR 0x12BA 17 | #define CL_DEVICE_SPIRV_CAPABILITIES_KHR 0x12BB 18 | #endif 19 | 20 | /////////////////////////////////////////////////////////////////////////////// 21 | // Override Functions 22 | 23 | bool clGetDeviceInfo_override( 24 | cl_device_id device, 25 | cl_device_info param_name, 26 | size_t param_value_size, 27 | void* param_value, 28 | size_t* param_value_size_ret, 29 | cl_int* errcode_ret); 30 | 31 | bool clGetPlatformInfo_override( 32 | cl_platform_id platform, 33 | cl_platform_info param_name, 34 | size_t param_value_size, 35 | void* param_value, 36 | size_t* param_value_size_ret, 37 | cl_int* errcode_ret); 38 | -------------------------------------------------------------------------------- /samples/00_newqueries/README.md: -------------------------------------------------------------------------------- 1 | # newqueries 2 | 3 | ## Sample Purpose 4 | 5 | This is a sample that demonstrates the new platform and device queries that were added in OpenCL 3.0. 6 | It builds on the previous sample that simply enumerates the OpenCL platforms that are installed on a machine, and the OpenCL devices that these platforms expose. 7 | 8 | This is one of the few samples that uses the OpenCL C APIs, as described in the OpenCL specification. 
9 | Most of the other samples use the OpenCL C++ API bindings, since they make it a lot easier to write and understand OpenCL code! 10 | 11 | This sample will only execute the new queries for platforms or devices that support OpenCL 3.0, and will skip any platforms or devices that do not support OpenCL 3.0. 12 | 13 | ## Key APIs and Concepts 14 | 15 | The new OpenCL 3.0 queries make it much easier to identify platform and device capabilities such as the supported OpenCL version, OpenCL C versions, intermediate language versions, and extensions. 16 | In many cases the same information can be queried using queries from earlier versions of OpenCL, but extracting the same information frequently required parsing string queries and hence was complicated and error-prone. 17 | 18 | ```c 19 | clGetPlatformInfo 20 | clGetDeviceInfo 21 | ``` 22 | -------------------------------------------------------------------------------- /samples/svm/101_cgsvmlinkedlist/README.md: -------------------------------------------------------------------------------- 1 | # cgsvmlinkedlist 2 | 3 | ## Sample Purpose 4 | 5 | This sample demonstrates how to build a linked list on the host using coarse-grained Shared Virtual Memory (SVM) allocations, how to access and modify the linked list in a kernel, then how to access and check the contents of the linked list on the host. 6 | 7 | Because device coarse-grained SVM cannot be directly read from or written to on the host, this example constructs and verifies the linked list using explicit memory copies. 8 | 9 | ## Key APIs and Concepts 10 | 11 | This sample demonstrates how to use `clEnqueueSVMMemcpy` to explicitly copy between a Shared Virtual Memory allocation and an allocation on the host. 12 | 13 | This sample also demonstrates how to specify a set of indirectly accessed SVM pointers using `clSetKernelExecInfo` and `CL_KERNEL_EXEC_INFO_SVM_PTRS`. 
14 | This is required for kernels that operate on complex data structures consisting of Shared Virtual Memory allocations that are not directly passed as kernel arguments. 15 | 16 | ## Command Line Options 17 | 18 | | Option | Default Value | Description | 19 | |:--|:-:|:--| 20 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute the sample on. 21 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 22 | | `-n ` | 4 | Specify the number of linked list nodes to create. 23 | -------------------------------------------------------------------------------- /samples/00_enumopenclpp/README.md: -------------------------------------------------------------------------------- 1 | # enumopenclpp 2 | 3 | ## Sample Purpose 4 | 5 | This is another very simple sample that demonstrates how to enumerate the OpenCL platforms that are installed on a machine, and the OpenCL devices that these platforms expose. 6 | 7 | This sample uses the OpenCL C++ API bindings instead of the OpenCL C APIs. 8 | By using the OpenCL C++ API bindings, this sample can be written using a little more than 100 lines of code, versus a little more than 300 lines of code for the version using the OpenCL C APIs! 9 | 10 | This sample should produce the same output as the sample that uses the OpenCL C APIs, making it another good first sample to run to verify that OpenCL is correctly installed on your machine, and that your build environment is correctly set up. 11 | 12 | ## Key APIs and Concepts 13 | 14 | The most important concept to understand from this sample is how the OpenCL C++ API bindings can be used to write code that is more concise and (at least, in this author's opinion) easier to author and understand, compared to OpenCL code written using the OpenCL C APIs. 15 | 16 | This isn't the only way to write code with a higher level model that eventually generates OpenCL API calls, but it is one that is well-supported and documented. 
17 | Most of the samples in this repo will use the OpenCL C++ API bindings. 18 | 19 | ```c 20 | clGetPlatformIDs 21 | clGetDeviceIDs 22 | clGetPlatformInfo 23 | clGetDeviceInfo 24 | ``` 25 | -------------------------------------------------------------------------------- /samples/11_semaphores/README.md: -------------------------------------------------------------------------------- 1 | # semaphores 2 | 3 | ## Sample Purpose 4 | 5 | This sample demonstrates how to use the OpenCL extension [cl_khr_semaphore](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_semaphore) to enforce dependencies between command queues. 6 | As of this writing, `cl_khr_semaphore` is a provisional extension. 7 | This sample uses the functionality described in v0.9.0 of the extension. 8 | 9 | This is an optional extension and some devices may not support `cl_khr_semaphore`, but the sample may still run using the [cl_khr_semaphore emulation layer](../../layers/11_semaemu). 10 | 11 | This sample requires the OpenCL Extension Loader to get the extension APIs for semaphores. 12 | 13 | ## Key APIs and Concepts 14 | 15 | This sample demonstrates how to query the semaphore properties supported by a device, and the properties of a semaphore. 16 | 17 | This sample also demonstrates how to create, signal, and wait on a semaphore. 18 | 19 | ```c 20 | clCreateSemaphoreWithPropertiesKHR 21 | clEnqueueSignalSemaphoresKHR 22 | clEnqueueWaitSemaphoresKHR 23 | clGetSemaphoreInfoKHR 24 | ``` 25 | 26 | ## Command Line Options 27 | 28 | | Option | Default Value | Description | 29 | |:--|:-:|:--| 30 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute the sample on. 31 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 32 | | `--gwx ` | 512 | Specify the global work size to execute. 
33 | -------------------------------------------------------------------------------- /samples/01_copybuffer/README.md: -------------------------------------------------------------------------------- 1 | # copybuffer 2 | 3 | ## Sample Purpose 4 | 5 | This is the first example that uses OpenCL APIs to do work. 6 | In this very simple sample, OpenCL APIs are used to copy the contents of one buffer to another buffer on the OpenCL device. 7 | To do this, OpenCL APIs are used to create both buffers, to create the OpenCL command queue, and to initialize the source buffer and verify the contents of the destination buffer on the host. 8 | 9 | By default, this sample will run in the first enumerated OpenCL device on the first enumerated OpenCL platform. 10 | To run on a different OpenCL device or platform, please use the provided command line options. 11 | 12 | ## Key APIs and Concepts 13 | 14 | This example shows how to create OpenCL buffers and command queues, and how to enqueue a command to copy between the two buffers in the command queue. 15 | Additionally, the example shows one way to initialize the contents of the source buffer on the host, and one way to check the contents of the destination buffer on the device. 16 | 17 | ```c 18 | clCreateCommandQueue / clCreateCommandQueueWithProperties 19 | clCreateBuffer 20 | clEnqueueMapBuffer 21 | clEnqueueUnmapMemObject 22 | clEnqueueCopyBuffer 23 | ``` 24 | 25 | ## Command Line Options 26 | 27 | | Option | Default Value | Description | 28 | |:--|:-:|:--| 29 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute the sample on. 30 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 
31 | -------------------------------------------------------------------------------- /samples/python/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | # Registers a Python sample. 6 | # NUMBER   - sample number used in the IDE folder name (defaults to 99). 7 | # TARGET   - optional name of an existing target to file under "Samples/Python/<CATEGORY>/<NUMBER>_<TARGET>". 8 | # CATEGORY - optional sub-folder name. 9 | # SOURCES / KERNELS - files installed into a per-configuration destination directory. 10 | function(add_opencl_python_sample) 11 | set(options ) 12 | set(one_value_args NUMBER TARGET CATEGORY) 13 | set(multi_value_args SOURCES KERNELS) 14 | cmake_parse_arguments(OPENCL_PYTHON_SAMPLE 15 | "${options}" "${one_value_args}" "${multi_value_args}" 16 | ${ARGN} 17 | ) 18 | 19 | if(NOT OPENCL_PYTHON_SAMPLE_NUMBER) 20 | message(STATUS "No sample number specified for python sample ${OPENCL_PYTHON_SAMPLE_TARGET}, using 99.") 21 | set(OPENCL_PYTHON_SAMPLE_NUMBER 99) 22 | endif() 23 | 24 | # This function does not create a target itself, and set_target_properties is a hard error for an unknown target, so only set FOLDER when the named target exists. 25 | if(TARGET "${OPENCL_PYTHON_SAMPLE_TARGET}") 26 | set_target_properties(${OPENCL_PYTHON_SAMPLE_TARGET} PROPERTIES FOLDER "Samples/Python/${OPENCL_PYTHON_SAMPLE_CATEGORY}/${OPENCL_PYTHON_SAMPLE_NUMBER}_${OPENCL_PYTHON_SAMPLE_TARGET}") 27 | endif() 28 | 29 | # Multi-config generators list their configurations; single-config generators only have CMAKE_BUILD_TYPE. 
30 | if(CMAKE_CONFIGURATION_TYPES) 31 | set(OPENCL_PYTHON_SAMPLE_CONFIGS ${CMAKE_CONFIGURATION_TYPES}) 32 | else() 33 | set(OPENCL_PYTHON_SAMPLE_CONFIGS ${CMAKE_BUILD_TYPE}) 34 | endif() 35 | foreach(CONFIG ${OPENCL_PYTHON_SAMPLE_CONFIGS}) 36 | install(FILES ${OPENCL_PYTHON_SAMPLE_SOURCES} CONFIGURATIONS ${CONFIG} DESTINATION ${CONFIG}) 37 | install(FILES ${OPENCL_PYTHON_SAMPLE_KERNELS} CONFIGURATIONS ${CONFIG} DESTINATION ${CONFIG}) 38 | endforeach() 39 | endfunction() 40 | 41 | add_subdirectory( 00_enumopencl ) 42 | add_subdirectory( 01_copybuffer ) 43 | add_subdirectory( 02_copybufferkernel ) 44 | add_subdirectory( 03_mandelbrot ) 45 | add_subdirectory( 04_julia ) 46 | -------------------------------------------------------------------------------- /samples/03_mandelbrot/README.md: -------------------------------------------------------------------------------- 1 | # Mandelbrot Set 2 | 3 | ## Sample Purpose 4 | 5 | This is a port of the [ISPC Mandelbrot](https://github.com/ispc/ispc/tree/master/examples/mandelbrot) 
sample. 6 | It uses an OpenCL kernel to compute a [Mandelbrot set](https://en.wikipedia.org/wiki/Mandelbrot_set) image, which is then written to a BMP file. 7 | Each OpenCL work item computes one element of the set. 8 | 9 | This assuredly is not the fastest Mandelbrot kernel on any OpenCL implementation, but it should perform reasonably well - much better than an equivalent serial implementation! 10 | 11 | ![Mandelbrot Image](mandelbrot.png) 12 | 13 | As with prior samples, the source code for the OpenCL kernel is embedded into the host code as a raw string, and by default, this sample will run in the first enumerated OpenCL device on the first enumerated OpenCL platform. 14 | To run on a different OpenCL device or platform, please use the provided command line options. 15 | 16 | ## Key APIs and Concepts 17 | 18 | This example shows how to create an OpenCL program from a source string and enqueue an ND range for the kernel into an OpenCL command queue. 19 | 20 | ```c 21 | clCreateProgramWithSource 22 | clBuildProgram 23 | clCreateKernel 24 | clSetKernelArg 25 | clEnqueueNDRangeKernel 26 | ``` 27 | 28 | ## Command Line Options 29 | 30 | | Option | Default Value | Description | 31 | |:--|:-:|:--| 32 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute on the sample on. 33 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 34 | -------------------------------------------------------------------------------- /samples/svm/201_fgsvmlinkedlist/README.md: -------------------------------------------------------------------------------- 1 | # fgsvmlinkedlist 2 | 3 | ## Sample Purpose 4 | 5 | This sample demonstrates how to build a linked list on the host using fine-grained Shared Virtual Memory (SVM) allocations, how to access and modify the linked list in a kernel, then how to access and check the contents of the linked list on the host. 
6 | 7 | Because fine-grained SVM does not require any API calls to access the contents of an allocation on the host, this sample is much simpler than the coarse-grained SVM sample. 8 | 9 | ## Key APIs and Concepts 10 | 11 | This sample only needs to ensure the device is not accessing the fine-grained SVM allocation before initializing the contents of the source allocation or verifying that the copy was performed correctly. 12 | For simplicity, this sample calls `clFinish` to ensure all execution is complete on the device. 13 | 14 | This sample also demonstrates how to specify a set of indirectly accessed SVM pointers using `clSetKernelExecInfo` and `CL_KERNEL_EXEC_INFO_SVM_PTRS`. 15 | This is still required for kernels that operate on complex data structures consisting of fine-grained Shared Virtual Memory allocations that are not directly passed as kernel arguments. 16 | 17 | ## Command Line Options 18 | 19 | | Option | Default Value | Description | 20 | |:--|:-:|:--| 21 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute the sample on. 22 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 23 | | `-n ` | 4 | Specify the number of linked list nodes to create. 24 | -------------------------------------------------------------------------------- /samples/opengl/01_nbodygl/README.md: -------------------------------------------------------------------------------- 1 | # N-Body Simulation with OpenGL 2 | 3 | ## Sample Purpose 4 | 5 | This sample uses OpenCL to compute an [N-body simulation](https://en.wikipedia.org/wiki/N-body_simulation), which is then rendered with OpenGL. 6 | 7 | This sample currently does not share the OpenCL buffer with OpenGL and will unconditionally copy from OpenCL to OpenGL on the host. 8 | It is most useful as a reference for the similar Vulkan sample. 9 | 10 | ## Key APIs and Concepts 11 | 12 | This example shows how to copy from an OpenCL buffer to OpenGL. 
13 | 14 | ## Command Line Options 15 | 16 | | Option | Default Value | Description | 17 | |:--|:-:|:--| 18 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute the sample on. 19 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 20 | | `-n` | 1024 | Specify the number of bodies to simulate. 21 | | `-g` | 0 | Specify the local work size. If the local work size is zero a `NULL` local work size is used. 22 | | `-w` | 1024 | Specify the render width in pixels. 23 | | `-h` | 1024 | Specify the render height in pixels. 24 | | `--paused` | n/a | Start with the animation paused. 25 | 26 | ## Controls While Running 27 | 28 | | Control | Description | 29 | |:--|:--| 30 | | `Escape` | Exits from the sample. 31 | | `Space` | Toggle animation. 32 | | `S` | Single-step the simulation. 33 | | `R` | Re-initialize the simulation. 34 | | `V` | Toggle vsync (default: `true`). Disabling vsync may increase framerate but may cause [screen tearing](https://en.wikipedia.org/wiki/Screen_tearing). 35 | -------------------------------------------------------------------------------- /samples/13_mutablecommandbuffers/README.md: -------------------------------------------------------------------------------- 1 | # mutablecommandbuffers 2 | 3 | ## Sample Purpose 4 | 5 | This is an intermediate-level sample that demonstrates how to use the OpenCL extension [cl_khr_command_buffer_mutable_dispatch](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_command_buffer_mutable_dispatch) to modify a command buffer after it has been finalized. 6 | As of this writing, `cl_khr_command_buffer_mutable_dispatch` is a provisional extension. 7 | This sample uses the functionality described in v0.9.0 of the extension. 
8 | 9 | This is an optional extension and some devices may not support `cl_khr_command_buffer_mutable_dispatch`, but the sample may still run using the [cl_khr_command_buffer emulation layer](../../layers/10_cmdbufemu). 10 | 11 | This sample requires the OpenCL Extension Loader to get the extension APIs for command buffers. 12 | 13 | ## Key APIs and Concepts 14 | 15 | This sample demonstrates how to query the mutable dispatch capabilities supported by a device, how to create a mutable command buffer, and how to query the properties of a mutable command. 16 | 17 | This sample also demonstrates how to mutate (modify) a command buffer after it has been finalized. 18 | 19 | ```c 20 | clGetMutableCommandInfoKHR 21 | clUpdateMutableCommandsKHR 22 | ``` 23 | 24 | ## Command Line Options 25 | 26 | | Option | Default Value | Description | 27 | |:--|:-:|:--| 28 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute on the sample on. 29 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 30 | -------------------------------------------------------------------------------- /samples/svm/200_fgsvmhelloworld/README.md: -------------------------------------------------------------------------------- 1 | # fgsvmhelloworld 2 | 3 | ## Sample Purpose 4 | 5 | This sample demonstrates usage of fine-grained Shared Virtual Memory (SVM) allocations. 6 | This sample may not run on all OpenCL devices because many devices do not support fine-grained SVM. 7 | 8 | The sample initializes a fine-grained SVM allocation, copies it to a destination coarse-grained SVM allocation using a kernel, then checks on the host that the copy was performed correctly. 9 | Because fine-grained SVM does not require any API calls to access the contents of an allocation on the host, this sample is much simpler than the coarse-grained SVM sample. 
10 | 11 | ## Key APIs and Concepts 12 | 13 | This sample allocates fine-grained SVM memory using `clSVMAlloc` and frees it using `clSVMFree`. 14 | 15 | This sample only needs to ensure the device is not accessing the fine-grained SVM allocation before initializing the contents of the source allocation or verifying that the copy was performed correctly. 16 | For simplicity, this sample calls `clFinish` to ensure all execution is complete on the device. 17 | 18 | Within a kernel, a Shared Virtual Memory allocation can be accessed similar to an OpenCL buffer (a `cl_mem`). 19 | Shared Virtual Memory allocations are set as an argument to a kernel using `clSetKernelArgSVMPointer`. 20 | 21 | ## Command Line Options 22 | 23 | | Option | Default Value | Description | 24 | |:--|:-:|:--| 25 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute on the sample on. 26 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 27 | -------------------------------------------------------------------------------- /samples/svm/100_cgsvmhelloworld/README.md: -------------------------------------------------------------------------------- 1 | # cgsvmhelloworld 2 | 3 | ## Sample Purpose 4 | 5 | This is the first Shared Virtual Memory (SVM) sample that meaningfully stores and uses data in a Shared Virtual Memory allocation. 6 | This sample demonstrates usage of coarse-grained SVM allocations. 7 | Other similar samples demonstrate usage of fine-grained SVM allocations. 8 | This sample may not run on all OpenCL devices because SVM is an optional feature, though many devices do support coarse-grained SVM. 9 | 10 | The sample initializes a coarse-grained SVM allocation, copies it to a destination coarse-grained SVM allocation using a kernel, then checks on the host that the copy was performed correctly. 
11 | 12 | ## Key APIs and Concepts 13 | 14 | This sample allocates coarse-grained SVM memory using `clSVMAlloc` and frees it using `clSVMFree`. 15 | 16 | Since coarse-grained SVM cannot be directly accessed by the host, this sample initializes the source allocation by mapping it using `clEnqueueSVMMap`. 17 | This sample also uses `clEnqueueSVMMap` to map the destination buffer to verify that the copy was performed correctly. 18 | 19 | Within a kernel, a Shared Virtual Memory allocation can be accessed similar to an OpenCL buffer (a `cl_mem`). 20 | Shared Virtual Memory allocations are set as an argument to a kernel using `clSetKernelArgSVMPointer`. 21 | 22 | ## Command Line Options 23 | 24 | | Option | Default Value | Description | 25 | |:--|:-:|:--| 26 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute on the sample on. 27 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 28 | -------------------------------------------------------------------------------- /samples/04_sobel/README.md: -------------------------------------------------------------------------------- 1 | # Sobel Filter 2 | 3 | ## Sample Purpose 4 | 5 | This is an enhanced version of the earlier Julia set sample. 6 | Unlike the previous sample that simply generated Julia set image, this sample executes a [Sobel edge detection filter](https://en.wikipedia.org/wiki/Sobel_operator) on the generated Julia set. 7 | 8 | ![Sobel Julia Set Image](sobel.png) 9 | 10 | ## Key APIs and Concepts 11 | 12 | This example shows how to include two different kernels in the same program source string. 13 | This example also shows how both kernels can be enqueued asynchronously into the same in-order queue. 
14 | 15 | ```c 16 | clCreateProgramWithSource with two kernels in the source string 17 | clCreateKernel 18 | ``` 19 | 20 | ## Command Line Options 21 | 22 | | Option | Default Value | Description | 23 | |:--|:-:|:--| 24 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute the sample on. 25 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 26 | | `-i ` | 16 | Specify the number of iterations to execute. 27 | | `--gwx ` | 512 | Specify the global work size to execute, in the X direction. This also determines the width of the generated image. 28 | | `--gwy ` | 512 | Specify the global work size to execute, in the Y direction. This also determines the height of the generated image. 29 | | `--lwx ` | 0 | Specify the local work size in the X direction. If either local work size dimension is zero a `NULL` local work size is used. 30 | | `--lwy ` | 0 | Specify the local work size in the Y direction. If either local work size dimension is zero a `NULL` local work size is used. 31 | -------------------------------------------------------------------------------- /samples/usm/310_usmmigratemem/README.md: -------------------------------------------------------------------------------- 1 | # usmmigratemem 2 | 3 | ## Sample Purpose 4 | 5 | This sample demonstrates how to explicitly migrate shared memory allocations to control when and how shared memory migrations occur. 6 | 7 | Functionally, this sample is identical to [smemhelloworld](../300_smemhelloworld/README.md), but with explicit calls to migrate the source and destination shared allocations to the device before executing the copy kernel. 8 | 9 | ## Key APIs and Concepts 10 | 11 | This sample explicitly migrates the source and destination shared memory allocations using `clEnqueueMigrateMemINTEL`. 
12 | The source allocation preserves its contents during migration, but the device allocation may be migrated without preserving its contents using `CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED`. 13 | 14 | The costs of the explicit migrations may be profiled using standard event profiling mechanisms. 15 | 16 | Since Unified Shared Memory is an OpenCL extension, this sample uses the `OpenCLExt` extension loader library to query the extension APIs. 17 | Please see the OpenCL Extension Loader [README](https://github.com/bashbaug/opencl-extension-loader) for more detail. 18 | 19 | This sample currently uses C APIs because the C++ bindings do not support Unified Shared Memory (yet). 20 | When support for Unified Shared Memory is added to the C++ bindings the samples will be updated to use the C++ bindings instead, which should simplify the sample slightly. 21 | 22 | ## Command Line Options 23 | 24 | | Option | Default Value | Description | 25 | |:--|:-:|:--| 26 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute the sample on. 27 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 28 | -------------------------------------------------------------------------------- /samples/02_copybufferkernel/README.md: -------------------------------------------------------------------------------- 1 | # copybufferkernel 2 | 3 | ## Sample Purpose 4 | 5 | This is the first example that uses an OpenCL kernel to do work. 6 | An OpenCL kernel is a short program defining what one OpenCL work item should do. 7 | In this case, each OpenCL work item will copy a value from a source buffer to a destination buffer. 8 | Since this sample launches one work item for every element in the source buffer, behaviorally this sample will do exactly the same thing as the previous copy buffer sample. 9 | 10 | In this sample, the source code for the OpenCL kernel is embedded into the host code as a raw string. 
11 | At runtime, an OpenCL program is created from the raw string, and the OpenCL device compiler is invoked to compile the OpenCL program for the OpenCL device. 12 | This isn't the only way to create OpenCL programs, but it is fairly common, especially while learning and developing an OpenCL application. 13 | 14 | By default, this sample will run in the first enumerated OpenCL device on the first enumerated OpenCL platform. 15 | To run on a different OpenCL device or platform, please use the provided command line options. 16 | 17 | ## Key APIs and Concepts 18 | 19 | This example shows how to create an OpenCL program from a source string and enqueue an ND range for the kernel into an OpenCL command queue. 20 | 21 | 22 | ```c 23 | clCreateProgramWithSource 24 | clBuildProgram 25 | clCreateKernel 26 | clSetKernelArg 27 | clEnqueueNDRangeKernel 28 | ``` 29 | 30 | ## Command Line Options 31 | 32 | | Option | Default Value | Description | 33 | |:--|:-:|:--| 34 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute on the sample on. 35 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 36 | -------------------------------------------------------------------------------- /samples/usm/101_dmemlinkedlist/README.md: -------------------------------------------------------------------------------- 1 | # dmemlinkedlist 2 | 3 | ## Sample Purpose 4 | 5 | This sample demonstrates how to build a linked list on the host in device Unified Shared Memory, access and modify the linked list in a kernel, then access and check the contents of the linked list on the host. 6 | 7 | Because device Unified Shared Memory cannot be directly read from or written to on the host, the linked list must be constructed and verified using explicit memory copies. 
8 | 9 | ## Key APIs and Concepts 10 | 11 | This sample demonstrates how to indicate that a kernel may access any device Unified Shared Memory allocation using `clSetKernelExecInfo` and `CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL`, without specifying all allocations explicitly. 12 | For kernels that operate on complex data structures consisting of many Unified Shared Memory allocations, this can considerably improve API efficiency. 13 | 14 | Since Unified Shared Memory is an OpenCL extension, this sample uses the `OpenCLExt` extension loader library to query the extension APIs. 15 | Please see the OpenCL Extension Loader [README](https://github.com/bashbaug/opencl-extension-loader) for more detail. 16 | 17 | This sample currently uses c APIs because the C++ bindings do not support Unified Shared Memory (yet). 18 | When support for Unified Shared Memory is added to the C++ bindings the samples will be updated to use the C++ bindings instead, which should simplify the sample slightly. 19 | 20 | ## Command Line Options 21 | 22 | | Option | Default Value | Description | 23 | |:--|:-:|:--| 24 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute on the sample on. 25 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 26 | | `-n ` | 4 | Specify the number of linked list nodes to create. 27 | -------------------------------------------------------------------------------- /layers/11_semaemu/README.md: -------------------------------------------------------------------------------- 1 | # Semaphore Emulation 2 | 3 | ## Layer Purpose 4 | 5 | This is a layer that demonstrates how to emulate functionality - in this case, the [cl_khr_semaphore](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_semaphore) extension - using a layer. 6 | It works by intercepting calls to `clGetExtensionFunctionAddressForPlatform` to query function pointers for the `cl_khr_semaphore` extension APIs. 
7 | If a query succeeds by default then the layer does nothing and simply returns the queried function pointer as-is. 8 | If the query is unsuccessful however, then the layer returns its own function pointer, which will emulate semaphores using events. 9 | 10 | This semaphore emulation layer currently implements v0.9.1 of the `cl_khr_semaphore` extension. 11 | The functionality in this emulation layer is sufficient to run the semaphore samples in this repository. 12 | 13 | Please note that the emulated semaphores are intended to be functional, but unlike a native implementation, they may not provide any performance benefit over similar code without using semaphores. 14 | 15 | ## Layer Requirement 16 | 17 | This layer calls `clEnqueueMarkerWithWaitList` and therefore requires OpenCL 1.2. 18 | 19 | ## Key APIs and Concepts 20 | 21 | The most important concepts to understand from this sample are how to intercept `clGetExtensionFunctionAddressForPlatform` to return emulated functions for an extension. 22 | 23 | ```c 24 | clGetExtensionFunctionAddressForPlatform 25 | clInitLayer 26 | ``` 27 | 28 | ## Known Limitations 29 | 30 | This section describes some of the limitations of the emulated `cl_khr_semaphore` functionality: 31 | 32 | * The layer does not support waiting on a semaphore (blocked by a user event) before signaling the semaphore. 33 | * Many error conditions are not properly checked for and returned. 34 | -------------------------------------------------------------------------------- /samples/usm/201_hmemlinkedlist/README.md: -------------------------------------------------------------------------------- 1 | # hmemlinkedlist 2 | 3 | ## Sample Purpose 4 | 5 | This sample demonstrates how to build a linked list on the host using host Unified Shared Memory, access and modify the linked list in a kernel, then access and check the contents of the linked list on the host. 
6 | 7 | Because host Unified Shared Memory can be directly read from and written to on the host, this sample is much more straightforward than the equivalent sample that builds a linked list in device memory. 8 | 9 | ## Key APIs and Concepts 10 | 11 | This sample demonstrates how to indicate that a kernel may access any host Unified Shared Memory allocation using `clSetKernelExecInfo` and `CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL`, without specifying all allocations explicitly. 12 | For kernels that operate on complex data structures consisting of many Unified Shared Memory allocations, this can considerably improve API efficiency. 13 | 14 | Since Unified Shared Memory is an OpenCL extension, this sample uses the `OpenCLExt` extension loader library to query the extension APIs. 15 | Please see the OpenCL Extension Loader [README](https://github.com/bashbaug/opencl-extension-loader) for more detail. 16 | 17 | This sample currently uses C APIs because the C++ bindings do not support Unified Shared Memory (yet). 18 | When support for Unified Shared Memory is added to the C++ bindings the samples will be updated to use the C++ bindings instead, which should simplify the sample slightly. 19 | 20 | ## Command Line Options 21 | 22 | | Option | Default Value | Description | 23 | |:--|:-:|:--| 24 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute the sample on. 25 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 26 | | `-n ` | 4 | Specify the number of linked list nodes to create. 
27 | -------------------------------------------------------------------------------- /samples/15_mutablecommandbufferasserts/README.md: -------------------------------------------------------------------------------- 1 | # mutablecommandbufferasserts 2 | 3 | ## Sample Purpose 4 | 5 | This is an intermediate-level sample that demonstrates how to pass assertions guaranteeing certain behavior when modifying command buffers using the OpenCL extension [cl_khr_command_buffer_mutable_dispatch](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_command_buffer_mutable_dispatch). 6 | As of this writing, `cl_khr_command_buffer_mutable_dispatch` is a provisional extension. 7 | This sample uses the functionality described in v0.9.1 of the extension. 8 | 9 | This is an optional extension and some devices may not support `cl_khr_command_buffer_mutable_dispatch`, but the sample may still run using the [cl_khr_command_buffer emulation layer](../../layers/10_cmdbufemu). 10 | 11 | This sample requires the OpenCL Extension Loader to get the extension APIs for command buffers. 12 | 13 | ## Key APIs and Concepts 14 | 15 | This sample demonstrates how to pass mutable dispatch assertions when command buffer is created or when an ND-range kernel command is recorded into a command buffer. 16 | 17 | ``` 18 | CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR 19 | CL_MUTABLE_DISPATCH_ASSERTS_KHR 20 | CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR 21 | ``` 22 | 23 | ## Command Line Options 24 | 25 | | Option | Default Value | Description | 26 | |:--|:-:|:--| 27 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute on the sample on. 28 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 
29 | | `--noCmdBufAssert` | N/A | Do not pass an assertion when the command buffer is created (`CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR`) 30 | | `--noCmdAssert` | N/A | Do not pass an assertion when the command is recorded into the command buffer (`CL_MUTABLE_DISPATCH_ASSERTS_KHR`) 31 | -------------------------------------------------------------------------------- /samples/12_commandbuffers/README.md: -------------------------------------------------------------------------------- 1 | # commandbuffers 2 | 3 | ## Sample Purpose 4 | 5 | This is a modified version of the copybufferkernel sample that demonstrates how to use the OpenCL extension [cl_khr_command_buffer](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_command_buffer). 6 | As of this writing, `cl_khr_command_buffer` is a provisional extension. 7 | This sample uses the functionality described in v0.9.0 of the extension. 8 | 9 | This is an optional extension and some devices may not support `cl_khr_command_buffer`, but the sample may still run using the [cl_khr_command_buffer emulation layer](../../layers/10_cmdbufemu). 10 | 11 | This sample requires the OpenCL Extension Loader to get the extension APIs for command buffers. 12 | 13 | ## Key APIs and Concepts 14 | 15 | This sample demonstrates how to query the command buffer properties supported by a device, and the properties of a command buffer. 16 | 17 | This sample also demonstrates how to create, finalize, and execute a command buffer. 18 | 19 | ```c 20 | clCreateCommandBufferKHR 21 | clGetCommandBufferInfoKHR 22 | clCommandNDRangeKernelKHR 23 | clFinalizeCommandBufferKHR 24 | clEnqueueCommandBufferKHR 25 | ``` 26 | 27 | ## Things to Try 28 | 29 | Here are some suggested ways to modify this sample to learn more: 30 | 31 | 1. Change the kernel arguments after recording the ND-range kernel command into the command buffer. 32 | Does this affect the command in the command buffer? 33 | 2. 
Try timing the same commands with and without a command buffer. 34 | Is it faster or slower to execute commands from a command buffer? 35 | 36 | ## Command Line Options 37 | 38 | | Option | Default Value | Description | 39 | |:--|:-:|:--| 40 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute on the sample on. 41 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 42 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | cmake_minimum_required(VERSION 3.16 FATAL_ERROR) 6 | 7 | set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) 8 | set(CMAKE_CXX_STANDARD 14) 9 | 10 | if (NOT CMAKE_BUILD_TYPE) 11 | message(STATUS "No build type selected, default to Release") 12 | set(CMAKE_BUILD_TYPE "Release" CACHE PATH "Build Type" FORCE) 13 | endif() 14 | 15 | set_property(GLOBAL PROPERTY USE_FOLDERS ON) 16 | 17 | project(SimpleOpenCLSamples VERSION 1.0) 18 | 19 | option(SAMPLES_ENABLE_EXCEPTIONS "Enable Exceptions for OpenCL Errors") 20 | 21 | set(OpenCL_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/OpenCL-Headers CACHE PATH "Path to OpenCL Headers") 22 | find_package(OpenCL) 23 | 24 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) 25 | include_directories(${OpenCL_INCLUDE_DIR}) 26 | 27 | add_subdirectory(external/OpenCL-Headers) 28 | add_subdirectory(external/opencl-icd-loader) 29 | set_target_properties(OpenCL PROPERTIES FOLDER "OpenCL-ICD-Loader") 30 | set_target_properties(cllayerinfo PROPERTIES FOLDER "OpenCL-ICD-Loader") 31 | set(OpenCL_LIBRARIES OpenCL) 32 | 33 | if(IS_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/external/opencl-extension-loader) 34 | add_subdirectory(external/opencl-extension-loader) 35 | else() 36 | message(STATUS "OpenCL Extension Loader is not found.") 37 | endif() 38 | 39 | 
if(IS_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/external/SPIRV-Headers) 40 | add_subdirectory(external/SPIRV-Headers) 41 | else() 42 | message(STATUS "SPIR-V Headers are not found.") 43 | endif() 44 | 45 | if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME) 46 | enable_testing() 47 | endif() 48 | 49 | add_subdirectory(layers) 50 | add_subdirectory(samples) 51 | add_subdirectory(tutorials) 52 | 53 | if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) 54 | set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_SOURCE_DIR}/install" CACHE PATH "Install Path" FORCE) 55 | endif() 56 | -------------------------------------------------------------------------------- /samples/usm/301_smemlinkedlist/README.md: -------------------------------------------------------------------------------- 1 | # smemlinkedlist 2 | 3 | ## Sample Purpose 4 | 5 | This sample demonstrates how to build a linked list on the host in shared Unified Shared Memory, access and modify the linked list in a kernel, then access and check the contents of the linked list on the host. 6 | 7 | Because shared Unified Shared Memory can be directly read from and written to on the host, this samples is just as straightforward as the sample that builds the linked list in host memory, and is much more straightforward than the equivalent sample that builds the linked list in device memory. 8 | 9 | ## Key APIs and Concepts 10 | 11 | This sample demonstrates how to indicate that a kernel may access any shared Unified Shared Memory allocation using `clSetKernelExecInfo` and `CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL`, without specifying all allocations explicitly. 12 | For kernels that operate on complex data structures consisting of many Unified Shared Memory allocations, this can considerably improve API efficiency. 13 | 14 | Since Unified Shared Memory is an OpenCL extension, this sample uses the `OpenCLExt` extension loader library to query the extension APIs. 
15 | Please see the OpenCL Extension Loader [README](https://github.com/bashbaug/opencl-extension-loader) for more detail. 16 | 17 | This sample currently uses c APIs because the C++ bindings do not support Unified Shared Memory (yet). 18 | When support for Unified Shared Memory is added to the C++ bindings the samples will be updated to use the C++ bindings instead, which should simplify the sample slightly. 19 | 20 | ## Command Line Options 21 | 22 | | Option | Default Value | Description | 23 | |:--|:-:|:--| 24 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute on the sample on. 25 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 26 | | `-n ` | 4 | Specify the number of linked list nodes to create. 27 | -------------------------------------------------------------------------------- /include/util.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright (c) 2021-2025 Ben Ashbaugh 3 | // 4 | // SPDX-License-Identifier: MIT 5 | */ 6 | #pragma once 7 | 8 | #include 9 | #include 10 | 11 | static cl_version getDeviceOpenCLVersion( 12 | const cl::Device& device) 13 | { 14 | cl_uint major = 0; 15 | cl_uint minor = 0; 16 | 17 | std::string version = device.getInfo(); 18 | 19 | // The device version string has the form: 20 | // OpenCL . 
21 | const std::string prefix{"OpenCL "}; 22 | if (!version.compare(0, prefix.length(), prefix)) { 23 | const char* check = version.c_str() + prefix.length(); 24 | while (isdigit(check[0])) { 25 | major *= 10; 26 | major += check[0] - '0'; 27 | ++check; 28 | } 29 | if (check[0] == '.') { 30 | ++check; 31 | } 32 | while (isdigit(check[0])) { 33 | minor *= 10; 34 | minor += check[0] - '0'; 35 | ++check; 36 | } 37 | } 38 | 39 | return CL_MAKE_VERSION(major, minor, 0); 40 | } 41 | 42 | static bool checkDeviceForExtension( 43 | const cl::Device& device, 44 | const char* extensionName) 45 | { 46 | bool supported = false; 47 | 48 | if (extensionName && !strchr(extensionName, ' ')) { 49 | std::string deviceExtensions = device.getInfo(); 50 | 51 | const char* start = deviceExtensions.c_str(); 52 | while (true) { 53 | const char* where = strstr(start, extensionName); 54 | if (!where) { 55 | break; 56 | } 57 | const char* terminator = where + strlen(extensionName); 58 | if (where == start || *(where - 1) == ' ') { 59 | if (*terminator == ' ' || *terminator == '\0') { 60 | supported = true; 61 | break; 62 | } 63 | } 64 | start = terminator; 65 | } 66 | } 67 | 68 | return supported; 69 | } 70 | -------------------------------------------------------------------------------- /samples/usm/01_usmmeminfo/README.md: -------------------------------------------------------------------------------- 1 | # usmmeminfo 2 | 3 | ## Sample Purpose 4 | 5 | This sample allocates Unified Shared Memory of each supported type and queries the properties of each allocation. 6 | These properties can be used to determine if a pointer points to a USM allocation, and if so, how the USM allocation may be used on a device. 7 | 8 | All properties are queried for a pointer into the middle of an allocation. 9 | The "type" property of the allocation is queried for the base address of the allocation, a pointer into the middle of the allocation, and an out-of-range pointer. 
10 | Querying the type of an out-of-range pointer is not an error, but will return that the type of the allocation is "unknown". 11 | 12 | ## Key APIs and Concepts 13 | 14 | This sample primarily demonstrates the `clGetMemAllocInfoINTEL` API that queries properties of a Unified Shared Memory allocation. 15 | This is also the first sample that allocates (and frees) Unified Shared Memory. 16 | This sample allocates host memory using `clHostMemAllocINTEL`, device memory using `clDeviceMemAllocINTEL`, and shared memory using `clSharedMemAllocINTEL`. 17 | When all queries are complete and the USM allocation is no longer required, the allocation is freed using `clMemFreeINTEL`. 18 | 19 | Since Unified Shared Memory is an OpenCL extension, this sample uses the `OpenCLExt` extension loader library to query the extension APIs. 20 | Please see the OpenCL Extension Loader [README](https://github.com/bashbaug/opencl-extension-loader) for more detail. 21 | 22 | This sample currently uses c APIs because the C++ bindings do not support Unified Shared Memory (yet). 23 | When support for Unified Shared Memory is added to the C++ bindings the samples will be updated to use the C++ bindings instead, which should simplify the sample slightly. 24 | 25 | ## Command Line Options 26 | 27 | | Option | Default Value | Description | 28 | |:--|:-:|:--| 29 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute on the sample on. 30 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 
31 | -------------------------------------------------------------------------------- /tutorials/interceptlayer/README.md: -------------------------------------------------------------------------------- 1 | # Using the Intercept Layer for OpenCL Applications 2 | 3 | ## Tutorial Purpose 4 | 5 | This tutorial demonstrates common usages of the [Intercept Layer for OpenCL Applications](https://github.com/intel/opencl-intercept-layer) to debug and optimize an OpenCL program. 6 | The initial version of this program is syntactically correct and compiles, but it crashes, has bugs, and it is slow! 7 | In this tutorial we will fix the bugs and likely improve the application performance. 8 | When we are done we will generate a cool [sine-based Julia Set](http://paulbourke.net/fractals/sinjulia/) fractal. 9 | 10 | ![Sin-Based Julia Set Image](sinjulia.png) 11 | 12 | This tutorial has multiple parts, with each part building upon the previous part. 13 | Try to solve each part on your own, but if you get stuck solutions are provided for each part. 14 | 15 | * Part 0: [Building and Running the Tutorial](part0.md) 16 | * Part 1: [Fixing an OpenCL Error](part1.md) 17 | * Part 2: [Fixing an OpenCL Program Build Error](part2.md) 18 | * Part 3: [Fixing an OpenCL Map Error](part3.md) 19 | * Part 4: [Profiling and Improving Performance](part4.md) 20 | * Part 5: [Improving Performance More](part5.md) 21 | * Part 6: [Final Words and Additional Things to Try](part6.md) 22 | 23 | ## Command Line Options 24 | 25 | | Option | Default Value | Description | 26 | |:--|:-:|:--| 27 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute on the sample on. 28 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 29 | | `-i ` | 16 | Specify the number of iterations to execute. 30 | | `--gwx ` | TBD! | Specify the global work size to execute, in the X direction. This also determines the width of the generated image. 31 | | `--gwy ` | TBD! 
| Specify the global work size to execute, in the Y direction. This also determines the height of the generated image. 32 | | `--lwx ` | 0 | Specify the local work-group size in the X direction. If either local work-group size is zero a `NULL` local work-group size is used. 33 | | `--lwy ` | 0 | Specify the local work-group size in the Y direction. If either local work-group size is zero a `NULL` local work-group size is used. 34 | -------------------------------------------------------------------------------- /include/getenv_util.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright (c) 2022-2025 Ben Ashbaugh 3 | // 4 | // SPDX-License-Identifier: MIT 5 | */ 6 | #pragma once 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | #if defined(_WIN32) 14 | 15 | #include 16 | 17 | #define GETENV( _name, _value ) _dupenv_s( &_value, NULL, _name ) 18 | #define FREEENV( _value ) free( _value ) 19 | 20 | #else 21 | 22 | #define GETENV( _name, _value ) _value = getenv(_name) 23 | #define FREEENV( _value ) (void)_value 24 | 25 | #endif 26 | 27 | static inline bool getControlFromEnvironment( 28 | const char* name, 29 | void* pValue, 30 | size_t size ) 31 | { 32 | char* envVal = NULL; 33 | GETENV( name, envVal ); 34 | 35 | if( envVal != NULL ) 36 | { 37 | if( size == sizeof(unsigned int) ) 38 | { 39 | unsigned int* puVal = (unsigned int*)pValue; 40 | *puVal = atoi(envVal); 41 | } 42 | else if( strlen(envVal) < size ) 43 | { 44 | char* pStr = (char*)pValue; 45 | strcpy( pStr, envVal ); 46 | } 47 | 48 | FREEENV( envVal ); 49 | return true; 50 | } 51 | 52 | return false; 53 | } 54 | 55 | template 56 | static bool getControl( 57 | const char* name, 58 | T& value ) 59 | { 60 | unsigned int readValue = 0; 61 | bool success = getControlFromEnvironment( name, &readValue, sizeof(readValue) ); 62 | if( success ) 63 | { 64 | value = readValue; 65 | } 66 | 67 | return success; 68 | } 69 | 70 | template <> 71 | bool getControl( 
72 | const char* name, 73 | bool& value ) 74 | { 75 | unsigned int readValue = 0; 76 | bool success = getControlFromEnvironment( name, &readValue, sizeof(readValue) ); 77 | if( success ) 78 | { 79 | value = ( readValue != 0 ); 80 | } 81 | 82 | return success; 83 | } 84 | 85 | template <> 86 | bool getControl( 87 | const char* name, 88 | std::string& value ) 89 | { 90 | char readValue[256] = ""; 91 | bool success = getControlFromEnvironment( name, readValue, sizeof(readValue) ); 92 | if( success ) 93 | { 94 | value = readValue; 95 | } 96 | 97 | return success; 98 | } 99 | -------------------------------------------------------------------------------- /samples/usm/100_dmemhelloworld/README.md: -------------------------------------------------------------------------------- 1 | # dmemhelloworld 2 | 3 | ## Sample Purpose 4 | 5 | This is the first Unified Shared Memory sample that meaningfully stores and uses data in a Unified Shared Memory allocation. 6 | 7 | This sample demonstrates usage of device memory allocations. 8 | Other similar samples demonstrate usage of host memory and shared memory allocations. 9 | Device memory allocations are owned by a specific device, and generally trade off high performance for limited access. 10 | Kernels operating on device memory should perform just as well, if not better, than OpenCL buffers or Shared Virtual Memory allocations. 11 | 12 | The sample initializes a source USM allocation, copies it to a destination USM allocation using a kernel, then checks on the host that the copy was performed correctly. 13 | 14 | ## Key APIs and Concepts 15 | 16 | This sample allocates device memory using `clDeviceMemAllocINTEL` and frees it using `clMemFreeINTEL`. 17 | 18 | Since device memory cannot be directly accessed by the host, this sample initializes the source buffer by copying into it using `clEnqueueMemcpyINTEL`. 
19 | This sample also uses `clEnqueueMemcpyINTEL` to copy out of the destination buffer to verify that the copy was performed correctly. 20 | 21 | Within a kernel, a Unified Shared Memory allocation can be accessed similar to an OpenCL buffer (a `cl_mem`), or a Shared Virtual Memory allocation. 22 | Unified Shared Memory allocations are set as an argument to a kernel using `clSetKernelArgMemPointerINTEL`. 23 | 24 | Since Unified Shared Memory is an OpenCL extension, this sample uses the `OpenCLExt` extension loader library to query the extension APIs. 25 | Please see the OpenCL Extension Loader [README](https://github.com/bashbaug/opencl-extension-loader) for more detail. 26 | 27 | This sample currently uses c APIs because the C++ bindings do not support Unified Shared Memory (yet). 28 | When support for Unified Shared Memory is added to the C++ bindings the samples will be updated to use the C++ bindings instead, which should simplify the sample slightly. 29 | 30 | ## Command Line Options 31 | 32 | | Option | Default Value | Description | 33 | |:--|:-:|:--| 34 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute on the sample on. 35 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 
36 | -------------------------------------------------------------------------------- /tutorials/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | function(add_opencl_tutorial) 6 | set(options TEST) 7 | set(one_value_args TARGET VERSION CATEGORY) 8 | set(multi_value_args SOURCES KERNELS INCLUDES LIBS) 9 | cmake_parse_arguments(OPENCL_TUTORIAL 10 | "${options}" "${one_value_args}" "${multi_value_args}" 11 | ${ARGN} 12 | ) 13 | 14 | if(NOT OPENCL_TUTORIAL_VERSION) 15 | message(STATUS "No OpenCL version specified for tutorial ${OPENCL_TUTORIAL_TARGET}, using OpenCL 3.0.") 16 | set(OPENCL_TUTORIAL_VERSION 300) 17 | endif() 18 | 19 | add_executable(${OPENCL_TUTORIAL_TARGET} ${OPENCL_TUTORIAL_SOURCES}) 20 | 21 | target_include_directories(${OPENCL_TUTORIAL_TARGET} PRIVATE ${OpenCL_INCLUDE_DIR} ${OPENCL_TUTORIAL_INCLUDES}) 22 | target_link_libraries(${OPENCL_TUTORIAL_TARGET} ${OpenCL_LIBRARIES} ${OPENCL_TUTORIAL_LIBS}) 23 | 24 | target_compile_definitions(${OPENCL_TUTORIAL_TARGET} PRIVATE CL_TARGET_OPENCL_VERSION=${OPENCL_TUTORIAL_VERSION}) 25 | target_compile_definitions(${OPENCL_TUTORIAL_TARGET} PRIVATE CL_ENABLE_BETA_EXTENSIONS) 26 | target_compile_definitions(${OPENCL_TUTORIAL_TARGET} PRIVATE CL_HPP_TARGET_OPENCL_VERSION=${OPENCL_TUTORIAL_VERSION}) 27 | target_compile_definitions(${OPENCL_TUTORIAL_TARGET} PRIVATE CL_HPP_MINIMUM_OPENCL_VERSION=${OPENCL_TUTORIAL_VERSION}) 28 | if (WIN32) 29 | target_compile_definitions(${OPENCL_TUTORIAL_TARGET} PRIVATE _CRT_SECURE_NO_WARNINGS NOMINMAX) 30 | endif() 31 | 32 | set_target_properties(${OPENCL_TUTORIAL_TARGET} PROPERTIES FOLDER "Tutorials/${OPENCL_TUTORIAL_CATEGORY}/${OPENCL_TUTORIAL_TARGET}") 33 | 34 | if(CMAKE_CONFIGURATION_TYPES) 35 | set(OPENCL_TUTORIAL_CONFIGS ${CMAKE_CONFIGURATION_TYPES}) 36 | else() 37 | set(OPENCL_TUTORIAL_CONFIGS ${CMAKE_BUILD_TYPE}) 38 | endif() 39 | 
foreach(CONFIG ${OPENCL_TUTORIAL_CONFIGS}) 40 | install(TARGETS ${OPENCL_TUTORIAL_TARGET} CONFIGURATIONS ${CONFIG} DESTINATION ${CONFIG}) 41 | install(FILES ${OPENCL_TUTORIAL_KERNELS} CONFIGURATIONS ${CONFIG} DESTINATION ${CONFIG}) 42 | endforeach() 43 | if(OPENCL_TUTORIAL_TEST) 44 | add_test(NAME ${OPENCL_TUTORIAL_TARGET} COMMAND ${OPENCL_TUTORIAL_TARGET}) 45 | endif() 46 | endfunction() 47 | 48 | add_subdirectory( interceptlayer ) 49 | -------------------------------------------------------------------------------- /samples/00_loaderinfo/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright (c) 2019-2025 Ben Ashbaugh 3 | // 4 | // SPDX-License-Identifier: MIT 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | typedef cl_uint cl_icdl_info; 14 | 15 | #define CL_ICDL_OCL_VERSION 1 16 | #define CL_ICDL_VERSION 2 17 | #define CL_ICDL_NAME 3 18 | #define CL_ICDL_VENDOR 4 19 | 20 | typedef cl_int (*pfn_clGetICDLoaderInfoOCLICD)(cl_icdl_info, size_t, void*, size_t*); 21 | pfn_clGetICDLoaderInfoOCLICD clGetICDLoaderInfoOCLICD = NULL; 22 | 23 | static void PrintLoaderInfo(const char* label, cl_icdl_info info) 24 | { 25 | size_t sz = 0; 26 | clGetICDLoaderInfoOCLICD(info, 0, nullptr, &sz); 27 | 28 | std::vector str(sz); 29 | clGetICDLoaderInfoOCLICD(info, sz, str.data(), nullptr); 30 | 31 | printf("Query for for %s (size = %zu) returned: %s\n", label, sz, str.data()); 32 | } 33 | 34 | int main( 35 | int argc, 36 | char** argv ) 37 | { 38 | { 39 | popl::OptionParser op("Supported Options"); 40 | 41 | bool printUsage = false; 42 | try { 43 | op.parse(argc, argv); 44 | } catch (std::exception& e) { 45 | fprintf(stderr, "Error: %s\n\n", e.what()); 46 | printUsage = true; 47 | } 48 | 49 | if (printUsage || !op.unknown_options().empty() || !op.non_option_args().empty()) { 50 | fprintf(stderr, 51 | "Usage: loaderinfo [options]\n" 52 | "%s", op.help().c_str()); 53 | return -1; 54 | } 55 | } 56 
| 57 | 58 | clGetICDLoaderInfoOCLICD = (pfn_clGetICDLoaderInfoOCLICD) 59 | clGetExtensionFunctionAddress("clGetICDLoaderInfoOCLICD"); 60 | 61 | if (clGetICDLoaderInfoOCLICD == NULL) { 62 | printf("Couldn't get function pointer to clGetICDLoaderInfoOCLICD!\n"); 63 | printf("This is normal and some ICD loaders do not support this functionality.\n"); 64 | printf("Exiting...\n"); 65 | return 0; 66 | } 67 | 68 | #define QUERY_AND_PRINT_LOADER_INFO(_info) \ 69 | PrintLoaderInfo(#_info, _info); 70 | 71 | QUERY_AND_PRINT_LOADER_INFO(CL_ICDL_OCL_VERSION); 72 | QUERY_AND_PRINT_LOADER_INFO(CL_ICDL_VERSION); 73 | QUERY_AND_PRINT_LOADER_INFO(CL_ICDL_NAME); 74 | QUERY_AND_PRINT_LOADER_INFO(CL_ICDL_VENDOR); 75 | 76 | return 0; 77 | } 78 | -------------------------------------------------------------------------------- /samples/svm/00_svmqueries/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright (c) 2024-2025 Ben Ashbaugh 3 | // 4 | // SPDX-License-Identifier: MIT 5 | */ 6 | 7 | #include 8 | 9 | #include 10 | 11 | void PrintSVMCaps( 12 | const char* label, 13 | cl_device_svm_capabilities svmcaps ) 14 | { 15 | printf("\t%s: %s%s%s%s\n", 16 | label, 17 | ( svmcaps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER ) ? "\n\t\tCL_DEVICE_SVM_COARSE_GRAIN_BUFFER" : "", 18 | ( svmcaps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER ) ? "\n\t\tCL_DEVICE_SVM_FINE_GRAIN_BUFFER" : "", 19 | ( svmcaps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM ) ? "\n\t\tCL_DEVICE_SVM_FINE_GRAIN_SYSTEM" : "", 20 | ( svmcaps & CL_DEVICE_SVM_ATOMICS ) ? 
"\n\t\tCL_DEVICE_SVM_ATOMICS" : "" ); 21 | } 22 | 23 | int main( 24 | int argc, 25 | char** argv ) 26 | { 27 | { 28 | popl::OptionParser op("Supported Options"); 29 | 30 | bool printUsage = false; 31 | try { 32 | op.parse(argc, argv); 33 | } catch (std::exception& e) { 34 | fprintf(stderr, "Error: %s\n\n", e.what()); 35 | printUsage = true; 36 | } 37 | 38 | if (printUsage || !op.unknown_options().empty() || !op.non_option_args().empty()) { 39 | fprintf(stderr, 40 | "Usage: svmqueries [options]\n" 41 | "%s", op.help().c_str()); 42 | return -1; 43 | } 44 | } 45 | 46 | std::vector platforms; 47 | cl::Platform::get(&platforms); 48 | 49 | for( size_t i = 0; i < platforms.size(); i++ ) 50 | { 51 | printf( "Platform[%zu]: %s\n", 52 | i, 53 | platforms[i].getInfo().c_str()); 54 | 55 | std::vector devices; 56 | platforms[i].getDevices(CL_DEVICE_TYPE_ALL, &devices); 57 | 58 | for( size_t d = 0; d < devices.size(); d++ ) 59 | { 60 | printf("\tDevice[%zu]: %s\n", 61 | d, 62 | devices[d].getInfo().c_str()); 63 | 64 | cl_device_svm_capabilities svmcaps = 65 | devices[d].getInfo(); 66 | PrintSVMCaps( "CL_DEVICE_SVM_CAPABILITIES", svmcaps ); 67 | 68 | printf( "\n" ); 69 | } 70 | } 71 | 72 | printf("Cleaning up...\n"); 73 | 74 | return 0; 75 | } -------------------------------------------------------------------------------- /samples/python/01_copybuffer/copybuffer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2019-2025 Ben Ashbaugh 4 | # 5 | # SPDX-License-Identifier: MIT 6 | 7 | import numpy as np 8 | import pyopencl as cl 9 | import argparse 10 | 11 | gwx = 1024 * 1024 12 | 13 | if __name__ == "__main__": 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('-p', '--platform', type=int, action='store', default=0, help='Platform Index') 16 | parser.add_argument('-d', '--device', type=int, action='store', default=0, help='Device Index') 17 | 18 | args = parser.parse_args() 19 | 
platformIndex = args.platform 20 | deviceIndex = args.device 21 | 22 | platforms = cl.get_platforms() 23 | print('Running on platform: ' + platforms[platformIndex].get_info(cl.platform_info.NAME)) 24 | 25 | devices = platforms[platformIndex].get_devices() 26 | print('Running on device: ' + devices[deviceIndex].get_info(cl.device_info.NAME)) 27 | 28 | context = cl.Context([devices[deviceIndex]]) 29 | commandQueue = cl.CommandQueue(context, devices[deviceIndex]) 30 | 31 | deviceMemSrc = cl.Buffer(context, cl.mem_flags.ALLOC_HOST_PTR, gwx * np.uint32().itemsize) 32 | deviceMemDst = cl.Buffer(context, cl.mem_flags.ALLOC_HOST_PTR, gwx * np.uint32().itemsize) 33 | 34 | # initialization 35 | mapped_src, event = cl.enqueue_map_buffer(commandQueue, deviceMemSrc, 36 | cl.map_flags.WRITE_INVALIDATE_REGION, 37 | 0, gwx, np.uint32) 38 | with mapped_src.base: 39 | for i in range(gwx): 40 | mapped_src[i] = i 41 | 42 | # execution 43 | cl.enqueue_copy(commandQueue, deviceMemDst, deviceMemSrc) 44 | 45 | # verification 46 | mapped_dst, event = cl.enqueue_map_buffer(commandQueue, deviceMemDst, 47 | cl.map_flags.READ, 48 | 0, gwx, np.uint32) 49 | with mapped_dst.base: 50 | mismatches = 0 51 | for i, val in enumerate(mapped_dst): 52 | if val != i: 53 | if mismatches < 16: 54 | print('Mismatch! dst[{}] == {}, want {}'.format(i, val, i)) 55 | mismatches = mismatches + 1 56 | if mismatches != 0: 57 | print('Error: Found {} mismatches / {} values!!!'.format(mismatches, gwx)) 58 | else: 59 | print('Success.') 60 | -------------------------------------------------------------------------------- /samples/usm/200_hmemhelloworld/README.md: -------------------------------------------------------------------------------- 1 | # hmemhelloworld 2 | 3 | ## Sample Purpose 4 | 5 | This sample demonstrates usage of host memory allocations. 6 | Other similar samples demonstrate usage of device memory and shared memory allocations. 
7 | 8 | Host memory allocations are owned by the host, and generally trade wide access for potentially lower performance. 9 | Because of its wide access, using host memory is one of the easiest ways to enable an application to use Unified Shared Memory, albeit at a potential performance cost. 10 | 11 | The sample initializes a source USM allocation, copies it to a destination USM allocation using a kernel, then checks on the host that the copy was performed correctly. 12 | 13 | ## Key APIs and Concepts 14 | 15 | This sample allocates host memory using `clHostMemAllocINTEL` and frees it using `clMemFreeINTEL`. 16 | 17 | Since host memory may be directly accessed and manipulated on the host, this sample does not need to use any special Unified Shared Memory APIs to copy to or from a host allocation, or to map or unmap a host allocation. 18 | Instead, this sample simply ensures that copy kernel is complete before verifying that the copy was performed correctly. 19 | For simplicity, this sample ensures all commands in the command queue are complete using `clFinish`, but other completion mechanisms could be used instead that may be more efficient. 20 | 21 | Within a kernel, a Unified Shared Memory allocation can be accessed similar to an OpenCL buffer (a `cl_mem`), or a Shared Virtual Memory allocation. 22 | Unified Shared Memory allocations are set as an argument to a kernel using `clSetKernelArgMemPointerINTEL`. 23 | 24 | Since Unified Shared Memory is an OpenCL extension, this sample uses the `OpenCLExt` extension loader library to query the extension APIs. 25 | Please see the OpenCL Extension Loader [README](https://github.com/bashbaug/opencl-extension-loader) for more detail. 26 | 27 | This sample currently uses c APIs because the C++ bindings do not support Unified Shared Memory (yet). 28 | When support for Unified Shared Memory is added to the C++ bindings the samples will be updated to use the C++ bindings instead, which should simplify the sample slightly. 
29 | 30 | ## Command Line Options 31 | 32 | | Option | Default Value | Description | 33 | |:--|:-:|:--| 34 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute on the sample on. 35 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 36 | -------------------------------------------------------------------------------- /layers/11_semaemu/emulate.h: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright (c) 2022-2025 Ben Ashbaugh 3 | // 4 | // SPDX-License-Identifier: MIT 5 | */ 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | struct SLayerContext 13 | { 14 | typedef std::map CEventMap; 15 | CEventMap EventMap; 16 | }; 17 | 18 | SLayerContext& getLayerContext(void); 19 | 20 | extern const struct _cl_icd_dispatch* g_pNextDispatch; 21 | 22 | /////////////////////////////////////////////////////////////////////////////// 23 | // Emulated Functions 24 | 25 | cl_semaphore_khr CL_API_CALL clCreateSemaphoreWithPropertiesKHR_EMU( 26 | cl_context context, 27 | const cl_semaphore_properties_khr *sema_props, 28 | cl_int *errcode_ret); 29 | 30 | cl_int CL_API_CALL clEnqueueWaitSemaphoresKHR_EMU( 31 | cl_command_queue command_queue, 32 | cl_uint num_sema_objects, 33 | const cl_semaphore_khr *sema_objects, 34 | const cl_semaphore_payload_khr *sema_payload_list, 35 | cl_uint num_events_in_wait_list, 36 | const cl_event *event_wait_list, 37 | cl_event *event); 38 | 39 | cl_int CL_API_CALL clEnqueueSignalSemaphoresKHR_EMU( 40 | cl_command_queue command_queue, 41 | cl_uint num_sema_objects, 42 | const cl_semaphore_khr *sema_objects, 43 | const cl_semaphore_payload_khr *sema_payload_list, 44 | cl_uint num_events_in_wait_list, 45 | const cl_event *event_wait_list, 46 | cl_event *event); 47 | 48 | cl_int CL_API_CALL clGetSemaphoreInfoKHR_EMU( 49 | cl_semaphore_khr semaphore, 50 | cl_semaphore_info_khr param_name, 51 | size_t param_value_size, 52 | void *param_value, 53 | 
size_t *param_value_size_ret); 54 | 55 | cl_int CL_API_CALL clRetainSemaphoreKHR_EMU( 56 | cl_semaphore_khr semaphore); 57 | 58 | cl_int CL_API_CALL clReleaseSemaphoreKHR_EMU( 59 | cl_semaphore_khr semaphore); 60 | 61 | /////////////////////////////////////////////////////////////////////////////// 62 | // Override Functions 63 | 64 | bool clGetDeviceInfo_override( 65 | cl_device_id device, 66 | cl_device_info param_name, 67 | size_t param_value_size, 68 | void* param_value, 69 | size_t* param_value_size_ret, 70 | cl_int* errcode_ret); 71 | 72 | bool clGetEventInfo_override( 73 | cl_event event, 74 | cl_event_info param_name, 75 | size_t param_value_size, 76 | void* param_value, 77 | size_t* param_value_size_ret, 78 | cl_int* errcode_ret); 79 | 80 | bool clGetPlatformInfo_override( 81 | cl_platform_id platform, 82 | cl_platform_info param_name, 83 | size_t param_value_size, 84 | void* param_value, 85 | size_t* param_value_size_ret, 86 | cl_int* errcode_ret); 87 | -------------------------------------------------------------------------------- /samples/python/02_copybufferkernel/copybufferkernel.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2019-2025 Ben Ashbaugh 4 | # 5 | # SPDX-License-Identifier: MIT 6 | 7 | import numpy as np 8 | import pyopencl as cl 9 | import argparse 10 | 11 | gwx = 1024 * 1024 12 | 13 | kernelString = """ 14 | kernel void CopyBuffer( global uint* dst, global uint* src ) 15 | { 16 | uint id = get_global_id(0); 17 | dst[id] = src[id]; 18 | } 19 | """ 20 | 21 | if __name__ == "__main__": 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument('-p', '--platform', type=int, action='store', default=0, help='Platform Index') 24 | parser.add_argument('-d', '--device', type=int, action='store', default=0, help='Device Index') 25 | 26 | args = parser.parse_args() 27 | platformIndex = args.platform 28 | deviceIndex = args.device 29 | 30 | platforms = 
cl.get_platforms() 31 | print('Running on platform: ' + platforms[platformIndex].get_info(cl.platform_info.NAME)) 32 | 33 | devices = platforms[platformIndex].get_devices() 34 | print('Running on device: ' + devices[deviceIndex].get_info(cl.device_info.NAME)) 35 | 36 | context = cl.Context([devices[deviceIndex]]) 37 | commandQueue = cl.CommandQueue(context, devices[deviceIndex]) 38 | 39 | program = cl.Program(context, kernelString) 40 | program.build() 41 | kernel = program.CopyBuffer 42 | 43 | deviceMemSrc = cl.Buffer(context, cl.mem_flags.ALLOC_HOST_PTR, gwx * np.uint32().itemsize) 44 | deviceMemDst = cl.Buffer(context, cl.mem_flags.ALLOC_HOST_PTR, gwx * np.uint32().itemsize) 45 | 46 | # initialization 47 | mapped_src, event = cl.enqueue_map_buffer(commandQueue, deviceMemSrc, 48 | cl.map_flags.WRITE_INVALIDATE_REGION, 49 | 0, gwx, np.uint32) 50 | with mapped_src.base: 51 | for i in range(gwx): 52 | mapped_src[i] = i 53 | 54 | # execution 55 | kernel(commandQueue, [gwx], None, deviceMemDst, deviceMemSrc) 56 | 57 | # verification 58 | mapped_dst, event = cl.enqueue_map_buffer(commandQueue, deviceMemDst, 59 | cl.map_flags.READ, 60 | 0, gwx, np.uint32) 61 | with mapped_dst.base: 62 | mismatches = 0 63 | for i, val in enumerate(mapped_dst): 64 | if val != i: 65 | if mismatches < 16: 66 | print('Mismatch! 
dst[{}] == {}, want {}'.format(i, val, i)) 67 | mismatches = mismatches + 1 68 | if mismatches != 0: 69 | print('Error: Found {} mismatches / {} values!!!'.format(mismatches, gwx)) 70 | else: 71 | print('Success.') 72 | -------------------------------------------------------------------------------- /layers/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | function(add_opencl_layer) 6 | set(options TEST) 7 | set(one_value_args NUMBER TARGET VERSION CATEGORY) 8 | set(multi_value_args SOURCES KERNELS INCLUDES LIBS) 9 | cmake_parse_arguments(OPENCL_LAYER 10 | "${options}" "${one_value_args}" "${multi_value_args}" 11 | ${ARGN} 12 | ) 13 | 14 | if(NOT OPENCL_LAYER_VERSION) 15 | message(STATUS "No OpenCL version specified for sample ${OPENCL_LAYER_TARGET}, using OpenCL 3.0.") 16 | set(OPENCL_LAYER_VERSION 300) 17 | endif() 18 | if(NOT OPENCL_LAYER_NUMBER) 19 | message(STATUS "No sample number specified for sample ${OPENCL_LAYER_TARGET}, using 99.") 20 | set(OPENCL_LAYER_NUMBER 99) 21 | endif() 22 | 23 | add_library(${OPENCL_LAYER_TARGET} MODULE ${OPENCL_LAYER_SOURCES}) 24 | 25 | target_include_directories(${OPENCL_LAYER_TARGET} PRIVATE ${OpenCL_INCLUDE_DIR} ${OPENCL_LAYER_INCLUDES}) 26 | 27 | target_compile_definitions(${OPENCL_LAYER_TARGET} PRIVATE CL_TARGET_OPENCL_VERSION=${OPENCL_LAYER_VERSION}) 28 | target_compile_definitions(${OPENCL_LAYER_TARGET} PRIVATE CL_ENABLE_BETA_EXTENSIONS) 29 | target_compile_definitions(${OPENCL_LAYER_TARGET} PRIVATE CL_USE_DEPRECATED_OPENCL_1_0_APIS) 30 | target_compile_definitions(${OPENCL_LAYER_TARGET} PRIVATE CL_USE_DEPRECATED_OPENCL_1_1_APIS) 31 | target_compile_definitions(${OPENCL_LAYER_TARGET} PRIVATE CL_USE_DEPRECATED_OPENCL_1_2_APIS) 32 | target_compile_definitions(${OPENCL_LAYER_TARGET} PRIVATE CL_USE_DEPRECATED_OPENCL_2_0_APIS) 33 | target_compile_definitions(${OPENCL_LAYER_TARGET} PRIVATE 
CL_USE_DEPRECATED_OPENCL_2_1_APIS) 34 | target_compile_definitions(${OPENCL_LAYER_TARGET} PRIVATE CL_USE_DEPRECATED_OPENCL_2_2_APIS) 35 | 36 | if (WIN32) 37 | target_compile_definitions(${OPENCL_LAYER_TARGET} PRIVATE _CRT_SECURE_NO_WARNINGS NOMINMAX) 38 | endif() 39 | 40 | set_target_properties(${OPENCL_LAYER_TARGET} PROPERTIES FOLDER "Layers/${OPENCL_LAYER_CATEGORY}/${OPENCL_LAYER_NUMBER}_${OPENCL_LAYER_TARGET}") 41 | 42 | if(CMAKE_CONFIGURATION_TYPES) 43 | set(OPENCL_LAYER_CONFIGS ${CMAKE_CONFIGURATION_TYPES}) 44 | else() 45 | set(OPENCL_LAYER_CONFIGS ${CMAKE_BUILD_TYPE}) 46 | endif() 47 | foreach(CONFIG ${OPENCL_LAYER_CONFIGS}) 48 | install(TARGETS ${OPENCL_LAYER_TARGET} CONFIGURATIONS ${CONFIG} DESTINATION ${CONFIG}) 49 | install(FILES ${OPENCL_LAYER_KERNELS} CONFIGURATIONS ${CONFIG} DESTINATION ${CONFIG}) 50 | endforeach() 51 | endfunction() 52 | 53 | add_subdirectory( 00_example ) 54 | 55 | add_subdirectory( 10_cmdbufemu ) 56 | add_subdirectory( 11_semaemu ) 57 | 58 | if(TARGET SPIRV-Headers) 59 | add_subdirectory( 12_spirvqueriesemu ) 60 | endif() 61 | -------------------------------------------------------------------------------- /samples/opengl/README.md: -------------------------------------------------------------------------------- 1 | # OpenGL Samples 2 | 3 | This directory contains samples that use [OpenGL](https://www.khronos.org/opengl/) to visualize results. 4 | OpenGL is a widely supported industry standard for graphics and rendering and can interoperate well with many OpenCL devices. 5 | 6 | ## Dependencies 7 | 8 | These samples require OpenGL headers and libraries. 9 | 10 | Additionally, these samples use [GLFW](https://www.glfw.org/) to abstract many of the operating system specific parts of OpenGL. 11 | GLFW supports Windows, macOS, and Linux. 12 | Pre-built packages are available for many platforms, or GLFW may be built from source. 13 | 14 | If these dependencies are not found then the OpenGL samples will not be built. 
15 | 16 | ### Using Pre-built GLFW Packages 17 | 18 | This is the preferred method for using GLFW. 19 | Please see the [GLFW Download Page](https://www.glfw.org/download.html) for details. 20 | 21 | ### Building GLFW from Source 22 | 23 | The following steps are recommended when GLFW is built from source. 24 | Please refer to the [Compiling GLFW](https://www.glfw.org/docs/latest/compile_guide.html) reference page for details. 25 | 26 | 1. Build GLFW separately from these OpenCL samples. 27 | The GLFW source code may be cloned into a completely separate directory or into the `external` directory for these samples. 28 | 2. Build GLFW as a static library. 29 | 3. On Linux, build GLFW for X11. 30 | 4. Build a `Release` or `RelWithDebInfo` configuration of GLFW. 31 | 5. Install GLFW either into the `external` directory for the samples (recommended), or into a system directory. 32 | If GLFW is installed into the `external` directory it _should_ be detected automatically by these samples. 33 | 34 | Sample build instructions: 35 | 36 | 1. Clone the GLFW source code. 37 | In these instructions we will clone into the `external` directory. 38 | 39 | ```sh 40 | $ git clone https://github.com/glfw/glfw.git external/glfw-src 41 | ``` 42 | 43 | 2. Create build files. 44 | 45 | ```sh 46 | $ cd external/glfw-src && mkdir build && cd build 47 | $ cmake .. \ 48 | -DCMAKE_BUILD_TYPE=Release \ 49 | -DCMAKE_INSTALL_PREFIX=/path/to/your/SimpleOpenCLSamples/external/glfw \ 50 | -DGLFW_BUILD_DOCS=0 -DGLFW_BUILD_EXAMPLES=0 -DGLFW_BUILD_TESTS=0 51 | ``` 52 | 53 | 3. Build and install GLFW into the `external` directory. 54 | 55 | ```sh 56 | $ cmake --build . --target install --config Release 57 | ``` 58 | 59 | After installing, GLFW should be found by the OpenCL samples. 60 | 61 | ## Summary of OpenGL Samples 62 | 63 | * [juliagl](./00_juliagl): Demonstrates sharing an OpenGL texture with OpenCL. 64 | * [nbodygl](./01_nbodygl): Demonstrates rendering an OpenCL vertex buffer with OpenGL.
65 | * [sobelgl](./02_sobelgl): Displays the output of a Sobel edge detection filter using OpenGL. 66 | -------------------------------------------------------------------------------- /tutorials/interceptlayer/part6.md: -------------------------------------------------------------------------------- 1 | # Using the Intercept Layer for OpenCL Applications 2 | 3 | ## Part 6: Final Words and Additional Things to Try 4 | 5 | If you have made it this far, great work! 6 | We started with an OpenCL application that crashed and used the Intercept Layer for OpenCL Applications to identify and fix the bugs that were preventing it from running correctly, then to profile the application to significantly improve its performance. 7 | 8 | After fixing all of the bugs and making the suggested performance improvements, we should be able to execute the tutorial application without the Intercept Layer now and it will still run well: 9 | 10 | ``` 11 | $ ./sinjulia 12 | Running on platform: Intel(R) OpenCL HD Graphics 13 | Running on device: Intel(R) Graphics [0x5916] 14 | Executing the kernel 16 times 15 | Global Work Size = ( 3840, 2160 ) 16 | Local work size = NULL 17 | Finished in 0.513813 seconds 18 | Wrote image file sinjulia.bmp 19 | ``` 20 | 21 | This is the "official" end of the tutorial, but if you are looking for some additional things to try, either to explore the capabilities of the Intercept Layer for OpenCL Applications or to experiment with fractal images, here are a few suggestions: 22 | 23 | * Visually trace the tutorial application using [Chrome Tracing](https://github.com/intel/opencl-intercept-layer/blob/master/docs/chrome_tracing.md). 24 | * How does the trace change if you enable the `FinishAfterEnqueue` control? 25 | * Modify the tutorial application to output to an OpenCL image instead of to an OpenCL buffer. 26 | * Dump the output image using `DumpImagesBeforeEnqueue` and `DumpImagesAfterEnqueue`.
27 | * Does the version that outputs to an OpenCL image perform better or worse than the version that outputs to an OpenCL buffer? 28 | * Write the tutorial application in [SYCL](https://www.khronos.org/sycl/) instead of using OpenCL directly. 29 | * Does the SYCL version generate similar OpenCL calls as the direct OpenCL version? 30 | * Does the SYCL version perform similarly to the direct OpenCL version? If not, can you determine why it doesn't? 31 | * Modify the tutorial application to generate different fractal images. 32 | * Choose a different complex constant `c` by changing the values of `cr` and `ci`. 33 | * Choose a different iteration function. 34 | Note that the tutorial application currently uses `f(z) = c * sin(z)` as its iteration function. 35 | Other common iteration functions can be found on Paul Bourke's site [here](http://paulbourke.net/fractals/juliaset/). 36 | Remember that the inputs to these functions are complex numbers! 37 | * Use a different range of inputs. 38 | Note that the tutorial application currently goes from `-pi/2` to `+pi/2` for both the real and imaginary axes. 39 | * Map the result value onto a different color or onto multiple colors. 40 | -------------------------------------------------------------------------------- /samples/16_floatatomics/README.md: -------------------------------------------------------------------------------- 1 | # Floating-point Atomic Adds 2 | 3 | ## Sample Purpose 4 | 5 | This is an advanced sample that demonstrates how to do atomic floating-point addition in a kernel. 6 | The most standard way to perform atomic floating-point addition uses the [cl_ext_float_atomics](https://registry.khronos.org/OpenCL/extensions/ext/cl_ext_float_atomics.html) extension. 7 | This extension adds device queries and built-in functions to optionally support floating-point atomic add, min, max, load, and store on 16-bit, 32-bit, and 64-bit floating-point types. 
8 | When the `cl_ext_float_atomics` extension is supported, and 32-bit floating point atomic adds are supported, this sample will use the built-in functions added by this extension. 9 | 10 | This sample also includes fallback implementations when the `cl_ext_float_atomics` extension is not supported: 11 | 12 | * For NVIDIA GPUs, this sample includes a fallback that does the floating-point atomic add using inline PTX assembly language. 13 | * For AMD GPUs, this sample includes a fallback that calls a compiler intrinsic to do the floating-point atomic add. 14 | * For other devices, this sample includes two fallback implementations: 15 | * The first emulates the floating-point atomic add using 32-bit `atomic_xchg` functions. 16 | This fallback implementation cannot reliably return the "old" value that was in memory before performing the atomic add, so it is not suitable for all usages, but it does work for some important use cases, such as reductions. 17 | * The second emulates the floating-point atomic add using 32-bit `atomic_cmpxchg` functions. 18 | This is a slower emulation, but it is able to reliably return the "old" value that was in memory before performing the atomic add. 19 | 20 | This sample was inspired by the blog post: https://violetspace.github.io/blog/atomic-float-addition-in-opencl.html 21 | 22 | ## Key APIs and Concepts 23 | 24 | ``` 25 | CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT 26 | __opencl_c_ext_fp32_global_atomic_add 27 | atomic_fetch_add_explicit 28 | atomic_xchg 29 | atomic_cmpxchg 30 | ``` 31 | 32 | ## Command Line Options 33 | 34 | | Option | Default Value | Description | 35 | |:--|:-:|:--| 36 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute the sample on. 37 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 38 | | `-i ` | 16 | Specify the number of iterations to execute.
39 | | `--gwx ` | 16384 | Specify the global work size, which is also the number of floating-point atomics to perform. 40 | | `-e` | N/A | Unconditionally use the emulated floating-point atomic add. 41 | | `-s` | N/A | Unconditionally use the slower and safer emulated floating-point atomic add. 42 | | `-e` | N/A | Check intermediate results for correctness, unsupported for the faster emulated atomics, requires adding a positive value. 43 | -------------------------------------------------------------------------------- /samples/usm/300_smemhelloworld/README.md: -------------------------------------------------------------------------------- 1 | # smemhelloworld 2 | 3 | ## Sample Purpose 4 | 5 | This sample demonstrates usage of shared memory allocations. 6 | Other similar samples demonstrate usage of device memory and host memory allocations. 7 | 8 | Shared memory allocations share ownership and are intended to implicitly migrate between the host and one or more devices. 9 | Because shared memory allocations may migrate, they may generally be accessed with good performance on both the host and a device, albeit after paying for the cost of migration. 10 | When supported, shared memory is a great way to enable an application to use Unified Shared Memory with very good performance. 11 | 12 | The sample initializes a source USM allocation, copies it to a destination USM allocation using a kernel, then checks on the host that the copy was performed correctly. 13 | 14 | ## Key APIs and Concepts 15 | 16 | This sample allocates shared memory using `clSharedMemAllocINTEL` and frees it using `clMemFreeINTEL`. 17 | 18 | Since shared memory may be directly accessed and manipulated on the host, this sample does not need to use any special Unified Shared Memory APIs to copy to or from a shared allocation, or to map or unmap a shared allocation. 19 | Instead, this sample simply ensures that copy kernel is complete before verifying that the copy was performed correctly. 
20 | For simplicity, this sample ensures all commands in the command queue are complete using `clFinish`, but other completion mechanisms could be used instead that may be more efficient. 21 | 22 | Within a kernel, a Unified Shared Memory allocation can be accessed similar to an OpenCL buffer (a `cl_mem`), or a Shared Virtual Memory allocation. 23 | Unified Shared Memory allocations are set as an argument to a kernel using `clSetKernelArgMemPointerINTEL`. 24 | 25 | When profiling an application using shared memory allocations, be aware that migrations between the host and the device may be occurring implicitly. 26 | These implicit transfers may cause additional apparent latency when launching a kernel (for transfers to the device) or completion latency (for transfers to the host) versus device memory or host memory allocations. 27 | 28 | Since Unified Shared Memory is an OpenCL extension, this sample uses the `OpenCLExt` extension loader library to query the extension APIs. 29 | Please see the OpenCL Extension Loader [README](https://github.com/bashbaug/opencl-extension-loader) for more detail. 30 | 31 | This sample currently uses c APIs because the C++ bindings do not support Unified Shared Memory (yet). 32 | When support for Unified Shared Memory is added to the C++ bindings the samples will be updated to use the C++ bindings instead, which should simplify the sample slightly. 33 | 34 | ## Command Line Options 35 | 36 | | Option | Default Value | Description | 37 | |:--|:-:|:--| 38 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute on the sample on. 39 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 
40 | -------------------------------------------------------------------------------- /samples/14_ooqcommandbuffers/README.md: -------------------------------------------------------------------------------- 1 | # ooqcommandbuffers 2 | 3 | ## Sample Purpose 4 | 5 | This is an intermediate-level sample that demonstrates how to use the OpenCL extension [cl_khr_command_buffer](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_command_buffer) to create a command buffer that may execute out-of-order. 6 | Using an out-of-order command buffer may improve performance by allowing commands in a command buffer to execute in parallel. 7 | 8 | An out-of-order command buffer is created by passing an out-of-order command queue to `clCreateCommandBufferKHR`. 9 | 10 | This sample works by creating a command buffer with two relatively slow kernels to initialize two buffers followed by a faster data parallel kernel to add the two buffers together. 11 | 12 | ![Out-of-Order Command Buffer Diagram](ooq_cmdbuf.png) 13 | 14 | There are no dependencies between the two relatively slow kernels but they must both complete before the data parallel kernel can execute. 15 | If the command buffer is an in-order command buffer then the two relatively slow kernels must execute sequentially. 16 | If the command buffer is an out-of-order command buffer, however, the two relatively slow kernels may execute in parallel, which may improve the execution time of the command buffer. 17 | Note that there is no requirement to execute the two relatively slow kernels in parallel, and therefore some implementations may not see a performance improvement with an out-of-order command buffer. 18 | For some implementations though, the out-of-order command buffer will execute almost twice as fast as the same in-order command buffer! 19 | 20 | This sample also requires an implementation that supports out-of-order command queues and out-of-order command buffers.
21 | These are optional capabilities that may not be supported by all OpenCL devices. 22 | If a device supports out-of-order command queues the sample may run using the [cl_khr_command_buffer emulation layer](../../layers/10_cmdbufemu). 23 | 24 | This sample requires the OpenCL Extension Loader to get the extension APIs for command buffers. 25 | 26 | ## Key APIs and Concepts 27 | 28 | This sample demonstrates how to create and execute a command buffer that may execute out-of-order. 29 | This sample also demonstrates how to time commands in an out-of-order command queue. 30 | 31 | ```c 32 | CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR 33 | clCommandNDRangeKernelKHR with sync points 34 | clEnqueueBarrierWithWaitList 35 | ``` 36 | 37 | ## Command Line Options 38 | 39 | | Option | Default Value | Description | 40 | |:--|:-:|:--| 41 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute on the sample on. 42 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 43 | | `--ioq` | n/a | Execute the sample in-order instead of out-of-order. 44 | | `-i ` | 16 | Specify the number of iterations to execute. 45 | | `-e ` | 1M | Specify the number of 32-bit integers in each buffer. 
46 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | permissions: 4 | contents: read 5 | 6 | on: 7 | push: 8 | paths-ignore: 9 | - '**/*.md' 10 | pull_request: 11 | branches: 12 | - main 13 | 14 | env: 15 | BUILD_TYPE: RelWithDebInfo 16 | 17 | jobs: 18 | check: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 22 | 23 | - name: Check Tabs 24 | run: | 25 | if git grep -n $'\t' samples/*.cpp samples/*.h tutorials/*.cpp tutorials/*.h; then echo 'Please replace tabs with spaces in source files.'; false; fi 26 | 27 | - name: Check Whitespace 28 | run: | 29 | if git grep -n '[[:blank:]]$' samples/*.cpp samples/*.h tutorials/*.cpp tutorials/*.h; then echo 'Please remove trailing whitespace from source files.'; false; fi 30 | 31 | build: 32 | needs: check 33 | 34 | strategy: 35 | matrix: 36 | os: [ubuntu-latest, windows-latest] 37 | ext: [YES, NO] 38 | 39 | runs-on: ${{matrix.os}} 40 | 41 | steps: 42 | - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 43 | 44 | - name: Get Ubuntu OpenGL Dependencies 45 | if: matrix.os == 'ubuntu-latest' 46 | run: | 47 | sudo apt-get update 48 | sudo apt-get install -y libglfw3-dev 49 | 50 | - name: Get OpenCL Headers 51 | uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 52 | with: 53 | repository: KhronosGroup/OpenCL-Headers 54 | path: external/OpenCL-Headers 55 | 56 | - name: Get OpenCL ICD Loader 57 | uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 58 | with: 59 | repository: KhronosGroup/OpenCL-ICD-Loader 60 | path: external/opencl-icd-loader 61 | 62 | - name: Get OpenCL Extension Loader 63 | if: matrix.ext == 'YES' 64 | uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 65 | with: 66 | repository: 
bashbaug/opencl-extension-loader 67 | path: external/opencl-extension-loader 68 | 69 | - name: Get SPIR-V Headers 70 | if: matrix.ext == 'YES' 71 | uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 72 | with: 73 | repository: KhronosGroup/SPIRV-Headers 74 | path: external/SPIRV-Headers 75 | 76 | - name: Create Build Directory 77 | run: cmake -E make_directory ${{runner.workspace}}/build 78 | 79 | - name: Run CMake 80 | shell: bash 81 | working-directory: ${{runner.workspace}}/build 82 | run: cmake -DOpenCL_INCLUDE_DIRS=$GITHUB_WORKSPACE/external/OpenCL-Headers -DCMAKE_BUILD_TYPE=$BUILD_TYPE $GITHUB_WORKSPACE 83 | 84 | - name: Build 85 | working-directory: ${{runner.workspace}}/build 86 | shell: bash 87 | run: cmake --build . --parallel --config $BUILD_TYPE 88 | 89 | #- name: Test 90 | # working-directory: ${{runner.workspace}}/build 91 | # shell: bash 92 | # run: ctest -C $BUILD_TYPE 93 | -------------------------------------------------------------------------------- /samples/usm/README.md: -------------------------------------------------------------------------------- 1 | # Unified Shared Memory Samples 2 | 3 | This directory contains samples demonstrating Unified Shared Memory (USM). 4 | Unified Shared Memory is intended to bring pointer-based programming to OpenCL, and is an alternative to OpenCL 2.0 Shared Virtual Memory (SVM). 5 | 6 | ## Unified Shared Memory Extension Status 7 | 8 | The `cl_intel_unified_shared_memory` extension is a widely supported vendor extension. 9 | The latest Unified Shared Memory extension specification can be found [here](https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_unified_shared_memory.html). 10 | 11 | These samples require the OpenCL Extension Loader to get the extension APIs for Unified Shared Memory. 
12 | 13 | ## Unified Shared Memory Advantages 14 | 15 | Unified Shared Memory (USM) provides: 16 | 17 | * Easier integration into existing code bases by representing OpenCL memory allocations as pointers rather than handles (`cl_mems`), with full support for pointer arithmetic into allocations. 18 | 19 | * Fine-grain control over ownership and accessibility of OpenCL memory allocations, to optimally choose between performance and programmer convenience. 20 | 21 | * A simpler programming model, by automatically migrating some memory allocations between OpenCL devices and the host. 22 | 23 | Compared to Shared Virtual Memory (SVM), Unified Shared Memory provides: 24 | 25 | * A similar pointer-based representation of memory allocations. 26 | 27 | * A similar address equivalence for pointers to allocations on the host and the device. 28 | 29 | * No need to map or unmap any USM allocations, similar to fine grain SVM allocations. 30 | 31 | * No need to specify all of the allocations used by an OpenCL kernel, similar to fine grain system SVM allocations. 32 | 33 | * More control over the initial placement of a memory allocation, and where a memory allocation is able to migrate. 34 | 35 | * The ability to pass other implementation-specific properties during allocation. 36 | 37 | * The ability to provide implementation-specific memory advice for some or all of a memory allocation, after allocation. 38 | 39 | * The ability to query information about a memory allocation. 40 | 41 | ## Summary of Unified Shared Memory Samples 42 | 43 | * [usmqueries](./00_usmqueries): Queries and prints the USM capabilities of a device. 44 | * [usmmeminfo](./01_usmmeminfo): Allocates and queries properties of a USM allocation. 45 | * [dmemhelloworld](./100_dmemhelloworld): Copy one "device" memory allocation to another. 46 | * [dmemlinkedlist](./101_dmemlinkedlist): Create and modify a linked list in "device" memory. 
47 | * [hmemhelloworld](./200_hmemhelloworld): Copy one "host" memory allocation to another. 48 | * [hmemlinkedlist](./201_hmemlinkedlist): Create and modify a linked list in "host" memory. 49 | * [smemhelloworld](./300_smemhelloworld): Copy one "shared" memory allocation to another. 50 | * [smemlinkedlist](./301_smemlinkedlist): Create and modify a linked list in "shared" memory. 51 | * [usmmigratemem](./310_usmmigratemem): Copy one "shared" memory allocation to another, with explicit calls to migrate memory. -------------------------------------------------------------------------------- /samples/00_enumopenclpp/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright (c) 2019-2025 Ben Ashbaugh 3 | // 4 | // SPDX-License-Identifier: MIT 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | /* Prints three tab-indented lines (Name, Vendor, Driver Version) for `platform`, each filled from a string returned by platform.getInfo(), and always returns CL_SUCCESS. NOTE(review): the getInfo query parameters are not visible in this listing -- confirm which platform properties are queried. */ static cl_int PrintPlatformInfoSummary( 14 | cl::Platform platform ) 15 | { 16 | printf("\tName: %s\n", platform.getInfo().c_str() ); 17 | printf("\tVendor: %s\n", platform.getInfo().c_str() ); 18 | printf("\tDriver Version: %s\n", platform.getInfo().c_str() ); 19 | 20 | return CL_SUCCESS; 21 | } 22 | 23 | /* Prints `label`, then the name of each device-type bit that is set in `type` (DEFAULT, CPU, GPU, ACCELERATOR, CUSTOM, in that order), then a newline. Each bit is tested independently, so more than one name may be printed. */ static void PrintDeviceType( 24 | const char* label, 25 | cl_device_type type ) 26 | { 27 | printf("%s%s%s%s%s%s\n", 28 | label, 29 | ( type & CL_DEVICE_TYPE_DEFAULT ) ? "DEFAULT " : "", 30 | ( type & CL_DEVICE_TYPE_CPU ) ? "CPU " : "", 31 | ( type & CL_DEVICE_TYPE_GPU ) ? "GPU " : "", 32 | ( type & CL_DEVICE_TYPE_ACCELERATOR ) ? "ACCELERATOR " : "", 33 | ( type & CL_DEVICE_TYPE_CUSTOM ) ?
"CUSTOM " : ""); 34 | } 35 | 36 | /* For every entry of `devices`, prints a `Device[i]:` header followed by its Type line (via PrintDeviceType) and its Name, Vendor, Device Version and Driver Version lines. Always returns CL_SUCCESS. NOTE(review): the getInfo query parameters are not visible in this listing -- confirm which device properties are queried. */ static cl_int PrintDeviceInfoSummary( 37 | const std::vector& devices ) 38 | { 39 | for( size_t i = 0; i < devices.size(); i++ ) 40 | { 41 | printf("Device[%zu]:\n", i ); 42 | 43 | cl_device_type deviceType = devices[i].getInfo(); 44 | PrintDeviceType("\tType: ", deviceType); 45 | 46 | printf("\tName: %s\n", devices[i].getInfo().c_str() ); 47 | printf("\tVendor: %s\n", devices[i].getInfo().c_str() ); 48 | printf("\tDevice Version: %s\n", devices[i].getInfo().c_str() ); 49 | printf("\tDriver Version: %s\n", devices[i].getInfo().c_str() ); 50 | } 51 | 52 | return CL_SUCCESS; 53 | } 54 | 55 | /* Entry point. Parses the command line with popl::OptionParser (no options are registered here); on a parse exception, an unknown option, or a stray non-option argument it prints the usage text to stderr and returns -1. Otherwise it lists every platform and, for each one, every device of type CL_DEVICE_TYPE_ALL, prints `Done.` and returns 0. */ int main( 56 | int argc, 57 | char** argv ) 58 | { 59 | { 60 | popl::OptionParser op("Supported Options"); 61 | 62 | bool printUsage = false; 63 | try { 64 | op.parse(argc, argv); 65 | } catch (std::exception& e) { 66 | fprintf(stderr, "Error: %s\n\n", e.what()); 67 | printUsage = true; 68 | } 69 | 70 | if (printUsage || !op.unknown_options().empty() || !op.non_option_args().empty()) { 71 | fprintf(stderr, 72 | "Usage: enumopenclpp [options]\n" 73 | "%s", op.help().c_str()); 74 | return -1; 75 | } 76 | } 77 | 78 | std::vector platforms; 79 | /* NOTE(review): whatever cl::Platform::get() returns is discarded here, so a failed query simply yields an empty list. */ cl::Platform::get(&platforms); 80 | 81 | for( size_t i = 0; i < platforms.size(); i++ ) 82 | { 83 | printf( "Platform[%zu]:\n", i ); 84 | PrintPlatformInfoSummary( platforms[i] ); 85 | 86 | std::vector devices; 87 | platforms[i].getDevices(CL_DEVICE_TYPE_ALL, &devices); 88 | 89 | PrintDeviceInfoSummary( devices ); 90 | printf( "\n" ); 91 | } 92 | 93 | printf( "Done.\n" ); 94 | 95 | return 0; 96 | } 97 | -------------------------------------------------------------------------------- /docs/env/ubuntu/19.04.md: -------------------------------------------------------------------------------- 1 | # Verify Hardware Support 2 | 3 | Most modern GPUs support OpenCL. For integrated graphics devices (iGPUs), use `lscpu` to get the processor SKU.
Detailed information for Intel SKUs is available from [ark.intel.com](https://ark.intel.com). Detailed information for AMD processors is available from [AMD's product page](https://www.amd.com/en/products/specifications/processors). 4 | 5 | # Build Dependencies 6 | 7 | OCL Headers: 8 | ``` 9 | $ sudo apt install opencl-c-headers opencl-clhpp-headers 10 | ``` 11 | The OpenCL API has its own set of header files; the above command installs both C and C++ header files. The C header can be found in `<CL/cl.h>`; the C++ header is in `<CL/cl2.hpp>`. 12 | 13 | OCL ICD Loader: 14 | 15 | ``` 16 | $ sudo apt install ocl-icd-opencl-dev 17 | ``` 18 | 19 | OpenCL applications generally link against an OpenCL Installable Client Driver (ICD) loader instead of a specific OpenCL implementation; see [https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc](https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc) for more information about this system. 20 | 21 | # Runtime Dependencies 22 | 23 | OpenCL requires a compute runtime to manage the interaction between the OpenCL API and the GPU. 24 | 25 | ## OCL ICD Loader 26 | 27 | ``` 28 | $ sudo apt install ocl-icd-libopencl1 29 | ``` 30 | 31 | OpenCL applications generally link against an OpenCL Installable Client Driver (ICD) loader instead of a specific OpenCL implementation; see [https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc](https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc) for more information about this system. 32 | 33 | ## Compute Runtime 34 | 35 | ### AMD 36 | 37 | AMD provides official support for LTS releases only. 38 | 39 | ### Intel 40 | Intel's OpenCL support is provided through the NEO compute runtime, available as a PPA.
41 | 42 | ``` 43 | $ sudo add-apt-repository ppa:intel-opencl/intel-opencl 44 | $ sudo apt update 45 | $ sudo apt install intel-opencl 46 | ``` 47 | 48 | ### Nvidia 49 | Nvidia provides OpenCL support through their proprietary driver, available from the [graphics-drivers PPA](https://launchpad.net/~graphics-drivers/+archive/ubuntu/ppa). 50 | 51 | ``` 52 | sudo add-apt-repository ppa:graphics-drivers/ppa 53 | sudo apt update 54 | sudo apt install nvidia-graphics-drivers- 55 | ``` 56 | 57 | ## Configure Permissions 58 | ``` 59 | $ sudo usermod -a -G video $USER 60 | ``` 61 | 62 | Users running OpenCL applications require direct access to the GPU; this access is granted by membership in the `video` group. 63 | 64 | ## Verification 65 | The `clinfo` utility can be used to verify the environment has been properly configured. 66 | 67 | ``` 68 | $ sudo apt install clinfo 69 | $ clinfo 70 | 71 | ``` 72 | 73 | ## Troubleshooting 74 | 75 | If `clinfo` indicates there are 0 supported platforms: 76 | 77 | 1. Verify your host has OpenCL-capable hardware attached 78 | 2. Verify clinfo is running as a user with direct GPU access (member of the `video` group) 79 | 3. Verify the correct compute runtime is installed 80 | -------------------------------------------------------------------------------- /samples/python/03_mandelbrot/mandelbrot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2019-2025 Ben Ashbaugh 4 | # 5 | # SPDX-License-Identifier: MIT 6 | 7 | from PIL import Image 8 | 9 | import numpy as np 10 | import pyopencl as cl 11 | import argparse 12 | import PIL 13 | 14 | filename = 'mandelbrot.bmp' 15 | 16 | width = 768 17 | height = 512 18 | 19 | maxIterations = 256 20 | 21 | kernelString = """ 22 | static inline int mandel(float c_re, float c_im, int count) { 23 | float z_re = c_re, z_im = c_im; 24 | int i; 25 | for (i = 0; i < count; ++i) { 26 | if (z_re * z_re + z_im * z_im > 4.) 
27 | break; 28 | 29 | float new_re = z_re*z_re - z_im*z_im; 30 | float new_im = 2.f * z_re * z_im; 31 | 32 | z_re = c_re + new_re; 33 | z_im = c_im + new_im; 34 | } 35 | 36 | return i; 37 | } 38 | kernel void Mandelbrot( 39 | float x0, float y0, 40 | float x1, float y1, 41 | int width, int height, 42 | int maxIterations, 43 | global int* output) 44 | { 45 | float dx = (x1 - x0) / width; 46 | float dy = (y1 - y0) / height; 47 | 48 | float x = x0 + get_global_id(0) * dx; 49 | float y = y0 + get_global_id(1) * dy; 50 | 51 | int index = get_global_id(1) * width + get_global_id(0); 52 | output[index] = mandel(x, y, maxIterations); 53 | } 54 | """ 55 | 56 | if __name__ == "__main__": 57 | parser = argparse.ArgumentParser() 58 | parser.add_argument('-p', '--platform', type=int, action='store', default=0, help='Platform Index') 59 | parser.add_argument('-d', '--device', type=int, action='store', default=0, help='Device Index') 60 | 61 | args = parser.parse_args() 62 | platformIndex = args.platform 63 | deviceIndex = args.device 64 | 65 | platforms = cl.get_platforms() 66 | print('Running on platform: ' + platforms[platformIndex].get_info(cl.platform_info.NAME)) 67 | 68 | devices = platforms[platformIndex].get_devices() 69 | print('Running on device: ' + devices[deviceIndex].get_info(cl.device_info.NAME)) 70 | 71 | context = cl.Context([devices[deviceIndex]]) 72 | commandQueue = cl.CommandQueue(context, devices[deviceIndex]) 73 | 74 | program = cl.Program(context, kernelString) 75 | program.build() 76 | kernel = program.Mandelbrot 77 | 78 | deviceMemDst = cl.Buffer(context, cl.mem_flags.ALLOC_HOST_PTR, 79 | width * height * np.uint32().itemsize) 80 | 81 | # execution 82 | kernel(commandQueue, [width, height], None, 83 | np.float32(-2.0), np.float32(-1.0), np.float32(1.0), np.float32(1.0), 84 | np.int32(width), np.int32(height), np.int32(maxIterations), deviceMemDst) 85 | 86 | # save bitmap 87 | mapped_dst, event = cl.enqueue_map_buffer(commandQueue, deviceMemDst, 88 | 
cl.map_flags.READ, 89 | 0, width * height, np.uint32) 90 | with mapped_dst.base: 91 | colors = np.fromiter((240 if x & 1 else 20 for x in mapped_dst), np.uint8) 92 | image = Image.fromarray(colors.reshape((height, width))) 93 | image.save(filename) 94 | print('Wrote image file {}'.format(filename)) 95 | -------------------------------------------------------------------------------- /samples/04_julia/README.md: -------------------------------------------------------------------------------- 1 | # Julia Set 2 | 3 | ## Sample Purpose 4 | 5 | This is another sample that generates a fractal image. 6 | It uses an OpenCL kernel to compute a [Julia set](https://en.wikipedia.org/wiki/Julia_set) image, which is then written to a BMP file. 7 | Each OpenCL work item computes one element of the set. 8 | 9 | By default, similar to prior samples, this sample does not specify a local work size when it enqueues an ND range for the kernel into an OpenCL command queue. 10 | This enables the OpenCL implementation to determine how to group work items, and is a reasonable best practice when work items may execute in any grouping. 11 | Unlike prior samples, however, this sample can optionally specify a local work size grouping. 12 | The local work size grouping is one way to tune an application for an architecture. 13 | Can you find a local work size grouping that performs better than the implementation-determined grouping? 14 | Is there a local work size grouping that performs very poorly on your implementation? 15 | 16 | Note that because this kernel is compiled as an OpenCL 1.x kernel, the local work size grouping must evenly divide the global work size. 17 | Since the default global work size is a power of two, this means that the local work size grouping must also be a power of two. 18 | 19 | This sample also demonstrates how to measure the wall clock time for an OpenCL kernel. 
20 | Note especially how `clFinish` is used to ensure that the OpenCL command queue is empty before starting the timer, and that all processing is complete before stopping the timer. 21 | 22 | ![Julia Set Image](julia.png) 23 | 24 | As with prior samples, the source code for the OpenCL kernel is embedded into the host code as a raw string, and by default, this sample will run in the first enumerated OpenCL device on the first enumerated OpenCL platform. 25 | To run on a different OpenCL device or platform, please use the provided command line options. 26 | 27 | ## Key APIs and Concepts 28 | 29 | This example shows how to specify a local work size grouping when a kernel is enqueued into an OpenCL command queue. 30 | It also demonstrates how to measure the performance of an OpenCL kernel. 31 | 32 | ```c 33 | clEnqueueNDRangeKernel with a local work size 34 | clFinish 35 | ``` 36 | 37 | ## Command Line Options 38 | 39 | | Option | Default Value | Description | 40 | |:--|:-:|:--| 41 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute on the sample on. 42 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 43 | | `-i ` | 16 | Specify the number of iterations to execute. 44 | | `--gwx ` | 512 | Specify the global work size to execute, in the X direction. This also determines the width of the generated image. 45 | | `--gwy ` | 512 | Specify the global work size to execute, in the Y direction. This also determines the height of the generated image. 46 | | `--lwx ` | 0 | Specify the local work size in the X direction. If either local works size dimension is zero a `NULL` local work size is used. 47 | | `--lwy ` | 0 | Specify the local work size in the Y direction. If either local works size dimension is zero a `NULL` local work size is used. 
48 | -------------------------------------------------------------------------------- /docs/env/ubuntu/19.10.md: -------------------------------------------------------------------------------- 1 | # Verify Hardware Support 2 | 3 | Most modern GPUs support OpenCL. For integrated graphics devices (iGPUs), use `lscpu` to get the processor SKU. Detailed information for Intel SKUs is available from [ark.intel.com](https://ark.intel.com). Detailed information for AMD processors is available from [AMD's product page](https://www.amd.com/en/products/specifications/processors). 4 | 5 | # Build Dependencies 6 | 7 | OCL Headers: 8 | ``` 9 | $ sudo apt install opencl-c-headers opencl-clhpp-headers 10 | ``` 11 | The OpenCL API has its own set of header files; the above command installs both C and C++ header files. The C header can be found in `<CL/cl.h>`; the C++ header is in `<CL/cl2.hpp>`. 12 | 13 | OCL ICD Loader: 14 | 15 | ``` 16 | $ sudo apt install ocl-icd-opencl-dev 17 | ``` 18 | 19 | OpenCL applications generally link against an OpenCL Installable Client Driver (ICD) loader instead of a specific OpenCL implementation; see [https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc](https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc) for more information about this system. 20 | 21 | # Runtime Dependencies 22 | 23 | OpenCL requires a compute runtime to manage the interaction between the OpenCL API and the GPU. 24 | 25 | ## OCL ICD Loader 26 | 27 | ``` 28 | $ sudo apt install ocl-icd-libopencl1 29 | ``` 30 | 31 | OpenCL applications generally link against an OpenCL Installable Client Driver (ICD) loader instead of a specific OpenCL implementation; see [https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc](https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc) for more information about this system. 32 | 33 | ## Compute Runtime 34 | 35 | ### AMD 36 | 37 | AMD provides official support for LTS releases only.
38 | 39 | ### Intel 40 | Intel's OpenCL support is provided through the NEO compute runtime, available as a PPA. 41 | 42 | ``` 43 | $ sudo add-apt-repository ppa:intel-opencl/intel-opencl 44 | $ sudo apt update 45 | $ sudo apt install intel-opencl 46 | ``` 47 | 48 | ### Nvidia 49 | Nvidia provides OpenCL support through their proprietary driver, available from the [graphics-drivers PPA](https://launchpad.net/~graphics-drivers/+archive/ubuntu/ppa). 50 | 51 | ``` 52 | $ sudo add-apt-repository ppa:graphics-drivers/ppa 53 | $ sudo apt update 54 | $ sudo apt install nvidia-graphics-drivers- 55 | ``` 56 | 57 | ## Configure Permissions 58 | ``` 59 | $ sudo usermod -a -G video $USER 60 | $ sudo usermod -a -G render $USER 61 | ``` 62 | 63 | Users running OpenCL applications on a GPU require additional permissions granted by the groups above. 64 | 65 | ## Verification 66 | The `clinfo` utility can be used to verify the environment has been properly configured. 67 | 68 | ``` 69 | $ sudo apt install clinfo 70 | $ clinfo 71 | 72 | ``` 73 | 74 | ## Troubleshooting 75 | 76 | If `clinfo` indicates there are 0 supported platforms: 77 | 78 | 1. Verify your host has OpenCL-capable hardware attached 79 | 2. Verify clinfo is running as a user with appropriate group membership 80 | 3. Verify new group membership has been applied (this may require logout or reboot) 81 | 4. Verify the correct compute runtime is installed 82 | -------------------------------------------------------------------------------- /samples/vulkan/README.md: -------------------------------------------------------------------------------- 1 | # Vulkan Samples 2 | 3 | This directory contains samples that use [Vulkan](https://www.vulkan.org/) to visualize results. 4 | Vulkan is a widely supported industry standard for graphics and rendering. 5 | Vulkan is a lower-level API that can provide more control and hence more performance compared to other graphics APIs like OpenGL. 
6 | Several recent OpenCL extensions enable [interop between OpenCL and Vulkan devices](https://www.khronos.org/blog/khronos-releases-opencl-3.0-extensions-for-neural-network-inferencing-and-opencl-vulkan-interop). 7 | 8 | ## Dependencies 9 | 10 | These samples require Vulkan headers and libraries, which are most commonly provided by the [Vulkan SDK](https://www.lunarg.com/vulkan-sdk/). 11 | 12 | Additionally, these samples use [GLFW](https://www.glfw.org/) to abstract many of the operating system specific parts of Vulkan. 13 | GLFW supports Windows, macOS, and Linux. 14 | Pre-built packages are available for many platforms, or GLFW may be built from source. 15 | 16 | If these dependencies are not found then the Vulkan samples will not be built. 17 | 18 | ## Validation Layers 19 | 20 | Please note that `Debug` builds will enable Vulkan validation layers to help catch bugs and to verify correct usage of the Vulkan APIs. 21 | 22 | ### Using Pre-built GLFW Packages 23 | 24 | This is the preferred method for using GLFW. 25 | Please see the [GLFW Download Page](https://www.glfw.org/download.html) for details. 26 | 27 | ### Building GLFW from Source 28 | 29 | The following steps are recommended when GLFW is built from source. 30 | Please refer to the [Compiling GLFW](https://www.glfw.org/docs/latest/compile_guide.html) reference page for details. 31 | 32 | 1. Build GLFW separately from these Vulkan samples. 33 | The GLFW source code may be cloned into a completely separate directory or into the `external` directory for these samples. 34 | 2. Build GLFW as a static library. 35 | 3. On Linux, build GLFW for X11. 36 | 4. Build a `Release` or `RelWithDebInfo` GLFW. 37 | 5. Install GLFW either into the `external` directory for the samples (recommended), or into a system directory. 38 | If GLFW is installed into the `external` directory it _should_ be detected automatically by these samples. 39 | 40 | Sample build instructions: 41 | 42 | 1. Clone the GLFW source code.
43 | In these instructions we will clone into the `external` directory. 44 | 45 | ```sh 46 | $ git clone https://github.com/glfw/glfw.git external/glfw-src 47 | ``` 48 | 49 | 2. Create build files. 50 | 51 | ```sh 52 | $ cd external/glfw-src && mkdir build && cd build 53 | $ cmake .. \ 54 | -DCMAKE_BUILD_TYPE=Release \ 55 | -DCMAKE_INSTALL_PREFIX=/path/to/your/SimpleOpenCLSamples/external/glfw \ 56 | -DGLFW_BUILD_DOCS=0 -DGLFW_BUILD_EXAMPLES=0 -DGLFW_BUILD_TESTS=0 57 | ``` 58 | 59 | 3. Build and install GLFW into the `external` directory. 60 | 61 | ```sh 62 | $ cmake --build . --target install --config Release 63 | ``` 64 | 65 | After installing, GLFW should be found by the OpenCL samples. 66 | 67 | ## Summary of Vulkan Samples 68 | 69 | * [juliavk](./00_juliavk): Demonstrates sharing a Vulkan texture with OpenCL. 70 | * [nbodyvk](./01_nbodyvk): Demonstrates sharing a Vulkan buffer with OpenCL. 71 | -------------------------------------------------------------------------------- /samples/usm/400_sysmemhelloworld/README.md: -------------------------------------------------------------------------------- 1 | # sysmemhelloworld 2 | 3 | ## Sample Purpose 4 | 5 | This sample demonstrates usage of shared system memory allocations. 6 | Shared system memory is allocated using a system allocator, such as `malloc` or `new`. 7 | Other similar samples demonstrate usage of device memory, host memory, and shared memory that is allocated using special unified shared memory APIs. 8 | 9 | Just like the shared memory that is allocated using special unified shared memory APIs, shared system memory allocations share ownership and are intended to implicitly migrate between the host and one or more devices. 10 | Shared system memory allocations are the easiest way to enable applications to use Unified Shared Memory, but implementing shared system memory requires support from the OpenCL device, the OpenCL implementation, and the operating system, and its usage is not widespread (yet!).
11 | 12 | The sample initializes a source USM allocation, copies it to a destination USM allocation using a kernel, then checks on the host that the copy was performed correctly. 13 | 14 | ## Key APIs and Concepts 15 | 16 | This sample allocates shared system memory using the standard system `malloc` and frees it using `free`. 17 | 18 | Since shared system memory may be directly accessed and manipulated on the host, this sample does not need to use any special Unified Shared Memory APIs to copy to or from a shared system allocation, or to map or unmap a shared system allocation. 19 | Instead, this sample simply ensures that the copy kernel is complete before verifying that the copy was performed correctly. 20 | For simplicity, this sample ensures all commands in the command queue are complete using `clFinish`, but other completion mechanisms could be used instead that may be more efficient. 21 | 22 | Within a kernel, a Unified Shared Memory allocation can be accessed similarly to an OpenCL buffer (a `cl_mem`), or a Shared Virtual Memory allocation. 23 | Unified Shared Memory allocations are set as an argument to a kernel using `clSetKernelArgMemPointerINTEL`. 24 | 25 | When profiling an application using shared memory allocations, be aware that migrations between the host and the device may be occurring implicitly. 26 | These implicit transfers may cause additional apparent latency when launching a kernel (for transfers to the device) or completion latency (for transfers to the host) versus device memory or host memory allocations. 27 | 28 | Since Unified Shared Memory is an OpenCL extension, this sample uses the `OpenCLExt` extension loader library to query the extension APIs. 29 | Please see the OpenCL Extension Loader [README](https://github.com/bashbaug/opencl-extension-loader) for more detail. 30 | 31 | This sample currently uses C APIs because the C++ bindings do not support Unified Shared Memory (yet).
32 | When support for Unified Shared Memory is added to the C++ bindings the samples will be updated to use the C++ bindings instead, which should simplify the sample slightly. 33 | 34 | ## Command Line Options 35 | 36 | | Option | Default Value | Description | 37 | |:--|:-:|:--| 38 | | `-d <index>` | 0 | Specify the index of the OpenCL device in the platform to execute the sample on. 39 | | `-p <index>` | 0 | Specify the index of the OpenCL platform to execute the sample on. 40 | -------------------------------------------------------------------------------- /docs/env/ubuntu/18.04.md: -------------------------------------------------------------------------------- 1 | # Verify Hardware Support 2 | 3 | Most modern GPUs support OpenCL. For integrated graphics devices (iGPUs), use `lscpu` to get the processor SKU. Detailed information for Intel SKUs is available from [ark.intel.com](https://ark.intel.com). Detailed information for AMD processors is available from [AMD's product page](https://www.amd.com/en/products/specifications/processors). 4 | 5 | # Build Dependencies 6 | 7 | OCL Headers: 8 | ``` 9 | $ sudo apt install opencl-c-headers opencl-clhpp-headers 10 | ``` 11 | The OpenCL API has its own set of header files; the above command installs both the C and C++ header files. The C header can be found in `<CL/cl.h>`; the C++ header is in `<CL/cl2.hpp>`. 12 | 13 | OCL ICD Loader: 14 | 15 | ``` 16 | $ sudo apt install ocl-icd-opencl-dev 17 | ``` 18 | 19 | OpenCL applications generally link against an OpenCL Installable Client Driver (ICD) loader instead of a specific OpenCL implementation; see [https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc](https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc) for more information about this system. 20 | 21 | # Runtime Dependencies 22 | 23 | OpenCL requires a compute runtime to manage the interaction between the OpenCL API and the GPU.
24 | 25 | ## OCL ICD Loader 26 | 27 | ``` 28 | $ sudo apt install ocl-icd-libopencl1 29 | ``` 30 | 31 | OpenCL applications generally link against an OpenCL Installable Compute Device (ICD) loader instead of a specific OpenCL implementation; see [https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc](https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc) for more information about this system. 32 | 33 | ## Compute Runtime 34 | 35 | ### AMD 36 | 37 | AMD supports OpenCL through their ROCm platform. Installation instructions are [here](https://rocmdocs.amd.com/en/latest/Installation_Guide/Installation-Guide.html#ubuntu). 38 | 39 | ### Intel 40 | Intel's OpenCL support is provided through the NEO compute runtime, available as a PPA. 41 | 42 | ``` 43 | $ sudo add-apt-repository ppa:intel-opencl/intel-opencl 44 | $ sudo apt update 45 | $ sudo apt install intel-opencl 46 | ``` 47 | 48 | ### Nvidia 49 | Nvidia provides OpenCL support through their proprietary driver, available from the [graphics-drivers PPA](https://launchpad.net/~graphics-drivers/+archive/ubuntu/ppa). 50 | 51 | ``` 52 | sudo add-apt-repository ppa:graphics-drivers/ppa 53 | sudo apt update 54 | sudo apt install nvidia-graphics-drivers- 55 | ``` 56 | 57 | ## Configure Permissions 58 | ``` 59 | $ sudo usermod -a -G video $USER 60 | ``` 61 | 62 | Users running OpenCL applications require direct access to the GPU; this access is granted by membership in the `video` group. 63 | 64 | ## Verification 65 | The `clinfo` utility can be used to verify the environment has been properly configured. 66 | 67 | ``` 68 | $ sudo apt install clinfo 69 | $ clinfo 70 | 71 | ``` 72 | 73 | ## Troubleshooting 74 | 75 | If `clinfo` indicates there are 0 supported platforms: 76 | 77 | 1. Verify your host has OpenCL-capable hardware attached 78 | 2. Verify clinfo is running as a user with direct GPU access (member of the `video` group) 79 | 3. 
Verify the correct compute runtime is installed 80 | -------------------------------------------------------------------------------- /samples/05_kernelfromfile/README.md: -------------------------------------------------------------------------------- 1 | # kernelfromfile 2 | 3 | ## Sample Purpose 4 | 5 | In all of the samples so far the OpenCL C kernel source has been defined in the host code as a C++ raw string literal. 6 | This is convenient for samples because the kernel source gets embedded into the compiled application executable. 7 | In some cases, though, it is convenient to keep the kernel source separate from the application instead. 8 | For example, if the kernel source is in a separate file, the kernel may be modified without rebuilding the application or requiring host application source code. 9 | This sample demonstrates how to read kernel code from a separate file. 10 | 11 | Notes: 12 | 13 | 1. This sample builds and runs the kernel in the file, but does not check for specific results. 14 | 2. To run successfully, the kernel should accept a single global memory kernel argument, and should write fewer than `gwx` 32-bit values to the kernel argument buffer. 15 | 3. The `install` target (`make install` on Linux, or right-click on `INSTALL` and build in Visual Studio, for example) will automatically copy the kernel file to the install directory with the application directory. 16 | 17 | ## Key APIs and Concepts 18 | 19 | This sample also demonstrates additional API features that are often useful when building OpenCL programs: 20 | 21 | * This sample supports optional program "build options". 22 | * This sample queries and prints the program "build log" after compilation. 23 | The program build log contains compiler diagnostics, such as build errors or warnings. 
24 | 25 | ```c 26 | clBuildProgram with build options 27 | clGetProgramBuildInfo with CL_PROGRAM_BUILD_LOG 28 | ``` 29 | 30 | ## Things to Try 31 | 32 | Here are some suggested ways to modify this sample to learn more: 33 | 34 | 1. Change the kernel source file to write a different value to the result buffer. 35 | If you make a mistake and the kernel is syntactically incorrect, what gets printed in the program build log? 36 | 2. Pass in a program build option and observe how it modifies how the kernel is compiled or the behavior of the kernel. 37 | The easiest way to do this is to define a preprocessor symbol with `-D`. 38 | Or, compile for a specific OpenCL C version using `-cl-std`. 39 | 3. Print a value from the kernel using `printf`. 40 | Do you see the value printed that you expect? 41 | 4. Modify the host code to print the first few values in the result buffer, or to validate that the results are what you expect. 42 | Can you read the expected result buffer from a file? 43 | 5. Modify the host code to pass an additional buffer to the OpenCL kernel. 44 | Can you initialize the contents of the buffer from a file? 45 | 46 | ## Command Line Options 47 | 48 | | Option | Default Value | Description | 49 | |:--|:-:|:--| 50 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute on the sample on. 51 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 52 | | `--file ` | `sample_kernel.cl` | Specify the name of the file with the OpenCL kernel source. 53 | | `--name ` | `Test` | Specify the name of the OpenCL kernel in the source file. 54 | | `--options ` | None | Specify optional program build options. 55 | | `--gwx ` | 512 | Specify the global work size to execute. 
56 | -------------------------------------------------------------------------------- /layers/10_cmdbufemu/README.md: -------------------------------------------------------------------------------- 1 | # Command Buffer Emulation 2 | 3 | ## Layer Purpose 4 | 5 | This is a layer that demonstrates how to emulate functionality - in this case, the [cl_khr_command_buffer](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_command_buffer) extension and the related [cl_khr_command_buffer_mutable_dispatch](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_command_buffer_mutable_dispatch) extensions - using a layer. 6 | It works by intercepting calls to `clGetExtensionFunctionAddressForPlatform` to query function pointers for the `cl_khr_command_buffer` and `cl_khr_command_buffer_mutable_dispatch` extension APIs. 7 | If a query succeeds by default then the layer does nothing and simply returns the queried function pointer as-is. 8 | If the query is unsuccessful however, then the layer returns its own function pointer, which will record the contents of the command buffer for later playback. 9 | 10 | This command buffer emulation layer currently implements v0.9.8 of the `cl_khr_command_buffer` extension and v0.9.5 of the `cl_khr_command_buffer_mutable_dispatch` extension. 11 | The functionality in this emulation layer is sufficient to run the command buffer samples in this repository. 12 | 13 | Please note that the emulated command buffers are intended to be functional, but unlike a native implementation, they may not provide any performance benefit over similar code without using command buffers. 14 | 15 | ## Layer Requirement 16 | 17 | Because this layer calls `clCloneKernel` when recording a command buffer it requires an OpenCL 2.1 or newer device. 
18 | If an older device is detected then the layer will not advertise support for the `cl_khr_command_buffer` or `cl_khr_command_buffer_mutable_dispatch` extensions. 19 | 20 | ## Key APIs and Concepts 21 | 22 | The most important concepts to understand from this sample are how to intercept `clGetExtensionFunctionAddressForPlatform` to return emulated functions for an extension. 23 | 24 | ```c 25 | clGetExtensionFunctionAddressForPlatform 26 | clInitLayer 27 | ``` 28 | 29 | ## Optional Controls 30 | 31 | The following environment variables can modify the behavior of the command buffer emulation layer: 32 | 33 | | Environment Variable | Behavior | Example Format | 34 | |----------------------|----------|-----------------| 35 | | `CMDBUFEMU_EnhancedErrorChecking` | Enables additional error checking when commands are added to a command buffer using a command buffer "test queue". By default, the additional error checking is disabled. | `export CMDBUFEMU_EnhancedErrorChecking=1`

`set CMDBUFEMU_EnhancedErrorChecking=1` | 36 | | `CMDBUFEMU_KernelForProfiling` | Enables use of an empty kernel for event profiling instead of event profiling on a command-queue barrier. By default, to minimize overhead, the empty kernel is not used. | `export CMDBUFEMU_KernelForProfiling=1`

`set CMDBUFEMU_KernelForProfiling=1` | 37 | 38 | ## Known Limitations 39 | 40 | This section describes some of the limitations of the emulated `cl_khr_command_buffer` functionality: 41 | 42 | * Some error conditions are not properly checked for and returned. 43 | * Deferred kernel arguments are supported, but `CL_COMMAND_BUFFER_STATE_FINALIZED_KHR` is not properly handled. 44 | * Many functions are not thread safe. 45 | -------------------------------------------------------------------------------- /samples/usm/00_usmqueries/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright (c) 2020-2025 Ben Ashbaugh 3 | // 4 | // SPDX-License-Identifier: MIT 5 | */ 6 | 7 | #include 8 | 9 | #include 10 | 11 | /* Prints `label` followed by the name of every USM capability bit that is set in `usmcaps`. Each set capability is emitted on its own line indented by two tabs; an unset bit contributes an empty string, so a value of 0 prints only the label line. */ void PrintUSMCaps( 12 | const char* label, 13 | cl_device_unified_shared_memory_capabilities_intel usmcaps ) 14 | { 15 | printf("\t%s: %s%s%s%s\n", 16 | label, 17 | ( usmcaps & CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL ) ? "\n\t\tCL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL" : "", 18 | ( usmcaps & CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL ) ? "\n\t\tCL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL" : "", 19 | ( usmcaps & CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL ) ? "\n\t\tCL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL" : "", 20 | ( usmcaps & CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL ) ? 
"\n\t\tCL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL" : "" ); 21 | } 22 | 23 | /* Entry point. Validates the command line, then walks every OpenCL platform and every device on it, printing five USM capability bitfields per device via PrintUSMCaps. Returns -1 after printing the usage text when the command line is rejected, and 0 otherwise. */ int main( 24 | int argc, 25 | char** argv ) 26 | { 27 | { /* Option handling lives in its own scope. No options are registered on the parser here, so a parse error, an unknown option, or a stray non-option argument all lead to the usage message below. */ 28 | popl::OptionParser op("Supported Options"); 29 | 30 | bool printUsage = false; 31 | try { 32 | op.parse(argc, argv); 33 | } catch (std::exception& e) { 34 | fprintf(stderr, "Error: %s\n\n", e.what()); 35 | printUsage = true; 36 | } 37 | 38 | if (printUsage || !op.unknown_options().empty() || !op.non_option_args().empty()) { 39 | fprintf(stderr, 40 | "Usage: usmqueries [options]\n" 41 | "%s", op.help().c_str()); 42 | return -1; 43 | } 44 | } 45 | 46 | std::vector platforms; 47 | cl::Platform::get(&platforms); 48 | 49 | for( size_t i = 0; i < platforms.size(); i++ ) 50 | { 51 | printf( "Platform[%zu]: %s\n", 52 | i, 53 | platforms[i].getInfo().c_str()); 54 | 55 | std::vector devices; 56 | platforms[i].getDevices(CL_DEVICE_TYPE_ALL, &devices); 57 | 58 | for( size_t d = 0; d < devices.size(); d++ ) 59 | { 60 | printf("\tDevice[%zu]: %s\n", 61 | d, 62 | devices[d].getInfo().c_str()); 63 | 64 | cl_device_unified_shared_memory_capabilities_intel usmcaps = 0; 65 | 66 | /* NOTE(review): the getInfo query parameter is not visible in this listing; presumably each query matches the label passed to the PrintUSMCaps call that follows it - confirm against the original file. */ usmcaps = devices[d].getInfo(); 67 | PrintUSMCaps( "CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL", usmcaps ); 68 | 69 | usmcaps = devices[d].getInfo(); 70 | PrintUSMCaps( "CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL", usmcaps ); 71 | 72 | usmcaps = devices[d].getInfo(); 73 | PrintUSMCaps( "CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL", usmcaps ); 74 | 75 | usmcaps = devices[d].getInfo(); 76 | PrintUSMCaps( "CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL", usmcaps ); 77 | 78 | usmcaps = devices[d].getInfo(); 79 | PrintUSMCaps( "CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL", usmcaps ); 80 | 81 | printf( "\n" ); 82 | } 83 | } 84 | 85 | printf("Cleaning up...\n"); 86 | 87 | return 0; 88 | } -------------------------------------------------------------------------------- /samples/opengl/00_juliagl/README.md:
-------------------------------------------------------------------------------- 1 | # Julia Set with OpenGL 2 | 3 | ## Sample Purpose 4 | 5 | This is a modified version of the earlier Julia set sample. 6 | Similar to the earlier Julia Set sample, an OpenCL kernel is used to generate a Julia set image. 7 | The main difference between this sample and the earlier sample is that in this sample the Julia set image is used as an OpenGL texture and rendered to the screen instead of writing it to a BMP file. 8 | 9 | This sample can share the OpenGL texture with OpenCL when supported. 10 | In order to share the OpenGL texture with OpenCL, the OpenCL device must support the [cl_khr_gl_sharing](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_gl_sharing) extension, and the OpenCL device must support sharing with the OpenGL context. 11 | If sharing is not supported then the application will still run, but the Julia set image will be copied from OpenCL to OpenGL on the host. 12 | 13 | Additionally, this sample can use implicit synchronization between OpenGL and OpenCL when supported. 14 | Implicit synchronization requires support for the [cl_khr_gl_event](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_gl_event) extension. 15 | If implicit synchronization is not supported then the application will still run, but synchronization will be done manually. 16 | 17 | ## Key APIs and Concepts 18 | 19 | This example shows how to share an OpenGL texture with OpenCL. 20 | 21 | ```c 22 | clGetGLContextInfoKHR 23 | clCreateFromGLTexture2D 24 | clEnqueueAcquireGLObjects 25 | clEnqueueReleaseGLObjects 26 | ``` 27 | 28 | ## Command Line Options 29 | 30 | Note: Many of these command line arguments are identical to the earlier Julia set sample. 31 | 32 | | Option | Default Value | Description | 33 | |:--|:-:|:--| 34 | | `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute on the sample on. 
35 | | `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. 36 | | `--hostcopy` | n/a | Do not use the `cl_khr_gl_sharing` extension and unconditionally copy on the host. 37 | | `--hostsync` | n/a | Do not use the `cl_khr_gl_event` extension and exclusively synchronize on the host. 38 | | `--gwx ` | 512 | Specify the global work size to execute, in the X direction. This also determines the width of the generated image. 39 | | `--gwy ` | 512 | Specify the global work size to execute, in the Y direction. This also determines the height of the generated image. 40 | | `--lwx ` | 0 | Specify the local work size in the X direction. If either local work size dimension is zero a `NULL` local work size is used. 41 | | `--lwy ` | 0 | Specify the local work size in the Y direction. If either local work size dimension is zero a `NULL` local work size is used. 42 | | `--paused` | n/a | Start with the animation paused. 43 | 44 | ## Controls While Running 45 | 46 | | Control | Description | 47 | |:--|:--| 48 | | `Escape` | Exits from the sample. 49 | | `Space` | Toggle animation. 50 | | `V` | Toggle vsync (default: `true`). Disabling vsync may increase framerate but may cause [screen tearing](https://en.wikipedia.org/wiki/Screen_tearing). 51 | | `A` | Increase the real part of the complex constant `C`. 52 | | `Z` | Decrease the real part of the complex constant `C`. 53 | | `S` | Increase the imaginary part of the complex constant `C`. 54 | | `X` | Decrease the imaginary part of the complex constant `C`. 55 | -------------------------------------------------------------------------------- /docs/env/ubuntu/22.04.md: -------------------------------------------------------------------------------- 1 | # Ubuntu 22.04 Setup Instructions 2 | 3 | ## Verify Hardware Support 4 | 5 | Most modern GPUs support OpenCL. For integrated graphics devices (iGPUs), use `lscpu` to get the processor SKU.
Detailed information for Intel SKUs is available from [ark.intel.com](ark.intel.com). Detailed information for AMD processors is available from [AMD's product page](https://www.amd.com/en/products/specifications/processors). 6 | 7 | ## Build Dependencies 8 | 9 | OCL Headers: 10 | 11 | ```bash 12 | sudo apt install opencl-c-headers opencl-clhpp-headers 13 | ``` 14 | 15 | The OpenCL API has its own set of header files; the above command installs both C and C++ headers files. The C header can be found in ``; the C++ header is in ``. 16 | 17 | OCL ICD Loader: 18 | 19 | ```bash 20 | sudo apt install ocl-icd-opencl-dev 21 | ``` 22 | 23 | OpenCL applications generally link against an OpenCL Installable Compute Device (ICD) loader instead of a specific OpenCL implementation; see [https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc](https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc) for more information about this system. 24 | 25 | ## Runtime Dependencies 26 | 27 | OpenCL requires a compute runtime to manage the interaction between the OpenCL API and the GPU. 28 | 29 | ### OCL ICD Loader 30 | 31 | ```bash 32 | sudo apt install ocl-icd-libopencl1 33 | ``` 34 | 35 | OpenCL applications generally link against an OpenCL Installable Compute Device (ICD) loader instead of a specific OpenCL implementation; see [https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc](https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc) for more information about this system. 36 | 37 | ### AMD Compute Runtime 38 | 39 | AMD supports OpenCL through their ROCm platform. Installation instructions are [here](https://rocmdocs.amd.com/en/latest/Installation_Guide/Installation-Guide.html#ubuntu). 40 | 41 | ### Intel Compute Runtime 42 | 43 | Intel's OpenCL support is provided through the NEO compute runtime. 
Download packages from the project's [GitHub releases page](https://github.com/intel/compute-runtime/releases). 44 | 45 | ### Nvidia Compute Runtime 46 | 47 | Nvidia provides OpenCL support through their proprietary driver, available from the [graphics-drivers PPA](https://launchpad.net/~graphics-drivers/+archive/ubuntu/ppa). 48 | 49 | ```bash 50 | sudo add-apt-repository ppa:graphics-drivers/ppa 51 | sudo apt update 52 | sudo apt install nvidia-graphics-drivers- 53 | ``` 54 | 55 | ### Configure Permissions 56 | 57 | ```bash 58 | sudo usermod -a -G video $USER 59 | sudo usermod -a -G render $USER 60 | ``` 61 | 62 | Users running OpenCL applications on a GPU require additional permissions granted by the groups above. 63 | 64 | ## Verification 65 | 66 | The `clinfo` utility can be used to verify the environment has been properly configured. 67 | 68 | ```bash 69 | $ sudo apt install clinfo 70 | $ clinfo 71 | 72 | ``` 73 | 74 | ## Troubleshooting 75 | 76 | If `clinfo` indicates there are 0 supported platforms: 77 | 78 | 1. Verify your host has OpenCL-capable hardware attached 79 | 2. Verify clinfo is running as a user with appropriate group membership 80 | 3. Verify new group membership has been applied (this may require logout or reboot) 81 | 4. Verify the correct compute runtime is installed 82 | 5. Check the kernel log (`sudo dmesg`) for related errors 83 | -------------------------------------------------------------------------------- /docs/env/ubuntu/24.04.md: -------------------------------------------------------------------------------- 1 | # Ubuntu 24.04 Setup Instructions 2 | 3 | ## Verify Hardware Support 4 | 5 | Most modern GPUs support OpenCL. For integrated graphics devices (iGPUs), use `lscpu` to get the processor SKU. Detailed information for Intel SKUs is available from [ark.intel.com](ark.intel.com). Detailed information for AMD processors is available from [AMD's product page](https://www.amd.com/en/products/specifications/processors). 
6 | 7 | ## Build Dependencies 8 | 9 | OCL Headers: 10 | 11 | ```bash 12 | sudo apt install opencl-c-headers opencl-clhpp-headers 13 | ``` 14 | 15 | The OpenCL API has its own set of header files; the above command installs both C and C++ headers files. The C header can be found in ``; the C++ header is in ``. 16 | 17 | OCL ICD Loader: 18 | 19 | ```bash 20 | sudo apt install ocl-icd-opencl-dev 21 | ``` 22 | 23 | OpenCL applications generally link against an OpenCL Installable Compute Device (ICD) loader instead of a specific OpenCL implementation; see [https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc](https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc) for more information about this system. 24 | 25 | ## Runtime Dependencies 26 | 27 | OpenCL requires a compute runtime to manage the interaction between the OpenCL API and the GPU. 28 | 29 | ### OCL ICD Loader 30 | 31 | ```bash 32 | sudo apt install ocl-icd-libopencl1 33 | ``` 34 | 35 | OpenCL applications generally link against an OpenCL Installable Compute Device (ICD) loader instead of a specific OpenCL implementation; see [https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc](https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc) for more information about this system. 36 | 37 | ### AMD Compute Runtime 38 | 39 | AMD supports OpenCL through their ROCm platform. Installation instructions are [here](https://rocmdocs.amd.com/en/latest/Installation_Guide/Installation-Guide.html#ubuntu). 40 | 41 | ### Intel Compute Runtime 42 | 43 | Intel's OpenCL support is provided through the NEO compute runtime. Download packages from the project's [GitHub releases page](https://github.com/intel/compute-runtime/releases). 44 | 45 | ### Nvidia Compute Runtime 46 | 47 | Nvidia provides OpenCL support through their proprietary driver, available from the [graphics-drivers PPA](https://launchpad.net/~graphics-drivers/+archive/ubuntu/ppa). 
48 | 49 | ```bash 50 | sudo add-apt-repository ppa:graphics-drivers/ppa 51 | sudo apt update 52 | sudo apt install nvidia-graphics-drivers- 53 | ``` 54 | 55 | ### Configure Permissions 56 | 57 | ```bash 58 | sudo usermod -a -G video $USER 59 | sudo usermod -a -G render $USER 60 | ``` 61 | 62 | Users running OpenCL applications on a GPU require additional permissions granted by the groups above. 63 | 64 | ## Verification 65 | 66 | The `clinfo` utility can be used to verify the environment has been properly configured. 67 | 68 | ```bash 69 | $ sudo apt install clinfo 70 | $ clinfo 71 | 72 | ``` 73 | 74 | ## Troubleshooting 75 | 76 | If `clinfo` indicates there are 0 supported platforms: 77 | 78 | 1. Verify your host has OpenCL-capable hardware attached 79 | 2. Verify clinfo is running as a user with appropriate group membership 80 | 3. Verify new group membership has been applied (this may require logout or reboot) 81 | 4. Verify the correct compute runtime is installed 82 | 5. Check the kernel log (`sudo dmesg`) for related errors 83 | -------------------------------------------------------------------------------- /docs/env/ubuntu/20.04.md: -------------------------------------------------------------------------------- 1 | # Ubuntu 20.04 Setup Instructions 2 | 3 | ## Verify Hardware Support 4 | 5 | Most modern GPUs support OpenCL. For integrated graphics devices (iGPUs), use `lscpu` to get the processor SKU. Detailed information for Intel SKUs is available from [ark.intel.com](ark.intel.com). Detailed information for AMD processors is available from [AMD's product page](https://www.amd.com/en/products/specifications/processors). 6 | 7 | ## Build Dependencies 8 | 9 | OCL Headers: 10 | 11 | ```bash 12 | sudo apt install opencl-c-headers opencl-clhpp-headers 13 | ``` 14 | 15 | The OpenCL API has its own set of header files; the above command installs both C and C++ headers files. The C header can be found in ``; the C++ header is in ``. 
16 | 17 | OCL ICD Loader: 18 | 19 | ```bash 20 | sudo apt install ocl-icd-opencl-dev 21 | ``` 22 | 23 | OpenCL applications generally link against an OpenCL Installable Compute Device (ICD) loader instead of a specific OpenCL implementation; see [https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc](https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc) for more information about this system. 24 | 25 | ## Runtime Dependencies 26 | 27 | OpenCL requires a compute runtime to manage the interaction between the OpenCL API and the GPU. 28 | 29 | ### OCL ICD Loader 30 | 31 | ```bash 32 | sudo apt install ocl-icd-libopencl1 33 | ``` 34 | 35 | OpenCL applications generally link against an OpenCL Installable Compute Device (ICD) loader instead of a specific OpenCL implementation; see [https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc](https://github.com/bashbaug/OpenCLPapers/blob/master/OpenCLOnLinux.asciidoc) for more information about this system. 36 | 37 | ### AMD Compute Runtime 38 | 39 | AMD supports OpenCL through their ROCm platform. Installation instructions are [here](https://rocmdocs.amd.com/en/latest/Installation_Guide/Installation-Guide.html#ubuntu). 40 | 41 | ### Intel Compute Runtime 42 | 43 | Intel's OpenCL support is provided through the NEO compute runtime, available as a PPA. 44 | 45 | ```bash 46 | sudo add-apt-repository ppa:intel-opencl/intel-opencl 47 | sudo apt update 48 | sudo apt install intel-opencl 49 | ``` 50 | 51 | ### Nvidia Compute Runtime 52 | 53 | Nvidia provides OpenCL support through their proprietary driver, available from the [graphics-drivers PPA](https://launchpad.net/~graphics-drivers/+archive/ubuntu/ppa). 
54 | 55 | ```bash 56 | sudo add-apt-repository ppa:graphics-drivers/ppa 57 | sudo apt update 58 | sudo apt install nvidia-graphics-drivers- 59 | ``` 60 | 61 | ### Configure Permissions 62 | 63 | ```bash 64 | sudo usermod -a -G video $USER 65 | sudo usermod -a -G render $USER 66 | ``` 67 | 68 | Users running OpenCL applications on a GPU require additional permissions granted by the groups above. 69 | 70 | ## Verification 71 | 72 | The `clinfo` utility can be used to verify the environment has been properly configured. 73 | 74 | ```bash 75 | $ sudo apt install clinfo 76 | $ clinfo 77 | 78 | ``` 79 | 80 | ## Troubleshooting 81 | 82 | If `clinfo` indicates there are 0 supported platforms: 83 | 84 | 1. Verify your host has OpenCL-capable hardware attached 85 | 2. Verify clinfo is running as a user with appropriate group membership 86 | 3. Verify new group membership has been applied (this may require logout or reboot) 87 | 4. Verify the correct compute runtime is installed 88 | 5. Check the kernel log (`sudo dmesg`) for related errors 89 | -------------------------------------------------------------------------------- /include/layer_util.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright (c) 2022-2025 Ben Ashbaugh 3 | // 4 | // SPDX-License-Identifier: MIT 5 | */ 6 | #pragma once 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | template 15 | cl_int writeParamToMemory( 16 | size_t param_value_size, 17 | T param, 18 | size_t* param_value_size_ret, 19 | T* pointer) 20 | { 21 | if (pointer != nullptr) { 22 | if (param_value_size < sizeof(param)) { 23 | return CL_INVALID_VALUE; 24 | } 25 | *pointer = param; 26 | } 27 | 28 | if (param_value_size_ret != nullptr) { 29 | *param_value_size_ret = sizeof(param); 30 | } 31 | 32 | return CL_SUCCESS; 33 | } 34 | 35 | template 36 | cl_int writeVectorToMemory( 37 | size_t param_value_size, 38 | const std::vector& param, 39 | size_t *param_value_size_ret, 
40 | T* pointer ) 41 | { 42 | size_t size = param.size() * sizeof(T); 43 | 44 | if (pointer != nullptr) { 45 | if (param_value_size < size) { 46 | return CL_INVALID_VALUE; 47 | } 48 | memcpy(pointer, param.data(), size); 49 | } 50 | 51 | if (param_value_size_ret != nullptr) { 52 | *param_value_size_ret = size; 53 | } 54 | 55 | return CL_SUCCESS; 56 | } 57 | 58 | static inline cl_int writeStringToMemory( 59 | size_t param_value_size, 60 | const char* param, 61 | size_t* param_value_size_ret, 62 | char* pointer ) 63 | { 64 | size_t size = strlen(param) + 1; 65 | 66 | if (pointer != nullptr) { 67 | if (param_value_size < size) { 68 | return CL_INVALID_VALUE; 69 | } 70 | strcpy(pointer, param); 71 | } 72 | 73 | if (param_value_size_ret != nullptr) { 74 | *param_value_size_ret = size; 75 | } 76 | 77 | return CL_SUCCESS; 78 | } 79 | 80 | static cl_version getOpenCLVersionFromString( 81 | const char* str) 82 | { 83 | cl_uint major = 0; 84 | cl_uint minor = 0; 85 | 86 | // The device version string has the form: 87 | // OpenCL . 
88 | const char* prefix = "OpenCL "; 89 | size_t sz = strlen(prefix); 90 | if (strlen(str) > sz && 91 | strncmp(str, prefix, sz) == 0) { 92 | const char* check = str + sz; 93 | while (isdigit(check[0])) { 94 | major *= 10; 95 | major += check[0] - '0'; 96 | ++check; 97 | } 98 | if (check[0] == '.') { 99 | ++check; 100 | } 101 | while (isdigit(check[0])) { 102 | minor *= 10; 103 | minor += check[0] - '0'; 104 | ++check; 105 | } 106 | } 107 | 108 | return CL_MAKE_VERSION(major, minor, 0); 109 | } 110 | 111 | static inline bool checkStringForExtension( 112 | const char* str, 113 | const char* extensionName ) 114 | { 115 | bool supported = false; 116 | 117 | if (extensionName && !strchr(extensionName, ' ')) { 118 | const char* start = str; 119 | while (true) { 120 | const char* where = strstr(start, extensionName); 121 | if (!where) { 122 | break; 123 | } 124 | const char* terminator = where + strlen(extensionName); 125 | if (where == start || *(where - 1) == ' ') { 126 | if (*terminator == ' ' || *terminator == '\0') { 127 | supported = true; 128 | break; 129 | } 130 | } 131 | start = terminator; 132 | } 133 | } 134 | 135 | return supported; 136 | } 137 | -------------------------------------------------------------------------------- /samples/00_spirvqueries/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright (c) 2025 Ben Ashbaugh 3 | // 4 | // SPDX-License-Identifier: MIT 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | #ifndef cl_khr_spirv_queries 15 | #define cl_khr_spirv_queries 1 16 | #define CL_KHR_SPIRV_QUERIES_EXTENSION_NAME "cl_khr_spirv_queries" 17 | #define CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR 0x12B9 18 | #define CL_DEVICE_SPIRV_EXTENSIONS_KHR 0x12BA 19 | #define CL_DEVICE_SPIRV_CAPABILITIES_KHR 0x12BB 20 | #endif 21 | 22 | #define SPV_ENABLE_UTILITY_CODE 23 | #include 24 | 25 | #include 26 | 27 | #include "util.hpp" 28 | 29 | int main( 30 | 
int argc, 31 | char** argv ) 32 | { 33 | { 34 | popl::OptionParser op("Supported Options"); 35 | 36 | bool printUsage = false; 37 | try { 38 | op.parse(argc, argv); 39 | } catch (std::exception& e) { 40 | fprintf(stderr, "Error: %s\n\n", e.what()); 41 | printUsage = true; 42 | } 43 | 44 | if (printUsage || !op.unknown_options().empty() || !op.non_option_args().empty()) { 45 | fprintf(stderr, 46 | "Usage: spirvqueries [options]\n" 47 | "%s", op.help().c_str()); 48 | return -1; 49 | } 50 | } 51 | 52 | std::vector platforms; 53 | cl::Platform::get(&platforms); 54 | 55 | for (size_t p = 0; p < platforms.size(); p++) { 56 | const cl::Platform& platform = platforms[p]; 57 | 58 | printf("Platform[%zu]:\n", p); 59 | printf("\tName: %s\n", platform.getInfo().c_str() ); 60 | printf("\tVendor: %s\n", platform.getInfo().c_str() ); 61 | printf("\tDriver Version: %s\n", platform.getInfo().c_str() ); 62 | 63 | std::vector devices; 64 | platform.getDevices(CL_DEVICE_TYPE_ALL, &devices); 65 | for (size_t d = 0; d < devices.size(); d++) { 66 | const cl::Device& device = devices[d]; 67 | 68 | printf("\tDevice[%zu]:\n", d); 69 | printf("\tName: %s\n", device.getInfo().c_str() ); 70 | printf("\tVendor: %s\n", device.getInfo().c_str() ); 71 | printf("\tDevice Version: %s\n", device.getInfo().c_str() ); 72 | printf("\tDriver Version: %s\n", device.getInfo().c_str() ); 73 | 74 | auto spirvExtendedInstructionSets = 75 | device.getInfo(); 76 | printf("\t\tSupported SPIR-V ExtendedInstructionSets:\n"); 77 | for (auto s : spirvExtendedInstructionSets) { 78 | printf("\t\t\t%s\n", s); 79 | } 80 | 81 | auto spirvExtensions = 82 | device.getInfo(); 83 | printf("\t\tSupported SPIR-V Extensions:\n"); 84 | for (auto s : spirvExtensions) { 85 | printf("\t\t\t%s\n", s); 86 | } 87 | 88 | auto spirvCapabilities = 89 | device.getInfo(); 90 | printf("\t\tSupported SPIR-V Capabilities:\n"); 91 | for (auto c : spirvCapabilities) { 92 | printf("\t\t\t%s\n", spv::CapabilityToString(static_cast(c))); 93 | } 94 
| 95 | printf("\n"); 96 | } 97 | 98 | printf( "Done.\n" ); 99 | 100 | return 0; 101 | } 102 | -------------------------------------------------------------------------------- /samples/opengl/02_sobelgl/README.md: -------------------------------------------------------------------------------- 1 | # Sobel Filter with OpenGL 2 | 3 | ## Sample Purpose 4 | 5 | This is an enhanced version of the earlier Julia set sample. 6 | Unlike the previous sample that simply displayed the generated Julia set image, this sample executes a [Sobel edge detection filter](https://en.wikipedia.org/wiki/Sobel_operator) on the generated Julia set before displaying it. 7 | 8 | This sample can share the OpenGL texture with OpenCL when supported. 9 | In order to share the OpenGL texture with OpenCL, the OpenCL device must support the [cl_khr_gl_sharing](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_gl_sharing) extension, and the OpenCL device must support sharing with the OpenGL context. 10 | If sharing is not supported then the application will still run, but the output image will be copied from OpenCL to OpenGL on the host. 11 | 12 | Additionally, this sample can use implicit synchronization between OpenGL and OpenCL when supported. 13 | Implicit synchronization requires support for the [cl_khr_gl_event](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_gl_event) extension. 14 | If implicit synchronization is not supported then the application will still run, but synchronization will be done manually. 15 | 16 | ## Key APIs and Concepts 17 | 18 | This example shows how to create a sampler, pass it to a kernel, and use it to read data from an image. 19 | 20 | ```c 21 | clCreateSampler 22 | ``` 23 | 24 | ## Things to Try 25 | 26 | Here are some suggested ways to modify this sample to learn more: 27 | 28 | 1. 
Rather than creating a sampler and passing it to the Sobel kernel, define the sampler in the kernel itself. 29 | Does this change the performance of the kernel? 30 | 2. Instead of using a sampler to access the image, use the sampler-less `read_imagef` functions. 31 | Be careful doing this because the sampler-less functions do not support reading out-of-bounds (the sampler-less address mode is implicitly `CL_ADDRESS_NONE`). 32 | Does this change the performance of the kernel? 33 | 34 | ## Command Line Options 35 | 36 | Note: Many of these command line arguments are identical to the earlier Julia set sample. 37 | 38 | | Option | Default Value | Description | 39 | |:--|:-:|:--| 40 | | `-d <index>` | 0 | Specify the index of the OpenCL device in the platform to execute the sample on. 41 | | `-p <index>` | 0 | Specify the index of the OpenCL platform to execute the sample on. 42 | | `--hostcopy` | n/a | Do not use the `cl_khr_gl_sharing` extension and unconditionally copy on the host. 43 | | `--hostsync` | n/a | Do not use the `cl_khr_gl_event` extension and exclusively synchronize on the host. 44 | | `--gwx <number>` | 512 | Specify the global work size to execute, in the X direction. This also determines the width of the generated image. 45 | | `--gwy <number>` | 512 | Specify the global work size to execute, in the Y direction. This also determines the height of the generated image. 46 | | `--lwx <number>` | 0 | Specify the local work size in the X direction. If either local work size dimension is zero a `NULL` local work size is used. 47 | | `--lwy <number>` | 0 | Specify the local work size in the Y direction. If either local work size dimension is zero a `NULL` local work size is used. 48 | | `--paused` | n/a | Start with the animation paused. 49 | 50 | ## Controls While Running 51 | 52 | | Control | Description | 53 | |:--|:--| 54 | | `Escape` | Exits from the sample. 55 | | `Space` | Toggle animation. 56 | | `V` | Toggle vsync (default: `true`). 
Disabling vsync may increase framerate but may cause [screen tearing](https://en.wikipedia.org/wiki/Screen_tearing). 57 | | `A` | Increase the real part of the complex constant `C`. 58 | | `Z` | Decrease the real part of the complex constant `C`. 59 | | `S` | Increase the imaginary part of the complex constant `C`. 60 | | `X` | Decrease the imaginary part of the complex constant `C`. 61 | -------------------------------------------------------------------------------- /samples/01_copybuffer/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright (c) 2019-2025 Ben Ashbaugh 3 | // 4 | // SPDX-License-Identifier: MIT 5 | */ 6 | 7 | #include 8 | 9 | #include 10 | 11 | const size_t gwx = 1024*1024; 12 | 13 | int main( 14 | int argc, 15 | char** argv ) 16 | { 17 | int platformIndex = 0; 18 | int deviceIndex = 0; 19 | 20 | { 21 | popl::OptionParser op("Supported Options"); 22 | op.add>("p", "platform", "Platform Index", platformIndex, &platformIndex); 23 | op.add>("d", "device", "Device Index", deviceIndex, &deviceIndex); 24 | 25 | bool printUsage = false; 26 | try { 27 | op.parse(argc, argv); 28 | } catch (std::exception& e) { 29 | fprintf(stderr, "Error: %s\n\n", e.what()); 30 | printUsage = true; 31 | } 32 | 33 | if (printUsage || !op.unknown_options().empty() || !op.non_option_args().empty()) { 34 | fprintf(stderr, 35 | "Usage: copybuffer [options]\n" 36 | "%s", op.help().c_str()); 37 | return -1; 38 | } 39 | } 40 | 41 | std::vector platforms; 42 | cl::Platform::get(&platforms); 43 | 44 | printf("Running on platform: %s\n", 45 | platforms[platformIndex].getInfo().c_str() ); 46 | 47 | std::vector devices; 48 | platforms[platformIndex].getDevices(CL_DEVICE_TYPE_ALL, &devices); 49 | 50 | printf("Running on device: %s\n", 51 | devices[deviceIndex].getInfo().c_str() ); 52 | 53 | cl::Context context{devices[deviceIndex]}; 54 | cl::CommandQueue commandQueue{context, devices[deviceIndex]}; 55 | 56 | cl::Buffer 
deviceMemSrc = cl::Buffer{ 57 | context, 58 | CL_MEM_ALLOC_HOST_PTR, 59 | gwx * sizeof( cl_uint ) }; 60 | 61 | cl::Buffer deviceMemDst = cl::Buffer{ 62 | context, 63 | CL_MEM_ALLOC_HOST_PTR, 64 | gwx * sizeof( cl_uint ) }; 65 | 66 | // initialization 67 | { 68 | cl_uint* pSrc = (cl_uint*)commandQueue.enqueueMapBuffer( 69 | deviceMemSrc, 70 | CL_TRUE, 71 | CL_MAP_WRITE_INVALIDATE_REGION, 72 | 0, 73 | gwx * sizeof(cl_uint) ); 74 | 75 | for( size_t i = 0; i < gwx; i++ ) 76 | { 77 | pSrc[i] = (cl_uint)(i); 78 | } 79 | 80 | commandQueue.enqueueUnmapMemObject( 81 | deviceMemSrc, 82 | pSrc ); 83 | } 84 | 85 | // execution 86 | commandQueue.enqueueCopyBuffer( 87 | deviceMemSrc, 88 | deviceMemDst, 89 | 0, 90 | 0, 91 | gwx * sizeof(cl_uint) ); 92 | 93 | // verification 94 | { 95 | const cl_uint* pDst = (const cl_uint*)commandQueue.enqueueMapBuffer( 96 | deviceMemDst, 97 | CL_TRUE, 98 | CL_MAP_READ, 99 | 0, 100 | gwx * sizeof(cl_uint) ); 101 | 102 | unsigned int mismatches = 0; 103 | 104 | for( size_t i = 0; i < gwx; i++ ) 105 | { 106 | if( pDst[i] != i ) 107 | { 108 | if( mismatches < 16 ) 109 | { 110 | fprintf(stderr, "MisMatch! 
dst[%d] == %08X, want %08X\n", 111 | (unsigned int)i, 112 | pDst[i], 113 | (unsigned int)i ); 114 | } 115 | mismatches++; 116 | } 117 | } 118 | 119 | if( mismatches ) 120 | { 121 | fprintf(stderr, "Error: Found %d mismatches / %d values!!!\n", 122 | mismatches, 123 | (unsigned int)gwx ); 124 | } 125 | else 126 | { 127 | printf("Success.\n"); 128 | } 129 | 130 | commandQueue.enqueueUnmapMemObject( 131 | deviceMemDst, 132 | (void*)pDst ); 133 | } 134 | 135 | return 0; 136 | } 137 | -------------------------------------------------------------------------------- /layers/00_example/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright (c) 2022-2025 Ben Ashbaugh 3 | // 4 | // SPDX-License-Identifier: MIT 5 | */ 6 | 7 | // This example layer was heavily inspired by the OpenCL-Layers-Tutorial: 8 | // https://github.com/Kerilk/OpenCL-Layers-Tutorial 9 | 10 | #if defined _WIN32 || defined __CYGWIN__ 11 | #ifdef __GNUC__ 12 | #define CL_API_ENTRY __attribute__((dllexport)) 13 | #else 14 | #define CL_API_ENTRY __declspec(dllexport) 15 | #endif 16 | #else 17 | #if __GNUC__ >= 4 18 | #define CL_API_ENTRY __attribute__((visibility("default"))) 19 | #else 20 | #define CL_API_ENTRY 21 | #endif 22 | #endif 23 | 24 | #include 25 | 26 | #include 27 | 28 | // Utility functions to properly check and return values from queries. 29 | #include "layer_util.hpp" 30 | 31 | // This is the dispatch table for this layer. 32 | // It will contain functions that this layer hooks. 33 | static struct _cl_icd_dispatch dispatch; 34 | 35 | // This is the next dispatch table. 36 | // The layer should use this dispatch table to make OpenCL calls. 37 | static const struct _cl_icd_dispatch* pNextDispatch; 38 | 39 | // This is the only function that this layer will hook. 40 | // It simply prints the function arguments and return values. 
41 | static cl_int CL_API_CALL clGetPlatformIDs_layer( 42 | cl_uint num_entries, 43 | cl_platform_id* platforms, 44 | cl_uint* num_platforms) 45 | { 46 | fprintf(stderr, "Example Layer: clGetPlatformIDs(num_entries: %d, platforms: %p, num_platforms: %p)\n", 47 | num_entries, platforms, num_platforms); 48 | 49 | cl_int res = pNextDispatch->clGetPlatformIDs(num_entries, platforms, num_platforms); 50 | 51 | fprintf(stderr, "Example Layer: clGetPlatformIDs result: %d, num_platforms: %d\n", 52 | res, num_platforms ? num_platforms[0] : 0); 53 | return res; 54 | } 55 | 56 | // This is a utility function to setup the dispatch table for this layer. 57 | static void _init_dispatch() 58 | { 59 | dispatch.clGetPlatformIDs = &clGetPlatformIDs_layer; 60 | } 61 | 62 | // This is boilerplate code that will be similar for all layers. 63 | 64 | CL_API_ENTRY cl_int CL_API_CALL clGetLayerInfo( 65 | cl_layer_info param_name, 66 | size_t param_value_size, 67 | void* param_value, 68 | size_t* param_value_size_ret) 69 | { 70 | switch (param_name) { 71 | case CL_LAYER_API_VERSION: 72 | { 73 | auto ptr = (cl_layer_api_version*)param_value; 74 | auto value = cl_layer_api_version{CL_LAYER_API_VERSION_100}; 75 | return writeParamToMemory( 76 | param_value_size, 77 | value, 78 | param_value_size_ret, 79 | ptr); 80 | } 81 | break; 82 | #if defined(CL_LAYER_NAME) 83 | case CL_LAYER_NAME: 84 | { 85 | auto ptr = (char*)param_value; 86 | return writeStringToMemory( 87 | param_value_size, 88 | "Example Layer", 89 | param_value_size_ret, 90 | ptr); 91 | } 92 | break; 93 | #endif 94 | default: 95 | return CL_INVALID_VALUE; 96 | } 97 | return CL_SUCCESS; 98 | } 99 | 100 | CL_API_ENTRY cl_int CL_API_CALL clInitLayer( 101 | cl_uint num_entries, 102 | const struct _cl_icd_dispatch* target_dispatch, 103 | cl_uint* num_entries_out, 104 | const struct _cl_icd_dispatch** layer_dispatch_ret) 105 | { 106 | const size_t dispatchTableSize = 107 | sizeof(dispatch) / sizeof(dispatch.clGetPlatformIDs); 108 | 109 | if 
(target_dispatch == nullptr || 110 | num_entries_out == nullptr || 111 | layer_dispatch_ret == nullptr) { 112 | return CL_INVALID_VALUE; 113 | } 114 | 115 | if (num_entries < dispatchTableSize) { 116 | return CL_INVALID_VALUE; 117 | } 118 | 119 | _init_dispatch(); 120 | 121 | pNextDispatch = target_dispatch; 122 | 123 | *layer_dispatch_ret = &dispatch; 124 | *num_entries_out = dispatchTableSize; 125 | 126 | return CL_SUCCESS; 127 | } 128 | 129 | -------------------------------------------------------------------------------- /samples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2025 Ben Ashbaugh 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | function(add_opencl_sample) 6 | set(options TEST) 7 | set(one_value_args NUMBER TARGET VERSION CATEGORY) 8 | set(multi_value_args SOURCES KERNELS INCLUDES LIBS) 9 | cmake_parse_arguments(OPENCL_SAMPLE 10 | "${options}" "${one_value_args}" "${multi_value_args}" 11 | ${ARGN} 12 | ) 13 | 14 | if(NOT OPENCL_SAMPLE_VERSION) 15 | message(STATUS "No OpenCL version specified for sample ${OPENCL_SAMPLE_TARGET}, using OpenCL 3.0.") 16 | set(OPENCL_SAMPLE_VERSION 300) 17 | endif() 18 | if(NOT OPENCL_SAMPLE_NUMBER) 19 | message(STATUS "No sample number specified for sample ${OPENCL_SAMPLE_TARGET}, using 99.") 20 | set(OPENCL_SAMPLE_NUMBER 99) 21 | endif() 22 | 23 | add_executable(${OPENCL_SAMPLE_TARGET} ${OPENCL_SAMPLE_SOURCES}) 24 | 25 | target_include_directories(${OPENCL_SAMPLE_TARGET} PRIVATE ${OpenCL_INCLUDE_DIR} ${OPENCL_SAMPLE_INCLUDES}) 26 | target_link_libraries(${OPENCL_SAMPLE_TARGET} ${OpenCL_LIBRARIES} ${OPENCL_SAMPLE_LIBS}) 27 | 28 | target_compile_definitions(${OPENCL_SAMPLE_TARGET} PRIVATE CL_TARGET_OPENCL_VERSION=${OPENCL_SAMPLE_VERSION}) 29 | target_compile_definitions(${OPENCL_SAMPLE_TARGET} PRIVATE CL_ENABLE_BETA_EXTENSIONS) 30 | target_compile_definitions(${OPENCL_SAMPLE_TARGET} PRIVATE CL_HPP_TARGET_OPENCL_VERSION=${OPENCL_SAMPLE_VERSION}) 
31 | target_compile_definitions(${OPENCL_SAMPLE_TARGET} PRIVATE CL_HPP_MINIMUM_OPENCL_VERSION=${OPENCL_SAMPLE_VERSION}) 32 | if (SAMPLES_ENABLE_EXCEPTIONS) 33 | target_compile_definitions(${OPENCL_SAMPLE_TARGET} PRIVATE CL_HPP_ENABLE_EXCEPTIONS) 34 | endif() 35 | if (WIN32) 36 | target_compile_definitions(${OPENCL_SAMPLE_TARGET} PRIVATE _CRT_SECURE_NO_WARNINGS NOMINMAX) 37 | endif() 38 | 39 | set_target_properties(${OPENCL_SAMPLE_TARGET} PROPERTIES FOLDER "Samples/${OPENCL_SAMPLE_CATEGORY}/${OPENCL_SAMPLE_NUMBER}_${OPENCL_SAMPLE_TARGET}") 40 | 41 | if(CMAKE_CONFIGURATION_TYPES) 42 | set(OPENCL_SAMPLE_CONFIGS ${CMAKE_CONFIGURATION_TYPES}) 43 | else() 44 | set(OPENCL_SAMPLE_CONFIGS ${CMAKE_BUILD_TYPE}) 45 | endif() 46 | foreach(CONFIG ${OPENCL_SAMPLE_CONFIGS}) 47 | install(TARGETS ${OPENCL_SAMPLE_TARGET} CONFIGURATIONS ${CONFIG} DESTINATION ${CONFIG}) 48 | install(FILES ${OPENCL_SAMPLE_KERNELS} CONFIGURATIONS ${CONFIG} DESTINATION ${CONFIG}) 49 | endforeach() 50 | if(OPENCL_SAMPLE_TEST) 51 | add_test(NAME ${OPENCL_SAMPLE_TARGET} COMMAND ${OPENCL_SAMPLE_TARGET}) 52 | endif() 53 | endfunction() 54 | 55 | add_subdirectory( images ) 56 | add_subdirectory( opengl ) 57 | add_subdirectory( python ) 58 | add_subdirectory( vulkan ) 59 | add_subdirectory( svm ) 60 | add_subdirectory( usm ) 61 | 62 | add_subdirectory( 00_enumopencl ) 63 | add_subdirectory( 00_enumopenclpp ) 64 | add_subdirectory( 00_enumqueuefamilies ) 65 | add_subdirectory( 00_extendeddevicequeries ) 66 | add_subdirectory( 00_loaderinfo ) 67 | add_subdirectory( 00_newqueries ) 68 | add_subdirectory( 00_newqueriespp ) 69 | add_subdirectory( 01_copybuffer ) 70 | add_subdirectory( 02_copybufferkernel ) 71 | add_subdirectory( 03_mandelbrot ) 72 | add_subdirectory( 04_julia ) 73 | add_subdirectory( 04_sobel ) 74 | add_subdirectory( 05_kernelfromfile ) 75 | add_subdirectory( 05_spirvkernelfromfile ) 76 | add_subdirectory( 06_ndrangekernelfromfile ) 77 | 78 | add_subdirectory( 10_queueexperiments ) 79 | 
add_subdirectory( 16_floatatomics ) 80 | 81 | set(BUILD_EXTENSION_SAMPLES TRUE) 82 | if(NOT TARGET OpenCLExt) 83 | message(STATUS "Skipping Extension Samples - OpenCL Extension Loader is not found.") 84 | set(BUILD_EXTENSION_SAMPLES FALSE) 85 | endif() 86 | 87 | if(BUILD_EXTENSION_SAMPLES) 88 | add_subdirectory( 11_semaphores ) 89 | add_subdirectory( 12_commandbuffers ) 90 | add_subdirectory( 12_commandbufferspp ) 91 | add_subdirectory( 13_mutablecommandbuffers ) 92 | add_subdirectory( 14_ooqcommandbuffers ) 93 | add_subdirectory( 15_mutablecommandbufferasserts ) 94 | endif() 95 | 96 | if(TARGET SPIRV-Headers) 97 | add_subdirectory( 00_spirvqueries ) 98 | endif() 99 | -------------------------------------------------------------------------------- /tutorials/interceptlayer/part0.md: -------------------------------------------------------------------------------- 1 | # Using the Intercept Layer for OpenCL Applications 2 | 3 | ## Part 0: Building and Running the Tutorial 4 | 5 | This part of the tutorial is to ensure everything is setup correctly. 6 | We will need to build the tutorial application itself and the Intercept Layer for OpenCL Applications. 7 | 8 | ### Building and Running the Tutorial Application 9 | 10 | First, ensure that the tutorial application itself builds and runs. 11 | It will crash initially - that's fine! 12 | As part of the tutorial we will fix bugs that are preventing the tutorial application from running and running well. 13 | 14 | ``` 15 | $ ./sinjulia 16 | *** Important Note! *** 17 | This is the Intercept Layer Tutorial application. 18 | It will crash initially! Please see the tutorial README for details. 19 | Running on platform: Intel(R) OpenCL HD Graphics 20 | Segmentation fault (core dumped) 21 | ``` 22 | 23 | If your system has multiple OpenCL platforms installed and you want to run on a different platform, choose it by passing the `-p` command line option. 
24 | If your OpenCL platform supports multiple OpenCL devices and you want to run on a different device, choose it by passing the `-d` command line option. 25 | You can view the installed platforms and devices by running the `enumopencl` sample. 26 | 27 | ### Building and Installing the Intercept Layer 28 | 29 | After the tutorial application is building and running, next build the [Intercept Layer for OpenCL Applications](https://github.com/intel/opencl-intercept-layer) by following the provided [build instructions](https://github.com/intel/opencl-intercept-layer/blob/master/docs/build.md). 30 | This tutorial is written to use the [cliloader](https://github.com/intel/opencl-intercept-layer/blob/master/docs/cliloader.md) utility, but if you prefer you may follow the [installation instructions](https://github.com/intel/opencl-intercept-layer/blob/master/docs/install.md) instead. 31 | After building and installing the Intercept Layer and `cliloader`, you should be able to use it to execute the tutorial application. 32 | 33 | ``` 34 | $ cliloader ./sinjulia 35 | -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= 36 | CLIntercept (64-bit) is loading... 
37 | CLIntercept file location: /home/bashbaug/bin/../lib/libOpenCL.so 38 | CLIntercept URL: https://github.com/intel/opencl-intercept-layer 39 | CLIntercept git description: v3.0.0-11-gd73caba 40 | CLIntercept git refspec: refs/heads/master 41 | CLIntercept git hash: d73caba0273207c47d3094865c1a9e145acf2018 42 | CLIntercept optional features: 43 | cliloader(supported) 44 | cliprof(supported) 45 | kernel overrides(supported) 46 | ITT tracing(NOT supported) 47 | MDAPI(supported) 48 | clock(steady_clock) 49 | CLIntercept environment variable prefix: CLI_ 50 | CLIntercept config file: clintercept.conf 51 | Trying to load dispatch from: ./real_libOpenCL.so 52 | Couldn't load library: ./real_libOpenCL.so 53 | Trying to load dispatch from: /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 54 | Couldn't get exported function pointer to: clGetGLContextInfoKHR 55 | ... success! 56 | Control ReportToStderr is set to non-default value: true 57 | Timer Started! 58 | ... loading complete. 59 | *** Important Note! *** 60 | This is the Intercept Layer Tutorial application. 61 | It will crash initially! Please see the tutorial README for details. 62 | Running on platform: Intel(R) OpenCL HD Graphics 63 | Segmentation fault (core dumped) 64 | ``` 65 | 66 | The tutorial application will still crash, but you should see output from the Intercept Layer as it is loading. 67 | The output from the Intercept Layer as it is running is referred to as the "log". 68 | By default, the log is emitted to `stderr`, but the `LogToFile` control can emit the log to a file and the `LogToDebugger` control can emit the log to a debugger instead, which is convenient for GUI applications or if the application generates a lot of log data. 69 | 70 | If the Intercept Layer isn't working, please check the [Troubleshooting and Frequently Asked Questions](https://github.com/intel/opencl-intercept-layer/blob/master/docs/FAQ.md) page. 71 | 72 | If it is working, move on to part 1! 
73 | 74 | ## Next Step 75 | 76 | * Part 1: [Fixing an OpenCL Error](part1.md) 77 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Simple OpenCL™ Samples 2 | 3 | [![build](https://github.com/bashbaug/SimpleOpenCLSamples/actions/workflows/build.yml/badge.svg?branch=main)](https://github.com/bashbaug/SimpleOpenCLSamples/actions?query=workflow%3Abuild+branch%3Amain) 4 | 5 | This repo contains simple OpenCL samples that demonstrate how to build 6 | OpenCL applications using only the Khronos-provided headers and libs. 7 | All samples have been tested on Windows and Linux. 8 | 9 | Most of the samples are written in C and C++ using the OpenCL C++ bindings. 10 | A few of the samples have been ported to Python using [PyOpenCL](https://pypi.org/project/pyopencl/). 11 | 12 | 13 | ## Code Structure 14 | 15 | ``` 16 | README.md This file 17 | LICENSE License information 18 | CMakeLists.txt Top-level CMakefile 19 | external/ External Projects (headers and libs) 20 | include/ Include Files (OpenCL C++ bindings) 21 | layers/ Sample Layers 22 | samples/ Sample Applications 23 | tutorials/ Tutorials 24 | ``` 25 | 26 | ## How to Build the Samples 27 | 28 | The samples require the following external dependencies: 29 | 30 | OpenCL Headers: 31 | 32 | git clone https://github.com/KhronosGroup/OpenCL-Headers external/OpenCL-Headers 33 | 34 | OpenCL ICD Loader: 35 | 36 | git clone https://github.com/KhronosGroup/opencl-icd-loader external/opencl-icd-loader 37 | 38 | Many samples that use extensions additionally require the OpenCL Extension Loader: 39 | 40 | git clone https://github.com/bashbaug/opencl-extension-loader external/opencl-extension-loader 41 | 42 | Several samples that interact with SPIR-V require the SPIR-V headers: 43 | 44 | git clone https://github.com/KhronosGroup/SPIRV-Headers external/SPIRV-Headers 45 | 46 | After satisfying the external 
dependencies create build files using CMake. For example: 47 | 48 | mkdir build && cd build 49 | cmake .. 50 | 51 | Then, build with the generated build files. 52 | 53 | ## How to Run the Samples 54 | 55 | To run the samples, you will need to obtain and install an ICD loader and an 56 | OpenCL implementation (ICD) that supports the `cl_khr_icd` extension. 57 | 58 | The ICD loader is likely provided by your operating system or an OpenCL 59 | implementation. If desired, you may use the ICD loader that is built along 60 | with these OpenCL samples. The OpenCL implementation will likely be provided 61 | by your OpenCL device vendor. There are several open source OpenCL 62 | implementations as well. 63 | 64 | ## Further Reading 65 | 66 | * [Environment Setup for Ubuntu 22.04](docs/env/ubuntu/22.04.md) 67 | * [OpenCLPapers](https://github.com/bashbaug/OpenCLPapers) 68 | * [OpenCL Specs](https://registry.khronos.org/OpenCL/) 69 | * [OpenCL Error Codes](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#error_codes) 70 | 71 | ## A Note About Error Checking 72 | 73 | For brevity, most samples do not include error checking. This means that a 74 | sample may crash or incorrectly report success if an OpenCL error occurs. By 75 | defining the CMake variable `SAMPLES_ENABLE_EXCEPTIONS` many samples can instead 76 | throw an exception if an OpenCL error occurs. 77 | 78 | Tools like the [OpenCL Intercept Layer](https://github.com/intel/opencl-intercept-layer) 79 | can also be useful to detect when an OpenCL error occurs and to identify the 80 | cause of the error. 81 | 82 | ## License 83 | 84 | These samples are licensed under the [MIT License](LICENSE). 85 | 86 | Notes: 87 | * The OpenCL C++ bindings are built from the 88 | [Khronos OpenCL-CLHPP Repo](https://github.com/KhronosGroup/OpenCL-CLHPP), 89 | and is licensed under the 90 | [Khronos(tm) License](https://github.com/KhronosGroup/OpenCL-CLHPP/blob/master/LICENSE.txt). 
91 | * The samples use [popl](https://github.com/badaix/popl) for its options 92 | parsing, which is licensed under the MIT License. 93 | * The samples use [stb](https://github.com/nothings/stb) for image reading and 94 | writing, which is dual-licensed under a public domain license and the MIT 95 | license. 96 | 97 | --- 98 | OpenCL and the OpenCL logo are trademarks of Apple Inc. used by permission by Khronos. 99 | 100 | \* Other names and brands may be claimed as the property of others. -------------------------------------------------------------------------------- /samples/02_copybufferkernel/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright (c) 2019-2025 Ben Ashbaugh 3 | // 4 | // SPDX-License-Identifier: MIT 5 | */ 6 | 7 | #include 8 | 9 | #include 10 | 11 | const size_t gwx = 1024*1024; 12 | 13 | static const char kernelString[] = R"CLC( 14 | kernel void CopyBuffer( global uint* dst, global uint* src ) 15 | { 16 | uint id = get_global_id(0); 17 | dst[id] = src[id]; 18 | } 19 | )CLC"; 20 | 21 | int main( 22 | int argc, 23 | char** argv ) 24 | { 25 | int platformIndex = 0; 26 | int deviceIndex = 0; 27 | 28 | { 29 | popl::OptionParser op("Supported Options"); 30 | op.add>("p", "platform", "Platform Index", platformIndex, &platformIndex); 31 | op.add>("d", "device", "Device Index", deviceIndex, &deviceIndex); 32 | 33 | bool printUsage = false; 34 | try { 35 | op.parse(argc, argv); 36 | } catch (std::exception& e) { 37 | fprintf(stderr, "Error: %s\n\n", e.what()); 38 | printUsage = true; 39 | } 40 | 41 | if (printUsage || !op.unknown_options().empty() || !op.non_option_args().empty()) { 42 | fprintf(stderr, 43 | "Usage: copybufferkernel [options]\n" 44 | "%s", op.help().c_str()); 45 | return -1; 46 | } 47 | } 48 | 49 | std::vector platforms; 50 | cl::Platform::get(&platforms); 51 | 52 | printf("Running on platform: %s\n", 53 | platforms[platformIndex].getInfo().c_str() ); 54 | 55 | std::vector 
devices; 56 | platforms[platformIndex].getDevices(CL_DEVICE_TYPE_ALL, &devices); 57 | 58 | printf("Running on device: %s\n", 59 | devices[deviceIndex].getInfo().c_str() ); 60 | 61 | cl::Context context{devices[deviceIndex]}; 62 | cl::CommandQueue commandQueue{context, devices[deviceIndex]}; 63 | 64 | cl::Program program{ context, kernelString }; 65 | program.build(); 66 | cl::Kernel kernel = cl::Kernel{ program, "CopyBuffer" }; 67 | 68 | cl::Buffer deviceMemSrc = cl::Buffer{ 69 | context, 70 | CL_MEM_ALLOC_HOST_PTR, 71 | gwx * sizeof( cl_uint ) }; 72 | 73 | cl::Buffer deviceMemDst = cl::Buffer{ 74 | context, 75 | CL_MEM_ALLOC_HOST_PTR, 76 | gwx * sizeof( cl_uint ) }; 77 | 78 | // initialization 79 | { 80 | cl_uint* pSrc = (cl_uint*)commandQueue.enqueueMapBuffer( 81 | deviceMemSrc, 82 | CL_TRUE, 83 | CL_MAP_WRITE_INVALIDATE_REGION, 84 | 0, 85 | gwx * sizeof(cl_uint) ); 86 | 87 | for( size_t i = 0; i < gwx; i++ ) 88 | { 89 | pSrc[i] = (cl_uint)(i); 90 | } 91 | 92 | commandQueue.enqueueUnmapMemObject( 93 | deviceMemSrc, 94 | pSrc ); 95 | } 96 | 97 | // execution 98 | kernel.setArg(0, deviceMemDst); 99 | kernel.setArg(1, deviceMemSrc); 100 | commandQueue.enqueueNDRangeKernel( 101 | kernel, 102 | cl::NullRange, 103 | cl::NDRange{gwx} ); 104 | 105 | // verification 106 | { 107 | const cl_uint* pDst = (const cl_uint*)commandQueue.enqueueMapBuffer( 108 | deviceMemDst, 109 | CL_TRUE, 110 | CL_MAP_READ, 111 | 0, 112 | gwx * sizeof(cl_uint) ); 113 | 114 | unsigned int mismatches = 0; 115 | 116 | for( size_t i = 0; i < gwx; i++ ) 117 | { 118 | if( pDst[i] != i ) 119 | { 120 | if( mismatches < 16 ) 121 | { 122 | fprintf(stderr, "MisMatch! 
dst[%d] == %08X, want %08X\n", 123 | (unsigned int)i, 124 | pDst[i], 125 | (unsigned int)i ); 126 | } 127 | mismatches++; 128 | } 129 | } 130 | 131 | if( mismatches ) 132 | { 133 | fprintf(stderr, "Error: Found %d mismatches / %d values!!!\n", 134 | mismatches, 135 | (unsigned int)gwx ); 136 | } 137 | else 138 | { 139 | printf("Success.\n"); 140 | } 141 | 142 | commandQueue.enqueueUnmapMemObject( 143 | deviceMemDst, 144 | (void*)pDst ); 145 | } 146 | 147 | return 0; 148 | } 149 | --------------------------------------------------------------------------------