├── .gitignore ├── Android.mk ├── CMakeLists.txt ├── README.md ├── build_android.sh ├── build_linux_oe.sh ├── docs └── extensions │ ├── cl_qcom_accelerated_image_ops.txt │ ├── cl_qcom_android_native_buffer_host_ptr.txt │ ├── cl_qcom_compressed_image.txt │ ├── cl_qcom_create_buffer_from_image.txt │ ├── cl_qcom_ext_host_ptr.txt │ ├── cl_qcom_ext_host_ptr_iocoherent.txt │ ├── cl_qcom_extract_image_plane.txt │ ├── cl_qcom_ion_host_ptr.txt │ ├── cl_qcom_other_image.txt │ ├── cl_qcom_perf_hint.txt │ ├── cl_qcom_priority_hint.txt │ ├── cl_qcom_protected_context.txt │ ├── cl_qcom_subgroup_shuffle.txt │ └── cl_qcom_vector_image_ops.txt ├── example_images ├── CL_QCOM_BAYER__CL_QCOM_UNORM_INT10__192x192_FACE.dat ├── CL_QCOM_BAYER__CL_QCOM_UNORM_INT10__48x48_FACE.dat ├── CL_QCOM_BAYER__CL_QCOM_UNORM_MIPI10__192x192_FACE.dat ├── CL_QCOM_BAYER__CL_QCOM_UNORM_MIPI10__48x48_FACE.dat ├── CL_QCOM_NV12__CL_UNORM_INT8__128x128_CIRCLE.dat ├── CL_QCOM_NV12__CL_UNORM_INT8__256x256_CIRCLE.dat ├── CL_QCOM_NV12__CL_UNORM_INT8__64x64_CIRCLE.dat ├── CL_QCOM_NV12__CL_UNORM_INT8__CONSTANT.dat ├── CL_QCOM_NV12__CL_UNORM_INT8__INCREASING.dat ├── CL_QCOM_NV12__CL_UNORM_INT8__RANDOM.dat ├── CL_QCOM_P010__CL_QCOM_UNORM_INT10__CONSTANT.dat ├── CL_QCOM_P010__CL_QCOM_UNORM_INT10__INCREASING.dat ├── CL_QCOM_P010__CL_QCOM_UNORM_INT10__RANDOM.dat ├── CL_QCOM_TP10__CL_QCOM_UNORM_INT10__CONSTANT.dat ├── CL_QCOM_TP10__CL_QCOM_UNORM_INT10__INCREASING.dat ├── CL_QCOM_TP10__CL_QCOM_UNORM_INT10__RANDOM.dat └── create_example_images.py ├── inc └── CL │ ├── cl.h │ ├── cl.hpp │ ├── cl2.hpp │ ├── cl_egl.h │ ├── cl_ext.h │ ├── cl_ext_qcom.h │ ├── cl_gl.h │ ├── cl_gl_ext.h │ ├── cl_platform.h │ └── opencl.h ├── src ├── examples │ ├── basic │ │ ├── compressed_image_nv12.cpp │ │ ├── compressed_image_rgba.cpp │ │ ├── hello_world.cpp │ │ ├── qcom_block_match_sad.cpp │ │ ├── qcom_block_match_ssd.cpp │ │ ├── qcom_box_filter_image.cpp │ │ └── qcom_convolve_image.cpp │ ├── bayer_mipi │ │ ├── bayer_mipi10_to_rgba.cpp │ │ ├── 
mipi10_to_unpacked.cpp │ │ ├── unpacked_bayer_to_rgba.cpp │ │ └── unpacked_to_mipi10.cpp │ ├── conversions │ │ ├── nv12_to_rgba.cpp │ │ └── p010_to_compressed_tp10.cpp │ ├── convolutions │ │ ├── accelerated_convolution.cpp │ │ └── convolution.cpp │ ├── fft │ │ ├── fft_image.cpp │ │ └── fft_matrix.cpp │ ├── io_coherent_ion │ │ ├── io_coherent_ion_buffers.cpp │ │ └── io_coherent_ion_images.cpp │ ├── linear_algebra │ │ ├── buffer_matrix_multiplication.cpp │ │ ├── buffer_matrix_multiplication_half.cpp │ │ ├── buffer_matrix_transpose.cpp │ │ ├── image_matrix_multiplication.cpp │ │ ├── image_matrix_multiplication_half.cpp │ │ ├── image_matrix_transpose.cpp │ │ └── matrix_addition.cpp │ └── vector_image_ops │ │ ├── compressed_nv12_vector_image_ops.cpp │ │ ├── compressed_p010_vector_image_ops.cpp │ │ ├── compressed_tp10_vector_image_ops.cpp │ │ ├── nv12_vector_image_ops.cpp │ │ ├── p010_vector_image_ops.cpp │ │ └── tp10_vector_image_ops.cpp └── util │ ├── cl_wrapper.cpp │ ├── cl_wrapper.h │ ├── half_float.cpp │ ├── half_float.h │ ├── util.cpp │ └── util.h └── toolchain └── linux_embedded └── linux_embedded-toolchain.cmake /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | -------------------------------------------------------------------------------- /CMakeLists.txt: --------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------
# File: CMakeLists.txt
# Desc: Builds every SDK example as a stand-alone executable. Each example is
#       compiled together with the shared utility sources and linked against
#       the OpenCL library given by the OPEN_CL_LIB variable.
#
# Author: QUALCOMM
#
# Copyright (c) 2017 QUALCOMM Technologies, Inc.
# All Rights Reserved.
# QUALCOMM Proprietary/GTDR
#--------------------------------------------------------------------------------------
cmake_minimum_required(VERSION 2.8)
project(sdk_examples)

# Request C++11. CMAKE_CXX_STANDARD is only understood by CMake 3.1 and later,
# so older versions additionally get the roughly equivalent compiler flag.
set(CMAKE_CXX_STANDARD 11)
if(CMAKE_VERSION VERSION_LESS "3.1")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
endif()

# All warnings enabled, and every warning is treated as an error.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror")

# Utility sources that are compiled into every example executable.
set(COMMON_SOURCE_FILES
    src/util/util.h
    src/util/util.cpp
    src/util/half_float.h
    src/util/half_float.cpp
    src/util/cl_wrapper.h
    src/util/cl_wrapper.cpp
)

if(ANDROID)
    # ION_INCLUDE_PATH may be given as a CMake variable; when it is empty the
    # environment variable of the same name is used, and the configure step
    # stops if that is empty as well.
    if("${ION_INCLUDE_PATH}" STREQUAL "")
        set(ION_INCLUDE_PATH "$ENV{ION_INCLUDE_PATH}")
        if("${ION_INCLUDE_PATH}" STREQUAL "")
            message(FATAL_ERROR "Please set CMake variable ION_INCLUDE_PATH")
        endif()
    endif()

    message("Using ${ION_INCLUDE_PATH} as include path for ION headers")

    add_definitions(-DANDROID -DUSES_ANDROID_CMAKE)

    # The standalone toolchain's sysroot headers are searched before the ION headers.
    set(ION_INCLUDE_PATH
        ${ANDROID_STANDALONE_TOOLCHAIN}/sysroot/usr/include
        ${ION_INCLUDE_PATH}
    )
endif() #ANDROID

include_directories(
    src
    inc
    ${ION_INCLUDE_PATH}
)

if("${OPEN_CL_LIB}" STREQUAL "")
    message(FATAL_ERROR "Can't find libOpenCL.so, please set the CMake variable OPEN_CL_LIB to /path/to/libOpenCL.so.")
endif()

# One executable per entry. The target name is the source file name without
# its directory and extension.
set(EXAMPLE_SOURCE_FILES
    src/examples/basic/qcom_box_filter_image.cpp
    src/examples/basic/qcom_convolve_image.cpp
    src/examples/basic/qcom_block_match_sad.cpp
    src/examples/basic/qcom_block_match_ssd.cpp
    src/examples/convolutions/accelerated_convolution.cpp
    src/examples/convolutions/convolution.cpp
    src/examples/basic/compressed_image_nv12.cpp
    src/examples/vector_image_ops/nv12_vector_image_ops.cpp
    src/examples/vector_image_ops/tp10_vector_image_ops.cpp
    src/examples/vector_image_ops/p010_vector_image_ops.cpp
    src/examples/vector_image_ops/compressed_nv12_vector_image_ops.cpp
    src/examples/vector_image_ops/compressed_p010_vector_image_ops.cpp
    src/examples/vector_image_ops/compressed_tp10_vector_image_ops.cpp
    src/examples/basic/hello_world.cpp
    src/examples/conversions/p010_to_compressed_tp10.cpp
    src/examples/conversions/nv12_to_rgba.cpp
    src/examples/linear_algebra/matrix_addition.cpp
    src/examples/linear_algebra/image_matrix_multiplication.cpp
    src/examples/linear_algebra/buffer_matrix_multiplication.cpp
    src/examples/linear_algebra/buffer_matrix_transpose.cpp
    src/examples/linear_algebra/image_matrix_transpose.cpp
    src/examples/bayer_mipi/bayer_mipi10_to_rgba.cpp
    src/examples/bayer_mipi/mipi10_to_unpacked.cpp
    src/examples/bayer_mipi/unpacked_bayer_to_rgba.cpp
    src/examples/bayer_mipi/unpacked_to_mipi10.cpp
    src/examples/fft/fft_image.cpp
    src/examples/fft/fft_matrix.cpp
    src/examples/linear_algebra/image_matrix_multiplication_half.cpp
    src/examples/linear_algebra/buffer_matrix_multiplication_half.cpp
    src/examples/io_coherent_ion/io_coherent_ion_buffers.cpp
    src/examples/io_coherent_ion/io_coherent_ion_images.cpp
    src/examples/basic/compressed_image_rgba.cpp
)

foreach(example_source ${EXAMPLE_SOURCE_FILES})
    get_filename_component(example_name ${example_source} NAME_WE)
    add_executable(${example_name} ${COMMON_SOURCE_FILES} ${example_source})
    target_link_libraries(${example_name} ${OPEN_CL_LIB})
endforeach()
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SDK Examples 2 | 3 | ## What is this? 4 | 5 | Usage examples for Qualcomm's extensions to OpenCL. 6 | 7 | ## Building for Android 8 | 9 | There's a few things you'll need: 10 | 11 | * The Android Open Source Project (AOSP) tree set up to build for your target 12 | device. 
13 | * Appropriate kernel headers (`linux/ion.h` and `linux/msm_ion.h`) 14 | * A `libOpenCL` module defined by an `Android.mk` file. 15 | 16 | More on those below. Once everything is set up just run `mma` in this directory 17 | to build all the examples. 18 | 19 | ### Where do I get kernel headers? 20 | 21 | If your target device's kernel has the appropriate headers, they still need to 22 | be in a location where the Android build system can discover them. One way to 23 | ensure this is to build a bootimage, which will export the appropriate files: 24 | 25 | ``` 26 | > cd $ANDROID_BUILD_TOP 27 | > make bootimage 28 | ``` 29 | 30 | ### Where do I get the libOpenCL module? 31 | 32 | At the time of this writing `libOpenCL` is not available as part of Google's 33 | prebuilt graphics libraries releases for Qualcomm devices. If you are lucky 34 | enough to have it anyway, then you shouldn't need to do anything. Running `mma` 35 | in this directory will build all dependencies, including `libOpenCL`. 36 | 37 | ### I don't have the libOpenCL module, can I still use these examples? 38 | 39 | Maybe, if you have the `libOpenCL.so` binary for your device to link against, 40 | but it's not for the faint of heart. Provided here is a `CMakeLists.txt` file 41 | and a script `build_android.sh` that can be used as a starting point, but 42 | there's no guarantee it will work for your target device. You'll still need the 43 | AOSP tree for the kernel headers, so go get it if you don't have it. 44 | 45 | Find taka-no-me's `android-cmake` project online and clone it into the 46 | `android-cmake` directory here. 47 | 48 | All of these examples use ION buffers, so you'll still need appropriate ION 49 | headers. Find where your target device's `msm_ion.h` and `ion.h` headers are. 50 | For example you might see them at 51 | `$ANDROID_BUILD_TOP/hardware/qcom/<target>/kernel-headers/linux` where 52 | `<target>` should be replaced by your target device. 
You'll include 53 | this directory in the header search path. 54 | 55 | You'll also need the Android NDK, Revision 11c. The specific version is 56 | important. 57 | 58 | Then run the build script, substituting the paths specific to your build 59 | environment: 60 | 61 | ``` 62 | ANDROID_NDK=/path/to/android-ndk-r11c \ 63 | OPEN_CL_LIB=/path/to/libOpenCL.so \ 64 | ION_INCLUDE_PATH=$ANDROID_BUILD_TOP/hardware/qcom/<target>/kernel-headers/linux \ 65 | ./build_android.sh <BITNESS> 66 | ``` 67 | 68 | `<BITNESS>` should be `32` or `64` depending on your target architecture. 69 | 70 | ## Usage 71 | 72 | Building will produce a set of binaries. Run each one without arguments to see 73 | a help message and description of what it does. Most binaries take an input 74 | image in the format described below (see "Image data format") -- several sample images are given in the 75 | example_images directory, which contains arbitrary data (i.e. it is not 76 | visually interesting). 77 | 78 | ## Descriptions 79 | 80 | ### src/examples/basic directory 81 | 82 | #### hello_world.cpp 83 | 84 | A very basic example to test out building. It simply copies one file to another. 85 | 86 | #### qcom_block_match_sad.cpp, qcom_block_match_ssd.cpp, qcom_box_filter_image.cpp, qcom_convolve_image.cpp 87 | 88 | These examples all demonstrate basic usage for the named built-in extension functions. 89 | Look here for minimal examples of how to use the extensions. 90 | 91 | #### compressed_image_nv12.cpp, compressed_image_rgba.cpp 92 | 93 | Demonstrates use of compressed images using Qualcomm extensions to OpenCL. 94 | The input image is compressed and then decompressed, with the result written 95 | to the specified output file for comparison. (The compression is not lossy so 96 | they are identical.) 97 | 98 | Compressed image formats may be saved to disk, however be advised that the format 99 | is specific to each GPU. 100 | 101 | The two examples show compression for NV12 and RGBA images. 
102 | 103 | ### src/examples/bayer_mipi 104 | 105 | The examples in this directory show how to use Bayer-ordered images and packed 106 | MIPI data formats. 107 | 108 | Bayer-ordered images have one red, green or blue value per pixel, and the pixels 109 | are interleaved in a mosaic pattern. In order to get an equivalent RGB image 110 | one must "demosaic" the image by interpolating the missing red, green, and blue 111 | values. Bayer-ordered images are addressed by 2x2 blocks of such pixels, where 112 | each block has one red and blue value, and two green values. A Bayer-ordered 113 | image may also be addressed as a single-channel (`CL_R`) image to get one color 114 | channel at a time. 115 | 116 | `bayer_mipi10_to_rgba.cpp` and `unpacked_bayer_to_rgba.cpp` both demonstrate one 117 | scheme for demosaicing. The former uses the packed MIPI10 format, and the latter 118 | uses an unpacked 10-bit format (held in a 16-bit int with 6 bits unused). Both 119 | use Bayer-ordered images to exploit the GPU's interpolation capabilities without 120 | mixing different color channels. The destination format has 8-bits per channel, 121 | so some precision is lost. 122 | 123 | `mipi10_to_unpacked.cpp` and `unpacked_to_mipi10.cpp` demonstrate using the 124 | MIPI10 data format with a single-channel `CL_R` order. The former converts a 125 | packed MIPI10 image into an unpacked 10-bit image. The latter shows the 126 | unpacked-to-packed conversion. 127 | 128 | ### src/examples/conversions 129 | 130 | The examples in this directory show conversions to and from various image formats. 131 | 132 | ### src/examples/convolutions 133 | 134 | #### convolution.cpp 135 | 136 | Demonstrates efficient convolution without the use of built-in extension functions. 137 | 138 | #### accelerated_convolution.cpp 139 | 140 | Demonstrates efficient convolution with the qcom_convolve_imagef built-in extension 141 | function. 
142 | 143 | ### src/examples/fft 144 | 145 | These examples compute the 2-dimensional fast Fourier transform (2D FFT) of an 146 | image or matrix using the in-place Cooley-Tukey algorithm. First in the 147 | "row pass" each work group calculates the 1D FFT of a row, by reading initial 148 | data from global memory into local memory, and calculating intermediate results 149 | in-place using local memory. The final result is written to global memory in 150 | transposed order. This procedure is then repeated in a "column pass" that acts 151 | on the rows of the result of the first pass. Calculating the 1D FFTs 152 | back-to-back in this way is equal to the 2D FFT. 153 | 154 | For the image-based version, the input is an 8-bit per channel NV12 image, and 155 | the outputs are two single-channel images with a 32-bit float data type. The 156 | outputs contain the real and imaginary parts of the FFT. The example acts on 157 | the Y-plane only. 158 | 159 | The buffer-based version takes a real-valued matrix as input (specified as 160 | below), and produces two matrices as the output holding the real and imaginary 161 | parts of the FFT. 162 | 163 | ### src/examples/io_coherent_ion 164 | 165 | These simple examples demonstrate using the IO-coherent host cache policy for 166 | ION buffers. Both examples simply copy a specified file or image. Except for 167 | the parameters used to create the ION buffers, there is no difference in the 168 | host or kernel code compared to using uncached ION buffers. 169 | 170 | ### src/examples/linear_algebra 171 | 172 | Demonstrates some basic linear algebra operations: 173 | 174 | * Matrix addition 175 | * Matrix multiplication 176 | * Matrix transposition 177 | 178 | The transposition and multiplication examples come in two flavors, one using 179 | OpenCL buffers and another that packs the matrices into 2D images. 
It is not a 180 | foregone conclusion that using an image or a buffer will enjoy better 181 | performance in any given use case, so generally one must try and see what works 182 | best. 183 | 184 | The image versions of both examples pad irregularly sized matrices, both because 185 | images have per-row alignment requirements and because this permits an efficient 186 | tiled algorithm to be applied uniformly. This approach can use substantially 187 | more memory than the buffer-based version. 188 | 189 | In contrast, the buffer versions do not pad the input matrices. They use an 190 | efficient tiled algorithm where possible, and a less efficient algorithm to 191 | calculate the remaining portion of the output not covered by the tiled 192 | algorithm. 193 | 194 | The multiplication examples additionally have a "half" variant, that 195 | demonstrates using the 16-bit half-float data type. The input, output and 196 | arithmetic all use half-floats. This can be a significant performance advantage, 197 | although it introduces more error. One may mix use of floats and half-floats to 198 | achieve the desired performance/accuracy trade off. 199 | 200 | ### src/examples/vector_image_ops 201 | 202 | All examples in this directory demonstrate a variety of kernels using vector 203 | read and write operations for the given image formats. 204 | 205 | ## Image data format 206 | 207 | Input and output images have the following format, where multi-byte data types are written with the least significant 208 | byte first: 209 | 210 | * 4 bytes: plane width in pixels (unsigned integer) 211 | * 4 bytes: plane height (unsigned integer) 212 | * 4 bytes: OpenCL channel data type. 213 | * 4 bytes: OpenCL channel order. 214 | * N bytes: pixel data, where N is dependent on the preceding four values. 
215 | 216 | ## Matrix data format 217 | 218 | Matrices used by the examples in the `linear_algebra` directory have the 219 | following plain text format: 220 | 221 | * Two integers separated by whitespace indicating the number of columns and rows 222 | of the matrix. 223 | * A sequence of whitespace-separated floating point element values in row-major 224 | order. 225 | 226 | For example, the following represents a 3x2 matrix: 227 | 228 | ``` 229 | 2 3 230 | 1.0 2.0 231 | 3.1 4.1 232 | 6 0 233 | ``` 234 | -------------------------------------------------------------------------------- /build_android.sh: --------------------------------------------------------------------------------
#!/bin/bash
#--------------------------------------------------------------------------------------
# File: build_android.sh
# Desc: Cross-compiles all SDK examples for Android with the android-cmake
#       toolchain file and a standalone NDK toolchain that is created in this
#       directory on first use.
#
# Usage: build_android.sh <BITNESS> [args-to-pass-to-make]
#        BITNESS is 32 or 64; remaining arguments are handed to make.
#
# Required environment variables:
#   ANDROID_NDK       root of the Android NDK
#   OPEN_CL_LIB       path to libOpenCL.so (forwarded to CMake with -D)
#   ION_INCLUDE_PATH  directory containing the ION headers (not forwarded on
#                     the command line; CMakeLists.txt falls back to the
#                     environment variable of the same name)
#
# Author: QUALCOMM
#
# Copyright (c) 2017 QUALCOMM Technologies, Inc.
# All Rights Reserved.
# QUALCOMM Proprietary/GTDR
#--------------------------------------------------------------------------------------
set -e

if [[ -z "$ANDROID_NDK" ]]; then
    echo "Please set \$ANDROID_NDK to the root of your standalone Android ndk build tree"
    exit 1
fi

if [[ -z "$OPEN_CL_LIB" ]]; then
    echo "Please set \$OPEN_CL_LIB to the path to libOpenCL.so (e.g. /path/to/libOpenCL.so)"
    exit 1
fi

if [[ -z "$ION_INCLUDE_PATH" ]]; then
    echo "Please set \$ION_INCLUDE_PATH to the directory containing ION headers."
    exit 1
fi

if [[ "$#" -lt 1 ]]; then
    echo "Error: invalid number of arguments: $#"
    echo "Usage: $0 <BITNESS> [args-to-pass-to-make]"
    echo "    BITNESS: 32 or 64"
    exit 1
fi

BITNESS=$1

# Map the requested bitness onto the ABI, compiler toolchain and NDK architecture.
if [[ "64" == "$BITNESS" ]]; then
    ANDROID_ABI="arm64-v8a"
    ANDROID_TOOLCHAIN="aarch64-linux-android-clang"
    NDK_ARCH=arm64
elif [[ "32" == "$BITNESS" ]]; then
    ANDROID_ABI="armeabi-v7a"
    ANDROID_TOOLCHAIN="arm-linux-androideabi-clang"
    NDK_ARCH=arm
else
    echo "Invalid bitness!: $BITNESS"
    exit 1
fi

if [ ! -d "android-cmake" ]; then
    echo "Couldn't find `pwd`/android-cmake, please install it to this directory."
    exit 1
fi

# Creates an android standalone toolchain in this dir, for use with android-cmake.
# An existing toolchain directory is reused.
ANDROID_STANDALONE_TOOLCHAIN="$(pwd)/android_standalone_toolchain_$ANDROID_ABI"
if [ ! -d "$ANDROID_STANDALONE_TOOLCHAIN" ]; then
    "$ANDROID_NDK/build/tools/make-standalone-toolchain.sh" \
        --install-dir="$ANDROID_STANDALONE_TOOLCHAIN" \
        --arch="$NDK_ARCH" \
        --platform=android-21 \
        --toolchain="$ANDROID_TOOLCHAIN"
fi

BUILD_DIR="bld_android_$BITNESS"

mkdir -p "$BUILD_DIR"
cd "$BUILD_DIR"

# -G (upper case) selects the CMake generator.
cmake \
    --debug-trycompile \
    -DANDROID=True \
    -DCMAKE_TOOLCHAIN_FILE=../android-cmake/android.toolchain.cmake \
    -DCMAKE_BUILD_TYPE=Debug \
    -DANDROID_SO_UNDEFINED=ON \
    -DANDROID_ABI="$ANDROID_ABI" \
    -DANDROID_STANDALONE_TOOLCHAIN="$ANDROID_STANDALONE_TOOLCHAIN" \
    -DOPEN_CL_LIB="$OPEN_CL_LIB" \
    -G "Unix Makefiles" ../

# Passes extra cmd line arguments to make
make "${@:2}"
-------------------------------------------------------------------------------- /build_linux_oe.sh: --------------------------------------------------------------------------------
#!/bin/bash
# Cross-compiles all SDK examples for an embedded Linux (OE) target using
# toolchain/linux_embedded/linux_embedded-toolchain.cmake.
#
# Usage: build_linux_oe.sh <OE_TARGET> <BITNESS> [args-to-pass-to-make]
#
# Must be run from the directory that contains the toolchain/ folder.
# OE_ROOT must be set in the environment; OPEN_CL_LIB is forwarded to CMake
# as-is (it is not validated here).
set -e

if ! [[ "$OSTYPE" =~ linux ]]; then
    echo "Not a Linux System "
    exit 1
fi

if [[ -z "$OE_ROOT" ]]; then
    echo "Please set \$OE_ROOT to your root directory"
    exit 1
fi

#if [[ -z "$OPEN_CL_LIB" ]]; then
#    echo "Please set \$OPEN_CL_LIB"
#    exit 1
#fi

if [[ "$#" -lt 2 ]]; then
    echo "Error: invalid number of arguments: $#"
    echo "Usage: $0 <OE_TARGET> <BITNESS> [args-to-pass-to-make]"
    echo "    OE_TARGET: target platform (8053, 8096, 8074)"
    echo "    BITNESS: 32 or 64"
    exit 1
fi

# NOTE(review): OE_TARGET and ARM_ARCH are assigned but not referenced again in
# this script; only ARCH reaches CMake. Confirm whether the toolchain file
# expects them.
OE_TARGET=$1
ARCH=$2

if [[ "64" == "$ARCH" ]]; then
    ARM_ARCH="arm64v8"
elif [[ "32" == "$ARCH" ]]; then
    ARM_ARCH="armv7"
else
    echo "Invalid bitness!: $ARCH"
    exit 1
fi

OE_TOOLCHAIN_FILE="toolchain/linux_embedded/linux_embedded-toolchain.cmake"
if [[ ! -f "$OE_TOOLCHAIN_FILE" ]]; then
    echo "Can't find toolchain file: $OE_TOOLCHAIN_FILE"
    exit 1
fi

BUILD_DIR="bld_linux_oe_$ARCH"

mkdir -p "$BUILD_DIR"
cd "$BUILD_DIR"

# -G (upper case) selects the CMake generator.
cmake \
    --debug-trycompile \
    -DLINUX_OE=YES \
    -DCMAKE_TOOLCHAIN_FILE="$OE_TOOLCHAIN_FILE" \
    -DOE_ROOT="$OE_ROOT" \
    -DCMAKE_ARM_COMPILER=YES \
    -DNEEDS_TO_LINK_PTHREAD=YES \
    -DARCH="$ARCH" \
    -DOPEN_CL_LIB="$OPEN_CL_LIB" \
    -G "Unix Makefiles" ../

# Passes extra cmd line arguments to make
make "${@:3}"
-------------------------------------------------------------------------------- /docs/extensions/cl_qcom_android_native_buffer_host_ptr.txt: -------------------------------------------------------------------------------- 1 | Name Strings 2 | 3 | cl_qcom_android_native_buffer_host_ptr 4 | 5 | Contributors 6 | 7 | Hossein Mohtasham, Qualcomm Technologies, Inc. 8 | Sushmita Susheelendra, Qualcomm Innovation Center, Inc. 9 | Balaji Calidas, Qualcomm Technologies, Inc. 
10 | 11 | Contact 12 | 13 | bcalidas at qti dot qualcomm dot com 14 | 15 | Version 16 | 17 | Version 6, 2018/01/19 18 | 19 | Number 20 | 21 | OpenCL Extension #32 22 | 23 | Status 24 | 25 | Shipping 26 | 27 | Extension Type 28 | 29 | OpenCL device extension 30 | 31 | Dependencies 32 | 33 | OpenCL 1.1 or later is required. 34 | cl_qcom_ext_host_ptr is required. 35 | Android OS is required. 36 | 37 | This extension is written against the OpenCL 1.1 specification. This 38 | extension provides functionality, beyond and above the cl_qcom_ion_host_ptr 39 | extension, to create buffers and images directly from Android native 40 | buffers. 41 | 42 | If present, cl_qcom_ext_host_ptr_iocoherent extends the functionality 43 | of this extension. 44 | 45 | Overview 46 | 47 | This extension extends the functionality provided by clCreateBuffer, 48 | clCreateImage2D, and clCreateImage. It allows applications to pass an 49 | Android ANativeWindowBuffer (ANB), aka graphics buffer, that is based on 50 | ION memory allocator to these functions so that it can be mapped to the 51 | device's address space. Using this extension, we can avoid having to copy 52 | data back and forth between the graphic buffer and the device. This 53 | extension is for 2D images only; clCreateImage will fail out with 54 | CL_INVALID_VALUE if anything other than a 2D image is specified. 55 | 56 | Header File 57 | 58 | cl_ext.h 59 | 60 | New Tokens 61 | 62 | Accepted by the <host_ptr> argument of clCreateBuffer, clCreateImage2D: 63 | 64 | typedef struct _cl_mem_android_native_buffer_host_ptr 65 | { 66 | // Type of external memory allocation. 67 | // Must be CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM for Android Native Buffers. 
68 | cl_mem_ext_host_ptr ext_host_ptr; 69 | 70 | // Host pointer to the Android Native Buffer (ANativeBuffer*) 71 | void* anb_ptr; 72 | 73 | } cl_mem_android_native_buffer_host_ptr; 74 | 75 | Used together with CL_MEM_EXT_HOST_PTR_QCOM: 76 | 77 | CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM 0x40C6 78 | 79 | Additions to Chapter 5.2.1 of the OpenCL 1.1 Specification 80 | (Creating Buffer Objects) 81 | 82 | When CL_MEM_EXT_HOST_PTR_QCOM is enabled in the <flags> argument, then 83 | <host_ptr> is interpreted as a pointer to cl_mem_ext_host_ptr. When 84 | <host_ptr>->allocation_type is equal to 85 | CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM then <host_ptr> can also be 86 | interpreted as a pointer to cl_mem_android_native_buffer_host_ptr. 87 | 88 | In addition to that, the application must also initialize the following 89 | struct fields: 90 | 91 | * <host_ptr>->host_cache_policy should be set as follows - If the 92 | Graphic Buffer was created as cached and 93 | cl_qcom_ext_host_ptr_iocoherent is present, 94 | <host_ptr>->host_cache_policy can be set to either 95 | CL_MEM_HOST_WRITEBACK_QCOM or CL_MEM_HOST_IOCOHERENT_QCOM. If the 96 | Graphic Buffer was created as cached and 97 | cl_qcom_ext_host_ptr_iocoherent is not present, 98 | <host_ptr>->host_cache_policy should be set to 99 | CL_MEM_HOST_WRITEBACK_QCOM. It must be equal to 100 | CL_MEM_HOST_UNCACHED_QCOM otherwise. 101 | 102 | * <host_ptr>->anb_ptr must be the host virtual pointer associated with 103 | the ANativeBuffer. 104 | 105 | The caching policy provided in ext_host_ptr.host_cache_policy must be 106 | the same policy the GraphicBuffer is created with. Any mismatch will 107 | result in undefined behavior. 108 | 109 | Only Buffers and 2D images are supported. Use of other image types will 110 | result in undefined behavior. 111 | 112 | The application is responsible for maintaining the consistency of image 113 | attributes, i.e. format, width, height, and pitch, between the OpenCL 114 | image and the Android native buffer (aka graphics buffer). 
Also, if an 115 | OpenCL buffer is created from a native buffer, the application is 116 | responsible for making sure that the size of the buffer matches the 117 | actual linear size of the native buffer; creating a buffer with a size 118 | different than the passed-in native buffer will result in undefined 119 | behavior. 120 | 121 | The application is responsible for ensuring that the underlying native 122 | buffer is not released while the cl object is in use. Proper 123 | synchronization between different APIs that share the underlying buffer 124 | is to be handled by the application. 125 | 126 | Sample Code 127 | 128 | 1) Using the extension for CL buffer objects 129 | 130 | cl_mem buffer_object = NULL; 131 | size_t buffer_size_in_bytes = 0; 132 | cl_mem_android_native_buffer_host_ptr myANBmem = {0}; 133 | 134 | // Create an OpenCL buffer object that uses myANBmem as its data store. 135 | myANBmem.ext_host_ptr.allocation_type = 136 | CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM; 137 | myANBmem.ext_host_ptr.host_cache_policy = CL_MEM_HOST_UNCACHED_QCOM; 138 | myANBmem.anb_ptr = gb->getNativeBuffer(); // gb is Android GraphicBuffer 139 | 140 | // The stride returned is in pixels, so we have to factor in pixel_size 141 | // (4 for RGBA) when calculating buffer_size_in_bytes 142 | buffer_size_in_bytes = gb->getHeight() * gb->getStride() * 4; 143 | 144 | buffer_object = clCreateBuffer(context, 145 | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM, 146 | buffer_size_in_bytes, &myANBmem, &errcode); 147 | 148 | 2) Using the extension for CL image objects 149 | 150 | cl_mem image_object = NULL; 151 | cl_mem_android_native_buffer_host_ptr myANBmem = {0}; 152 | 153 | // Create an OpenCL image object that uses myANBmem as its data store. 
154 | myANBmem.ext_host_ptr.allocation_type = 155 | CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM; 156 | myANBmem.ext_host_ptr.host_cache_policy = CL_MEM_HOST_WRITEBACK_QCOM; 157 | myANBmem.anb_ptr = gb->getNativeBuffer(); // gb is Android GraphicBuffer 158 | 159 | imgw = gb->getWidth(); 160 | imgh = gb->getHeight(); 161 | // The stride returned is in pixels, so we have to factor in pixel_size 162 | // (4 for RGBA) when calculating row_pitch 163 | row_pitch = gb->getStride() * 4; 164 | // pick any CL format as long as it is consistent with graphic buffer 165 | // width and stride. 166 | image_format = {CL_RGBA, CL_UNSIGNED_INT8}; 167 | 168 | image_object = clCreateImage2D(context, 169 | CL_MEM_USE_HOST_PTR|CL_MEM_EXT_HOST_PTR_QCOM, &image_fmt, imgw, 170 | imgh, row_pitch, &myANBmem, &errcode); 171 | 172 | Revision History 173 | 174 | Revision 1, 2014/06/05: Initial version. 175 | Revision 2, 2017/06/16: Clean up. No functional changes. 176 | Revision 3, 2017/10/24: Updated sample code. 177 | Revision 4, 2017/11/13: Clean up. No functional changes. 178 | Revision 5, 2018/01/03: Add reference to cl_qcom_ext_host_ptr_iocoherent. 179 | Revision 6, 2018/01/19: Formatting and misc changes. No functional changes. 180 | 181 | -------------------------------------------------------------------------------- /docs/extensions/cl_qcom_compressed_image.txt: -------------------------------------------------------------------------------- 1 | Name Strings 2 | 3 | cl_qcom_compressed_image 4 | 5 | Contributors 6 | 7 | Balaji Calidas, Qualcomm Technologies, Inc. 8 | Roto Le, Qualcomm Technologies, Inc. 9 | Sreelakshmi Haridas Maruthur, Qualcomm Innovation Center, Inc. 10 | Chris Grimm, Qualcomm Technologies, Inc. 11 | 12 | Contact 13 | 14 | bcalidas at qti dot qualcomm dot com 15 | 16 | Version 17 | 18 | Version 3, 2018/01/19 19 | 20 | Status 21 | 22 | Shipping 23 | 24 | Extension Type 25 | 26 | OpenCL device extension 27 | 28 | Dependencies 29 | 30 | OpenCL 2.0 or later is required. 
31 | 32 | cl_qcom_android_native_buffer_host_ptr or cl_qcom_ion_host_ptr is required. 33 | 34 | This extension is written against the OpenCL 2.0 Specification. 35 | 36 | Overview 37 | 38 | This extension enables an application to read from and write to 39 | OpenCL image objects holding Qualcomm compressed image data. 40 | 41 | Compressed images are enabled on specific Qualcomm GPUs. 42 | The main advantage of this feature is to reduce the overhead of reading and 43 | writing images. 44 | 45 | An application can use this extension to query supported compressed image 46 | formats. It can then create an image of a supported compressed format from 47 | an ION or an Android Native Buffer (ANB) allocation. Only reads and writes 48 | of this image from inside a CL kernel are defined. The results of any 49 | host access are undefined. 50 | 51 | Header File 52 | 53 | cl_ext_qcom.h 54 | 55 | New Procedures and Functions 56 | 57 | None 58 | 59 | New Tokens 60 | 61 | Added to the list of supported cl_mem_flags by clCreateImage in 62 | Table 5.3 of the OpenCL 2.0 Specification. 63 | 64 | CL_MEM_COMPRESSED_IMAGE_QCOM 65 | 66 | Additions to Chapter 5.3.1 of the OpenCL 2.0 Specification 67 | (Creating Image Objects) 68 | 69 | (Append to the section introduced by cl_qcom_ion_host_ptr extension) 70 | 71 | Compressed images can be created from ION buffers in much the same way as 72 | uncompressed images. host_ptr->allocation_type should be set to 73 | CL_MEM_ION_HOST_PTR_QCOM. The application is responsible for the layout of 74 | compressed image data held in an ION allocation, which must follow the 75 | structure defined by the Qualcomm compression standard. Any deviation from the 76 | standard will lead to undefined results. 77 | 78 | While creating compressed images, when CL_MEM_EXT_HOST_PTR_QCOM is enabled 79 | in the flags argument, the image_row_pitch and image_slice_pitch fields of 80 | cl_image_desc must be set to 0.
81 | 82 | Additions to Chapter 5.3.2 of the OpenCL 2.0 Specification 83 | (Querying List of Supported Image Formats) 84 | 85 | When CL_MEM_COMPRESSED_IMAGE_QCOM is enabled in the argument, the 86 | implementation will return a list of supported compressed image formats. 87 | CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE and 88 | CL_MEM_KERNEL_READ_AND_WRITE flags retain the same meaning as for 89 | uncompressed formats. 90 | 91 | Additions to Chapter 6.13.14.2 of the OpenCL-C 2.0 Specification 92 | (Built-in Image Read Functions) 93 | 94 | The sampler used for reading compressed images takes the following values: 95 | CLK_FILTER_NEAREST and CLK_FILTER_LINEAR for the sampler's filter mode. 96 | CLK_ADDRESS_NONE, CLK_ADDRESS_CLAMP and CLK_ADDRESS_CLAMP_TO_EDGE for the 97 | sampler's addressing mode 98 | 99 | Android Native Buffer Option 100 | 101 | On Android platforms it is possible to create a compressed image from 102 | ANativeWindowBuffer (ANB) aka graphics buffer. The application is 103 | responsible for creating an ANB buffer of appropriate format and size which 104 | can be used to store the compressed image data. 105 | 106 | Additions to Chapter 5.3.1 of the OpenCL 2.0 Specification 107 | (Creating Image Objects) 108 | 109 | (Append to the section introduced by cl_qcom_android_native_buffer_host_ptr 110 | extension) 111 | 112 | Compressed images can be created from ANB buffers in much the same way as 113 | uncompressed images. ->allocation_type should be set to 114 | CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM. The application is responsible 115 | for the layout of compressed image data held in an ANB allocation, which 116 | must follow the structure defined by Qualcomm compression standard. Any 117 | deviation from the standard will lead to undefined results. 
118 | 119 | While creating compressed images, when CL_MEM_EXT_HOST_PTR_QCOM is enabled 120 | in the argument, the image_row_pitch and image_slice_pitch fields of 121 | cl_image_desc must be set to 0. 122 | 123 | Sample Code 124 | 125 | 1) Querying supported formats for read_only compressed images 126 | 127 | #define MAX_NUM_FORMATS 128 128 | 129 | cl_image_format format_list[ MAX_NUM_FORMATS] = {0}; 130 | cl_int num_format_list_entries = MAX_NUM_FORMATS; 131 | cl_int num_reported_image_formats = 0; 132 | cl_int errcode = 0; 133 | 134 | // Query the supported formats for COMPRESSED_IMAGE_QCOMM 135 | errcode = clGetSupportedImageFormats( 136 | context, 137 | CL_MEM_READ_ONLY | CL_MEM_COMPRESSED_IMAGE_QCOM, 138 | CL_MEM_OBJECT_IMAGE2D, 139 | num_format_list_entries, 140 | format_list, 141 | &num_reported_image_formats); 142 | 143 | 2) Creating an ION buffer for holding compressed image data. 144 | 145 | cl_mem_ion_host_ptr compressed_ionmem = {0}; 146 | 147 | // Initialize ION buffer attributes 148 | compressed_ionmem.ext_host_ptr.allocation_type = 149 | CL_MEM_ION_HOST_PTR_QCOM; 150 | compressed_ionmem.ext_host_ptr.host_cache_policy = 151 | CL_MEM_HOST_UNCACHED_QCOM; 152 | compressed_ionmem.ion_filedesc = 153 | ion_info_fd.file_descriptor; // file descriptor for ION 154 | compressed_ionmem.ion_hostptr = 155 | ion_info.host_virtual_address; // hostptr returned by ION 156 | 157 | 3) Using cl_qcom_ion_host_ptr holding compressed image data to create a 158 | compressed RGBA image object. 
159 | 160 | cl_image_format image_format = {0}; 161 | cl_image_desc image_desc = {0}; 162 | cl_int errcode = 0; 163 | 164 | // Set image format 165 | image_format->image_channel_order = CL_QCOM_COMPRESSED_RGBA; 166 | image_format->image_channel_data_type = CL_UNORM_INT8; 167 | 168 | // Set image parameters 169 | image_desc->image_width = 128; 170 | image_desc->image_height = 256; 171 | image_desc->image_row_pitch = 0; // must be 0 for compressed images 172 | image_desc->image_slice_pitch = 0; // must be 0 for compressed images 173 | 174 | // Create a compressed image 175 | compressed_rbga_image = clCreateImage( 176 | context, 177 | CL_MEM_EXT_HOST_PTR_QCOM | CL_MEM_READ_ONLY, 178 | image_format, 179 | image_desc, 180 | (void*)compressed_ionmem, 181 | &errcode); 182 | 183 | 4) Creating an ANB buffer for holding compressed data 184 | 185 | cl_mem_android_native_buffer_host_ptr compressed_ANBmem = {0}; 186 | GraphicBuffer *gb; // previously created 187 | 188 | compressed_ANBmem.ext_host_ptr.allocation_type = 189 | CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM; 190 | compressed_ANBmem.ext_host_ptr.host_cache_policy = 191 | CL_MEM_HOST_WRITEBACK_QCOM; 192 | // the hostptr to a native buffer and gb is an Android GraphicBuffer 193 | compressed_ANBmem.anb_ptr = gb->getNativeBuffer(); 194 | 195 | 5) Using cl_qcom_android_native_buffer_host_ptr holding compressed image 196 | data to create a compressed RGBA image object. 
197 | 198 | cl_image_format image_format = {0}; 199 | cl_image_desc image_desc = {0}; 200 | cl_int errcode = 0; 201 | 202 | // Set image format 203 | image_format->image_channel_order = CL_QCOM_COMPRESSED_RGBA; 204 | image_format->image_channel_data_type = CL_UNORM_INT8; 205 | 206 | // Set image parameters 207 | image_desc->image_width = 128; 208 | image_desc->image_height = 256; 209 | image_desc->image_row_pitch = 0; // always 0 for compressed images 210 | image_desc->image_slice_pitch = 0; // always 0 for compressed images 211 | 212 | // Create a compressed image 213 | compressed_rbga_image = clCreateImage( 214 | context, 215 | CL_MEM_EXT_HOST_PTR_QCOM | CL_MEM_READ_ONLY, 216 | image_format, 217 | image_desc, 218 | (void*)compressed_ANBmem, 219 | &errcode); 220 | 221 | Revision History 222 | 223 | Revision 1, 2016/06/02: Initial version. 224 | Revision 2, 2017/06/16: Clean up. No functional changes. 225 | Revision 3, 2018/01/19: Formatting and misc changes. No functional changes. 226 | 227 | -------------------------------------------------------------------------------- /docs/extensions/cl_qcom_create_buffer_from_image.txt: -------------------------------------------------------------------------------- 1 | Name Strings 2 | 3 | cl_qcom_create_buffer_from_image 4 | 5 | Contributors 6 | 7 | Alex Bourd, Qualcomm Technologies, Inc. 8 | Balaji Calidas, Qualcomm Technologies, Inc. 9 | David Garcia, Qualcomm Technologies, Inc. 10 | Samuel Pauls, Qualcomm Technologies, Inc. 11 | 12 | Contact 13 | 14 | bcalidas at qti dot qualcomm dot com 15 | 16 | Version 17 | 18 | Version 7, 2018/01/19 19 | 20 | Status 21 | 22 | Shipping 23 | 24 | Extension Type 25 | 26 | OpenCL device extension 27 | 28 | Dependencies 29 | 30 | OpenCL 1.0 and the cl_qcom_ext_host_ptr extension are required. This 31 | extension is written against OpenCL 1.2 rev 15 and cl_qcom_ext_host_ptr 32 | rev 1. 
33 | 34 | Overview 35 | 36 | There are times when developers want to access image data as raw pointers 37 | in the OpenCL C language without the safety layer provided by the image 38 | read/write builtin functions. One particular case for this would be reading 39 | from or writing to EGL external images exposed indirectly to OpenCL through 40 | GL/CL interop extensions. Another example would be expert developers who want 41 | to read/write multiple pixels with a single memory load/store operation. 42 | 43 | Header File 44 | 45 | cl_ext_qcom.h 46 | 47 | New Procedures and Functions 48 | 49 | clCreateBufferFromImageQCOM 50 | 51 | New Tokens 52 | 53 | Accepted as arguments of clGetDeviceImageInfoQCOM: 54 | 55 | CL_BUFFER_FROM_IMAGE_ROW_PITCH_QCOM 0x40C0 56 | CL_BUFFER_FROM_IMAGE_SLICE_PITCH_QCOM 0x40C1 57 | 58 | Additions to Chapter 5.2 of the OpenCL 1.2 Specification 59 | 60 | The function 61 | 62 | cl_mem clCreateBufferFromImageQCOM(cl_mem image, 63 | cl_mem_flags flags, 64 | cl_int *errcode_ret) 65 | 66 | can be used to create a new buffer object from an existing image. 67 | 68 | image is a valid image and cannot be of type 69 | CL_MEM_OBJECT_IMAGE1D_BUFFER nor can it use CL_MEM_USE_HOST_PTR. 70 | 71 | flags is a bit-field that is used to specify allocation and usage 72 | information about the buffer memory object being created and is described 73 | in Table 5.3. 74 | 75 | errcode_ret will return an appropriate error code. If errcode_ret is 76 | NULL, no error code is returned. 77 | 78 | If the call succeeds, the buffer that is returned references the data store 79 | allocated for image and points to the origin pixel in this data store. The 80 | data layout is equivalent to what is produced by clEnqueueMapImage when 81 | origin is (0,0,0) and region is (width, height, depth). The 82 | image from which the buffer is created is called the parent image of the 83 | buffer.
84 | 85 | In order to access the pixel data in the returned buffer correctly, the 86 | client must query the parent image row pitch and slice pitch using 87 | clGetDeviceImageInfoQCOM with the parameter names 88 | CL_BUFFER_FROM_IMAGE_ROW_PITCH_QCOM and 89 | CL_BUFFER_FROM_IMAGE_SLICE_PITCH_QCOM. For example, it is incorrect to 90 | assume that the row pitch of the data contained in the buffer is simply the 91 | image element size multiplied by the image width. 92 | 93 | Concurrent reading from and writing to both a buffer object and its parent 94 | image is undefined. Concurrent reading from and writing to buffer objects 95 | created with the same parent image is undefined. Only concurrent reading 96 | from both a buffer object and its parent image object and concurrent reading 97 | from multiple buffer objects created from the same image is defined. 98 | 99 | clCreateBufferFromImageQCOM returns a valid non-zero buffer object and 100 | errcode_ret is set to CL_SUCCESS if the buffer object is created 101 | successfully. Otherwise, it returns a NULL value with one of the following 102 | error values returned in errcode_ret: 103 | 104 | * CL_INVALID_MEM_OBJECT if image is not a valid image object or if it is 105 | of type CL_MEM_OBJECT_IMAGE1D_BUFFER. 106 | 107 | * CL_INVALID_VALUE if image was created with CL_MEM_WRITE_ONLY and 108 | flags specifies CL_MEM_READ_WRITE or CL_MEM_READ_ONLY, or if image was created 109 | with CL_MEM_READ_ONLY and flags specifies CL_MEM_READ_WRITE or 110 | CL_MEM_WRITE_ONLY, or if image was created with CL_MEM_USE_HOST_PTR, or 111 | if flags specifies CL_MEM_USE_HOST_PTR or CL_MEM_ALLOC_HOST_PTR or 112 | CL_MEM_COPY_HOST_PTR. 113 | 114 | * CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory 115 | for the buffer object. 116 | 117 | * CL_OUT_OF_RESOURCES if there is a failure to allocate resources required 118 | by the OpenCL implementation on the device.
119 | 120 | * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required 121 | by the OpenCL implementation on the host. 122 | 123 | Modifications to Table 5.xxx in cl_qcom_ext_host_ptr 124 | 125 | Additional supported param_names by clGetDeviceImageInfoQCOM 126 | 127 | cl_image_pitch_info_qcom Return Type Info returned in param_value 128 | 129 | CL_BUFFER_FROM_IMAGE_ cl_uint Returns the image row pitch in 130 | ROW_PITCH_QCOM bytes supported by this device 131 | in regard to 132 | cl_qcom_create_buffer_from_image 133 | 134 | CL_BUFFER_FROM_IMAGE_ cl_int Returns the image slice pitch in 135 | SLICE_PITCH_QCOM bytes supported by this device 136 | in regard to 137 | cl_qcom_create_buffer_from_image 138 | 139 | Modifications to Table 5.9 in Chapter 5.4 of the OpenCL 1.2 Specification 140 | 141 | The row that defines the semantics of CL_MEM_ASSOCIATED_MEMOBJECT now reads: 142 | 143 | Return memory object from which memobj is created. 144 | 145 | This returns the parent buffer argument specified when 146 | clCreateSubBuffer or clCreateImage were called. It also returns the 147 | parent image specified when clCreateBufferFromImageQCOM was called. 148 | 149 | Otherwise a NULL value is returned. 150 | 151 | Issues 152 | 153 | (1) Should we prevent buffers from being created out of 154 | CL_MEM_OBJECT_IMAGE1D_BUFFER images? 155 | 156 | RESOLVED: Qualcomm has added the restriction that the image cannot be of 157 | type CL_MEM_OBJECT_IMAGE1D_BUFFER. 158 | 159 | (2) How do we define the layout of the data pointed at by the buffer 160 | created by clCreateBufferFromImageQCOM? 161 | 162 | RESOLVED: The data in the buffer must be linear.
163 | 164 | Sample Code 165 | 166 | cl_mem buffer = NULL; 167 | cl_mem image = NULL; 168 | size_t row_pitch = 0; 169 | cl_image_desc image_desc = {0}; 170 | 171 | image_desc.buffer = NULL; 172 | image_desc.image_array_size = 1; 173 | image_desc.image_width = W; 174 | image_desc.image_height = H; 175 | image_desc.image_depth = 1; 176 | image_desc.image_row_pitch = 0; 177 | image_desc.image_slice_pitch = 0; 178 | image_desc.image_type = CL_MEM_OBJECT_IMAGE2D; 179 | image_desc.num_mip_levels = 0; 180 | image_desc.num_samples = 0; 181 | 182 | // Create a 2D image 183 | clCreateImage(context, CL_MEM_READ_WRITE, format, &image_desc, NULL, NULL); 184 | 185 | // convert the image to a buffer 186 | buffer = clCreateBufferFromImageQCOM(image, CL_MEM_READ_WRITE, NULL); 187 | 188 | // retrieve the image row pitch in order to calculate total size 189 | clGetDeviceImageInfoQCOM(device, 190 | width, 191 | height, 192 | &format, 193 | CL_BUFFER_FROM_IMAGE_ROW_PITCH_QCOM, 194 | sizeof(row_pitch), 195 | &row_pitch, 196 | NULL); 197 | 198 | // map the buffer for reading using the total image size 199 | clEnqueueMapBuffer(queue, 200 | buffer, 201 | CL_TRUE, 202 | CL_MAP_READ, 203 | 0, 204 | image_desc.image_height * row_pitch, 205 | 0, 206 | NULL, 207 | NULL, 208 | NULL); 209 | 210 | Revision History 211 | 212 | Revision 1, 2012/10/17: First draft. 213 | Revision 2, 2013/02/28: The implementation doesn't support an image created 214 | with CL_MEM_USE_HOST_PTR. 215 | Revision 3, 2013/05/01: clGetDeviceImageInfoQCOM must now be used instead 216 | of clGetImageInfo to get the pitches. 217 | Revision 4, 2013/08/27: Created new image pitch tokens for 218 | clGetDeviceImageInfoQCOM. 219 | Revision 5, 2017/06/16: Clean up. No functional changes. 220 | Revision 6, 2017/11/13: Clean up. No functional changes. 221 | Revision 7, 2018/01/19: Formatting and misc changes. No functional changes. 
222 | -------------------------------------------------------------------------------- /docs/extensions/cl_qcom_ext_host_ptr.txt: -------------------------------------------------------------------------------- 1 | Name Strings 2 | 3 | cl_qcom_ext_host_ptr 4 | 5 | Contributors 6 | 7 | Balaji Calidas, Qualcomm Technologies, Inc. 8 | David Garcia, Qualcomm Technologies, Inc. 9 | Rajeev Rao, Qualcomm Technologies, Inc. 10 | Sushmita Susheelendra, Qualcomm Innovation Center, Inc. 11 | 12 | Contact 13 | 14 | bcalidas at qti dot qualcomm dot com 15 | 16 | Version 17 | 18 | Version 5, 2018/01/19 19 | 20 | Number 21 | 22 | OpenCL Extension #21 23 | 24 | Status 25 | 26 | Shipping 27 | 28 | Extension Type 29 | 30 | OpenCL device extension 31 | 32 | Dependencies 33 | 34 | OpenCL 1.1 is required. 35 | 36 | This extension is written against the OpenCL 1.1 specification 37 | 38 | Overview 39 | 40 | This extension extends the functionality provided by clCreateBuffer, 41 | clCreateImage2D, clCreateImage3D. It allows applications to specify a new 42 | flag CL_MEM_EXT_HOST_PTR_QCOM which enables the driver to map external 43 | memory allocations, to be defined in layered extensions, to the device's 44 | address space and thus avoiding having to copy data back and forth between 45 | the host and the device. 
46 | 47 | Header File 48 | 49 | cl_ext.h 50 | 51 | New Procedures and Functions 52 | 53 | cl_int clGetDeviceImageInfoQCOM( 54 | cl_device_id device, 55 | size_t image_width, 56 | size_t image_height, 57 | const cl_image_format *image_format, 58 | cl_image_pitch_info_qcom param_name, 59 | size_t param_value_size, 60 | void *param_value, 61 | size_t *param_value_size_ret); 62 | 63 | New Types 64 | 65 | typedef cl_uint cl_image_pitch_info_qcom; 66 | 67 | New Tokens 68 | 69 | Accepted by the argument of clGetDeviceInfo 70 | 71 | CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0 72 | CL_DEVICE_PAGE_SIZE_QCOM 0x40A1 73 | 74 | Accepted by the argument of clCreateBuffer, clCreateImage2D and 75 | clCreateImage3D: 76 | 77 | CL_MEM_EXT_HOST_PTR_QCOM (1 << 29) 78 | 79 | Accepted by the argument of clCreateBuffer, clCreateImage2D and 80 | clCreateImage3D: 81 | 82 | typedef struct _cl_mem_ext_host_ptr 83 | { 84 | // Type of external memory allocation. 85 | // Legal values will be defined in layered extensions. 86 | cl_uint allocation_type; 87 | 88 | // Host cache policy for this external memory allocation. 89 | cl_uint host_cache_policy; 90 | 91 | } cl_mem_ext_host_ptr; 92 | 93 | Accepted values for cl_mem_ext_host_ptr::host_cache_policy: 94 | 95 | CL_MEM_HOST_UNCACHED_QCOM 0x40A4 96 | CL_MEM_HOST_WRITEBACK_QCOM 0x40A5 97 | CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6 98 | CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7 99 | 100 | Accepted by the argument of clGetDeviceImageInfoQCOM 101 | 102 | CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2 103 | CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3 104 | 105 | Additions to Chapter 5.2.1 of the OpenCL 1.1 Specification 106 | (Creating Buffer Objects) 107 | 108 | Add the following token to Table 5.3 (clCreateBuffer List of supported 109 | cl_mem_flags values): 110 | 111 | CL_MEM_EXT_HOST_PTR_QCOM This flag is valid only when used 112 | together with CL_MEM_USE_HOST_PTR. 
113 | If specified, it indicates that the 114 | argument provided by the 115 | application is actually a pointer 116 | to cl_mem_ext_host_ptr. 117 | 118 | 119 | When CL_MEM_EXT_HOST_PTR_QCOM is enabled in the argument, then 120 | is interpreted as a pointer to cl_mem_ext_host_ptr. The 121 | application must then initialize cl_mem_ext_host_ptr::allocation_type to 122 | the allowed token values defined in future layered extensions. 123 | 124 | The application must also initialize 125 | cl_mem_ext_host_ptr::host_cache_policy to one of 126 | CL_MEM_HOST_UNCACHED_QCOM, CL_MEM_HOST_WRITEBACK_QCOM, 127 | CL_MEM_HOST_WRITETHROUGH_QCOM, or CL_MEM_HOST_WRITE_COMBINING_QCOM 128 | according to the cache policy used in the host for this memory 129 | allocation. 130 | 131 | Add the following token to Table 4.3 (clGetDeviceInfo OpenCL Device 132 | Queries): 133 | 134 | CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM Returns the amount of memory 135 | padding that the application 136 | must add to the end of every 137 | external allocation that will 138 | be used in conjunction with 139 | CL_MEM_EXT_HOST_PTR_QCOM. 140 | 141 | CL_DEVICE_PAGE_SIZE_QCOM Returns the device's page 142 | size. 143 | 144 | The application may query the row and slice pitch values using 145 | clGetDeviceImageInfoQCOM and provide the queried values or any other 146 | supported value to clCreateImage2D and clCreateImage3D when using 147 | CL_MEM_EXT_HOST_PTR_QCOM. 148 | 149 | A supported value for row pitch and slice pitch is defined respectively 150 | as: 151 | 152 | * Any value greater than or equal to CL_IMAGE_ROW_PITCH that is also a 153 | multiple of CL_IMAGE_ROW_ALIGNMENT_QCOM. 154 | * Any value greater than or equal to CL_IMAGE_SLICE_PITCH that is also 155 | a multiple of CL_IMAGE_SLICE_ALIGNMENT_QCOM. 
156 | 157 | Additions to Section 5.3 after clGetImageInfo 158 | 159 | An application that creates OpenCL image objects with the 160 | CL_MEM_EXT_HOST_PTR_QCOM flag can invoke the following function to query the 161 | required row pitch, slice pitch and alignment for a particular device: 162 | 163 | cl_int clGetDeviceImageInfoQCOM( 164 | cl_device_id device, 165 | size_t image_width, 166 | size_t image_height, 167 | const cl_image_format *image_format, 168 | cl_image_pitch_info_qcom param_name, 169 | size_t param_value_size, 170 | void *param_value, 171 | size_t *param_value_size_ret); 172 | 173 | device - is a valid device 174 | 175 | image_width - width of the image in image elements (pixels) 176 | 177 | image_height - height of the image in image elements (pixels) 178 | 179 | image_format - pointer to image format descriptor structure 180 | 181 | param_name - specifies the information to query. The list of 182 | supported param_name types and the information 183 | returned in param_value by clGetImageInfo is 184 | described in Table 5.XXX 185 | 186 | param_value - is a pointer to memory where the appropriate result 187 | being queried is returned. If param_value is NULL, it 188 | is ignored. 189 | 190 | param_value_size - is used to specify the size in bytes of memory 191 | pointed to by param_value. This size must be greater 192 | than or equal to the size of return type as described 193 | in Table 5.8. 194 | 195 | param_value_size_ret - returns the actual size in bytes of data being 196 | queried by param_value. If param_value_size_ret is 197 | NULL, it is ignored. 198 | 199 | clGetDeviceImageInfoQCOM returns CL_SUCCESS if the function is executed 200 | successfully. Otherwise, it returns one of the following errors: 201 | 202 | CL_INVALID_VALUE - if param_name is not valid, or if size in bytes 203 | specified by param_value_size is less than the 204 | size of return type for that param_value and 205 | param_value is not NULL. 
206 | 207 | CL_INVALID_MEM_OBJECT - if image is not a valid image object. 208 | 209 | CL_OUT_OF_RESOURCES - if there is a failure to allocate resources required 210 | by the OpenCL implementation on the device. 211 | 212 | CL_OUT_OF_HOST_MEMORY - if there is a failure to allocate resources required 213 | by the OpenCL implementation on the host. 214 | 215 | Table 5.XXX 216 | 217 | List of supported param_names by clGetDeviceImageInfoQCOM 218 | 219 | cl_image_pitch_info_qcom Return Type Info returned in 220 | param_value 221 | 222 | CL_IMAGE_ROW_PITCH cl_uint Returns the image row pitch 223 | supported by this device 224 | 225 | CL_IMAGE_ROW_ALIGNMENT_QCOM cl_uint Returns the image row pitch 226 | alignment supported by this 227 | device 228 | 229 | CL_IMAGE_SLICE_PITCH cl_uint Returns the image slice 230 | pitch supported by this 231 | device 232 | 233 | CL_IMAGE_SLICE_ALIGNMENT_QCOM cl_uint Returns the image slice 234 | pitch alignment supported 235 | by this device 236 | 237 | Additions to Section 5.3.1. (Creating Image Objects) at the end of the list of 238 | errors returned by clCreateImage2D and clCreateImage3D: 239 | 240 | CL_INVALID_VALUE if flags has CL_MEM_EXT_HOST_PTR_QCOM enabled and yet 241 | CL_MEM_USE_HOST_PTR is not enabled. 242 | 243 | CL_INVALID_VALUE if flags has CL_MEM_EXT_HOST_PTR_QCOM enabled and if 244 | image_row_pitch and/or image_slice_pitch fail to match the requirements 245 | of section 5.3. 246 | 247 | CL_INVALID_VALUE if any of the fields in the struct pointed at by 248 | host_ptr are invalid. 249 | 250 | Revision History 251 | 252 | Revision 1, 2013/05/27: Initial version. 253 | Revision 2, 2017/06/16: Clean up. No functional changes. 254 | Revision 3, 2017/11/13: Clean up. No functional changes. 255 | Revision 4, 2017/11/30: Corrected parameter description for 256 | clGetDeviceImageInfoQCOM. 257 | Revision 5, 2018/01/19: Formatting and misc changes. No functional changes.
258 | 259 | -------------------------------------------------------------------------------- /docs/extensions/cl_qcom_ext_host_ptr_iocoherent.txt: -------------------------------------------------------------------------------- 1 | Name Strings 2 | 3 | cl_qcom_ext_host_ptr_iocoherent 4 | 5 | Contributors 6 | 7 | Balaji Calidas, Qualcomm Technologies, Inc. 8 | Manali Torpe, Qualcomm Innovation Center, Inc. 9 | Sreelakshmi Haridas Maruthur, Qualcomm Innovation Center, Inc. 10 | 11 | Contact 12 | 13 | bcalidas at qti dot qualcomm dot com 14 | 15 | Version 16 | 17 | Version 4, 2018/03/06 18 | 19 | Number 20 | 21 | OpenCL Extension #53 22 | 23 | Status 24 | 25 | Shipping 26 | 27 | Extension Type 28 | 29 | OpenCL device extension 30 | 31 | Dependencies 32 | 33 | OpenCL 1.1 is required. 34 | cl_qcom_ext_host_ptr is required. 35 | 36 | This extension extends the functionality of cl_qcom_ext_host_ptr. 37 | 38 | Overview 39 | 40 | This extension extends the functionality provided by cl_qcom_ext_host_ptr by 41 | adding a new host cache policy. It allows applications to specify a new 42 | value, CL_MEM_HOST_IOCOHERENT_QCOM, for 43 | cl_mem_ext_host_ptr::host_cache_policy. When the application selects this 44 | value for host cache policy, the imported allocation is mapped as 45 | io-coherent for the GPU. This in turn avoids the need for the OpenCL driver 46 | to explicitly issue CPU cache operation calls. Although GPU performance can 47 | be slower in some cases for io-coherent allocations, the overall performance 48 | can improve due to the elimination of explicit CPU cache operations. 49 | 50 | Header File 51 | 52 | cl_ext.h 53 | 54 | New Tokens 55 | 56 | Modification to handling of argument of clCreateBuffer, 57 | clCreateImage2D and clCreateImage3D when CL_MEM_EXT_HOST_PTR_QCOM is 58 | specified in the argument. 
59 | 60 | New accepted value for cl_mem_ext_host_ptr::host_cache_policy: 61 | 62 | CL_MEM_HOST_IOCOHERENT_QCOM 0x40A9 63 | 64 | As described in the cl_qcom_ext_host_ptr spec, when CL_MEM_EXT_HOST_PTR_QCOM 65 | is enabled in the argument, then is interpreted as a 66 | pointer to cl_mem_ext_host_ptr. The application must initialize 67 | cl_mem_ext_host_ptr::host_cache_policy to one of CL_MEM_HOST_UNCACHED_QCOM, 68 | CL_MEM_HOST_WRITEBACK_QCOM, or CL_MEM_HOST_IOCOHERENT_QCOM according to the 69 | cache policy used in the host for this memory allocation. 70 | CL_MEM_HOST_IOCOHERENT_QCOM can only be specified when the memory was 71 | originally allocated as cached. Use of this value with an uncached 72 | allocation will lead to undefined results. 73 | 74 | Revision History 75 | 76 | Revision 1, 2018/01/03: Initial version. 77 | Revision 2, 2018/01/15: Minor edits. 78 | Revision 3, 2018/01/19: Formatting and misc changes. No functional changes. 79 | Revision 4, 2018/03/06: Corrected token value. 80 | 81 | -------------------------------------------------------------------------------- /docs/extensions/cl_qcom_extract_image_plane.txt: -------------------------------------------------------------------------------- 1 | Name Strings 2 | 3 | cl_qcom_extract_image_plane 4 | 5 | Contributors 6 | 7 | Roto Le, Qualcomm Technologies, Inc. 8 | Balaji Calidas, Qualcomm Technologies, Inc. 9 | 10 | Contact 11 | 12 | bcalidas at qti dot qualcomm dot com 13 | 14 | Version 15 | 16 | Version 5, 2018/01/19 17 | 18 | Status 19 | 20 | Shipping 21 | 22 | Extension Type 23 | 24 | OpenCL device extension 25 | 26 | Dependencies 27 | 28 | OpenCL 2.0 and the cl_qcom_other_image and cl_qcom_compressed_image 29 | extension are required. 30 | 31 | This extension is written against OpenCL 2.0 rev 26, cl_qcom_other_image 32 | rev 1, and cl_qcom_compressed_image rev 1. 
33 | 34 | Overview 35 | 36 | This extension allows an application to create a single plane derivative of 37 | a multi-plane planar image. This would allow the application to process each 38 | plane of a planar image as a separate image. For example a YUV image could 39 | be split into Y-only and UV-only images. Image processing could be applied 40 | exclusively to the Y-only image. Also, on some GPUs, direct writes to a YUV 41 | image may not be supported but writes to Y-only and UV-only images may be 42 | available. The child derivative images and the parent planar image both 43 | reference the same image data. No data copy is involved. 44 | 45 | Header File 46 | 47 | None 48 | 49 | New Procedures and Functions 50 | 51 | None 52 | 53 | New Tokens 54 | 55 | None 56 | 57 | Additions to Chapter 5.3 of the OpenCL 2.0 Specification 58 | 59 | The function 60 | 61 | cl_mem clCreateImage(cl_context context, 62 | cl_mem_flags flags, 63 | const cl_image_format * image_format, 64 | const cl_image_desc * image_desc, 65 | void * host_ptr, 66 | cl_int * errcode_ret) 67 | 68 | can be used to create child images from an existing parent planar image. 69 | 70 | is a pointer to a structure that describes type and dimensions 71 | of the child image to be created. 72 | 73 | * image_desc->mem_object refers to a valid planar image memory object, which 74 | is the parent image of the to be created child images. 75 | 76 | * image_desc->image_type must match exactly the type of the parent image 77 | object and it must also be supported image type for the given 78 | . 79 | 80 | * image_desc's dimensions must match exactly the dimensions of the parent 81 | image object. 82 | 83 | * image_desc's pitches (i.e image_row_pitch, image_slice_pitch) must be set 84 | to zero. 85 | 86 | is a valid OpenCL context on which the image object is to be 87 | created. 88 | 89 | * context must be identical to the context of the parent image object. 90 | 91 | holds the format of the to be created child image. 
92 | 93 | * The child image_format must be a single plane derivative of the parent 94 | image. For example CL_QCOM_NV12_Y and CL_QCOM_NV12_UV are derivatives of 95 | CL_QCOM_NV12. 96 | 97 | * The compression type or packing type of the child image formats must match 98 | exactly that of the parent image. For example creating a linear 99 | CL_QCOM_NV12_Y child image from a compressed CL_QCOM_COMPRESSED_NV12 100 | parent image is not valid. 101 | 102 | flags is a bit-field that is used to specify allocation and usage 103 | information about the image memory object to be created and is described in 104 | Table 5.3. 105 | 106 | * The read/write bits in the flags must be set according to the read/write 107 | capability supported for the image_format. 108 | 109 | * CL_MEM_USE_HOST_PTR, CL_MEM_ALLOC_HOST_PTR, and CL_MEM_COPY_HOST_PTR 110 | cannot be set for the flags when creating a child image. 111 | 112 | host_ptr must be set to NULL. 113 | 114 | errcode_ret will return an appropriate error code. If errcode_ret is 115 | NULL, no error code is returned. 116 | 117 | * CL_INVALID_IMAGE_SIZE if image dimensions specified in image_desc do not 118 | match the dimensions of the parent image. 119 | 120 | * CL_INVALID_IMAGE_DESCRIPTOR if values specified in image_desc do not 121 | satisfy the rules for image_desc->mem_object and the child image's 122 | pitches specified above. 123 | 124 | * CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if image_format does not satisfy the 125 | rules for the child image formats specified above. 126 | 127 | If the call succeeds, the returned child image object references the image 128 | data of the parent image's plane specified by the child image_format. The 129 | child image data & its layout therefore is identical to the associated plane 130 | on the parent image with origin is (0,0,0) and region is (width, 131 | height, depth). It is important to notice that coordinate offset is not 132 | allowed on the child image. 133 | 134 | Concurrent reading from and writing to both a child image object and its 135 | parent image object is undefined.
Concurrent reading from and writing to 136 | child images created with the same parent image is undefined. Only 137 | concurrent reading from both a child image and its parent image, and 138 | concurrent reading from multiple child images created from the same parent 139 | image is defined. 140 | 141 | Sample Code 142 | 143 | // Create a 2D CL_QCOM_COMPRESSED_NV12_Y child image from an existing 2D 144 | // CL_QCOM_COMPRESSED_NV12 parent image. 145 | 146 | cl_int errcode = CL_SUCCESS; 147 | cl_mem_flags child_image_flag = {0}; 148 | cl_image_desc child_image_desc = {0}; 149 | cl_image_format child_planar_y_format = {0}; 150 | 151 | // Query the parent_image's dimensions 152 | errcode = clGetImageInfo (parent_planar_image, CL_IMAGE_WIDTH, 153 | sizeof(size_t), (void*)&parent_image_width, NULL); 154 | if(errcode != CL_SUCCESS) exit(-1); 155 | 156 | errcode = clGetImageInfo (parent_planar_image, CL_IMAGE_HEIGHT, 157 | sizeof(size_t), (void*)&parent_image_height, NULL); 158 | if(errcode != CL_SUCCESS) exit(-1); 159 | 160 | // Setting the image_desc & image_format for creating the child_image 161 | child_planar_y_format.image_channel_order = CL_QCOM_COMPRESSED_NV12_Y; 162 | child_planar_y_format.image_channel_data_type = CL_UNORM_INT8; 163 | 164 | child_image_desc.image_type = CL_MEM_OBJECT_IMAGE2D; 165 | child_image_desc.image_width = parent_image_width; 166 | child_image_desc.image_height = parent_image_height; 167 | child_image_desc.image_row_pitch = 0; 168 | child_image_desc.image_slice_pitch = 0; 169 | child_image_desc.mem_object = parent_planar_image; 170 | 171 | child_image_flag = CL_MEM_READ_WRITE; 172 | 173 | child_y_image = clCreateImage(context, 174 | child_image_flag, 175 | &child_planar_y_format, 176 | &child_image_desc, 177 | NULL, // host_ptr must be set to NULL 178 | &errcode); 179 | if(errcode != CL_SUCCESS) exit(-1); 180 | 181 | Revision History 182 | 183 | Revision 1, 2016/12/06: First draft. 184 | Revision 2, 2017/03/19: Second draft.
185 | Revision 3, 2017/06/16: Clean up. No functional changes. 186 | Revision 4, 2017/11/13: Clean up. No functional changes. 187 | Revision 5, 2018/01/19: Formatting and misc changes. No functional changes. 188 | 189 | -------------------------------------------------------------------------------- /docs/extensions/cl_qcom_ion_host_ptr.txt: -------------------------------------------------------------------------------- 1 | Name Strings 2 | 3 | cl_qcom_ion_host_ptr 4 | 5 | Contributors 6 | 7 | Balaji Calidas, Qualcomm Technologies, Inc. 8 | David Garcia, Qualcomm Technologies, Inc. 9 | Sushmita Susheelendra, Qualcomm Innovation Center, Inc. 10 | 11 | Contact 12 | 13 | bcalidas at qti dot qualcomm dot com 14 | 15 | Version 16 | 17 | Version 7, 2018/01/19 18 | 19 | Number 20 | 21 | OpenCL Extension #22 22 | 23 | Status 24 | 25 | Shipping 26 | 27 | Extension Type 28 | 29 | OpenCL device extension 30 | 31 | Dependencies 32 | 33 | OpenCL 1.1 is required. cl_qcom_ext_host_ptr is required. 34 | 35 | This extension is written against the OpenCL 1.1 specification 36 | 37 | If present, cl_qcom_ext_host_ptr_iocoherent extends the functionality of 38 | this extension. 39 | 40 | Overview 41 | 42 | This extension extends the functionality provided by clCreateBuffer, 43 | clCreateImage2D, clCreateImage3D. It allows applications to pass an ION 44 | memory allocation to these functions so that it can be mapped to the 45 | device's address space and thus avoid having to copy data back and forth 46 | between the host and the device. 47 | 48 | Header File 49 | 50 | cl_ext.h 51 | 52 | New Tokens 53 | 54 | Accepted by the argument of clCreateBuffer, clCreateImage2D and 55 | clCreateImage3D: 56 | 57 | typedef struct _cl_mem_ion_host_ptr 58 | { 59 | // Type of external memory allocation. 60 | // Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. 
61 | cl_mem_ext_host_ptr ext_host_ptr; 62 | 63 | // ION file descriptor 64 | int ion_filedesc; 65 | 66 | // Host pointer to the ION allocated memory 67 | void* ion_hostptr; 68 | 69 | } cl_mem_ion_host_ptr; 70 | 71 | Used together with CL_MEM_EXT_HOST_PTR_QCOM: 72 | 73 | CL_MEM_ION_HOST_PTR_QCOM 0x40A8 74 | 75 | Additions to Chapter 5.2.1 of the OpenCL 1.1 Specification 76 | 77 | (Creating Buffer Objects) 78 | 79 | When CL_MEM_EXT_HOST_PTR_QCOM is enabled in the flags argument, then 80 | host_ptr is interpreted as a pointer to cl_mem_ext_host_ptr. When 81 | host_ptr->allocation_type is equal to CL_MEM_ION_HOST_PTR_QCOM then 82 | host_ptr can also be interpreted as a pointer to cl_mem_ion_host_ptr. 83 | 84 | In addition to that, the application must also initialize the following 85 | struct fields: 86 | 87 | * host_ptr->host_cache_policy should be set as follows - If the ION 88 | allocation was made with the flag ION_FLAG_CACHED enabled and 89 | cl_qcom_ext_host_ptr_iocoherent is present, host_ptr->host_cache_policy 90 | can be set to either CL_MEM_HOST_WRITEBACK_QCOM or 91 | CL_MEM_HOST_IOCOHERENT_QCOM. If the ION allocation was made with the 92 | flag ION_FLAG_CACHED enabled and cl_qcom_ext_host_ptr_iocoherent is not 93 | present, host_ptr->host_cache_policy should be set to 94 | CL_MEM_HOST_WRITEBACK_QCOM. It must be equal to 95 | CL_MEM_HOST_UNCACHED_QCOM otherwise. 96 | 97 | * host_ptr->ion_filedesc must be the file descriptor of the ION memory 98 | allocation that the application wants to use as storage bits for the 99 | memory object. 100 | 101 | * host_ptr->ion_hostptr must be the host virtual pointer associated with 102 | the same ION memory allocation. If the application does not need to map 103 | the newly created cl memory object for host access, it can set 104 | host_ptr->ion_hostptr to NULL. If this happens, then calls to host 105 | access functions such as clEnqueueMapBuffer will fail and return 106 | an error code of CL_INVALID_OPERATION.
Setting ->ion_hostptr to 107 | NULL avoids the need for the application to make an extra map call for 108 | acquiring the host virtual pointer. 109 | 110 | Memory specified this way must be aligned to the device's page size. The 111 | application can query the device's page size by using 112 | clGetDeviceInfo(..., CL_DEVICE_PAGE_SIZE_QCOM, ...). 113 | 114 | Once the memory object is created, the application must call 115 | clEnqueueMapBuffer/clEnqueueMapImage with appropriate flags before 116 | reading or writing to it on the host. The host unmaps the region when 117 | accesses (reads and/or writes) to this mapped region by the host are 118 | complete. As per the OpenCL 1.2 specification, clEnqueueMapBuffer and 119 | clEnqueueMapImage act as synchronization points for the region of the 120 | buffer object being mapped. 121 | 122 | Sample Code 123 | 124 | 1) Using the extension for CL buffer objects 125 | 126 | cl_mem buffer_object = NULL; 127 | size_t buffer_size_in_bytes = 0; 128 | size_t buffer_size_with_padding = 0; 129 | cl_mem_ion_host_ptr myionmem = {0}; 130 | size_t ext_mem_padding_in_bytes = 0; 131 | size_t device_page_size = 0; 132 | 133 | // Query the device's page size and the amount of padding necessary at 134 | // the end of the buffer. 135 | clGetDeviceInfo(device, CL_DEVICE_PAGE_SIZE_QCOM, 136 | sizeof(device_page_size), &device_page_size, NULL); 137 | clGetDeviceInfo(device, CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM, 138 | sizeof(ext_mem_padding_in_bytes), &ext_mem_padding_in_bytes, NULL); 139 | 140 | // Compute the desired size for the data in the buffer. 141 | buffer_size_in_bytes = foobar(); 142 | 143 | // Compute amount of memory that needs to be allocated for the buffer 144 | // including padding. 145 | buffer_size_with_padding = buffer_size_in_bytes + 146 | ext_mem_padding_in_bytes; 147 | 148 | // Make an ION memory allocation of size buffer_size_with_padding here. 149 | // Note that allocating buffer_size_in_bytes instead would be a mistake. 
150 | // It's important to allocate the extra padding. Let's say the 151 | // parameters of the allocation are stored in a struct named ion_info 152 | // that we will use below. 153 | 154 | // Create an OpenCL buffer object that uses ion_info as its data store. 155 | // Notice how the buffer is created with size buffer_size_in_bytes, not 156 | // buffer_size_with_padding. 157 | myionmem.ext_host_ptr.allocation_type = CL_MEM_ION_HOST_PTR_QCOM; 158 | myionmem.ext_host_ptr.host_cache_policy = CL_MEM_HOST_UNCACHED_QCOM; 159 | // file descriptor for ION 160 | myionmem.ion_filedesc = ion_info_fd.file_descriptor; 161 | // hostptr returned by ION which is device page size aligned 162 | myionmem.ion_hostptr = ion_info.host_virtual_address; 163 | 164 | if(myionmem.ion_hostptr % device_page_size) 165 | { 166 | error("Host pointer must be aligned to device_page_size!"); 167 | } 168 | 169 | buffer_object = clCreateBuffer(context, 170 | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM, 171 | buffer_size_in_bytes, &myionmem, &errcode); 172 | 173 | 2) Using the extension for CL image objects 174 | 175 | cl_mem image_object = NULL; 176 | cl_mem_ion_host_ptr myionmem = {0}; 177 | size_t ext_mem_padding_in_bytes = 0; 178 | size_t device_page_size = 0; 179 | size_t row_pitch = 0; 180 | 181 | // Query the device's page size and the amount of padding necessary at 182 | // the end of the buffer. 
183 | clGetDeviceInfo(device, CL_DEVICE_PAGE_SIZE_QCOM, 184 | sizeof(device_page_size), &device_page_size, NULL); 185 | clGetDeviceInfo(device, CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM, 186 | sizeof(ext_mem_padding_in_bytes), &ext_mem_padding_in_bytes, NULL); 187 | 188 | // Query the device supported row and slice pitch using 189 | // clGetDeviceImageInfoQCOM 190 | // imgw - image width 191 | // imgh - image height 192 | // img_fmt - image format 193 | clGetDeviceImageInfoQCOM(device, imgw, imgh, &img_fmt, 194 | CL_IMAGE_ROW_PITCH, sizeof(image_row_pitch), &image_row_pitch, 195 | NULL); 196 | 197 | // Use the image height, row pitch obtained above and element size to 198 | // compute the size of the buffer 199 | buffer_size_in_bytes = imgh * image_row_pitch; 200 | 201 | // Compute amount of memory that needs to be allocated for the buffer 202 | // including padding. 203 | buffer_size_with_padding = buffer_size_in_bytes + 204 | ext_mem_padding_in_bytes; 205 | 206 | // Make an ION memory allocation of size buffer_size_with_padding here. 207 | // Note that allocating buffer_size_in_bytes instead would be a mistake. 208 | // It's important to allocate the extra padding. Let's say the 209 | // parameters of the allocation are stored in a struct named ion_info 210 | // that we will use below. 211 | 212 | // Create an OpenCL image object that uses ion_info as its data store. 
213 | myionmem.ext_host_ptr.allocation_type = CL_MEM_ION_HOST_PTR_QCOM; 214 | myionmem.ext_host_ptr.host_cache_policy = CL_MEM_HOST_UNCACHED_QCOM; 215 | // file descriptor for ION 216 | myionmem.ion_filedesc = ion_info_fd.file_descriptor; 217 | // hostptr returned by ION which is device page size aligned 218 | myionmem.ion_hostptr = ion_info.host_virtual_address; 219 | 220 | if(myionmem.ion_hostptr % device_page_size) 221 | { 222 | error("Host pointer must be aligned to device_page_size!"); 223 | } 224 | 225 | // Note that the image_row_pitch obtained by calling 226 | // clGetDeviceImageInfoQCOM should be passed to clCreateImage2D 227 | image_object = clCreateImage2D(context, 228 | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM, &image_fmt, imgw, 229 | imgh, image_row_pitch, &myionmem, &errcode); 230 | 231 | // Call clEnqueueMapImage before filling input image data 232 | pinput = clEnqueueMapImage(command_queue, image_object, CL_TRUE, 233 | CL_MAP_WRITE, origin, region, &row_pitch, NULL, 0, NULL, NULL, 234 | &errcode); 235 | 236 | // Fill the input image data using the hostptr and row_pitch returned by 237 | // clEnqueueMapImage 238 | cl_uchar* inp = pinput; 239 | memset(inp, 0x0, (row_pitch * imgh)); 240 | for(i = 0; i < (row_pitch * imgh); i+=row_pitch) 241 | { 242 | memset(inp+i, 0xff, imgw * element_size); 243 | } 244 | 245 | errcode = clEnqueueUnmapMemObject(command_queue, image_object, pinput, 246 | 0, NULL, NULL); 247 | 248 | Revision History 249 | 250 | Revision 1, 2012/10/18: Initial version. 251 | Revision 2, 2012/11/01: Improved sample code. 252 | Revision 3, 2013/05/17: Generalized. Cleaned-up for Khronos. Added final 253 | token values. 254 | Revision 4, 2017/06/16: Clean up. No functional changes. 255 | Revision 5, 2017/11/13: Clean up. No functional changes. 256 | Revision 6, 2018/01/03: Added reference to cl_qcom_ext_host_ptr_iocoherent. 257 | Revision 7, 2018/01/19: Formatting and misc changes. No functional changes. 
258 | -------------------------------------------------------------------------------- /docs/extensions/cl_qcom_other_image.txt: -------------------------------------------------------------------------------- 1 | Name Strings 2 | 3 | cl_qcom_other_image 4 | 5 | Contributors 6 | 7 | Balaji Calidas, Qualcomm Technologies, Inc. 8 | Roto Le, Qualcomm Technologies, Inc. 9 | Manali Torpe, Qualcomm Innovation Center, Inc. 10 | Chris Grimm, Qualcomm Technologies, Inc. 11 | 12 | Contact 13 | 14 | bcalidas at qti dot qualcomm dot com 15 | 16 | Version 17 | 18 | Version 6, 2018/04/25 19 | 20 | Status 21 | 22 | Shipping 23 | 24 | Extension Type 25 | 26 | OpenCL device extension 27 | 28 | Dependencies 29 | 30 | OpenCL 2.0 or later is required. 31 | 32 | cl_qcom_android_native_buffer_host_ptr or cl_qcom_ion_host_ptr is required. 33 | 34 | This extension is written against the OpenCL 2.0 Specification. 35 | 36 | Overview 37 | 38 | This extension enables an application to read from and/or write to 39 | non-conventional OpenCL image objects. Examples of non-conventional images 40 | are planar images such as NV12 or TP10, MIPI packed images, Bayer pattern 41 | images and tiled images. These images do not conform to the standards for 42 | images as described in the OpenCL specification. Therefore they are not 43 | exposed directly through OpenCL. Instead they are exposed through this 44 | extension. These images are supported on select Qualcomm GPUs. 45 | 46 | An application can use this extension to query supported non-conventional 47 | image formats. It can then create an image of a supported non-conventional 48 | format from an ION or ANB allocation. Only reads and writes of this image 49 | from inside a CL kernel are defined, as well as a limited use of the 50 | clEnqueueMapImage host API described below. 
51 | 52 | Header File 53 | 54 | cl_ext_qcom.h 55 | 56 | New Procedures and Functions 57 | 58 | None 59 | 60 | New Tokens 61 | 62 | Added to the list of supported cl_mem_flags by clCreateImage in 63 | Table 5.3 of the OpenCL 2.0 Specification. 64 | 65 | CL_MEM_OTHER_IMAGE_QCOM 66 | 67 | Additions to Chapter 5.3.1 of the OpenCL 2.0 Specification 68 | (Creating Image Objects) 69 | (Append to the section introduced by cl_qcom_ion_host_ptr extension) 70 | 71 | Non-conventional images can be created from ION buffers in much the same way 72 | as conventional images. host_ptr->allocation_type should be set to 73 | CL_MEM_ION_HOST_PTR_QCOM. The application is responsible for the layout of 74 | the non-conventional image data held in an ION allocation, which must follow 75 | the structure defined by the image format. Any deviation from the standard 76 | will lead to undefined results. 77 | 78 | While creating non-conventional images, when CL_MEM_EXT_HOST_PTR_QCOM is 79 | enabled in the flags argument, the image_row_pitch and image_slice_pitch 80 | fields of cl_image_desc must be set to 0. 81 | 82 | Additions to Chapter 5.3.2 of the OpenCL 2.0 Specification 83 | (Querying List of Supported Image Formats) 84 | 85 | When CL_MEM_OTHER_IMAGE_QCOM is enabled in the flags argument, the 86 | implementation will return a list of supported non-conventional image 87 | formats. CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE and 88 | CL_MEM_KERNEL_READ_AND_WRITE flags retain the same meaning as for 89 | conventional image formats. 90 | 91 | Additions to Chapter 5.3.6 of the OpenCL 2.0 Specification 92 | (Mapping Image Objects) 93 | (Append to description of clEnqueueMapImage function) 94 | 95 | If the image is of a non-conventional format enabled by 96 | CL_MEM_OTHER_IMAGE_QCOM, then mapping a region has a special meaning that is 97 | only defined under certain conditions. All other uses will invoke undefined 98 | behavior.
If origin[0], origin[1] and origin[2] are all 0, the value of 99 | region specifies the entire image, and the image was created with a valid 100 | ION or ANB allocation, then this function will return a valid host pointer 101 | to the start of the underlying ION or ANB allocation. Additionally, any 102 | necessary cache operations will be performed to ensure appropriate data 103 | visibility. The object must be unmapped using clEnqueueUnmapMemObject when 104 | host access is complete to ensure that any changes become visible. 105 | 106 | Additions to Chapter 6.13.14.2 of the OpenCL-C 2.0 Specification 107 | (Built-in Image Read Functions) 108 | 109 | The sampler used for reading non-conventional images can take the following 110 | values: CLK_FILTER_NEAREST and CLK_FILTER_LINEAR for the sampler's filter 111 | mode. CLK_ADDRESS_NONE, CLK_ADDRESS_CLAMP and CLK_ADDRESS_CLAMP_TO_EDGE for 112 | the sampler's addressing mode. For some non-conventional image formats there 113 | may be restrictions on which filter modes and which addressing modes can be 114 | used. 115 | 116 | Android Native Buffer Option 117 | 118 | On Android platforms it is possible to create a non-conventional image from 119 | ANativeWindowBuffer (ANB) aka graphics buffer. The application is 120 | responsible for creating an ANB buffer of appropriate format and size which 121 | can be used to store the non-conventional image data. 122 | 123 | Additions to Chapter 5.3.1 of the OpenCL 2.0 Specification 124 | (Creating Image Objects) 125 | 126 | (Append to the section introduced by cl_qcom_android_native_buffer_host_ptr 127 | extension) 128 | 129 | Non-conventional images can be created from ANB buffers in much the same way 130 | as conventional images. ->allocation_type should be set to 131 | CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM. 
The application is responsible 132 | for the layout of non-conventional image data held in an ANB allocation, 133 | which must follow the structure defined by the Qualcomm compression standard. 134 | Any deviation from the standard will lead to undefined results. While 135 | creating non-conventional images, when CL_MEM_EXT_HOST_PTR_QCOM is enabled 136 | in the flags argument, the image_row_pitch and image_slice_pitch fields of 137 | cl_image_desc must be set to 0. 138 | 139 | Compressed Planar Images 140 | 141 | It is possible to combine the cl_qcom_other_image extension with the 142 | cl_qcom_compressed_image extension to create images that are both compressed 143 | and non-conventional. An example would be CL_QCOM_COMPRESSED_NV12. Such 144 | images may be queried by enabling CL_MEM_OTHER_IMAGE_QCOM | 145 | CL_MEM_COMPRESSED_IMAGE_QCOM in the flags argument of 146 | clGetSupportedImageFormats. These images can be created in the same manner 147 | as conventional images using ION or ANB buffers. 148 | 149 | Mem Flags Usage 150 | 151 | When querying non-conventional and/or compressed image formats, it is 152 | important to use the correct mem flags. CL_MEM_OTHER_IMAGE_QCOM will query 153 | the non-conventional and uncompressed image formats. 154 | CL_MEM_OTHER_IMAGE_QCOM | CL_MEM_COMPRESSED_IMAGE_QCOM will query the 155 | non-conventional and compressed image formats. CL_MEM_COMPRESSED_IMAGE_QCOM 156 | when used alone will query the conventional compressed image formats such as 157 | CL_QCOM_COMPRESSED_RGBA.
158 | 159 | Sample Code 160 | 161 | 1) Querying supported formats for read_only non-conventional images 162 | 163 | #define MAX_NUM_FORMATS 128 164 | 165 | cl_image_format format_list[ MAX_NUM_FORMATS] = {0}; 166 | cl_int num_format_list_entries = MAX_NUM_FORMATS; 167 | cl_int num_reported_image_formats = 0; 168 | cl_int errcode = 0; 169 | 170 | // Query the supported formats for compressed non-conventional images 171 | errcode = clGetSupportedImageFormats(context, 172 | CL_MEM_READ_ONLY | CL_MEM_OTHER_IMAGE_QCOM | 173 | CL_MEM_COMPRESSED_IMAGE_QCOM, 174 | CL_MEM_OBJECT_IMAGE2D, 175 | num_format_list_entries, 176 | format_list, 177 | &num_reported_image_formats); 178 | 179 | 2) Creating an ION buffer for holding non-conventional image data. 180 | 181 | cl_mem_ion_host_ptr nv12_ionmem = {0}; 182 | 183 | // Initialize ION buffer attributes 184 | nv12_ionmem.ext_host_ptr.allocation_type = CL_MEM_ION_HOST_PTR_QCOM; 185 | nv12_ionmem.ext_host_ptr.host_cache_policy = CL_MEM_HOST_UNCACHED_QCOM; 186 | // file descriptor for ION 187 | nv12_ionmem.ion_filedesc = ion_info_fd.file_descriptor; 188 | // hostptr returned by ION 189 | nv12_ionmem.ion_hostptr = ion_info.host_virtual_address; 190 | 191 | 3) Using cl_qcom_ion_host_ptr holding planar image data to create a 192 | non-conventional compressed NV12 image object. 193 | 194 | cl_image_format image_format = {0}; 195 | cl_image_desc image_desc = {0}; 196 | cl_int errcode = 0; 197 | 198 | // Set image format 199 | image_format.image_channel_order = CL_QCOM_COMPRESSED_NV12; 200 | image_format.image_channel_data_type = CL_UNORM_INT8; 201 | 202 | // Set image parameters. image_row_pitch and image_slice_pitch are always 0 203 | // for non-conventional images.
204 | image_desc.image_width = 256; 205 | image_desc.image_height = 256; 206 | image_desc.image_row_pitch = 0; 207 | image_desc.image_slice_pitch = 0; 208 | 209 | // Create a non-conventional image 210 | other_image = clCreateImage(context, 211 | CL_MEM_EXT_HOST_PTR_QCOM|CL_MEM_READ_ONLY, 212 | &image_format, 213 | &image_desc, 214 | (void*)&nv12_ionmem, 215 | &errcode); 216 | 217 | 4) Creating an ANB buffer for holding planar image data 218 | 219 | cl_mem_android_native_buffer_host_ptr other_anb = {0}; 220 | GraphicBuffer *gb; // previously created 221 | 222 | other_anb.ext_host_ptr.allocation_type = 223 | CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM; 224 | other_anb.ext_host_ptr.host_cache_policy = CL_MEM_HOST_WRITEBACK_QCOM; 225 | // the hostptr to a native buffer and gb is an Android GraphicBuffer 226 | other_anb.anb_ptr = gb->getNativeBuffer(); 227 | 228 | 5) Using cl_qcom_android_native_buffer_host_ptr holding planar image data to 229 | create a non-conventional image object. 230 | 231 | cl_image_format image_format = {0}; 232 | cl_image_desc image_desc = {0}; 233 | cl_int errcode = 0; 234 | 235 | // Set image format 236 | image_format.image_channel_order = CL_QCOM_COMPRESSED_NV12; 237 | image_format.image_channel_data_type = CL_UNORM_INT8; 238 | 239 | // Set image parameters. image_row_pitch and image_slice_pitch are always 0 240 | // for non-conventional images. 241 | image_desc.image_width = 256; 242 | image_desc.image_height = 256; 243 | image_desc.image_row_pitch = 0; 244 | image_desc.image_slice_pitch = 0; 245 | 246 | // Create a non-conventional image 247 | nv12_img = clCreateImage(context, 248 | CL_MEM_EXT_HOST_PTR_QCOM | CL_MEM_READ_ONLY, 249 | &image_format, 250 | &image_desc, 251 | (void*)&other_anb, 252 | &errcode); 253 | 254 | Revision History 255 | 256 | Revision 1, 2016/09/02: Initial version. 257 | Revision 2, 2017/06/16: Clean up. No functional changes. 258 | Revision 3, 2017/09/13: Clarified Mem Flags usage. 259 | Revision 4, 2017/11/13: Clean up.
No functional changes. 260 | Revision 5, 2018/01/19: Formatting and misc changes. No functional changes. 261 | Revision 6, 2018/04/25: Document map behavior. 262 | 263 | -------------------------------------------------------------------------------- /docs/extensions/cl_qcom_perf_hint.txt: -------------------------------------------------------------------------------- 1 | Name Strings 2 | 3 | cl_qcom_perf_hint 4 | 5 | Contributors 6 | 7 | Sreelakshmi Haridas Maruthur, Qualcomm Innovation Center, Inc. 8 | 9 | Contact 10 | 11 | sharidas at quicinc dot com 12 | 13 | Version 14 | 15 | Version 4, 2018/01/19 16 | 17 | Status 18 | 19 | Shipping 20 | 21 | Extension Type 22 | 23 | OpenCL device extension 24 | 25 | Dependencies 26 | 27 | OpenCL 1.1 or later is required. 28 | 29 | This extension is written against the OpenCL 1.1 specification. 30 | 31 | Overview 32 | 33 | This extension allows applications to request the 34 | performance level desired for device(s) on an OpenCL context. 35 | 36 | Higher performance implies higher frequencies on the device. 37 | 38 | The hint may be provided at context creation as a context property and/or 39 | updated using a separate API at any point during the context’s lifetime. 40 | 41 | Header File 42 | 43 | cl_ext_qcom.h 44 | 45 | New Procedures and Functions 46 | 47 | clSetPerfHintQCOM 48 | 49 | New Tokens 50 | 51 | Added to the list of supported properties by clCreateContext in 52 | Table 4.4 of the OpenCL 1.1 specification. 
53 | 54 | CL_CONTEXT_PERF_HINT_QCOM 0x40C2 55 | 56 | New list of supported values for CL_CONTEXT_PERF_HINT_QCOM property 57 | 58 | CL_PERF_HINT_HIGH_QCOM 0x40C3 59 | CL_PERF_HINT_NORMAL_QCOM 0x40C4 60 | CL_PERF_HINT_LOW_QCOM 0x40C5 61 | 62 | Additions to Chapter 4.3 of the OpenCL 1.1 Specification 63 | (Contexts) 64 | 65 | Add the following tokens to Table 4.4 (List of supported properties by 66 | clCreateContext) 67 | 68 | CL_CONTEXT_PERF_HINT_QCOM cl_perf_hint Specifies the performance 69 | hint for this context 70 | 71 | Add the following to Table 4.4a (List of supported performance hint values 72 | by CL_CONTEXT_PERF_HINT_QCOM) 73 | 74 | List of supported performance hint values and their effect on performance 75 | is described in Table 4.4a 76 | 77 | cl_perf_hint Description 78 | 79 | CL_PERF_HINT_HIGH_QCOM Requests the highest performance level from 80 | device. This is the default setting for 81 | devices in an OpenCL context. 82 | 83 | CL_PERF_HINT_NORMAL_QCOM Requests a balanced performance setting that 84 | is set dynamically by the GPU frequency and 85 | power management 86 | 87 | CL_PERF_HINT_LOW_QCOM Requests a performance setting that 88 | prioritizes lower power consumption 89 | 90 | Add the following error descriptions for clCreateContext: 91 | 92 | * CL_INVALID_PROPERTY if the context property 93 | CL_CONTEXT_PERF_HINT_QCOM is specified and at least one of the devices 94 | in devices does not support the performance hint property 95 | 96 | Add the following to Table 4.6 (List of supported param_names by 97 | clGetContextInfo), append to the entry for CL_CONTEXT_PROPERTIES 98 | 99 | cl_context_info Return Type Information returned in 100 | param_value 101 | 102 | CL_CONTEXT_ cl_context_ If a CL_CONTEXT_PERF_HINT_QCOM 103 | PROPERTIES properties[] property was set using 104 | clSetPerfHintQCOM, the properties 105 | argument returned will be populated with 106 | this property, even if the property was 107 | not specified in clCreateContext or
108 | clCreateContextFromType. 109 | 110 | Add the following new function 111 | 112 | The function 113 | 114 | cl_int clSetPerfHintQCOM(cl_context context, 115 | cl_perf_hint perf_hint) 116 | 117 | can be used to set the value of CL_CONTEXT_PERF_HINT_QCOM property on a 118 | context. This function can be used to set or update the 119 | CL_CONTEXT_PERF_HINT_QCOM property irrespective of whether it was 120 | specified at context creation time as one of the context properties. 121 | 122 | context must be a valid OpenCL context. 123 | perf_hint identifies the hint being set. It has to be one of the 124 | values in Table 4.4a. 125 | 126 | clSetPerfHintQCOM returns CL_SUCCESS if the property 127 | CL_CONTEXT_PERF_HINT_QCOM was set to the provided value. Otherwise, it 128 | returns one of the following errors: 129 | 130 | * CL_INVALID_CONTEXT if context is not a valid context 131 | 132 | * CL_INVALID_PROPERTY if at least one of the devices in the context does 133 | not support the performance hint property 134 | 135 | * CL_INVALID_VALUE if the value of perf_hint is not one of the supported 136 | values as specified in Table 4.4a. 137 | 138 | * CL_OUT_OF_RESOURCES if there is a failure to set the perf-hint on any 139 | device in the context 140 | 141 | * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources 142 | required by the OpenCL implementation on the host 143 | 144 | The effect of clSetPerfHintQCOM on commands that are in flight is 145 | undefined. These commands may be executed at either of the existing or 146 | new performance levels. If the user desires to complete all pending work 147 | at the existing level and execute all subsequent commands at the new 148 | level, a clFinish must be added before calling clSetPerfHintQCOM to set 149 | the new level.
150 | 151 | Sample Code 152 | 153 | 1) Set performance hint at time of CL context creation: 154 | 155 | cl_context_properties properties[] = {CL_CONTEXT_PERF_HINT_QCOM, 156 | CL_PERF_HINT_LOW_QCOM, 0}; 157 | clCreateContext(properties, 1, &device_id, NULL, NULL, NULL); 158 | 159 | 2) Set performance hint for an existing CL context: 160 | 161 | clSetPerfHintQCOM(context, CL_PERF_HINT_NORMAL_QCOM); 162 | 163 | Revision History 164 | 165 | Revision 1, 2014/05/20: Initial version. 166 | Revision 2, 2017/06/16: Clean up. No functional changes. 167 | Revision 3, 2017/11/13: Clean up. No functional changes. 168 | Revision 4, 2018/01/19: Formatting and misc changes. No functional changes. 169 | 170 | -------------------------------------------------------------------------------- /docs/extensions/cl_qcom_priority_hint.txt: -------------------------------------------------------------------------------- 1 | Name Strings 2 | 3 | cl_qcom_priority_hint 4 | 5 | Contributors 6 | 7 | Richard Ruigrok, Qualcomm Innovation Center, Inc. 8 | 9 | Contact 10 | 11 | bcalidas at qti dot qualcomm dot com 12 | 13 | Version 14 | 15 | Version 5, 2018/01/19 16 | 17 | Status 18 | 19 | Shipping 20 | 21 | Extension Type 22 | 23 | OpenCL device extension 24 | 25 | Dependencies 26 | 27 | OpenCL 1.1 or later is required. 28 | 29 | This extension is written against the OpenCL 1.1 specification. 30 | 31 | Overview 32 | 33 | This extension allows applications to specify the desired priority for 34 | enqueued kernels to be submitted to the device(s) on an OpenCL context. 35 | 36 | Higher priority implies that enqueued kernels may be submitted to the 37 | device for processing before other enqueues on other contexts that 38 | have lower priority. 39 | 40 | The default behavior selects the priority that would otherwise be used 41 | for the context if this extension is not used. 42 | 43 | The hint may be provided at context creation as a context property.
44 | The context property may not be updated after context creation. 45 | 46 | Header File 47 | 48 | cl_ext_qcom.h 49 | 50 | New Procedures and Functions 51 | 52 | None 53 | 54 | New Tokens 55 | 56 | Added to the list of supported properties by clCreateContext in 57 | Table 4.4 of the OpenCL 1.1 specification. 58 | 59 | CL_CONTEXT_PRIORITY_HINT_QCOM 0x40C9 60 | 61 | New list of supported values for CL_CONTEXT_PRIORITY_HINT_QCOM property 62 | 63 | CL_PRIORITY_HINT_HIGH_QCOM 0x40CA 64 | CL_PRIORITY_HINT_NORMAL_QCOM 0x40CB 65 | CL_PRIORITY_HINT_LOW_QCOM 0x40CC 66 | 67 | 68 | Additions to Chapter 4.3 of the OpenCL 1.1 Specification 69 | (Contexts) 70 | 71 | Add the following tokens to Table 4.4 72 | (List of supported properties by clCreateContext) 73 | 74 | CL_CONTEXT_PRIORITY_ cl_priority_hint Specifies the desired priority 75 | HINT_QCOM level for this context 76 | 77 | Add the following to Table 4.4a 78 | (List of supported priority hint values by CL_CONTEXT_PRIORITY_HINT_QCOM) 79 | 80 | List of supported priority hint values and their effect on priority is 81 | described in Table 4.4a 82 | 83 | cl_priority_hint Description 84 | 85 | CL_PRIORITY_HINT_HIGH_QCOM Requests the highest priority level for 86 | all submissions for any command, for all 87 | devices on this context. 88 | 89 | CL_PRIORITY_HINT_NORMAL_QCOM Requests a balanced priority level for all 90 | submissions for any command, for all 91 | devices on this context. This is the 92 | default. 93 | 94 | CL_PRIORITY_HINT_LOW_QCOM Requests a lower priority level for all 95 | submissions for any command, for all 96 | devices on this context. 
97 | 98 | Add the following error descriptions for clCreateContext: 99 | 100 | * CL_INVALID_PROPERTY if the context property 101 | CL_CONTEXT_PRIORITY_HINT_QCOM is specified and at least one of the 102 | devices in the devices list does not support the priority hint property 103 | 104 | Add the following to Table 4.6 105 | (List of supported param_names by clGetContextInfo) 106 | append to the entry for CL_CONTEXT_PROPERTIES 107 | 108 | cl_context_info Return Type Information returned in 109 | param_value 110 | 111 | CL_CONTEXT_ cl_context_ If a CL_CONTEXT_PRIORITY_HINT_QCOM 112 | PROPERTIES properties[] property was given at context 113 | creation, this property will be 114 | returned. 115 | 116 | Sample Code 117 | 118 | cl_context_properties properties[] = {CL_CONTEXT_PRIORITY_HINT_QCOM, 119 | CL_PRIORITY_HINT_LOW_QCOM, 0}; 120 | clCreateContext(properties, 1, &device_id, NULL, NULL, NULL); 121 | 122 | Revision History 123 | 124 | Revision 1, 2014/10/21: Initial version. 125 | Revision 2, 2017/06/16: Clean up. No functional changes. 126 | Revision 3, 2017/11/08: Now a public extension. 127 | Revision 4, 2017/11/13: Clean up. No functional changes. 128 | Revision 5, 2018/01/19: Formatting and misc changes. No functional changes. 
129 | 130 | -------------------------------------------------------------------------------- /example_images/CL_QCOM_BAYER__CL_QCOM_UNORM_INT10__192x192_FACE.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_BAYER__CL_QCOM_UNORM_INT10__192x192_FACE.dat -------------------------------------------------------------------------------- /example_images/CL_QCOM_BAYER__CL_QCOM_UNORM_INT10__48x48_FACE.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_BAYER__CL_QCOM_UNORM_INT10__48x48_FACE.dat -------------------------------------------------------------------------------- /example_images/CL_QCOM_BAYER__CL_QCOM_UNORM_MIPI10__192x192_FACE.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_BAYER__CL_QCOM_UNORM_MIPI10__192x192_FACE.dat -------------------------------------------------------------------------------- /example_images/CL_QCOM_BAYER__CL_QCOM_UNORM_MIPI10__48x48_FACE.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_BAYER__CL_QCOM_UNORM_MIPI10__48x48_FACE.dat -------------------------------------------------------------------------------- /example_images/CL_QCOM_NV12__CL_UNORM_INT8__128x128_CIRCLE.dat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_NV12__CL_UNORM_INT8__128x128_CIRCLE.dat -------------------------------------------------------------------------------- /example_images/CL_QCOM_NV12__CL_UNORM_INT8__256x256_CIRCLE.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_NV12__CL_UNORM_INT8__256x256_CIRCLE.dat -------------------------------------------------------------------------------- /example_images/CL_QCOM_NV12__CL_UNORM_INT8__64x64_CIRCLE.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_NV12__CL_UNORM_INT8__64x64_CIRCLE.dat -------------------------------------------------------------------------------- /example_images/CL_QCOM_NV12__CL_UNORM_INT8__CONSTANT.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_NV12__CL_UNORM_INT8__CONSTANT.dat -------------------------------------------------------------------------------- /example_images/CL_QCOM_NV12__CL_UNORM_INT8__INCREASING.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_NV12__CL_UNORM_INT8__INCREASING.dat -------------------------------------------------------------------------------- /example_images/CL_QCOM_NV12__CL_UNORM_INT8__RANDOM.dat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_NV12__CL_UNORM_INT8__RANDOM.dat -------------------------------------------------------------------------------- /example_images/CL_QCOM_P010__CL_QCOM_UNORM_INT10__CONSTANT.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_P010__CL_QCOM_UNORM_INT10__CONSTANT.dat -------------------------------------------------------------------------------- /example_images/CL_QCOM_P010__CL_QCOM_UNORM_INT10__INCREASING.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_P010__CL_QCOM_UNORM_INT10__INCREASING.dat -------------------------------------------------------------------------------- /example_images/CL_QCOM_P010__CL_QCOM_UNORM_INT10__RANDOM.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_P010__CL_QCOM_UNORM_INT10__RANDOM.dat -------------------------------------------------------------------------------- /example_images/CL_QCOM_TP10__CL_QCOM_UNORM_INT10__CONSTANT.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_TP10__CL_QCOM_UNORM_INT10__CONSTANT.dat -------------------------------------------------------------------------------- /example_images/CL_QCOM_TP10__CL_QCOM_UNORM_INT10__INCREASING.dat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_TP10__CL_QCOM_UNORM_INT10__INCREASING.dat -------------------------------------------------------------------------------- /example_images/CL_QCOM_TP10__CL_QCOM_UNORM_INT10__RANDOM.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_TP10__CL_QCOM_UNORM_INT10__RANDOM.dat -------------------------------------------------------------------------------- /example_images/create_example_images.py: -------------------------------------------------------------------------------- 1 | import struct 2 | import random as rand 3 | 4 | ######## 5 | # NV12 # 6 | ######## 7 | 8 | prefix = ( 9 | "\x18\x00\x00\x00" + 10 | "\x18\x00\x00\x00" + 11 | "\xD2\x10\x00\x00" + 12 | "\x33\x41\x00\x00" 13 | ) 14 | 15 | const_data = prefix 16 | incr_data = prefix 17 | for i in range(0, 0x18 * 0x18 * 3 / 2): 18 | const_data += struct.pack("B", 128) 19 | if i < 0x18 * 0x18: 20 | incr_data += struct.pack("B", i % 256) 21 | 22 | for i in range(0, 0x18 * 0x18 / 4): 23 | incr_data += struct.pack("BB", i % 256, i % 256) 24 | 25 | 26 | rand_prefix = ( 27 | "\xA0\x00\x00\x00" + 28 | "\x78\x00\x00\x00" + 29 | "\xD2\x10\x00\x00" + 30 | "\x33\x41\x00\x00" 31 | ) 32 | 33 | rand.seed(42) 34 | 35 | rand_data = rand_prefix 36 | for i in range(0xA0 * 0x78 * 3 / 2): 37 | rand_data += struct.pack("B", rand.randint(0, 255)) 38 | 39 | with open("CL_QCOM_NV12__CL_UNORM_INT8__RANDOM.dat", "wb") as f: 40 | f.write(rand_data) 41 | 42 | with open("CL_QCOM_NV12__CL_UNORM_INT8__CONSTANT.dat", "wb") as f: 43 | f.write(const_data) 44 | 45 | with open("CL_QCOM_NV12__CL_UNORM_INT8__INCREASING.dat", "wb") as f: 46 | f.write(incr_data) 47 | 48 | # Make some circles of various sizes 49 | for i in range(6, 9): 50 | height = width = 2 ** i 51 | 
data = "" 52 | data += struct.pack(" 36 | #endif 37 | 38 | #ifdef __cplusplus 39 | extern "C" { 40 | #endif 41 | 42 | 43 | /* Command type for events created with clEnqueueAcquireEGLObjectsKHR */ 44 | #define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F 45 | #define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D 46 | #define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E 47 | 48 | /* Error type for clCreateFromEGLImageKHR */ 49 | #define CL_INVALID_EGL_OBJECT_KHR -1093 50 | #define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092 51 | 52 | /* CLeglImageKHR is an opaque handle to an EGLImage */ 53 | typedef void* CLeglImageKHR; 54 | 55 | /* CLeglDisplayKHR is an opaque handle to an EGLDisplay */ 56 | typedef void* CLeglDisplayKHR; 57 | 58 | /* CLeglSyncKHR is an opaque handle to an EGLSync object */ 59 | typedef void* CLeglSyncKHR; 60 | 61 | /* properties passed to clCreateFromEGLImageKHR */ 62 | typedef intptr_t cl_egl_image_properties_khr; 63 | 64 | 65 | #define cl_khr_egl_image 1 66 | 67 | extern CL_API_ENTRY cl_mem CL_API_CALL 68 | clCreateFromEGLImageKHR(cl_context /* context */, 69 | CLeglDisplayKHR /* egldisplay */, 70 | CLeglImageKHR /* eglimage */, 71 | cl_mem_flags /* flags */, 72 | const cl_egl_image_properties_khr * /* properties */, 73 | cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; 74 | 75 | typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)( 76 | cl_context context, 77 | CLeglDisplayKHR egldisplay, 78 | CLeglImageKHR eglimage, 79 | cl_mem_flags flags, 80 | const cl_egl_image_properties_khr * properties, 81 | cl_int * errcode_ret); 82 | 83 | 84 | extern CL_API_ENTRY cl_int CL_API_CALL 85 | clEnqueueAcquireEGLObjectsKHR(cl_command_queue /* command_queue */, 86 | cl_uint /* num_objects */, 87 | const cl_mem * /* mem_objects */, 88 | cl_uint /* num_events_in_wait_list */, 89 | const cl_event * /* event_wait_list */, 90 | cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; 91 | 92 | typedef CL_API_ENTRY cl_int (CL_API_CALL 
*clEnqueueAcquireEGLObjectsKHR_fn)( 93 | cl_command_queue command_queue, 94 | cl_uint num_objects, 95 | const cl_mem * mem_objects, 96 | cl_uint num_events_in_wait_list, 97 | const cl_event * event_wait_list, 98 | cl_event * event); 99 | 100 | 101 | extern CL_API_ENTRY cl_int CL_API_CALL 102 | clEnqueueReleaseEGLObjectsKHR(cl_command_queue /* command_queue */, 103 | cl_uint /* num_objects */, 104 | const cl_mem * /* mem_objects */, 105 | cl_uint /* num_events_in_wait_list */, 106 | const cl_event * /* event_wait_list */, 107 | cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; 108 | 109 | typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)( 110 | cl_command_queue command_queue, 111 | cl_uint num_objects, 112 | const cl_mem * mem_objects, 113 | cl_uint num_events_in_wait_list, 114 | const cl_event * event_wait_list, 115 | cl_event * event); 116 | 117 | 118 | #define cl_khr_egl_event 1 119 | 120 | extern CL_API_ENTRY cl_event CL_API_CALL 121 | clCreateEventFromEGLSyncKHR(cl_context /* context */, 122 | CLeglSyncKHR /* sync */, 123 | CLeglDisplayKHR /* display */, 124 | cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; 125 | 126 | typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)( 127 | cl_context context, 128 | CLeglSyncKHR sync, 129 | CLeglDisplayKHR display, 130 | cl_int * errcode_ret); 131 | 132 | #ifdef __cplusplus 133 | } 134 | #endif 135 | 136 | #endif /* __OPENCL_CL_EGL_H */ 137 | -------------------------------------------------------------------------------- /inc/CL/cl_ext_qcom.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2009-2017 Qualcomm Technologies, Inc. All Rights Reserved. 2 | * Qualcomm Technologies Proprietary and Confidential. 
3 | */ 4 | 5 | #ifndef __OPENCL_CL_EXT_QCOM_H 6 | #define __OPENCL_CL_EXT_QCOM_H 7 | 8 | // Needed by cl_khr_egl_event extension 9 | #include 10 | #include 11 | #include 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | 18 | /************************************ 19 | * cl_qcom_create_buffer_from_image * 20 | ************************************/ 21 | 22 | #define CL_BUFFER_FROM_IMAGE_ROW_PITCH_QCOM 0x40C0 23 | #define CL_BUFFER_FROM_IMAGE_SLICE_PITCH_QCOM 0x40C1 24 | 25 | extern CL_API_ENTRY cl_mem CL_API_CALL 26 | clCreateBufferFromImageQCOM(cl_mem image, 27 | cl_mem_flags flags, 28 | cl_int *errcode_ret); 29 | 30 | 31 | /************************************ 32 | * cl_qcom_limited_printf extension * 33 | ************************************/ 34 | 35 | /* Builtin printf function buffer size in bytes. */ 36 | #define CL_DEVICE_PRINTF_BUFFER_SIZE_QCOM 0x1049 37 | 38 | 39 | /************************************* 40 | * cl_qcom_extended_images extension * 41 | *************************************/ 42 | 43 | #define CL_CONTEXT_ENABLE_EXTENDED_IMAGES_QCOM 0x40AA 44 | #define CL_DEVICE_EXTENDED_IMAGE2D_MAX_WIDTH_QCOM 0x40AB 45 | #define CL_DEVICE_EXTENDED_IMAGE2D_MAX_HEIGHT_QCOM 0x40AC 46 | #define CL_DEVICE_EXTENDED_IMAGE3D_MAX_WIDTH_QCOM 0x40AD 47 | #define CL_DEVICE_EXTENDED_IMAGE3D_MAX_HEIGHT_QCOM 0x40AE 48 | #define CL_DEVICE_EXTENDED_IMAGE3D_MAX_DEPTH_QCOM 0x40AF 49 | 50 | /************************************* 51 | * cl_qcom_perf_hint extension * 52 | *************************************/ 53 | 54 | typedef cl_uint cl_perf_hint; 55 | 56 | #define CL_CONTEXT_PERF_HINT_QCOM 0x40C2 57 | 58 | /*cl_perf_hint*/ 59 | #define CL_PERF_HINT_HIGH_QCOM 0x40C3 60 | #define CL_PERF_HINT_NORMAL_QCOM 0x40C4 61 | #define CL_PERF_HINT_LOW_QCOM 0x40C5 62 | 63 | extern CL_API_ENTRY cl_int CL_API_CALL 64 | clSetPerfHintQCOM(cl_context context, 65 | cl_perf_hint perf_hint); 66 | 67 | // This extension is published at Khronos, so its definitions are made in cl_ext.h. 
68 | // This duplication is for backward compatibility. 69 | 70 | #ifndef CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM 71 | 72 | /********************************* 73 | * cl_qcom_android_native_buffer_host_ptr extension 74 | *********************************/ 75 | 76 | #define CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM 0x40C6 77 | 78 | 79 | typedef struct _cl_mem_android_native_buffer_host_ptr 80 | { 81 | // Type of external memory allocation. 82 | // Must be CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM for Android native buffers. 83 | cl_mem_ext_host_ptr ext_host_ptr; 84 | 85 | // Virtual pointer to the android native buffer 86 | void* anb_ptr; 87 | 88 | } cl_mem_android_native_buffer_host_ptr; 89 | 90 | #endif //#ifndef CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM 91 | 92 | /*********************************** 93 | * cl_img_egl_image extension * 94 | ************************************/ 95 | typedef void* CLeglImageIMG; 96 | typedef void* CLeglDisplayIMG; 97 | 98 | extern CL_API_ENTRY cl_mem CL_API_CALL 99 | clCreateFromEGLImageIMG(cl_context context, 100 | cl_mem_flags flags, 101 | CLeglImageIMG image, 102 | CLeglDisplayIMG display, 103 | cl_int *errcode_ret); 104 | 105 | 106 | /********************************* 107 | * cl_qcom_other_image extension 108 | *********************************/ 109 | 110 | // Extended flag for creating/querying QCOM non-standard images 111 | #define CL_MEM_OTHER_IMAGE_QCOM (1<<25) 112 | 113 | // cl_channel_type 114 | #define CL_QCOM_UNORM_MIPI10 0x4159 115 | #define CL_QCOM_UNORM_MIPI12 0x415A 116 | #define CL_QCOM_UNSIGNED_MIPI10 0x415B 117 | #define CL_QCOM_UNSIGNED_MIPI12 0x415C 118 | #define CL_QCOM_UNORM_INT10 0x415D 119 | #define CL_QCOM_UNORM_INT12 0x415E 120 | #define CL_QCOM_UNSIGNED_INT16 0x415F 121 | 122 | // cl_channel_order 123 | // Dedicate 0x4130-0x415F range for QCOM extended image formats 124 | // 0x4130 - 0x4132 range is assigned to pixel-oriented compressed format 125 | #define CL_QCOM_BAYER 0x414E 126 | 127 | #define 
CL_QCOM_NV12 0x4133 128 | #define CL_QCOM_NV12_Y 0x4134 129 | #define CL_QCOM_NV12_UV 0x4135 130 | 131 | #define CL_QCOM_TILED_NV12 0x4136 132 | #define CL_QCOM_TILED_NV12_Y 0x4137 133 | #define CL_QCOM_TILED_NV12_UV 0x4138 134 | 135 | #define CL_QCOM_P010 0x413C 136 | #define CL_QCOM_P010_Y 0x413D 137 | #define CL_QCOM_P010_UV 0x413E 138 | 139 | #define CL_QCOM_TILED_P010 0x413F 140 | #define CL_QCOM_TILED_P010_Y 0x4140 141 | #define CL_QCOM_TILED_P010_UV 0x4141 142 | 143 | 144 | #define CL_QCOM_TP10 0x4145 145 | #define CL_QCOM_TP10_Y 0x4146 146 | #define CL_QCOM_TP10_UV 0x4147 147 | 148 | #define CL_QCOM_TILED_TP10 0x4148 149 | #define CL_QCOM_TILED_TP10_Y 0x4149 150 | #define CL_QCOM_TILED_TP10_UV 0x414A 151 | 152 | /********************************* 153 | * cl_qcom_compressed_image extension 154 | *********************************/ 155 | 156 | // Extended flag for creating/querying QCOM non-planar compressed images 157 | #define CL_MEM_COMPRESSED_IMAGE_QCOM (1<<27) 158 | 159 | // Extended image format 160 | // cl_channel_order 161 | #define CL_QCOM_COMPRESSED_RGBA 0x4130 162 | #define CL_QCOM_COMPRESSED_RGBx 0x4131 163 | 164 | #define CL_QCOM_COMPRESSED_NV12_Y 0x413A 165 | #define CL_QCOM_COMPRESSED_NV12_UV 0x413B 166 | 167 | #define CL_QCOM_COMPRESSED_P010 0x4142 168 | #define CL_QCOM_COMPRESSED_P010_Y 0x4143 169 | #define CL_QCOM_COMPRESSED_P010_UV 0x4144 170 | 171 | #define CL_QCOM_COMPRESSED_TP10 0x414B 172 | #define CL_QCOM_COMPRESSED_TP10_Y 0x414C 173 | #define CL_QCOM_COMPRESSED_TP10_UV 0x414D 174 | 175 | #define CL_QCOM_COMPRESSED_NV12_4R 0x414F 176 | #define CL_QCOM_COMPRESSED_NV12_4R_Y 0x4150 177 | #define CL_QCOM_COMPRESSED_NV12_4R_UV 0x4151 178 | /********************************* 179 | * cl_qcom_compressed_yuv_image_read extension 180 | *********************************/ 181 | 182 | // Extended flag for creating/querying QCOM compressed images 183 | #define CL_MEM_COMPRESSED_YUV_IMAGE_QCOM (1<<28) 184 | 185 | // Extended image format 186 | #define 
CL_QCOM_COMPRESSED_NV12 0x10C4 187 | 188 | // Extended flag for setting ION buffer allocation type 189 | #define CL_MEM_ION_HOST_PTR_COMPRESSED_YUV_QCOM 0x40CD 190 | #define CL_MEM_ION_HOST_PTR_PROTECTED_COMPRESSED_YUV_QCOM 0x40CE 191 | 192 | /********************************* 193 | * cl_qcom_accelerated_image_ops 194 | *********************************/ 195 | #define CL_MEM_OBJECT_WEIGHT_IMAGE_QCOM 0x4110 196 | #define CL_DEVICE_HOF_MAX_NUM_PHASES_QCOM 0x4111 197 | #define CL_DEVICE_HOF_MAX_FILTER_SIZE_X_QCOM 0x4112 198 | #define CL_DEVICE_HOF_MAX_FILTER_SIZE_Y_QCOM 0x4113 199 | #define CL_DEVICE_BLOCK_MATCHING_MAX_REGION_SIZE_X_QCOM 0x4114 200 | #define CL_DEVICE_BLOCK_MATCHING_MAX_REGION_SIZE_Y_QCOM 0x4115 201 | 202 | //Extended flag for specifying weight image type 203 | #define CL_WEIGHT_IMAGE_SEPARABLE_QCOM (1<<0) 204 | 205 | // Box Filter 206 | typedef struct _cl_box_filter_size_qcom 207 | { 208 | // Width of box filter on X direction. 209 | float box_filter_width; 210 | 211 | // Height of box filter on Y direction. 212 | float box_filter_height; 213 | } cl_box_filter_size_qcom; 214 | 215 | // HOF Weight Image Desc 216 | typedef struct _cl_weight_desc_qcom 217 | { 218 | /** Coordinate of the "center" point of the weight image, 219 | based on the weight image's top-left corner as the origin. 
*/ 220 | size_t center_coord_x; 221 | size_t center_coord_y; 222 | cl_bitfield flags; 223 | } cl_weight_desc_qcom; 224 | 225 | typedef struct _cl_weight_image_desc_qcom 226 | { 227 | cl_image_desc image_desc; 228 | cl_weight_desc_qcom weight_desc; 229 | } cl_weight_image_desc_qcom; 230 | 231 | /************************************* 232 | * cl_qcom_protected_context extension * 233 | *************************************/ 234 | 235 | #define CL_CONTEXT_PROTECTED_QCOM 0x40C7 236 | #define CL_MEM_ION_HOST_PTR_PROTECTED_QCOM 0x40C8 237 | 238 | /************************************* 239 | * cl_qcom_priority_hint extension * 240 | *************************************/ 241 | #define CL_PRIORITY_HINT_NONE_QCOM 0 242 | typedef cl_uint cl_priority_hint; 243 | 244 | #define CL_CONTEXT_PRIORITY_HINT_QCOM 0x40C9 245 | 246 | /*cl_priority_hint*/ 247 | #define CL_PRIORITY_HINT_HIGH_QCOM 0x40CA 248 | #define CL_PRIORITY_HINT_NORMAL_QCOM 0x40CB 249 | #define CL_PRIORITY_HINT_LOW_QCOM 0x40CC 250 | 251 | #ifdef __cplusplus 252 | } 253 | #endif 254 | 255 | #endif /* __OPENCL_CL_EXT_QCOM_H */ 256 | -------------------------------------------------------------------------------- /inc/CL/cl_gl.h: -------------------------------------------------------------------------------- 1 | /********************************************************************************** 2 | * Copyright (c) 2008-2015 The Khronos Group Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and/or associated documentation files (the 6 | * "Materials"), to deal in the Materials without restriction, including 7 | * without limitation the rights to use, copy, modify, merge, publish, 8 | * distribute, sublicense, and/or sell copies of the Materials, and to 9 | * permit persons to whom the Materials are furnished to do so, subject to 10 | * the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included 13 | * in all copies or substantial portions of the Materials. 14 | * 15 | * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS 16 | * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS 17 | * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT 18 | * https://www.khronos.org/registry/ 19 | * 20 | * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 21 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 22 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 23 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 24 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 25 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 26 | * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
27 | **********************************************************************************/ 28 | 29 | #ifndef __OPENCL_CL_GL_H 30 | #define __OPENCL_CL_GL_H 31 | 32 | #ifdef __APPLE__ 33 | #include 34 | #else 35 | #include 36 | #endif 37 | 38 | #ifdef __cplusplus 39 | extern "C" { 40 | #endif 41 | 42 | typedef cl_uint cl_gl_object_type; 43 | typedef cl_uint cl_gl_texture_info; 44 | typedef cl_uint cl_gl_platform_info; 45 | typedef struct __GLsync *cl_GLsync; 46 | 47 | /* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */ 48 | #define CL_GL_OBJECT_BUFFER 0x2000 49 | #define CL_GL_OBJECT_TEXTURE2D 0x2001 50 | #define CL_GL_OBJECT_TEXTURE3D 0x2002 51 | #define CL_GL_OBJECT_RENDERBUFFER 0x2003 52 | #define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E 53 | #define CL_GL_OBJECT_TEXTURE1D 0x200F 54 | #define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010 55 | #define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011 56 | 57 | /* cl_gl_texture_info */ 58 | #define CL_GL_TEXTURE_TARGET 0x2004 59 | #define CL_GL_MIPMAP_LEVEL 0x2005 60 | #define CL_GL_NUM_SAMPLES 0x2012 61 | 62 | 63 | extern CL_API_ENTRY cl_mem CL_API_CALL 64 | clCreateFromGLBuffer(cl_context /* context */, 65 | cl_mem_flags /* flags */, 66 | cl_GLuint /* bufobj */, 67 | int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; 68 | 69 | extern CL_API_ENTRY cl_mem CL_API_CALL 70 | clCreateFromGLTexture(cl_context /* context */, 71 | cl_mem_flags /* flags */, 72 | cl_GLenum /* target */, 73 | cl_GLint /* miplevel */, 74 | cl_GLuint /* texture */, 75 | cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; 76 | 77 | extern CL_API_ENTRY cl_mem CL_API_CALL 78 | clCreateFromGLRenderbuffer(cl_context /* context */, 79 | cl_mem_flags /* flags */, 80 | cl_GLuint /* renderbuffer */, 81 | cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; 82 | 83 | extern CL_API_ENTRY cl_int CL_API_CALL 84 | clGetGLObjectInfo(cl_mem /* memobj */, 85 | cl_gl_object_type * /* gl_object_type */, 86 | cl_GLuint * /* gl_object_name */) 
CL_API_SUFFIX__VERSION_1_0; 87 | 88 | extern CL_API_ENTRY cl_int CL_API_CALL 89 | clGetGLTextureInfo(cl_mem /* memobj */, 90 | cl_gl_texture_info /* param_name */, 91 | size_t /* param_value_size */, 92 | void * /* param_value */, 93 | size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; 94 | 95 | extern CL_API_ENTRY cl_int CL_API_CALL 96 | clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */, 97 | cl_uint /* num_objects */, 98 | const cl_mem * /* mem_objects */, 99 | cl_uint /* num_events_in_wait_list */, 100 | const cl_event * /* event_wait_list */, 101 | cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; 102 | 103 | extern CL_API_ENTRY cl_int CL_API_CALL 104 | clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */, 105 | cl_uint /* num_objects */, 106 | const cl_mem * /* mem_objects */, 107 | cl_uint /* num_events_in_wait_list */, 108 | const cl_event * /* event_wait_list */, 109 | cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; 110 | 111 | 112 | /* Deprecated OpenCL 1.1 APIs */ 113 | extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL 114 | clCreateFromGLTexture2D(cl_context /* context */, 115 | cl_mem_flags /* flags */, 116 | cl_GLenum /* target */, 117 | cl_GLint /* miplevel */, 118 | cl_GLuint /* texture */, 119 | cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; 120 | 121 | extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL 122 | clCreateFromGLTexture3D(cl_context /* context */, 123 | cl_mem_flags /* flags */, 124 | cl_GLenum /* target */, 125 | cl_GLint /* miplevel */, 126 | cl_GLuint /* texture */, 127 | cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; 128 | 129 | /* cl_khr_gl_sharing extension */ 130 | 131 | #define cl_khr_gl_sharing 1 132 | 133 | typedef cl_uint cl_gl_context_info; 134 | 135 | /* Additional Error Codes */ 136 | #define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000 137 | 138 | /* cl_gl_context_info */ 139 | #define 
CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006 140 | #define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007 141 | 142 | /* Additional cl_context_properties */ 143 | #define CL_GL_CONTEXT_KHR 0x2008 144 | #define CL_EGL_DISPLAY_KHR 0x2009 145 | #define CL_GLX_DISPLAY_KHR 0x200A 146 | #define CL_WGL_HDC_KHR 0x200B 147 | #define CL_CGL_SHAREGROUP_KHR 0x200C 148 | 149 | extern CL_API_ENTRY cl_int CL_API_CALL 150 | clGetGLContextInfoKHR(const cl_context_properties * /* properties */, 151 | cl_gl_context_info /* param_name */, 152 | size_t /* param_value_size */, 153 | void * /* param_value */, 154 | size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; 155 | 156 | typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( 157 | const cl_context_properties * properties, 158 | cl_gl_context_info param_name, 159 | size_t param_value_size, 160 | void * param_value, 161 | size_t * param_value_size_ret); 162 | 163 | #ifdef __cplusplus 164 | } 165 | #endif 166 | 167 | #endif /* __OPENCL_CL_GL_H */ 168 | -------------------------------------------------------------------------------- /inc/CL/cl_gl_ext.h: -------------------------------------------------------------------------------- 1 | /********************************************************************************** 2 | * Copyright (c) 2008-2015 The Khronos Group Inc. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and/or associated documentation files (the 6 | * "Materials"), to deal in the Materials without restriction, including 7 | * without limitation the rights to use, copy, modify, merge, publish, 8 | * distribute, sublicense, and/or sell copies of the Materials, and to 9 | * permit persons to whom the Materials are furnished to do so, subject to 10 | * the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included 13 | * in all copies or substantial portions of the Materials. 
14 | * 15 | * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS 16 | * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS 17 | * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT 18 | * https://www.khronos.org/registry/ 19 | * 20 | * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 21 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 22 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 23 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 24 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 25 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 26 | * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 27 | **********************************************************************************/ 28 | 29 | /* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ 30 | 31 | /* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have */ 32 | /* OpenGL dependencies. */ 33 | 34 | #ifndef __OPENCL_CL_GL_EXT_H 35 | #define __OPENCL_CL_GL_EXT_H 36 | 37 | #ifdef __cplusplus 38 | extern "C" { 39 | #endif 40 | 41 | #ifdef __APPLE__ 42 | #include 43 | #else 44 | #include 45 | #endif 46 | 47 | /* 48 | * For each extension, follow this template 49 | * cl_VEN_extname extension */ 50 | /* #define cl_VEN_extname 1 51 | * ... define new types, if any 52 | * ... define new tokens, if any 53 | * ... define new APIs, if any 54 | * 55 | * If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header 56 | * This allows us to avoid having to decide whether to include GL headers or GLES here. 
57 | */ 58 | 59 | /* 60 | * cl_khr_gl_event extension 61 | * See section 9.9 in the OpenCL 1.1 spec for more information 62 | */ 63 | #define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D 64 | 65 | extern CL_API_ENTRY cl_event CL_API_CALL 66 | clCreateEventFromGLsyncKHR(cl_context /* context */, 67 | cl_GLsync /* cl_GLsync */, 68 | cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1; 69 | 70 | #ifdef __cplusplus 71 | } 72 | #endif 73 | 74 | #endif /* __OPENCL_CL_GL_EXT_H */ 75 | -------------------------------------------------------------------------------- /inc/CL/opencl.h: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright (c) 2008-2015 The Khronos Group Inc. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and/or associated documentation files (the 6 | * "Materials"), to deal in the Materials without restriction, including 7 | * without limitation the rights to use, copy, modify, merge, publish, 8 | * distribute, sublicense, and/or sell copies of the Materials, and to 9 | * permit persons to whom the Materials are furnished to do so, subject to 10 | * the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included 13 | * in all copies or substantial portions of the Materials. 14 | * 15 | * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS 16 | * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS 17 | * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT 18 | * https://www.khronos.org/registry/ 19 | * 20 | * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 21 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 22 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
23 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 24 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 25 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 26 | * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 27 | ******************************************************************************/ 28 | 29 | /* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ 30 | 31 | #ifndef __OPENCL_H 32 | #define __OPENCL_H 33 | 34 | #ifdef __cplusplus 35 | extern "C" { 36 | #endif 37 | 38 | #ifdef __APPLE__ 39 | 40 | #include 41 | #include 42 | #include 43 | #include 44 | 45 | #else 46 | 47 | #include 48 | #include 49 | #include 50 | #include 51 | 52 | #endif 53 | 54 | #ifdef __cplusplus 55 | } 56 | #endif 57 | 58 | #endif /* __OPENCL_H */ 59 | 60 | -------------------------------------------------------------------------------- /src/examples/basic/hello_world.cpp: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // File: hello_world.cpp 3 | // Desc: 4 | // 5 | // Author: QUALCOMM 6 | // 7 | // Copyright (c) 2017 QUALCOMM Technologies, Inc. 8 | // All Rights Reserved. 
9 | // QUALCOMM Proprietary/GTDR 10 | //-------------------------------------------------------------------------------------- 11 | 12 | // Std includes 13 | #include 14 | #include 15 | #include 16 | 17 | // Project includes 18 | #include "util/cl_wrapper.h" 19 | 20 | // Library includes 21 | #include 22 | 23 | static const char *HELP_MESSAGE = "\n" 24 | "Usage: hello_world \n" 25 | "\n" 26 | "This example copies the input file to the output file.\n" 27 | "Use it to test your build tools.\n"; 28 | 29 | static const char *PROGRAM_SOURCE[] = { 30 | "__kernel void copy(__global char *src,\n", //如果使用cl_mem clCreateBuffer对象, 那么参数应该为指针的形式 31 | " __global char *dst\n", 32 | " )\n", 33 | "{\n", 34 | " uint wid_x = get_global_id(0);\n", 35 | " dst[wid_x] = src[wid_x];\n", 36 | "}\n" 37 | }; 38 | 39 | static const cl_uint PROGRAM_SOURCE_LEN = sizeof(PROGRAM_SOURCE) / sizeof(const char *); 40 | 41 | int main(int argc, char** argv) 42 | { 43 | if (argc < 3) 44 | { 45 | std::cerr << "Please specify source and destination files.\n"; 46 | std::cerr << HELP_MESSAGE; 47 | std::exit(EXIT_SUCCESS); 48 | } 49 | const std::string src_filename(argv[1]); 50 | const std::string out_filename(argv[2]); 51 | 52 | cl_wrapper wrapper; 53 | cl_program program = wrapper.make_program(PROGRAM_SOURCE, PROGRAM_SOURCE_LEN); 54 | cl_kernel kernel = wrapper.make_kernel("copy", program); 55 | cl_context context = wrapper.get_context(); 56 | cl_command_queue command_queue = wrapper.get_command_queue(); 57 | cl_int err = CL_SUCCESS; 58 | 59 | /* 60 | * Step 0: Create CL buffers. 
61 | */ 62 | 63 | std::ifstream fin(src_filename, std::ios::binary); 64 | if (!fin) 65 | { 66 | std::cerr << "Couldn't open file " << src_filename << "\n"; 67 | std::exit(EXIT_FAILURE); 68 | } 69 | 70 | const auto fin_begin = fin.tellg(); 71 | 72 | fin.seekg(0, std::ios::end); 73 | const auto fin_end = fin.tellg(); 74 | const size_t buf_size = static_cast(fin_end - fin_begin); 75 | std::vector buf(buf_size); //使用vector自动管理内存,创建对象的时候就分配需要的内存大小 76 | 77 | fin.seekg(0, std::ios::beg); 78 | fin.read(buf.data(), buf_size); 79 | 80 | cl_mem src_buffer = clCreateBuffer( 81 | context, 82 | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, 83 | buf_size, 84 | buf.data(), 85 | &err 86 | ); 87 | if (err != CL_SUCCESS) 88 | { 89 | std::cerr << "Error " << err << " with clCreateBuffer for source file." << "\n"; 90 | std::exit(err); 91 | } 92 | 93 | cl_mem out_buffer = clCreateBuffer( 94 | context, 95 | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, 96 | buf_size, 97 | NULL, 98 | &err 99 | ); 100 | if (err != CL_SUCCESS) 101 | { 102 | std::cerr << "Error " << err << " with clCreateBuffer for output file." << "\n"; 103 | std::exit(err); 104 | } 105 | 106 | /* 107 | * Step 1: Set up kernel arguments and run the kernel. 108 | */ 109 | 110 | err = clSetKernelArg(kernel, 0, sizeof(src_buffer), &src_buffer); 111 | if (err != CL_SUCCESS) 112 | { 113 | std::cerr << "Error " << err << " with clSetKernelArg for argument 0." << "\n"; 114 | std::exit(err); 115 | } 116 | 117 | err = clSetKernelArg(kernel, 1, sizeof(out_buffer), &out_buffer); 118 | if (err != CL_SUCCESS) 119 | { 120 | std::cerr << "Error " << err << " with clSetKernelArg for argument 1." << "\n"; 121 | std::exit(err); 122 | } 123 | 124 | err = clEnqueueNDRangeKernel( 125 | command_queue, 126 | kernel, 127 | 1, 128 | NULL, 129 | &buf_size, 130 | NULL, 131 | 0, 132 | NULL, 133 | NULL 134 | ); 135 | if (err != CL_SUCCESS) 136 | { 137 | std::cerr << "Error " << err << " with clEnqueueNDRangeKernel." 
<< "\n"; 138 | std::exit(err); 139 | } 140 | 141 | /* 142 | * Step 2: Copy the data out of the ion buffer for each plane. 143 | */ 144 | 145 | char *mapped_ptr = static_cast(clEnqueueMapBuffer( 146 | command_queue, 147 | out_buffer, 148 | CL_TRUE, 149 | CL_MAP_READ, 150 | 0, 151 | buf_size, 152 | 0, 153 | NULL, 154 | NULL, 155 | &err 156 | )); 157 | if (err != CL_SUCCESS) 158 | { 159 | std::cerr << "Error " << err << " mapping output buffer." << "\n"; 160 | std::exit(err); 161 | } 162 | 163 | std::ofstream fout(out_filename, std::ios::binary); 164 | if (!fout) 165 | { 166 | std::cerr << "Couldn't open file " << out_filename << "\n"; 167 | std::exit(EXIT_FAILURE); 168 | } 169 | fout.write(mapped_ptr, buf_size); 170 | fout.close(); 171 | 172 | err = clEnqueueUnmapMemObject(command_queue, out_buffer, mapped_ptr, 0, NULL, NULL); 173 | if (err != CL_SUCCESS) 174 | { 175 | std::cerr << "Error " << err << " unmapping output buffer." << "\n"; 176 | std::exit(err); 177 | } 178 | 179 | clFinish(command_queue); 180 | 181 | // Clean up cl resources that aren't automatically handled by cl_wrapper 182 | clReleaseMemObject(src_buffer); 183 | clReleaseMemObject(out_buffer); 184 | 185 | return 0; 186 | } 187 | -------------------------------------------------------------------------------- /src/examples/bayer_mipi/mipi10_to_unpacked.cpp: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // File: mipi10_to_unpacked.cpp 3 | // Desc: 4 | // 5 | // Author: QUALCOMM 6 | // 7 | // Copyright (c) 2018 QUALCOMM Technologies, Inc. 8 | // All Rights Reserved. 
9 | // QUALCOMM Proprietary/GTDR 10 | //-------------------------------------------------------------------------------------- 11 | 12 | // Std includes 13 | #include 14 | #include 15 | #include 16 | 17 | // Project includes 18 | #include "util/cl_wrapper.h" 19 | #include "util/util.h" 20 | 21 | // Library includes 22 | #include 23 | #include 24 | 25 | static const char *HELP_MESSAGE = "\n" 26 | "Usage: mipi10_to_unpacked \n" 27 | "\n" 28 | "Converts a single-channel MIPI10 image into an unpacked 16-bit format.\n"; 29 | 30 | static const char *PROGRAM_SOURCE[] = { 31 | "__kernel void unpack(__read_only image2d_t packed_image,\n", 32 | " __write_only image2d_t unpacked_image,\n", 33 | " sampler_t sampler)\n", 34 | "{\n", 35 | " const int wid_x = get_global_id(0);\n", 36 | " const int wid_y = get_global_id(1);\n", 37 | " const int2 coord = (int2)(4 * wid_x, wid_y);\n", 38 | " const float4 pixels[] = {\n", 39 | " read_imagef(packed_image, sampler, coord + (int2)(0, 0)),\n", 40 | " read_imagef(packed_image, sampler, coord + (int2)(1, 0)),\n", 41 | " read_imagef(packed_image, sampler, coord + (int2)(2, 0)),\n", 42 | " read_imagef(packed_image, sampler, coord + (int2)(3, 0)),\n", 43 | " };\n", 44 | " write_imagef(unpacked_image, coord + (int2)(0, 0), pixels[0]);\n", 45 | " write_imagef(unpacked_image, coord + (int2)(1, 0), pixels[1]);\n", 46 | " write_imagef(unpacked_image, coord + (int2)(2, 0), pixels[2]);\n", 47 | " write_imagef(unpacked_image, coord + (int2)(3, 0), pixels[3]);\n", 48 | "}\n" 49 | }; 50 | 51 | static const cl_uint PROGRAM_SOURCE_LEN = sizeof(PROGRAM_SOURCE) / sizeof(const char *); 52 | 53 | int main(int argc, char** argv) 54 | { 55 | if (argc < 3) 56 | { 57 | std::cerr << "Please specify source and output images.\n"; 58 | std::cerr << HELP_MESSAGE; 59 | std::exit(EXIT_SUCCESS); 60 | } 61 | const std::string src_image_filename(argv[1]); 62 | const std::string out_image_filename(argv[2]); 63 | 64 | cl_wrapper wrapper; 65 | cl_program program = 
wrapper.make_program(PROGRAM_SOURCE, PROGRAM_SOURCE_LEN); 66 | cl_kernel kernel = wrapper.make_kernel("unpack", program); 67 | cl_context context = wrapper.get_context(); 68 | cl_command_queue command_queue = wrapper.get_command_queue(); 69 | bayer_mipi10_image_t src_bayer_image_info = load_bayer_mipi_10_image_data(src_image_filename); 70 | 71 | /* 72 | * Step 0: Confirm the required OpenCL extensions are supported. 73 | */ 74 | 75 | if (!wrapper.check_extension_support("cl_qcom_other_image")) 76 | { 77 | std::cerr << "Extension cl_qcom_other_image needed for MIPI10 data type is not supported.\n"; 78 | std::exit(EXIT_FAILURE); 79 | } 80 | 81 | if (!wrapper.check_extension_support("cl_qcom_ext_host_ptr")) 82 | { 83 | std::cerr << "Extension cl_qcom_ext_host_ptr needed for ION-backed images is not supported.\n"; 84 | std::exit(EXIT_FAILURE); 85 | } 86 | 87 | if (!wrapper.check_extension_support("cl_qcom_ion_host_ptr")) 88 | { 89 | std::cerr << "Extension cl_qcom_ion_host_ptr needed for ION-backed images is not supported.\n"; 90 | std::exit(EXIT_FAILURE); 91 | } 92 | 93 | /* 94 | * Step 1: Create suitable ion buffer-backed CL images. 95 | * Note the source image has the same layout as with bayer_mipi10_to_rgba.cpp example. 96 | * The difference is how such images are addressed on the GPU. 
97 | */ 98 | 99 | cl_image_format src_format; 100 | src_format.image_channel_order = CL_R; 101 | src_format.image_channel_data_type = CL_QCOM_UNORM_MIPI10; 102 | 103 | cl_image_desc src_desc; 104 | std::memset(&src_desc, 0, sizeof(src_desc)); 105 | src_desc.image_type = CL_MEM_OBJECT_IMAGE2D; 106 | src_desc.image_width = src_bayer_image_info.width; 107 | src_desc.image_height = src_bayer_image_info.height; 108 | src_desc.image_row_pitch = wrapper.get_ion_image_row_pitch(src_format, src_desc); 109 | 110 | cl_mem_ion_host_ptr src_ion_mem = wrapper.make_ion_buffer_for_nonplanar_image(src_format, src_desc); 111 | cl_int err = 0; 112 | cl_mem src_image = clCreateImage( 113 | context, 114 | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM, 115 | &src_format, 116 | &src_desc, 117 | &src_ion_mem, 118 | &err 119 | ); 120 | if (err != CL_SUCCESS) 121 | { 122 | std::cerr << "Error " << err << " with clCreateImage for source image." << "\n"; 123 | std::exit(err); 124 | } 125 | 126 | const size_t origin[] = {0, 0, 0}; 127 | size_t row_pitch = 0 ; 128 | const size_t src_region[] = {src_desc.image_width, src_desc.image_height, 1}; 129 | unsigned char *image_ptr = static_cast(clEnqueueMapImage( 130 | command_queue, 131 | src_image, 132 | CL_BLOCKING, 133 | CL_MAP_WRITE, 134 | origin, 135 | src_region, 136 | &row_pitch, 137 | NULL, 138 | 0, 139 | NULL, 140 | NULL, 141 | &err 142 | )); 143 | if (err != CL_SUCCESS) 144 | { 145 | std::cerr << "Error " << err << " with clEnqueueMapImage for source image." 
<< "\n"; 146 | std::exit(err); 147 | } 148 | 149 | // Copies image data from the host to the ION buffer 150 | for (uint32_t i = 0; i < src_desc.image_height; ++i) 151 | { 152 | std::memcpy( 153 | image_ptr + i * src_desc.image_row_pitch, 154 | src_bayer_image_info.pixels.data() + i * src_desc.image_width / 4 * 5, 155 | src_desc.image_width / 4 * 5 156 | ); 157 | } 158 | 159 | err = clEnqueueUnmapMemObject(command_queue, src_image, image_ptr, 0, NULL, NULL); 160 | if (err != CL_SUCCESS) 161 | { 162 | std::cerr << "Error " << err << " unmapping source image." << "\n"; 163 | std::exit(err); 164 | } 165 | 166 | cl_image_format out_format; 167 | out_format.image_channel_order = CL_R; 168 | out_format.image_channel_data_type = CL_UNORM_INT16; 169 | 170 | cl_image_desc out_desc; 171 | std::memset(&out_desc, 0, sizeof(out_desc)); 172 | out_desc.image_type = CL_MEM_OBJECT_IMAGE2D; 173 | out_desc.image_width = src_bayer_image_info.width; 174 | out_desc.image_height = src_bayer_image_info.height; 175 | out_desc.image_row_pitch = wrapper.get_ion_image_row_pitch(out_format, out_desc); 176 | 177 | cl_mem_ion_host_ptr out_ion_mem = wrapper.make_ion_buffer_for_nonplanar_image(out_format, out_desc); 178 | cl_mem out_image = clCreateImage( 179 | context, 180 | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM, 181 | &out_format, 182 | &out_desc, 183 | &out_ion_mem, 184 | &err 185 | ); 186 | if (err != CL_SUCCESS) 187 | { 188 | std::cerr << "Error " << err << " with clCreateImage for output image." << "\n"; 189 | std::exit(err); 190 | } 191 | 192 | /* 193 | * Step 2: Set up kernel arguments and run the kernel. 194 | */ 195 | 196 | cl_sampler sampler = clCreateSampler( 197 | context, 198 | CL_FALSE, 199 | CL_ADDRESS_NONE, 200 | CL_FILTER_NEAREST, 201 | &err 202 | ); 203 | if (err != CL_SUCCESS) 204 | { 205 | std::cerr << "Error " << err << " with clCreateSampler." 
<< "\n"; 206 | std::exit(err); 207 | } 208 | 209 | err = clSetKernelArg(kernel, 0, sizeof(src_image), &src_image); 210 | if (err != CL_SUCCESS) 211 | { 212 | std::cerr << "\tError " << err << " with clSetKernelArg for argument 0." << "\n"; 213 | std::exit(err); 214 | } 215 | 216 | err = clSetKernelArg(kernel, 1, sizeof(out_image), &out_image); 217 | if (err != CL_SUCCESS) 218 | { 219 | std::cerr << "\tError " << err << " with clSetKernelArg for argument 1." << "\n"; 220 | std::exit(err); 221 | } 222 | 223 | err = clSetKernelArg(kernel, 2, sizeof(sampler), &sampler); 224 | if (err != CL_SUCCESS) 225 | { 226 | std::cerr << "\tError " << err << " with clSetKernelArg for argument 2." << "\n"; 227 | std::exit(err); 228 | } 229 | 230 | const size_t global_work_size[] = {out_desc.image_width / 4, out_desc.image_height}; 231 | err = clEnqueueNDRangeKernel( 232 | command_queue, 233 | kernel, 234 | 2, 235 | NULL, 236 | global_work_size, 237 | NULL, 238 | 0, 239 | NULL, 240 | NULL 241 | ); 242 | if (err != CL_SUCCESS) 243 | { 244 | std::cerr << "\tError " << err << " with clEnqueueNDRangeKernel." << "\n"; 245 | std::exit(err); 246 | } 247 | 248 | /* 249 | * Step 3: Copy the data out of the ion buffer. 250 | */ 251 | 252 | single_channel_int16_image_t out_image_info; 253 | out_image_info.width = out_desc.image_width; 254 | out_image_info.height = out_desc.image_height; 255 | out_image_info.pixels.resize((out_image_info.width * 2) * out_image_info.height); 256 | 257 | const size_t out_region[] = {out_desc.image_width, out_desc.image_height, 1}; 258 | row_pitch = 0; 259 | image_ptr = static_cast(clEnqueueMapImage( 260 | command_queue, 261 | out_image, 262 | CL_TRUE, 263 | CL_MAP_READ, 264 | origin, 265 | out_region, 266 | &row_pitch, 267 | NULL, 268 | 0, 269 | NULL, 270 | NULL, 271 | &err 272 | )); 273 | if (err != CL_SUCCESS) 274 | { 275 | std::cerr << "Error " << err << " mapping dest image buffer for reading." 
<< "\n"; 276 | std::exit(err); 277 | } 278 | 279 | // Copies image data from the ION buffer to the host 280 | for (uint32_t i = 0; i < out_desc.image_height; ++i) 281 | { 282 | std::memcpy( 283 | out_image_info.pixels.data() + i * out_desc.image_width * 2, 284 | image_ptr + i * row_pitch, 285 | out_desc.image_width * 2 286 | ); 287 | } 288 | 289 | err = clEnqueueUnmapMemObject(command_queue, out_image, image_ptr, 0, NULL, NULL); 290 | if (err != CL_SUCCESS) 291 | { 292 | std::cerr << "Error " << err << " unmapping dest image." << "\n"; 293 | std::exit(err); 294 | } 295 | 296 | clFinish(command_queue); 297 | 298 | save_single_channel_image_data(out_image_filename, out_image_info); 299 | 300 | // Clean up cl resources that aren't automatically handled by cl_wrapper 301 | clReleaseSampler(sampler); 302 | clReleaseMemObject(src_image); 303 | clReleaseMemObject(out_image); 304 | 305 | return 0; 306 | } 307 | 308 | -------------------------------------------------------------------------------- /src/examples/bayer_mipi/unpacked_to_mipi10.cpp: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // File: unpacked_to_mipi10.cpp 3 | // Desc: 4 | // 5 | // Author: QUALCOMM 6 | // 7 | // Copyright (c) 2018 QUALCOMM Technologies, Inc. 8 | // All Rights Reserved. 
9 | // QUALCOMM Proprietary/GTDR 10 | //-------------------------------------------------------------------------------------- 11 | 12 | // Std includes 13 | #include 14 | #include 15 | #include 16 | 17 | // Project includes 18 | #include "util/cl_wrapper.h" 19 | #include "util/util.h" 20 | 21 | // Library includes 22 | #include 23 | #include 24 | 25 | static const char *HELP_MESSAGE = "\n" 26 | "Usage: unpacked_to_mipi10 \n" 27 | "\n" 28 | "Converts a single-channel unpacked 16-bit image to MIPI10 format.\n"; 29 | 30 | static const char *PROGRAM_SOURCE[] = { 31 | "__kernel void pack(__read_only image2d_t unpacked_image,\n", 32 | " __write_only image2d_t packed_image,\n", 33 | " sampler_t sampler)\n", 34 | "{\n", 35 | " const int wid_x = get_global_id(0);\n", 36 | " const int wid_y = get_global_id(1);\n", 37 | " const int2 coord = (int2)(4 * wid_x, wid_y);\n", 38 | " const float4 pixels[] = {\n", 39 | " read_imagef(unpacked_image, sampler, coord + (int2)(0, 0)),\n", 40 | " read_imagef(unpacked_image, sampler, coord + (int2)(1, 0)),\n", 41 | " read_imagef(unpacked_image, sampler, coord + (int2)(2, 0)),\n", 42 | " read_imagef(unpacked_image, sampler, coord + (int2)(3, 0)),\n", 43 | " };\n", 44 | " float out_pixel[] = {pixels[0].x, pixels[1].x, pixels[2].x, pixels[3].x};\n", 45 | " const int2 write_coord = (int2)(wid_x, wid_y);\n", 46 | " qcom_write_imagefv_4x1_n10m00(packed_image, write_coord, out_pixel);\n", 47 | "}\n" 48 | }; 49 | 50 | static const cl_uint PROGRAM_SOURCE_LEN = sizeof(PROGRAM_SOURCE) / sizeof(const char *); 51 | 52 | int main(int argc, char** argv) 53 | { 54 | if (argc < 3) 55 | { 56 | std::cerr << "Please specify source and output images.\n"; 57 | std::cerr << HELP_MESSAGE; 58 | std::exit(EXIT_SUCCESS); 59 | } 60 | const std::string src_image_filename(argv[1]); 61 | const std::string out_image_filename(argv[2]); 62 | 63 | cl_wrapper wrapper; 64 | cl_program program = wrapper.make_program(PROGRAM_SOURCE, PROGRAM_SOURCE_LEN); 65 | cl_kernel kernel = 
wrapper.make_kernel("pack", program); 66 | cl_context context = wrapper.get_context(); 67 | cl_command_queue command_queue = wrapper.get_command_queue(); 68 | single_channel_int16_image_t src_int16_image_info = load_single_channel_image_data(src_image_filename); 69 | 70 | /* 71 | * Step 0: Confirm the required OpenCL extensions are supported. 72 | */ 73 | 74 | if (!wrapper.check_extension_support("cl_qcom_other_image")) 75 | { 76 | std::cerr << "Extension cl_qcom_other_image needed for MIPI10 data type is not supported.\n"; 77 | std::exit(EXIT_FAILURE); 78 | } 79 | 80 | if (!wrapper.check_extension_support("cl_qcom_ext_host_ptr")) 81 | { 82 | std::cerr << "Extension cl_qcom_ext_host_ptr needed for ION-backed images is not supported.\n"; 83 | std::exit(EXIT_FAILURE); 84 | } 85 | 86 | if (!wrapper.check_extension_support("cl_qcom_ion_host_ptr")) 87 | { 88 | std::cerr << "Extension cl_qcom_ion_host_ptr needed for ION-backed images is not supported.\n"; 89 | std::exit(EXIT_FAILURE); 90 | } 91 | 92 | /* 93 | * Step 1: Create suitable ion buffer-backed CL images. 94 | * Note the source image has the same layout as with bayer_mipi10_to_rgba.cpp example. 95 | * The difference is how such images are addressed on the GPU. 
96 | */ 97 | 98 | cl_image_format src_format; 99 | src_format.image_channel_order = CL_R; 100 | src_format.image_channel_data_type = CL_UNORM_INT16; 101 | 102 | cl_image_desc src_desc; 103 | std::memset(&src_desc, 0, sizeof(src_desc)); 104 | src_desc.image_type = CL_MEM_OBJECT_IMAGE2D; 105 | src_desc.image_width = src_int16_image_info.width; 106 | src_desc.image_height = src_int16_image_info.height; 107 | src_desc.image_row_pitch = wrapper.get_ion_image_row_pitch(src_format, src_desc); 108 | 109 | cl_mem_ion_host_ptr src_ion_mem = wrapper.make_ion_buffer_for_nonplanar_image(src_format, src_desc); 110 | cl_int err = 0; 111 | cl_mem src_image = clCreateImage( 112 | context, 113 | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM, 114 | &src_format, 115 | &src_desc, 116 | &src_ion_mem, 117 | &err 118 | ); 119 | if (err != CL_SUCCESS) 120 | { 121 | std::cerr << "Error " << err << " with clCreateImage for source image." << "\n"; 122 | std::exit(err); 123 | } 124 | 125 | const size_t origin[] = {0, 0, 0}; 126 | size_t row_pitch = 0 ; 127 | const size_t src_region[] = {src_desc.image_width, src_desc.image_height, 1}; 128 | unsigned char *image_ptr = static_cast(clEnqueueMapImage( 129 | command_queue, 130 | src_image, 131 | CL_BLOCKING, 132 | CL_MAP_WRITE, 133 | origin, 134 | src_region, 135 | &row_pitch, 136 | NULL, 137 | 0, 138 | NULL, 139 | NULL, 140 | &err 141 | )); 142 | if (err != CL_SUCCESS) 143 | { 144 | std::cerr << "Error " << err << " with clEnqueueMapImage for source image." 
<< "\n"; 145 | std::exit(err); 146 | } 147 | 148 | // Copies image data from the host to the ION buffer 149 | for (uint32_t i = 0; i < src_desc.image_height; ++i) 150 | { 151 | std::memcpy( 152 | image_ptr + i * src_desc.image_row_pitch, 153 | src_int16_image_info.pixels.data() + i * src_desc.image_width * 2, 154 | src_desc.image_width * 2 155 | ); 156 | } 157 | 158 | err = clEnqueueUnmapMemObject(command_queue, src_image, image_ptr, 0, NULL, NULL); 159 | if (err != CL_SUCCESS) 160 | { 161 | std::cerr << "Error " << err << " unmapping source image." << "\n"; 162 | std::exit(err); 163 | } 164 | 165 | cl_image_format out_format; 166 | out_format.image_channel_order = CL_R; 167 | out_format.image_channel_data_type = CL_QCOM_UNORM_MIPI10; 168 | 169 | cl_image_desc out_desc; 170 | std::memset(&out_desc, 0, sizeof(out_desc)); 171 | out_desc.image_type = CL_MEM_OBJECT_IMAGE2D; 172 | out_desc.image_width = src_int16_image_info.width; 173 | out_desc.image_height = src_int16_image_info.height; 174 | out_desc.image_row_pitch = wrapper.get_ion_image_row_pitch(out_format, out_desc); 175 | 176 | cl_mem_ion_host_ptr out_ion_mem = wrapper.make_ion_buffer_for_nonplanar_image(out_format, out_desc); 177 | cl_mem out_image = clCreateImage( 178 | context, 179 | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM, 180 | &out_format, 181 | &out_desc, 182 | &out_ion_mem, 183 | &err 184 | ); 185 | if (err != CL_SUCCESS) 186 | { 187 | std::cerr << "Error " << err << " with clCreateImage for output image." << "\n"; 188 | std::exit(err); 189 | } 190 | 191 | /* 192 | * Step 2: Set up kernel arguments and run the kernel. 193 | */ 194 | 195 | cl_sampler sampler = clCreateSampler( 196 | context, 197 | CL_FALSE, 198 | CL_ADDRESS_NONE, 199 | CL_FILTER_NEAREST, 200 | &err 201 | ); 202 | if (err != CL_SUCCESS) 203 | { 204 | std::cerr << "Error " << err << " with clCreateSampler." 
<< "\n"; 205 | std::exit(err); 206 | } 207 | 208 | err = clSetKernelArg(kernel, 0, sizeof(src_image), &src_image); 209 | if (err != CL_SUCCESS) 210 | { 211 | std::cerr << "\tError " << err << " with clSetKernelArg for argument 0." << "\n"; 212 | std::exit(err); 213 | } 214 | 215 | err = clSetKernelArg(kernel, 1, sizeof(out_image), &out_image); 216 | if (err != CL_SUCCESS) 217 | { 218 | std::cerr << "\tError " << err << " with clSetKernelArg for argument 1." << "\n"; 219 | std::exit(err); 220 | } 221 | 222 | err = clSetKernelArg(kernel, 2, sizeof(sampler), &sampler); 223 | if (err != CL_SUCCESS) 224 | { 225 | std::cerr << "\tError " << err << " with clSetKernelArg for argument 2." << "\n"; 226 | std::exit(err); 227 | } 228 | 229 | const size_t global_work_size[] = {out_desc.image_width / 4, out_desc.image_height}; 230 | err = clEnqueueNDRangeKernel( 231 | command_queue, 232 | kernel, 233 | 2, 234 | NULL, 235 | global_work_size, 236 | NULL, 237 | 0, 238 | NULL, 239 | NULL 240 | ); 241 | if (err != CL_SUCCESS) 242 | { 243 | std::cerr << "\tError " << err << " with clEnqueueNDRangeKernel." << "\n"; 244 | std::exit(err); 245 | } 246 | 247 | /* 248 | * Step 3: Copy the data out of the ion buffer. 249 | */ 250 | 251 | bayer_mipi10_image_t out_image_info; 252 | out_image_info.width = out_desc.image_width; 253 | out_image_info.height = out_desc.image_height; 254 | out_image_info.pixels.resize((out_image_info.width / 4 * 5) * out_image_info.height); 255 | 256 | const size_t out_region[] = {out_desc.image_width, out_desc.image_height, 1}; 257 | row_pitch = 0; 258 | image_ptr = static_cast(clEnqueueMapImage( 259 | command_queue, 260 | out_image, 261 | CL_TRUE, 262 | CL_MAP_READ, 263 | origin, 264 | out_region, 265 | &row_pitch, 266 | NULL, 267 | 0, 268 | NULL, 269 | NULL, 270 | &err 271 | )); 272 | if (err != CL_SUCCESS) 273 | { 274 | std::cerr << "Error " << err << " mapping dest image buffer for reading." 
<< "\n"; 275 | std::exit(err); 276 | } 277 | 278 | // Copies image data from the ION buffer to the host 279 | for (uint32_t i = 0; i < out_desc.image_height; ++i) 280 | { 281 | std::memcpy( 282 | out_image_info.pixels.data() + i * out_desc.image_width / 4 * 5, 283 | image_ptr + i * row_pitch, 284 | out_desc.image_width / 4 * 5 285 | ); 286 | } 287 | 288 | err = clEnqueueUnmapMemObject(command_queue, out_image, image_ptr, 0, NULL, NULL); 289 | if (err != CL_SUCCESS) 290 | { 291 | std::cerr << "Error " << err << " unmapping dest image." << "\n"; 292 | std::exit(err); 293 | } 294 | 295 | clFinish(command_queue); 296 | 297 | save_bayer_mipi_10_image_data(out_image_filename, out_image_info); 298 | 299 | // Clean up cl resources that aren't automatically handled by cl_wrapper 300 | clReleaseSampler(sampler); 301 | clReleaseMemObject(src_image); 302 | clReleaseMemObject(out_image); 303 | 304 | return 0; 305 | } 306 | 307 | -------------------------------------------------------------------------------- /src/examples/io_coherent_ion/io_coherent_ion_buffers.cpp: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // File: io_coherent_ion_buffers.cpp 3 | // Desc: 4 | // 5 | // Author: QUALCOMM 6 | // 7 | // Copyright (c) 2018 QUALCOMM Technologies, Inc. 8 | // All Rights Reserved. 
9 | // QUALCOMM Proprietary/GTDR 10 | //-------------------------------------------------------------------------------------- 11 | 12 | // Std includes 13 | #include 14 | #include 15 | #include 16 | 17 | // Project includes 18 | #include "util/cl_wrapper.h" 19 | 20 | // Library includes 21 | #include 22 | 23 | static const char *HELP_MESSAGE = "\n" 24 | "Usage: io_coherent_ion_buffers \n" 25 | "\n" 26 | "This example copies the input file to the output file.\n" 27 | "It uses io-coherent ION buffers.\n"; 28 | 29 | static const char *PROGRAM_SOURCE[] = { 30 | "__kernel void copy(__global char *src,\n", 31 | " __global char *dst\n", 32 | " )\n", 33 | "{\n", 34 | " uint wid_x = get_global_id(0);\n", 35 | " dst[wid_x] = src[wid_x];\n", 36 | "}\n" 37 | }; 38 | 39 | static const cl_uint PROGRAM_SOURCE_LEN = sizeof(PROGRAM_SOURCE) / sizeof(const char *); 40 | 41 | int main(int argc, char** argv) 42 | { 43 | if (argc < 3) 44 | { 45 | std::cerr << "Please specify source and destination files.\n"; 46 | std::cerr << HELP_MESSAGE; 47 | std::exit(EXIT_SUCCESS); 48 | } 49 | const std::string src_filename(argv[1]); 50 | const std::string out_filename(argv[2]); 51 | 52 | cl_wrapper wrapper; 53 | cl_program program = wrapper.make_program(PROGRAM_SOURCE, PROGRAM_SOURCE_LEN); 54 | cl_kernel kernel = wrapper.make_kernel("copy", program); 55 | cl_context context = wrapper.get_context(); 56 | cl_command_queue command_queue = wrapper.get_command_queue(); 57 | cl_int err = CL_SUCCESS; 58 | 59 | /* 60 | * Step 0: Create CL buffers. 
61 | */ 62 | 63 | std::ifstream fin(src_filename, std::ios::binary); 64 | if (!fin) 65 | { 66 | std::cerr << "Couldn't open file " << src_filename << "\n"; 67 | std::exit(EXIT_FAILURE); 68 | } 69 | 70 | const auto fin_begin = fin.tellg(); 71 | 72 | fin.seekg(0, std::ios::end); 73 | const auto fin_end = fin.tellg(); 74 | const size_t buf_size = static_cast(fin_end - fin_begin); 75 | cl_mem_ion_host_ptr src_buf_ion = wrapper.make_iocoherent_ion_buffer(buf_size); 76 | 77 | cl_mem src_buffer = clCreateBuffer( 78 | context, 79 | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM, 80 | buf_size, 81 | &src_buf_ion, 82 | &err 83 | ); 84 | if (err != CL_SUCCESS) 85 | { 86 | std::cerr << "Error " << err << " with clCreateBuffer for source file." << "\n"; 87 | std::exit(err); 88 | } 89 | 90 | char *buf_ptr = static_cast(clEnqueueMapBuffer( 91 | command_queue, 92 | src_buffer, 93 | CL_BLOCKING, 94 | CL_MAP_WRITE, 95 | 0, 96 | buf_size, 97 | 0, 98 | NULL, 99 | NULL, 100 | &err 101 | )); 102 | if (err != CL_SUCCESS) 103 | { 104 | std::cerr << "Error " << err << " mapping source buffer for writing." << "\n"; 105 | std::exit(err); 106 | } 107 | 108 | fin.seekg(0, std::ios::beg); 109 | fin.read(buf_ptr, buf_size); 110 | fin.close(); 111 | 112 | err = clEnqueueUnmapMemObject(command_queue, src_buffer, buf_ptr, 0, NULL, NULL); 113 | if (err != CL_SUCCESS) 114 | { 115 | std::cerr << "Error " << err << " unmapping source buffer." << "\n"; 116 | std::exit(err); 117 | } 118 | 119 | cl_mem_ion_host_ptr out_buf_ion = wrapper.make_iocoherent_ion_buffer(buf_size); 120 | cl_mem out_buffer = clCreateBuffer( 121 | context, 122 | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM, 123 | buf_size, 124 | &out_buf_ion, 125 | &err 126 | ); 127 | if (err != CL_SUCCESS) 128 | { 129 | std::cerr << "Error " << err << " with clCreateBuffer for output file." << "\n"; 130 | std::exit(err); 131 | } 132 | 133 | /* 134 | * Step 1: Set up kernel arguments and run the kernel. 
135 | */ 136 | 137 | err = clSetKernelArg(kernel, 0, sizeof(src_buffer), &src_buffer); 138 | if (err != CL_SUCCESS) 139 | { 140 | std::cerr << "Error " << err << " with clSetKernelArg for argument 0." << "\n"; 141 | std::exit(err); 142 | } 143 | 144 | err = clSetKernelArg(kernel, 1, sizeof(out_buffer), &out_buffer); 145 | if (err != CL_SUCCESS) 146 | { 147 | std::cerr << "Error " << err << " with clSetKernelArg for argument 1." << "\n"; 148 | std::exit(err); 149 | } 150 | 151 | err = clEnqueueNDRangeKernel( 152 | command_queue, 153 | kernel, 154 | 1, 155 | NULL, 156 | &buf_size, 157 | NULL, 158 | 0, 159 | NULL, 160 | NULL 161 | ); 162 | if (err != CL_SUCCESS) 163 | { 164 | std::cerr << "Error " << err << " with clEnqueueNDRangeKernel." << "\n"; 165 | std::exit(err); 166 | } 167 | /* 168 | * Step 2: Copy the data out of the ion buffer. 169 | */ 170 | 171 | std::ofstream fout(out_filename, std::ios::binary); 172 | if (!fout) 173 | { 174 | std::cerr << "Couldn't open file " << out_filename << "\n"; 175 | std::exit(EXIT_FAILURE); 176 | } 177 | 178 | buf_ptr = static_cast(clEnqueueMapBuffer( 179 | command_queue, 180 | out_buffer, 181 | CL_BLOCKING, 182 | CL_MAP_READ, 183 | 0, 184 | buf_size, 185 | 0, 186 | NULL, 187 | NULL, 188 | &err 189 | )); 190 | if (err != CL_SUCCESS) 191 | { 192 | std::cerr << "Error " << err << " mapping output buffer for writing." << "\n"; 193 | std::exit(err); 194 | } 195 | 196 | fout.write(buf_ptr, buf_size); 197 | fout.close(); 198 | 199 | err = clEnqueueUnmapMemObject(command_queue, out_buffer, buf_ptr, 0, NULL, NULL); 200 | if (err != CL_SUCCESS) 201 | { 202 | std::cerr << "Error " << err << " unmapping output buffer." 
<< "\n"; 203 | std::exit(err); 204 | } 205 | 206 | // Clean up cl resources that aren't automatically handled by cl_wrapper 207 | clReleaseMemObject(src_buffer); 208 | clReleaseMemObject(out_buffer); 209 | 210 | return 0; 211 | } 212 | -------------------------------------------------------------------------------- /src/examples/linear_algebra/image_matrix_transpose.cpp: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // File: image_matrix_transpose.cpp 3 | // Desc: Demonstrates transposing matrices with images 4 | // 5 | // Author: QUALCOMM 6 | // 7 | // Copyright (c) 2018 QUALCOMM Technologies, Inc. 8 | // All Rights Reserved. 9 | // QUALCOMM Proprietary/GTDR 10 | //-------------------------------------------------------------------------------------- 11 | 12 | // Std includes 13 | #include 14 | #include 15 | #include 16 | 17 | // Project includes 18 | #include "util/cl_wrapper.h" 19 | #include "util/util.h" 20 | 21 | // Library includes 22 | #include 23 | #include 24 | 25 | static const char *HELP_MESSAGE = "\n" 26 | "Usage: image_matrix_transpose []\n" 27 | "Given a matrix, computes its transpose.\n" 28 | "There is no size restriction for the matrix, but it may be padded with extra elements.\n" 29 | "If no file is specified for the output, then it is written to stdout.\n"; 30 | 31 | static const char *PROGRAM_SOURCE[] = { 32 | "__kernel void transpose(__read_only image2d_t matrix,\n", 33 | " __write_only image2d_t matrix_t)\n", 34 | "{\n", 35 | " const int wid_x = get_global_id(0);\n", 36 | " const int wid_y = get_global_id(1);\n", 37 | " const float4 rows[] = {\n", 38 | " read_imagef(matrix, (int2)(wid_x, 4 * wid_y + 0)),\n", 39 | " read_imagef(matrix, (int2)(wid_x, 4 * wid_y + 1)),\n", 40 | " read_imagef(matrix, (int2)(wid_x, 4 * wid_y + 2)),\n", 41 | " read_imagef(matrix, (int2)(wid_x, 4 * wid_y + 3)),\n", 42 | " };\n", 43 | " 
write_imagef(matrix_t, (int2)(wid_y, 4 * wid_x + 0), (float4)(rows[0].x, rows[1].x, rows[2].x, rows[3].x));\n", 44 | " write_imagef(matrix_t, (int2)(wid_y, 4 * wid_x + 1), (float4)(rows[0].y, rows[1].y, rows[2].y, rows[3].y));\n", 45 | " write_imagef(matrix_t, (int2)(wid_y, 4 * wid_x + 2), (float4)(rows[0].z, rows[1].z, rows[2].z, rows[3].z));\n", 46 | " write_imagef(matrix_t, (int2)(wid_y, 4 * wid_x + 3), (float4)(rows[0].w, rows[1].w, rows[2].w, rows[3].w));\n", 47 | "}\n" 48 | }; 49 | 50 | static const cl_uint PROGRAM_SOURCE_LEN = sizeof(PROGRAM_SOURCE) / sizeof(const char *); 51 | 52 | int main(int argc, char** argv) 53 | { 54 | if (argc < 2) 55 | { 56 | std::cerr << "Please specify input file.\n"; 57 | std::cerr << HELP_MESSAGE; 58 | std::exit(EXIT_SUCCESS); 59 | } 60 | 61 | const std::string matrix_a_filename(argv[1]); 62 | const bool output_to_file = argc >= 3; 63 | const matrix_t matrix_a = load_matrix(matrix_a_filename); 64 | const std::string output_filename(output_to_file ? argv[2] : ""); 65 | 66 | matrix_t matrix_b; 67 | matrix_b.width = matrix_a.height; 68 | matrix_b.height = matrix_a.width; 69 | const size_t matrix_b_size = matrix_b.width * matrix_b.height; 70 | matrix_b.elements.resize(matrix_b_size); 71 | 72 | cl_wrapper wrapper; 73 | cl_program program = wrapper.make_program(PROGRAM_SOURCE, PROGRAM_SOURCE_LEN); 74 | cl_kernel kernel = wrapper.make_kernel("transpose", program); 75 | cl_context context = wrapper.get_context(); 76 | cl_command_queue command_queue = wrapper.get_command_queue(); 77 | 78 | /* 79 | * Step 0: Confirm the required OpenCL extensions are supported. 
80 | */ 81 | 82 | if (!wrapper.check_extension_support("cl_qcom_ext_host_ptr")) 83 | { 84 | std::cerr << "Extension cl_qcom_ext_host_ptr needed for ION-backed images is not supported.\n"; 85 | std::exit(EXIT_FAILURE); 86 | } 87 | 88 | if (!wrapper.check_extension_support("cl_qcom_ion_host_ptr")) 89 | { 90 | std::cerr << "Extension cl_qcom_ion_host_ptr needed for ION-backed images is not supported.\n"; 91 | std::exit(EXIT_FAILURE); 92 | } 93 | 94 | cl_int err = CL_SUCCESS; 95 | 96 | /* 97 | * Step 1: Create suitable ION-backed images. 98 | */ 99 | 100 | /* 101 | * Matrix A 102 | */ 103 | 104 | cl_image_format matrix_a_format; 105 | matrix_a_format.image_channel_order = CL_RGBA; 106 | matrix_a_format.image_channel_data_type = CL_FLOAT; 107 | 108 | cl_image_desc matrix_a_desc; 109 | std::memset(&matrix_a_desc, 0, sizeof(matrix_a_desc)); 110 | matrix_a_desc.image_type = CL_MEM_OBJECT_IMAGE2D; 111 | matrix_a_desc.image_width = ((matrix_a.width + 3) / 4); 112 | matrix_a_desc.image_height = ((matrix_a.height + 3) / 4) * 4; 113 | matrix_a_desc.image_row_pitch = wrapper.get_ion_image_row_pitch(matrix_a_format, matrix_a_desc); 114 | 115 | cl_mem_ion_host_ptr matrix_a_ion_buf = wrapper.make_ion_buffer_for_nonplanar_image(matrix_a_format, 116 | matrix_a_desc); 117 | cl_mem matrix_a_mem = clCreateImage( 118 | context, 119 | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM, 120 | &matrix_a_format, 121 | &matrix_a_desc, 122 | &matrix_a_ion_buf, 123 | &err 124 | ); 125 | if (err != CL_SUCCESS) 126 | { 127 | std::cerr << "Error " << err << " with clCreateImage for matrix A." 
<< "\n"; 128 | std::exit(err); 129 | } 130 | 131 | char *image_ptr; 132 | const size_t origin[] = {0, 0, 0}; 133 | const size_t matrix_a_region[] = {matrix_a_desc.image_width, matrix_a_desc.image_height, 1}; 134 | size_t row_pitch = 0; 135 | image_ptr = static_cast(clEnqueueMapImage( 136 | command_queue, 137 | matrix_a_mem, 138 | CL_BLOCKING, 139 | CL_MAP_WRITE, 140 | origin, 141 | matrix_a_region, 142 | &row_pitch, 143 | NULL, 144 | 0, 145 | NULL, 146 | NULL, 147 | &err 148 | )); 149 | if (err != CL_SUCCESS) 150 | { 151 | std::cerr << "Error " << err << " mapping matrix A image." << "\n"; 152 | std::exit(err); 153 | } 154 | 155 | for (size_t i = 0; i < matrix_a_desc.image_height; ++i) 156 | { 157 | if (i < static_cast(matrix_a.height)) 158 | { 159 | const size_t unpadded_row_size = sizeof(cl_float) * matrix_a.width; 160 | std::memcpy( 161 | image_ptr + i * row_pitch, 162 | matrix_a.elements.data() + i * matrix_a.width, 163 | unpadded_row_size 164 | ); 165 | const size_t remaining_bytes = row_pitch - unpadded_row_size; 166 | std::memset(image_ptr + (i * row_pitch) + unpadded_row_size, 0, remaining_bytes); 167 | } 168 | else 169 | { 170 | std::memset(image_ptr + i * row_pitch, 0, row_pitch); 171 | } 172 | } 173 | 174 | err = clEnqueueUnmapMemObject(command_queue, matrix_a_mem, image_ptr, 0, NULL, NULL); 175 | if (err != CL_SUCCESS) 176 | { 177 | std::cerr << "Error " << err << " unmapping matrix A image." 
<< "\n"; 178 | std::exit(err); 179 | } 180 | 181 | /* 182 | * Matrix B 183 | */ 184 | 185 | cl_image_format matrix_b_format; 186 | matrix_b_format.image_channel_order = CL_RGBA; 187 | matrix_b_format.image_channel_data_type = CL_FLOAT; 188 | 189 | cl_image_desc matrix_b_desc; 190 | std::memset(&matrix_b_desc, 0, sizeof(matrix_b_desc)); 191 | matrix_b_desc.image_type = CL_MEM_OBJECT_IMAGE2D; 192 | matrix_b_desc.image_width = ((matrix_b.width + 3) / 4); 193 | matrix_b_desc.image_height = ((matrix_b.height + 3) / 4) * 4; 194 | matrix_b_desc.image_row_pitch = wrapper.get_ion_image_row_pitch(matrix_b_format, matrix_b_desc); 195 | 196 | cl_mem_ion_host_ptr matrix_b_ion_buf = wrapper.make_ion_buffer_for_nonplanar_image(matrix_b_format, 197 | matrix_b_desc); 198 | cl_mem matrix_b_mem = clCreateImage( 199 | context, 200 | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM, 201 | &matrix_b_format, 202 | &matrix_b_desc, 203 | &matrix_b_ion_buf, 204 | &err 205 | ); 206 | if (err != CL_SUCCESS) 207 | { 208 | std::cerr << "Error " << err << " with clCreateImage for matrix B." << "\n"; 209 | std::exit(err); 210 | } 211 | 212 | /* 213 | * Step 2: Set up the kernel arguments 214 | */ 215 | 216 | err = clSetKernelArg(kernel, 0, sizeof(matrix_a_mem), &matrix_a_mem); 217 | if (err != CL_SUCCESS) 218 | { 219 | std::cerr << "Error " << err << " with clSetKernelArg for argument 0." << "\n"; 220 | std::exit(err); 221 | } 222 | 223 | err = clSetKernelArg(kernel, 1, sizeof(matrix_b_mem), &matrix_b_mem); 224 | if (err != CL_SUCCESS) 225 | { 226 | std::cerr << "Error " << err << " with clSetKernelArg for argument 1." << "\n"; 227 | std::exit(err); 228 | } 229 | 230 | /* 231 | * Step 3: Run the kernel. 
232 | */ 233 | 234 | const size_t global_work_size[] = {matrix_a_desc.image_width, matrix_a_desc.image_height / 4}; 235 | err = clEnqueueNDRangeKernel( 236 | command_queue, 237 | kernel, 238 | 2, 239 | NULL, 240 | global_work_size, 241 | NULL, 242 | 0, 243 | NULL, 244 | NULL 245 | ); 246 | if (err != CL_SUCCESS) 247 | { 248 | std::cerr << "Error " << err << " with clEnqueueNDRangeKernel." << "\n"; 249 | std::exit(err); 250 | } 251 | 252 | /* 253 | * Step 4: Copy the data out of the ION buffer. 254 | */ 255 | 256 | const size_t matrix_b_region[] = {matrix_b_desc.image_width, matrix_b_desc.image_height, 1}; 257 | image_ptr = static_cast(clEnqueueMapImage( 258 | command_queue, 259 | matrix_b_mem, 260 | CL_BLOCKING, 261 | CL_MAP_READ, 262 | origin, 263 | matrix_b_region, 264 | &row_pitch, 265 | NULL, 266 | 0, 267 | NULL, 268 | NULL, 269 | &err 270 | )); 271 | if (err != CL_SUCCESS) 272 | { 273 | std::cerr << "Error " << err << " with clEnqueueMapImage for matrix B." << "\n"; 274 | std::exit(err); 275 | } 276 | 277 | for (size_t i = 0; i < static_cast(matrix_b.height); ++i) 278 | { 279 | const size_t unpadded_row_size = sizeof(cl_float) * matrix_b.width; 280 | std::memcpy( 281 | matrix_b.elements.data() + i * matrix_b.width, 282 | image_ptr + i * row_pitch, 283 | unpadded_row_size 284 | ); 285 | } 286 | err = clEnqueueUnmapMemObject(command_queue, matrix_b_mem, image_ptr, 0, NULL, NULL); 287 | if (err != CL_SUCCESS) 288 | { 289 | std::cerr << "Error " << err << " with clEnqueueUnmapMemObject." 
<< "\n"; 290 | std::exit(err); 291 | } 292 | 293 | clFinish(command_queue); 294 | 295 | if (output_to_file) 296 | { 297 | save_matrix(output_filename, matrix_b); 298 | } 299 | else 300 | { 301 | save_matrix(std::cout, matrix_b); 302 | } 303 | 304 | // Clean up cl resources that aren't automatically handled by cl_wrapper 305 | clReleaseMemObject(matrix_a_mem); 306 | clReleaseMemObject(matrix_b_mem); 307 | 308 | return 0; 309 | } 310 | -------------------------------------------------------------------------------- /src/examples/linear_algebra/matrix_addition.cpp: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // File: matrix_addition.cpp 3 | // Desc: 4 | // 5 | // Author: QUALCOMM 6 | // 7 | // Copyright (c) 2018 QUALCOMM Technologies, Inc. 8 | // All Rights Reserved. 9 | // QUALCOMM Proprietary/GTDR 10 | //-------------------------------------------------------------------------------------- 11 | 12 | // Std includes 13 | #include 14 | #include 15 | #include 16 | 17 | // Project includes 18 | #include "util/cl_wrapper.h" 19 | #include "util/util.h" 20 | 21 | // Library includes 22 | #include 23 | #include 24 | 25 | static const char *HELP_MESSAGE = "\n" 26 | "Usage: matrix_addition []\n" 27 | "Computes the matrix sum C = A + B. 
See README.md for matrix input format.\n" 28 | "If no file is specified for the output, then it is written to stdout.\n"; 29 | 30 | static const char *PROGRAM_SOURCE[] = { 31 | "__kernel void buffer_addition(__global const float *matrix_a,\n", 32 | " __global const float *matrix_b,\n", 33 | " __global float *matrix_c)\n", 34 | "{\n", 35 | " const int wid_x = get_global_id(0);\n", 36 | " matrix_c[wid_x] = matrix_a[wid_x] + matrix_b[wid_x];\n", 37 | "}\n", 38 | }; 39 | 40 | static const cl_uint PROGRAM_SOURCE_LEN = sizeof(PROGRAM_SOURCE) / sizeof(const char *); 41 | 42 | int main(int argc, char** argv) 43 | { 44 | if (argc < 3) 45 | { 46 | std::cerr << "Please specify input files.\n"; 47 | std::cerr << HELP_MESSAGE; 48 | std::exit(EXIT_SUCCESS); 49 | } 50 | const std::string matrix_a_filename(argv[1]); 51 | const std::string matrix_b_filename(argv[2]); 52 | const bool output_to_file = argc >= 4; 53 | const matrix_t matrix_a = load_matrix(matrix_a_filename); 54 | const matrix_t matrix_b = load_matrix(matrix_b_filename); 55 | const size_t matrix_size = matrix_a.width * matrix_a.height; 56 | const size_t matrix_bytes = matrix_size * sizeof(cl_float); 57 | const std::string output_filename(output_to_file ? argv[3] : ""); 58 | 59 | if (matrix_a.width != matrix_b.width && matrix_a.height != matrix_b.height) 60 | { 61 | std::cerr << "Matrix A and B must have the same dimensions.\n"; 62 | std::exit(EXIT_FAILURE); 63 | } 64 | 65 | cl_wrapper wrapper; 66 | cl_program program = wrapper.make_program(PROGRAM_SOURCE, PROGRAM_SOURCE_LEN); 67 | cl_kernel kernel = wrapper.make_kernel("buffer_addition", program); 68 | cl_context context = wrapper.get_context(); 69 | cl_command_queue command_queue = wrapper.get_command_queue(); 70 | 71 | /* 72 | * Step 0: Confirm the required OpenCL extensions are supported. 
73 | */ 74 | 75 | if (!wrapper.check_extension_support("cl_qcom_ext_host_ptr")) 76 | { 77 | std::cerr << "Extension cl_qcom_ext_host_ptr needed for ION-backed images is not supported.\n"; 78 | std::exit(EXIT_FAILURE); 79 | } 80 | 81 | if (!wrapper.check_extension_support("cl_qcom_ion_host_ptr")) 82 | { 83 | std::cerr << "Extension cl_qcom_ion_host_ptr needed for ION-backed images is not supported.\n"; 84 | std::exit(EXIT_FAILURE); 85 | } 86 | 87 | /* 88 | * Step 1: Create suitable ION-backed buffers. 89 | */ 90 | 91 | cl_int err = CL_SUCCESS; 92 | 93 | cl_mem_ion_host_ptr matrix_a_ion_buf = wrapper.make_ion_buffer(matrix_bytes); 94 | std::memcpy(matrix_a_ion_buf.ion_hostptr, matrix_a.elements.data(), matrix_bytes); 95 | cl_mem matrix_a_mem = clCreateBuffer( 96 | context, 97 | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM, 98 | matrix_bytes, 99 | &matrix_a_ion_buf, 100 | &err 101 | ); 102 | if (err != CL_SUCCESS) 103 | { 104 | std::cerr << "Error " << err << " with clCreateBuffer for matrix A." << "\n"; 105 | std::exit(err); 106 | } 107 | 108 | cl_mem_ion_host_ptr matrix_b_ion_buf = wrapper.make_ion_buffer(matrix_bytes); 109 | std::memcpy(matrix_b_ion_buf.ion_hostptr, matrix_b.elements.data(), matrix_bytes); 110 | cl_mem matrix_b_mem = clCreateBuffer( 111 | context, 112 | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM, 113 | matrix_bytes, 114 | &matrix_b_ion_buf, 115 | &err 116 | ); 117 | if (err != CL_SUCCESS) 118 | { 119 | std::cerr << "Error " << err << " with clCreateBuffer for matrix B." << "\n"; 120 | std::exit(err); 121 | } 122 | 123 | cl_mem_ion_host_ptr matrix_c_ion_buf = wrapper.make_ion_buffer(matrix_bytes); 124 | cl_mem matrix_c_mem = clCreateBuffer( 125 | context, 126 | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM, 127 | matrix_bytes, 128 | &matrix_c_ion_buf, 129 | &err 130 | ); 131 | if (err != CL_SUCCESS) 132 | { 133 | std::cerr << "Error " << err << " with clCreateBuffer for matrix C." 
<< "\n"; 134 | std::exit(err); 135 | } 136 | 137 | /* 138 | * Step 2: Set up the kernel arguments 139 | */ 140 | 141 | err = clSetKernelArg(kernel, 0, sizeof(matrix_a_mem), &matrix_a_mem); 142 | if (err != CL_SUCCESS) 143 | { 144 | std::cerr << "Error " << err << " with clSetKernelArg for argument 0." << "\n"; 145 | std::exit(err); 146 | } 147 | 148 | err = clSetKernelArg(kernel, 1, sizeof(matrix_b_mem), &matrix_b_mem); 149 | if (err != CL_SUCCESS) 150 | { 151 | std::cerr << "Error " << err << " with clSetKernelArg for argument 1." << "\n"; 152 | std::exit(err); 153 | } 154 | 155 | err = clSetKernelArg(kernel, 2, sizeof(matrix_c_mem), &matrix_c_mem); 156 | if (err != CL_SUCCESS) 157 | { 158 | std::cerr << "Error " << err << " with clSetKernelArg for argument 2." << "\n"; 159 | std::exit(err); 160 | } 161 | 162 | /* 163 | * Step 3: Run the kernel. 164 | */ 165 | 166 | const size_t global_work_size = matrix_size; 167 | err = clEnqueueNDRangeKernel( 168 | command_queue, 169 | kernel, 170 | 1, 171 | NULL, 172 | &global_work_size, 173 | NULL, 174 | 0, 175 | NULL, 176 | NULL 177 | ); 178 | if (err != CL_SUCCESS) 179 | { 180 | std::cerr << "Error " << err << " with clEnqueueNDRangeKernel." << "\n"; 181 | std::exit(err); 182 | } 183 | 184 | /* 185 | * Step 4: Copy the data out of the ION buffer. 186 | */ 187 | 188 | cl_float *ptr = static_cast(clEnqueueMapBuffer( 189 | command_queue, 190 | matrix_c_mem, 191 | CL_BLOCKING, 192 | CL_MAP_READ, 193 | 0, 194 | matrix_bytes, 195 | 0, 196 | NULL, 197 | NULL, 198 | &err 199 | )); 200 | if (err != CL_SUCCESS) 201 | { 202 | std::cerr << "Error " << err << " with clEnqueueMapBuffer." 
<< "\n"; 203 | std::exit(err); 204 | } 205 | 206 | matrix_t matrix_c; 207 | matrix_c.width = matrix_a.width; 208 | matrix_c.height = matrix_a.height; 209 | matrix_c.elements.resize(matrix_size); 210 | std::memcpy(matrix_c.elements.data(), ptr, matrix_bytes); 211 | 212 | err = clEnqueueUnmapMemObject(command_queue, matrix_c_mem, ptr, 0, NULL, NULL); 213 | if (err != CL_SUCCESS) 214 | { 215 | std::cerr << "Error " << err << " with clEnqueueUnmapMemObject." << "\n"; 216 | std::exit(err); 217 | } 218 | 219 | clFinish(command_queue); 220 | 221 | if (output_to_file) 222 | { 223 | save_matrix(output_filename, matrix_c); 224 | } 225 | else 226 | { 227 | save_matrix(std::cout, matrix_c); 228 | } 229 | 230 | // Clean up cl resources that aren't automatically handled by cl_wrapper 231 | clReleaseMemObject(matrix_a_mem); 232 | clReleaseMemObject(matrix_b_mem); 233 | clReleaseMemObject(matrix_c_mem); 234 | 235 | return 0; 236 | } 237 | -------------------------------------------------------------------------------- /src/util/cl_wrapper.h: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // File: cl_wrapper.h 3 | // Desc: 4 | // 5 | // Author: QUALCOMM 6 | // 7 | // Copyright (c) 2017 QUALCOMM Technologies, Inc. 8 | // All Rights Reserved. 
9 | // QUALCOMM Proprietary/GTDR 10 | //-------------------------------------------------------------------------------------- 11 | 12 | #ifndef SDK_EXAMPLES_CL_WRAPPER_H 13 | #define SDK_EXAMPLES_CL_WRAPPER_H 14 | #include 15 | #include 16 | #include 17 | 18 | #include 19 | #include 20 | 21 | #ifdef USES_ANDROID_CMAKE 22 | #include 23 | #include 24 | #else /* USES_ANDROID_CMAKE */ 25 | #ifdef USES_LIBION 26 | #include 27 | #include 28 | #else /* USES_LIBION */ 29 | #include 30 | #include 31 | #endif /* USES_LIBION */ 32 | #endif /* USES_ANDROID_CMAKE */ 33 | 34 | #include "util.h" 35 | 36 | /** 37 | * \brief A wrapper around OpenCL setup/teardown code. 38 | * 39 | * All objects exposed are owned by the wrapper, and are cleaned up when it is destroyed. 40 | */ 41 | class cl_wrapper { 42 | public: 43 | /** 44 | * \brief Sets up OpenCL. 45 | */ 46 | cl_wrapper(); 47 | 48 | /** 49 | * \brief Frees associated OpenCL objects, including the results of make_kernel, make_program, and make_ion_buffer. 50 | */ 51 | ~cl_wrapper(); 52 | 53 | /** 54 | * \brief Gets the cl_context associated with the wrapper for using in OpenCL functions. 55 | * @return 56 | */ 57 | cl_context get_context() const; 58 | 59 | /** 60 | * \brief Gets the cl_command_queue associated with the wrapper for using in OpenCL functions. 61 | * @return 62 | */ 63 | cl_command_queue get_command_queue() const; 64 | 65 | /** 66 | * \brief Makes a cl_kernel from the given program. 67 | * 68 | * @param kernel_name 69 | * @param program 70 | * @return 71 | */ 72 | cl_kernel make_kernel(const std::string &kernel_name, cl_program program); 73 | 74 | /** 75 | * Makes a cl_program (whose lifetime is managed by cl_wrapper) from the given source code strings. 76 | * 77 | * @param program_source - The source code strings. 
78 | * @param program_source_len - The length of program_source 79 | * @return 80 | */ 81 | cl_program make_program(const char **program_source, cl_uint program_source_len); 82 | 83 | /** 84 | * \brief Makes an uncached ion buffer that can be used for a YUV 4:2:0 image. 85 | * 86 | * @param img_format [in] - The image format 87 | * @param img_desc [in] - The image description 88 | * @return 89 | */ 90 | cl_mem_ion_host_ptr make_ion_buffer_for_yuv_image(const cl_image_format &img_format, const cl_image_desc &img_desc); 91 | 92 | /** 93 | * \brief Makes an uncached ion buffer that can be used for a nonplanar image, e.g. CL_R or CL_RGB 94 | * 95 | * @param img_format [in] 96 | * @param img_desc [in] 97 | * @return 98 | */ 99 | cl_mem_ion_host_ptr make_ion_buffer_for_nonplanar_image(const cl_image_format &img_format, const cl_image_desc &img_desc); 100 | 101 | /** 102 | * \brief Makes an uncached ion buffer that can be used for a compressed image. 103 | * 104 | * @param img_format [in] - The image format 105 | * @param img_desc [in] - The image description 106 | * @return 107 | */ 108 | cl_mem_ion_host_ptr make_ion_buffer_for_compressed_image(cl_image_format img_format, const cl_image_desc &img_desc); 109 | 110 | /** 111 | * \brief Makes an uncached ion buffer of the specified size. 112 | * 113 | * @param size [in] - Desired buffer size 114 | * @return 115 | */ 116 | cl_mem_ion_host_ptr make_ion_buffer(size_t size); 117 | 118 | /** 119 | * \brief Makes an ion buffer of the specified size, using the IO-coherent 120 | * cache policy. 121 | * 122 | * @param size [in] - Desired buffer size 123 | * @return 124 | */ 125 | cl_mem_ion_host_ptr make_iocoherent_ion_buffer(size_t size); 126 | 127 | /** 128 | * \brief Makes an ion buffer that can be used for a YUV 4:2:0 image, using 129 | * the IO-coherent cache policy. 
130 | * 131 | * @param img_format [in] - The image format 132 | * @param img_desc [in] - The image description 133 | * @return 134 | */ 135 | cl_mem_ion_host_ptr make_iocoherent_ion_buffer_for_yuv_image(const cl_image_format &img_format, const cl_image_desc &img_desc); 136 | 137 | /** 138 | * \brief Checks if the wrapped device supports the desired extension via clGetDeviceInfo 139 | * 140 | * @param desired_extension 141 | * @return true if the desired_extension is supported, otherwise false 142 | */ 143 | bool check_extension_support(const std::string &desired_extension) const; 144 | 145 | /** 146 | * \brief Gets the required row pitch for the given image. Must be considered when accessing the underlying ion buffer. 147 | * 148 | * @param img_format [in] - The image format 149 | * @param img_desc [in] - The image description 150 | * @return the image row pitch 151 | */ 152 | size_t get_ion_image_row_pitch(const cl_image_format &img_format, const cl_image_desc &img_desc) const; 153 | 154 | /** 155 | * \brief Gets the max workgroup size for the specified kernel. 
156 | * 157 | * @param kernel 158 | * @return 159 | */ 160 | size_t get_max_workgroup_size(cl_kernel kernel) const; 161 | 162 | private: 163 | 164 | cl_mem_ion_host_ptr 165 | make_ion_buffer_internal(size_t size, unsigned int ion_allocation_flags, cl_uint host_cache_policy); 166 | 167 | // Data members 168 | cl_device_id m_device; 169 | cl_context m_context; 170 | cl_command_queue m_cmd_queue; 171 | std::vector m_programs; 172 | std::vector m_kernels; 173 | 174 | // ION stuff 175 | #if USES_LIBION 176 | // Pass 177 | #else 178 | std::vector m_handle_data; 179 | #endif 180 | std::vector m_file_descs; 181 | std::vector> m_ion_host_ptrs; 182 | int m_ion_device_fd; 183 | }; 184 | 185 | 186 | #endif //SDK_EXAMPLES_CL_CONTEXT_WRAPPER_H 187 | -------------------------------------------------------------------------------- /src/util/half_float.cpp: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // File: half_float.cpp 3 | // Desc: 4 | // 5 | // Author: QUALCOMM 6 | // 7 | // Copyright (c) 2018 QUALCOMM Technologies, Inc. 8 | // All Rights Reserved. 
9 | // QUALCOMM Proprietary/GTDR 10 | //-------------------------------------------------------------------------------------- 11 | 12 | #include "half_float.h" 13 | #include 14 | #include 15 | 16 | cl_half to_half(float f) 17 | { 18 | static const struct 19 | { 20 | unsigned int bit_size = 16; // total number of bits in the representation 21 | unsigned int num_frac_bits = 10; // number of fractional (mantissa) bits 22 | unsigned int num_exp_bits = 5; // number of (biased) exponent bits 23 | unsigned int sign_bit = 15; // position of the sign bit 24 | unsigned int sign_mask = 1 << 15; // mask to extract sign bit 25 | unsigned int frac_mask = (1 << 10) - 1; // mask to extract the fractional (mantissa) bits 26 | unsigned int exp_mask = ((1 << 5) - 1) << 10; // mask to extract the exponent bits 27 | unsigned int e_max = (1 << (5 - 1)) - 1; // max value for the exponent 28 | int e_min = -((1 << (5 - 1)) - 1) + 1; // min value for the exponent 29 | unsigned int max_normal = ((((1 << (5 - 1)) - 1) + 127) << 23) | 0x7FE000; // max value that can be represented by the 16 bit float 30 | unsigned int min_normal = ((-((1 << (5 - 1)) - 1) + 1) + 127) << 23; // min value that can be represented by the 16 bit float 31 | unsigned int bias_diff = ((unsigned int)(((1 << (5 - 1)) - 1) - 127) << 23); // difference in bias between the float16 and float32 exponent 32 | unsigned int frac_bits_diff = 23 - 10; // difference in number of fractional bits between float16/float32 33 | } float16_params; 34 | 35 | static const struct 36 | { 37 | unsigned int abs_value_mask = 0x7FFFFFFF; // ANDing with this value gives the abs value 38 | unsigned int sign_bit_mask = 0x80000000; // ANDing with this value gives the sign 39 | unsigned int e_max = 127; // max value for the exponent 40 | unsigned int num_mantissa_bits = 23; // 23 bit mantissa on single precision floats 41 | unsigned int mantissa_mask = 0x007FFFFF; // 23 bit mantissa on single precision floats 42 | } float32_params; 43 | 44 | const 
union 45 | { 46 | float f; 47 | unsigned int bits; 48 | } value = {f}; 49 | 50 | const unsigned int f_abs_bits = value.bits & float32_params.abs_value_mask; 51 | const bool is_neg = value.bits & float32_params.sign_bit_mask; 52 | const unsigned int sign = (value.bits & float32_params.sign_bit_mask) >> (float16_params.num_frac_bits + float16_params.num_exp_bits + 1); 53 | cl_half half = 0; 54 | 55 | if (std::isnan(value.f)) 56 | { 57 | half = float16_params.exp_mask | float16_params.frac_mask; 58 | } 59 | else if (std::isinf(value.f)) 60 | { 61 | half = is_neg ? float16_params.sign_mask | float16_params.exp_mask : float16_params.exp_mask; 62 | } 63 | else if (f_abs_bits > float16_params.max_normal) 64 | { 65 | // Clamp to max float 16 value 66 | half = sign | (((1 << float16_params.num_exp_bits) - 1) << float16_params.num_frac_bits) | float16_params.frac_mask; 67 | } 68 | else if (f_abs_bits < float16_params.min_normal) 69 | { 70 | const unsigned int frac_bits = (f_abs_bits & float32_params.mantissa_mask) | (1 << float32_params.num_mantissa_bits); 71 | const int nshift = float16_params.e_min + float32_params.e_max - (f_abs_bits >> float32_params.num_mantissa_bits); 72 | const unsigned int shifted_bits = nshift < 24 ? 
frac_bits >> nshift : 0; 73 | half = sign | (shifted_bits >> float16_params.frac_bits_diff); 74 | } 75 | else 76 | { 77 | half = sign | ((f_abs_bits + float16_params.bias_diff) >> float16_params.frac_bits_diff); 78 | } 79 | return half; 80 | } 81 | 82 | cl_float to_float(cl_half f) 83 | { 84 | static const struct { 85 | uint16_t sign_mask = 0x8000; 86 | uint16_t exp_mask = 0x7C00; 87 | int exp_bias = 15; 88 | int exp_offset = 10; 89 | uint16_t biased_exp_max = (1 << 5) - 1; 90 | uint16_t frac_mask = 0x03FF; 91 | float smallest_subnormal_as_float = 5.96046448e-8f; 92 | } float16_params; 93 | 94 | static const struct { 95 | int sign_offset = 31; 96 | int exp_bias = 127; 97 | int exp_offset = 23; 98 | } float32_params; 99 | 100 | const bool is_pos = (f & float16_params.sign_mask) == 0; 101 | const uint32_t biased_exponent = (f & float16_params.exp_mask) >> float16_params.exp_offset; 102 | const uint32_t frac = (f & float16_params.frac_mask); 103 | const bool is_inf = biased_exponent == float16_params.biased_exp_max 104 | && (frac == 0); 105 | 106 | if (is_inf) 107 | { 108 | return is_pos ? std::numeric_limits::infinity() : -std::numeric_limits::infinity(); 109 | } 110 | 111 | const bool is_nan = biased_exponent == float16_params.biased_exp_max 112 | && (frac != 0); 113 | if (is_nan) 114 | { 115 | return std::numeric_limits::quiet_NaN(); 116 | } 117 | 118 | const bool is_subnormal = biased_exponent == 0; 119 | if (is_subnormal) 120 | { 121 | return static_cast(frac) * float16_params.smallest_subnormal_as_float * (is_pos ? 1.f : -1.f); 122 | } 123 | 124 | const int unbiased_exp = static_cast(biased_exponent) - float16_params.exp_bias; 125 | const uint32_t biased_f32_exponent = static_cast(unbiased_exp + float32_params.exp_bias); 126 | 127 | union 128 | { 129 | cl_float f; 130 | uint32_t ui; 131 | } res = {0}; 132 | 133 | res.ui = (is_pos ? 
0 : 1 << float32_params.sign_offset) 134 | | (biased_f32_exponent << float32_params.exp_offset) 135 | | (frac << (float32_params.exp_offset - float16_params.exp_offset)); 136 | 137 | return res.f; 138 | } 139 | -------------------------------------------------------------------------------- /src/util/half_float.h: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // File: half_float.h 3 | // Desc: 4 | // 5 | // Author: QUALCOMM 6 | // 7 | // Copyright (c) 2018 QUALCOMM Technologies, Inc. 8 | // All Rights Reserved. 9 | // QUALCOMM Proprietary/GTDR 10 | //-------------------------------------------------------------------------------------- 11 | 12 | #ifndef SDK_EXAMPLES_HALF_FLOAT_H 13 | #define SDK_EXAMPLES_HALF_FLOAT_H 14 | 15 | #include 16 | 17 | /** 18 | * \brief Given a 32-bit float, converts it (potentially with some error due to loss of precision) 19 | * to a 16-bit half float for use with OpenCL. 20 | * 21 | * @param f [in] - The 32-bit float to convert 22 | * @return the equivalent 16-bit half float 23 | */ 24 | cl_half to_half(float f); 25 | 26 | /** 27 | * \brief Given a 16-bit half float, converts it to a 32-bit float. 28 | * 29 | * @param f [in] - The 16-bit half float to convert 30 | * @return the equivalent 32-bit float 31 | */ 32 | cl_float to_float(cl_half f); 33 | 34 | #endif //SDK_EXAMPLES_HALF_FLOAT_H 35 | -------------------------------------------------------------------------------- /src/util/util.h: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // File: util.h 3 | // Desc: 4 | // 5 | // Author: QUALCOMM 6 | // 7 | // Copyright (c) 2017 QUALCOMM Technologies, Inc. 8 | // All Rights Reserved. 
// QUALCOMM Proprietary/GTDR
//--------------------------------------------------------------------------------------

#ifndef SDK_EXAMPLES_UTIL_H
#define SDK_EXAMPLES_UTIL_H

// NOTE(review): the header names of the #include directives below and the element types of
// the std::vector members in this file are missing from this listing; restore them from
// the original sources before building.
#include
#include
#include
#include
#include

/**
 * \brief yuv_image_t represents the "raw bytes" + width and height of YUV image with two planes.
 *        this encompasses e.g. NV12, TP10, P010.
 */
struct yuv_image_t
{
    uint32_t y_width;
    uint32_t y_height;
    std::vector y_plane;
    std::vector uv_plane;
};

struct nv12_image_t : public yuv_image_t {};

struct tp10_image_t : public yuv_image_t {};

struct p010_image_t : public yuv_image_t {};

/**
 * \brief A matrix given by its width, its height, and its elements.
 */
struct matrix_t
{
    int width, height;
    std::vector elements;
};

/**
 * \brief Same layout as matrix_t; used for matrices loaded with load_half_matrix.
 */
struct half_matrix_t
{
    int width, height;
    std::vector elements;
};

/**
 * \brief nonplanar_image_t represents an image type that in contrast to
 *        yuv_image_t does not separate its pixel data into different planes.
 */
struct nonplanar_image_t
{
    uint32_t width;
    uint32_t height;
    std::vector pixels;
};

/**
 * \brief bayer_mipi10_image_t represents the "raw bytes" + width and height of
 *        a Bayer-ordered MIPI RAW10 data type image. In Bayer order, blue and
 *        red values are interleaved with green values in alternating rows:
 *
 *        BGBGBGBG...
 *        GRGRGRGR...
 *
 *        One "quad" of values here means two green values and one each of red
 *        and blue values that are in the same two columns and span two
 *        consecutive rows. The top left corner of a quad is always a blue
 *        value.
 *
 *        We consider the width of such an image as the total # of blue/green
 *        or green/red values per row, and the height is the number of rows.
 *        However these images are addressed in OpenCL kernels as though each
 *        quad were one pixel, effectively dividing the image dimensions by 2.
 *
 *        MIPI RAW10 is a packed 10-bit per channel data type -- the 8 most
 *        significant bits of 4 consecutive values per row are followed by 1
 *        byte with the 2 least significant bits for the preceding values, in
 *        order. The MSBs of the fifth byte hold the LSBs for value 1. For
 *        example the top row of a Bayer-ordered image would start with this
 *        sequence of 5 bytes:
 *
 *        | byte 1  | byte 2  | byte 3  | byte 4  | byte 5 |
 *        | b1 MSBs | g1 MSBs | b2 MSBs | g2 MSBs | LSBs   |
 */
struct bayer_mipi10_image_t : public nonplanar_image_t {};

/**
 * \brief Unpacked Bayer image format. Pixels are Bayer-ordered as above, but
 *        each 10-bit channel is held in a 16-bit int with 6 unused bits.
 */
struct bayer_int10_image_t : public nonplanar_image_t {};

/**
 * \brief Represents an RGBA 8888 image.
 */
struct rgba_image_t : public nonplanar_image_t {};

/**
 * \brief Represents a single-channel CL_R image type with an unsigned 16-bit
 *        data type.
 */
struct single_channel_int16_image_t : public nonplanar_image_t {};

/**
 * \brief Represents a single-channel CL_R image type with 32-bit float data
 *        type.
 */
struct single_channel_float_image_t : public nonplanar_image_t {};

/**
 * \brief Loads an 8-bit NV12 image from image data at filename
 *
 * @param filename - Path of the file to read
 * @return the loaded image
 */
nv12_image_t load_nv12_image_data(const std::string &filename);

/**
 * \brief Saves 8-bit NV12 image to the given filename
 *
 * @param filename - Path of the file to write
 * @param image - The image to save
 */
void save_nv12_image_data(const std::string &filename, const nv12_image_t &image);

/**
 * \brief Loads a TP10 image from image data at filename
 *
 * @param filename - Path of the file to read
 * @return the loaded image
 */
tp10_image_t load_tp10_image_data(const std::string &filename);

/**
 * \brief Saves TP10 image to the given filename
 *
 * @param filename - Path of the file to write
 * @param image - The image to save
 */
void save_tp10_image_data(const std::string &filename, const tp10_image_t &image);

/**
 * \brief Loads a p010 image from image data at filename
 *
 * @param filename - Path of the file to read
 * @return the loaded image
 */
p010_image_t load_p010_image_data(const std::string &filename);

/**
 * \brief Saves p010 image to the given filename
 *
 * @param filename - Path of the file to write
 * @param image - The image to save
 */
void save_p010_image_data(const std::string &filename, const p010_image_t &image);

/**
 * \brief Loads a matrix from the given file according to the format
 *        described in README.md
 * @param filename - Path of the file to read
 * @return the loaded matrix
 */
matrix_t load_matrix(const std::string &filename);

/**
 * \brief Loads a matrix of half-floats from the given file according to the
 *        format described in README.md
 * @param filename - Path of the file to read
 * @return the loaded matrix
 */
half_matrix_t load_half_matrix(const std::string &filename);

/**
 * \brief Saves a matrix to the given filename.
 * @param filename - Path of the file to write
 * @param matrix - The matrix to save
 */
void save_matrix(const std::string &filename, const matrix_t &matrix);

/**
 * \brief Serializes the matrix to the given output stream
 * @param out - The output stream to write to
 * @param matrix - The matrix to serialize
 */
void save_matrix(std::ostream &out, const matrix_t &matrix);

/**
 * \brief Loads a Bayer MIPI10 image from image data at filename
 * @param filename - Path of the file to read
 * @return the loaded image
 */
bayer_mipi10_image_t load_bayer_mipi_10_image_data(const std::string &filename);

/**
 * \brief Saves a Bayer MIPI10 image to the given filename
 * @param filename - Path of the file to write
 * @param image - The image to save
 */
void save_bayer_mipi_10_image_data(const std::string &filename, const bayer_mipi10_image_t &image);

/**
 * \brief Loads a Bayer unpacked 10-bit image from image data at filename
 * @param filename - Path of the file to read
 * @return the loaded image
 */
bayer_int10_image_t load_bayer_int_10_image_data(const std::string &filename);


/**
 * \brief Loads an 8-bit depth RGBA image from the given filename.
 * @param filename - Path of the file to read
 * @return the loaded image
 */
rgba_image_t load_rgba_image_data(const std::string &filename);

/**
 * \brief Saves an 8-bit depth RGBA image to the given filename.
 * @param filename - Path of the file to write
 * @param image - The image to save
 */
void save_rgba_image_data(const std::string &filename, const rgba_image_t &image);

/**
 * \brief Saves a 16-bit depth single-channel image to the given filename.
 * @param filename - Path of the file to write
 * @param image - The image to save
 */
void save_single_channel_image_data(const std::string &filename, const single_channel_int16_image_t &image);

/**
 * \brief Saves a 32-bit float single-channel image to the given filename.
236 | * @param filename 237 | * @param image 238 | */ 239 | void save_single_channel_image_data(const std::string &filename, const single_channel_float_image_t &image); 240 | 241 | /** 242 | * \brief Loads a 16-bit depth single-channel image from image data at filename 243 | * @param filename 244 | * @return 245 | */ 246 | single_channel_int16_image_t load_single_channel_image_data(const std::string &filename); 247 | 248 | /** 249 | * \brief Returns smallest y such that y % r == 0 and y >= x 250 | * @param x 251 | * @param r 252 | * @return 253 | */ 254 | size_t work_units(size_t x, size_t r); 255 | 256 | /** 257 | * \brief get supported formats with specific mem flag 258 | * @param context 259 | * @param mem_flags 260 | */ 261 | std::vector get_image_formats(cl_context context, cl_mem_flags mem_flags); 262 | 263 | /** 264 | * \brief print supported image formats 265 | * @param formats 266 | */ 267 | void print_formats(const std::vector &formats); 268 | 269 | /** 270 | * \brief check if specific format in the supported formats list 271 | * @param formats 272 | * @param format 273 | */ 274 | bool is_format_supported(const std::vector &formats, const cl_image_format &format); 275 | 276 | #endif //SDK_EXAMPLES_UTIL_H 277 | -------------------------------------------------------------------------------- /toolchain/linux_embedded/linux_embedded-toolchain.cmake: -------------------------------------------------------------------------------- 1 | set(TARGET apq8053) 2 | set(ARCH 64) 3 | 4 | INCLUDE(CMakeForceCompiler) 5 | set(CMAKE_SYSTEM_NAME Linux) 6 | 7 | set(OE_HOST_SYSROOT "poky/build/tmp-glibc/sysroots/x86_64-linux") 8 | 9 | if(${ARCH} STREQUAL "32") 10 | set(OE_TARGET_SYSROOT "$ENV{OE_ROOT}/poky/build/tmp-glibc/sysroots/lib32-${TARGET}") 11 | set(CMAKE_C_COMPILER ${OE_ROOT}/${OE_HOST_SYSROOT}/usr/bin/arm-oemllib32-linux-gnueabi/arm-oemllib32-linux-gnueabi-gcc) 12 | set(CMAKE_CXX_COMPILER 
${OE_ROOT}/${OE_HOST_SYSROOT}/usr/bin/arm-oemllib32-linux-gnueabi/arm-oemllib32-linux-gnueabi-g++) 13 | set(BIT_FLAGS "-mcpu=cortex-a15 -mfloat-abi=softfp -mfpu=neon") 14 | else() 15 | set(OE_TARGET_SYSROOT "$ENV{OE_ROOT}/poky/build/tmp-glibc/sysroots/${TARGET}") 16 | set(CMAKE_C_COMPILER ${OE_ROOT}/${OE_HOST_SYSROOT}/usr/bin/aarch64-oe-linux/aarch64-oe-linux-gcc) 17 | set(CMAKE_CXX_COMPILER ${OE_ROOT}/${OE_HOST_SYSROOT}/usr/bin/aarch64-oe-linux/aarch64-oe-linux-g++) 18 | set(BIT_FLAGS "") 19 | endif() 20 | 21 | set(CMAKE_SYSROOT ${OE_TARGET_SYSROOT}) 22 | set(SYSROOT "--sysroot=${OE_TARGET_SYSROOT}") 23 | set(INC_DIR "-I${OE_TARGET_SYSROOT}/usr/include") 24 | 25 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SYSROOT} ${INC_DIR} ${BIT_FLAGS}" CACHE STRING "" FORCE) 26 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SYSROOT} ${INC_DIR} ${BIT_FLAGS}" CACHE STRING "" FORCE) 27 | 28 | set(CMAKE_FIND_ROOT_PATH ${OE_ROOT}/${OE_HOST_SYSROOT}/usr/bin) 29 | set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) 30 | set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) 31 | set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) 32 | set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) 33 | --------------------------------------------------------------------------------