├── .gitignore
├── Android.mk
├── CMakeLists.txt
├── README.md
├── build_android.sh
├── build_linux_oe.sh
├── docs
    └── extensions
    │   ├── cl_qcom_accelerated_image_ops.txt
    │   ├── cl_qcom_android_native_buffer_host_ptr.txt
    │   ├── cl_qcom_compressed_image.txt
    │   ├── cl_qcom_create_buffer_from_image.txt
    │   ├── cl_qcom_ext_host_ptr.txt
    │   ├── cl_qcom_ext_host_ptr_iocoherent.txt
    │   ├── cl_qcom_extract_image_plane.txt
    │   ├── cl_qcom_ion_host_ptr.txt
    │   ├── cl_qcom_other_image.txt
    │   ├── cl_qcom_perf_hint.txt
    │   ├── cl_qcom_priority_hint.txt
    │   ├── cl_qcom_protected_context.txt
    │   ├── cl_qcom_subgroup_shuffle.txt
    │   └── cl_qcom_vector_image_ops.txt
├── example_images
    ├── CL_QCOM_BAYER__CL_QCOM_UNORM_INT10__192x192_FACE.dat
    ├── CL_QCOM_BAYER__CL_QCOM_UNORM_INT10__48x48_FACE.dat
    ├── CL_QCOM_BAYER__CL_QCOM_UNORM_MIPI10__192x192_FACE.dat
    ├── CL_QCOM_BAYER__CL_QCOM_UNORM_MIPI10__48x48_FACE.dat
    ├── CL_QCOM_NV12__CL_UNORM_INT8__128x128_CIRCLE.dat
    ├── CL_QCOM_NV12__CL_UNORM_INT8__256x256_CIRCLE.dat
    ├── CL_QCOM_NV12__CL_UNORM_INT8__64x64_CIRCLE.dat
    ├── CL_QCOM_NV12__CL_UNORM_INT8__CONSTANT.dat
    ├── CL_QCOM_NV12__CL_UNORM_INT8__INCREASING.dat
    ├── CL_QCOM_NV12__CL_UNORM_INT8__RANDOM.dat
    ├── CL_QCOM_P010__CL_QCOM_UNORM_INT10__CONSTANT.dat
    ├── CL_QCOM_P010__CL_QCOM_UNORM_INT10__INCREASING.dat
    ├── CL_QCOM_P010__CL_QCOM_UNORM_INT10__RANDOM.dat
    ├── CL_QCOM_TP10__CL_QCOM_UNORM_INT10__CONSTANT.dat
    ├── CL_QCOM_TP10__CL_QCOM_UNORM_INT10__INCREASING.dat
    ├── CL_QCOM_TP10__CL_QCOM_UNORM_INT10__RANDOM.dat
    └── create_example_images.py
├── inc
    └── CL
    │   ├── cl.h
    │   ├── cl.hpp
    │   ├── cl2.hpp
    │   ├── cl_egl.h
    │   ├── cl_ext.h
    │   ├── cl_ext_qcom.h
    │   ├── cl_gl.h
    │   ├── cl_gl_ext.h
    │   ├── cl_platform.h
    │   └── opencl.h
├── src
    ├── examples
    │   ├── basic
    │   │   ├── compressed_image_nv12.cpp
    │   │   ├── compressed_image_rgba.cpp
    │   │   ├── hello_world.cpp
    │   │   ├── qcom_block_match_sad.cpp
    │   │   ├── qcom_block_match_ssd.cpp
    │   │   ├── qcom_box_filter_image.cpp
    │   │   └── qcom_convolve_image.cpp
    │   ├── bayer_mipi
    │   │   ├── bayer_mipi10_to_rgba.cpp
    │   │   ├── mipi10_to_unpacked.cpp
    │   │   ├── unpacked_bayer_to_rgba.cpp
    │   │   └── unpacked_to_mipi10.cpp
    │   ├── conversions
    │   │   ├── nv12_to_rgba.cpp
    │   │   └── p010_to_compressed_tp10.cpp
    │   ├── convolutions
    │   │   ├── accelerated_convolution.cpp
    │   │   └── convolution.cpp
    │   ├── fft
    │   │   ├── fft_image.cpp
    │   │   └── fft_matrix.cpp
    │   ├── io_coherent_ion
    │   │   ├── io_coherent_ion_buffers.cpp
    │   │   └── io_coherent_ion_images.cpp
    │   ├── linear_algebra
    │   │   ├── buffer_matrix_multiplication.cpp
    │   │   ├── buffer_matrix_multiplication_half.cpp
    │   │   ├── buffer_matrix_transpose.cpp
    │   │   ├── image_matrix_multiplication.cpp
    │   │   ├── image_matrix_multiplication_half.cpp
    │   │   ├── image_matrix_transpose.cpp
    │   │   └── matrix_addition.cpp
    │   └── vector_image_ops
    │   │   ├── compressed_nv12_vector_image_ops.cpp
    │   │   ├── compressed_p010_vector_image_ops.cpp
    │   │   ├── compressed_tp10_vector_image_ops.cpp
    │   │   ├── nv12_vector_image_ops.cpp
    │   │   ├── p010_vector_image_ops.cpp
    │   │   └── tp10_vector_image_ops.cpp
    └── util
    │   ├── cl_wrapper.cpp
    │   ├── cl_wrapper.h
    │   ├── half_float.cpp
    │   ├── half_float.h
    │   ├── util.cpp
    │   └── util.h
└── toolchain
    └── linux_embedded
        └── linux_embedded-toolchain.cmake


/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode/
2 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | #--------------------------------------------------------------------------------------
  2 | # File: CMakeLists.txt
  3 | # Desc: Builds each SDK example into its own executable.
  4 | #
  5 | # Author:      QUALCOMM
  6 | #
  7 | #               Copyright (c) 2017 QUALCOMM Technologies, Inc.
  8 | #                         All Rights Reserved.
  9 | #                      QUALCOMM Proprietary/GTDR
 10 | #--------------------------------------------------------------------------------------
 11 | cmake_minimum_required(VERSION 2.8)
 12 | project(sdk_examples)
 13 | 
 14 | # Ask for C++11. CMAKE_CXX_STANDARD is only understood by CMake 3.1 and later,
 15 | # so older versions get a roughly equivalent compiler flag instead.
 16 | set(CMAKE_CXX_STANDARD 11)
 17 | if (CMAKE_VERSION VERSION_LESS "3.1")
 18 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
 19 | endif ()
 20 | # Enable the common warnings and treat every warning as a build error.
 21 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror")
 22 | 
 23 | # Utility sources that are compiled into every example executable.
 24 | set(COMMON_SOURCE_FILES
 25 |         src/util/util.h
 26 |         src/util/util.cpp
 27 |         src/util/half_float.h
 28 |         src/util/half_float.cpp
 29 |         src/util/cl_wrapper.h
 30 |         src/util/cl_wrapper.cpp
 31 |         )
 32 | 
 33 | if(ANDROID)
 34 |     # Prefer the ION_INCLUDE_PATH CMake variable; fall back to the environment.
 35 |     if("${ION_INCLUDE_PATH}" STREQUAL "")
 36 |         set(ION_INCLUDE_PATH "$ENV{ION_INCLUDE_PATH}")
 37 |     endif()
 38 | 
 39 |     if("${ION_INCLUDE_PATH}" STREQUAL "")
 40 |         message(FATAL_ERROR "Please set CMake variable ION_INCLUDE_PATH")
 41 |     endif()
 42 | 
 43 |     message("Using ${ION_INCLUDE_PATH} as include path for ION headers")
 44 | 
 45 |     add_definitions(-DANDROID -DUSES_ANDROID_CMAKE)
 46 |     # Put the standalone toolchain's sysroot headers ahead of the ION headers.
 47 |     set(ION_INCLUDE_PATH ${ANDROID_STANDALONE_TOOLCHAIN}/sysroot/usr/include ${ION_INCLUDE_PATH})
 48 | endif() #ANDROID
 49 | 
 50 | include_directories(
 51 |         src
 52 |         inc
 53 |         ${ION_INCLUDE_PATH}
 54 | )
 55 | 
 56 | # The OpenCL library is not searched for; the caller has to provide its path.
 57 | if("${OPEN_CL_LIB}" STREQUAL "")
 58 |     message(FATAL_ERROR "Can't find libOpenCL.so, please set the CMake variable OPEN_CL_LIB to /path/to/libOpenCL.so.")
 59 | endif()
 60 | 
 61 | # Each file listed here becomes one executable that is named after the file
 62 | # (minus its extension) and linked against OPEN_CL_LIB.
 63 | set(EXAMPLE_SOURCE_FILES
 64 |         src/examples/basic/qcom_box_filter_image.cpp
 65 |         src/examples/basic/qcom_convolve_image.cpp
 66 |         src/examples/basic/qcom_block_match_sad.cpp
 67 |         src/examples/basic/qcom_block_match_ssd.cpp
 68 |         src/examples/convolutions/accelerated_convolution.cpp
 69 |         src/examples/convolutions/convolution.cpp
 70 |         src/examples/basic/compressed_image_nv12.cpp
 71 |         src/examples/vector_image_ops/nv12_vector_image_ops.cpp
 72 |         src/examples/vector_image_ops/tp10_vector_image_ops.cpp
 73 |         src/examples/vector_image_ops/p010_vector_image_ops.cpp
 74 |         src/examples/vector_image_ops/compressed_nv12_vector_image_ops.cpp
 75 |         src/examples/vector_image_ops/compressed_p010_vector_image_ops.cpp
 76 |         src/examples/vector_image_ops/compressed_tp10_vector_image_ops.cpp
 77 |         src/examples/basic/hello_world.cpp
 78 |         src/examples/conversions/p010_to_compressed_tp10.cpp
 79 |         src/examples/conversions/nv12_to_rgba.cpp
 80 |         src/examples/linear_algebra/matrix_addition.cpp
 81 |         src/examples/linear_algebra/image_matrix_multiplication.cpp
 82 |         src/examples/linear_algebra/buffer_matrix_multiplication.cpp
 83 |         src/examples/linear_algebra/buffer_matrix_transpose.cpp
 84 |         src/examples/linear_algebra/image_matrix_transpose.cpp
 85 |         src/examples/bayer_mipi/bayer_mipi10_to_rgba.cpp
 86 |         src/examples/bayer_mipi/mipi10_to_unpacked.cpp
 87 |         src/examples/bayer_mipi/unpacked_bayer_to_rgba.cpp
 88 |         src/examples/bayer_mipi/unpacked_to_mipi10.cpp
 89 |         src/examples/fft/fft_image.cpp
 90 |         src/examples/fft/fft_matrix.cpp
 91 |         src/examples/linear_algebra/image_matrix_multiplication_half.cpp
 92 |         src/examples/linear_algebra/buffer_matrix_multiplication_half.cpp
 93 |         src/examples/io_coherent_ion/io_coherent_ion_buffers.cpp
 94 |         src/examples/io_coherent_ion/io_coherent_ion_images.cpp
 95 |         src/examples/basic/compressed_image_rgba.cpp
 96 |         )
 97 | 
 98 | foreach(example_source ${EXAMPLE_SOURCE_FILES})
 99 |     get_filename_component(example_name ${example_source} NAME_WE)
100 |     add_executable(${example_name} ${COMMON_SOURCE_FILES} ${example_source})
101 |     target_link_libraries(${example_name} ${OPEN_CL_LIB})
102 | endforeach()
103 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # SDK Examples
  2 | 
  3 | ## What is this?
  4 | 
  5 | Usage examples for Qualcomm's extensions to OpenCL.
  6 | 
  7 | ## Building for Android
  8 | 
  9 | There are a few things you'll need:
 10 | 
 11 | * The Android Open Source Project (AOSP) tree set up to build for your target
 12 |   device.
 13 | * Appropriate kernel headers (`linux/ion.h` and `linux/msm_ion.h`)
 14 | * A `libOpenCL` module defined by an `Android.mk` file.
 15 | 
 16 | More on those below. Once everything is set up just run `mma` in this directory
 17 | to build all the examples.
 18 | 
 19 | ### Where do I get kernel headers?
 20 | 
 21 | If your target device's kernel has the appropriate headers, they still need to
 22 | be in a location where the Android build system can discover them. One way to
 23 | ensure this is to build a bootimage, which will export the appropriate files:
 24 | 
 25 | ```
 26 | > cd $ANDROID_BUILD_TOP
 27 | > make bootimage
 28 | ```
 29 | 
 30 | ### Where do I get the libOpenCL module?
 31 | 
 32 | At the time of this writing `libOpenCL` is not available as part of Google's
 33 | prebuilt graphics libraries releases for Qualcomm devices. If you are lucky
 34 | enough to have it anyway, then you shouldn't need to do anything. Running `mma`
 35 | in this directory will build all dependencies, including `libOpenCL`.
 36 | 
 37 | ### I don't have the libOpenCL module, can I still use these examples?
 38 | 
 39 | Maybe, if you have the `libOpenCL.so` binary for your device to link against,
 40 | but it's not for the faint of heart. Provided here is a `CMakeLists.txt` file
 41 | and a script `build_android.sh` that can be used as a starting point, but
 42 | there's no guarantee it will work for your target device. You'll still need the
 43 | AOSP tree for the kernel headers, so go get it if you don't have it.
 44 | 
 45 | Find taka-no-me's `android-cmake` project online and clone it into the
 46 | `android-cmake` directory here.
 47 | 
 48 | All of these examples use ION buffers, so you'll still need appropriate ION
 49 | headers. Find where your target device's `msm_ion.h` and `ion.h` headers are.
 50 | For example you might see them at
 51 | `$ANDROID_BUILD_TOP/hardware/qcom/<target-device>/kernel-headers/linux` where
 52 | `<target-device>` should be replaced by your target device. You'll include
 53 | this directory in the header search path.
 54 | 
 55 | You'll also need the Android NDK, Revision 11c. The specific version is
 56 | important.
 57 | 
 58 | Then run the build script, substituting the paths specific to your build
 59 | environment:
 60 | 
 61 | ```
 62 | ANDROID_NDK=/path/to/android-ndk-r11c \
 63 | OPEN_CL_LIB=/path/to/libOpenCL.so \
 64 | ION_INCLUDE_PATH=$ANDROID_BUILD_TOP/hardware/qcom/<target-device>/kernel-headers/linux \
 65 | ./build_android.sh <BITNESS>
 66 | ```
 67 | 
 68 | `<BITNESS>` should be `32` or `64` depending on your target architecture.
 69 | 
 70 | ## Usage
 71 | 
 72 | Building will produce a set of binaries. Run each one without arguments to see
 73 | a help message and description of what it does. Most binaries take an input
 74 | image in the format described below (see "Image data format") -- several sample
 75 | images are given in the example_images directory; they contain arbitrary data
 76 | (i.e. they are not visually interesting).
 77 | 
 78 | ## Descriptions
 79 | 
 80 | ### src/examples/basic directory
 81 | 
 82 | #### hello_world.cpp
 83 | 
 84 | A very basic example to test out building. It simply copies one file to another.
 85 | 
 86 | #### qcom_block_match_sad.cpp, qcom_block_match_ssd.cpp, qcom_box_filter_image.cpp, qcom_convolve_image.cpp
 87 | 
 88 | These examples all demonstrate basic usage for the named built-in extension functions.
 89 | Look here for minimal examples of how to use the extensions.
 90 | 
 91 | #### compressed_image_nv12.cpp, compressed_image_rgba.cpp
 92 | 
 93 | Demonstrates use of compressed images using Qualcomm extensions to OpenCL.
 94 | The input image is compressed and then decompressed, with the result written
 95 | to the specified output file for comparison. (The compression is not lossy so
 96 | they are identical.)
 97 | 
 98 | Compressed image formats may be saved to disk, however be advised that the format
 99 | is specific to each GPU.
100 | 
101 | The two examples show compression for NV12 and RGBA images.
102 | 
103 | ### src/examples/bayer_mipi
104 | 
105 | The examples in this directory show how to use Bayer-ordered images and packed
106 | MIPI data formats.
107 | 
108 | Bayer-ordered images have one red, green or blue value per pixel, and the pixels
109 | are interleaved in a mosaic pattern. In order to get an equivalent RGB image
110 | one must "demosaic" the image by interpolating the missing red, green, and blue
111 | values. Bayer-ordered images are addressed by 2x2 blocks of such pixels, where
112 | each block has one red and blue value, and two green values. A Bayer-ordered
113 | image may also be addressed as a single-channel (`CL_R`) image to get one color
114 | channel at a time.
115 | 
116 | `bayer_mipi10_to_rgba.cpp` and `unpacked_bayer_to_rgba.cpp` both demonstrate one
117 | scheme for demosaicing. The former uses the packed MIPI10 format, and the latter
118 | uses an unpacked 10-bit format (held in a 16-bit int with 6 bits unused). Both
119 | use Bayer-ordered images to exploit the GPU's interpolation capabilities without
120 | mixing different color channels. The destination format has 8-bits per channel,
121 | so some precision is lost.
122 | 
123 | `mipi10_to_unpacked.cpp` and `unpacked_to_mipi10.cpp` demonstrate using the
124 | MIPI10 data format with a single-channel `CL_R` order. The former converts a
125 | packed MIPI10 image into an unpacked 10-bit image. The latter shows the
126 | unpacked-to-packed conversion.
127 | 
128 | ### src/examples/conversions
129 | 
130 | The examples in this directory show conversions to and from various image formats.
131 | 
132 | ### src/examples/convolutions
133 | 
134 | #### convolution.cpp
135 | 
136 | Demonstrates efficient convolution without the use of built-in extension functions.
137 | 
138 | #### accelerated_convolution.cpp
139 | 
140 | Demonstrates efficient convolution with the qcom_convolve_imagef built-in extension
141 | function.
142 | 
143 | ### src/examples/fft
144 | 
145 | These examples compute the 2-dimensional fast Fourier transform (2D FFT) of an
146 | image or matrix using the in-place Cooley-Tukey algorithm. First in the
147 | "row pass" each work group calculates the 1D FFT of a row, by reading initial
148 | data from global memory into local memory, and calculating intermediate results
149 | in-place using local memory. The final result is written to global memory in
150 | transposed order. This procedure is then repeated in a "column pass" that acts
151 | on the rows of the result of the first pass. Calculating the 1D FFTs
152 | back-to-back in this way is equivalent to computing the 2D FFT.
153 | 
154 | For the image-based version, the input is an 8-bit per channel NV12 image, and
155 | the outputs are two single-channel images with a 32-bit float data type. The
156 | outputs contain the real and imaginary parts of the FFT. The example acts on
157 | the Y-plane only.
158 | 
159 | The buffer-based version takes a real-valued matrix as input (in the matrix
160 | data format described below), and produces two matrices as the output holding
161 | the real and imaginary parts of the FFT.
162 | 
163 | ### src/examples/io_coherent_ion
164 | 
165 | These simple examples demonstrate using the IO-coherent host cache policy for
166 | ION buffers. Both examples simply copy a specified file or image. Except for
167 | the parameters used to create the ION buffers, there is no difference in the
168 | host or kernel code compared to using uncached ION buffers.
169 | 
170 | ### src/examples/linear_algebra
171 | 
172 | Demonstrates some basic linear algebra operations:
173 | 
174 | * Matrix addition
175 | * Matrix multiplication
176 | * Matrix transposition
177 | 
178 | The transposition and multiplication examples come in two flavors, one using
179 | OpenCL buffers and another that packs the matrices into 2D images. It is not a
180 | foregone conclusion that using an image or a buffer will enjoy better
181 | performance in any given use case, so generally one must try and see what works
182 | best.
183 | 
184 | The image versions of both examples pad irregularly sized matrices, both because
185 | images have per-row alignment requirements and because this permits an efficient
186 | tiled algorithm to be applied uniformly. This approach can use substantially
187 | more memory than the buffer-based version.
188 | 
189 | In contrast, the buffer versions do not pad the input matrices. They use an
190 | efficient tiled algorithm where possible, and a less efficient algorithm to
191 | calculate the remaining portion of the output not covered by the tiled
192 | algorithm.
193 | 
194 | The multiplication examples additionally have a "half" variant, that
195 | demonstrates using the 16-bit half-float data type. The input, output and
196 | arithmetic all use half-floats. This can be a significant performance advantage,
197 | although it introduces more error. One may mix use of floats and half-floats to
198 | achieve the desired performance/accuracy trade off.
199 | 
200 | ### src/examples/vector_image_ops
201 | 
202 | All examples in this directory demonstrate a variety of kernels using vector
203 | read and write operations for the given image formats.
204 | 
205 | ## Image data format
206 | 
207 | Input and output images have the following format, where multi-byte data types are written with the least significant
208 | byte first:
209 | 
210 | * 4 bytes: plane width in pixels (unsigned integer)
211 | * 4 bytes: plane height (unsigned integer)
212 | * 4 bytes: OpenCL channel data type.
213 | * 4 bytes: OpenCL channel order.
214 | * N bytes: pixel data, where N is dependent on the preceding four values.
215 | 
216 | ## Matrix data format
217 | 
218 | Matrices used by the examples in the `linear_algebra` directory have the
219 | following plain text format:
220 | 
221 | * Two integers separated by whitespace indicating the number of columns and rows
222 |   of the matrix.
223 | * A sequence of whitespace-separated floating point element values in row-major
224 |   order.
225 | 
226 | For example, the following represents a 3x2 matrix (3 rows, 2 columns):
227 | 
228 | ```
229 | 2 3
230 | 1.0 2.0
231 | 3.1 4.1
232 | 6   0
233 | ```
234 | 


--------------------------------------------------------------------------------
/build_android.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #--------------------------------------------------------------------------------------
 3 | # File: build_android.sh
 4 | # Desc: Cross-compiles the SDK examples for Android with a standalone NDK
 5 | #       toolchain and the android-cmake toolchain file.
 6 | #
 7 | # Usage: ANDROID_NDK=<ndk> OPEN_CL_LIB=<lib> ION_INCLUDE_PATH=<dir> \
 8 | #            ./build_android.sh <BITNESS> [args-to-pass-to-make]
 9 | #
10 | # Author:      QUALCOMM
11 | #
12 | #               Copyright (c) 2017 QUALCOMM Technologies, Inc.
13 | #                         All Rights Reserved.
14 | #                      QUALCOMM Proprietary/GTDR
15 | #--------------------------------------------------------------------------------------
16 | set -e
17 | 
18 | # All three locations are mandatory; fail early with a hint if one is missing.
19 | if [[ -z "$ANDROID_NDK" ]]; then
20 |    echo "Please set \$ANDROID_NDK to the root of your standalone Android ndk build tree"
21 |    exit 1
22 | fi
23 | 
24 | if [[ -z "$OPEN_CL_LIB" ]]; then
25 |    echo "Please set \$OPEN_CL_LIB to the path to libOpenCL.so (e.g. /path/to/libOpenCL.so)"
26 |    exit 1
27 | fi
28 | 
29 | if [[ -z "$ION_INCLUDE_PATH" ]]; then
30 |    echo "Please set \$ION_INCLUDE_PATH to the directory containing ION headers."
31 |    exit 1
32 | fi
33 | 
34 | if [[ "$#" -lt 1 ]]; then
35 |    echo "Error: invalid number of arguments: $#"
36 |    echo "Usage: $0 <BITNESS> [args-to-pass-to-make]"
37 |    echo "       BITNESS: 32 or 64"
38 |    exit 1
39 | fi
40 | 
41 | BITNESS=$1
42 | 
43 | # Map the requested bitness to the matching ABI, clang toolchain and NDK arch.
44 | if [[ "64" == "$BITNESS" ]]; then
45 |     ANDROID_ABI="arm64-v8a"
46 |     ANDROID_TOOLCHAIN="aarch64-linux-android-clang"
47 |     NDK_ARCH=arm64
48 | elif [[ "32" == "$BITNESS" ]]; then
49 |     ANDROID_ABI="armeabi-v7a"
50 |     ANDROID_TOOLCHAIN="arm-linux-androideabi-clang"
51 |     NDK_ARCH=arm
52 | else
53 |     echo "Invalid bitness!: $BITNESS"
54 |     exit 1
55 | fi
56 | 
57 | # The android-cmake project must be present in the directory the script is run from.
58 | if [ ! -d "android-cmake" ]; then
59 |     echo "Couldn't find `pwd`/android-cmake, please install it to this directory."
60 |     exit 1
61 | fi
62 | 
63 | # Creates an android standalone toolchain in this dir, for use with android-cmake
64 | ANDROID_STANDALONE_TOOLCHAIN="$(pwd)/android_standalone_toolchain_$ANDROID_ABI"
65 | if [ ! -d "$ANDROID_STANDALONE_TOOLCHAIN" ]; then
66 |     "$ANDROID_NDK/build/tools/make-standalone-toolchain.sh" \
67 |         --install-dir="$ANDROID_STANDALONE_TOOLCHAIN" \
68 |         --arch="$NDK_ARCH" \
69 |         --platform=android-21 \
70 |         --toolchain="$ANDROID_TOOLCHAIN"
71 | fi
72 | 
73 | BUILD_DIR="bld_android_$BITNESS"
74 | 
75 | mkdir -p "$BUILD_DIR"
76 | cd "$BUILD_DIR"
77 | # ION_INCLUDE_PATH is forwarded explicitly so the build does not depend on the
78 | # variable having been exported. The generator is selected with -G (capital G).
79 | cmake \
80 |   --debug-trycompile \
81 |   -DANDROID=True \
82 |   -DCMAKE_TOOLCHAIN_FILE=../android-cmake/android.toolchain.cmake \
83 |   -DCMAKE_BUILD_TYPE=Debug \
84 |   -DANDROID_SO_UNDEFINED=ON \
85 |   -DANDROID_ABI="$ANDROID_ABI" \
86 |   -DANDROID_STANDALONE_TOOLCHAIN="$ANDROID_STANDALONE_TOOLCHAIN" \
87 |   -DOPEN_CL_LIB="$OPEN_CL_LIB" \
88 |   -DION_INCLUDE_PATH="$ION_INCLUDE_PATH" \
89 |   -G "Unix Makefiles" ../
90 | 
91 | # Passes extra cmd line arguments to make
92 | make "${@:2}"
93 | 


--------------------------------------------------------------------------------
/build_linux_oe.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Cross-compiles the SDK examples for a Linux OpenEmbedded (OE) target using
 3 | # toolchain/linux_embedded/linux_embedded-toolchain.cmake.
 4 | #
 5 | # Usage: OE_ROOT=<oe-root> ./build_linux_oe.sh <OE_TARGET> <BITNESS> [args-to-pass-to-make]
 6 | set -e
 7 | 
 8 | if ! [[ "$OSTYPE" =~ linux ]]; then
 9 |   echo "Not a Linux System "
10 |   exit 1
11 | fi
12 | 
13 | if [[ -z "$OE_ROOT" ]]; then
14 |    echo "Please set \$OE_ROOT to your root directory"
15 |    exit 1
16 | fi
17 | 
18 | # OPEN_CL_LIB is not validated here: it is forwarded to CMake as-is, and
19 | # CMakeLists.txt stops with an error if it ends up empty.
20 | 
21 | if [[ "$#" -lt 2 ]]; then
22 |    echo "Error: invalid number of arguments: $#"
23 |    echo "Usage: $0 <OE_TARGET> <BITNESS> [args-to-pass-to-make]"
24 |    echo "       OE_TARGET: target platform (8053, 8096, 8074)"
25 |    echo "       BITNESS: 32 or 64"
26 |    exit 1
27 | fi
28 | 
29 | # NOTE(review): OE_TARGET is read but not used anywhere else in this script.
30 | OE_TARGET=$1
31 | ARCH=$2
32 | 
33 | # Only 32- and 64-bit builds are accepted.
34 | case "$ARCH" in
35 |    32|64) ;;
36 |    *)
37 |       echo "Invalid bitness!: $ARCH"
38 |       exit 1
39 |       ;;
40 | esac
41 | 
42 | # Remember the current directory (the source tree) so paths stay valid after cd.
43 | SOURCE_DIR="$(pwd)"
44 | OE_TOOLCHAIN_FILE="toolchain/linux_embedded/linux_embedded-toolchain.cmake"
45 | if [[ ! -f "$OE_TOOLCHAIN_FILE" ]]; then
46 |     echo "Can't find toolchain file: $OE_TOOLCHAIN_FILE"
47 |     exit 1
48 | fi
49 | 
50 | BUILD_DIR="bld_linux_oe_$ARCH"
51 | 
52 | mkdir -p "$BUILD_DIR"
53 | cd "$BUILD_DIR"
54 | # The toolchain file is passed as an absolute path because the working directory
55 | # is now the build directory. The generator is selected with -G (capital G).
56 | cmake  \
57 |    --debug-trycompile \
58 |    -DLINUX_OE=YES \
59 |    -DCMAKE_TOOLCHAIN_FILE="$SOURCE_DIR/$OE_TOOLCHAIN_FILE" \
60 |    -DOE_ROOT="$OE_ROOT" \
61 |    -DCMAKE_ARM_COMPILER=YES \
62 |    -DNEEDS_TO_LINK_PTHREAD=YES \
63 |    -DARCH="$ARCH" \
64 |    -DOPEN_CL_LIB="$OPEN_CL_LIB" \
65 |    -G "Unix Makefiles" ../
66 | # Passes extra cmd line arguments to make
67 | make "${@:3}"
68 | 


--------------------------------------------------------------------------------
/docs/extensions/cl_qcom_android_native_buffer_host_ptr.txt:
--------------------------------------------------------------------------------
  1 | Name Strings
  2 | 
  3 |     cl_qcom_android_native_buffer_host_ptr
  4 | 
  5 | Contributors
  6 | 
  7 |     Hossein Mohtasham, Qualcomm Technologies, Inc.
  8 |     Sushmita Susheelendra, Qualcomm Innovation Center, Inc.
  9 |     Balaji Calidas, Qualcomm Technologies, Inc.
 10 | 
 11 | Contact
 12 | 
 13 |     bcalidas at qti dot qualcomm dot com
 14 | 
 15 | Version
 16 | 
 17 |     Version 6, 2018/01/19
 18 | 
 19 | Number
 20 | 
 21 |     OpenCL Extension #32
 22 | 
 23 | Status
 24 | 
 25 |     Shipping
 26 | 
 27 | Extension Type
 28 | 
 29 |     OpenCL device extension
 30 | 
 31 | Dependencies
 32 | 
 33 |     OpenCL 1.1 or later is required.
 34 |     cl_qcom_ext_host_ptr is required.
 35 |     Android OS is required.
 36 | 
 37 |     This extension is written against the OpenCL 1.1 specification. This
 38 |     extension provides functionality, above and beyond the cl_qcom_ion_host_ptr
 39 |     extension, to create buffers and images directly from Android native
 40 |     buffers.
 41 | 
 42 |     If present, cl_qcom_ext_host_ptr_iocoherent extends the functionality
 43 |     of this extension.
 44 | 
 45 | Overview
 46 | 
 47 |     This extension extends the functionality provided by clCreateBuffer,
 48 |     clCreateImage2D, and clCreateImage. It allows applications to pass an
 49 |     Android ANativeWindowBuffer (ANB), aka graphics buffer, that is based on
 50 |     the ION memory allocator to these functions so that it can be mapped to
 51 |     the device's address space. Using this extension, we can avoid having to
 52 |     copy data back and forth between the graphics buffer and the device. This
 53 |     extension is for 2D images only; clCreateImage will fail with
 54 |     CL_INVALID_VALUE if anything other than a 2D image is specified.
 55 | 
 56 | Header File
 57 | 
 58 |     cl_ext.h
 59 | 
 60 | New Tokens
 61 | 
 62 |     Accepted by the <host_ptr> argument of clCreateBuffer, clCreateImage2D:
 63 | 
 64 |         typedef struct _cl_mem_android_native_buffer_host_ptr
 65 |         {
 66 |             // Type of external memory allocation.
 67 |             // Must be CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM for Android Native Buffers.
 68 |             cl_mem_ext_host_ptr   ext_host_ptr;
 69 | 
 70 |             // Host pointer to the Android Native Buffer (ANativeBuffer*)
 71 |             void*                 anb_ptr;
 72 | 
 73 |         } cl_mem_android_native_buffer_host_ptr;
 74 | 
 75 |     Used together with CL_MEM_EXT_HOST_PTR_QCOM:
 76 | 
 77 |         CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM                   0x40C6
 78 | 
 79 | Additions to Chapter 5.2.1 of the OpenCL 1.1 Specification
 80 | (Creating Buffer Objects)
 81 | 
 82 |         When CL_MEM_EXT_HOST_PTR_QCOM is enabled in the <flags> argument, then
 83 |         <host_ptr> is interpreted as a pointer to cl_mem_ext_host_ptr. When
 84 |         <host_ptr>->allocation_type is equal to
 85 |         CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM then <host_ptr> can also be
 86 |         interpreted as a pointer to cl_mem_android_native_buffer_host_ptr.
 87 | 
 88 |         In addition to that, the application must also initialize the following
 89 |         struct fields:
 90 | 
 91 |         * <host_ptr>->host_cache_policy should be set as follows - If the
 92 |           Graphic Buffer was created as cached and
 93 |           cl_qcom_ext_host_ptr_iocoherent is present,
 94 |           <host_ptr>->host_cache_policy can be set to either
 95 |           CL_MEM_HOST_WRITEBACK_QCOM or CL_MEM_HOST_IOCOHERENT_QCOM. If the
 96 |           Graphic Buffer was created as cached and
 97 |           cl_qcom_ext_host_ptr_iocoherent is not present,
 98 |           <host_ptr>->host_cache_policy should be set to
 99 |           CL_MEM_HOST_WRITEBACK_QCOM. It must be equal to
100 |           CL_MEM_HOST_UNCACHED_QCOM otherwise.
101 | 
102 |         * <host_ptr>->anb_ptr must be the host virtual pointer associated with
103 |           the ANativeBuffer.
104 | 
105 |         The caching policy provided in ext_host_ptr.host_cache_policy must be
106 |         the same policy the GraphicBuffer is created with. Any mismatch will
107 |         result in undefined behavior.
108 | 
109 |         Only Buffers and 2D images are supported. Use of other image types will
110 |         result in undefined behavior.
111 | 
112 |         The application is responsible for maintaining the consistency of image
113 |         attributes, i.e. format, width, height, and pitch, between the OpenCL
114 |         image and the Android native buffer (aka graphics buffer). Also, if an
115 |         OpenCL buffer is created from a native buffer, the application is
116 |         responsible for making sure that the size of the buffer matches the
117 |         actual linear size of the native buffer; creating a buffer with a size
118 |         different than the passed-in native buffer will result in undefined
119 |         behavior.
120 | 
121 |         The application is responsible for ensuring that the underlying native
122 |         buffer is not released while the cl object is in use. Proper
123 |         synchronization between different APIs that share the underlying buffer
124 |         is to be handled by the application.
125 | 
126 | Sample Code
127 | 
128 |     1) Using the extension for CL buffer objects
129 | 
130 |         cl_mem                                buffer_object        = NULL;
131 |         size_t                                buffer_size_in_bytes = 0;
132 |         cl_mem_android_native_buffer_host_ptr myANBmem             = {0};
133 | 
134 |         // Create an OpenCL buffer object that uses myANBmem as its data store.
135 |         myANBmem.ext_host_ptr.allocation_type =
136 |             CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM;
137 |         myANBmem.ext_host_ptr.host_cache_policy = CL_MEM_HOST_UNCACHED_QCOM;
138 |         myANBmem.anb_ptr = gb->getNativeBuffer(); // gb is Android GraphicBuffer
139 | 
140 |         // The stride returned is in pixels, so we have to factor in pixel_size
141 |         // (4 for RGBA) when calculating buffer_size_in_bytes
142 |         buffer_size_in_bytes = gb->getHeight() * gb->getStride() * 4;
143 | 
144 |         buffer_object = clCreateBuffer(context,
145 |             CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM,
146 |             buffer_size_in_bytes, &myANBmem, &errcode);
147 | 
148 |     2) Using the extension for CL image objects
149 | 
150 |         cl_mem                                image_object = NULL;
151 |         cl_mem_android_native_buffer_host_ptr myANBmem     = {0};
152 | 
153 |         // Create an OpenCL image object that uses myANBmem as its data store.
154 |         myANBmem.ext_host_ptr.allocation_type =
155 |             CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM;
156 |         myANBmem.ext_host_ptr.host_cache_policy = CL_MEM_HOST_WRITEBACK_QCOM;
157 |         myANBmem.anb_ptr = gb->getNativeBuffer(); // gb is Android GraphicBuffer
158 | 
159 |         imgw = gb->getWidth();
160 |         imgh = gb->getHeight();
161 |         // The stride returned is in pixels, so we have to factor in pixel_size
162 |         // (4 for RGBA) when calculating row_pitch
163 |         row_pitch = gb->getStride() * 4;
164 |         // pick any CL format as long as it is consistent with graphic buffer
165 |         // width and stride.
166 |         image_format = {CL_RGBA, CL_UNSIGNED_INT8};
167 | 
168 |         image_object = clCreateImage2D(context,
169 |             CL_MEM_USE_HOST_PTR|CL_MEM_EXT_HOST_PTR_QCOM, &image_fmt, imgw,
170 |             imgh, row_pitch, &myANBmem, &errcode);
171 | 
172 | Revision History
173 | 
174 |     Revision 1, 2014/06/05: Initial version.
175 |     Revision 2, 2017/06/16: Clean up. No functional changes.
176 |     Revision 3, 2017/10/24: Updated sample code.
177 |     Revision 4, 2017/11/13: Clean up. No functional changes.
178 |     Revision 5, 2018/01/03: Add reference to cl_qcom_ext_host_ptr_iocoherent.
179 |     Revision 6, 2018/01/19: Formatting and misc changes. No functional changes.
180 | 
181 | 


--------------------------------------------------------------------------------
/docs/extensions/cl_qcom_compressed_image.txt:
--------------------------------------------------------------------------------
  1 | Name Strings
  2 | 
  3 |     cl_qcom_compressed_image
  4 | 
  5 | Contributors
  6 | 
  7 |     Balaji Calidas, Qualcomm Technologies, Inc.
  8 |     Roto Le, Qualcomm Technologies, Inc.
  9 |     Sreelakshmi Haridas Maruthur, Qualcomm Innovation Center, Inc.
 10 |     Chris Grimm, Qualcomm Technologies, Inc.
 11 | 
 12 | Contact
 13 | 
 14 |     bcalidas at qti dot qualcomm dot com
 15 | 
 16 | Version
 17 | 
 18 |     Version 3, 2018/01/19
 19 | 
 20 | Status
 21 | 
 22 |     Shipping
 23 | 
 24 | Extension Type
 25 | 
 26 |     OpenCL device extension
 27 | 
 28 | Dependencies
 29 | 
 30 |     OpenCL 2.0 or later is required.
 31 | 
 32 |     cl_qcom_android_native_buffer_host_ptr or cl_qcom_ion_host_ptr is required.
 33 | 
 34 |     This extension is written against the OpenCL 2.0 Specification.
 35 | 
 36 | Overview
 37 | 
 38 |     This extension enables an application to read from and write to
 39 |     OpenCL image objects holding Qualcomm compressed image data.
 40 | 
 41 |     Compressed images are enabled on specific Qualcomm GPUs.
 42 |     The main advantage of this feature is to reduce the overhead of reading and
 43 |     writing images.
 44 | 
 45 |     An application can use this extension to query supported compressed image
 46 |     formats. It can then create an image of a supported compressed format from
 47 |     an ION or an Android Native Buffer (ANB) allocation. Only reads and writes
 48 |     of this image from inside a CL kernel are defined. The results of any
 49 |     host access are undefined.
 50 | 
 51 | Header File
 52 | 
 53 |     cl_ext_qcom.h
 54 | 
 55 | New Procedures and Functions
 56 | 
 57 |     None
 58 | 
 59 | New Tokens
 60 | 
 61 |     Added to the list of supported cl_mem_flags by clCreateImage in
 62 |     Table 5.3 of the OpenCL 2.0 Specification.
 63 | 
 64 |         CL_MEM_COMPRESSED_IMAGE_QCOM
 65 | 
 66 | Additions to Chapter 5.3.1 of the OpenCL 2.0 Specification
 67 | (Creating Image Objects)
 68 | 
 69 | (Append to the section introduced by cl_qcom_ion_host_ptr extension)
 70 | 
 71 |     Compressed images can be created from ION buffers in much the same way as
 72 |     uncompressed images. <host_ptr>->allocation_type should be set to
 73 |     CL_MEM_ION_HOST_PTR_QCOM. The application is responsible for the layout of
 74 |     compressed image data held in an ION allocation, which must follow the
 75 |     structure defined by Qualcomm compression standard. Any deviation from the
 76 |     standard will lead to undefined results.
 77 | 
 78 |     While creating compressed images, when CL_MEM_EXT_HOST_PTR_QCOM is enabled
 79 |     in the <flags> argument, the image_row_pitch and image_slice_pitch fields of
 80 |     cl_image_desc must be set to 0.
 81 | 
 82 | Additions to Chapter 5.3.2 of the OpenCL 2.0 Specification
 83 | (Querying List of Supported Image Formats)
 84 | 
 85 |     When CL_MEM_COMPRESSED_IMAGE_QCOM is enabled in the <flags> argument, the
 86 |     implementation will return a list of supported compressed image formats.
 87 |     CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE and
 88 |     CL_MEM_KERNEL_READ_AND_WRITE flags retain the same meaning as for
 89 |     uncompressed formats.
 90 | 
 91 | Additions to Chapter 6.13.14.2 of the OpenCL-C 2.0 Specification
 92 | (Built-in Image Read Functions)
 93 | 
 94 |     The sampler used for reading compressed images takes the following values:
 95 |     CLK_FILTER_NEAREST and CLK_FILTER_LINEAR for the sampler's filter mode.
 96 |     CLK_ADDRESS_NONE, CLK_ADDRESS_CLAMP and CLK_ADDRESS_CLAMP_TO_EDGE for the
 97 |     sampler's addressing mode.
 98 | 
 99 | Android Native Buffer Option
100 | 
101 |     On Android platforms it is possible to create a compressed image from
102 |     ANativeWindowBuffer (ANB) aka graphics buffer. The application is
103 |     responsible for creating an ANB buffer of appropriate format and size which
104 |     can be used to store the compressed image data.
105 | 
106 | Additions to Chapter 5.3.1 of the OpenCL 2.0 Specification
107 | (Creating Image Objects)
108 | 
109 | (Append to the section introduced by cl_qcom_android_native_buffer_host_ptr
110 | extension)
111 | 
112 |     Compressed images can be created from ANB buffers in much the same way as
113 |     uncompressed images. <host_ptr>->allocation_type should be set to
114 |     CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM. The application is responsible
115 |     for the layout of compressed image data held in an ANB allocation, which
116 |     must follow the structure defined by Qualcomm compression standard. Any
117 |     deviation from the standard will lead to undefined results.
118 | 
119 |     While creating compressed images, when CL_MEM_EXT_HOST_PTR_QCOM is enabled
120 |     in the <flags> argument, the image_row_pitch and image_slice_pitch fields of
121 |     cl_image_desc must be set to 0.
122 | 
123 | Sample Code
124 | 
125 |     1) Querying supported formats for read_only compressed images
126 | 
127 |         #define MAX_NUM_FORMATS 128
128 | 
129 |         cl_image_format format_list[ MAX_NUM_FORMATS] = {0};
130 |         cl_int          num_format_list_entries       = MAX_NUM_FORMATS;
131 |         cl_int          num_reported_image_formats    = 0;
132 |         cl_int          errcode                       = 0;
133 | 
134 |         // Query the supported formats for CL_MEM_COMPRESSED_IMAGE_QCOM
135 |         errcode = clGetSupportedImageFormats(
136 |             context,
137 |             CL_MEM_READ_ONLY | CL_MEM_COMPRESSED_IMAGE_QCOM,
138 |             CL_MEM_OBJECT_IMAGE2D,
139 |             num_format_list_entries,
140 |             format_list,
141 |             &num_reported_image_formats);
142 | 
143 |     2) Creating an ION buffer for holding compressed image data.
144 |  
145 |         cl_mem_ion_host_ptr compressed_ionmem = {0};
146 | 
147 |         // Initialize ION buffer attributes
148 |         compressed_ionmem.ext_host_ptr.allocation_type =
149 |             CL_MEM_ION_HOST_PTR_QCOM;
150 |         compressed_ionmem.ext_host_ptr.host_cache_policy =
151 |             CL_MEM_HOST_UNCACHED_QCOM;
152 |         compressed_ionmem.ion_filedesc =
153 |             ion_info_fd.file_descriptor; // file descriptor for ION
154 |         compressed_ionmem.ion_hostptr =
155 |             ion_info.host_virtual_address; // hostptr returned by ION
156 | 
157 |     3) Using cl_qcom_ion_host_ptr holding compressed image data to create a
158 |        compressed RGBA image object.
159 | 
160 |         cl_image_format image_format = {0};
161 |         cl_image_desc   image_desc   = {0};
162 |         cl_int          errcode      = 0;
163 | 
164 |         // Set image format
165 |         image_format->image_channel_order     = CL_QCOM_COMPRESSED_RGBA;
166 |         image_format->image_channel_data_type = CL_UNORM_INT8;
167 | 
168 |         // Set image parameters
169 |         image_desc->image_width       = 128;
170 |         image_desc->image_height      = 256;
171 |         image_desc->image_row_pitch   = 0; // must be 0 for compressed images
172 |         image_desc->image_slice_pitch = 0; // must be 0 for compressed images
173 | 
174 |         // Create a compressed image
175 |         compressed_rbga_image = clCreateImage(
176 |             context,
177 |             CL_MEM_EXT_HOST_PTR_QCOM | CL_MEM_READ_ONLY,
178 |             image_format,
179 |             image_desc,
180 |             (void*)compressed_ionmem,
181 |             &errcode);
182 | 
183 |     4) Creating an ANB buffer for holding compressed data
184 | 
185 |         cl_mem_android_native_buffer_host_ptr  compressed_ANBmem = {0};
186 |         GraphicBuffer *gb; // previously created
187 | 
188 |         compressed_ANBmem.ext_host_ptr.allocation_type =
189 |             CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM;
190 |         compressed_ANBmem.ext_host_ptr.host_cache_policy =
191 |             CL_MEM_HOST_WRITEBACK_QCOM;
192 |         // the hostptr to a native buffer and gb is an Android GraphicBuffer
193 |         compressed_ANBmem.anb_ptr = gb->getNativeBuffer();
194 | 
195 |     5) Using cl_qcom_android_native_buffer_host_ptr holding compressed image
196 |        data to create a compressed RGBA image object.
197 | 
198 |         cl_image_format image_format = {0};
199 |         cl_image_desc   image_desc   = {0};
200 |         cl_int          errcode      = 0;
201 | 
202 |         // Set image format
203 |         image_format->image_channel_order     = CL_QCOM_COMPRESSED_RGBA;
204 |         image_format->image_channel_data_type = CL_UNORM_INT8;
205 | 
206 |         // Set image parameters
207 |         image_desc->image_width       = 128;
208 |         image_desc->image_height      = 256;
209 |         image_desc->image_row_pitch   = 0; // always 0 for compressed images
210 |         image_desc->image_slice_pitch = 0; // always 0 for compressed images
211 | 
212 |         // Create a compressed  image
213 |         compressed_rbga_image = clCreateImage(
214 |             context,
215 |             CL_MEM_EXT_HOST_PTR_QCOM | CL_MEM_READ_ONLY,
216 |             image_format,
217 |             image_desc,
218 |             (void*)compressed_ANBmem,
219 |             &errcode);
220 | 
221 | Revision History
222 | 
223 |     Revision 1, 2016/06/02: Initial version.
224 |     Revision 2, 2017/06/16: Clean up. No functional changes.
225 |     Revision 3, 2018/01/19: Formatting and misc changes. No functional changes.
226 | 
227 | 


--------------------------------------------------------------------------------
/docs/extensions/cl_qcom_create_buffer_from_image.txt:
--------------------------------------------------------------------------------
  1 | Name Strings
  2 | 
  3 |     cl_qcom_create_buffer_from_image
  4 | 
  5 | Contributors
  6 | 
  7 |     Alex Bourd, Qualcomm Technologies, Inc.
  8 |     Balaji Calidas, Qualcomm Technologies, Inc.
  9 |     David Garcia, Qualcomm Technologies, Inc.
 10 |     Samuel Pauls, Qualcomm Technologies, Inc.
 11 | 
 12 | Contact
 13 | 
 14 |     bcalidas at qti dot qualcomm dot com
 15 | 
 16 | Version
 17 | 
 18 |     Version 7, 2018/01/19
 19 | 
 20 | Status
 21 | 
 22 |     Shipping
 23 | 
 24 | Extension Type
 25 | 
 26 |     OpenCL device extension
 27 | 
 28 | Dependencies
 29 | 
 30 |     OpenCL 1.0 and the cl_qcom_ext_host_ptr extension are required. This
 31 |     extension is written against OpenCL 1.2 rev 15 and cl_qcom_ext_host_ptr
 32 |     rev 1.
 33 | 
 34 | Overview
 35 | 
 36 |     There are times when developers want to access image data as raw pointers
 37 |     in the OpenCL C language without the safety layer provided by the image
 38 |     read/write builtin functions. One particular case for this would be reading
 39 |     from or writing to EGL external images exposed indirectly to OpenCL through
 40 |     GL/CL interop extensions. Another example is expert developers who want
 41 |     to read/write multiple pixels with a single memory load/store operation.
 42 | 
 43 | Header File
 44 | 
 45 |     cl_ext_qcom.h
 46 | 
 47 | New Procedures and Functions
 48 | 
 49 |     clCreateBufferFromImageQCOM
 50 | 
 51 | New Tokens
 52 | 
 53 |     Accepted as <param_name> arguments of clGetDeviceImageInfoQCOM:
 54 | 
 55 |         CL_BUFFER_FROM_IMAGE_ROW_PITCH_QCOM   0x40C0
 56 |         CL_BUFFER_FROM_IMAGE_SLICE_PITCH_QCOM 0x40C1
 57 | 
 58 | Additions to Chapter 5.2 of the OpenCL 1.2 Specification
 59 | 
 60 |     The function
 61 | 
 62 |     cl_mem clCreateBufferFromImageQCOM(cl_mem        image,
 63 |                                        cl_mem_flags  flags,
 64 |                                        cl_int       *errcode_ret)
 65 | 
 66 |     can be used to create a new buffer object from an existing image.
 67 | 
 68 |     <image> is a valid image and cannot be of type
 69 |     CL_MEM_OBJECT_IMAGE1D_BUFFER nor can it use CL_MEM_USE_HOST_PTR.
 70 | 
 71 |     <flags> is a bit-field that is used to specify allocation and usage
 72 |     information about the buffer memory object being created and is described
 73 |     in Table 5.3.
 74 | 
 75 |     <errcode_ret> will return an appropriate error code. If <errcode_ret> is
 76 |     NULL, no error code is returned.
 77 | 
 78 |     If the call succeeds, the buffer that is returned references the data store
 79 |     allocated for <image> and points to the origin pixel in this data store. The
 80 |     data layout is equivalent to what is produced by clEnqueueMapImage when
 81 |     <origin> is (0,0,0) and <region> is (<width>, <height>, <depth>). The
 82 |     <image> from which the buffer is created is called the <parent image> of the
 83 |     buffer.
 84 | 
 85 |     In order to access the pixel data in the returned buffer correctly, the
 86 |     client must query the parent image row pitch and slice pitch using
 87 |     clGetDeviceImageInfoQCOM with the parameter names
 88 |     CL_BUFFER_FROM_IMAGE_ROW_PITCH_QCOM and
 89 |     CL_BUFFER_FROM_IMAGE_SLICE_PITCH_QCOM. For example, it is incorrect to
 90 |     assume that the row pitch of the data contained in the buffer is simply the
 91 |     image element size multiplied by the image width.
 92 | 
 93 |     Concurrent reading from and writing to both a buffer object and its parent
 94 |     image is undefined. Concurrent reading from and writing to buffer objects
 95 |     created with the same parent image is undefined. Only concurrent reading
 96 |     from both a buffer object and its parent image object and concurrent reading
 97 |     from multiple buffer objects created from the same image is defined.
 98 | 
 99 |     clCreateBufferFromImageQCOM returns a valid non-zero buffer object and
100 |     <errcode_ret> is set to CL_SUCCESS if the buffer object is created
101 |     successfully. Otherwise, it returns a NULL value with one of the following
102 |     error values returned in <errcode_ret>:
103 | 
104 |     * CL_INVALID_MEM_OBJECT if <image> is not a valid image object or if it is
105 |       of type CL_MEM_OBJECT_IMAGE1D_BUFFER.
106 | 
107 |     * CL_INVALID_VALUE if <image> was created with CL_MEM_WRITE_ONLY and <flags>
108 |       specifies CL_MEM_READ_WRITE or CL_MEM_READ_ONLY, or if <image> was created
109 |       with CL_MEM_READ_ONLY and <flags> specifies CL_MEM_READ_WRITE or
110 |       CL_MEM_WRITE_ONLY, or if <image> was created with CL_MEM_USE_HOST_PTR, or
111 |       if <flags> specifies CL_MEM_USE_HOST_PTR or CL_MEM_ALLOC_HOST_PTR or
112 |       CL_MEM_COPY_HOST_PTR.
113 | 
114 |     * CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory
115 |       for the buffer object.
116 | 
117 |     * CL_OUT_OF_RESOURCES if there is a failure to allocate resources required
118 |       by the OpenCL implementation on the device.
119 | 
120 |     * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
121 |       by the OpenCL implementation on the host.
122 | 
123 | Modifications to Table 5.xxx in cl_qcom_ext_host_ptr
124 | 
125 |     Additional supported param_names by clGetDeviceImageInfoQCOM
126 | 
127 |         cl_image_pitch_info_qcom  Return Type  Info returned in param_value
128 | 
129 |         CL_BUFFER_FROM_IMAGE_     cl_uint      Returns the image row pitch in
130 |             ROW_PITCH_QCOM                     bytes supported by this device
131 |                                                in regard to
132 |                                                cl_qcom_create_buffer_from_image
133 | 
134 |         CL_BUFFER_FROM_IMAGE_      cl_int      Returns the image slice pitch in
135 |             SLICE_PITCH_QCOM                   bytes supported by this device
136 |                                                in regard to
137 |                                                cl_qcom_create_buffer_from_image
138 | 
139 | Modifications to Table 5.9 in Chapter 5.4 of the OpenCL 1.2 Specification
140 | 
141 |     The row that defines the semantics of CL_MEM_ASSOCIATED_MEMOBJECT now reads:
142 | 
143 |         Return memory object from which memobj is created.
144 | 
145 |         This returns the parent buffer argument specified when
146 |         clCreateSubBuffer or clCreateImage were called. It also returns the
147 |         parent image specified when clCreateBufferFromImageQCOM was called.
148 | 
149 |         Otherwise a NULL value is returned.
150 | 
151 | Issues
152 | 
153 |     (1) Should we prevent buffers from being created out of
154 |         CL_MEM_OBJECT_IMAGE1D_BUFFER images?
155 | 
156 |         RESOLVED: Qualcomm has added the restriction that the image cannot be of
157 |         type CL_MEM_OBJECT_IMAGE1D_BUFFER.
158 | 
159 |     (2) How do we define the layout of the data pointed at by the buffer
160 |         created by clCreateBufferFromImageQCOM?
161 | 
162 |         RESOLVED: The data in the buffer must be linear.
163 | 
164 | Sample Code
165 | 
166 |     cl_mem buffer            = NULL;
167 |     cl_mem image             = NULL;
168 |     size_t row_pitch         = 0;
169 |     cl_image_desc image_desc = {0};
170 | 
171 |     image_desc.buffer            = NULL;
172 |     image_desc.image_array_size  = 1;
173 |     image_desc.image_width       = W;
174 |     image_desc.image_height      = H;
175 |     image_desc.image_depth       = 1;
176 |     image_desc.image_row_pitch   = 0;
177 |     image_desc.image_slice_pitch = 0;
178 |     image_desc.image_type        = CL_MEM_OBJECT_IMAGE2D;
179 |     image_desc.num_mip_levels    = 0;
180 |     image_desc.num_samples       = 0;
181 | 
182 |     // Create a 2D image
183 |     clCreateImage(context, CL_MEM_READ_WRITE, format, &image_desc, NULL, NULL);
184 | 
185 |     // convert the image to a buffer
186 |     buffer = clCreateBufferFromImageQCOM(image, CL_MEM_READ_WRITE, NULL);
187 | 
188 |     // retrieve the image row pitch in order to calculate total size
189 |     clGetDeviceImageInfoQCOM(device,
190 |                              width,
191 |                              height,
192 |                              &format,
193 |                              CL_BUFFER_FROM_IMAGE_ROW_PITCH_QCOM,
194 |                              sizeof(row_pitch),
195 |                              &row_pitch,
196 |                              NULL);
197 | 
198 |     // map the buffer for reading using the total image size
199 |     clEnqueueMapBuffer(queue,
200 |                        buffer,
201 |                        CL_TRUE,
202 |                        CL_MAP_READ,
203 |                        0,
204 |                        image_desc.image_height * row_pitch,
205 |                        0,
206 |                        NULL,
207 |                        NULL,
208 |                        NULL);
209 | 
210 | Revision History
211 | 
212 |     Revision 1, 2012/10/17: First draft.
213 |     Revision 2, 2013/02/28: The implementation doesn't support an image created
214 |                             with CL_MEM_USE_HOST_PTR.
215 |     Revision 3, 2013/05/01: clGetDeviceImageInfoQCOM must now be used instead
216 |                             of clGetImageInfo to get the pitches.
217 |     Revision 4, 2013/08/27: Created new image pitch tokens for
218 |                             clGetDeviceImageInfoQCOM.
219 |     Revision 5, 2017/06/16: Clean up. No functional changes.
220 |     Revision 6, 2017/11/13: Clean up. No functional changes.
221 |     Revision 7, 2018/01/19: Formatting and misc changes. No functional changes.
222 | 


--------------------------------------------------------------------------------
/docs/extensions/cl_qcom_ext_host_ptr.txt:
--------------------------------------------------------------------------------
  1 | Name Strings
  2 | 
  3 |     cl_qcom_ext_host_ptr
  4 | 
  5 | Contributors
  6 | 
  7 |     Balaji Calidas, Qualcomm Technologies, Inc.
  8 |     David Garcia, Qualcomm Technologies, Inc.
  9 |     Rajeev Rao, Qualcomm Technologies, Inc.
 10 |     Sushmita Susheelendra, Qualcomm Innovation Center, Inc.
 11 | 
 12 | Contact
 13 | 
 14 |     bcalidas at qti dot qualcomm dot com
 15 | 
 16 | Version
 17 | 
 18 |     Version 5, 2018/01/19
 19 | 
 20 | Number
 21 | 
 22 |     OpenCL Extension #21
 23 | 
 24 | Status
 25 | 
 26 |     Shipping
 27 | 
 28 | Extension Type
 29 | 
 30 |     OpenCL device extension
 31 | 
 32 | Dependencies
 33 | 
 34 |     OpenCL 1.1 is required.
 35 | 
 36 |     This extension is written against the OpenCL 1.1 specification.
 37 | 
 38 | Overview
 39 | 
 40 |     This extension extends the functionality provided by clCreateBuffer,
 41 |     clCreateImage2D, clCreateImage3D. It allows applications to specify a new
 42 |     flag CL_MEM_EXT_HOST_PTR_QCOM which enables the driver to map external
 43 |     memory allocations, to be defined in layered extensions, to the device's
 44 |     address space, thus avoiding having to copy data back and forth between
 45 |     the host and the device.
 46 | 
 47 | Header File
 48 | 
 49 |     cl_ext.h
 50 | 
 51 | New Procedures and Functions
 52 | 
 53 |     cl_int clGetDeviceImageInfoQCOM(
 54 |         cl_device_id             device,
 55 |         size_t                   image_width,
 56 |         size_t                   image_height,
 57 |         const cl_image_format    *image_format,
 58 |         cl_image_pitch_info_qcom param_name,
 59 |         size_t                   param_value_size,
 60 |         void                     *param_value,
 61 |         size_t                   *param_value_size_ret);
 62 | 
 63 | New Types
 64 | 
 65 |     typedef cl_uint cl_image_pitch_info_qcom;
 66 | 
 67 | New Tokens
 68 | 
 69 |     Accepted by the <param_name> argument of clGetDeviceInfo
 70 | 
 71 |         CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM    0x40A0
 72 |         CL_DEVICE_PAGE_SIZE_QCOM                   0x40A1
 73 | 
 74 |     Accepted by the <flags> argument of clCreateBuffer, clCreateImage2D and
 75 |     clCreateImage3D:
 76 | 
 77 |         CL_MEM_EXT_HOST_PTR_QCOM                   (1 << 29)
 78 | 
 79 |     Accepted by the <host_ptr> argument of clCreateBuffer, clCreateImage2D and
 80 |     clCreateImage3D:
 81 | 
 82 |         typedef struct _cl_mem_ext_host_ptr
 83 |         {
 84 |             // Type of external memory allocation.
 85 |             // Legal values will be defined in layered extensions.
 86 |             cl_uint  allocation_type;
 87 | 
 88 |             // Host cache policy for this external memory allocation.
 89 |             cl_uint  host_cache_policy;
 90 | 
 91 |         } cl_mem_ext_host_ptr;
 92 | 
 93 |     Accepted values for cl_mem_ext_host_ptr::host_cache_policy:
 94 | 
 95 |         CL_MEM_HOST_UNCACHED_QCOM                  0x40A4
 96 |         CL_MEM_HOST_WRITEBACK_QCOM                 0x40A5
 97 |         CL_MEM_HOST_WRITETHROUGH_QCOM              0x40A6
 98 |         CL_MEM_HOST_WRITE_COMBINING_QCOM           0x40A7
 99 | 
100 |     Accepted by the <param_name> argument of clGetDeviceImageInfoQCOM
101 | 
102 |         CL_IMAGE_ROW_ALIGNMENT_QCOM                0x40A2
103 |         CL_IMAGE_SLICE_ALIGNMENT_QCOM              0x40A3
104 | 
105 | Additions to Chapter 5.2.1 of the OpenCL 1.1 Specification
106 | (Creating Buffer Objects)
107 | 
108 |     Add the following token to Table 5.3 (clCreateBuffer List of supported
109 |     cl_mem_flags values):
110 | 
111 |         CL_MEM_EXT_HOST_PTR_QCOM             This flag is valid only when used
112 |                                              together with CL_MEM_USE_HOST_PTR.
113 |                                              If specified, it indicates that the
114 |                                              <host_ptr> argument provided by the
115 |                                              application is actually a pointer
116 |                                              to cl_mem_ext_host_ptr.
117 | 
118 | 
119 |         When CL_MEM_EXT_HOST_PTR_QCOM is enabled in the <flags> argument, then
120 |         <host_ptr> is interpreted as a pointer to cl_mem_ext_host_ptr. The
121 |         application must then initialize cl_mem_ext_host_ptr::allocation_type to
122 |         one of the allowed token values defined in future layered extensions.
123 | 
124 |         The application must also initialize
125 |         cl_mem_ext_host_ptr::host_cache_policy to one of
126 |         CL_MEM_HOST_UNCACHED_QCOM, CL_MEM_HOST_WRITEBACK_QCOM,
127 |         CL_MEM_HOST_WRITETHROUGH_QCOM, or CL_MEM_HOST_WRITE_COMBINING_QCOM
128 |         according to the cache policy used in the host for this memory
129 |         allocation.
130 | 
131 |     Add the following token to Table 4.3 (clGetDeviceInfo OpenCL Device
132 |     Queries):
133 | 
134 |         CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM    Returns the amount of memory
135 |                                                    padding that the application
136 |                                                    must add to the end of every
137 |                                                    external allocation that will
138 |                                                    be used in conjunction with
139 |                                                    CL_MEM_EXT_HOST_PTR_QCOM.
140 | 
141 |         CL_DEVICE_PAGE_SIZE_QCOM                   Returns the device's page
142 |                                                    size.
143 | 
144 |         The application may query the row and slice pitch values using
145 |         clGetDeviceImageInfoQCOM and provide the queried values or any other
146 |         supported value to clCreateImage2D and clCreateImage3D when using
147 |         CL_MEM_EXT_HOST_PTR_QCOM.
148 | 
149 |         A supported value for row pitch and slice pitch is defined respectively
150 |         as:
151 | 
152 |         * Any value greater than or equal to CL_IMAGE_ROW_PITCH that is also a
153 |           multiple of CL_IMAGE_ROW_ALIGNMENT_QCOM.
154 |         * Any value greater than or equal to CL_IMAGE_SLICE_PITCH that is also
155 |           a multiple of CL_IMAGE_SLICE_ALIGNMENT_QCOM.
156 | 
157 | Additions to Section 5.3 after clGetImageInfo
158 | 
159 |     An application that creates OpenCL image objects with the
160 |     CL_MEM_EXT_HOST_PTR_QCOM flag can invoke the following function to query the
161 |     required row pitch, slice pitch and alignment for a particular device:
162 | 
163 |     cl_int clGetDeviceImageInfoQCOM(
164 |         cl_device_id             device,
165 |         size_t                   image_width,
166 |         size_t                   image_height,
167 |         const cl_image_format   *image_format,
168 |         cl_image_pitch_info_qcom param_name,
169 |         size_t                   param_value_size,
170 |         void                    *param_value,
171 |         size_t                  *param_value_size_ret);
172 | 
173 |     device               - is a valid device
174 | 
175 |     image_width          - width of the image in image elements (pixels)
176 | 
177 |     image_height         - height of the image in image elements (pixels)
178 | 
179 |     image_format         - pointer to image format descriptor structure
180 | 
181 |     param_name           - specifies the information to query. The list of
182 |                            supported param_name types and the information
183 |                            returned in param_value by
184 |                            clGetDeviceImageInfoQCOM is described in Table 5.XXX.
185 | 
186 |     param_value          - is a pointer to memory where the appropriate result
187 |                            being queried is returned. If param_value is NULL, it
188 |                            is ignored.
189 | 
190 |     param_value_size     - is used to specify the size in bytes of memory
191 |                            pointed to by param_value. This size must be greater
192 |                            than or equal to the size of return type as described
193 |                            in Table 5.XXX.
194 | 
195 |     param_value_size_ret - returns the actual size in bytes of data being
196 |                            queried by param_value. If param_value_size_ret is
197 |                            NULL, it is ignored.
198 | 
199 |     clGetDeviceImageInfoQCOM returns CL_SUCCESS if the function is executed
200 |     successfully. Otherwise, it returns one of the following errors:
201 | 
202 |     CL_INVALID_VALUE      - if param_name is not valid, or if size in bytes
203 |                               specified by param_value_size is less than the
204 |                               size of return type for that param_value and
205 |                               param_value is not NULL.
206 | 
207 |     CL_INVALID_MEM_OBJECT - if image is not a valid image object.
208 | 
209 |     CL_OUT_OF_RESOURCES   - if there is a failure to allocate resources required
210 |                             by the OpenCL implementation on the device.
211 | 
212 |     CL_OUT_OF_HOST_MEMORY - if there is a failure to allocate resources required
213 |                             by the OpenCL implementation on the host.
214 | 
215 | Table 5.XXX
216 | 
217 |     List of supported param_names by clGetDeviceImageInfoQCOM
218 | 
219 |         cl_image_pitch_info_qcom       Return Type   Info returned in
220 |                                                      param_value
221 | 
222 |         CL_IMAGE_ROW_PITCH             cl_uint       Returns the image row pitch
223 |                                                      supported by this device
224 | 
225 |         CL_IMAGE_ROW_ALIGNMENT_QCOM    cl_uint       Returns the image row pitch
226 |                                                      alignment supported by this
227 |                                                      device
228 | 
229 |         CL_IMAGE_SLICE_PITCH           cl_uint       Returns the image slice
230 |                                                      pitch supported by this
231 |                                                      device
232 | 
233 |         CL_IMAGE_SLICE_ALIGNMENT_QCOM  cl_uint       Returns the image slice
234 |                                                      pitch alignment supported
235 |                                                      by this device
236 | 
237 | Additions to Section 5.3.1. (Creating Image Objects) at the end of the list of
238 | errors returned by clCreateImage2D and clCreateImage3D:
239 | 
240 |     CL_INVALID_VALUE if <flags> has CL_MEM_EXT_HOST_PTR_QCOM enabled and yet
241 |     CL_MEM_USE_HOST_PTR is not enabled.
242 | 
243 |     CL_INVALID_VALUE if <flags> has CL_MEM_EXT_HOST_PTR_QCOM enabled and if
244 |     <image_row_pitch> and/or <image_slice_pitch> fail to match the requirements
245 |     of section 5.3
246 | 
247 |     CL_INVALID_VALUE if any of the fields in the struct pointed at by <host_ptr>
248 |     are invalid.
249 | 
250 | Revision History
251 | 
252 |     Revision 1, 2013/05/27: Initial version.
253 |     Revision 2, 2017/06/16: Clean up. No functional changes.
254 |     Revision 3, 2017/11/13: Clean up. No functional changes.
255 |     Revision 4, 2017/11/30: Corrected parameter description for
256 |                             clGetDeviceImageInfoQCOM.
257 |     Revision 5, 2018/01/19: Formatting and misc changes. No functional changes.
258 | 
259 | 


--------------------------------------------------------------------------------
/docs/extensions/cl_qcom_ext_host_ptr_iocoherent.txt:
--------------------------------------------------------------------------------
 1 | Name Strings
 2 | 
 3 |     cl_qcom_ext_host_ptr_iocoherent
 4 | 
 5 | Contributors
 6 | 
 7 |     Balaji Calidas, Qualcomm Technologies, Inc.
 8 |     Manali Torpe, Qualcomm Innovation Center, Inc.
 9 |     Sreelakshmi Haridas Maruthur, Qualcomm Innovation Center, Inc.
10 | 
11 | Contact
12 | 
13 |     bcalidas at qti dot qualcomm dot com
14 | 
15 | Version
16 | 
17 |     Version 4, 2018/03/06
18 | 
19 | Number
20 | 
21 |     OpenCL Extension #53
22 | 
23 | Status
24 | 
25 |     Shipping
26 | 
27 | Extension Type
28 | 
29 |     OpenCL device extension
30 | 
31 | Dependencies
32 | 
33 |     OpenCL 1.1 is required.
34 |     cl_qcom_ext_host_ptr is required.
35 | 
36 |     This extension extends the functionality of cl_qcom_ext_host_ptr.
37 | 
38 | Overview
39 | 
40 |     This extension extends the functionality provided by cl_qcom_ext_host_ptr by
41 |     adding a new host cache policy. It allows applications to specify a new
42 |     value, CL_MEM_HOST_IOCOHERENT_QCOM, for
43 |     cl_mem_ext_host_ptr::host_cache_policy. When the application selects this
44 |     value for host cache policy, the imported allocation is mapped as
45 |     io-coherent for the GPU. This in turn avoids the need for the OpenCL driver
46 |     to explicitly issue CPU cache operation calls. Although GPU performance can
47 |     be slower in some cases for io-coherent allocations, the overall performance
48 |     can improve due to the elimination of explicit CPU cache operations.
49 | 
50 | Header File
51 | 
52 |     cl_ext.h
53 | 
54 | New Tokens
55 | 
56 |     Modification to handling of <host_ptr> argument of clCreateBuffer,
57 |     clCreateImage2D and clCreateImage3D when CL_MEM_EXT_HOST_PTR_QCOM is
58 |     specified in the <flags> argument.
59 | 
60 |     New accepted value for cl_mem_ext_host_ptr::host_cache_policy:
61 | 
62 |         CL_MEM_HOST_IOCOHERENT_QCOM                0x40A9
63 | 
64 |     As described in the cl_qcom_ext_host_ptr spec, when CL_MEM_EXT_HOST_PTR_QCOM
65 |     is enabled in the <flags> argument, then <host_ptr> is interpreted as a
66 |     pointer to cl_mem_ext_host_ptr. The application must initialize
67 |     cl_mem_ext_host_ptr::host_cache_policy to one of CL_MEM_HOST_UNCACHED_QCOM,
68 |     CL_MEM_HOST_WRITEBACK_QCOM, or CL_MEM_HOST_IOCOHERENT_QCOM according to the
69 |     cache policy used in the host for this memory allocation.
70 |     CL_MEM_HOST_IOCOHERENT_QCOM can only be specified when the memory was
71 |     originally allocated as cached. Use of this value with an uncached
72 |     allocation will lead to undefined results.
73 | 
74 | Revision History
75 | 
76 |     Revision 1, 2018/01/03: Initial version.
77 |     Revision 2, 2018/01/15: Minor edits.
78 |     Revision 3, 2018/01/19: Formatting and misc changes. No functional changes.
79 |     Revision 4, 2018/03/06: Corrected token value.
80 | 
81 | 


--------------------------------------------------------------------------------
/docs/extensions/cl_qcom_extract_image_plane.txt:
--------------------------------------------------------------------------------
  1 | Name Strings
  2 | 
  3 |     cl_qcom_extract_image_plane
  4 | 
  5 | Contributors
  6 | 
  7 |     Roto Le, Qualcomm Technologies, Inc.
  8 |     Balaji Calidas, Qualcomm Technologies, Inc.
  9 | 
 10 | Contact
 11 | 
 12 |     bcalidas at qti dot qualcomm dot com
 13 | 
 14 | Version
 15 | 
 16 |     Version 5, 2018/01/19
 17 | 
 18 | Status
 19 | 
 20 |     Shipping
 21 | 
 22 | Extension Type
 23 | 
 24 |     OpenCL device extension
 25 | 
 26 | Dependencies
 27 | 
 28 |     OpenCL 2.0 and the cl_qcom_other_image and cl_qcom_compressed_image
 29 |     extensions are required.
 30 | 
 31 |     This extension is written against OpenCL 2.0 rev 26, cl_qcom_other_image
 32 |     rev 1, and cl_qcom_compressed_image rev 1.
 33 | 
 34 | Overview
 35 | 
 36 |     This extension allows an application to create a single plane derivative of
 37 |     a multi-plane planar image. This would allow the application to process each
 38 |     plane of a planar image as a separate image. For example a YUV image could
 39 |     be split into Y-only and UV-only images. Image processing could be applied
 40 |     exclusively to the Y-only image. Also, on some GPUs, direct writes to a YUV
 41 |     image may not be supported but writes to Y-only and UV-only images may be
 42 |     available. The child derivative images and the parent planar image both
 43 |     reference the same image data. No data copy is involved.
 44 | 
 45 | Header File
 46 | 
 47 |     None
 48 | 
 49 | New Procedures and Functions
 50 | 
 51 |     None
 52 | 
 53 | New Tokens
 54 | 
 55 |     None
 56 | 
 57 | Additions to Chapter 5.3 of the OpenCL 2.0 Specification
 58 | 
 59 |     The function
 60 | 
 61 |     cl_mem  clCreateImage(cl_context              context,
 62 |                           cl_mem_flags            flags,
 63 |                           const cl_image_format * image_format,
 64 |                           const cl_image_desc *   image_desc,
 65 |                           void *                  host_ptr,
 66 |                           cl_int *                errcode_ret)
 67 | 
 68 |     can be used to create child images from an existing parent planar image.
 69 | 
 70 |     <image_desc> is a pointer to a structure that describes type and dimensions
 71 |     of the child image to be created.
 72 | 
 73 |     * image_desc->mem_object refers to a valid planar image memory object, which
 74 |       is the parent image of the to be created child images.
 75 | 
 76 |     * image_desc->image_type must match exactly the type of the parent image
 77 |       object and it must also be a supported image type for the given
 78 |       <image_format>.
 79 | 
 80 |     * image_desc's dimensions must match exactly the dimensions of the parent
 81 |       image object.
 82 | 
 83 |     * image_desc's pitches (i.e. image_row_pitch, image_slice_pitch) must be set
 84 |       to zero.
 85 | 
 86 |     <context> is a valid OpenCL context on which the image object is to be
 87 |     created.
 88 | 
 89 |     * context must be identical to the context of the parent image object.
 90 | 
 91 |     <image_format> holds the format of the to be created child image.
 92 | 
 93 |     * The child image_format must be a single plane derivative of the parent
 94 |       image. For example CL_QCOM_NV12_Y and CL_QCOM_NV12_UV are derivatives of
 95 |       CL_QCOM_NV12.
 96 | 
 97 |     * The compression type or packing type of the child image formats must match
 98 |       exactly that of the parent image. For example creating a linear
 99 |       CL_QCOM_NV12_Y child image from a compressed CL_QCOM_COMPRESSED_NV12
100 |       parent image is not valid.
101 | 
102 |     <flags> is a bit-field that is used to specify allocation and usage
103 |     information about the image memory object to be created and is described in
104 |     Table 5.3.
105 | 
106 |     * The read/write bits in the <flags> must be set according to the read/write
107 |       capability supported for the <image_format>.
108 | 
109 |     * CL_MEM_USE_HOST_PTR, CL_MEM_ALLOC_HOST_PTR, and CL_MEM_COPY_HOST_PTR
110 |       cannot be set for the <flags> when creating a child image.
111 | 
112 |     <host_ptr> must be set to NULL.
113 | 
114 |     <errcode_ret> will return an appropriate error code. If <errcode_ret> is
115 |     NULL, no error code is returned.
116 | 
117 |     * CL_INVALID_IMAGE_SIZE if image dimensions specified in image_desc do not
118 |       match the dimensions of the parent image.
119 | 
120 |     * CL_INVALID_IMAGE_DESCRIPTOR if values specified in image_desc do not
121 |       satisfy the rules for <image_desc>->mem_object and the child image's
122 |       pitches specified above.
123 | 
124 |     * CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if image_format does not satisfy the
125 |       rules for the child image formats specified above.
126 | 
127 |     If the call succeeds, the returned child image object references the image
128 |     data of the parent image's plane specified by the child <image_format>. The
129 |     child image data and its layout are therefore identical to the associated
130 |     plane of the parent image, with <origin> equal to (0,0,0) and <region>
131 |     equal to (<width>, <height>, <depth>). Note that a coordinate offset is not
132 |     allowed on the child image.
133 | 
134 |     Concurrent reading from and writing to both a child image object and its
135 |     parent image object is undefined. Concurrent reading from and writing to
136 |     child images created with the same parent image is undefined. Only
137 |     concurrent reading from both a child image and its parent image, and
138 |     concurrent reading from multiple child images created from the same parent
139 |     image is defined.
140 | 
141 | Sample Code
142 | 
143 |     // Create a 2D CL_QCOM_COMPRESSED_NV12_Y child image from an existing 2D
144 |     // CL_QCOM_COMPRESSED_NV12 parent image.
145 | 
146 |     cl_int          errcode               = CL_SUCCESS;
147 |     cl_mem_flags    child_image_flag      = {0};
148 |     cl_image_desc   child_image_desc      = {0};
149 |     cl_image_format child_planar_y_format = {0};
150 | 
151 |     // Query the parent_image's dimensions
152 |     errcode = clGetImageInfo (parent_planar_image, CL_IMAGE_WIDTH,
153 |         sizeof(size_t), (void*)&parent_image_width, NULL);
154 |     if(errcode != CL_SUCCESS) exit(-1);
155 | 
156 |     errcode = clGetImageInfo (parent_planar_image, CL_IMAGE_HEIGHT,
157 |         sizeof(size_t), (void*)&parent_image_height, NULL);
158 |     if(errcode != CL_SUCCESS) exit(-1);
159 | 
160 |     // Setting the image_desc & image_format for creating the child_image
161 |     child_planar_y_format.image_channel_order     = CL_QCOM_COMPRESSED_NV12_Y;
162 |     child_planar_y_format.image_channel_data_type = CL_UNORM_INT8;
163 | 
164 |     child_image_desc.image_type         = CL_MEM_OBJECT_IMAGE2D;
165 |     child_image_desc.image_width        = parent_image_width;
166 |     child_image_desc.image_height       = parent_image_height;
167 |     child_image_desc.image_row_pitch    = 0;
168 |     child_image_desc.image_slice_pitch  = 0;
169 |     child_image_desc.mem_object         = parent_planar_image;
170 | 
171 |     child_image_flag                    = CL_MEM_READ_WRITE;
172 | 
173 |     child_y_image = clCreateImage(context,
174 |                                   child_image_flag,
175 |                                   &child_planar_y_format,
176 |                                   &child_image_desc,
177 |                                   NULL, // host_ptr must be set to NULL
178 |                                   &errcode);
179 |     if(errcode != CL_SUCCESS) exit(-1);
180 | 
181 | Revision History
182 | 
183 |     Revision 1, 2016/12/06: First draft.
184 |     Revision 2, 2017/03/19: Second draft.
185 |     Revision 3, 2017/06/16: Clean up. No functional changes.
186 |     Revision 4, 2017/11/13: Clean up. No functional changes.
187 |     Revision 5, 2018/01/19: Formatting and misc changes. No functional changes.
188 | 
189 | 


--------------------------------------------------------------------------------
/docs/extensions/cl_qcom_ion_host_ptr.txt:
--------------------------------------------------------------------------------
  1 | Name Strings
  2 | 
  3 |     cl_qcom_ion_host_ptr
  4 | 
  5 | Contributors
  6 | 
  7 |     Balaji Calidas, Qualcomm Technologies, Inc.
  8 |     David Garcia, Qualcomm Technologies, Inc.
  9 |     Sushmita Susheelendra, Qualcomm Innovation Center, Inc.
 10 | 
 11 | Contact
 12 | 
 13 |     bcalidas at qti dot qualcomm dot com
 14 | 
 15 | Version
 16 | 
 17 |     Version 7, 2018/01/19
 18 | 
 19 | Number
 20 | 
 21 |     OpenCL Extension #22
 22 | 
 23 | Status
 24 | 
 25 |     Shipping
 26 | 
 27 | Extension Type
 28 | 
 29 |     OpenCL device extension
 30 | 
 31 | Dependencies
 32 | 
 33 |     OpenCL 1.1 is required. cl_qcom_ext_host_ptr is required.
 34 | 
 35 |     This extension is written against the OpenCL 1.1 specification.
 36 | 
 37 |     If present, cl_qcom_ext_host_ptr_iocoherent extends the functionality of
 38 |     this extension.
 39 | 
 40 | Overview
 41 | 
 42 |     This extension extends the functionality provided by clCreateBuffer,
 43 |     clCreateImage2D, clCreateImage3D. It allows applications to pass an ION
 44 |     memory allocation to these functions so that it can be mapped to the
 45 |     device's address space and thus avoid having to copy data back and forth
 46 |     between the host and the device.
 47 | 
 48 | Header File
 49 | 
 50 |     cl_ext.h
 51 | 
 52 | New Tokens
 53 | 
 54 |     Accepted by the <host_ptr> argument of clCreateBuffer, clCreateImage2D and
 55 |     clCreateImage3D:
 56 | 
 57 |         typedef struct _cl_mem_ion_host_ptr
 58 |         {
 59 |             // Type of external memory allocation.
 60 |             // Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations.
 61 |             cl_mem_ext_host_ptr   ext_host_ptr;
 62 | 
 63 |             // ION file descriptor
 64 |             int                   ion_filedesc;
 65 | 
 66 |             // Host pointer to the ION allocated memory
 67 |             void*                 ion_hostptr;
 68 | 
 69 |         } cl_mem_ion_host_ptr;
 70 | 
 71 |     Used together with CL_MEM_EXT_HOST_PTR_QCOM:
 72 | 
 73 |         CL_MEM_ION_HOST_PTR_QCOM                   0x40A8
 74 | 
 75 | Additions to Chapter 5.2.1 of the OpenCL 1.1 Specification
 76 | 
 77 |     (Creating Buffer Objects)
 78 | 
 79 |     When CL_MEM_EXT_HOST_PTR_QCOM is enabled in the <flags> argument, then
 80 |     <host_ptr> is interpreted as a pointer to cl_mem_ext_host_ptr. When
 81 |     <host_ptr>->allocation_type is equal to CL_MEM_ION_HOST_PTR_QCOM then
 82 |     <host_ptr> can also be interpreted as a pointer to cl_mem_ion_host_ptr.
 83 | 
 84 |     In addition to that, the application must also initialize the following
 85 |     struct fields:
 86 | 
 87 |     * <host_ptr>->host_cache_policy should be set as follows - If the ion
 88 |       allocation was made with the flag ION_FLAG_CACHED enabled and
 89 |       cl_qcom_ext_host_ptr_iocoherent is present, <host_ptr>->host_cache_policy
 90 |       can be set to either CL_MEM_HOST_WRITEBACK_QCOM or
 91 |       CL_MEM_HOST_IOCOHERENT_QCOM. If the ION allocation was made with the
 92 |       flag ION_FLAG_CACHED enabled and cl_qcom_ext_host_ptr_iocoherent is not
 93 |       present, <host_ptr>->host_cache_policy should be set to
 94 |       CL_MEM_HOST_WRITEBACK_QCOM. It must be equal to
 95 |       CL_MEM_HOST_UNCACHED_QCOM otherwise.
 96 | 
 97 |     * <host_ptr>->ion_filedesc must be the file descriptor of the ION memory
 98 |       allocation that the application wants to use as storage bits for the
 99 |       memory object.
100 | 
101 |     * <host_ptr>->ion_hostptr must be the host virtual pointer associated with
102 |       the same ION memory allocation. If the application does not need to map
103 |       the newly created cl memory object for host access, it can set
104 |       <host_ptr>->ion_hostptr to NULL. If this happens, then calls to host
105 |       access functions such as clEnqueueMapBuffer will fail and return
106 |       an error code of CL_INVALID_OPERATION. Setting <host_ptr>->ion_hostptr to
107 |       NULL avoids the need for the application to make an extra map call for
108 |       acquiring the host virtual pointer.
109 | 
110 |     Memory specified this way must be aligned to the device's page size. The
111 |     application can query the device's page size by using
112 |     clGetDeviceInfo(..., CL_DEVICE_PAGE_SIZE_QCOM, ...).
113 | 
114 |     Once the memory object is created, the application must call
115 |     clEnqueueMapBuffer/clEnqueueMapImage with appropriate flags before
116 |     reading or writing to it on the host. The host unmaps the region when
117 |     accesses (reads and/or writes) to this mapped region by the host are
118 |     complete. As per the OpenCL 1.2 specification, clEnqueueMapBuffer and
119 |     clEnqueueMapImage act as synchronization points for the region of the
120 |     buffer object being mapped.
121 | 
122 | Sample Code
123 | 
124 |     1) Using the extension for CL buffer objects
125 | 
126 |         cl_mem               buffer_object            = NULL;
127 |         size_t               buffer_size_in_bytes     = 0;
128 |         size_t               buffer_size_with_padding = 0;
129 |         cl_mem_ion_host_ptr  myionmem                 = {0};
130 |         size_t               ext_mem_padding_in_bytes = 0;
131 |         size_t               device_page_size         = 0;
132 | 
133 |         // Query the device's page size and the amount of padding necessary at
134 |         // the end of the buffer.
135 |         clGetDeviceInfo(device, CL_DEVICE_PAGE_SIZE_QCOM,
136 |             sizeof(device_page_size), &device_page_size, NULL);
137 |         clGetDeviceInfo(device, CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM,
138 |             sizeof(ext_mem_padding_in_bytes), &ext_mem_padding_in_bytes, NULL);
139 | 
140 |         // Compute the desired size for the data in the buffer.
141 |         buffer_size_in_bytes = foobar();
142 | 
143 |         // Compute amount of memory that needs to be allocated for the buffer
144 |         // including padding.
145 |         buffer_size_with_padding = buffer_size_in_bytes +
146 |             ext_mem_padding_in_bytes;
147 | 
148 |         // Make an ION memory allocation of size buffer_size_with_padding here.
149 |         // Note that allocating buffer_size_in_bytes instead would be a mistake.
150 |         // It's important to allocate the extra padding. Let's say the
151 |         // parameters of the allocation are stored in a struct named ion_info
152 |         // that we will use below.
153 | 
154 |         // Create an OpenCL buffer object that uses ion_info as its data store.
155 |         // Notice how the buffer is created with size buffer_size_in_bytes, not
156 |         // buffer_size_with_padding.
157 |         myionmem.ext_host_ptr.allocation_type = CL_MEM_ION_HOST_PTR_QCOM;
158 |         myionmem.ext_host_ptr.host_cache_policy = CL_MEM_HOST_UNCACHED_QCOM;
159 |         // file descriptor for ION
160 |         myionmem.ion_filedesc = ion_info_fd.file_descriptor;
161 |         // hostptr returned by ION which is device page size aligned
162 |         myionmem.ion_hostptr = ion_info.host_virtual_address;
163 | 
164 |         if(myionmem.ion_hostptr % device_page_size)
165 |         {
166 |             error("Host pointer must be aligned to device_page_size!");
167 |         }
168 | 
169 |         buffer_object = clCreateBuffer(context,
170 |             CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM,
171 |             buffer_size_in_bytes, &myionmem, &errcode);
172 | 
173 |     2) Using the extension for CL image objects
174 | 
175 |         cl_mem              image_object             = NULL;
176 |         cl_mem_ion_host_ptr myionmem                 = {0};
177 |         size_t              ext_mem_padding_in_bytes = 0;
178 |         size_t              device_page_size         = 0;
179 |         size_t              row_pitch                = 0;
180 | 
181 |         // Query the device's page size and the amount of padding necessary at
182 |         // the end of the buffer.
183 |         clGetDeviceInfo(device, CL_DEVICE_PAGE_SIZE_QCOM,
184 |         sizeof(device_page_size), &device_page_size, NULL);
185 |         clGetDeviceInfo(device, CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM,
186 |             sizeof(ext_mem_padding_in_bytes), &ext_mem_padding_in_bytes, NULL);
187 | 
188 |         // Query the device supported row and slice pitch using
189 |         // clGetDeviceImageInfoQCOM
190 |         // imgw - image width
191 |         // imgh - image height
192 |         // img_fmt - image format
193 |         clGetDeviceImageInfoQCOM(device, imgw, imgh, &img_fmt,
194 |             CL_IMAGE_ROW_PITCH, sizeof(image_row_pitch), &image_row_pitch,
195 |             NULL);
196 | 
197 |         // Use the image height, row pitch obtained above and element size to
198 |         // compute the size of the buffer
199 |         buffer_size_in_bytes = imgh * image_row_pitch;
200 | 
201 |         // Compute amount of memory that needs to be allocated for the buffer
202 |         // including padding.
203 |         buffer_size_with_padding = buffer_size_in_bytes +
204 |             ext_mem_padding_in_bytes;
205 | 
206 |         // Make an ION memory allocation of size buffer_size_with_padding here.
207 |         // Note that allocating buffer_size_in_bytes instead would be a mistake.
208 |         // It's important to allocate the extra padding. Let's say the
209 |         // parameters of the allocation are stored in a struct named ion_info
210 |         // that we will use below.
211 | 
212 |         // Create an OpenCL image object that uses ion_info as its data store.
213 |         myionmem.ext_host_ptr.allocation_type = CL_MEM_ION_HOST_PTR_QCOM;
214 |         myionmem.ext_host_ptr.host_cache_policy = CL_MEM_HOST_UNCACHED_QCOM;
215 |         // file descriptor for ION
216 |         myionmem.ion_filedesc = ion_info_fd.file_descriptor;
217 |         // hostptr returned by ION which is device page size aligned
218 |         myionmem.ion_hostptr = ion_info.host_virtual_address;
219 | 
220 |         if(myionmem.ion_hostptr % device_page_size)
221 |         {
222 |             error("Host pointer must be aligned to device_page_size!");
223 |         }
224 | 
225 |         // Note that the image_row_pitch obtained by calling
226 |         // clGetDeviceImageInfoQCOM should be passed to clCreateImage2D
227 |         image_object = clCreateImage2D(context,
228 |             CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM, &img_fmt, imgw,
229 |             imgh, image_row_pitch, &myionmem, &errcode);
230 | 
231 |         // Call clEnqueueMapImage before filling input image data
232 |         pinput = clEnqueueMapImage(command_queue, image_object, CL_TRUE,
233 |             CL_MAP_WRITE, origin, region, &row_pitch, NULL, 0, NULL, NULL,
234 |             &errcode);
235 | 
236 |         // Fill the input image data using the hostptr and row_pitch returned by
237 |         // clEnqueueMapImage
238 |         cl_uchar* inp = pinput;
239 |         memset(inp, 0x0, (row_pitch * imgh));
240 |         for(i = 0; i < (row_pitch * imgh); i+=row_pitch)
241 |         {
242 |             memset(inp+i, 0xff, imgw * element_size);
243 |         }
244 | 
245 |         errcode =  clEnqueueUnmapMemObject(command_queue, image_object, pinput,
246 |             0, NULL, NULL);
247 | 
248 | Revision History
249 | 
250 |     Revision 1, 2012/10/18: Initial version.
251 |     Revision 2, 2012/11/01: Improved sample code.
252 |     Revision 3, 2013/05/17: Generalized. Cleaned-up for Khronos. Added final
253 |                             token values.
254 |     Revision 4, 2017/06/16: Clean up. No functional changes.
255 |     Revision 5, 2017/11/13: Clean up. No functional changes.
256 |     Revision 6, 2018/01/03: Added reference to cl_qcom_ext_host_ptr_iocoherent.
257 |     Revision 7, 2018/01/19: Formatting and misc changes. No functional changes.
258 | 


--------------------------------------------------------------------------------
/docs/extensions/cl_qcom_other_image.txt:
--------------------------------------------------------------------------------
  1 | Name Strings
  2 | 
  3 |     cl_qcom_other_image
  4 | 
  5 | Contributors
  6 | 
  7 |     Balaji Calidas, Qualcomm Technologies, Inc.
  8 |     Roto Le, Qualcomm Technologies, Inc.
  9 |     Manali Torpe, Qualcomm Innovation Center, Inc.
 10 |     Chris Grimm, Qualcomm Technologies, Inc.
 11 | 
 12 | Contact
 13 | 
 14 |     bcalidas at qti dot qualcomm dot com
 15 | 
 16 | Version
 17 | 
 18 |     Version 6, 2018/04/25
 19 | 
 20 | Status
 21 | 
 22 |     Shipping
 23 | 
 24 | Extension Type
 25 | 
 26 |     OpenCL device extension
 27 | 
 28 | Dependencies
 29 | 
 30 |     OpenCL 2.0 or later is required.
 31 | 
 32 |     cl_qcom_android_native_buffer_host_ptr or cl_qcom_ion_host_ptr is required.
 33 | 
 34 |     This extension is written against the OpenCL 2.0 Specification.
 35 | 
 36 | Overview
 37 | 
 38 |     This extension enables an application to read from and/or write to
 39 |     non-conventional OpenCL image objects. Examples of non-conventional images
 40 |     are planar images such as NV12 or TP10, MIPI packed images, Bayer pattern
 41 |     images and tiled images. These images do not conform to the standards for
 42 |     images as described in the OpenCL specification. Therefore they are not
 43 |     exposed directly through OpenCL. Instead they are exposed through this
 44 |     extension. These images are supported on select Qualcomm GPUs.
 45 | 
 46 |     An application can use this extension to query supported non-conventional
 47 |     image formats. It can then create an image of a supported non-conventional
 48 |     format from an ION or ANB allocation. Only reads and writes of this image
 49 |     from inside a CL kernel are defined, as well as a limited use of the
 50 |     clEnqueueMapImage host API described below.
 51 | 
 52 | Header File
 53 | 
 54 |     cl_ext_qcom.h
 55 | 
 56 | New Procedures and Functions
 57 | 
 58 |     None
 59 | 
 60 | New Tokens
 61 | 
 62 |     Added to the list of supported cl_mem_flags by clCreateImage in
 63 |     Table 5.3 of the OpenCL 2.0 Specification.
 64 | 
 65 |         CL_MEM_OTHER_IMAGE_QCOM
 66 | 
 67 | Additions to Chapter 5.3.1 of the OpenCL 2.0 Specification
 68 | (Creating Image Objects)
 69 | (Append to the section introduced by cl_qcom_ion_host_ptr extension)
 70 | 
 71 |     Non-conventional images can be created from ION buffers in much the same way
 72 |     as conventional images. <host_ptr>->allocation_type should be set to
 73 |     CL_MEM_ION_HOST_PTR_QCOM. The application is responsible for the layout of
 74 |     the non-conventional image data held in an ION allocation, which must follow
 75 |     the structure defined by the image format. Any deviation from the standard
 76 |     will lead to undefined results.
 77 | 
 78 |     While creating non-conventional images, when CL_MEM_EXT_HOST_PTR_QCOM is
 79 |     enabled in the <flags> argument, the image_row_pitch and image_slice_pitch
 80 |     fields of cl_image_desc must be set to 0.
 81 | 
 82 | Additions to Chapter 5.3.2 of the OpenCL 2.0 Specification
 83 | (Querying List of Supported Image Formats)
 84 | 
 85 |     When CL_MEM_OTHER_IMAGE_QCOM is enabled in the <flags> argument, the
 86 |     implementation will return a list of supported non-conventional image
 87 |     formats. CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE and
 88 |     CL_MEM_KERNEL_READ_AND_WRITE flags retain the same meaning as for
 89 |     conventional image formats.
 90 | 
 91 | Additions to Chapter 5.3.6 of the OpenCL 2.0 Specification
 92 | (Mapping Image Objects)
 93 | (Append to description of clEnqueueMapImage function)
 94 | 
 95 |     If the image is of a non-conventional format enabled by
 96 |     CL_MEM_OTHER_IMAGE_QCOM, then mapping a region has a special meaning that is
 97 |     only defined under certain conditions. All other uses will invoke undefined
 98 |     behavior. If origin[0], origin[1] and origin[2] are all 0, the value of
 99 |     region specifies the entire image, and the image was created with a valid
100 |     ION or ANB allocation, then this function will return a valid host pointer
101 |     to the start of the underlying ION or ANB allocation. Additionally, any
102 |     necessary cache operations will be performed to ensure appropriate data
103 |     visibility. The object must be unmapped using clEnqueueUnmapMemObject when
104 |     host access is complete to ensure that any changes become visible.
105 | 
106 | Additions to Chapter 6.13.14.2 of the OpenCL-C 2.0 Specification
107 | (Built-in Image Read Functions)
108 | 
109 |     The sampler used for reading non-conventional images can take the following
110 |     values: CLK_FILTER_NEAREST and CLK_FILTER_LINEAR for the sampler's filter
111 |     mode. CLK_ADDRESS_NONE, CLK_ADDRESS_CLAMP and CLK_ADDRESS_CLAMP_TO_EDGE for
112 |     the sampler's addressing mode. For some non-conventional image formats there
113 |     may be restrictions on which filter modes and which addressing modes can be
114 |     used.
115 | 
116 | Android Native Buffer Option
117 | 
118 |     On Android platforms it is possible to create a non-conventional image from
119 |     an ANativeWindowBuffer (ANB), also known as a graphics buffer. The
120 |     application is responsible for creating an ANB buffer of appropriate format
121 |     and size which can be used to store the non-conventional image data.
122 | 
123 | Additions to Chapter 5.3.1 of the OpenCL 2.0 Specification
124 | (Creating Image Objects)
125 | 
126 | (Append to the section introduced by cl_qcom_android_native_buffer_host_ptr
127 | extension)
128 | 
129 |     Non-conventional images can be created from ANB buffers in much the same way
130 |     as conventional images. <host_ptr>->allocation_type should be set to
131 |     CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM. The application is responsible
132 |     for the layout of non-conventional image data held in an ANB allocation,
133 |     which must follow the structure defined by Qualcomm compression standard.
134 |     Any deviation from the standard will lead to undefined results. While
135 |     creating non-conventional images, when CL_MEM_EXT_HOST_PTR_QCOM is enabled
136 |     in the <flags> argument, the image_row_pitch and image_slice_pitch fields of
137 |     cl_image_desc must be set to 0.
138 | 
139 |     Compressed Planar Images
140 | 
141 |     It is possible to combine the cl_qcom_other_image extension with the
142 |     cl_qcom_compressed_image extension to create images that are both compressed
143 |     and non-conventional. An example would be CL_QCOM_COMPRESSED_NV12. Such
144 |     images may be queried by enabling CL_MEM_OTHER_IMAGE_QCOM |
145 |     CL_MEM_COMPRESSED_IMAGE_QCOM in the flags argument of
146 |     clGetSupportedImageFormats. These images can be created in the same manner
147 |     as conventional images using ION or ANB buffers.
148 | 
149 |     Mem Flags Usage
150 | 
151 |     When querying non-conventional and/or compressed image formats, it is
152 |     important to use the correct mem flags. CL_MEM_OTHER_IMAGE_QCOM will query
153 |     the non-conventional and uncompressed image formats.
154 |     CL_MEM_OTHER_IMAGE_QCOM | CL_MEM_COMPRESSED_IMAGE_QCOM will query the
155 |     non-conventional and compressed image formats. CL_MEM_COMPRESSED_IMAGE_QCOM
156 |     when used alone will query the conventional compressed image formats such as
157 |     CL_QCOM_COMPRESSED_RGBA.
158 | 
159 | Sample Code
160 | 
161 |     1) Querying supported formats for read_only non-conventional images
162 | 
163 |         #define MAX_NUM_FORMATS 128
164 | 
165 |         cl_image_format format_list[ MAX_NUM_FORMATS] = {0};
166 |         cl_int          num_format_list_entries       = MAX_NUM_FORMATS;
167 |         cl_int          num_reported_image_formats    = 0;
168 |         cl_int          errcode                       = 0;
169 | 
170 |         // Query the supported formats for compressed non-conventional images
171 |         errcode = clGetSupportedImageFormats(context,
172 |             CL_MEM_READ_ONLY | CL_MEM_OTHER_IMAGE_QCOM |
173 |                 CL_MEM_COMPRESSED_IMAGE_QCOM,
174 |             CL_MEM_OBJECT_IMAGE2D,
175 |             num_format_list_entries,
176 |             format_list,
177 |             &num_reported_image_formats);
178 | 
179 |     2) Creating an ION buffer for holding non-conventional image data.
180 | 
181 |         cl_mem_ion_host_ptr nv12_ionmem = {0};
182 | 
183 |         // Initialize ION buffer attributes
184 |         nv12_ionmem.ext_host_ptr.allocation_type = CL_MEM_ION_HOST_PTR_QCOM;
185 |         nv12_ionmem.ext_host_ptr.host_cache_policy = CL_MEM_HOST_UNCACHED_QCOM;
186 |         // file descriptor for ION
187 |         nv12_ionmem.ion_filedesc = ion_info_fd.file_descriptor;
188 |         // hostptr returned by ION
189 |         nv12_ionmem.ion_hostptr = ion_info.host_virtual_address;
190 | 
191 |     3) Using cl_qcom_ion_host_ptr holding planar image data to create a
192 |        non-conventional compressed NV12 image object.
193 | 
194 |         cl_image_format image_format = {0};
195 |         cl_image_desc   image_desc   = {0};
196 |         cl_int          errcode      = 0;
197 | 
198 |         // Set image format
199 |         image_format->image_channel_order      = CL_QCOM_COMPRESSED_NV12;
200 |         image_format->image_channel_data_type  = CL_UNORM_INT8;
201 | 
202 |         // Set image parameters. image_row_pitch and image_slice_pitch are
203 |         // always 0 for non-conventional images.
204 |         image_desc->image_width = 256;
205 |         image_desc->image_height = 256;
206 |         image_desc->image_row_pitch = 0;
207 |         image_desc->image_slice_pitch = 0;
208 | 
209 |         // Create a non_conventional image
210 |         other_image = clCreateImage(context,
211 |             CL_MEM_EXT_HOST_PTR_QCOM|CL_MEM_READ_ONLY,
212 |             image_format,
213 |             image_desc,
214 |             (void*)nv12_ionmem,
215 |             &errcode);
216 | 
217 |     4) Creating an ANB buffer for holding planar image data
218 | 
219 |         cl_mem_android_native_buffer_host_ptr  other_anb           = {0};
220 |         GraphicBuffer *gb; // previously created
221 | 
222 |         other_anb.ext_host_ptr.allocation_type =
223 |             CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM;
224 |         other_anb.ext_host_ptr.host_cache_policy = CL_MEM_HOST_WRITEBACK_QCOM;
225 |         // the hostptr to a native buffer and gb is an Android GraphicBuffer
226 |         other_anb.anb_ptr = gb->getNativeBuffer();
227 | 
228 |     5) Using cl_qcom_android_native_buffer_host_ptr holding planar image data to
229 |        create a non-conventional image object.
230 | 
231 |         cl_image_format image_format = {0};
232 |         cl_image_desc   image_desc   = {0};
233 |         cl_int          errcode      = 0;
234 | 
235 |         // Set image format
236 |         image_format->image_channel_order     = CL_QCOM_COMPRESSED_NV12;
237 |         image_format->image_channel_data_type = CL_UNORM_INT8;
238 | 
239 |         // Set image parameters. image_row_pitch and image_slice_pitch are
240 |         // always 0 for non-conventional images.
241 |         image_desc->image_width = 256;
242 |         image_desc->image_height = 256;
243 |         image_desc->image_row_pitch = 0;
244 |         image_desc->image_slice_pitch = 0;
245 | 
246 |         // Create a non-conventional  image
247 |         nv12_img = clCreateImage(context,
248 |             CL_MEM_EXT_HOST_PTR_QCOM | CL_MEM_READ_ONLY,
249 |             image_format,
250 |             image_desc,
251 |             (void*)other_anb,
252 |             &errcode);
253 | 
254 | Revision History
255 | 
256 |     Revision 1, 2016/09/02: Initial version.
257 |     Revision 2, 2017/06/16: Clean up. No functional changes.
258 |     Revision 3, 2017/09/13: Clarified Mem Flags usage.
259 |     Revision 4, 2017/11/13: Clean up. No functional changes.
260 |     Revision 5, 2018/01/19: Formatting and misc changes. No functional changes.
261 |     Revision 6, 2018/04/25: Document map behavior.
262 | 
263 | 


--------------------------------------------------------------------------------
/docs/extensions/cl_qcom_perf_hint.txt:
--------------------------------------------------------------------------------
  1 | Name Strings
  2 | 
  3 |     cl_qcom_perf_hint
  4 | 
  5 | Contributors
  6 | 
  7 |     Sreelakshmi Haridas Maruthur, Qualcomm Innovation Center, Inc.
  8 | 
  9 | Contact
 10 | 
 11 |     sharidas at quicinc dot com
 12 | 
 13 | Version
 14 | 
 15 |     Version 4, 2018/01/19
 16 | 
 17 | Status
 18 | 
 19 |     Shipping
 20 | 
 21 | Extension Type
 22 | 
 23 |     OpenCL device extension
 24 | 
 25 | Dependencies
 26 | 
 27 |     OpenCL 1.1 or later is required.
 28 | 
 29 |     This extension is written against the OpenCL 1.1 specification.
 30 | 
 31 | Overview
 32 | 
 33 |     This extension allows applications to request the
 34 |     performance level desired for device(s) on an OpenCL context.
 35 | 
 36 |     Higher performance implies higher frequencies on the device.
 37 | 
 38 |     The hint may be provided at context creation as a context property and/or
 39 |     updated using a separate API at any point during the context’s lifetime.
 40 | 
 41 | Header File
 42 | 
 43 |     cl_ext_qcom.h
 44 | 
 45 | New Procedures and Functions
 46 | 
 47 |     clSetPerfHintQCOM
 48 | 
 49 | New Tokens
 50 | 
 51 |     Added to the list of supported properties by clCreateContext in
 52 |     Table 4.4 of the OpenCL 1.1 specification.
 53 | 
 54 |         CL_CONTEXT_PERF_HINT_QCOM    0x40C2
 55 | 
 56 |     New list of supported values for CL_CONTEXT_PERF_HINT_QCOM property
 57 | 
 58 |         CL_PERF_HINT_HIGH_QCOM       0x40C3
 59 |         CL_PERF_HINT_NORMAL_QCOM     0x40C4
 60 |         CL_PERF_HINT_LOW_QCOM        0x40C5
 61 | 
 62 | Additions to Chapter 4.3 of the OpenCL 1.1 Specification
 63 | (Contexts)
 64 | 
 65 |     Add the following tokens to Table 4.4 (List of supported properties by
 66 |     clCreateContext)
 67 | 
 68 |         CL_CONTEXT_PERF_HINT_QCOM  cl_perf_hint  Specifies the performance
 69 |                                                  hint for this context
 70 | 
 71 |     Add the following to Table 4.4a (List of supported performance hint values
 72 |     by CL_CONTEXT_PERF_HINT_QCOM)
 73 | 
 74 |     List of supported performance hint values and their effect on performance
 75 |     is described in Table 4.4a
 76 | 
 77 |         cl_perf_hint              Description
 78 | 
 79 |         CL_PERF_HINT_HIGH_QCOM    Requests the highest performance level from
 80 |                                   device. This is the default setting for
 81 |                                   devices in an OpenCL context.
 82 | 
 83 |         CL_PERF_HINT_NORMAL_QCOM  Requests a balanced performance setting that
 84 |                                   is set dynamically by the GPU frequency and
 85 |                                   power management
 86 | 
 87 |         CL_PERF_HINT_LOW_QCOM     Requests a performance setting that
 88 |                                   prioritizes lower power consumption
 89 | 
 90 |     Add the following error descriptions for clCreateContext:
 91 | 
 92 |         * CL_INVALID_PROPERTY if the context property
 93 |           CL_CONTEXT_PERF_HINT_QCOM is specified and at least one of the devices
 94 |           in <devices> does not support the performance hint property
 95 | 
 96 |     Add the following to Table 4.6 (List of supported param_names by
 97 |     clGetContextInfo), append to the entry for CL_CONTEXT_PROPERTIES
 98 | 
 99 |     cl_context_info  Return Type        Information returned in
100 |                                         param_value
101 | 
102 |     CL_CONTEXT_      cl_context_        If a CL_CONTEXT_PERF_HINT_QCOM
103 |         PROPERTIES       properties[]   property was set using
104 |                                         clSetPerfHintQCOM, the properties
105 |                                         argument returned will be populated with
106 |                                         this property, even if the property was
107 |                                         not specified in clCreateContext or
108 |                                         clCreateContextFromType.
109 | 
110 |     Add the following new function
111 | 
112 |         The function
113 | 
114 |             cl_int clSetPerfHintQCOM(cl_context context,
115 |                                      cl_perf_hint perf_hint)
116 | 
117 |         can be used to set the value of CL_CONTEXT_PERF_HINT_QCOM property on a
118 |         context. This function can be used to set or update the
119 |         CL_CONTEXT_PERF_HINT_QCOM property irrespective of whether it was
120 |         specified at context creation time as one of the context properties.
121 | 
122 |         <context> must be a valid OpenCL context
123 |         <perf_hint> identifies the hint being set. It has to be one of the
124 |                     values in Table 4.4a
125 | 
126 |         clSetPerfHintQCOM returns CL_SUCCESS if the property
127 |         CL_CONTEXT_PERF_HINT_QCOM was set to the provided value. Otherwise, it
128 |         returns one of the following errors:
129 | 
130 |         * CL_INVALID_CONTEXT if context is not a valid context
131 | 
132 |         * CL_INVALID_PROPERTY if at least one of the devices in the context does
133 |           not support the performance hint property
134 | 
135 |         * CL_INVALID_VALUE if the value of perf_hint is not one of the supported
136 |           values as specified in Table 4.4a.
137 | 
138 |         * CL_OUT_OF_RESOURCES if there is a failure to set the perf-hint on any
139 |           device in the context
140 | 
141 |         * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
142 |           required by the OpenCL implementation on the host
143 | 
144 |         The effect of clSetPerfHintQCOM on commands that are in flight is
145 |         undefined. These commands may be executed at either of the existing or
146 |         new performance levels. If the user desires to complete all pending work
147 |         at the existing level and execute all subsequent commands at the new
148 |         level, a clFinish must be added before calling clSetPerfHintQCOM to set
149 |         the new level.
150 | 
151 | Sample Code
152 | 
153 |     1) Set performance hint at time of CL context creation:
154 | 
155 |        cl_context_properties properties[] = {CL_CONTEXT_PERF_HINT_QCOM,
156 |            CL_PERF_HINT_LOW_QCOM, 0};
157 |        clCreateContext(properties, 1, &device_id, NULL, NULL, NULL);
158 | 
159 |     2) Set performance hint for an existing CL context:
160 | 
161 |        clSetPerfHintQCOM(context, CL_PERF_HINT_NORMAL_QCOM);
162 | 
163 | Revision History
164 | 
165 |     Revision 1, 2014/05/20: Initial version.
166 |     Revision 2, 2017/06/16: Clean up. No functional changes.
167 |     Revision 3, 2017/11/13: Clean up. No functional changes.
168 |     Revision 4, 2018/01/19: Formatting and misc changes. No functional changes.
169 | 
170 | 


--------------------------------------------------------------------------------
/docs/extensions/cl_qcom_priority_hint.txt:
--------------------------------------------------------------------------------
  1 | Name Strings
  2 | 
  3 |     cl_qcom_priority_hint
  4 | 
  5 | Contributors
  6 | 
  7 |     Richard Ruigrok, Qualcomm Innovation Center, Inc.
  8 | 
  9 | Contact
 10 | 
 11 |     bcalidas at qti dot qualcomm dot com
 12 | 
 13 | Version
 14 | 
 15 |     Version 5, 2018/01/19
 16 | 
 17 | Status
 18 | 
 19 |     Shipping
 20 | 
 21 | Extension Type
 22 | 
 23 |     OpenCL device extension
 24 | 
 25 | Dependencies
 26 | 
 27 |     OpenCL 1.1 or later is required.
 28 | 
 29 |     This extension is written against the OpenCL 1.1 specification.
 30 | 
 31 | Overview
 32 | 
 33 |     This extension allows applications to specify the desired priority for
 34 |     enqueued kernels to be submitted to the device(s) on an OpenCL context.
 35 | 
 36 |     Higher priority implies that enqueued kernels may be submitted to the
 37 |     device for processing before other enqueues on other contexts that
 38 |     have lower priority.
 39 | 
 40 |     The default behavior selects the priority that would otherwise be used
 41 |     for the context if this extension is not used.
 42 | 
 43 |     The hint may be provided at context creation as a context property.
 44 |     The context property may not be updated after context creation.
 45 | 
 46 | Header File
 47 | 
 48 |     cl_ext_qcom.h
 49 | 
 50 | New Procedures and Functions
 51 | 
 52 |     None
 53 | 
 54 | New Tokens
 55 | 
 56 |     Added to the list of supported properties by clCreateContext in
 57 |     Table 4.4 of the OpenCL 1.1 specification.
 58 | 
 59 |         CL_CONTEXT_PRIORITY_HINT_QCOM   0x40C9
 60 | 
 61 |     New list of supported values for CL_CONTEXT_PRIORITY_HINT_QCOM property
 62 | 
 63 |         CL_PRIORITY_HINT_HIGH_QCOM       0x40CA
 64 |         CL_PRIORITY_HINT_NORMAL_QCOM     0x40CB
 65 |         CL_PRIORITY_HINT_LOW_QCOM        0x40CC
 66 | 
 67 | 
 68 | Additions to Chapter 4.3 of the OpenCL 1.1 Specification
 69 | (Contexts)
 70 | 
 71 |     Add the following tokens to Table 4.4
 72 |     (List of supported properties by clCreateContext)
 73 | 
 74 |         CL_CONTEXT_PRIORITY_  cl_priority_hint  Specifies the desired priority
 75 |             HINT_QCOM                           level for this context
 76 | 
 77 |     Add the following to Table 4.4a
 78 |     (List of supported priority hint values by CL_CONTEXT_PRIORITY_HINT_QCOM)
 79 | 
 80 |     List of supported priority hint values and their effect on priority is
 81 |     described in Table 4.4a
 82 | 
 83 |         cl_priority_hint              Description
 84 | 
 85 |         CL_PRIORITY_HINT_HIGH_QCOM    Requests the highest priority level for
 86 |                                       all submissions for any command, for all
 87 |                                       devices on this context.
 88 | 
 89 |         CL_PRIORITY_HINT_NORMAL_QCOM  Requests a balanced priority level for all
 90 |                                       submissions for any command, for all
 91 |                                       devices on this context. This is the
 92 |                                       default.
 93 | 
 94 |         CL_PRIORITY_HINT_LOW_QCOM     Requests a lower priority level for all
 95 |                                       submissions for any command, for all
 96 |                                       devices on this context.
 97 | 
 98 |     Add the following error descriptions for clCreateContext:
 99 | 
100 |         * CL_INVALID_PROPERTY if the context property
101 |           CL_CONTEXT_PRIORITY_HINT_QCOM is specified and at least one of the
102 |           devices in <devices> does not support the priority hint property
103 | 
104 |     Add the following to Table 4.6
105 |     (List of supported param_names by clGetContextInfo)
106 |     append to the entry for CL_CONTEXT_PROPERTIES
107 | 
108 |         cl_context_info  Return Type       Information returned in
109 |                                                param_value
110 | 
111 |         CL_CONTEXT_      cl_context_       If a CL_CONTEXT_PRIORITY_HINT_QCOM
112 |             PROPERTIES       properties[]  property was given at context
113 |                                            creation, this property will be
114 |                                            returned.
115 | 
116 | Sample Code
117 | 
118 |     cl_context_properties properties[] = {CL_CONTEXT_PRIORITY_HINT_QCOM,
119 |         CL_PRIORITY_HINT_LOW_QCOM, 0};
120 |     clCreateContext(properties, 1, &device_id, NULL, NULL, NULL);
121 | 
122 | Revision History
123 | 
124 |     Revision 1, 2014/10/21: Initial version.
125 |     Revision 2, 2017/06/16: Clean up. No functional changes.
126 |     Revision 3, 2017/11/08: Now a public extension.
127 |     Revision 4, 2017/11/13: Clean up. No functional changes.
128 |     Revision 5, 2018/01/19: Formatting and misc changes. No functional changes.
129 | 
130 | 


--------------------------------------------------------------------------------
/example_images/CL_QCOM_BAYER__CL_QCOM_UNORM_INT10__192x192_FACE.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_BAYER__CL_QCOM_UNORM_INT10__192x192_FACE.dat


--------------------------------------------------------------------------------
/example_images/CL_QCOM_BAYER__CL_QCOM_UNORM_INT10__48x48_FACE.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_BAYER__CL_QCOM_UNORM_INT10__48x48_FACE.dat


--------------------------------------------------------------------------------
/example_images/CL_QCOM_BAYER__CL_QCOM_UNORM_MIPI10__192x192_FACE.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_BAYER__CL_QCOM_UNORM_MIPI10__192x192_FACE.dat


--------------------------------------------------------------------------------
/example_images/CL_QCOM_BAYER__CL_QCOM_UNORM_MIPI10__48x48_FACE.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_BAYER__CL_QCOM_UNORM_MIPI10__48x48_FACE.dat


--------------------------------------------------------------------------------
/example_images/CL_QCOM_NV12__CL_UNORM_INT8__128x128_CIRCLE.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_NV12__CL_UNORM_INT8__128x128_CIRCLE.dat


--------------------------------------------------------------------------------
/example_images/CL_QCOM_NV12__CL_UNORM_INT8__256x256_CIRCLE.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_NV12__CL_UNORM_INT8__256x256_CIRCLE.dat


--------------------------------------------------------------------------------
/example_images/CL_QCOM_NV12__CL_UNORM_INT8__64x64_CIRCLE.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_NV12__CL_UNORM_INT8__64x64_CIRCLE.dat


--------------------------------------------------------------------------------
/example_images/CL_QCOM_NV12__CL_UNORM_INT8__CONSTANT.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_NV12__CL_UNORM_INT8__CONSTANT.dat


--------------------------------------------------------------------------------
/example_images/CL_QCOM_NV12__CL_UNORM_INT8__INCREASING.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_NV12__CL_UNORM_INT8__INCREASING.dat


--------------------------------------------------------------------------------
/example_images/CL_QCOM_NV12__CL_UNORM_INT8__RANDOM.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_NV12__CL_UNORM_INT8__RANDOM.dat


--------------------------------------------------------------------------------
/example_images/CL_QCOM_P010__CL_QCOM_UNORM_INT10__CONSTANT.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_P010__CL_QCOM_UNORM_INT10__CONSTANT.dat


--------------------------------------------------------------------------------
/example_images/CL_QCOM_P010__CL_QCOM_UNORM_INT10__INCREASING.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_P010__CL_QCOM_UNORM_INT10__INCREASING.dat


--------------------------------------------------------------------------------
/example_images/CL_QCOM_P010__CL_QCOM_UNORM_INT10__RANDOM.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_P010__CL_QCOM_UNORM_INT10__RANDOM.dat


--------------------------------------------------------------------------------
/example_images/CL_QCOM_TP10__CL_QCOM_UNORM_INT10__CONSTANT.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_TP10__CL_QCOM_UNORM_INT10__CONSTANT.dat


--------------------------------------------------------------------------------
/example_images/CL_QCOM_TP10__CL_QCOM_UNORM_INT10__INCREASING.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_TP10__CL_QCOM_UNORM_INT10__INCREASING.dat


--------------------------------------------------------------------------------
/example_images/CL_QCOM_TP10__CL_QCOM_UNORM_INT10__RANDOM.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willhua/QualcommOpenCLSDKNote/71b7477ade38be5d90860e7d528ef9f0e0ee6b9f/example_images/CL_QCOM_TP10__CL_QCOM_UNORM_INT10__RANDOM.dat


--------------------------------------------------------------------------------
/example_images/create_example_images.py:
--------------------------------------------------------------------------------
  1 | import struct
  2 | import random as rand
  3 | 
  4 | ########
  5 | # NV12 #
  6 | ########
  7 | 
  8 | prefix = (
  9 |         "\x18\x00\x00\x00" +
 10 |         "\x18\x00\x00\x00" +
 11 |         "\xD2\x10\x00\x00" +
 12 |         "\x33\x41\x00\x00"
 13 | )
 14 | 
 15 | const_data = prefix
 16 | incr_data = prefix
 17 | for i in range(0, 0x18 * 0x18 * 3 / 2):
 18 |     const_data += struct.pack("B", 128)
 19 |     if i < 0x18 * 0x18:
 20 |         incr_data += struct.pack("B", i % 256)
 21 | 
 22 | for i in range(0, 0x18 * 0x18 / 4):
 23 |     incr_data += struct.pack("BB", i % 256, i % 256)
 24 | 
 25 | 
 26 | rand_prefix = (
 27 |         "\xA0\x00\x00\x00" +
 28 |         "\x78\x00\x00\x00" +
 29 |         "\xD2\x10\x00\x00" +
 30 |         "\x33\x41\x00\x00"
 31 | )
 32 | 
 33 | rand.seed(42)
 34 | 
 35 | rand_data = rand_prefix
 36 | for i in range(0xA0 * 0x78 * 3 / 2):
 37 |     rand_data += struct.pack("B", rand.randint(0, 255))
 38 | 
 39 | with open("CL_QCOM_NV12__CL_UNORM_INT8__RANDOM.dat", "wb") as f:
 40 |     f.write(rand_data)
 41 | 
 42 | with open("CL_QCOM_NV12__CL_UNORM_INT8__CONSTANT.dat", "wb") as f:
 43 |     f.write(const_data)
 44 | 
 45 | with open("CL_QCOM_NV12__CL_UNORM_INT8__INCREASING.dat", "wb") as f:
 46 |     f.write(incr_data)
 47 | 
 48 | # Make some circles of various sizes
 49 | for i in range(6, 9):
 50 |     height = width = 2 ** i
 51 |     data = ""
 52 |     data += struct.pack("<I", width)
 53 |     data += struct.pack("<I", height)
 54 |     data += "\xD2\x10\x00\x00" + \
 55 |             "\x33\x41\x00\x00"
 56 |     center         = (width / 2, height / 2)
 57 |     radius_squared = (width / 4) ** 2
 58 |     for w in range(width):
 59 |         for h in range(height):
 60 |             dist = ((center[0] - w) ** 2) + ((center[1] - h) ** 2)
 61 |             value = 255 if dist <= radius_squared else 0
 62 |             data += struct.pack("B", value)
 63 |     data += "\x00" * (width * height / 2)
 64 |     filename = "CL_QCOM_NV12__CL_UNORM_INT8__{}x{}_CIRCLE.dat".format(width, height)
 65 |     with open(filename, "wb") as f:
 66 |         f.write(data)
 67 | 
 68 | ########
 69 | # TP10 #
 70 | ########
 71 | 
 72 | prefix = (
 73 |         "\x18\x00\x00\x00" +
 74 |         "\x18\x00\x00\x00" +
 75 |         "\x5D\x41\x00\x00" +
 76 |         "\x45\x41\x00\x00"
 77 | )
 78 | 
 79 | const_data = prefix
 80 | incr_data = prefix
 81 | tp10_mask = 0x3FFFFFFF # Zeroes out unused bits
 82 | for i in range(0, 0x18 * 0x18 / 3):
 83 |     const_data += struct.pack("<I", 0xFFFFFFFF & tp10_mask)
 84 |     incr_data += struct.pack("<I", (i % (2 ** 32)) & tp10_mask)
 85 | 
 86 | for i in range(0, 0x18 * 0x18 / 6):
 87 |     const_data += struct.pack("<I", 0xEEEEEEEE & tp10_mask)
 88 |     incr_data += struct.pack("<I", (i % (2 ** 32)) & tp10_mask)
 89 | 
 90 | rand_prefix = (
 91 |         "\xA2\x00\x00\x00" +
 92 |         "\x78\x00\x00\x00" +
 93 |         "\x5D\x41\x00\x00" +
 94 |         "\x45\x41\x00\x00"
 95 | )
 96 | 
 97 | rand_data = rand_prefix
 98 | for i in range(0xA2 * 0x78 / 2):
 99 |     rand_data += struct.pack("<I", rand.randint(0, 2 ** 32) & tp10_mask)
100 | 
101 | with open("CL_QCOM_TP10__CL_QCOM_UNORM_INT10__RANDOM.dat", "wb") as f:
102 |     f.write(rand_data)
103 | 
104 | with open("CL_QCOM_TP10__CL_QCOM_UNORM_INT10__CONSTANT.dat", "wb") as f:
105 |     f.write(const_data)
106 | 
107 | with open("CL_QCOM_TP10__CL_QCOM_UNORM_INT10__INCREASING.dat", "wb") as f:
108 |     f.write(incr_data)
109 | 
110 | ########
111 | # P010 #
112 | ########
113 | 
114 | prefix = (
115 |         "\x18\x00\x00\x00" +
116 |         "\x18\x00\x00\x00" +
117 |         "\x5D\x41\x00\x00" +
118 |         "\x3C\x41\x00\x00"
119 | )
120 | 
121 | const_data = prefix
122 | incr_data = prefix
123 | p010_mask = 0xFFC0 # Zeroes out unused bits
124 | for i in range(0, 0x18 * 0x18):
125 |     const_data += struct.pack("<H", 0xFFFF & p010_mask)
126 |     incr_data += struct.pack("<H", (i % (2 ** 16)) & p010_mask)
127 | 
128 | for i in range(0, 0x18 * 0x18 / 4):
129 |     const_data += struct.pack("<HH", 0xEEEE & p010_mask, 0xDDDD & p010_mask)
130 |     incr_data += struct.pack("<HH", (i % (2 ** 16)) & p010_mask, (i % (2 ** 16)) & p010_mask)
131 | 
132 | rand_prefix = (
133 |         "\xA2\x00\x00\x00" +
134 |         "\x78\x00\x00\x00" +
135 |         "\x5D\x41\x00\x00" +
136 |         "\x3C\x41\x00\x00"
137 | )
138 | 
139 | rand_data = rand_prefix
140 | for i in range(0xA2 * 0x78 * 3 / 2):
141 |     rand_data += struct.pack("<H", rand.randint(0, 2 ** 16) & p010_mask)
142 | 
143 | with open("CL_QCOM_P010__CL_QCOM_UNORM_INT10__RANDOM.dat", "wb") as f:
144 |     f.write(rand_data)
145 | 
146 | with open("CL_QCOM_P010__CL_QCOM_UNORM_INT10__CONSTANT.dat", "wb") as f:
147 |     f.write(const_data)
148 | 
149 | with open("CL_QCOM_P010__CL_QCOM_UNORM_INT10__INCREASING.dat", "wb") as f:
150 |     f.write(incr_data)
151 | # 24x24 table of 0/1 flags, read below as neutral_face[column + row * 24]; a 1 becomes green value 255.
152 | neutral_face = \
153 |     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
154 |      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
155 |      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
156 |      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
157 |      0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
158 |      0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
159 |      0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
160 |      0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
161 |      0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
162 |      0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
163 |      0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
164 |      0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
165 |      0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
166 |      0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
167 |      0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
168 |      0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
169 |      0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
170 |      0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
171 |      0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
172 |      0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
173 |      0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
174 |      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175 |      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176 |      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
177 | 
178 | ##################
179 | # Bayer + MIPI10 #
180 | ##################
181 | 
182 | prefix = (
183 |         "\x30\x00\x00\x00" +
184 |         "\x30\x00\x00\x00" +
185 |         "\x59\x41\x00\x00" +
186 |         "\x4E\x41\x00\x00"
187 | )
188 | 
189 | bayer_data = prefix
190 | for j in range(24):
191 |     blue_row, red_row = "", ""
192 |     for i in range(24 / 2):
193 |         pixel_1, pixel_2 = neutral_face[2 * i + j * 24], neutral_face[2 * i + 1 + j * 24]
194 |         green_val_1, green_val_2 = pixel_1 * 255, pixel_2 * 255
195 |         blue_val = int(float(2 * i + j * 24) / (24 * 24) * 255)
196 |         red_val = 255 - blue_val
197 |         blue_row += struct.pack("BBBBB", blue_val, green_val_1, blue_val, green_val_2, 0)
198 |         red_row += struct.pack("BBBBB", green_val_1, red_val, green_val_2, red_val, 0)
199 |     bayer_data += blue_row
200 |     bayer_data += red_row
201 | 
202 | with open("CL_QCOM_BAYER__CL_QCOM_UNORM_MIPI10__48x48_FACE.dat", "wb") as f:
203 |     f.write(bayer_data)
204 | 
205 | prefix = (
206 |         "\xC0\x00\x00\x00" +
207 |         "\xC0\x00\x00\x00" +
208 |         "\x59\x41\x00\x00" +
209 |         "\x4E\x41\x00\x00"
210 | )
211 | 
212 | bayer_data = prefix
213 | for j in range(24):
214 |     blue_row, red_row = "", ""
215 |     for i in range(24):
216 |         pixel_1, pixel_2 = neutral_face[i + j * 24], neutral_face[i + j * 24]
217 |         green_val_1, green_val_2 = pixel_1 * 255, pixel_2 * 255
218 |         blue_val = int(float(i + j * 24) / (24 * 24) * 255)
219 |         red_val = 255 - blue_val
220 |         blue_row += struct.pack("BBBBB", blue_val, green_val_1, blue_val, green_val_2, 0) * 2
221 |         red_row += struct.pack("BBBBB", green_val_1, red_val, green_val_2, red_val, 0) * 2
222 |     bayer_data += blue_row
223 |     bayer_data += red_row
224 |     bayer_data += blue_row
225 |     bayer_data += red_row
226 |     bayer_data += blue_row
227 |     bayer_data += red_row
228 |     bayer_data += blue_row
229 |     bayer_data += red_row
230 | 
231 | with open("CL_QCOM_BAYER__CL_QCOM_UNORM_MIPI10__192x192_FACE.dat", "wb") as f:
232 |     f.write(bayer_data)
233 | 
234 | ###########################
235 | # Bayer + Unpacked 10-bit #
236 | ###########################
237 | 
238 | prefix = (
239 |         "\x30\x00\x00\x00" +
240 |         "\x30\x00\x00\x00" +
241 |         "\x5D\x41\x00\x00" +
242 |         "\x4E\x41\x00\x00"
243 | )
244 | 
245 | bayer_data = prefix
246 | for j in range(24):
247 |     blue_row, red_row = "", ""
248 |     for i in range(24 / 2):
249 |         pixel_1, pixel_2 = neutral_face[2 * i + j * 24], neutral_face[2 * i + 1 + j * 24]
250 |         green_val_1, green_val_2 = pixel_1 * 255, pixel_2 * 255
251 |         blue_val = int(float(2 * i + j * 24) / (24 * 24) * 255)
252 |         red_val = 255 - blue_val
253 |         blue_val    <<= 8
254 |         green_val_1 <<= 8
255 |         green_val_2 <<= 8
256 |         red_val     <<= 8
257 |         blue_row += struct.pack("<HHHH", blue_val, green_val_1, blue_val, green_val_2)
258 |         red_row += struct.pack("<HHHH", green_val_1, red_val, green_val_2, red_val)
259 |     bayer_data += blue_row
260 |     bayer_data += red_row
261 | 
262 | with open("CL_QCOM_BAYER__CL_QCOM_UNORM_INT10__48x48_FACE.dat", "wb") as f:
263 |     f.write(bayer_data)
264 | 
265 | prefix = (
266 |         "\xC0\x00\x00\x00" +
267 |         "\xC0\x00\x00\x00" +
268 |         "\x5D\x41\x00\x00" +
269 |         "\x4E\x41\x00\x00"
270 | )
271 | 
272 | bayer_data = prefix
273 | for j in range(24):
274 |     blue_row, red_row = "", ""
275 |     for i in range(24):
276 |         pixel_1, pixel_2 = neutral_face[i + j * 24], neutral_face[i + j * 24]
277 |         green_val_1, green_val_2 = pixel_1 * 255, pixel_2 * 255
278 |         blue_val = int(float(i + j * 24) / (24 * 24) * 255)
279 |         red_val = 255 - blue_val
280 |         blue_val    <<= 8
281 |         green_val_1 <<= 8
282 |         green_val_2 <<= 8
283 |         red_val     <<= 8
284 |         blue_row += struct.pack("<HHHH", blue_val, green_val_1, blue_val, green_val_2) * 2
285 |         red_row += struct.pack("<HHHH", green_val_1, red_val, green_val_2, red_val) * 2
286 |     bayer_data += blue_row
287 |     bayer_data += red_row
288 |     bayer_data += blue_row
289 |     bayer_data += red_row
290 |     bayer_data += blue_row
291 |     bayer_data += red_row
292 |     bayer_data += blue_row
293 |     bayer_data += red_row
294 | 
295 | with open("CL_QCOM_BAYER__CL_QCOM_UNORM_INT10__192x192_FACE.dat", "wb") as f:
296 |     f.write(bayer_data)
297 | 


--------------------------------------------------------------------------------
/inc/CL/cl_egl.h:
--------------------------------------------------------------------------------
  1 | /*******************************************************************************
  2 |  * Copyright (c) 2008-2015 The Khronos Group Inc.
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a
  5 |  * copy of this software and/or associated documentation files (the
  6 |  * "Materials"), to deal in the Materials without restriction, including
  7 |  * without limitation the rights to use, copy, modify, merge, publish,
  8 |  * distribute, sublicense, and/or sell copies of the Materials, and to
  9 |  * permit persons to whom the Materials are furnished to do so, subject to
 10 |  * the following conditions:
 11 |  *
 12 |  * The above copyright notice and this permission notice shall be included
 13 |  * in all copies or substantial portions of the Materials.
 14 |  *
 15 |  * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
 16 |  * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
 17 |  * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
 18 |  *    https://www.khronos.org/registry/
 19 |  *
 20 |  * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 21 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 22 |  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 23 |  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 24 |  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 25 |  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 26 |  * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 27 |  ******************************************************************************/
 28 | 
 29 | #ifndef __OPENCL_CL_EGL_H
 30 | #define __OPENCL_CL_EGL_H
 31 | 
 32 | #ifdef __APPLE__
 33 | 
 34 | #else
 35 | #include <CL/cl.h>
 36 | #endif  
 37 | 
 38 | #ifdef __cplusplus
 39 | extern "C" {
 40 | #endif
 41 | 
 42 | 
 43 | /* Command types for events created with the EGL sharing entry points declared below */
 44 | #define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR  0x202F
 45 | #define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR    0x202D
 46 | #define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR    0x202E
 47 | 
 48 | /* Error codes defined for the EGL sharing entry points declared below */
 49 | #define CL_INVALID_EGL_OBJECT_KHR             -1093
 50 | #define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR      -1092
 51 | 
 52 | /* CLeglImageKHR is an opaque handle to an EGLImage */
 53 | typedef void* CLeglImageKHR;
 54 | 
 55 | /* CLeglDisplayKHR is an opaque handle to an EGLDisplay */
 56 | typedef void* CLeglDisplayKHR;
 57 | 
 58 | /* CLeglSyncKHR is an opaque handle to an EGLSync object */
 59 | typedef void* CLeglSyncKHR;
 60 | 
 61 | /* properties passed to clCreateFromEGLImageKHR */
 62 | typedef intptr_t cl_egl_image_properties_khr;
 63 | 
 64 | 
 65 | #define cl_khr_egl_image 1
 66 | 
 67 | extern CL_API_ENTRY cl_mem CL_API_CALL
 68 | clCreateFromEGLImageKHR(cl_context                  /* context */,
 69 |                         CLeglDisplayKHR             /* egldisplay */,
 70 |                         CLeglImageKHR               /* eglimage */,
 71 |                         cl_mem_flags                /* flags */,
 72 |                         const cl_egl_image_properties_khr * /* properties */,
 73 |                         cl_int *                    /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 74 | 
 75 | typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)(
 76 | 	cl_context                  context,
 77 | 	CLeglDisplayKHR             egldisplay,
 78 | 	CLeglImageKHR               eglimage,
 79 | 	cl_mem_flags                flags,
 80 | 	const cl_egl_image_properties_khr * properties,
 81 | 	cl_int *                    errcode_ret);
 82 | 
 83 | 
 84 | extern CL_API_ENTRY cl_int CL_API_CALL
 85 | clEnqueueAcquireEGLObjectsKHR(cl_command_queue /* command_queue */,
 86 |                               cl_uint          /* num_objects */,
 87 |                               const cl_mem *   /* mem_objects */,
 88 |                               cl_uint          /* num_events_in_wait_list */,
 89 |                               const cl_event * /* event_wait_list */,
 90 |                               cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;
 91 | 
 92 | typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)(
 93 | 	cl_command_queue command_queue,
 94 | 	cl_uint          num_objects,
 95 | 	const cl_mem *   mem_objects,
 96 | 	cl_uint          num_events_in_wait_list,
 97 | 	const cl_event * event_wait_list,
 98 | 	cl_event *       event);
 99 | 
100 | 
101 | extern CL_API_ENTRY cl_int CL_API_CALL
102 | clEnqueueReleaseEGLObjectsKHR(cl_command_queue /* command_queue */,
103 |                               cl_uint          /* num_objects */,
104 |                               const cl_mem *   /* mem_objects */,
105 |                               cl_uint          /* num_events_in_wait_list */,
106 |                               const cl_event * /* event_wait_list */,
107 |                               cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;
108 | 
109 | typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)(
110 | 	cl_command_queue command_queue,
111 | 	cl_uint          num_objects,
112 | 	const cl_mem *   mem_objects,
113 | 	cl_uint          num_events_in_wait_list,
114 | 	const cl_event * event_wait_list,
115 | 	cl_event *       event);
116 | 
117 | 
118 | #define cl_khr_egl_event 1
119 | 
120 | extern CL_API_ENTRY cl_event CL_API_CALL
121 | clCreateEventFromEGLSyncKHR(cl_context      /* context */,
122 |                             CLeglSyncKHR    /* sync */,
123 |                             CLeglDisplayKHR /* display */,
124 |                             cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
125 | 
126 | typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)(
127 | 	cl_context      context,
128 | 	CLeglSyncKHR    sync,
129 | 	CLeglDisplayKHR display,
130 | 	cl_int *        errcode_ret);
131 | 
132 | #ifdef __cplusplus
133 | }
134 | #endif
135 | 
136 | #endif /* __OPENCL_CL_EGL_H */
137 | 


--------------------------------------------------------------------------------
/inc/CL/cl_ext_qcom.h:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) 2009-2017 Qualcomm Technologies, Inc.  All Rights Reserved.
  2 |  * Qualcomm Technologies Proprietary and Confidential.
  3 |  */
  4 | 
  5 | #ifndef __OPENCL_CL_EXT_QCOM_H
  6 | #define __OPENCL_CL_EXT_QCOM_H
  7 | 
  8 | // Needed by cl_khr_egl_event extension 
  9 | #include <EGL/egl.h>
 10 | #include <EGL/eglext.h>
 11 | #include <CL/cl_ext.h>
 12 | 
 13 | #ifdef __cplusplus
 14 | extern "C" {
 15 | #endif
 16 | 
 17 | 
 18 | /************************************
 19 |  * cl_qcom_create_buffer_from_image *
 20 |  ************************************/
 21 | 
 22 | #define CL_BUFFER_FROM_IMAGE_ROW_PITCH_QCOM         0x40C0
 23 | #define CL_BUFFER_FROM_IMAGE_SLICE_PITCH_QCOM       0x40C1
 24 | 
 25 | extern CL_API_ENTRY cl_mem CL_API_CALL
 26 | clCreateBufferFromImageQCOM(cl_mem       image,
 27 |                             cl_mem_flags flags,
 28 |                             cl_int      *errcode_ret);
 29 | 
 30 | 
 31 | /************************************
 32 |  * cl_qcom_limited_printf extension *
 33 |  ************************************/
 34 | 
 35 | /* Builtin printf function buffer size in bytes. */
 36 | #define CL_DEVICE_PRINTF_BUFFER_SIZE_QCOM           0x1049
 37 | 
 38 | 
 39 | /*************************************
 40 |  * cl_qcom_extended_images extension *
 41 |  *************************************/
 42 | 
 43 | #define CL_CONTEXT_ENABLE_EXTENDED_IMAGES_QCOM      0x40AA
 44 | #define CL_DEVICE_EXTENDED_IMAGE2D_MAX_WIDTH_QCOM   0x40AB
 45 | #define CL_DEVICE_EXTENDED_IMAGE2D_MAX_HEIGHT_QCOM  0x40AC
 46 | #define CL_DEVICE_EXTENDED_IMAGE3D_MAX_WIDTH_QCOM   0x40AD
 47 | #define CL_DEVICE_EXTENDED_IMAGE3D_MAX_HEIGHT_QCOM  0x40AE
 48 | #define CL_DEVICE_EXTENDED_IMAGE3D_MAX_DEPTH_QCOM   0x40AF
 49 | 
 50 | /*************************************
 51 |  * cl_qcom_perf_hint extension *
 52 |  *************************************/
 53 | 
 54 | typedef cl_uint                                     cl_perf_hint;
 55 | 
 56 | #define CL_CONTEXT_PERF_HINT_QCOM                   0x40C2
 57 | 
 58 | /*cl_perf_hint*/
 59 | #define CL_PERF_HINT_HIGH_QCOM                      0x40C3
 60 | #define CL_PERF_HINT_NORMAL_QCOM                    0x40C4
 61 | #define CL_PERF_HINT_LOW_QCOM                       0x40C5
 62 | 
 63 | extern CL_API_ENTRY cl_int CL_API_CALL
 64 | clSetPerfHintQCOM(cl_context    context,
 65 |                   cl_perf_hint  perf_hint);
 66 | 
 67 | // This extension is published at Khronos, so its definitions are made in cl_ext.h.
 68 | // This duplication is for backward compatibility.
 69 | 
 70 | #ifndef CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM
 71 | 
 72 | /*********************************
 73 | * cl_qcom_android_native_buffer_host_ptr extension
 74 | *********************************/
 75 | 
 76 | #define CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM                  0x40C6
 77 | 
 78 | 
 79 | typedef struct _cl_mem_android_native_buffer_host_ptr
 80 | {
 81 |     // Type of external memory allocation.
 82 |     // Must be CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM for Android native buffers.
 83 |     cl_mem_ext_host_ptr  ext_host_ptr;
 84 | 
 85 |     // Virtual pointer to the android native buffer
 86 |     void*                anb_ptr;
 87 | 
 88 | } cl_mem_android_native_buffer_host_ptr;
 89 | 
 90 | #endif   //#ifndef CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM
 91 | 
 92 | /***********************************
 93 | * cl_img_egl_image extension *
 94 | ************************************/
 95 | typedef void* CLeglImageIMG;
 96 | typedef void* CLeglDisplayIMG;
 97 | 
 98 | extern CL_API_ENTRY cl_mem CL_API_CALL
 99 | clCreateFromEGLImageIMG(cl_context      context,
100 |                         cl_mem_flags     flags,
101 |                         CLeglImageIMG    image,
102 |                         CLeglDisplayIMG  display,
103 |                         cl_int           *errcode_ret);
104 | 
105 | 
106 | /*********************************
107 | * cl_qcom_other_image extension
108 | *********************************/
109 | 
110 | // Extended flag for creating/querying QCOM non-standard images
111 | #define CL_MEM_OTHER_IMAGE_QCOM                             (1<<25)
112 | 
113 | // cl_channel_type
114 | #define CL_QCOM_UNORM_MIPI10                                0x4159
115 | #define CL_QCOM_UNORM_MIPI12                                0x415A
116 | #define CL_QCOM_UNSIGNED_MIPI10                             0x415B
117 | #define CL_QCOM_UNSIGNED_MIPI12                             0x415C
118 | #define CL_QCOM_UNORM_INT10                                 0x415D
119 | #define CL_QCOM_UNORM_INT12                                 0x415E
120 | #define CL_QCOM_UNSIGNED_INT16                              0x415F
121 | 
122 | // cl_channel_order
123 | // Dedicate 0x4130-0x415F range for QCOM extended image formats
124 | // 0x4130 - 0x4132 range is assigned to pixel-oriented compressed format
125 | #define CL_QCOM_BAYER                                       0x414E
126 | 
127 | #define CL_QCOM_NV12                                        0x4133
128 | #define CL_QCOM_NV12_Y                                      0x4134
129 | #define CL_QCOM_NV12_UV                                     0x4135
130 | 
131 | #define CL_QCOM_TILED_NV12                                  0x4136
132 | #define CL_QCOM_TILED_NV12_Y                                0x4137
133 | #define CL_QCOM_TILED_NV12_UV                               0x4138
134 | 
135 | #define CL_QCOM_P010                                        0x413C
136 | #define CL_QCOM_P010_Y                                      0x413D
137 | #define CL_QCOM_P010_UV                                     0x413E
138 | 
139 | #define CL_QCOM_TILED_P010                                  0x413F
140 | #define CL_QCOM_TILED_P010_Y                                0x4140
141 | #define CL_QCOM_TILED_P010_UV                               0x4141
142 | 
143 | 
144 | #define CL_QCOM_TP10                                        0x4145
145 | #define CL_QCOM_TP10_Y                                      0x4146
146 | #define CL_QCOM_TP10_UV                                     0x4147
147 | 
148 | #define CL_QCOM_TILED_TP10                                  0x4148
149 | #define CL_QCOM_TILED_TP10_Y                                0x4149
150 | #define CL_QCOM_TILED_TP10_UV                               0x414A
151 | 
152 | /*********************************
153 | * cl_qcom_compressed_image extension
154 | *********************************/
155 | 
156 | // Extended flag for creating/querying QCOM non-planar compressed images
157 | #define CL_MEM_COMPRESSED_IMAGE_QCOM                        (1<<27)
158 | 
159 | // Extended image format
160 | // cl_channel_order
161 | #define CL_QCOM_COMPRESSED_RGBA                             0x4130
162 | #define CL_QCOM_COMPRESSED_RGBx                             0x4131
163 | 
164 | #define CL_QCOM_COMPRESSED_NV12_Y                           0x413A
165 | #define CL_QCOM_COMPRESSED_NV12_UV                          0x413B
166 | 
167 | #define CL_QCOM_COMPRESSED_P010                             0x4142
168 | #define CL_QCOM_COMPRESSED_P010_Y                           0x4143
169 | #define CL_QCOM_COMPRESSED_P010_UV                          0x4144
170 | 
171 | #define CL_QCOM_COMPRESSED_TP10                             0x414B
172 | #define CL_QCOM_COMPRESSED_TP10_Y                           0x414C
173 | #define CL_QCOM_COMPRESSED_TP10_UV                          0x414D
174 | 
175 | #define CL_QCOM_COMPRESSED_NV12_4R                          0x414F
176 | #define CL_QCOM_COMPRESSED_NV12_4R_Y                        0x4150
177 | #define CL_QCOM_COMPRESSED_NV12_4R_UV                       0x4151
178 | /*********************************
179 | * cl_qcom_compressed_yuv_image_read extension
180 | *********************************/
181 | 
182 | // Extended flag for creating/querying QCOM compressed images
183 | #define CL_MEM_COMPRESSED_YUV_IMAGE_QCOM                    (1<<28)
184 | 
185 | // Extended image format
186 | #define CL_QCOM_COMPRESSED_NV12                             0x10C4
187 | 
188 | // Extended flag for setting ION buffer allocation type
189 | #define CL_MEM_ION_HOST_PTR_COMPRESSED_YUV_QCOM                 0x40CD
190 | #define CL_MEM_ION_HOST_PTR_PROTECTED_COMPRESSED_YUV_QCOM       0x40CE
191 | 
192 | /*********************************
193 | * cl_qcom_accelerated_image_ops
194 | *********************************/
195 | #define CL_MEM_OBJECT_WEIGHT_IMAGE_QCOM                         0x4110
196 | #define CL_DEVICE_HOF_MAX_NUM_PHASES_QCOM                       0x4111
197 | #define CL_DEVICE_HOF_MAX_FILTER_SIZE_X_QCOM                    0x4112
198 | #define CL_DEVICE_HOF_MAX_FILTER_SIZE_Y_QCOM                    0x4113
199 | #define CL_DEVICE_BLOCK_MATCHING_MAX_REGION_SIZE_X_QCOM         0x4114
200 | #define CL_DEVICE_BLOCK_MATCHING_MAX_REGION_SIZE_Y_QCOM         0x4115
201 | 
202 | //Extended flag for specifying weight image type
203 | #define CL_WEIGHT_IMAGE_SEPARABLE_QCOM                          (1<<0)
204 | 
205 | // Box filter dimensions (part of cl_qcom_accelerated_image_ops).
206 | typedef struct _cl_box_filter_size_qcom
207 | {
208 |     // Width of the box filter in the X direction.
209 |     float box_filter_width;
210 | 
211 |     // Height of the box filter in the Y direction.
212 |     float box_filter_height;
213 | } cl_box_filter_size_qcom;
214 | 
215 | // HOF weight image descriptor: center point plus type flags.
216 | typedef struct _cl_weight_desc_qcom
217 | {
218 |     /** Coordinate of the "center" point of the weight image,
219 |         based on the weight image's top-left corner as the origin. */
220 |     size_t        center_coord_x;
221 |     size_t        center_coord_y;
222 |     cl_bitfield   flags; // Weight image type flags; presumably CL_WEIGHT_IMAGE_SEPARABLE_QCOM or 0 -- confirm against the extension spec.
223 | } cl_weight_desc_qcom;
224 | 
225 | typedef struct _cl_weight_image_desc_qcom // Pairs an image descriptor with its weight descriptor.
226 | {
227 |     cl_image_desc           image_desc;  // Image descriptor for the weight image.
228 |     cl_weight_desc_qcom     weight_desc; // Center point and flags (see cl_weight_desc_qcom).
229 | } cl_weight_image_desc_qcom;
230 | 
231 | /*************************************
232 |  * cl_qcom_protected_context extension *
233 |  *************************************/
234 | 
235 | #define CL_CONTEXT_PROTECTED_QCOM                    0x40C7
236 | #define CL_MEM_ION_HOST_PTR_PROTECTED_QCOM           0x40C8
237 | 
238 | /*************************************
239 |  * cl_qcom_priority_hint extension *
240 |  *************************************/
241 | #define CL_PRIORITY_HINT_NONE_QCOM                   0
242 | typedef cl_uint                                     cl_priority_hint;
243 | 
244 | #define CL_CONTEXT_PRIORITY_HINT_QCOM               0x40C9
245 | 
246 | /*cl_priority_hint*/
247 | #define CL_PRIORITY_HINT_HIGH_QCOM                  0x40CA
248 | #define CL_PRIORITY_HINT_NORMAL_QCOM                0x40CB
249 | #define CL_PRIORITY_HINT_LOW_QCOM                   0x40CC
250 | 
251 | #ifdef __cplusplus
252 | }
253 | #endif
254 | 
255 | #endif /* __OPENCL_CL_EXT_QCOM_H */
256 | 


--------------------------------------------------------------------------------
/inc/CL/cl_gl.h:
--------------------------------------------------------------------------------
  1 | /**********************************************************************************
  2 |  * Copyright (c) 2008-2015 The Khronos Group Inc.
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a
  5 |  * copy of this software and/or associated documentation files (the
  6 |  * "Materials"), to deal in the Materials without restriction, including
  7 |  * without limitation the rights to use, copy, modify, merge, publish,
  8 |  * distribute, sublicense, and/or sell copies of the Materials, and to
  9 |  * permit persons to whom the Materials are furnished to do so, subject to
 10 |  * the following conditions:
 11 |  *
 12 |  * The above copyright notice and this permission notice shall be included
 13 |  * in all copies or substantial portions of the Materials.
 14 |  *
 15 |  * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
 16 |  * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
 17 |  * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
 18 |  *    https://www.khronos.org/registry/
 19 |  *
 20 |  * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 21 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 22 |  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 23 |  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 24 |  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 25 |  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 26 |  * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 27 |  **********************************************************************************/
 28 | 
 29 | #ifndef __OPENCL_CL_GL_H
 30 | #define __OPENCL_CL_GL_H
 31 | 
 32 | #ifdef __APPLE__
 33 | #include <OpenCL/cl.h>
 34 | #else
 35 | #include <CL/cl.h>
 36 | #endif	
 37 | 
 38 | #ifdef __cplusplus
 39 | extern "C" {
 40 | #endif
 41 | 
 42 | typedef cl_uint     cl_gl_object_type;
 43 | typedef cl_uint     cl_gl_texture_info;
 44 | typedef cl_uint     cl_gl_platform_info;
 45 | typedef struct __GLsync *cl_GLsync;
 46 | 
 47 | /* cl_gl_object_type = 0x2000 - 0x2011 enum values are currently taken           */
 48 | #define CL_GL_OBJECT_BUFFER                     0x2000
 49 | #define CL_GL_OBJECT_TEXTURE2D                  0x2001
 50 | #define CL_GL_OBJECT_TEXTURE3D                  0x2002
 51 | #define CL_GL_OBJECT_RENDERBUFFER               0x2003
 52 | #define CL_GL_OBJECT_TEXTURE2D_ARRAY            0x200E
 53 | #define CL_GL_OBJECT_TEXTURE1D                  0x200F
 54 | #define CL_GL_OBJECT_TEXTURE1D_ARRAY            0x2010
 55 | #define CL_GL_OBJECT_TEXTURE_BUFFER             0x2011
 56 | 
 57 | /* cl_gl_texture_info           */
 58 | #define CL_GL_TEXTURE_TARGET                    0x2004
 59 | #define CL_GL_MIPMAP_LEVEL                      0x2005
 60 | #define CL_GL_NUM_SAMPLES                       0x2012
 61 | 
 62 | 
 63 | extern CL_API_ENTRY cl_mem CL_API_CALL
 64 | clCreateFromGLBuffer(cl_context     /* context */,
 65 |                      cl_mem_flags   /* flags */,
 66 |                      cl_GLuint      /* bufobj */,
 67 |                      cl_int *       /* errcode_ret; cl_int like every other entry point in this header */) CL_API_SUFFIX__VERSION_1_0;
 68 | 
 69 | extern CL_API_ENTRY cl_mem CL_API_CALL
 70 | clCreateFromGLTexture(cl_context      /* context */,
 71 |                       cl_mem_flags    /* flags */,
 72 |                       cl_GLenum       /* target */,
 73 |                       cl_GLint        /* miplevel */,
 74 |                       cl_GLuint       /* texture */,
 75 |                       cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
 76 |     
 77 | extern CL_API_ENTRY cl_mem CL_API_CALL
 78 | clCreateFromGLRenderbuffer(cl_context   /* context */,
 79 |                            cl_mem_flags /* flags */,
 80 |                            cl_GLuint    /* renderbuffer */,
 81 |                            cl_int *     /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 82 | 
 83 | extern CL_API_ENTRY cl_int CL_API_CALL
 84 | clGetGLObjectInfo(cl_mem                /* memobj */,
 85 |                   cl_gl_object_type *   /* gl_object_type */,
 86 |                   cl_GLuint *           /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
 87 |                   
 88 | extern CL_API_ENTRY cl_int CL_API_CALL
 89 | clGetGLTextureInfo(cl_mem               /* memobj */,
 90 |                    cl_gl_texture_info   /* param_name */,
 91 |                    size_t               /* param_value_size */,
 92 |                    void *               /* param_value */,
 93 |                    size_t *             /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 94 | 
 95 | extern CL_API_ENTRY cl_int CL_API_CALL
 96 | clEnqueueAcquireGLObjects(cl_command_queue      /* command_queue */,
 97 |                           cl_uint               /* num_objects */,
 98 |                           const cl_mem *        /* mem_objects */,
 99 |                           cl_uint               /* num_events_in_wait_list */,
100 |                           const cl_event *      /* event_wait_list */,
101 |                           cl_event *            /* event */) CL_API_SUFFIX__VERSION_1_0;
102 | 
103 | extern CL_API_ENTRY cl_int CL_API_CALL
104 | clEnqueueReleaseGLObjects(cl_command_queue      /* command_queue */,
105 |                           cl_uint               /* num_objects */,
106 |                           const cl_mem *        /* mem_objects */,
107 |                           cl_uint               /* num_events_in_wait_list */,
108 |                           const cl_event *      /* event_wait_list */,
109 |                           cl_event *            /* event */) CL_API_SUFFIX__VERSION_1_0;
110 | 
111 | 
112 | /* Deprecated OpenCL 1.1 APIs */
113 | extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
114 | clCreateFromGLTexture2D(cl_context      /* context */,
115 |                         cl_mem_flags    /* flags */,
116 |                         cl_GLenum       /* target */,
117 |                         cl_GLint        /* miplevel */,
118 |                         cl_GLuint       /* texture */,
119 |                         cl_int *        /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
120 |     
121 | extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
122 | clCreateFromGLTexture3D(cl_context      /* context */,
123 |                         cl_mem_flags    /* flags */,
124 |                         cl_GLenum       /* target */,
125 |                         cl_GLint        /* miplevel */,
126 |                         cl_GLuint       /* texture */,
127 |                         cl_int *        /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
128 |     
129 | /* cl_khr_gl_sharing extension  */
130 |     
131 | #define cl_khr_gl_sharing 1
132 |     
133 | typedef cl_uint     cl_gl_context_info;
134 |     
135 | /* Additional Error Codes  */
136 | #define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR  -1000
137 |     
138 | /* cl_gl_context_info  */
139 | #define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR    0x2006
140 | #define CL_DEVICES_FOR_GL_CONTEXT_KHR           0x2007
141 |     
142 | /* Additional cl_context_properties  */
143 | #define CL_GL_CONTEXT_KHR                       0x2008
144 | #define CL_EGL_DISPLAY_KHR                      0x2009
145 | #define CL_GLX_DISPLAY_KHR                      0x200A
146 | #define CL_WGL_HDC_KHR                          0x200B
147 | #define CL_CGL_SHAREGROUP_KHR                   0x200C
148 |     
149 | extern CL_API_ENTRY cl_int CL_API_CALL
150 | clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
151 |                       cl_gl_context_info            /* param_name */,
152 |                       size_t                        /* param_value_size */,
153 |                       void *                        /* param_value */,
154 |                       size_t *                      /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
155 |     
156 | typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
157 |     const cl_context_properties * properties,
158 |     cl_gl_context_info            param_name,
159 |     size_t                        param_value_size,
160 |     void *                        param_value,
161 |     size_t *                      param_value_size_ret);
162 | 
163 | #ifdef __cplusplus
164 | }
165 | #endif
166 | 
167 | #endif  /* __OPENCL_CL_GL_H */
168 | 


--------------------------------------------------------------------------------
/inc/CL/cl_gl_ext.h:
--------------------------------------------------------------------------------
 1 | /**********************************************************************************
 2 |  * Copyright (c) 2008-2015 The Khronos Group Inc.
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a
 5 |  * copy of this software and/or associated documentation files (the
 6 |  * "Materials"), to deal in the Materials without restriction, including
 7 |  * without limitation the rights to use, copy, modify, merge, publish,
 8 |  * distribute, sublicense, and/or sell copies of the Materials, and to
 9 |  * permit persons to whom the Materials are furnished to do so, subject to
10 |  * the following conditions:
11 |  *
12 |  * The above copyright notice and this permission notice shall be included
13 |  * in all copies or substantial portions of the Materials.
14 |  *
15 |  * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
16 |  * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
17 |  * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
18 |  *    https://www.khronos.org/registry/
19 |  *
20 |  * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 |  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 |  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 |  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 |  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 |  * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
27 |  **********************************************************************************/
28 | 
29 | /* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
30 | 
31 | /* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have           */
32 | /* OpenGL dependencies.                                                         */
33 | 
34 | #ifndef __OPENCL_CL_GL_EXT_H
35 | #define __OPENCL_CL_GL_EXT_H
36 | 
37 | #ifdef __cplusplus
38 | extern "C" {
39 | #endif
40 | 
41 | #ifdef __APPLE__
42 |     #include <OpenCL/cl_gl.h>
43 | #else
44 |     #include <CL/cl_gl.h>
45 | #endif
46 | 
47 | /*
48 |  * For each extension, follow this template
49 |  *  cl_VEN_extname extension  */
50 | /* #define cl_VEN_extname 1
51 |  * ... define new types, if any
52 |  * ... define new tokens, if any
53 |  * ... define new APIs, if any
54 |  *
55 |  *  If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header
56 |  *  This allows us to avoid having to decide whether to include GL headers or GLES here.
57 |  */
58 | 
59 | /* 
60 |  *  cl_khr_gl_event  extension
61 |  *  See section 9.9 in the OpenCL 1.1 spec for more information
62 |  */
 63 | #define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR     0x200D
 64 | /* Prototype: takes a cl_context and a cl_GLsync handle and returns a cl_event; errcode_ret presumably receives the status code (confirm against the extension spec). */
 65 | extern CL_API_ENTRY cl_event CL_API_CALL
 66 | clCreateEventFromGLsyncKHR(cl_context           /* context */,
 67 |                            cl_GLsync            /* sync */,
 68 |                            cl_int *             /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
69 | 
70 | #ifdef __cplusplus
71 | }
72 | #endif
73 | 
74 | #endif	/* __OPENCL_CL_GL_EXT_H  */
75 | 


--------------------------------------------------------------------------------
/inc/CL/opencl.h:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 |  * Copyright (c) 2008-2015 The Khronos Group Inc.
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a
 5 |  * copy of this software and/or associated documentation files (the
 6 |  * "Materials"), to deal in the Materials without restriction, including
 7 |  * without limitation the rights to use, copy, modify, merge, publish,
 8 |  * distribute, sublicense, and/or sell copies of the Materials, and to
 9 |  * permit persons to whom the Materials are furnished to do so, subject to
10 |  * the following conditions:
11 |  *
12 |  * The above copyright notice and this permission notice shall be included
13 |  * in all copies or substantial portions of the Materials.
14 |  *
15 |  * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
16 |  * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
17 |  * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
18 |  *    https://www.khronos.org/registry/
19 |  *
20 |  * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 |  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 |  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 |  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 |  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 |  * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
27 |  ******************************************************************************/
28 | 
29 | /* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
30 | 
31 | #ifndef __OPENCL_H
32 | #define __OPENCL_H
33 | 
34 | #ifdef __cplusplus
35 | extern "C" {
36 | #endif
37 | 
38 | #ifdef __APPLE__
39 | 
40 | #include <OpenCL/cl.h>
41 | #include <OpenCL/cl_gl.h>
42 | #include <OpenCL/cl_gl_ext.h>
43 | #include <OpenCL/cl_ext.h>
44 | 
45 | #else
46 | 
47 | #include <CL/cl.h>
48 | #include <CL/cl_gl.h>
49 | #include <CL/cl_gl_ext.h>
50 | #include <CL/cl_ext.h>
51 | 
52 | #endif
53 | 
54 | #ifdef __cplusplus
55 | }
56 | #endif
57 | 
58 | #endif  /* __OPENCL_H   */
59 | 
60 | 


--------------------------------------------------------------------------------
/src/examples/basic/hello_world.cpp:
--------------------------------------------------------------------------------
  1 | //--------------------------------------------------------------------------------------
  2 | // File: hello_world.cpp
  3 | // Desc:
  4 | //
  5 | // Author:      QUALCOMM
  6 | //
  7 | //               Copyright (c) 2017 QUALCOMM Technologies, Inc.
  8 | //                         All Rights Reserved.
  9 | //                      QUALCOMM Proprietary/GTDR
 10 | //--------------------------------------------------------------------------------------
 11 | 
 12 | // Std includes
 13 | #include <cstdlib>
 14 | #include <fstream>
 15 | #include <iostream>
 16 | 
 17 | // Project includes
 18 | #include "util/cl_wrapper.h"
 19 | 
 20 | // Library includes
 21 | #include <CL/cl.h>
 22 | 
 23 | static const char *HELP_MESSAGE = "\n" // Usage text printed when too few arguments are given.
 24 | "Usage: hello_world <input> <output>\n"
 25 | "\n"
 26 | "This example copies the input file to the output file.\n"
 27 | "Use it to test your build tools.\n";
 28 | // OpenCL C source of the "copy" kernel, one array element per source line; each work-item copies one char.
 29 | static const char *PROGRAM_SOURCE[] = {
 30 | "__kernel void copy(__global char *src,\n", // A cl_mem created with clCreateBuffer is received by the kernel as a pointer parameter.
 31 | "                   __global char *dst\n",
 32 | "                   )\n",
 33 | "{\n",
 34 | "    uint wid_x = get_global_id(0);\n",
 35 | "    dst[wid_x] = src[wid_x];\n",
 36 | "}\n"
 37 | };
 38 | // Number of strings in PROGRAM_SOURCE.
 39 | static const cl_uint PROGRAM_SOURCE_LEN = sizeof(PROGRAM_SOURCE) / sizeof(const char *);
 40 | 
 41 | int main(int argc, char** argv)
 42 | {
 43 |     // Both paths are required, so a missing argument is reported as a failure.
 44 |     if (argc < 3)
 45 |     {
 46 |         std::cerr << "Please specify source and destination files.\n";
 47 |         std::cerr << HELP_MESSAGE;
 48 |         std::exit(EXIT_FAILURE);
 49 |     }
 50 |     const std::string src_filename(argv[1]);
 51 |     const std::string out_filename(argv[2]);
 52 | 
 53 |     cl_wrapper       wrapper;
 54 |     cl_program       program         = wrapper.make_program(PROGRAM_SOURCE, PROGRAM_SOURCE_LEN);
 55 |     cl_kernel        kernel          = wrapper.make_kernel("copy", program);
 56 |     cl_context       context         = wrapper.get_context();
 57 |     cl_command_queue command_queue   = wrapper.get_command_queue();
 58 |     cl_int           err             = CL_SUCCESS;
 59 | 
 60 |     /*
 61 |      * Step 0: Read the input file and create CL buffers.
 62 |      */
 63 | 
 64 |     std::ifstream fin(src_filename, std::ios::binary);
 65 |     if (!fin)
 66 |     {
 67 |         std::cerr << "Couldn't open file " << src_filename << "\n";
 68 |         std::exit(EXIT_FAILURE);
 69 |     }
 70 | 
 71 |     // The file size is the distance between the beginning and end stream positions.
 72 |     const auto           fin_begin = fin.tellg();
 73 |     fin.seekg(0, std::ios::end);
 74 |     const auto           fin_end   = fin.tellg();
 75 |     const std::streamoff fin_len   = fin_end - fin_begin;
 76 |     if (!fin || fin_len < 0)
 77 |     {
 78 |         std::cerr << "Couldn't determine size of file " << src_filename << "\n";
 79 |         std::exit(EXIT_FAILURE);
 80 |     }
 81 |     const size_t         buf_size  = static_cast<size_t>(fin_len);
 82 | 
 83 |     if (buf_size == 0)
 84 |     {
 85 |         // clCreateBuffer rejects a size of 0, so an empty input is copied by creating an empty output file.
 86 |         std::ofstream empty_out(out_filename, std::ios::binary);
 87 |         if (!empty_out)
 88 |         {
 89 |             std::cerr << "Couldn't open file " << out_filename << "\n";
 90 |             std::exit(EXIT_FAILURE);
 91 |         }
 92 |         return 0;
 93 |     }
 94 | 
 95 |     // Sized up front; this vector is the host memory handed to src_buffer via CL_MEM_USE_HOST_PTR.
 96 |     std::vector<char> buf(buf_size);
 97 | 
 98 |     fin.seekg(0, std::ios::beg);
 99 |     if (!fin.read(buf.data(), static_cast<std::streamsize>(buf_size)))
100 |     {
101 |         std::cerr << "Couldn't read file " << src_filename << "\n";
102 |         std::exit(EXIT_FAILURE);
103 |     }
104 | 
105 |     cl_mem src_buffer = clCreateBuffer(
106 |             context,
107 |             CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
108 |             buf_size,
109 |             buf.data(),
110 |             &err
111 |     );
112 |     if (err != CL_SUCCESS)
113 |     {
114 |         std::cerr << "Error " << err << " with clCreateBuffer for source file." << "\n";
115 |         std::exit(err);
116 |     }
117 | 
118 |     cl_mem out_buffer = clCreateBuffer(
119 |             context,
120 |             CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
121 |             buf_size,
122 |             NULL,
123 |             &err
124 |     );
125 |     if (err != CL_SUCCESS)
126 |     {
127 |         std::cerr << "Error " << err << " with clCreateBuffer for output file." << "\n";
128 |         std::exit(err);
129 |     }
130 | 
131 |     /*
132 |      * Step 1: Set up kernel arguments and run the kernel.
133 |      */
134 | 
135 |     err = clSetKernelArg(kernel, 0, sizeof(src_buffer), &src_buffer);
136 |     if (err != CL_SUCCESS)
137 |     {
138 |         std::cerr << "Error " << err << " with clSetKernelArg for argument 0." << "\n";
139 |         std::exit(err);
140 |     }
141 | 
142 |     err = clSetKernelArg(kernel, 1, sizeof(out_buffer), &out_buffer);
143 |     if (err != CL_SUCCESS)
144 |     {
145 |         std::cerr << "Error " << err << " with clSetKernelArg for argument 1." << "\n";
146 |         std::exit(err);
147 |     }
148 | 
149 |     // One work-item per byte of the input.
150 |     err = clEnqueueNDRangeKernel(
151 |             command_queue,
152 |             kernel,
153 |             1,
154 |             NULL,
155 |             &buf_size,
156 |             NULL,
157 |             0,
158 |             NULL,
159 |             NULL
160 |     );
161 |     if (err != CL_SUCCESS)
162 |     {
163 |         std::cerr << "Error " << err << " with clEnqueueNDRangeKernel." << "\n";
164 |         std::exit(err);
165 |     }
166 | 
167 |     /*
168 |      * Step 2: Map the output buffer and write its contents to the output file.
169 |      */
170 | 
171 |     char *mapped_ptr = static_cast<char *>(clEnqueueMapBuffer(
172 |             command_queue,
173 |             out_buffer,
174 |             CL_TRUE,
175 |             CL_MAP_READ,
176 |             0,
177 |             buf_size,
178 |             0,
179 |             NULL,
180 |             NULL,
181 |             &err
182 |     ));
183 |     if (err != CL_SUCCESS)
184 |     {
185 |         std::cerr << "Error " << err << " mapping output buffer." << "\n";
186 |         std::exit(err);
187 |     }
188 | 
189 |     std::ofstream fout(out_filename, std::ios::binary);
190 |     if (!fout)
191 |     {
192 |         std::cerr << "Couldn't open file " << out_filename << "\n";
193 |         std::exit(EXIT_FAILURE);
194 |     }
195 |     fout.write(mapped_ptr, static_cast<std::streamsize>(buf_size));
196 |     fout.close();
197 |     // close() flushes, so the stream state now covers the whole write.
198 |     if (!fout)
199 |     {
200 |         std::cerr << "Couldn't write file " << out_filename << "\n";
201 |         std::exit(EXIT_FAILURE);
202 |     }
203 | 
204 |     err = clEnqueueUnmapMemObject(command_queue, out_buffer, mapped_ptr, 0, NULL, NULL);
205 |     if (err != CL_SUCCESS)
206 |     {
207 |         std::cerr << "Error " << err << " unmapping output buffer." << "\n";
208 |         std::exit(err);
209 |     }
210 | 
211 |     clFinish(command_queue);
212 | 
213 |     // Clean up cl resources that aren't automatically handled by cl_wrapper
214 |     clReleaseMemObject(src_buffer);
215 |     clReleaseMemObject(out_buffer);
216 | 
217 |     return 0;
218 | }
219 | 


--------------------------------------------------------------------------------
/src/examples/bayer_mipi/mipi10_to_unpacked.cpp:
--------------------------------------------------------------------------------
  1 | //--------------------------------------------------------------------------------------
  2 | // File: mipi10_to_unpacked.cpp
  3 | // Desc:
  4 | //
  5 | // Author:      QUALCOMM
  6 | //
  7 | //               Copyright (c) 2018 QUALCOMM Technologies, Inc.
  8 | //                         All Rights Reserved.
  9 | //                      QUALCOMM Proprietary/GTDR
 10 | //--------------------------------------------------------------------------------------
 11 | 
 12 | // Std includes
 13 | #include <cstdlib>
 14 | #include <cstring>
 15 | #include <iostream>
 16 | 
 17 | // Project includes
 18 | #include "util/cl_wrapper.h"
 19 | #include "util/util.h"
 20 | 
 21 | // Library includes
 22 | #include <CL/cl.h>
 23 | #include <CL/cl_ext_qcom.h>
 24 | 
 25 | static const char *HELP_MESSAGE = "\n" // Usage text printed when too few arguments are given.
 26 | "Usage: mipi10_to_unpacked <source image data file> <output image data file>\n"
 27 | "\n"
 28 | "Converts a single-channel MIPI10 image into an unpacked 16-bit format.\n";
 29 | // OpenCL C source of the "unpack" kernel, one array element per source line.
 30 | static const char *PROGRAM_SOURCE[] = {
 31 | "__kernel void unpack(__read_only  image2d_t packed_image,\n",
 32 | "                     __write_only image2d_t unpacked_image,\n",
 33 | "                                  sampler_t sampler)\n",
 34 | "{\n",
 35 | "    const int  wid_x = get_global_id(0);\n",
 36 | "    const int  wid_y = get_global_id(1);\n",
 37 | "    const int2 coord = (int2)(4 * wid_x, wid_y);\n", // Each work-item handles 4 horizontally adjacent pixels starting at x = 4 * wid_x.
 38 | "    const float4 pixels[] = {\n",
 39 | "        read_imagef(packed_image, sampler, coord + (int2)(0, 0)),\n",
 40 | "        read_imagef(packed_image, sampler, coord + (int2)(1, 0)),\n",
 41 | "        read_imagef(packed_image, sampler, coord + (int2)(2, 0)),\n",
 42 | "        read_imagef(packed_image, sampler, coord + (int2)(3, 0)),\n",
 43 | "    };\n",
 44 | "    write_imagef(unpacked_image, coord + (int2)(0, 0), pixels[0]);\n", // Each pixel is written back at the same coordinate it was read from.
 45 | "    write_imagef(unpacked_image, coord + (int2)(1, 0), pixels[1]);\n",
 46 | "    write_imagef(unpacked_image, coord + (int2)(2, 0), pixels[2]);\n",
 47 | "    write_imagef(unpacked_image, coord + (int2)(3, 0), pixels[3]);\n",
 48 | "}\n"
 49 | };
 50 | // Number of strings in PROGRAM_SOURCE.
 51 | static const cl_uint PROGRAM_SOURCE_LEN = sizeof(PROGRAM_SOURCE) / sizeof(const char *);
 52 | 
 53 | int main(int argc, char** argv)
 54 | {
 55 |     // Both image paths are required, so a missing argument is reported as a failure.
 56 |     if (argc < 3)
 57 |     {
 58 |         std::cerr << "Please specify source and output images.\n";
 59 |         std::cerr << HELP_MESSAGE;
 60 |         std::exit(EXIT_FAILURE);
 61 |     }
 62 |     const std::string src_image_filename(argv[1]);
 63 |     const std::string out_image_filename(argv[2]);
 64 | 
 65 |     cl_wrapper wrapper;
 66 |     cl_program           program              = wrapper.make_program(PROGRAM_SOURCE, PROGRAM_SOURCE_LEN);
 67 |     cl_kernel            kernel               = wrapper.make_kernel("unpack", program);
 68 |     cl_context           context              = wrapper.get_context();
 69 |     cl_command_queue     command_queue        = wrapper.get_command_queue();
 70 |     bayer_mipi10_image_t src_bayer_image_info = load_bayer_mipi_10_image_data(src_image_filename);
 71 | 
 72 |     /*
 73 |      * Step 0: Confirm the required OpenCL extensions are supported.
 74 |      */
 75 | 
 76 |     if (!wrapper.check_extension_support("cl_qcom_other_image"))
 77 |     {
 78 |         std::cerr << "Extension cl_qcom_other_image needed for MIPI10 data type is not supported.\n";
 79 |         std::exit(EXIT_FAILURE);
 80 |     }
 81 | 
 82 |     if (!wrapper.check_extension_support("cl_qcom_ext_host_ptr"))
 83 |     {
 84 |         std::cerr << "Extension cl_qcom_ext_host_ptr needed for ION-backed images is not supported.\n";
 85 |         std::exit(EXIT_FAILURE);
 86 |     }
 87 | 
 88 |     if (!wrapper.check_extension_support("cl_qcom_ion_host_ptr"))
 89 |     {
 90 |         std::cerr << "Extension cl_qcom_ion_host_ptr needed for ION-backed images is not supported.\n";
 91 |         std::exit(EXIT_FAILURE);
 92 |     }
 93 | 
 94 |     /*
 95 |      * Step 1: Create suitable ion buffer-backed CL images.
 96 |      *         Note the source image has the same layout as with bayer_mipi10_to_rgba.cpp example.
 97 |      *         The difference is how such images are addressed on the GPU.
 98 |      */
 99 | 
100 |     cl_image_format src_format;
101 |     src_format.image_channel_order     = CL_R;
102 |     src_format.image_channel_data_type = CL_QCOM_UNORM_MIPI10;
103 | 
104 |     cl_image_desc src_desc;
105 |     std::memset(&src_desc, 0, sizeof(src_desc));
106 |     src_desc.image_type      = CL_MEM_OBJECT_IMAGE2D;
107 |     src_desc.image_width     = src_bayer_image_info.width;
108 |     src_desc.image_height    = src_bayer_image_info.height;
109 | 
110 |     // The host copy below moves 5 bytes per 4 pixels and the kernel handles 4 pixels per
111 |     // work-item, so a width that is not a multiple of 4 cannot be processed correctly.
112 |     if (src_desc.image_width % 4 != 0)
113 |     {
114 |         std::cerr << "Source image width must be a multiple of 4 for MIPI10 data.\n";
115 |         std::exit(EXIT_FAILURE);
116 |     }
117 |     src_desc.image_row_pitch = wrapper.get_ion_image_row_pitch(src_format, src_desc);
118 | 
119 |     cl_mem_ion_host_ptr src_ion_mem = wrapper.make_ion_buffer_for_nonplanar_image(src_format, src_desc);
120 |     cl_int              err         = 0;
121 |     cl_mem              src_image   = clCreateImage(
122 |             context,
123 |             CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM,
124 |             &src_format,
125 |             &src_desc,
126 |             &src_ion_mem,
127 |             &err
128 |     );
129 |     if (err != CL_SUCCESS)
130 |     {
131 |         std::cerr << "Error " << err << " with clCreateImage for source image." << "\n";
132 |         std::exit(err);
133 |     }
134 | 
135 |     const size_t   origin[]     = {0, 0, 0};
136 |     size_t         row_pitch    = 0 ;
137 |     const size_t   src_region[] = {src_desc.image_width, src_desc.image_height, 1};
138 |     unsigned char *image_ptr    = static_cast<unsigned char *>(clEnqueueMapImage(
139 |             command_queue,
140 |             src_image,
141 |             CL_BLOCKING,
142 |             CL_MAP_WRITE,
143 |             origin,
144 |             src_region,
145 |             &row_pitch,
146 |             NULL,
147 |             0,
148 |             NULL,
149 |             NULL,
150 |             &err
151 |     ));
152 |     if (err != CL_SUCCESS)
153 |     {
154 |         std::cerr << "Error " << err << " with clEnqueueMapImage for source image." << "\n";
155 |         std::exit(err);
156 |     }
157 | 
158 |     // Copies image data from the host to the ION buffer.
159 |     // Host rows are tightly packed; mapped rows are spaced by the pitch clEnqueueMapImage reported.
160 |     const size_t src_row_bytes = src_desc.image_width / 4 * 5;
161 |     for (size_t i = 0; i < src_desc.image_height; ++i)
162 |     {
163 |         std::memcpy(
164 |                 image_ptr                          + i * row_pitch,
165 |                 src_bayer_image_info.pixels.data() + i * src_row_bytes,
166 |                 src_row_bytes
167 |         );
168 |     }
169 | 
170 |     err = clEnqueueUnmapMemObject(command_queue, src_image, image_ptr, 0, NULL, NULL);
171 |     if (err != CL_SUCCESS)
172 |     {
173 |         std::cerr << "Error " << err << " unmapping source image." << "\n";
174 |         std::exit(err);
175 |     }
176 | 
177 |     cl_image_format out_format;
178 |     out_format.image_channel_order     = CL_R;
179 |     out_format.image_channel_data_type = CL_UNORM_INT16;
180 | 
181 |     cl_image_desc out_desc;
182 |     std::memset(&out_desc, 0, sizeof(out_desc));
183 |     out_desc.image_type      = CL_MEM_OBJECT_IMAGE2D;
184 |     out_desc.image_width     = src_bayer_image_info.width;
185 |     out_desc.image_height    = src_bayer_image_info.height;
186 |     out_desc.image_row_pitch = wrapper.get_ion_image_row_pitch(out_format, out_desc);
187 | 
188 |     cl_mem_ion_host_ptr out_ion_mem = wrapper.make_ion_buffer_for_nonplanar_image(out_format, out_desc);
189 |     cl_mem out_image = clCreateImage(
190 |             context,
191 |             CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM,
192 |             &out_format,
193 |             &out_desc,
194 |             &out_ion_mem,
195 |             &err
196 |     );
197 |     if (err != CL_SUCCESS)
198 |     {
199 |         std::cerr << "Error " << err << " with clCreateImage for output image." << "\n";
200 |         std::exit(err);
201 |     }
202 | 
203 |     /*
204 |      * Step 2: Set up kernel arguments and run the kernel.
205 |      */
206 | 
207 |     cl_sampler sampler = clCreateSampler(
208 |             context,
209 |             CL_FALSE,
210 |             CL_ADDRESS_NONE,
211 |             CL_FILTER_NEAREST,
212 |             &err
213 |     );
214 |     if (err != CL_SUCCESS)
215 |     {
216 |         std::cerr << "Error " << err << " with clCreateSampler." << "\n";
217 |         std::exit(err);
218 |     }
219 | 
220 |     err = clSetKernelArg(kernel, 0, sizeof(src_image), &src_image);
221 |     if (err != CL_SUCCESS)
222 |     {
223 |         std::cerr << "\tError " << err << " with clSetKernelArg for argument 0." << "\n";
224 |         std::exit(err);
225 |     }
226 | 
227 |     err = clSetKernelArg(kernel, 1, sizeof(out_image), &out_image);
228 |     if (err != CL_SUCCESS)
229 |     {
230 |         std::cerr << "\tError " << err << " with clSetKernelArg for argument 1." << "\n";
231 |         std::exit(err);
232 |     }
233 | 
234 |     err = clSetKernelArg(kernel, 2, sizeof(sampler), &sampler);
235 |     if (err != CL_SUCCESS)
236 |     {
237 |         std::cerr << "\tError " << err << " with clSetKernelArg for argument 2." << "\n";
238 |         std::exit(err);
239 |     }
240 | 
241 |     // One work-item per group of 4 pixels in x, one per row in y.
242 |     const size_t global_work_size[] = {out_desc.image_width / 4, out_desc.image_height};
243 |     err = clEnqueueNDRangeKernel(
244 |             command_queue,
245 |             kernel,
246 |             2,
247 |             NULL,
248 |             global_work_size,
249 |             NULL,
250 |             0,
251 |             NULL,
252 |             NULL
253 |     );
254 |     if (err != CL_SUCCESS)
255 |     {
256 |         std::cerr << "\tError " << err << " with clEnqueueNDRangeKernel." << "\n";
257 |         std::exit(err);
258 |     }
259 | 
260 |     /*
261 |      * Step 3: Copy the data out of the ion buffer.
262 |      */
263 | 
264 |     single_channel_int16_image_t out_image_info;
265 |     out_image_info.width  = out_desc.image_width;
266 |     out_image_info.height = out_desc.image_height;
267 |     out_image_info.pixels.resize((out_image_info.width * 2) * out_image_info.height);
268 | 
269 |     const size_t out_region[] = {out_desc.image_width, out_desc.image_height, 1};
270 |     row_pitch                 = 0;
271 |     image_ptr                 = static_cast<unsigned char *>(clEnqueueMapImage(
272 |             command_queue,
273 |             out_image,
274 |             CL_TRUE,
275 |             CL_MAP_READ,
276 |             origin,
277 |             out_region,
278 |             &row_pitch,
279 |             NULL,
280 |             0,
281 |             NULL,
282 |             NULL,
283 |             &err
284 |     ));
285 |     if (err != CL_SUCCESS)
286 |     {
287 |         std::cerr << "Error " << err << " mapping dest image buffer for reading." << "\n";
288 |         std::exit(err);
289 |     }
290 | 
291 |     // Copies image data from the ION buffer to the host (2 bytes per pixel, rows tightly packed on the host).
292 |     const size_t out_row_bytes = out_desc.image_width * 2;
293 |     for (size_t i = 0; i < out_desc.image_height; ++i)
294 |     {
295 |         std::memcpy(
296 |                 out_image_info.pixels.data() + i * out_row_bytes,
297 |                 image_ptr                    + i * row_pitch,
298 |                 out_row_bytes
299 |         );
300 |     }
301 | 
302 |     err = clEnqueueUnmapMemObject(command_queue, out_image, image_ptr, 0, NULL, NULL);
303 |     if (err != CL_SUCCESS)
304 |     {
305 |         std::cerr << "Error " << err << " unmapping dest image." << "\n";
306 |         std::exit(err);
307 |     }
308 | 
309 |     clFinish(command_queue);
310 | 
311 |     save_single_channel_image_data(out_image_filename, out_image_info);
312 | 
313 |     // Clean up cl resources that aren't automatically handled by cl_wrapper
314 |     clReleaseSampler(sampler);
315 |     clReleaseMemObject(src_image);
316 |     clReleaseMemObject(out_image);
317 | 
318 |     return 0;
319 | }
320 | 
321 | 


--------------------------------------------------------------------------------
/src/examples/bayer_mipi/unpacked_to_mipi10.cpp:
--------------------------------------------------------------------------------
  1 | //--------------------------------------------------------------------------------------
  2 | // File: unpacked_to_mipi10.cpp
  3 | // Desc:
  4 | //
  5 | // Author:      QUALCOMM
  6 | //
  7 | //               Copyright (c) 2018 QUALCOMM Technologies, Inc.
  8 | //                         All Rights Reserved.
  9 | //                      QUALCOMM Proprietary/GTDR
 10 | //--------------------------------------------------------------------------------------
 11 | 
 12 | // Std includes
 13 | #include <cstdlib>
 14 | #include <cstring>
 15 | #include <iostream>
 16 | 
 17 | // Project includes
 18 | #include "util/cl_wrapper.h"
 19 | #include "util/util.h"
 20 | 
 21 | // Library includes
 22 | #include <CL/cl.h>
 23 | #include <CL/cl_ext_qcom.h>
 24 | 
 25 | static const char *HELP_MESSAGE = "\n" // Usage text printed when too few arguments are given.
 26 | "Usage: unpacked_to_mipi10 <source image data file> <output image data file>\n"
 27 | "\n"
 28 | "Converts a single-channel unpacked 16-bit image to MIPI10 format.\n";
 29 | // OpenCL C source of the "pack" kernel, one array element per source line.
 30 | static const char *PROGRAM_SOURCE[] = {
 31 | "__kernel void pack(__read_only  image2d_t unpacked_image,\n",
 32 | "                   __write_only image2d_t packed_image,\n",
 33 | "                                sampler_t sampler)\n",
 34 | "{\n",
 35 | "    const int  wid_x      = get_global_id(0);\n",
 36 | "    const int  wid_y      = get_global_id(1);\n",
 37 | "    const int2 coord      = (int2)(4 * wid_x, wid_y);\n", // Each work-item reads 4 horizontally adjacent pixels starting at x = 4 * wid_x.
 38 | "    const float4 pixels[] = {\n",
 39 | "        read_imagef(unpacked_image, sampler, coord + (int2)(0, 0)),\n",
 40 | "        read_imagef(unpacked_image, sampler, coord + (int2)(1, 0)),\n",
 41 | "        read_imagef(unpacked_image, sampler, coord + (int2)(2, 0)),\n",
 42 | "        read_imagef(unpacked_image, sampler, coord + (int2)(3, 0)),\n",
 43 | "    };\n",
 44 | "    float      out_pixel[] = {pixels[0].x, pixels[1].x, pixels[2].x, pixels[3].x};\n", // Only the x component of each read is kept.
 45 | "    const int2 write_coord = (int2)(wid_x, wid_y);\n", // The write coordinate is not multiplied by 4, unlike the read coordinate.
 46 | "    qcom_write_imagefv_4x1_n10m00(packed_image, write_coord, out_pixel);\n", // NOTE(review): presumably stores the 4 values as one packed MIPI10 group - confirm against cl_qcom_vector_image_ops.
 47 | "}\n"
 48 | };
 49 | // Number of strings in PROGRAM_SOURCE.
 50 | static const cl_uint PROGRAM_SOURCE_LEN = sizeof(PROGRAM_SOURCE) / sizeof(const char *);
 51 | 
 52 | int main(int argc, char** argv)
 53 | {
 54 |     if (argc < 3)
 55 |     {
 56 |         std::cerr << "Please specify source and output images.\n";
 57 |         std::cerr << HELP_MESSAGE;
 58 |         std::exit(EXIT_SUCCESS);
 59 |     }
 60 |     const std::string src_image_filename(argv[1]);
 61 |     const std::string out_image_filename(argv[2]);
 62 | 
 63 |     cl_wrapper wrapper;
 64 |     cl_program                   program              = wrapper.make_program(PROGRAM_SOURCE, PROGRAM_SOURCE_LEN);
 65 |     cl_kernel                    kernel               = wrapper.make_kernel("pack", program);
 66 |     cl_context                   context              = wrapper.get_context();
 67 |     cl_command_queue             command_queue        = wrapper.get_command_queue();
 68 |     single_channel_int16_image_t src_int16_image_info = load_single_channel_image_data(src_image_filename);
 69 | 
 70 |     /*
 71 |      * Step 0: Confirm the required OpenCL extensions are supported.
 72 |      */
 73 | 
 74 |     if (!wrapper.check_extension_support("cl_qcom_other_image"))
 75 |     {
 76 |         std::cerr << "Extension cl_qcom_other_image needed for MIPI10 data type is not supported.\n";
 77 |         std::exit(EXIT_FAILURE);
 78 |     }
 79 | 
 80 |     if (!wrapper.check_extension_support("cl_qcom_ext_host_ptr"))
 81 |     {
 82 |         std::cerr << "Extension cl_qcom_ext_host_ptr needed for ION-backed images is not supported.\n";
 83 |         std::exit(EXIT_FAILURE);
 84 |     }
 85 | 
 86 |     if (!wrapper.check_extension_support("cl_qcom_ion_host_ptr"))
 87 |     {
 88 |         std::cerr << "Extension cl_qcom_ion_host_ptr needed for ION-backed images is not supported.\n";
 89 |         std::exit(EXIT_FAILURE);
 90 |     }
 91 | 
 92 |     /*
 93 |      * Step 1: Create suitable ion buffer-backed CL images.
 94 |      *         Note the source image has the same layout as with bayer_mipi10_to_rgba.cpp example.
 95 |      *         The difference is how such images are addressed on the GPU.
 96 |      */
 97 | 
 98 |     cl_image_format src_format;
 99 |     src_format.image_channel_order     = CL_R;
100 |     src_format.image_channel_data_type = CL_UNORM_INT16;
101 | 
102 |     cl_image_desc src_desc;
103 |     std::memset(&src_desc, 0, sizeof(src_desc));
104 |     src_desc.image_type      = CL_MEM_OBJECT_IMAGE2D;
105 |     src_desc.image_width     = src_int16_image_info.width;
106 |     src_desc.image_height    = src_int16_image_info.height;
107 |     src_desc.image_row_pitch = wrapper.get_ion_image_row_pitch(src_format, src_desc);
108 | 
109 |     cl_mem_ion_host_ptr src_ion_mem = wrapper.make_ion_buffer_for_nonplanar_image(src_format, src_desc);
110 |     cl_int              err         = 0;
111 |     cl_mem              src_image   = clCreateImage(
112 |             context,
113 |             CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM,
114 |             &src_format,
115 |             &src_desc,
116 |             &src_ion_mem,
117 |             &err
118 |     );
119 |     if (err != CL_SUCCESS)
120 |     {
121 |         std::cerr << "Error " << err << " with clCreateImage for source image." << "\n";
122 |         std::exit(err);
123 |     }
124 | 
125 |     const size_t   origin[]     = {0, 0, 0};
126 |     size_t         row_pitch    = 0 ;
127 |     const size_t   src_region[] = {src_desc.image_width, src_desc.image_height, 1};
128 |     unsigned char *image_ptr    = static_cast<unsigned char *>(clEnqueueMapImage(
129 |             command_queue,
130 |             src_image,
131 |             CL_BLOCKING,
132 |             CL_MAP_WRITE,
133 |             origin,
134 |             src_region,
135 |             &row_pitch,
136 |             NULL,
137 |             0,
138 |             NULL,
139 |             NULL,
140 |             &err
141 |     ));
142 |     if (err != CL_SUCCESS)
143 |     {
144 |         std::cerr << "Error " << err << " with clEnqueueMapImage for source image." << "\n";
145 |         std::exit(err);
146 |     }
147 | 
148 |     // Copies image data from the host to the ION buffer
149 |     for (uint32_t i = 0; i < src_desc.image_height; ++i)
150 |     {
151 |         std::memcpy(
152 |                 image_ptr                          + i * src_desc.image_row_pitch,
153 |                 src_int16_image_info.pixels.data() + i * src_desc.image_width * 2,
154 |                 src_desc.image_width * 2
155 |         );
156 |     }
157 | 
158 |     err = clEnqueueUnmapMemObject(command_queue, src_image, image_ptr, 0, NULL, NULL);
159 |     if (err != CL_SUCCESS)
160 |     {
161 |         std::cerr << "Error " << err << " unmapping source image." << "\n";
162 |         std::exit(err);
163 |     }
164 | 
165 |     cl_image_format out_format;
166 |     out_format.image_channel_order     = CL_R;
167 |     out_format.image_channel_data_type = CL_QCOM_UNORM_MIPI10;
168 | 
169 |     cl_image_desc out_desc;
170 |     std::memset(&out_desc, 0, sizeof(out_desc));
171 |     out_desc.image_type      = CL_MEM_OBJECT_IMAGE2D;
172 |     out_desc.image_width     = src_int16_image_info.width;
173 |     out_desc.image_height    = src_int16_image_info.height;
174 |     out_desc.image_row_pitch = wrapper.get_ion_image_row_pitch(out_format, out_desc);
175 | 
176 |     cl_mem_ion_host_ptr out_ion_mem = wrapper.make_ion_buffer_for_nonplanar_image(out_format, out_desc);
177 |     cl_mem out_image = clCreateImage(
178 |             context,
179 |             CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM,
180 |             &out_format,
181 |             &out_desc,
182 |             &out_ion_mem,
183 |             &err
184 |     );
185 |     if (err != CL_SUCCESS)
186 |     {
187 |         std::cerr << "Error " << err << " with clCreateImage for output image." << "\n";
188 |         std::exit(err);
189 |     }
190 | 
191 |     /*
192 |      * Step 2: Set up kernel arguments and run the kernel.
193 |      */
194 | 
195 |     cl_sampler sampler = clCreateSampler(
196 |             context,
197 |             CL_FALSE,
198 |             CL_ADDRESS_NONE,
199 |             CL_FILTER_NEAREST,
200 |             &err
201 |     );
202 |     if (err != CL_SUCCESS)
203 |     {
204 |         std::cerr << "Error " << err << " with clCreateSampler." << "\n";
205 |         std::exit(err);
206 |     }
207 | 
208 |     err = clSetKernelArg(kernel, 0, sizeof(src_image), &src_image);
209 |     if (err != CL_SUCCESS)
210 |     {
211 |         std::cerr << "\tError " << err << " with clSetKernelArg for argument 0." << "\n";
212 |         std::exit(err);
213 |     }
214 | 
215 |     err = clSetKernelArg(kernel, 1, sizeof(out_image), &out_image);
216 |     if (err != CL_SUCCESS)
217 |     {
218 |         std::cerr << "\tError " << err << " with clSetKernelArg for argument 1." << "\n";
219 |         std::exit(err);
220 |     }
221 | 
222 |     err = clSetKernelArg(kernel, 2, sizeof(sampler), &sampler);
223 |     if (err != CL_SUCCESS)
224 |     {
225 |         std::cerr << "\tError " << err << " with clSetKernelArg for argument 2." << "\n";
226 |         std::exit(err);
227 |     }
228 | 
229 |     const size_t global_work_size[] = {out_desc.image_width / 4, out_desc.image_height};
230 |     err = clEnqueueNDRangeKernel(
231 |             command_queue,
232 |             kernel,
233 |             2,
234 |             NULL,
235 |             global_work_size,
236 |             NULL,
237 |             0,
238 |             NULL,
239 |             NULL
240 |     );
241 |     if (err != CL_SUCCESS)
242 |     {
243 |         std::cerr << "\tError " << err << " with clEnqueueNDRangeKernel." << "\n";
244 |         std::exit(err);
245 |     }
246 | 
247 |     /*
248 |      * Step 3: Copy the data out of the ion buffer.
249 |      */
250 | 
251 |     bayer_mipi10_image_t out_image_info;
252 |     out_image_info.width  = out_desc.image_width;
253 |     out_image_info.height = out_desc.image_height;
254 |     out_image_info.pixels.resize((out_image_info.width / 4 * 5) * out_image_info.height);
255 | 
256 |     const size_t out_region[] = {out_desc.image_width, out_desc.image_height, 1};
257 |     row_pitch                 = 0;
258 |     image_ptr                 = static_cast<unsigned char *>(clEnqueueMapImage(
259 |             command_queue,
260 |             out_image,
261 |             CL_TRUE,
262 |             CL_MAP_READ,
263 |             origin,
264 |             out_region,
265 |             &row_pitch,
266 |             NULL,
267 |             0,
268 |             NULL,
269 |             NULL,
270 |             &err
271 |     ));
272 |     if (err != CL_SUCCESS)
273 |     {
274 |         std::cerr << "Error " << err << " mapping dest image buffer for reading." << "\n";
275 |         std::exit(err);
276 |     }
277 | 
278 |     // Copies image data from the ION buffer to the host
279 |     for (uint32_t i = 0; i < out_desc.image_height; ++i)
280 |     {
281 |         std::memcpy(
282 |                 out_image_info.pixels.data() + i * out_desc.image_width / 4 * 5,
283 |                 image_ptr                    + i * row_pitch,
284 |                 out_desc.image_width / 4 * 5
285 |         );
286 |     }
287 | 
288 |     err = clEnqueueUnmapMemObject(command_queue, out_image, image_ptr, 0, NULL, NULL);
289 |     if (err != CL_SUCCESS)
290 |     {
291 |         std::cerr << "Error " << err << " unmapping dest image." << "\n";
292 |         std::exit(err);
293 |     }
294 | 
295 |     clFinish(command_queue);
296 | 
297 |     save_bayer_mipi_10_image_data(out_image_filename, out_image_info);
298 | 
299 |     // Clean up cl resources that aren't automatically handled by cl_wrapper
300 |     clReleaseSampler(sampler);
301 |     clReleaseMemObject(src_image);
302 |     clReleaseMemObject(out_image);
303 | 
304 |     return 0;
305 | }
306 | 
307 | 


--------------------------------------------------------------------------------
/src/examples/io_coherent_ion/io_coherent_ion_buffers.cpp:
--------------------------------------------------------------------------------
  1 | //--------------------------------------------------------------------------------------
  2 | // File: io_coherent_ion_buffers.cpp
  3 | // Desc: Demonstrates copying a file through io-coherent ION-backed buffers
  4 | //
  5 | // Author:      QUALCOMM
  6 | //
  7 | //               Copyright (c) 2018 QUALCOMM Technologies, Inc.
  8 | //                         All Rights Reserved.
  9 | //                      QUALCOMM Proprietary/GTDR
 10 | //--------------------------------------------------------------------------------------
 11 | 
 12 | // Std includes
 13 | #include <cstdlib>
 14 | #include <fstream>
 15 | #include <iostream>
 16 | 
 17 | // Project includes
 18 | #include "util/cl_wrapper.h"
 19 | 
 20 | // Library includes
 21 | #include <CL/cl.h>
 22 | 
 23 | static const char *HELP_MESSAGE = "\n" // Usage text; main() prints it to stderr when too few arguments are given.
 24 | "Usage: io_coherent_ion_buffers <input> <output>\n"
 25 | "\n"
 26 | "This example copies the input file to the output file.\n"
 27 | "It uses io-coherent ION buffers.\n";
 28 | // OpenCL C source, one array entry per source line: the "copy" kernel stores src[i] into dst[i] for work-item i.
 29 | static const char *PROGRAM_SOURCE[] = {
 30 | "__kernel void copy(__global char *src,\n",
 31 | "                   __global char *dst\n",
 32 | "                   )\n",
 33 | "{\n",
 34 | "    uint wid_x = get_global_id(0);\n",
 35 | "    dst[wid_x] = src[wid_x];\n",
 36 | "}\n"
 37 | };
 38 | // Number of entries in PROGRAM_SOURCE (array size in bytes divided by the size of one pointer).
 39 | static const cl_uint PROGRAM_SOURCE_LEN = sizeof(PROGRAM_SOURCE) / sizeof(const char *);
 40 | 
 41 | // Copies the input file to the output file through two io-coherent ION-backed
 42 | // OpenCL buffers, using the "copy" kernel to move the bytes on the device.
 43 | //
 44 | // argv[1] is the file to read and argv[2] is the file to write.
 45 | // Returns 0 on success. Usage and file I/O errors exit with EXIT_FAILURE;
 46 | // OpenCL errors exit with the error code of the failing call.
 47 | int main(int argc, char** argv)
 48 | {
 49 |     if (argc < 3)
 50 |     {
 51 |         std::cerr << "Please specify source and destination files.\n";
 52 |         std::cerr << HELP_MESSAGE;
 53 |         // Missing arguments are a usage error, so the exit status must be non-zero.
 54 |         std::exit(EXIT_FAILURE);
 55 |     }
 56 |     const std::string src_filename(argv[1]);
 57 |     const std::string out_filename(argv[2]);
 58 | 
 59 |     cl_wrapper       wrapper;
 60 |     cl_program       program         = wrapper.make_program(PROGRAM_SOURCE, PROGRAM_SOURCE_LEN);
 61 |     cl_kernel        kernel          = wrapper.make_kernel("copy", program);
 62 |     cl_context       context         = wrapper.get_context();
 63 |     cl_command_queue command_queue   = wrapper.get_command_queue();
 64 |     cl_int           err             = CL_SUCCESS;
 65 | 
 66 |     /*
 67 |      * Step 0: Create CL buffers.
 68 |      */
 69 | 
 70 |     std::ifstream fin(src_filename, std::ios::binary);
 71 |     if (!fin)
 72 |     {
 73 |         std::cerr << "Couldn't open file " << src_filename << "\n";
 74 |         std::exit(EXIT_FAILURE);
 75 |     }
 76 | 
 77 |     // The file size is the distance between the start and end stream positions.
 78 |     const auto fin_begin = fin.tellg();
 79 |     fin.seekg(0, std::ios::end);
 80 |     const auto fin_end   = fin.tellg();
 81 |     const auto file_len  = fin_end - fin_begin;
 82 |     // tellg() reports failure as -1, and clCreateBuffer rejects a size of 0, so
 83 |     // stop here with a clear message instead of passing a bogus size to OpenCL.
 84 |     if (!fin || file_len <= 0)
 85 |     {
 86 |         std::cerr << "File " << src_filename << " is empty or its size could not be determined.\n";
 87 |         std::exit(EXIT_FAILURE);
 88 |     }
 89 | 
 90 |     const size_t        buf_size    = static_cast<size_t>(file_len);
 91 |     cl_mem_ion_host_ptr src_buf_ion = wrapper.make_iocoherent_ion_buffer(buf_size);
 92 | 
 93 |     cl_mem src_buffer = clCreateBuffer(
 94 |             context,
 95 |             CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM,
 96 |             buf_size,
 97 |             &src_buf_ion,
 98 |             &err
 99 |     );
100 |     if (err != CL_SUCCESS)
101 |     {
102 |         std::cerr << "Error " << err << " with clCreateBuffer for source file." << "\n";
103 |         std::exit(err);
104 |     }
105 | 
106 |     // Map the source buffer for writing so the file can be read straight into it.
107 |     char *buf_ptr = static_cast<char *>(clEnqueueMapBuffer(
108 |             command_queue,
109 |             src_buffer,
110 |             CL_BLOCKING,
111 |             CL_MAP_WRITE,
112 |             0,
113 |             buf_size,
114 |             0,
115 |             NULL,
116 |             NULL,
117 |             &err
118 |     ));
119 |     if (err != CL_SUCCESS)
120 |     {
121 |         std::cerr << "Error " << err << " mapping source buffer for writing." << "\n";
122 |         std::exit(err);
123 |     }
124 | 
125 |     fin.seekg(0, std::ios::beg);
126 |     fin.read(buf_ptr, buf_size);
127 |     // A failed or short read would leave part of the mapped buffer unfilled.
128 |     if (!fin)
129 |     {
130 |         std::cerr << "Couldn't read file " << src_filename << "\n";
131 |         std::exit(EXIT_FAILURE);
132 |     }
133 |     fin.close();
134 | 
135 |     err = clEnqueueUnmapMemObject(command_queue, src_buffer, buf_ptr, 0, NULL, NULL);
136 |     if (err != CL_SUCCESS)
137 |     {
138 |         std::cerr << "Error " << err << " unmapping source buffer." << "\n";
139 |         std::exit(err);
140 |     }
141 | 
142 |     cl_mem_ion_host_ptr out_buf_ion = wrapper.make_iocoherent_ion_buffer(buf_size);
143 |     cl_mem out_buffer = clCreateBuffer(
144 |             context,
145 |             CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM,
146 |             buf_size,
147 |             &out_buf_ion,
148 |             &err
149 |     );
150 |     if (err != CL_SUCCESS)
151 |     {
152 |         std::cerr << "Error " << err << " with clCreateBuffer for output file." << "\n";
153 |         std::exit(err);
154 |     }
155 | 
156 |     /*
157 |      * Step 1: Set up kernel arguments and run the kernel.
158 |      */
159 | 
160 |     err = clSetKernelArg(kernel, 0, sizeof(src_buffer), &src_buffer);
161 |     if (err != CL_SUCCESS)
162 |     {
163 |         std::cerr << "Error " << err << " with clSetKernelArg for argument 0." << "\n";
164 |         std::exit(err);
165 |     }
166 | 
167 |     err = clSetKernelArg(kernel, 1, sizeof(out_buffer), &out_buffer);
168 |     if (err != CL_SUCCESS)
169 |     {
170 |         std::cerr << "Error " << err << " with clSetKernelArg for argument 1." << "\n";
171 |         std::exit(err);
172 |     }
173 | 
174 |     // One work-item per byte: the global work size is the buffer size.
175 |     err = clEnqueueNDRangeKernel(
176 |             command_queue,
177 |             kernel,
178 |             1,
179 |             NULL,
180 |             &buf_size,
181 |             NULL,
182 |             0,
183 |             NULL,
184 |             NULL
185 |     );
186 |     if (err != CL_SUCCESS)
187 |     {
188 |         std::cerr << "Error " << err << " with clEnqueueNDRangeKernel." << "\n";
189 |         std::exit(err);
190 |     }
191 | 
192 |     /*
193 |      * Step 2: Copy the data out of the ion buffer.
194 |      */
195 | 
196 |     std::ofstream fout(out_filename, std::ios::binary);
197 |     if (!fout)
198 |     {
199 |         std::cerr << "Couldn't open file " << out_filename << "\n";
200 |         std::exit(EXIT_FAILURE);
201 |     }
202 | 
203 |     // The blocking read-mapping is enqueued after the kernel on the same queue.
204 |     buf_ptr = static_cast<char *>(clEnqueueMapBuffer(
205 |             command_queue,
206 |             out_buffer,
207 |             CL_BLOCKING,
208 |             CL_MAP_READ,
209 |             0,
210 |             buf_size,
211 |             0,
212 |             NULL,
213 |             NULL,
214 |             &err
215 |     ));
216 |     if (err != CL_SUCCESS)
217 |     {
218 |         std::cerr << "Error " << err << " mapping output buffer for reading." << "\n";
219 |         std::exit(err);
220 |     }
221 | 
222 |     fout.write(buf_ptr, buf_size);
223 |     fout.close();
224 |     // close() flushes, so checking afterwards catches failures from both calls.
225 |     if (!fout)
226 |     {
227 |         std::cerr << "Couldn't write file " << out_filename << "\n";
228 |         std::exit(EXIT_FAILURE);
229 |     }
230 | 
231 |     err = clEnqueueUnmapMemObject(command_queue, out_buffer, buf_ptr, 0, NULL, NULL);
232 |     if (err != CL_SUCCESS)
233 |     {
234 |         std::cerr << "Error " << err << " unmapping output buffer." << "\n";
235 |         std::exit(err);
236 |     }
237 | 
238 |     // Clean up cl resources that aren't automatically handled by cl_wrapper
239 |     clReleaseMemObject(src_buffer);
240 |     clReleaseMemObject(out_buffer);
241 | 
242 |     return 0;
243 | }
244 | 


--------------------------------------------------------------------------------
/src/examples/linear_algebra/image_matrix_transpose.cpp:
--------------------------------------------------------------------------------
  1 | //--------------------------------------------------------------------------------------
  2 | // File: image_matrix_transpose.cpp
  3 | // Desc: Demonstrates transposing matrices with images
  4 | //
  5 | // Author:      QUALCOMM
  6 | //
  7 | //               Copyright (c) 2018 QUALCOMM Technologies, Inc.
  8 | //                         All Rights Reserved.
  9 | //                      QUALCOMM Proprietary/GTDR
 10 | //--------------------------------------------------------------------------------------
 11 | 
 12 | // Std includes
 13 | #include <cstdlib>
 14 | #include <cstring>
 15 | #include <iostream>
 16 | 
 17 | // Project includes
 18 | #include "util/cl_wrapper.h"
 19 | #include "util/util.h"
 20 | 
 21 | // Library includes
 22 | #include <CL/cl.h>
 23 | #include <CL/cl_ext_qcom.h>
 24 | 
 25 | static const char *HELP_MESSAGE = "\n" // Usage text; main() prints it to stderr when no input file is given.
 26 | "Usage: image_matrix_transpose <input matrix> [<output file>]\n"
 27 | "Given a matrix, computes its transpose.\n"
 28 | "There is no size restriction for the matrix, but it may be padded with extra elements.\n"
 29 | "If no file is specified for the output, then it is written to stdout.\n";
 30 | // OpenCL C source, one array entry per source line. Each work-item reads four float4 pixels from rows 4*wid_y..4*wid_y+3 of column wid_x and writes the transposed 4x4 tile to rows 4*wid_x..4*wid_x+3 of column wid_y.
 31 | static const char *PROGRAM_SOURCE[] = {
 32 | "__kernel void transpose(__read_only  image2d_t matrix,\n",
 33 | "                        __write_only image2d_t matrix_t)\n",
 34 | "{\n",
 35 | "    const int wid_x     = get_global_id(0);\n",
 36 | "    const int wid_y     = get_global_id(1);\n",
 37 | "    const float4 rows[] = {\n",
 38 | "        read_imagef(matrix, (int2)(wid_x, 4 * wid_y + 0)),\n",
 39 | "        read_imagef(matrix, (int2)(wid_x, 4 * wid_y + 1)),\n",
 40 | "        read_imagef(matrix, (int2)(wid_x, 4 * wid_y + 2)),\n",
 41 | "        read_imagef(matrix, (int2)(wid_x, 4 * wid_y + 3)),\n",
 42 | "        };\n",
 43 | "    write_imagef(matrix_t, (int2)(wid_y, 4 * wid_x + 0), (float4)(rows[0].x, rows[1].x, rows[2].x, rows[3].x));\n",
 44 | "    write_imagef(matrix_t, (int2)(wid_y, 4 * wid_x + 1), (float4)(rows[0].y, rows[1].y, rows[2].y, rows[3].y));\n",
 45 | "    write_imagef(matrix_t, (int2)(wid_y, 4 * wid_x + 2), (float4)(rows[0].z, rows[1].z, rows[2].z, rows[3].z));\n",
 46 | "    write_imagef(matrix_t, (int2)(wid_y, 4 * wid_x + 3), (float4)(rows[0].w, rows[1].w, rows[2].w, rows[3].w));\n",
 47 | "}\n"
 48 | };
 49 | // Number of entries in PROGRAM_SOURCE (array size in bytes divided by the size of one pointer).
 50 | static const cl_uint PROGRAM_SOURCE_LEN = sizeof(PROGRAM_SOURCE) / sizeof(const char *);
 51 | 
 52 | // Computes the transpose of the matrix in the input file using ION-backed
 53 | // RGBA float images and the "transpose" kernel.
 54 | //
 55 | // argv[1] is the input matrix file. argv[2], when given, is the output file;
 56 | // otherwise the result is written to stdout.
 57 | // Returns 0 on success. Usage errors and missing extensions exit with
 58 | // EXIT_FAILURE; OpenCL errors exit with the error code of the failing call.
 59 | int main(int argc, char** argv)
 60 | {
 61 |     if (argc < 2)
 62 |     {
 63 |         std::cerr << "Please specify input file.\n";
 64 |         std::cerr << HELP_MESSAGE;
 65 |         // Missing arguments are a usage error, so the exit status must be non-zero.
 66 |         std::exit(EXIT_FAILURE);
 67 |     }
 68 | 
 69 |     const std::string matrix_a_filename(argv[1]);
 70 |     const bool        output_to_file = argc >= 3;
 71 |     const matrix_t    matrix_a       = load_matrix(matrix_a_filename);
 72 |     const std::string output_filename(output_to_file ? argv[2] : "");
 73 | 
 74 |     // B is the transpose of A, so its dimensions are A's swapped.
 75 |     matrix_t matrix_b;
 76 |     matrix_b.width              = matrix_a.height;
 77 |     matrix_b.height             = matrix_a.width;
 78 |     // Widen each dimension before multiplying so the product is computed in size_t.
 79 |     const size_t matrix_b_size  = static_cast<size_t>(matrix_b.width) * static_cast<size_t>(matrix_b.height);
 80 |     matrix_b.elements.resize(matrix_b_size);
 81 | 
 82 |     cl_wrapper       wrapper;
 83 |     cl_program       program       = wrapper.make_program(PROGRAM_SOURCE, PROGRAM_SOURCE_LEN);
 84 |     cl_kernel        kernel        = wrapper.make_kernel("transpose", program);
 85 |     cl_context       context       = wrapper.get_context();
 86 |     cl_command_queue command_queue = wrapper.get_command_queue();
 87 | 
 88 |     /*
 89 |      * Step 0: Confirm the required OpenCL extensions are supported.
 90 |      */
 91 | 
 92 |     if (!wrapper.check_extension_support("cl_qcom_ext_host_ptr"))
 93 |     {
 94 |         std::cerr << "Extension cl_qcom_ext_host_ptr needed for ION-backed images is not supported.\n";
 95 |         std::exit(EXIT_FAILURE);
 96 |     }
 97 | 
 98 |     if (!wrapper.check_extension_support("cl_qcom_ion_host_ptr"))
 99 |     {
100 |         std::cerr << "Extension cl_qcom_ion_host_ptr needed for ION-backed images is not supported.\n";
101 |         std::exit(EXIT_FAILURE);
102 |     }
103 | 
104 |     cl_int err = CL_SUCCESS;
105 | 
106 |     /*
107 |      * Step 1: Create suitable ION-backed images.
108 |      */
109 | 
110 |     /*
111 |      * Matrix A
112 |      */
113 | 
114 |     cl_image_format matrix_a_format;
115 |     matrix_a_format.image_channel_order     = CL_RGBA;
116 |     matrix_a_format.image_channel_data_type = CL_FLOAT;
117 | 
118 |     // Each RGBA float pixel holds four consecutive elements of a row, so the image
119 |     // width is the matrix width divided by 4, rounded up. The height is rounded up
120 |     // to a multiple of 4 because the kernel reads four rows per work-item.
121 |     cl_image_desc matrix_a_desc;
122 |     std::memset(&matrix_a_desc, 0, sizeof(matrix_a_desc));
123 |     matrix_a_desc.image_type      = CL_MEM_OBJECT_IMAGE2D;
124 |     matrix_a_desc.image_width     = ((matrix_a.width + 3) / 4);
125 |     matrix_a_desc.image_height    = ((matrix_a.height + 3) / 4) * 4;
126 |     matrix_a_desc.image_row_pitch = wrapper.get_ion_image_row_pitch(matrix_a_format, matrix_a_desc);
127 | 
128 |     cl_mem_ion_host_ptr matrix_a_ion_buf = wrapper.make_ion_buffer_for_nonplanar_image(matrix_a_format,
129 |                                                                                        matrix_a_desc);
130 |     cl_mem matrix_a_mem = clCreateImage(
131 |             context,
132 |             CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM,
133 |             &matrix_a_format,
134 |             &matrix_a_desc,
135 |             &matrix_a_ion_buf,
136 |             &err
137 |     );
138 |     if (err != CL_SUCCESS)
139 |     {
140 |         std::cerr << "Error " << err << " with clCreateImage for matrix A." << "\n";
141 |         std::exit(err);
142 |     }
143 | 
144 |     char         *image_ptr;
145 |     const size_t  origin[]          = {0, 0, 0};
146 |     const size_t  matrix_a_region[] = {matrix_a_desc.image_width, matrix_a_desc.image_height, 1};
147 |     size_t        row_pitch         = 0;
148 |     image_ptr = static_cast<char *>(clEnqueueMapImage(
149 |             command_queue,
150 |             matrix_a_mem,
151 |             CL_BLOCKING,
152 |             CL_MAP_WRITE,
153 |             origin,
154 |             matrix_a_region,
155 |             &row_pitch,
156 |             NULL,
157 |             0,
158 |             NULL,
159 |             NULL,
160 |             &err
161 |     ));
162 |     if (err != CL_SUCCESS)
163 |     {
164 |         std::cerr << "Error " << err << " mapping matrix A image." << "\n";
165 |         std::exit(err);
166 |     }
167 | 
168 |     // Copy A row by row into the mapped image, zero-filling everything past the
169 |     // real data: the tail of each row and any rows added by rounding the height up.
170 |     const size_t matrix_a_row_bytes = sizeof(cl_float) * matrix_a.width;
171 |     for (size_t i = 0; i < matrix_a_desc.image_height; ++i)
172 |     {
173 |         char *const row_ptr = image_ptr + i * row_pitch;
174 |         if (i < static_cast<size_t>(matrix_a.height))
175 |         {
176 |             std::memcpy(row_ptr, matrix_a.elements.data() + i * matrix_a.width, matrix_a_row_bytes);
177 |             std::memset(row_ptr + matrix_a_row_bytes, 0, row_pitch - matrix_a_row_bytes);
178 |         }
179 |         else
180 |         {
181 |             std::memset(row_ptr, 0, row_pitch);
182 |         }
183 |     }
184 | 
185 |     err = clEnqueueUnmapMemObject(command_queue, matrix_a_mem, image_ptr, 0, NULL, NULL);
186 |     if (err != CL_SUCCESS)
187 |     {
188 |         std::cerr << "Error " << err << " unmapping matrix A image." << "\n";
189 |         std::exit(err);
190 |     }
191 | 
192 |     /*
193 |      * Matrix B
194 |      */
195 | 
196 |     cl_image_format matrix_b_format;
197 |     matrix_b_format.image_channel_order     = CL_RGBA;
198 |     matrix_b_format.image_channel_data_type = CL_FLOAT;
199 | 
200 |     cl_image_desc matrix_b_desc;
201 |     std::memset(&matrix_b_desc, 0, sizeof(matrix_b_desc));
202 |     matrix_b_desc.image_type      = CL_MEM_OBJECT_IMAGE2D;
203 |     matrix_b_desc.image_width     = ((matrix_b.width + 3) / 4);
204 |     matrix_b_desc.image_height    = ((matrix_b.height + 3) / 4) * 4;
205 |     matrix_b_desc.image_row_pitch = wrapper.get_ion_image_row_pitch(matrix_b_format, matrix_b_desc);
206 | 
207 |     cl_mem_ion_host_ptr matrix_b_ion_buf = wrapper.make_ion_buffer_for_nonplanar_image(matrix_b_format,
208 |                                                                                        matrix_b_desc);
209 |     cl_mem matrix_b_mem = clCreateImage(
210 |             context,
211 |             CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM,
212 |             &matrix_b_format,
213 |             &matrix_b_desc,
214 |             &matrix_b_ion_buf,
215 |             &err
216 |     );
217 |     if (err != CL_SUCCESS)
218 |     {
219 |         std::cerr << "Error " << err << " with clCreateImage for matrix B." << "\n";
220 |         std::exit(err);
221 |     }
222 | 
223 |     /*
224 |      * Step 2: Set up the kernel arguments
225 |      */
226 | 
227 |     err = clSetKernelArg(kernel, 0, sizeof(matrix_a_mem), &matrix_a_mem);
228 |     if (err != CL_SUCCESS)
229 |     {
230 |         std::cerr << "Error " << err << " with clSetKernelArg for argument 0." << "\n";
231 |         std::exit(err);
232 |     }
233 | 
234 |     err = clSetKernelArg(kernel, 1, sizeof(matrix_b_mem), &matrix_b_mem);
235 |     if (err != CL_SUCCESS)
236 |     {
237 |         std::cerr << "Error " << err << " with clSetKernelArg for argument 1." << "\n";
238 |         std::exit(err);
239 |     }
240 | 
241 |     /*
242 |      * Step 3: Run the kernel.
243 |      */
244 | 
245 |     // One work-item per 4x4 tile: image_width pixel columns by image_height / 4 row groups.
246 |     const size_t global_work_size[] = {matrix_a_desc.image_width, matrix_a_desc.image_height / 4};
247 |     err = clEnqueueNDRangeKernel(
248 |             command_queue,
249 |             kernel,
250 |             2,
251 |             NULL,
252 |             global_work_size,
253 |             NULL,
254 |             0,
255 |             NULL,
256 |             NULL
257 |     );
258 |     if (err != CL_SUCCESS)
259 |     {
260 |         std::cerr << "Error " << err << " with clEnqueueNDRangeKernel." << "\n";
261 |         std::exit(err);
262 |     }
263 | 
264 |     /*
265 |      * Step 4: Copy the data out of the ION buffer.
266 |      */
267 | 
268 |     const size_t  matrix_b_region[] = {matrix_b_desc.image_width, matrix_b_desc.image_height, 1};
269 |     image_ptr = static_cast<char *>(clEnqueueMapImage(
270 |             command_queue,
271 |             matrix_b_mem,
272 |             CL_BLOCKING,
273 |             CL_MAP_READ,
274 |             origin,
275 |             matrix_b_region,
276 |             &row_pitch,
277 |             NULL,
278 |             0,
279 |             NULL,
280 |             NULL,
281 |             &err
282 |     ));
283 |     if (err != CL_SUCCESS)
284 |     {
285 |         std::cerr << "Error " << err << " with clEnqueueMapImage for matrix B." << "\n";
286 |         std::exit(err);
287 |     }
288 | 
289 |     // Copy only the real rows and columns of B back out, skipping the image padding.
290 |     const size_t matrix_b_row_bytes = sizeof(cl_float) * matrix_b.width;
291 |     for (size_t i = 0; i < static_cast<size_t>(matrix_b.height); ++i)
292 |     {
293 |         std::memcpy(
294 |                 matrix_b.elements.data() + i * matrix_b.width,
295 |                 image_ptr                + i * row_pitch,
296 |                 matrix_b_row_bytes
297 |         );
298 |     }
299 |     err = clEnqueueUnmapMemObject(command_queue, matrix_b_mem, image_ptr, 0, NULL, NULL);
300 |     if (err != CL_SUCCESS)
301 |     {
302 |         std::cerr << "Error " << err << " with clEnqueueUnmapMemObject." << "\n";
303 |         std::exit(err);
304 |     }
305 | 
306 |     clFinish(command_queue);
307 | 
308 |     if (output_to_file)
309 |     {
310 |         save_matrix(output_filename, matrix_b);
311 |     }
312 |     else
313 |     {
314 |         save_matrix(std::cout, matrix_b);
315 |     }
316 | 
317 |     // Clean up cl resources that aren't automatically handled by cl_wrapper
318 |     clReleaseMemObject(matrix_a_mem);
319 |     clReleaseMemObject(matrix_b_mem);
320 | 
321 |     return 0;
322 | }
323 | 


--------------------------------------------------------------------------------
/src/examples/linear_algebra/matrix_addition.cpp:
--------------------------------------------------------------------------------
  1 | //--------------------------------------------------------------------------------------
  2 | // File: matrix_addition.cpp
  3 | // Desc: Demonstrates adding two matrices with ION-backed buffers
  4 | //
  5 | // Author:      QUALCOMM
  6 | //
  7 | //               Copyright (c) 2018 QUALCOMM Technologies, Inc.
  8 | //                         All Rights Reserved.
  9 | //                      QUALCOMM Proprietary/GTDR
 10 | //--------------------------------------------------------------------------------------
 11 | 
 12 | // Std includes
 13 | #include <cstdlib>
 14 | #include <cstring>
 15 | #include <iostream>
 16 | 
 17 | // Project includes
 18 | #include "util/cl_wrapper.h"
 19 | #include "util/util.h"
 20 | 
 21 | // Library includes
 22 | #include <CL/cl.h>
 23 | #include <CL/cl_ext_qcom.h>
 24 | 
 25 | static const char *HELP_MESSAGE = "\n" // Usage text; main() prints it to stderr when too few arguments are given.
 26 | "Usage: matrix_addition <matrix A> <matrix B> [<output file>]\n"
 27 | "Computes the matrix sum C = A + B. See README.md for matrix input format.\n"
 28 | "If no file is specified for the output, then it is written to stdout.\n";
 29 | // OpenCL C source, one array entry per source line: work-item i stores matrix_a[i] + matrix_b[i] into matrix_c[i].
 30 | static const char *PROGRAM_SOURCE[] = {
 31 | "__kernel void buffer_addition(__global const float *matrix_a,\n",
 32 | "                              __global const float *matrix_b,\n",
 33 | "                              __global       float *matrix_c)\n",
 34 | "{\n",
 35 | "    const int wid_x = get_global_id(0);\n",
 36 | "    matrix_c[wid_x] = matrix_a[wid_x] + matrix_b[wid_x];\n",
 37 | "}\n",
 38 | };
 39 | // Number of entries in PROGRAM_SOURCE (array size in bytes divided by the size of one pointer).
 40 | static const cl_uint PROGRAM_SOURCE_LEN = sizeof(PROGRAM_SOURCE) / sizeof(const char *);
 41 | 
 42 | // Computes C = A + B for two matrices of identical dimensions using ION-backed
 43 | // buffers and the "buffer_addition" kernel.
 44 | //
 45 | // argv[1] and argv[2] are the input matrix files. argv[3], when given, is the
 46 | // output file; otherwise the result is written to stdout.
 47 | // Returns 0 on success. Usage errors, mismatched dimensions, and missing
 48 | // extensions exit with EXIT_FAILURE; OpenCL errors exit with the error code of
 49 | // the failing call.
 50 | int main(int argc, char** argv)
 51 | {
 52 |     if (argc < 3)
 53 |     {
 54 |         std::cerr << "Please specify input files.\n";
 55 |         std::cerr << HELP_MESSAGE;
 56 |         // Missing arguments are a usage error, so the exit status must be non-zero.
 57 |         std::exit(EXIT_FAILURE);
 58 |     }
 59 |     const std::string matrix_a_filename(argv[1]);
 60 |     const std::string matrix_b_filename(argv[2]);
 61 |     const bool        output_to_file = argc >= 4;
 62 |     const matrix_t    matrix_a       = load_matrix(matrix_a_filename);
 63 |     const matrix_t    matrix_b       = load_matrix(matrix_b_filename);
 64 |     // Widen each dimension before multiplying so the product is computed in size_t.
 65 |     const size_t      matrix_size    = static_cast<size_t>(matrix_a.width) * static_cast<size_t>(matrix_a.height);
 66 |     const size_t      matrix_bytes   = matrix_size * sizeof(cl_float);
 67 |     const std::string output_filename(output_to_file ? argv[3] : "");
 68 | 
 69 |     // Reject the inputs if EITHER dimension differs: every buffer below is sized
 70 |     // from A, so a B of a different shape must not be copied with A's byte count.
 71 |     if (matrix_a.width != matrix_b.width || matrix_a.height != matrix_b.height)
 72 |     {
 73 |         std::cerr << "Matrix A and B must have the same dimensions.\n";
 74 |         std::exit(EXIT_FAILURE);
 75 |     }
 76 | 
 77 |     cl_wrapper       wrapper;
 78 |     cl_program       program       = wrapper.make_program(PROGRAM_SOURCE, PROGRAM_SOURCE_LEN);
 79 |     cl_kernel        kernel        = wrapper.make_kernel("buffer_addition", program);
 80 |     cl_context       context       = wrapper.get_context();
 81 |     cl_command_queue command_queue = wrapper.get_command_queue();
 82 | 
 83 |     /*
 84 |      * Step 0: Confirm the required OpenCL extensions are supported.
 85 |      */
 86 | 
 87 |     if (!wrapper.check_extension_support("cl_qcom_ext_host_ptr"))
 88 |     {
 89 |         std::cerr << "Extension cl_qcom_ext_host_ptr needed for ION-backed images is not supported.\n";
 90 |         std::exit(EXIT_FAILURE);
 91 |     }
 92 | 
 93 |     if (!wrapper.check_extension_support("cl_qcom_ion_host_ptr"))
 94 |     {
 95 |         std::cerr << "Extension cl_qcom_ion_host_ptr needed for ION-backed images is not supported.\n";
 96 |         std::exit(EXIT_FAILURE);
 97 |     }
 98 | 
 99 |     /*
100 |      * Step 1: Create suitable ION-backed buffers.
101 |      */
102 | 
103 |     cl_int err = CL_SUCCESS;
104 | 
105 |     // Each input is copied into its ION allocation before the CL buffer is created on top of it.
106 |     cl_mem_ion_host_ptr matrix_a_ion_buf = wrapper.make_ion_buffer(matrix_bytes);
107 |     std::memcpy(matrix_a_ion_buf.ion_hostptr, matrix_a.elements.data(), matrix_bytes);
108 |     cl_mem              matrix_a_mem     = clCreateBuffer(
109 |             context,
110 |             CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM,
111 |             matrix_bytes,
112 |             &matrix_a_ion_buf,
113 |             &err
114 |     );
115 |     if (err != CL_SUCCESS)
116 |     {
117 |         std::cerr << "Error " << err << " with clCreateBuffer for matrix A." << "\n";
118 |         std::exit(err);
119 |     }
120 | 
121 |     cl_mem_ion_host_ptr matrix_b_ion_buf = wrapper.make_ion_buffer(matrix_bytes);
122 |     std::memcpy(matrix_b_ion_buf.ion_hostptr, matrix_b.elements.data(), matrix_bytes);
123 |     cl_mem              matrix_b_mem     = clCreateBuffer(
124 |             context,
125 |             CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM,
126 |             matrix_bytes,
127 |             &matrix_b_ion_buf,
128 |             &err
129 |     );
130 |     if (err != CL_SUCCESS)
131 |     {
132 |         std::cerr << "Error " << err << " with clCreateBuffer for matrix B." << "\n";
133 |         std::exit(err);
134 |     }
135 | 
136 |     cl_mem_ion_host_ptr matrix_c_ion_buf = wrapper.make_ion_buffer(matrix_bytes);
137 |     cl_mem              matrix_c_mem     = clCreateBuffer(
138 |             context,
139 |             CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM,
140 |             matrix_bytes,
141 |             &matrix_c_ion_buf,
142 |             &err
143 |     );
144 |     if (err != CL_SUCCESS)
145 |     {
146 |         std::cerr << "Error " << err << " with clCreateBuffer for matrix C." << "\n";
147 |         std::exit(err);
148 |     }
149 | 
150 |     /*
151 |      * Step 2: Set up the kernel arguments
152 |      */
153 | 
154 |     err = clSetKernelArg(kernel, 0, sizeof(matrix_a_mem), &matrix_a_mem);
155 |     if (err != CL_SUCCESS)
156 |     {
157 |         std::cerr << "Error " << err << " with clSetKernelArg for argument 0." << "\n";
158 |         std::exit(err);
159 |     }
160 | 
161 |     err = clSetKernelArg(kernel, 1, sizeof(matrix_b_mem), &matrix_b_mem);
162 |     if (err != CL_SUCCESS)
163 |     {
164 |         std::cerr << "Error " << err << " with clSetKernelArg for argument 1." << "\n";
165 |         std::exit(err);
166 |     }
167 | 
168 |     err = clSetKernelArg(kernel, 2, sizeof(matrix_c_mem), &matrix_c_mem);
169 |     if (err != CL_SUCCESS)
170 |     {
171 |         std::cerr << "Error " << err << " with clSetKernelArg for argument 2." << "\n";
172 |         std::exit(err);
173 |     }
174 | 
175 |     /*
176 |      * Step 3: Run the kernel.
177 |      */
178 | 
179 |     // One work-item per matrix element.
180 |     const size_t global_work_size = matrix_size;
181 |     err = clEnqueueNDRangeKernel(
182 |             command_queue,
183 |             kernel,
184 |             1,
185 |             NULL,
186 |             &global_work_size,
187 |             NULL,
188 |             0,
189 |             NULL,
190 |             NULL
191 |     );
192 |     if (err != CL_SUCCESS)
193 |     {
194 |         std::cerr << "Error " << err << " with clEnqueueNDRangeKernel." << "\n";
195 |         std::exit(err);
196 |     }
197 | 
198 |     /*
199 |      * Step 4: Copy the data out of the ION buffer.
200 |      */
201 | 
202 |     cl_float *ptr = static_cast<cl_float *>(clEnqueueMapBuffer(
203 |             command_queue,
204 |             matrix_c_mem,
205 |             CL_BLOCKING,
206 |             CL_MAP_READ,
207 |             0,
208 |             matrix_bytes,
209 |             0,
210 |             NULL,
211 |             NULL,
212 |             &err
213 |     ));
214 |     if (err != CL_SUCCESS)
215 |     {
216 |         std::cerr << "Error " << err << " with clEnqueueMapBuffer." << "\n";
217 |         std::exit(err);
218 |     }
219 | 
220 |     matrix_t matrix_c;
221 |     matrix_c.width  = matrix_a.width;
222 |     matrix_c.height = matrix_a.height;
223 |     matrix_c.elements.resize(matrix_size);
224 |     std::memcpy(matrix_c.elements.data(), ptr, matrix_bytes);
225 | 
226 |     err = clEnqueueUnmapMemObject(command_queue, matrix_c_mem, ptr, 0, NULL, NULL);
227 |     if (err != CL_SUCCESS)
228 |     {
229 |         std::cerr << "Error " << err << " with clEnqueueUnmapMemObject." << "\n";
230 |         std::exit(err);
231 |     }
232 | 
233 |     clFinish(command_queue);
234 | 
235 |     if (output_to_file)
236 |     {
237 |         save_matrix(output_filename, matrix_c);
238 |     }
239 |     else
240 |     {
241 |         save_matrix(std::cout, matrix_c);
242 |     }
243 | 
244 |     // Clean up cl resources that aren't automatically handled by cl_wrapper
245 |     clReleaseMemObject(matrix_a_mem);
246 |     clReleaseMemObject(matrix_b_mem);
247 |     clReleaseMemObject(matrix_c_mem);
248 | 
249 |     return 0;
250 | }
251 | 


--------------------------------------------------------------------------------
/src/util/cl_wrapper.h:
--------------------------------------------------------------------------------
  1 | //--------------------------------------------------------------------------------------
  2 | // File: cl_wrapper.h
  3 | // Desc:
  4 | //
  5 | // Author:      QUALCOMM
  6 | //
  7 | //               Copyright (c) 2017 QUALCOMM Technologies, Inc.
  8 | //                         All Rights Reserved.
  9 | //                      QUALCOMM Proprietary/GTDR
 10 | //--------------------------------------------------------------------------------------
 11 | 
 12 | #ifndef SDK_EXAMPLES_CL_WRAPPER_H
 13 | #define SDK_EXAMPLES_CL_WRAPPER_H
 14 | #include <string>
 15 | #include <vector>
 16 | #include <utility>
 17 | 
 18 | #include <CL/cl.h>
 19 | #include <CL/cl_ext_qcom.h>
 20 | 
 21 | #ifdef USES_ANDROID_CMAKE
 22 | #include <msm_ion.h>
 23 | #include <ion.h>
 24 | #else /* USES_ANDROID_CMAKE */
 25 | #ifdef USES_LIBION
 26 | #include <drivers/staging/android/uapi/msm_ion.h>
 27 | #include <ion/ion.h>
 28 | #else /* USES_LIBION */
 29 | #include <linux/msm_ion.h>
 30 | #include <linux/ion.h>
 31 | #endif /* USES_LIBION */
 32 | #endif /* USES_ANDROID_CMAKE */
 33 | 
 34 | #include "util.h"
 35 | 
 36 | /**
 37 |  * \brief A wrapper around OpenCL setup/teardown code.
 38 |  *
 39 |  * All objects exposed are owned by the wrapper, and are cleaned up when it is destroyed.
 40 |  */
 41 | class cl_wrapper {
 42 | public:
 43 |     /**
 44 |      * \brief Sets up OpenCL.
 45 |      */
 46 |     cl_wrapper();
 47 | 
 48 |     /**
 49 |      * \brief Frees associated OpenCL objects, including the results of make_kernel, make_program, and make_ion_buffer.
 50 |      */
 51 |     ~cl_wrapper();
 52 | 
 53 |     /**
 54 |      * \brief Gets the cl_context associated with the wrapper for using in OpenCL functions.
 55 |      * @return the cl_context owned by this wrapper
 56 |      */
 57 |     cl_context          get_context() const;
 58 | 
 59 |     /**
 60 |      * \brief Gets the cl_command_queue associated with the wrapper for using in OpenCL functions.
 61 |      * @return the cl_command_queue owned by this wrapper
 62 |      */
 63 |     cl_command_queue    get_command_queue() const;
 64 | 
 65 |     /**
 66 |      * \brief Makes a cl_kernel from the given program.
 67 |      *
 68 |      * @param kernel_name - The name of the kernel to make
 69 |      * @param program - The program to make the kernel from
 70 |      * @return the new cl_kernel, which is freed when the wrapper is destroyed
 71 |      */
 72 |     cl_kernel           make_kernel(const std::string &kernel_name, cl_program program);
 73 | 
 74 |     /**
 75 |      * Makes a cl_program (whose lifetime is managed by cl_wrapper) from the given source code strings.
 76 |      *
 77 |      * @param program_source - The source code strings.
 78 |      * @param program_source_len - The length of program_source
 79 |      * @return the new cl_program, which is freed when the wrapper is destroyed
 80 |      */
 81 |     cl_program          make_program(const char **program_source, cl_uint program_source_len);
 82 | 
 83 |     /**
 84 |      * \brief Makes an uncached ion buffer that can be used for a YUV 4:2:0 image.
 85 |      *
 86 |      * @param img_format [in] - The image format
 87 |      * @param img_desc [in] - The image description
 88 |      * @return the cl_mem_ion_host_ptr for the new ion buffer
 89 |      */
 90 |     cl_mem_ion_host_ptr make_ion_buffer_for_yuv_image(const cl_image_format &img_format, const cl_image_desc &img_desc);
 91 | 
 92 |     /**
 93 |      * \brief Makes an uncached ion buffer that can be used for a nonplanar image, e.g. CL_R or CL_RGB
 94 |      *
 95 |      * @param img_format [in] - The image format
 96 |      * @param img_desc [in] - The image description
 97 |      * @return the cl_mem_ion_host_ptr for the new ion buffer
 98 |      */
 99 |     cl_mem_ion_host_ptr make_ion_buffer_for_nonplanar_image(const cl_image_format &img_format, const cl_image_desc &img_desc);
100 | 
101 |     /**
102 |      * \brief Makes an uncached ion buffer that can be used for a compressed image.
103 |      *
104 |      * @param img_format [in] - The image format
105 |      * @param img_desc [in] - The image description
106 |      * @return the cl_mem_ion_host_ptr for the new ion buffer
107 |      */
108 |     cl_mem_ion_host_ptr make_ion_buffer_for_compressed_image(cl_image_format img_format, const cl_image_desc &img_desc);
109 | 
110 |     /**
111 |      * \brief Makes an uncached ion buffer of the specified size.
112 |      *
113 |      * @param size [in] - Desired buffer size
114 |      * @return the cl_mem_ion_host_ptr for the new ion buffer
115 |      */
116 |     cl_mem_ion_host_ptr make_ion_buffer(size_t size);
117 | 
118 |     /**
119 |      * \brief Makes an ion buffer of the specified size, using the IO-coherent
120 |      *        cache policy.
121 |      *
122 |      * @param size [in] - Desired buffer size
123 |      * @return the cl_mem_ion_host_ptr for the new ion buffer
124 |      */
125 |     cl_mem_ion_host_ptr make_iocoherent_ion_buffer(size_t size);
126 | 
127 |     /**
128 |      * \brief Makes an ion buffer that can be used for a YUV 4:2:0 image, using
129 |      *        the IO-coherent cache policy.
130 |      *
131 |      * @param img_format [in] - The image format
132 |      * @param img_desc [in] - The image description
133 |      * @return the cl_mem_ion_host_ptr for the new ion buffer
134 |      */
135 |     cl_mem_ion_host_ptr make_iocoherent_ion_buffer_for_yuv_image(const cl_image_format &img_format, const cl_image_desc &img_desc);
136 | 
137 |     /**
138 |      * \brief Checks if the wrapped device supports the desired extension via clGetDeviceInfo
139 |      *
140 |      * @param desired_extension - The name of the extension to look for
141 |      * @return true if the desired_extension is supported, otherwise false
142 |      */
143 |     bool                check_extension_support(const std::string &desired_extension) const;
144 | 
145 |     /**
146 |      * \brief Gets the required row pitch for the given image. Must be considered when accessing the underlying ion buffer.
147 |      *
148 |      * @param img_format [in] - The image format
149 |      * @param img_desc [in] - The image description
150 |      * @return the image row pitch
151 |      */
152 |     size_t              get_ion_image_row_pitch(const cl_image_format &img_format, const cl_image_desc &img_desc) const;
153 | 
154 |     /**
155 |      * \brief Gets the max workgroup size for the specified kernel.
156 |      *
157 |      * @param kernel - The kernel to query
158 |      * @return the max workgroup size for kernel
159 |      */
160 |     size_t              get_max_workgroup_size(cl_kernel kernel) const;
161 | 
162 | private:
163 |     // NOTE(review): presumably the shared helper behind the public make_*ion_buffer* methods -- implementation not visible here.
164 |     cl_mem_ion_host_ptr
165 |     make_ion_buffer_internal(size_t size, unsigned int ion_allocation_flags, cl_uint host_cache_policy);
166 | 
167 |     // Data members
168 |     cl_device_id m_device;
169 |     cl_context m_context;            // returned by get_context()
170 |     cl_command_queue m_cmd_queue;    // returned by get_command_queue()
171 |     std::vector<cl_program> m_programs; // presumably every program handed out by make_program -- TODO confirm
172 |     std::vector<cl_kernel> m_kernels;   // presumably every kernel handed out by make_kernel -- TODO confirm
173 | 
174 |     // ION stuff
175 | #if USES_LIBION
176 |     // Nothing extra is stored in this configuration (no ion_handle_data member)
177 | #else
178 |     std::vector<ion_handle_data> m_handle_data;
179 | #endif
180 |     std::vector<int> m_file_descs;
181 |     std::vector<std::pair<void*, size_t>> m_ion_host_ptrs; // presumably (host pointer, size in bytes) per ion buffer -- TODO confirm
182 |     int m_ion_device_fd;
183 | };
184 | 
185 | 
186 | #endif //SDK_EXAMPLES_CL_WRAPPER_H
187 | 


--------------------------------------------------------------------------------
/src/util/half_float.cpp:
--------------------------------------------------------------------------------
  1 | //--------------------------------------------------------------------------------------
  2 | // File: half_float.cpp
  3 | // Desc:
  4 | //
  5 | // Author:      QUALCOMM
  6 | //
  7 | //               Copyright (c) 2018 QUALCOMM Technologies, Inc.
  8 | //                         All Rights Reserved.
  9 | //                      QUALCOMM Proprietary/GTDR
 10 | //--------------------------------------------------------------------------------------
 11 | 
 12 | #include "half_float.h"
 13 | #include <cmath>
 14 | #include <limits>
 15 | 
 16 | // Converts a 32-bit float to a 16-bit half, truncating (not rounding) the extra mantissa bits.
 17 | // NaN yields a NaN pattern and +/-infinity stays infinite; finite values above the half range
 18 | // clamp to the largest finite half, and values below the normal range become subnormal or zero.
 19 | cl_half to_half(float f)
 20 | {
 21 |     static const struct {
 22 |         unsigned int bit_size       = 16;                                                 // total number of bits in the representation
 23 |         unsigned int num_frac_bits  = 10;                                                 // number of fractional (mantissa) bits
 24 |         unsigned int num_exp_bits   = 5;                                                  // number of (biased) exponent bits
 25 |         unsigned int sign_bit       = 15;                                                 // position of the sign bit
 26 |         unsigned int sign_mask      = 1 << 15;                                            // mask to extract sign bit
 27 |         unsigned int frac_mask      = (1 << 10) - 1;                                      // mask to extract the fractional (mantissa) bits
 28 |         unsigned int exp_mask       = ((1 << 5) - 1) << 10;                               // mask to extract the exponent bits
 29 |         unsigned int e_max          = (1 << (5 - 1)) - 1;                                 // max value for the exponent
 30 |         int          e_min          = -((1 << (5 - 1)) - 1) + 1;                          // min value for the exponent
 31 |         unsigned int max_normal     = ((((1 << (5 - 1)) - 1) + 127) << 23) | 0x7FE000;    // float32 bit pattern of the largest finite half (65504)
 32 |         unsigned int min_normal     = ((-((1 << (5 - 1)) - 1) + 1) + 127) << 23;          // float32 bit pattern of the smallest normal half (2^-14)
 33 |         unsigned int bias_diff      = ((unsigned int)(((1 << (5 - 1)) - 1) - 127) << 23); // difference in bias between the float16 and float32 exponent
 34 |         unsigned int frac_bits_diff = 23 - 10;                                            // difference in number of fractional bits between float16/float32
 35 |     } float16_params;
 36 | 
 37 |     static const struct {
 38 |         unsigned int abs_value_mask    = 0x7FFFFFFF; // ANDing with this value gives the abs value
 39 |         unsigned int sign_bit_mask     = 0x80000000; // ANDing with this value gives the sign
 40 |         unsigned int e_max             = 127;        // max value for the exponent
 41 |         unsigned int num_mantissa_bits = 23;         // 23 bit mantissa on single precision floats
 42 |         unsigned int mantissa_mask     = 0x007FFFFF; // 23 bit mantissa on single precision floats
 43 |     } float32_params;
 44 | 
 45 |     const union {
 46 |         float f;
 47 |         unsigned int bits;
 48 |     } value = {f};
 49 | 
 50 |     const unsigned int f_abs_bits = value.bits & float32_params.abs_value_mask;
 51 |     const bool         is_neg     = value.bits & float32_params.sign_bit_mask;
 52 |     const unsigned int sign       = (value.bits & float32_params.sign_bit_mask) >> (float16_params.num_frac_bits + float16_params.num_exp_bits + 1);
 53 |     cl_half            half       = 0;
 54 | 
 55 |     if (std::isnan(value.f))
 56 |     {
 57 |         half = float16_params.exp_mask | float16_params.frac_mask;
 58 |     }
 59 |     else if (std::isinf(value.f))
 60 |     {
 61 |         half = is_neg ? float16_params.sign_mask | float16_params.exp_mask : float16_params.exp_mask;
 62 |     }
 63 |     else if (f_abs_bits > float16_params.max_normal)
 64 |     {
 65 |         // Clamp to the largest finite half (exponent 0x1E, full mantissa = 0x7BFF); an all-ones exponent would encode NaN
 66 |         half = sign | (((1 << float16_params.num_exp_bits) - 2) << float16_params.num_frac_bits) | float16_params.frac_mask;
 67 |     }
 68 |     else if (f_abs_bits < float16_params.min_normal)
 69 |     {
 70 |         const unsigned int frac_bits    = (f_abs_bits & float32_params.mantissa_mask) | (1 << float32_params.num_mantissa_bits);
 71 |         const int          nshift       = float16_params.e_min + float32_params.e_max - (f_abs_bits >> float32_params.num_mantissa_bits);
 72 |         const unsigned int shifted_bits = nshift < 24 ? frac_bits >> nshift : 0;
 73 |         half                            = sign | (shifted_bits >> float16_params.frac_bits_diff);
 74 |     }
 75 |     else
 76 |     {
 77 |         half = sign | ((f_abs_bits + float16_params.bias_diff) >> float16_params.frac_bits_diff);
 78 |     }
 79 |     return half;
 80 | }
 81 | 
 82 | // Converts a 16-bit half to a 32-bit float. Infinities keep their sign, every NaN input becomes
 83 | // a quiet NaN, zero/subnormal inputs are scaled by the smallest half subnormal (2^-24), and
 84 | // normal inputs are rebuilt by re-biasing the exponent and widening the mantissa.
 85 | cl_float to_float(cl_half f)
 86 | {
 87 |     static const struct {
 88 |         uint16_t sign_mask                   = 0x8000;
 89 |         uint16_t exp_mask                    = 0x7C00;
 90 |         int      exp_bias                    = 15;
 91 |         int      exp_offset                  = 10;
 92 |         uint16_t biased_exp_max              = (1 << 5) - 1;
 93 |         uint16_t frac_mask                   = 0x03FF;
 94 |         float    smallest_subnormal_as_float = 5.96046448e-8f;
 95 |     } float16_params;
 96 | 
 97 |     static const struct {
 98 |         int sign_offset = 31;
 99 |         int exp_bias    = 127;
100 |         int exp_offset  = 23;
101 |     } float32_params;
102 | 
103 |     const bool     is_pos          = (f & float16_params.sign_mask) == 0;
104 |     const uint32_t biased_exponent = (f & float16_params.exp_mask) >> float16_params.exp_offset;
105 |     const uint32_t frac            = (f & float16_params.frac_mask);
106 |     const bool     is_inf          = biased_exponent == float16_params.biased_exp_max && (frac == 0);
107 | 
108 |     if (is_inf)
109 |     {
110 |         return is_pos ? std::numeric_limits<float>::infinity() : -std::numeric_limits<float>::infinity();
111 |     }
112 | 
113 |     const bool is_nan = biased_exponent == float16_params.biased_exp_max && (frac != 0);
114 |     if (is_nan)
115 |     {
116 |         return std::numeric_limits<float>::quiet_NaN();
117 |     }
118 | 
119 |     const bool is_subnormal = biased_exponent == 0; // also true for +/-0 (frac == 0)
120 |     if (is_subnormal)
121 |     {
122 |         return static_cast<float>(frac) * float16_params.smallest_subnormal_as_float * (is_pos ? 1.f : -1.f);
123 |     }
124 | 
125 |     const int      unbiased_exp        = static_cast<int>(biased_exponent) - float16_params.exp_bias;
126 |     const uint32_t biased_f32_exponent = static_cast<uint32_t>(unbiased_exp + float32_params.exp_bias);
127 | 
128 |     union {
129 |         cl_float f;
130 |         uint32_t ui;
131 |     } res = {0};
132 | 
133 |     res.ui = (is_pos ? 0u : (1u << float32_params.sign_offset)) // unsigned shift: never moves a 1 into a signed int's sign bit
134 |              | (biased_f32_exponent << float32_params.exp_offset)
135 |              | (frac << (float32_params.exp_offset - float16_params.exp_offset));
136 | 
137 |     return res.f;
138 | }
139 | 


--------------------------------------------------------------------------------
/src/util/half_float.h:
--------------------------------------------------------------------------------
 1 | //--------------------------------------------------------------------------------------
 2 | // File: half_float.h
 3 | // Desc:
 4 | //
 5 | // Author:      QUALCOMM
 6 | //
 7 | //               Copyright (c) 2018 QUALCOMM Technologies, Inc.
 8 | //                         All Rights Reserved.
 9 | //                      QUALCOMM Proprietary/GTDR
10 | //--------------------------------------------------------------------------------------
11 | 
12 | #ifndef SDK_EXAMPLES_HALF_FLOAT_H
13 | #define SDK_EXAMPLES_HALF_FLOAT_H
14 | 
15 | #include <CL/cl.h>
16 | 
17 | /**
18 |  * \brief Given a 32-bit float, converts it (potentially with some error due to loss of precision)
19 |  * to a 16-bit half float for use with OpenCL.
20 |  *
21 |  * @param f [in] - The 32-bit float to convert
22 |  * @return the equivalent 16-bit half float
23 |  */
24 | cl_half to_half(float f);
25 | 
26 | /**
27 |  * \brief Given a 16-bit half float, converts it to a 32-bit float.
28 |  *
29 |  * @param f [in] - The 16-bit half float to convert
30 |  * @return the equivalent 32-bit float
31 |  */
32 | cl_float to_float(cl_half f);
33 | 
34 | #endif //SDK_EXAMPLES_HALF_FLOAT_H
35 | 


--------------------------------------------------------------------------------
/src/util/util.h:
--------------------------------------------------------------------------------
  1 | //--------------------------------------------------------------------------------------
  2 | // File: util.h
  3 | // Desc:
  4 | //
  5 | // Author:      QUALCOMM
  6 | //
  7 | //               Copyright (c) 2017 QUALCOMM Technologies, Inc.
  8 | //                         All Rights Reserved.
  9 | //                      QUALCOMM Proprietary/GTDR
 10 | //--------------------------------------------------------------------------------------
 11 | 
 12 | #ifndef SDK_EXAMPLES_UTIL_H
 13 | #define SDK_EXAMPLES_UTIL_H
 14 | 
 15 | #include <algorithm>
 16 | #include <string>
 17 | #include <sstream>
 18 | #include <vector>
 19 | #include <CL/cl.h>
 20 | 
 21 | /**
 22 |  * \brief yuv_image_t represents the "raw bytes" + width and height of YUV image with two planes.
 23 |  *        this encompasses e.g. NV12, TP10, P010.
 24 |  */
 25 | struct yuv_image_t
 26 | {
 27 |     uint32_t y_width; // width of the Y plane -- presumably in pixels, TODO confirm
 28 |     uint32_t y_height; // height of the Y plane -- presumably in rows, TODO confirm
 29 |     std::vector<unsigned char> y_plane; // raw bytes of the Y plane
 30 |     std::vector<unsigned char> uv_plane; // raw bytes of the second (UV) plane
 31 | };
 32 | 
 33 | struct nv12_image_t : public yuv_image_t {};
 34 | 
 35 | struct tp10_image_t : public yuv_image_t {};
 36 | 
 37 | struct p010_image_t : public yuv_image_t {};
 38 | 
 39 | struct matrix_t // Dimensions plus cl_float element storage for a matrix.
 40 | {
 41 |     int width, height;
 42 |     std::vector<cl_float> elements; // NOTE(review): presumably width * height values; element order is not visible here
 43 | };
 44 | 
 45 | struct half_matrix_t // Same layout as matrix_t, but the elements are stored as cl_half.
 46 | {
 47 |     int width, height;
 48 |     std::vector<cl_half> elements; // NOTE(review): presumably width * height values; element order is not visible here
 49 | };
 50 | 
 51 | /**
 52 |  * \brief nonplanar_image_t represents an image type that in contrast to
 53 |  *        yuv_image_t does not separate its pixel data into different planes.
 54 |  */
 55 | struct nonplanar_image_t
 56 | {
 57 |     uint32_t width; // image width -- units depend on the derived image type
 58 |     uint32_t height; // image height
 59 |     std::vector<unsigned char> pixels; // all pixel data in one buffer (not split into planes)
 60 | };
 61 | 
 62 | /**
 63 |  * \brief bayer_mipi10_image_t represents the "raw bytes" + width and height of
 64 |  *        a Bayer-ordered MIPI RAW10 data type image. In Bayer order, blue and
 65 |  *        red values are interleaved with green values in alternating rows:
 66 |  *
 67 |  *            BGBGBGBG...
 68 |  *            GRGRGRGR...
 69 |  *
 70 |  *        One "quad" of values here means two green values and one each of red
 71 |  *        and blue values that are in the same two columns and span two
 72 |  *        consecutive rows. The top left corner of a quad is always a blue
 73 |  *        value.
 74 |  *
 75 |  *        We consider the width of such an image as the total # of blue/green
 76 |  *        or green/red values per row, and the height is the number of rows.
 77 |  *        However these images are addressed in OpenCL kernels as though each
 78 |  *        quad were one pixel, effectively dividing the image dimensions by 2.
 79 |  *
 80 |  *        MIPI RAW10 is a packed 10-bit per channel data type -- the 8 most
 81 |  *        significant bits of 4 consecutive values per row are followed by 1
 82 |  *        byte with the 2 least significant bits for the preceding values, in
 83 |  *        order. The MSBs of the fifth byte hold the LSBs for value 1. For
 84 |  *        example the top row of a Bayer-ordered image would start with this
 85 |  *        sequence of 5 bytes:
 86 |  *
 87 |  *        | byte 1  | byte 2  | byte 3  | byte 4  | byte 5 |
 88 |  *        | b1 MSBs | g1 MSBs | b2 MSBs | g2 MSBs | LSBs   |
 89 |  */
 90 | struct bayer_mipi10_image_t : public nonplanar_image_t {};
 91 | 
 92 | /**
 93 |  * \brief Unpacked Bayer image format. Pixels are Bayer-ordered as above, but
 94 |  *        each 10-bit channel is held in a 16-bit int with 6 unused bits.
 95 |  */
 96 | struct bayer_int10_image_t : public nonplanar_image_t {};
 97 | 
 98 | /**
 99 |  * \brief Represents an RGBA 8888 image.
100 |  */
101 | struct rgba_image_t : public nonplanar_image_t {};
102 | 
103 | /**
104 |  * \brief Represents a single-channel CL_R image type with an unsigned 16-bit
105 |  *        data type.
106 |  */
107 | struct single_channel_int16_image_t : public nonplanar_image_t {};
108 | 
109 | /**
110 |  * \brief Represents a single-channel CL_R image type with 32-bit float data
111 |  *        type.
112 |  */
113 | struct single_channel_float_image_t : public nonplanar_image_t {};
114 | 
115 | /**
116 |  * \brief Loads an 8-bit NV12 image from image data at filename
117 |  *
118 |  * @param filename
119 |  * @return
120 |  */
121 | nv12_image_t load_nv12_image_data(const std::string &filename);
122 | 
123 | /**
124 |  * \brief Saves 8-bit NV12 image to the given filename
125 |  *
126 |  * @param filename
127 |  * @param image
128 |  */
129 | void save_nv12_image_data(const std::string &filename, const nv12_image_t &image);
130 | 
131 | /**
132 |  * \brief Loads a TP10 image from image data at filename
133 |  *
134 |  * @param filename
135 |  * @return
136 |  */
137 | tp10_image_t load_tp10_image_data(const std::string &filename);
138 | 
139 | /**
140 |  * \brief Saves TP10 image to the given filename
141 |  *
142 |  * @param filename
143 |  * @param image
144 |  */
145 | void save_tp10_image_data(const std::string &filename, const tp10_image_t &image);
146 | 
147 | /**
148 |  * \brief Loads a p010 image from image data at filename
149 |  *
150 |  * @param filename
151 |  * @return
152 |  */
153 | p010_image_t load_p010_image_data(const std::string &filename);
154 | 
155 | /**
156 |  * \brief Saves p010 image to the given filename
157 |  *
158 |  * @param filename
159 |  * @param image
160 |  */
161 | void save_p010_image_data(const std::string &filename, const p010_image_t &image);
162 | 
163 | /**
164 |  * \brief Loads a matrix from the given file according to the format
165 |  *        described in README.md
166 |  * @param filename
167 |  */
168 | matrix_t load_matrix(const std::string &filename);
169 | 
170 | /**
171 |  * \brief Loads a matrix of half-floats from the given file according to the
172 |  *        format described in README.md
173 |  * @param filename
174 |  */
175 | half_matrix_t load_half_matrix(const std::string &filename);
176 | 
177 | /**
178 |  * \brief Saves a matrix to the given filename.
179 |  * @param filename
180 |  * @param matrix
181 |  */
182 | void save_matrix(const std::string &filename, const matrix_t &matrix);
183 | 
184 | /**
185 |  * \brief Serializes the matrix to the given output stream
186 |  * @param out
187 |  * @param matrix
188 |  */
189 | void save_matrix(std::ostream &out, const matrix_t &matrix);
190 | 
191 | /**
192 |  * \brief Loads a Bayer MIPI10 from image data at filename
193 |  * @param filename
194 |  * @return
195 |  */
196 | bayer_mipi10_image_t load_bayer_mipi_10_image_data(const std::string &filename);
197 | 
198 | /**
199 |  * \brief Saves a Bayer MIPI10 image to the given filename
200 |  * @param filename
201 |  * @param image
202 |  */
203 | void save_bayer_mipi_10_image_data(const std::string &filename, const bayer_mipi10_image_t &image);
204 | 
205 | /**
206 |  * \brief Loads a Bayer unpacked 10-bit image from image data at filename
207 |  * @param filename
208 |  * @return
209 |  */
210 | bayer_int10_image_t load_bayer_int_10_image_data(const std::string &filename);
211 | 
212 | 
213 | /**
214 |  * \brief Loads an 8-bit depth RGBA image from the given filename.
215 |  * @param filename
216 |  * @return
217 |  */
218 | rgba_image_t load_rgba_image_data(const std::string &filename);
219 | 
220 | /**
221 |  * \brief Saves an 8-bit depth RGBA image to the given filename.
222 |  * @param filename
223 |  * @param image
224 |  */
225 | void save_rgba_image_data(const std::string &filename, const rgba_image_t &image);
226 | 
227 | /**
228 |  * \brief Saves a 16-bit depth single-channel image to the given filename.
229 |  * @param filename
230 |  * @param image
231 |  */
232 | void save_single_channel_image_data(const std::string &filename, const single_channel_int16_image_t &image);
233 | 
234 | /**
235 |  * \brief Saves a 32-bit float single-channel image to the given filename.
236 |  * @param filename
237 |  * @param image
238 |  */
239 | void save_single_channel_image_data(const std::string &filename, const single_channel_float_image_t &image);
240 | 
241 | /**
242 |  * \brief Loads a 16-bit depth single-channel image from image data at filename
243 |  * @param filename
244 |  * @return
245 |  */
246 | single_channel_int16_image_t load_single_channel_image_data(const std::string &filename);
247 | 
248 | /**
249 |  * \brief Returns smallest y such that y % r == 0 and y >= x
250 |  * @param x
251 |  * @param r
252 |  * @return
253 |  */
254 | size_t work_units(size_t x, size_t r);
255 | 
256 | /**
257 |  * \brief get supported formats with specific mem flag
258 |  * @param context
259 |  * @param mem_flags
260 |  */
261 | std::vector<cl_image_format> get_image_formats(cl_context context, cl_mem_flags mem_flags);
262 | 
263 | /**
264 |  * \brief print supported image formats
265 |  * @param formats
266 |  */
267 | void print_formats(const std::vector<cl_image_format> &formats);
268 | 
269 | /**
270 |  * \brief check if specific format in the supported formats list
271 |  * @param formats
272 |  * @param format
273 |  */
274 | bool is_format_supported(const std::vector<cl_image_format> &formats, const cl_image_format &format);
275 | 
276 | #endif //SDK_EXAMPLES_UTIL_H
277 | 


--------------------------------------------------------------------------------
/toolchain/linux_embedded/linux_embedded-toolchain.cmake:
--------------------------------------------------------------------------------
 1 | # Toolchain file for cross-compiling the SDK examples with an OpenEmbedded (poky) build tree.
 2 | #
 3 | # OE_ROOT names the directory that contains poky/. It is taken from the OE_ROOT CMake
 4 | # variable (-DOE_ROOT=...) when that is set, and from the OE_ROOT environment variable otherwise.
 5 | 
 6 | set(TARGET apq8053)
 7 | set(ARCH 64)
 8 | 
 9 | include(CMakeForceCompiler)
10 | set(CMAKE_SYSTEM_NAME Linux)
11 | 
12 | # Resolve OE_ROOT once so the target sysroot and the compiler paths always agree.
13 | if("${OE_ROOT}" STREQUAL "")
14 |   set(OE_ROOT "$ENV{OE_ROOT}")
15 | endif()
16 | if("${OE_ROOT}" STREQUAL "")
17 |   message(WARNING "OE_ROOT is not set (neither as a CMake variable nor in the environment); the cross compiler and sysroot paths will not resolve.")
18 | endif()
19 | 
20 | # try_compile() loads this file again inside a scratch project; forward OE_ROOT to it.
21 | list(APPEND CMAKE_TRY_COMPILE_PLATFORM_VARIABLES OE_ROOT)
22 | list(REMOVE_DUPLICATES CMAKE_TRY_COMPILE_PLATFORM_VARIABLES)
23 | 
24 | set(OE_HOST_SYSROOT "poky/build/tmp-glibc/sysroots/x86_64-linux")
25 | 
26 | if(ARCH STREQUAL "32")
27 |   set(OE_TARGET_SYSROOT "${OE_ROOT}/poky/build/tmp-glibc/sysroots/lib32-${TARGET}")
28 |   set(CMAKE_C_COMPILER "${OE_ROOT}/${OE_HOST_SYSROOT}/usr/bin/arm-oemllib32-linux-gnueabi/arm-oemllib32-linux-gnueabi-gcc")
29 |   set(CMAKE_CXX_COMPILER "${OE_ROOT}/${OE_HOST_SYSROOT}/usr/bin/arm-oemllib32-linux-gnueabi/arm-oemllib32-linux-gnueabi-g++")
30 |   set(BIT_FLAGS "-mcpu=cortex-a15 -mfloat-abi=softfp -mfpu=neon")
31 | else()
32 |   set(OE_TARGET_SYSROOT "${OE_ROOT}/poky/build/tmp-glibc/sysroots/${TARGET}")
33 |   set(CMAKE_C_COMPILER "${OE_ROOT}/${OE_HOST_SYSROOT}/usr/bin/aarch64-oe-linux/aarch64-oe-linux-gcc")
34 |   set(CMAKE_CXX_COMPILER "${OE_ROOT}/${OE_HOST_SYSROOT}/usr/bin/aarch64-oe-linux/aarch64-oe-linux-g++")
35 |   set(BIT_FLAGS "")
36 | endif()
37 | 
38 | set(CMAKE_SYSROOT "${OE_TARGET_SYSROOT}")
39 | set(SYSROOT "--sysroot=${OE_TARGET_SYSROOT}")
40 | set(INC_DIR "-I${OE_TARGET_SYSROOT}/usr/include")
41 | 
42 | # This file is processed more than once per build tree (re-configure, try_compile), so only
43 | # append the cross flags when they are not already in the cached value; otherwise they pile up.
44 | foreach(oe_lang C CXX)
45 |   string(FIND "${CMAKE_${oe_lang}_FLAGS}" "${SYSROOT}" oe_sysroot_pos)
46 |   if(oe_sysroot_pos EQUAL -1)
47 |     set(CMAKE_${oe_lang}_FLAGS "${CMAKE_${oe_lang}_FLAGS} ${SYSROOT} ${INC_DIR} ${BIT_FLAGS}" CACHE STRING "" FORCE)
48 |   endif()
49 | endforeach()
50 | 
51 | set(CMAKE_FIND_ROOT_PATH "${OE_ROOT}/${OE_HOST_SYSROOT}/usr/bin")
52 | set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
53 | set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
54 | set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
55 | set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
56 | 


--------------------------------------------------------------------------------