├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README ├── README.md ├── setup.py ├── src └── opencl4py │ ├── __init__.py │ ├── _cffi.py │ ├── _py.py │ └── blas │ ├── __init__.py │ └── _clblas.py └── tests ├── test.cl ├── test_api.py └── test_clblas.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | __pycache__ 21 | 22 | # Installer logs 23 | pip-log.txt 24 | 25 | # Unit test / coverage reports 26 | .coverage 27 | .tox 28 | nosetests.xml 29 | 30 | # Translations 31 | *.mo 32 | 33 | # Mr Developer 34 | .mr.developer.cfg 35 | .project 36 | .pydevproject 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Samsung Electronics Co.,Ltd. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of Samsung Electronics Co.,Ltd.. 27 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include src/opencl4py/*.py 2 | include src/opencl4py/blas/*.py 3 | include tests/*.py 4 | include tests/*.cl 5 | include LICENSE 6 | include README 7 | include README.md 8 | include setup.py 9 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | README.md -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | opencl4py 2 | ========= 3 | 4 | Python cffi OpenCL bindings and helper classes. 5 | 6 | Tested with Python 2.7, Python 3.3, Python 3.4 and PyPy on Linux, MacOSX and Windows. 7 | 8 | To use clBLAS, libclBLAS.so (clBLAS.dll) should be present. 9 | 10 | Not all of the OpenCL API is currently covered. 11 | 12 | To install the module run: 13 | ```bash 14 | pip install . 15 | ``` 16 | or just copy src/opencl4py to any place where the Python 17 | interpreter will be able to find it.
18 | 19 | To run the tests, execute: 20 | 21 | for Python 2.7: 22 | ```bash 23 | PYTHONPATH=src nosetests -w tests 24 | ``` 25 | 26 | for Python 3.3, 3.4: 27 | ```bash 28 | PYTHONPATH=src nosetests3 -w tests 29 | ``` 30 | 31 | for PyPy: 32 | ```bash 33 | PYTHONPATH=src pypy -m nose -w tests 34 | ``` 35 | 36 | Example usage: 37 | 38 | ```python 39 | import opencl4py as cl 40 | import logging 41 | import numpy 42 | 43 | 44 | if __name__ == "__main__": 45 | logging.basicConfig(level=logging.DEBUG) 46 | platforms = cl.Platforms() 47 | logging.info("OpenCL devices available:\n\n%s\n", 48 | platforms.dump_devices()) 49 | ctx = platforms.create_some_context() 50 | queue = ctx.create_queue(ctx.devices[0]) 51 | prg = ctx.create_program( 52 | """ 53 | __kernel void test(__global const float *a, __global const float *b, 54 | __global float *c, const float k) { 55 | size_t i = get_global_id(0); 56 | c[i] = (a[i] + b[i]) * k; 57 | } 58 | """) 59 | krn = prg.get_kernel("test") 60 | a = numpy.arange(1000000, dtype=numpy.float32) 61 | b = numpy.arange(1000000, dtype=numpy.float32) 62 | c = numpy.empty(1000000, dtype=numpy.float32) 63 | k = numpy.array([0.5], dtype=numpy.float32) 64 | a_buf = ctx.create_buffer(cl.CL_MEM_READ_ONLY | cl.CL_MEM_COPY_HOST_PTR, 65 | a) 66 | b_buf = ctx.create_buffer(cl.CL_MEM_READ_ONLY | cl.CL_MEM_COPY_HOST_PTR, 67 | b) 68 | c_buf = ctx.create_buffer(cl.CL_MEM_WRITE_ONLY | cl.CL_MEM_ALLOC_HOST_PTR, 69 | size=c.nbytes) 70 | krn.set_args(a_buf, b_buf, c_buf, k[0:1]) 71 | queue.execute_kernel(krn, [a.size], None) 72 | queue.read_buffer(c_buf, c) 73 | max_diff = numpy.fabs(c - (a + b) * k[0]).max() 74 | logging.info("max_diff = %.6f", max_diff) 75 | ``` 76 | 77 | Released under Simplified BSD License. 78 | Copyright (c) 2014, Samsung Electronics Co.,Ltd. 
79 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2014, Samsung Electronics Co.,Ltd. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 18 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | The views and conclusions contained in the software and documentation are those 26 | of the authors and should not be interpreted as representing official policies, 27 | either expressed or implied, of Samsung Electronics Co.,Ltd.. 28 | """ 29 | 30 | """ 31 | opencl4py - OpenCL cffi bindings and helper classes. 
32 | URL: https://github.com/Samsung/opencl4py 33 | Original author: Alexey Kazantsev 34 | """ 35 | 36 | """ 37 | Setup script. 38 | """ 39 | try: 40 | from setuptools import setup 41 | except ImportError: 42 | from distutils.core import setup 43 | 44 | 45 | setup( 46 | name="opencl4py", 47 | description="OpenCL cffi bindings and helper classes", 48 | version="1.5.2", 49 | license="Simplified BSD", 50 | author="Samsung Electronics Co.,Ltd.", 51 | author_email="a.kazantsev@samsung.com", 52 | url="https://github.com/Samsung/opencl4py", 53 | download_url="https://github.com/Samsung/opencl4py", 54 | packages=["opencl4py", "opencl4py.blas"], 55 | install_requires=["cffi"], 56 | package_dir={"opencl4py": "src/opencl4py"}, 57 | keywords=["OpenCL", "clBLAS", "opencl4py"], 58 | classifiers=[ 59 | "Development Status :: 4 - Beta", 60 | "Environment :: Console", 61 | "Intended Audience :: Developers", 62 | "License :: OSI Approved :: BSD License", 63 | "Operating System :: POSIX", 64 | "Programming Language :: Python :: 2.7", 65 | "Programming Language :: Python :: 3.2", 66 | "Programming Language :: Python :: 3.3", 67 | "Programming Language :: Python :: 3.4", 68 | "Topic :: Software Development :: Libraries" 69 | ] 70 | ) 71 | -------------------------------------------------------------------------------- /src/opencl4py/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2014, Samsung Electronics Co.,Ltd. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 2. 
Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 18 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | The views and conclusions contained in the software and documentation are those 26 | of the authors and should not be interpreted as representing official policies, 27 | either expressed or implied, of Samsung Electronics Co.,Ltd.. 28 | """ 29 | 30 | """ 31 | opencl4py - OpenCL cffi bindings and helper classes. 32 | URL: https://github.com/Samsung/opencl4py 33 | Original author: Alexey Kazantsev 34 | """ 35 | 36 | """ 37 | Init module. 
38 | """ 39 | 40 | from opencl4py import _cffi 41 | from opencl4py._py import Platforms, Context, CLRuntimeError, skip 42 | from opencl4py._cffi import (initialize, 43 | 44 | CL_DEVICE_TYPE_CPU, 45 | CL_DEVICE_TYPE_GPU, 46 | CL_DEVICE_TYPE_ACCELERATOR, 47 | CL_DEVICE_TYPE_CUSTOM, 48 | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 49 | CL_QUEUE_PROFILING_ENABLE, 50 | CL_QUEUE_ON_DEVICE, 51 | CL_QUEUE_ON_DEVICE_DEFAULT, 52 | CL_QUEUE_PROPERTIES, 53 | CL_QUEUE_SIZE, 54 | CL_MAP_READ, 55 | CL_MAP_WRITE, 56 | CL_MAP_WRITE_INVALIDATE_REGION, 57 | CL_MEM_READ_WRITE, 58 | CL_MEM_WRITE_ONLY, 59 | CL_MEM_READ_ONLY, 60 | CL_MEM_USE_HOST_PTR, 61 | CL_MEM_ALLOC_HOST_PTR, 62 | CL_MEM_COPY_HOST_PTR, 63 | CL_MEM_HOST_NO_ACCESS, 64 | CL_MEM_SVM_FINE_GRAIN_BUFFER, 65 | CL_MEM_SVM_ATOMICS, 66 | CL_DEVICE_SVM_COARSE_GRAIN_BUFFER, 67 | CL_DEVICE_SVM_FINE_GRAIN_BUFFER, 68 | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, 69 | CL_DEVICE_SVM_ATOMICS, 70 | CL_PROFILING_COMMAND_QUEUED, 71 | CL_PROFILING_COMMAND_SUBMIT, 72 | CL_PROFILING_COMMAND_START, 73 | CL_PROFILING_COMMAND_END, 74 | 75 | CL_SUCCESS, 76 | CL_DEVICE_NOT_FOUND, 77 | CL_DEVICE_NOT_AVAILABLE, 78 | CL_COMPILER_NOT_AVAILABLE, 79 | CL_MEM_OBJECT_ALLOCATION_FAILURE, 80 | CL_OUT_OF_RESOURCES, 81 | CL_OUT_OF_HOST_MEMORY, 82 | CL_PROFILING_INFO_NOT_AVAILABLE, 83 | CL_MEM_COPY_OVERLAP, 84 | CL_IMAGE_FORMAT_MISMATCH, 85 | CL_IMAGE_FORMAT_NOT_SUPPORTED, 86 | CL_BUILD_PROGRAM_FAILURE, 87 | CL_MAP_FAILURE, 88 | CL_MISALIGNED_SUB_BUFFER_OFFSET, 89 | CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, 90 | CL_COMPILE_PROGRAM_FAILURE, 91 | CL_LINKER_NOT_AVAILABLE, 92 | CL_LINK_PROGRAM_FAILURE, 93 | CL_DEVICE_PARTITION_FAILED, 94 | CL_KERNEL_ARG_INFO_NOT_AVAILABLE, 95 | 96 | CL_INVALID_VALUE, 97 | CL_INVALID_DEVICE_TYPE, 98 | CL_INVALID_PLATFORM, 99 | CL_INVALID_DEVICE, 100 | CL_INVALID_CONTEXT, 101 | CL_INVALID_QUEUE_PROPERTIES, 102 | CL_INVALID_COMMAND_QUEUE, 103 | CL_INVALID_HOST_PTR, 104 | CL_INVALID_MEM_OBJECT, 105 | CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, 106 | 
CL_INVALID_IMAGE_SIZE, 107 | CL_INVALID_SAMPLER, 108 | CL_INVALID_BINARY, 109 | CL_INVALID_BUILD_OPTIONS, 110 | CL_INVALID_PROGRAM, 111 | CL_INVALID_PROGRAM_EXECUTABLE, 112 | CL_INVALID_KERNEL_NAME, 113 | CL_INVALID_KERNEL_DEFINITION, 114 | CL_INVALID_KERNEL, 115 | CL_INVALID_ARG_INDEX, 116 | CL_INVALID_ARG_VALUE, 117 | CL_INVALID_ARG_SIZE, 118 | CL_INVALID_KERNEL_ARGS, 119 | CL_INVALID_WORK_DIMENSION, 120 | CL_INVALID_WORK_GROUP_SIZE, 121 | CL_INVALID_WORK_ITEM_SIZE, 122 | CL_INVALID_GLOBAL_OFFSET, 123 | CL_INVALID_EVENT_WAIT_LIST, 124 | CL_INVALID_EVENT, 125 | CL_INVALID_OPERATION, 126 | CL_INVALID_GL_OBJECT, 127 | CL_INVALID_BUFFER_SIZE, 128 | CL_INVALID_MIP_LEVEL, 129 | CL_INVALID_GLOBAL_WORK_SIZE, 130 | CL_INVALID_PROPERTY, 131 | CL_INVALID_IMAGE_DESCRIPTOR, 132 | CL_INVALID_COMPILER_OPTIONS, 133 | CL_INVALID_LINKER_OPTIONS, 134 | CL_INVALID_DEVICE_PARTITION_COUNT, 135 | CL_INVALID_PIPE_SIZE, 136 | CL_INVALID_DEVICE_QUEUE) 137 | 138 | 139 | def get_ffi(): 140 | """Returns the cffi.FFI instance used to load the shared library (None until initialize() has succeeded). 141 | """ 142 | return _cffi.ffi 143 | 144 | 145 | def eq_addr(a, b): 146 | """Returns True if the data buffers of the two numpy arrays start at the same address. 147 | """ 148 | return a.__array_interface__["data"][0] == b.__array_interface__["data"][0] 149 | 150 | 151 | def realign_array(a, align, np): 152 | """Returns the array itself if it is already aligned and flattening it with ravel() needs no copy, otherwise returns an aligned copy with contiguous memory layout 153 | (useful for CL_MEM_USE_HOST_PTR buffers). Raises ValueError if the copy could not be aligned. 154 | 155 | Parameters: 156 | a: numpy array to create aligned array from. 157 | align: alignment in bytes of the new array. 158 | np: reference to numpy module (used to allocate the new buffer).
159 | """ 160 | if a.__array_interface__["data"][0] % align == 0 and eq_addr(a, a.ravel()): 161 | return a 162 | b = np.empty(a.nbytes + align, dtype=np.byte) 163 | addr = b.__array_interface__["data"][0] 164 | offs = 0 165 | if addr % align != 0: 166 | offs += align - (addr % align) 167 | b = b[offs:offs + a.nbytes].view(dtype=a.dtype) 168 | b.shape = a.shape 169 | if b.__array_interface__["data"][0] % align != 0: 170 | raise ValueError("Could not realign numpy array with shape %s" % 171 | str(a.shape)) 172 | b[:] = a[:] 173 | return b 174 | -------------------------------------------------------------------------------- /src/opencl4py/_cffi.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2014, Samsung Electronics Co.,Ltd. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 18 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | The views and conclusions contained in the software and documentation are those 26 | of the authors and should not be interpreted as representing official policies, 27 | either expressed or implied, of Samsung Electronics Co.,Ltd.. 28 | """ 29 | 30 | """ 31 | opencl4py - OpenCL cffi bindings and helper classes. 32 | URL: https://github.com/Samsung/opencl4py 33 | Original author: Alexey Kazantsev 34 | """ 35 | 36 | """ 37 | OpenCL cffi bindings. 
38 | """ 39 | import cffi 40 | import threading 41 | 42 | 43 | # Constants 44 | CL_CONTEXT_PLATFORM = 0x1084 45 | CL_PLATFORM_NAME = 0x0902 46 | CL_DEVICE_TYPE_CPU = 2 47 | CL_DEVICE_TYPE_GPU = 4 48 | CL_DEVICE_TYPE_ACCELERATOR = 8 49 | CL_DEVICE_TYPE_CUSTOM = 16 50 | CL_DEVICE_TYPE_ALL = 0xFFFFFFFF 51 | CL_DEVICE_TYPE = 0x1000 52 | CL_DEVICE_NAME = 0x102B 53 | CL_DEVICE_OPENCL_C_VERSION = 0x103D 54 | CL_DEVICE_VENDOR_ID = 0x1001 55 | CL_DEVICE_MAX_COMPUTE_UNITS = 0x1002 56 | CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS = 0x1003 57 | CL_DEVICE_MAX_WORK_GROUP_SIZE = 0x1004 58 | CL_DEVICE_MAX_WORK_ITEM_SIZES = 0x1005 59 | CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR = 0x1006 60 | CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT = 0x1007 61 | CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT = 0x1008 62 | CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG = 0x1009 63 | CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT = 0x100A 64 | CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE = 0x100B 65 | CL_DEVICE_MAX_CLOCK_FREQUENCY = 0x100C 66 | CL_DEVICE_ADDRESS_BITS = 0x100D 67 | CL_DEVICE_MAX_READ_IMAGE_ARGS = 0x100E 68 | CL_DEVICE_MAX_WRITE_IMAGE_ARGS = 0x100F 69 | CL_DEVICE_MAX_MEM_ALLOC_SIZE = 0x1010 70 | CL_DEVICE_IMAGE2D_MAX_WIDTH = 0x1011 71 | CL_DEVICE_IMAGE2D_MAX_HEIGHT = 0x1012 72 | CL_DEVICE_IMAGE3D_MAX_WIDTH = 0x1013 73 | CL_DEVICE_IMAGE3D_MAX_HEIGHT = 0x1014 74 | CL_DEVICE_IMAGE3D_MAX_DEPTH = 0x1015 75 | CL_DEVICE_IMAGE_SUPPORT = 0x1016 76 | CL_DEVICE_MAX_PARAMETER_SIZE = 0x1017 77 | CL_DEVICE_MAX_SAMPLERS = 0x1018 78 | CL_DEVICE_MEM_BASE_ADDR_ALIGN = 0x1019 79 | CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE = 0x101A 80 | CL_DEVICE_SINGLE_FP_CONFIG = 0x101B 81 | CL_DEVICE_GLOBAL_MEM_CACHE_TYPE = 0x101C 82 | CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE = 0x101D 83 | CL_DEVICE_GLOBAL_MEM_CACHE_SIZE = 0x101E 84 | CL_DEVICE_GLOBAL_MEM_SIZE = 0x101F 85 | CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE = 0x1020 86 | CL_DEVICE_MAX_CONSTANT_ARGS = 0x1021 87 | CL_DEVICE_LOCAL_MEM_TYPE = 0x1022 88 | CL_DEVICE_LOCAL_MEM_SIZE = 0x1023 89 | CL_DEVICE_ERROR_CORRECTION_SUPPORT = 
0x1024 90 | CL_DEVICE_PROFILING_TIMER_RESOLUTION = 0x1025 91 | CL_DEVICE_ENDIAN_LITTLE = 0x1026 92 | CL_DEVICE_AVAILABLE = 0x1027 93 | CL_DEVICE_COMPILER_AVAILABLE = 0x1028 94 | CL_DEVICE_EXECUTION_CAPABILITIES = 0x1029 95 | CL_DEVICE_QUEUE_PROPERTIES = 0x102A 96 | CL_DEVICE_NAME = 0x102B 97 | CL_DEVICE_VENDOR = 0x102C 98 | CL_DRIVER_VERSION = 0x102D 99 | CL_DEVICE_PROFILE = 0x102E 100 | CL_DEVICE_VERSION = 0x102F 101 | CL_DEVICE_EXTENSIONS = 0x1030 102 | CL_DEVICE_PLATFORM = 0x1031 103 | CL_DEVICE_DOUBLE_FP_CONFIG = 0x1032 104 | CL_DEVICE_HALF_FP_CONFIG = 0x1033 105 | CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF = 0x1034 106 | CL_DEVICE_HOST_UNIFIED_MEMORY = 0x1035 107 | CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR = 0x1036 108 | CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT = 0x1037 109 | CL_DEVICE_NATIVE_VECTOR_WIDTH_INT = 0x1038 110 | CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG = 0x1039 111 | CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT = 0x103A 112 | CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE = 0x103B 113 | CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF = 0x103C 114 | CL_DEVICE_OPENCL_C_VERSION = 0x103D 115 | CL_DEVICE_LINKER_AVAILABLE = 0x103E 116 | CL_DEVICE_BUILT_IN_KERNELS = 0x103F 117 | CL_DEVICE_IMAGE_MAX_BUFFER_SIZE = 0x1040 118 | CL_DEVICE_IMAGE_MAX_ARRAY_SIZE = 0x1041 119 | CL_DEVICE_PARENT_DEVICE = 0x1042 120 | CL_DEVICE_PARTITION_MAX_SUB_DEVICES = 0x1043 121 | CL_DEVICE_PARTITION_PROPERTIES = 0x1044 122 | CL_DEVICE_PARTITION_AFFINITY_DOMAIN = 0x1045 123 | CL_DEVICE_PARTITION_TYPE = 0x1046 124 | CL_DEVICE_REFERENCE_COUNT = 0x1047 125 | CL_DEVICE_PREFERRED_INTEROP_USER_SYNC = 0x1048 126 | CL_DEVICE_PRINTF_BUFFER_SIZE = 0x1049 127 | CL_DEVICE_IMAGE_PITCH_ALIGNMENT = 0x104A 128 | CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT = 0x104B 129 | CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS = 0x1056 130 | CL_DEVICE_PIPE_MAX_PACKET_SIZE = 0x1057 131 | CL_DEVICE_SVM_CAPABILITIES = 0x1053 132 | CL_DEVICE_SVM_COARSE_GRAIN_BUFFER = 1 133 | CL_DEVICE_SVM_FINE_GRAIN_BUFFER = 2 134 | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM = 4 135 | 
CL_DEVICE_SVM_ATOMICS = 8 136 | CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT = 0x1058 137 | CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT = 0x1059 138 | CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT = 0x105A 139 | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE = 1 140 | CL_QUEUE_PROFILING_ENABLE = 2 141 | CL_QUEUE_ON_DEVICE = 4 142 | CL_QUEUE_ON_DEVICE_DEFAULT = 8 143 | CL_QUEUE_PROPERTIES = 0x1093 144 | CL_QUEUE_SIZE = 0x1094 145 | CL_PROGRAM_BUILD_LOG = 0x1183 146 | CL_MAP_READ = 1 147 | CL_MAP_WRITE = 2 148 | CL_MAP_WRITE_INVALIDATE_REGION = 4 149 | CL_MEM_READ_WRITE = 1 150 | CL_MEM_WRITE_ONLY = 2 151 | CL_MEM_READ_ONLY = 4 152 | CL_MEM_USE_HOST_PTR = 8 153 | CL_MEM_ALLOC_HOST_PTR = 16 154 | CL_MEM_COPY_HOST_PTR = 32 155 | CL_MEM_HOST_NO_ACCESS = 512 156 | CL_MEM_SVM_FINE_GRAIN_BUFFER = 1024 157 | CL_MEM_SVM_ATOMICS = 2048 158 | CL_PROFILING_COMMAND_QUEUED = 0x1280 159 | CL_PROFILING_COMMAND_SUBMIT = 0x1281 160 | CL_PROFILING_COMMAND_START = 0x1282 161 | CL_PROFILING_COMMAND_END = 0x1283 162 | CL_PROGRAM_REFERENCE_COUNT = 0x1160 163 | CL_PROGRAM_CONTEXT = 0x1161 164 | CL_PROGRAM_NUM_DEVICES = 0x1162 165 | CL_PROGRAM_DEVICES = 0x1163 166 | CL_PROGRAM_SOURCE = 0x1164 167 | CL_PROGRAM_BINARY_SIZES = 0x1165 168 | CL_PROGRAM_BINARIES = 0x1166 169 | CL_PROGRAM_NUM_KERNELS = 0x1167 170 | CL_PROGRAM_KERNEL_NAMES = 0x1168 171 | CL_KERNEL_FUNCTION_NAME = 0x1190 172 | CL_KERNEL_NUM_ARGS = 0x1191 173 | CL_KERNEL_REFERENCE_COUNT = 0x1192 174 | CL_KERNEL_CONTEXT = 0x1193 175 | CL_KERNEL_PROGRAM = 0x1194 176 | CL_KERNEL_ATTRIBUTES = 0x1195 177 | CL_BUFFER_CREATE_TYPE_REGION = 0x1220 178 | CL_KERNEL_GLOBAL_WORK_SIZE = 0x11B5 179 | CL_KERNEL_WORK_GROUP_SIZE = 0x11B0 180 | CL_KERNEL_COMPILE_WORK_GROUP_SIZE = 0x11B1 181 | CL_KERNEL_LOCAL_MEM_SIZE = 0x11B2 182 | CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE = 0x11B3 183 | CL_KERNEL_PRIVATE_MEM_SIZE = 0x11B4 184 | 185 | 186 | # Error codes 187 | CL_SUCCESS = 0 188 | CL_DEVICE_NOT_FOUND = -1 189 | CL_DEVICE_NOT_AVAILABLE = -2 190 | 
CL_COMPILER_NOT_AVAILABLE = -3 191 | CL_MEM_OBJECT_ALLOCATION_FAILURE = -4 192 | CL_OUT_OF_RESOURCES = -5 193 | CL_OUT_OF_HOST_MEMORY = -6 194 | CL_PROFILING_INFO_NOT_AVAILABLE = -7 195 | CL_MEM_COPY_OVERLAP = -8 196 | CL_IMAGE_FORMAT_MISMATCH = -9 197 | CL_IMAGE_FORMAT_NOT_SUPPORTED = -10 198 | CL_BUILD_PROGRAM_FAILURE = -11 199 | CL_MAP_FAILURE = -12 200 | CL_MISALIGNED_SUB_BUFFER_OFFSET = -13 201 | CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST = -14 202 | CL_COMPILE_PROGRAM_FAILURE = -15 203 | CL_LINKER_NOT_AVAILABLE = -16 204 | CL_LINK_PROGRAM_FAILURE = -17 205 | CL_DEVICE_PARTITION_FAILED = -18 206 | CL_KERNEL_ARG_INFO_NOT_AVAILABLE = -19 207 | 208 | CL_INVALID_VALUE = -30 209 | CL_INVALID_DEVICE_TYPE = -31 210 | CL_INVALID_PLATFORM = -32 211 | CL_INVALID_DEVICE = -33 212 | CL_INVALID_CONTEXT = -34 213 | CL_INVALID_QUEUE_PROPERTIES = -35 214 | CL_INVALID_COMMAND_QUEUE = -36 215 | CL_INVALID_HOST_PTR = -37 216 | CL_INVALID_MEM_OBJECT = -38 217 | CL_INVALID_IMAGE_FORMAT_DESCRIPTOR = -39 218 | CL_INVALID_IMAGE_SIZE = -40 219 | CL_INVALID_SAMPLER = -41 220 | CL_INVALID_BINARY = -42 221 | CL_INVALID_BUILD_OPTIONS = -43 222 | CL_INVALID_PROGRAM = -44 223 | CL_INVALID_PROGRAM_EXECUTABLE = -45 224 | CL_INVALID_KERNEL_NAME = -46 225 | CL_INVALID_KERNEL_DEFINITION = -47 226 | CL_INVALID_KERNEL = -48 227 | CL_INVALID_ARG_INDEX = -49 228 | CL_INVALID_ARG_VALUE = -50 229 | CL_INVALID_ARG_SIZE = -51 230 | CL_INVALID_KERNEL_ARGS = -52 231 | CL_INVALID_WORK_DIMENSION = -53 232 | CL_INVALID_WORK_GROUP_SIZE = -54 233 | CL_INVALID_WORK_ITEM_SIZE = -55 234 | CL_INVALID_GLOBAL_OFFSET = -56 235 | CL_INVALID_EVENT_WAIT_LIST = -57 236 | CL_INVALID_EVENT = -58 237 | CL_INVALID_OPERATION = -59 238 | CL_INVALID_GL_OBJECT = -60 239 | CL_INVALID_BUFFER_SIZE = -61 240 | CL_INVALID_MIP_LEVEL = -62 241 | CL_INVALID_GLOBAL_WORK_SIZE = -63 242 | CL_INVALID_PROPERTY = -64 243 | CL_INVALID_IMAGE_DESCRIPTOR = -65 244 | CL_INVALID_COMPILER_OPTIONS = -66 245 | CL_INVALID_LINKER_OPTIONS = -67 246 | 
CL_INVALID_DEVICE_PARTITION_COUNT = -68 247 | CL_INVALID_PIPE_SIZE = -69 248 | CL_INVALID_DEVICE_QUEUE = -70 249 | 250 | 251 | #: ffi parser 252 | ffi = None 253 | 254 | 255 | #: Loaded shared library 256 | lib = None 257 | 258 | 259 | #: Lock 260 | lock = threading.Lock() 261 | 262 | 263 | def _initialize(backends): 264 | global lib 265 | if lib is not None: 266 | return 267 | # C function definitions 268 | src = """ 269 | typedef int32_t cl_int; 270 | typedef uint32_t cl_uint; 271 | typedef uint64_t cl_ulong; 272 | typedef uint64_t cl_device_type; 273 | typedef uint32_t cl_platform_info; 274 | typedef uint32_t cl_device_info; 275 | typedef uint32_t cl_program_build_info; 276 | typedef cl_uint cl_program_info; 277 | typedef cl_uint cl_kernel_info; 278 | typedef uint32_t cl_kernel_work_group_info; 279 | typedef uint64_t cl_command_queue_properties; 280 | typedef uint64_t cl_queue_properties; 281 | typedef uint64_t cl_mem_flags; 282 | typedef uint32_t cl_bool; 283 | typedef uint64_t cl_map_flags; 284 | typedef uint32_t cl_profiling_info; 285 | typedef uint32_t cl_buffer_create_type; 286 | typedef uint64_t cl_svm_mem_flags; 287 | 288 | typedef void* cl_platform_id; 289 | typedef void* cl_device_id; 290 | typedef void* cl_context; 291 | typedef void* cl_program; 292 | typedef void* cl_kernel; 293 | typedef void* cl_command_queue; 294 | typedef void* cl_mem; 295 | typedef void* cl_event; 296 | 297 | typedef intptr_t cl_context_properties; 298 | typedef intptr_t cl_pipe_properties; 299 | 300 | cl_int clGetPlatformIDs(cl_uint num_entries, 301 | cl_platform_id *platforms, 302 | cl_uint *num_platforms); 303 | cl_int clGetDeviceIDs(cl_platform_id platform, 304 | cl_device_type device_type, 305 | cl_uint num_entries, 306 | cl_device_id *devices, 307 | cl_uint *num_devices); 308 | 309 | cl_int clGetPlatformInfo(cl_platform_id platform, 310 | cl_platform_info param_name, 311 | size_t param_value_size, 312 | void *param_value, 313 | size_t *param_value_size_ret); 314 | cl_int 
clGetDeviceInfo(cl_device_id device, 315 | cl_device_info param_name, 316 | size_t param_value_size, 317 | void *param_value, 318 | size_t *param_value_size_ret); 319 | 320 | cl_context clCreateContext(const cl_context_properties *properties, 321 | cl_uint num_devices, 322 | const cl_device_id *devices, 323 | void *pfn_notify, 324 | void *user_data, 325 | cl_int *errcode_ret); 326 | cl_int clReleaseContext(cl_context context); 327 | 328 | cl_program clCreateProgramWithSource(cl_context context, 329 | cl_uint count, 330 | const char **strings, 331 | const size_t *lengths, 332 | cl_int *errcode_ret); 333 | 334 | cl_program clCreateProgramWithBinary(cl_context context, 335 | cl_uint num_devices, 336 | const cl_device_id *device_list, 337 | const size_t *lengths, 338 | const unsigned char **binaries, 339 | cl_int *binary_status, 340 | cl_int *errcode_ret); 341 | 342 | cl_int clReleaseProgram(cl_program program); 343 | cl_int clBuildProgram(cl_program program, 344 | cl_uint num_devices, 345 | const cl_device_id *device_list, 346 | const char *options, 347 | void *pfn_notify, 348 | void *user_data); 349 | cl_int clGetProgramBuildInfo(cl_program program, 350 | cl_device_id device, 351 | cl_program_build_info param_name, 352 | size_t param_value_size, 353 | void *param_value, 354 | size_t *param_value_size_ret); 355 | 356 | cl_int clGetProgramInfo(cl_program program, 357 | cl_program_info param_name, 358 | size_t param_value_size, 359 | void *param_value, 360 | size_t *param_value_size_ret); 361 | 362 | cl_kernel clCreateKernel(cl_program program, 363 | const char *kernel_name, 364 | cl_int *errcode_ret); 365 | cl_int clReleaseKernel(cl_kernel kernel); 366 | cl_int clGetKernelInfo(cl_kernel kernel, 367 | cl_kernel_info param_name, 368 | size_t param_value_size, 369 | void *param_value, 370 | size_t *param_value_size_ret); 371 | 372 | cl_int clGetKernelWorkGroupInfo(cl_kernel kernel, 373 | cl_device_id device, 374 | cl_kernel_work_group_info param_name, 375 | size_t 
param_value_size, 376 | void *param_value, 377 | size_t *param_value_size_ret); 378 | cl_int clSetKernelArg(cl_kernel kernel, 379 | cl_uint arg_index, 380 | size_t arg_size, 381 | const void *arg_value); 382 | 383 | cl_command_queue clCreateCommandQueue( 384 | cl_context context, 385 | cl_device_id device, 386 | cl_command_queue_properties properties, 387 | cl_int *errcode_ret); 388 | cl_command_queue clCreateCommandQueueWithProperties( 389 | cl_context context, 390 | cl_device_id device, 391 | const cl_queue_properties *properties, 392 | cl_int *errcode_ret); 393 | cl_int clReleaseCommandQueue(cl_command_queue command_queue); 394 | 395 | cl_mem clCreateBuffer(cl_context context, 396 | cl_mem_flags flags, 397 | size_t size, 398 | void *host_ptr, 399 | cl_int *errcode_ret); 400 | cl_mem clCreateSubBuffer(cl_mem buffer, 401 | cl_mem_flags flags, 402 | cl_buffer_create_type buffer_create_type, 403 | const void *buffer_create_info, 404 | cl_int *errcode_ret); 405 | cl_int clReleaseMemObject(cl_mem memobj); 406 | void* clEnqueueMapBuffer(cl_command_queue command_queue, 407 | cl_mem buffer, 408 | cl_bool blocking_map, 409 | cl_map_flags map_flags, 410 | size_t offset, 411 | size_t size, 412 | cl_uint num_events_in_wait_list, 413 | const cl_event *event_wait_list, 414 | cl_event *event, 415 | cl_int *errcode_ret); 416 | cl_int clEnqueueUnmapMemObject(cl_command_queue command_queue, 417 | cl_mem memobj, 418 | void *mapped_ptr, 419 | cl_uint num_events_in_wait_list, 420 | const cl_event *event_wait_list, 421 | cl_event *event); 422 | cl_int clEnqueueReadBuffer(cl_command_queue command_queue, 423 | cl_mem buffer, 424 | cl_bool blocking_read, 425 | size_t offset, 426 | size_t size, 427 | void *ptr, 428 | cl_uint num_events_in_wait_list, 429 | const cl_event *event_wait_list, 430 | cl_event *event); 431 | cl_int clEnqueueWriteBuffer(cl_command_queue command_queue, 432 | cl_mem buffer, 433 | cl_bool blocking_write, 434 | size_t offset, 435 | size_t size, 436 | const void *ptr, 
437 | cl_uint num_events_in_wait_list, 438 | const cl_event *event_wait_list, 439 | cl_event *event); 440 | cl_int clEnqueueCopyBuffer(cl_command_queue command_queue, 441 | cl_mem src_buffer, 442 | cl_mem dst_buffer, 443 | size_t src_offset, 444 | size_t dst_offset, 445 | size_t size, 446 | cl_uint num_events_in_wait_list, 447 | const cl_event *event_wait_list, 448 | cl_event *event); 449 | cl_int clEnqueueCopyBufferRect(cl_command_queue command_queue, 450 | cl_mem src_buffer, 451 | cl_mem dst_buffer, 452 | const size_t *src_origin, 453 | const size_t *dst_origin, 454 | const size_t *region, 455 | size_t src_row_pitch, 456 | size_t src_slice_pitch, 457 | size_t dst_row_pitch, 458 | size_t dst_slice_pitch, 459 | cl_uint num_events_in_wait_list, 460 | const cl_event *event_wait_list, 461 | cl_event *event); 462 | cl_int clEnqueueFillBuffer(cl_command_queue command_queue, 463 | cl_mem buffer, 464 | const void *pattern, 465 | size_t pattern_size, 466 | size_t offset, 467 | size_t size, 468 | cl_uint num_events_in_wait_list, 469 | const cl_event *event_wait_list, 470 | cl_event *event); 471 | 472 | cl_int clWaitForEvents(cl_uint num_events, 473 | const cl_event *event_list); 474 | cl_int clReleaseEvent(cl_event event); 475 | 476 | cl_int clFlush(cl_command_queue command_queue); 477 | cl_int clFinish(cl_command_queue command_queue); 478 | 479 | cl_int clEnqueueNDRangeKernel(cl_command_queue command_queue, 480 | cl_kernel kernel, 481 | cl_uint work_dim, 482 | const size_t *global_work_offset, 483 | const size_t *global_work_size, 484 | const size_t *local_work_size, 485 | cl_uint num_events_in_wait_list, 486 | const cl_event *event_wait_list, 487 | cl_event *event); 488 | 489 | cl_int clGetEventProfilingInfo(cl_event event, 490 | cl_profiling_info param_name, 491 | size_t param_value_size, 492 | void *param_value, 493 | size_t *param_value_size_ret); 494 | 495 | cl_mem clCreatePipe(cl_context context, 496 | cl_mem_flags flags, 497 | cl_uint pipe_packet_size, 498 | cl_uint 
def initialize(backends=("libOpenCL.so", "OpenCL.dll", "OpenCL")):
    """Loads the OpenCL shared library on first use.

    Does nothing when the module-level lib is already set; otherwise
    delegates to _initialize() while holding the module-level lock.

    Parameters:
        backends: candidate library names passed on to _initialize().
    """
    global lib, lock
    if lib is None:
        with lock:
            _initialize(backends)
-------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2014, Samsung Electronics Co.,Ltd. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 18 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | The views and conclusions contained in the software and documentation are those 26 | of the authors and should not be interpreted as representing official policies, 27 | either expressed or implied, of Samsung Electronics Co.,Ltd.. 28 | """ 29 | 30 | """ 31 | opencl4py - OpenCL cffi bindings and helper classes. 32 | URL: https://github.com/Samsung/opencl4py 33 | Original author: Alexey Kazantsev 34 | """ 35 | 36 | """ 37 | Helper classes for OpenCL cffi bindings. 
class CL(object):
    """Base OpenCL class.

    Attributes:
        ERRORS: mapping of OpenCL status codes to their symbolic names.
        _lib: reference to cl.lib taken when the object is constructed
              (stored on the instance to hold the reference).
        _handle: cffi handle to OpenCL object.
    """

    ERRORS = {
        cl.CL_SUCCESS: "CL_SUCCESS",
        cl.CL_DEVICE_NOT_FOUND: "CL_DEVICE_NOT_FOUND",
        cl.CL_DEVICE_NOT_AVAILABLE: "CL_DEVICE_NOT_AVAILABLE",
        cl.CL_COMPILER_NOT_AVAILABLE: "CL_COMPILER_NOT_AVAILABLE",
        cl.CL_MEM_OBJECT_ALLOCATION_FAILURE:
        "CL_MEM_OBJECT_ALLOCATION_FAILURE",
        cl.CL_OUT_OF_RESOURCES: "CL_OUT_OF_RESOURCES",
        cl.CL_OUT_OF_HOST_MEMORY: "CL_OUT_OF_HOST_MEMORY",
        cl.CL_PROFILING_INFO_NOT_AVAILABLE: "CL_PROFILING_INFO_NOT_AVAILABLE",
        cl.CL_MEM_COPY_OVERLAP: "CL_MEM_COPY_OVERLAP",
        cl.CL_IMAGE_FORMAT_MISMATCH: "CL_IMAGE_FORMAT_MISMATCH",
        cl.CL_IMAGE_FORMAT_NOT_SUPPORTED: "CL_IMAGE_FORMAT_NOT_SUPPORTED",
        cl.CL_BUILD_PROGRAM_FAILURE: "CL_BUILD_PROGRAM_FAILURE",
        cl.CL_MAP_FAILURE: "CL_MAP_FAILURE",
        cl.CL_MISALIGNED_SUB_BUFFER_OFFSET: "CL_MISALIGNED_SUB_BUFFER_OFFSET",
        cl.CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
        "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST",
        cl.CL_COMPILE_PROGRAM_FAILURE: "CL_COMPILE_PROGRAM_FAILURE",
        cl.CL_LINKER_NOT_AVAILABLE: "CL_LINKER_NOT_AVAILABLE",
        cl.CL_LINK_PROGRAM_FAILURE: "CL_LINK_PROGRAM_FAILURE",
        cl.CL_DEVICE_PARTITION_FAILED: "CL_DEVICE_PARTITION_FAILED",
        cl.CL_KERNEL_ARG_INFO_NOT_AVAILABLE:
        "CL_KERNEL_ARG_INFO_NOT_AVAILABLE",

        cl.CL_INVALID_VALUE: "CL_INVALID_VALUE",
        cl.CL_INVALID_DEVICE_TYPE: "CL_INVALID_DEVICE_TYPE",
        cl.CL_INVALID_PLATFORM: "CL_INVALID_PLATFORM",
        cl.CL_INVALID_DEVICE: "CL_INVALID_DEVICE",
        cl.CL_INVALID_CONTEXT: "CL_INVALID_CONTEXT",
        cl.CL_INVALID_QUEUE_PROPERTIES: "CL_INVALID_QUEUE_PROPERTIES",
        cl.CL_INVALID_COMMAND_QUEUE: "CL_INVALID_COMMAND_QUEUE",
        cl.CL_INVALID_HOST_PTR: "CL_INVALID_HOST_PTR",
        cl.CL_INVALID_MEM_OBJECT: "CL_INVALID_MEM_OBJECT",
        cl.CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
        "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR",
        cl.CL_INVALID_IMAGE_SIZE: "CL_INVALID_IMAGE_SIZE",
        cl.CL_INVALID_SAMPLER: "CL_INVALID_SAMPLER",
        cl.CL_INVALID_BINARY: "CL_INVALID_BINARY",
        cl.CL_INVALID_BUILD_OPTIONS: "CL_INVALID_BUILD_OPTIONS",
        cl.CL_INVALID_PROGRAM: "CL_INVALID_PROGRAM",
        cl.CL_INVALID_PROGRAM_EXECUTABLE: "CL_INVALID_PROGRAM_EXECUTABLE",
        cl.CL_INVALID_KERNEL_NAME: "CL_INVALID_KERNEL_NAME",
        cl.CL_INVALID_KERNEL_DEFINITION: "CL_INVALID_KERNEL_DEFINITION",
        cl.CL_INVALID_KERNEL: "CL_INVALID_KERNEL",
        cl.CL_INVALID_ARG_INDEX: "CL_INVALID_ARG_INDEX",
        cl.CL_INVALID_ARG_VALUE: "CL_INVALID_ARG_VALUE",
        cl.CL_INVALID_ARG_SIZE: "CL_INVALID_ARG_SIZE",
        cl.CL_INVALID_KERNEL_ARGS: "CL_INVALID_KERNEL_ARGS",
        cl.CL_INVALID_WORK_DIMENSION: "CL_INVALID_WORK_DIMENSION",
        cl.CL_INVALID_WORK_GROUP_SIZE: "CL_INVALID_WORK_GROUP_SIZE",
        cl.CL_INVALID_WORK_ITEM_SIZE: "CL_INVALID_WORK_ITEM_SIZE",
        cl.CL_INVALID_GLOBAL_OFFSET: "CL_INVALID_GLOBAL_OFFSET",
        cl.CL_INVALID_EVENT_WAIT_LIST: "CL_INVALID_EVENT_WAIT_LIST",
        cl.CL_INVALID_EVENT: "CL_INVALID_EVENT",
        cl.CL_INVALID_OPERATION: "CL_INVALID_OPERATION",
        cl.CL_INVALID_GL_OBJECT: "CL_INVALID_GL_OBJECT",
        cl.CL_INVALID_BUFFER_SIZE: "CL_INVALID_BUFFER_SIZE",
        cl.CL_INVALID_MIP_LEVEL: "CL_INVALID_MIP_LEVEL",
        cl.CL_INVALID_GLOBAL_WORK_SIZE: "CL_INVALID_GLOBAL_WORK_SIZE",
        cl.CL_INVALID_PROPERTY: "CL_INVALID_PROPERTY",
        cl.CL_INVALID_IMAGE_DESCRIPTOR: "CL_INVALID_IMAGE_DESCRIPTOR",
        cl.CL_INVALID_COMPILER_OPTIONS: "CL_INVALID_COMPILER_OPTIONS",
        cl.CL_INVALID_LINKER_OPTIONS: "CL_INVALID_LINKER_OPTIONS",
        cl.CL_INVALID_DEVICE_PARTITION_COUNT:
        "CL_INVALID_DEVICE_PARTITION_COUNT",
        cl.CL_INVALID_PIPE_SIZE: "CL_INVALID_PIPE_SIZE",
        cl.CL_INVALID_DEVICE_QUEUE: "CL_INVALID_DEVICE_QUEUE"
    }

    def __init__(self):
        self._lib = cl.lib  # to hold the reference
        self._handle = None

    @property
    def handle(self):
        """Returns cffi handle to OpenCL object.
        """
        return self._handle

    @staticmethod
    def extract_ptr_and_size(host_array, size):
        """Returns cffi pointer to host_array and its size.

        Parameters:
            host_array: object exposing __array_interface__ (e.g. numpy
                        array), or a value castable to void* by cffi,
                        or None (yields cl.ffi.NULL).
            size: size in bytes; if None, host_array.nbytes is used when
                  host_array exposes __array_interface__.

        Returns:
            tuple of (cffi void* or cl.ffi.NULL, size).

        Raises:
            ValueError: if size is None and host_array does not expose
                        __array_interface__.
        """
        if hasattr(host_array, "__array_interface__"):
            host_ptr = host_array.__array_interface__["data"][0]
            if size is None:
                size = host_array.nbytes
        else:
            host_ptr = host_array
            if size is None:
                raise ValueError("size should be set "
                                 "in case of non-numpy host_array")
        return (cl.ffi.NULL if host_ptr is None
                else cl.ffi.cast("void*", host_ptr), size)

    @staticmethod
    def get_wait_list(wait_for):
        """Returns cffi event list and number of events
        from list of Event objects.

        Parameters:
            wait_for: sequence of Event objects, or None.

        Returns:
            tuple of (cffi cl_event[] array, number of events), or
            (cl.ffi.NULL, 0) if wait_for is None or empty.
        """
        n_events = 0 if wait_for is None else len(wait_for)
        if n_events == 0:
            # OpenCL reports CL_INVALID_EVENT_WAIT_LIST for a non-NULL list
            # paired with a zero count, so an empty sequence must map
            # to NULL exactly like None does.
            return (cl.ffi.NULL, 0)
        wait_list = cl.ffi.new("cl_event[]", n_events)
        for i, ev in enumerate(wait_for):
            wait_list[i] = ev.handle
        return (wait_list, n_events)

    @staticmethod
    def get_error_name_from_code(code):
        """Returns symbolic name of the OpenCL status code
        or "UNKNOWN" if the code is not present in ERRORS.
        """
        return CL.ERRORS.get(code, "UNKNOWN")

    @staticmethod
    def get_error_description(code):
        """Returns "NAME (code)" string for the OpenCL status code.
        """
        return "%s (%d)" % (CL.get_error_name_from_code(code), code)
178 | 179 | Attributes: 180 | profiling_values: 181 | dictionary of profiling values 182 | if get_profiling_info was ever called; 183 | keys: CL_PROFILING_COMMAND_QUEUED, 184 | CL_PROFILING_COMMAND_SUBMIT, 185 | CL_PROFILING_COMMAND_START, 186 | CL_PROFILING_COMMAND_END; 187 | values: the current device time counter in seconds (float), 188 | or 0 if there was an error, in such case, corresponding 189 | profile_errors will be set with the error code. 190 | profiling_errors: dictionary of profiling errors 191 | if get_profiling_info was ever called. 192 | """ 193 | def __init__(self, handle): 194 | super(Event, self).__init__() 195 | self._handle = handle 196 | 197 | @staticmethod 198 | def wait_multi(wait_for, lib=cl.lib): 199 | """Wait on list of Event objects. 200 | """ 201 | wait_list, n_events = CL.get_wait_list(wait_for) 202 | n = lib.clWaitForEvents(n_events, wait_list) 203 | if n: 204 | raise CLRuntimeError("clWaitForEvents() failed with " 205 | "error %s" % CL.get_error_description(n), n) 206 | 207 | def wait(self): 208 | """Waits on this event. 209 | """ 210 | Event.wait_multi((self,), self._lib) 211 | 212 | def get_profiling_info(self, raise_exception=True): 213 | """Get profiling info of the event. 214 | 215 | Queue should be created with CL_QUEUE_PROFILING_ENABLE flag, 216 | and event should be in complete state (wait completed). 217 | 218 | Parameters: 219 | raise_exception: raise exception on error or not, 220 | self.profiling_values, self.profiling_errors 221 | will be available anyway. 222 | 223 | Returns: 224 | tuple of (profiling_values, profiling_errors). 
225 | """ 226 | vle = cl.ffi.new("cl_ulong[]", 1) 227 | sz = cl.ffi.sizeof(vle) 228 | vles = {} 229 | errs = {} 230 | for name in (cl.CL_PROFILING_COMMAND_QUEUED, 231 | cl.CL_PROFILING_COMMAND_SUBMIT, 232 | cl.CL_PROFILING_COMMAND_START, 233 | cl.CL_PROFILING_COMMAND_END): 234 | vle[0] = 0 235 | n = self._lib.clGetEventProfilingInfo( 236 | self.handle, name, sz, vle, cl.ffi.NULL) 237 | vles[name] = 1.0e-9 * vle[0] if not n else 0.0 238 | errs[name] = n 239 | self.profiling_values = vles 240 | self.profiling_errors = errs 241 | if raise_exception: 242 | for err in errs.values(): 243 | if not err: 244 | continue 245 | raise CLRuntimeError( 246 | "clGetEventProfilingInfo() failed with " 247 | "error %s" % CL.get_error_description(err), err) 248 | return (vles, errs) 249 | 250 | def _release(self): 251 | if self.handle is not None: 252 | self._lib.clReleaseEvent(self.handle) 253 | self._handle = None 254 | 255 | def __del__(self): 256 | self._release() 257 | 258 | 259 | class Queue(CL): 260 | """Holds OpenCL command queue. 261 | 262 | Attributes: 263 | context: context associated with this queue. 264 | device: device associated with this queue. 265 | """ 266 | def __init__(self, context, device, flags, properties=None): 267 | """Creates the OpenCL command queue associated with the given device. 268 | 269 | Parameters: 270 | context: Context instance. 271 | device: Device instance. 272 | flags: flags for the command queue creation. 273 | properties: dictionary of the OpenCL 2.0 queue properties. 
274 | """ 275 | super(Queue, self).__init__() 276 | context._add_ref(self) 277 | self._context = context 278 | self._device = device 279 | err = cl.ffi.new("cl_int *") 280 | if properties is None or device.version < 2.0: 281 | fnme = "clCreateCommandQueue" 282 | self._handle = self._lib.clCreateCommandQueue( 283 | context.handle, device.handle, flags, err) 284 | else: 285 | fnme = "clCreateCommandQueueWithProperties" 286 | if properties is None and flags == 0: 287 | props = cl.ffi.NULL 288 | else: 289 | if cl.CL_QUEUE_PROPERTIES not in properties and flags != 0: 290 | properties[cl.CL_QUEUE_PROPERTIES] = flags 291 | props = cl.ffi.new("uint64_t[]", len(properties) * 2 + 1) 292 | for i, kv in enumerate(sorted(properties.items())): 293 | props[i * 2] = kv[0] 294 | props[i * 2 + 1] = kv[1] 295 | self._handle = self._lib.clCreateCommandQueueWithProperties( 296 | context.handle, device.handle, props, err) 297 | if err[0]: 298 | self._handle = None 299 | raise CLRuntimeError("%s() failed with error %s" % 300 | (fnme, CL.get_error_description(err[0])), 301 | err[0]) 302 | 303 | @property 304 | def context(self): 305 | """ 306 | context associated with this queue. 307 | """ 308 | return self._context 309 | 310 | @property 311 | def device(self): 312 | """ 313 | device associated with this queue. 314 | """ 315 | return self._device 316 | 317 | def execute_kernel(self, kernel, global_size, local_size, 318 | global_offset=None, wait_for=None, need_event=True): 319 | """Executes OpenCL kernel (calls clEnqueueNDRangeKernel). 320 | 321 | Parameters: 322 | kernel: Kernel object. 323 | global_size: global size. 324 | local_size: local size. 325 | global_offset: global offset. 326 | wait_for: list of the Event objects to wait. 327 | need_event: return Event object or not. 328 | 329 | Returns: 330 | Event object or None if need_event == False. 
331 | """ 332 | event = cl.ffi.new("cl_event[]", 1) if need_event else cl.ffi.NULL 333 | wait_list, n_events = CL.get_wait_list(wait_for) 334 | n_dims = len(global_size) 335 | global_work_size = cl.ffi.new("size_t[]", n_dims) 336 | for i, sz in enumerate(global_size): 337 | global_work_size[i] = sz 338 | if local_size is None: 339 | local_work_size = cl.ffi.NULL 340 | else: 341 | if len(local_size) != n_dims: 342 | raise ValueError("local_size should be the same length " 343 | "as global_size") 344 | local_work_size = cl.ffi.new("size_t[]", n_dims) 345 | for i, sz in enumerate(local_size): 346 | local_work_size[i] = sz 347 | if global_offset is None: 348 | global_work_offset = cl.ffi.NULL 349 | else: 350 | if len(global_work_offset) != n_dims: 351 | raise ValueError("global_offset should be the same length " 352 | "as global_size") 353 | global_work_offset = cl.ffi.new("size_t[]", n_dims) 354 | for i, sz in enumerate(global_offset): 355 | global_work_offset[i] = sz 356 | n = self._lib.clEnqueueNDRangeKernel( 357 | self.handle, kernel.handle, n_dims, global_work_offset, 358 | global_work_size, local_work_size, n_events, wait_list, event) 359 | if n: 360 | raise CLRuntimeError("clEnqueueNDRangeKernel() failed with " 361 | "error %s" % CL.get_error_description(n), n) 362 | return Event(event[0]) if event != cl.ffi.NULL else None 363 | 364 | def map_buffer(self, buf, flags, size, blocking=True, offset=0, 365 | wait_for=None, need_event=False): 366 | """Maps buffer. 367 | 368 | Parameters: 369 | buf: Buffer object. 370 | flags: mapping flags. 371 | size: mapping size. 372 | blocking: if the call would block until completion. 373 | offset: mapping offset. 374 | wait_for: list of the Event objects to wait. 375 | need_event: return Event object or not. 376 | 377 | Returns: 378 | (event, ptr): event - Event object or None if need_event == False, 379 | ptr - pointer to the mapped buffer 380 | (cffi void* converted to int). 
381 | """ 382 | err = cl.ffi.new("cl_int *") 383 | event = cl.ffi.new("cl_event[]", 1) if need_event else cl.ffi.NULL 384 | wait_list, n_events = CL.get_wait_list(wait_for) 385 | ptr = self._lib.clEnqueueMapBuffer( 386 | self.handle, buf.handle, blocking, flags, offset, size, 387 | n_events, wait_list, event, err) 388 | if err[0]: 389 | raise CLRuntimeError("clEnqueueMapBuffer() failed with error %s" % 390 | CL.get_error_description(err[0]), err[0]) 391 | return (None if event == cl.ffi.NULL else Event(event[0]), 392 | int(cl.ffi.cast("size_t", ptr))) 393 | 394 | def unmap_buffer(self, buf, ptr, wait_for=None, need_event=True): 395 | """Unmaps previously mapped buffer. 396 | 397 | Parameters: 398 | buf: Buffer object to unmap. 399 | ptr: pointer to the mapped buffer. 400 | wait_for: list of the Event objects to wait. 401 | need_event: return Event object or not. 402 | 403 | Returns: 404 | Event object or None if need_event == False. 405 | """ 406 | event = cl.ffi.new("cl_event[]", 1) if need_event else cl.ffi.NULL 407 | wait_list, n_events = CL.get_wait_list(wait_for) 408 | n = self._lib.clEnqueueUnmapMemObject( 409 | self.handle, buf.handle, cl.ffi.cast("void*", ptr), 410 | n_events, wait_list, event) 411 | if n: 412 | raise CLRuntimeError("clEnqueueUnmapMemObject() failed with " 413 | "error %s" % CL.get_error_description(n), n) 414 | return Event(event[0]) if event != cl.ffi.NULL else None 415 | 416 | def read_buffer(self, buf, host_array, blocking=True, size=None, offset=0, 417 | wait_for=None, need_event=False): 418 | """Copies from device buffer to host buffer. 419 | 420 | Parameters: 421 | buf: Buffer object. 422 | host_array: numpy array. 423 | blocking: if the read is blocking. 424 | size: size in bytes to copy (None for entire numpy array). 425 | offset: offset in the device buffer. 426 | wait_for: list of the Event objects to wait. 427 | need_event: return Event object or not. 428 | 429 | Returns: 430 | Event object or None if need_event == False. 
431 | """ 432 | event = cl.ffi.new("cl_event[]", 1) if need_event else cl.ffi.NULL 433 | wait_list, n_events = CL.get_wait_list(wait_for) 434 | host_ptr, size = CL.extract_ptr_and_size(host_array, size) 435 | n = self._lib.clEnqueueReadBuffer( 436 | self.handle, buf.handle, blocking, offset, size, host_ptr, 437 | n_events, wait_list, event) 438 | if n: 439 | raise CLRuntimeError("clEnqueueReadBuffer() failed with " 440 | "error %s" % CL.get_error_description(n), n) 441 | return Event(event[0]) if event != cl.ffi.NULL else None 442 | 443 | def write_buffer(self, buf, host_array, blocking=True, size=None, offset=0, 444 | wait_for=None, need_event=False): 445 | """Copies from host buffer to device buffer. 446 | 447 | Parameters: 448 | buf: Buffer object. 449 | host_array: numpy array. 450 | blocking: if the read is blocking. 451 | size: size in bytes to copy (None for entire numpy array). 452 | offset: offset in the device buffer. 453 | wait_for: list of the Event objects to wait. 454 | need_event: return Event object or not. 455 | 456 | Returns: 457 | Event object or None if need_event == False. 458 | """ 459 | event = cl.ffi.new("cl_event[]", 1) if need_event else cl.ffi.NULL 460 | wait_list, n_events = CL.get_wait_list(wait_for) 461 | host_ptr, size = CL.extract_ptr_and_size(host_array, size) 462 | n = self._lib.clEnqueueWriteBuffer( 463 | self.handle, buf.handle, blocking, offset, size, host_ptr, 464 | n_events, wait_list, event) 465 | if n: 466 | raise CLRuntimeError("clEnqueueReadBuffer() failed with " 467 | "error %s" % CL.get_error_description(n), n) 468 | return Event(event[0]) if event != cl.ffi.NULL else None 469 | 470 | def copy_buffer(self, src, dst, src_offset, dst_offset, size, 471 | wait_for=None, need_event=True): 472 | """Enqueues a command to copy from one buffer object to another. 473 | 474 | Parameters: 475 | src: source Buffer object. 476 | dst: destination Buffer object. 477 | src_offset: offset in bytes where to begin copying data from src. 
478 | dst_offset: offset in bytes where to begin copying data into dst. 479 | size: number of bytes to copy. 480 | wait_for: list of the Event objects to wait. 481 | need_event: return Event object or not. 482 | 483 | Returns: 484 | Event object or None if need_event == False. 485 | """ 486 | event = cl.ffi.new("cl_event[]", 1) if need_event else cl.ffi.NULL 487 | wait_list, n_events = CL.get_wait_list(wait_for) 488 | n = self._lib.clEnqueueCopyBuffer( 489 | self.handle, src.handle, dst.handle, src_offset, dst_offset, size, 490 | n_events, wait_list, event) 491 | if n: 492 | raise CLRuntimeError("clEnqueueCopyBuffer() failed with " 493 | "error %s" % CL.get_error_description(n), n) 494 | return Event(event[0]) if event != cl.ffi.NULL else None 495 | 496 | def copy_buffer_rect(self, src, dst, src_origin, dst_origin, region, 497 | src_row_pitch=0, src_slice_pitch=0, 498 | dst_row_pitch=0, dst_slice_pitch=0, 499 | wait_for=None, need_event=True): 500 | """Enqueues a command to copy a 3D rectangular region from one 501 | buffer object to another. 502 | 503 | Parameters: 504 | src: source Buffer object. 505 | dst: destination Buffer object. 506 | src_origin: the (x in bytes, y, z) in the source buffer, 507 | offset in bytes is computed as: 508 | z * src_slice_pitch + y * src_row_pitch + x. 509 | dst_origin: the (x in bytes, y, z) in the destination buffer, 510 | offset in bytes is computed as: 511 | z * dst_slice_pitch + y * dst_row_pitch + x. 512 | region: the (width in bytes, height, depth) 513 | of the rectangle being copied. 514 | src_row_pitch: the length of each source row in bytes, 515 | if 0, region[0] will be used. 516 | src_slice_pitch: the length of each 2D source slice in bytes, 517 | if 0, region[1] * src_row_pitch will be used. 518 | dst_row_pitch: the length of each destination row in bytes, 519 | if 0, region[0] will be used. 520 | dst_slice_pitch: the length of each 2D destination slice in bytes, 521 | if 0, region[1] * src_row_pitch will be used. 
522 | wait_for: list of the Event objects to wait. 523 | need_event: return Event object or not. 524 | 525 | Returns: 526 | Event object or None if need_event == False. 527 | """ 528 | event = cl.ffi.new("cl_event[]", 1) if need_event else cl.ffi.NULL 529 | wait_list, n_events = CL.get_wait_list(wait_for) 530 | _src_origin = cl.ffi.new("size_t[]", src_origin) 531 | _dst_origin = cl.ffi.new("size_t[]", dst_origin) 532 | _region = cl.ffi.new("size_t[]", region) 533 | n = self._lib.clEnqueueCopyBufferRect( 534 | self.handle, src.handle, dst.handle, 535 | _src_origin, _dst_origin, _region, 536 | src_row_pitch, src_slice_pitch, 537 | dst_row_pitch, dst_slice_pitch, 538 | n_events, wait_list, event) 539 | if n: 540 | raise CLRuntimeError("clEnqueueCopyBufferRect() failed with " 541 | "error %s" % CL.get_error_description(n), n) 542 | return Event(event[0]) if event != cl.ffi.NULL else None 543 | 544 | def fill_buffer(self, buffer, pattern, pattern_size, size, offset=0, 545 | wait_for=None, need_event=True): 546 | """Enqueues a command to copy from one buffer object to another. 547 | 548 | Parameters: 549 | buffer: Buffer object. 550 | pattern: a pointer to the data pattern of size pattern_size 551 | in bytes, pattern will be used to fill a region in 552 | buffer starting at offset and is size bytes in size 553 | (numpy array or direct cffi pointer). 554 | pattern_size: pattern size in bytes. 555 | size: the size in bytes of region being filled in buffer 556 | and must be a multiple of pattern_size. 557 | wait_for: list of the Event objects to wait. 558 | need_event: return Event object or not. 559 | 560 | Returns: 561 | Event object or None if need_event == False. 
562 | """ 563 | event = cl.ffi.new("cl_event[]", 1) if need_event else cl.ffi.NULL 564 | wait_list, n_events = CL.get_wait_list(wait_for) 565 | pattern, _ = CL.extract_ptr_and_size(pattern, 0) 566 | n = self._lib.clEnqueueFillBuffer( 567 | self.handle, buffer.handle, pattern, pattern_size, offset, size, 568 | n_events, wait_list, event) 569 | if n: 570 | raise CLRuntimeError("clEnqueueFillBuffer() failed with " 571 | "error %s" % CL.get_error_description(n), n) 572 | return Event(event[0]) if event != cl.ffi.NULL else None 573 | 574 | def svm_map(self, svm_ptr, flags, size, blocking=True, 575 | wait_for=None, need_event=False): 576 | """Enqueues a command that will allow the host to update a region 577 | of a SVM buffer. 578 | 579 | Parameters: 580 | svm_ptr: SVM object or numpy array or direct cffi pointer. 581 | flags: mapping flags. 582 | size: mapping size (may be None if svm_ptr is a numpy array). 583 | blocking: if the call would block until completion. 584 | wait_for: list of the Event objects to wait. 585 | need_event: return Event object or not. 586 | 587 | Returns: 588 | Event object or None if need_event == False. 589 | """ 590 | event = cl.ffi.new("cl_event[]", 1) if need_event else cl.ffi.NULL 591 | wait_list, n_events = CL.get_wait_list(wait_for) 592 | if isinstance(svm_ptr, SVM): 593 | ptr = svm_ptr.handle 594 | else: 595 | ptr, size = CL.extract_ptr_and_size(svm_ptr, size) 596 | err = self._lib.clEnqueueSVMMap( 597 | self.handle, blocking, flags, ptr, size, 598 | n_events, wait_list, event) 599 | if err: 600 | raise CLRuntimeError("clEnqueueSVMMap() failed with error %s" % 601 | CL.get_error_description(err), err) 602 | return None if event == cl.ffi.NULL else Event(event[0]) 603 | 604 | def svm_unmap(self, svm_ptr, wait_for=None, need_event=True): 605 | """Unmaps previously mapped SVM buffer. 606 | 607 | Parameters: 608 | svm_ptr: pointer that was specified in a previous call to svm_map. 609 | wait_for: list of the Event objects to wait. 
610 | need_event: return Event object or not. 611 | 612 | Returns: 613 | Event object or None if need_event == False. 614 | """ 615 | event = cl.ffi.new("cl_event[]", 1) if need_event else cl.ffi.NULL 616 | wait_list, n_events = CL.get_wait_list(wait_for) 617 | if isinstance(svm_ptr, SVM): 618 | ptr = svm_ptr.handle 619 | else: 620 | ptr, _size = CL.extract_ptr_and_size(svm_ptr, 0) 621 | err = self._lib.clEnqueueSVMUnmap( 622 | self.handle, ptr, n_events, wait_list, event) 623 | if err: 624 | raise CLRuntimeError( 625 | "clEnqueueSVMUnmap() failed with error %s" % 626 | CL.get_error_description(err), err) 627 | return Event(event[0]) if event != cl.ffi.NULL else None 628 | 629 | def svm_memcpy(self, dst, src, size, blocking=True, 630 | wait_for=None, need_event=False): 631 | """Enqueues a command to do a memcpy operation. 632 | 633 | Parameters: 634 | dst: destination (numpy array or direct cffi pointer). 635 | src: source (numpy array or direct cffi pointer). 636 | size: number of bytes to copy. 637 | blocking: if the call would block until completion. 638 | wait_for: list of the Event objects to wait. 639 | need_event: return Event object or not. 640 | 641 | Returns: 642 | Event object or None if need_event == False. 643 | """ 644 | event = cl.ffi.new("cl_event[]", 1) if need_event else cl.ffi.NULL 645 | wait_list, n_events = CL.get_wait_list(wait_for) 646 | dst, _ = CL.extract_ptr_and_size(dst, 0) 647 | src, _ = CL.extract_ptr_and_size(src, 0) 648 | n = self._lib.clEnqueueSVMMemcpy( 649 | self.handle, blocking, dst, src, size, n_events, wait_list, event) 650 | if n: 651 | raise CLRuntimeError("clEnqueueSVMMemcpy() failed with " 652 | "error %s" % CL.get_error_description(n), n) 653 | return Event(event[0]) if event != cl.ffi.NULL else None 654 | 655 | def svm_memfill(self, svm_ptr, pattern, pattern_size, size, 656 | wait_for=None, need_event=True): 657 | """Enqueues a command to fill a region in memory with a pattern 658 | of a given pattern size. 
659 | 660 | Parameters: 661 | svm_ptr: SVM object or numpy array or direct cffi pointer. 662 | pattern: a pointer to the data pattern of size pattern_size 663 | in bytes (numpy array or direct cffi pointer). 664 | pattern_size: pattern size in bytes. 665 | size: the size in bytes of region being filled starting 666 | with svm_ptr and must be a multiple of pattern_size. 667 | wait_for: list of the Event objects to wait. 668 | need_event: return Event object or not. 669 | 670 | Returns: 671 | Event object or None if need_event == False. 672 | """ 673 | event = cl.ffi.new("cl_event[]", 1) if need_event else cl.ffi.NULL 674 | wait_list, n_events = CL.get_wait_list(wait_for) 675 | if isinstance(svm_ptr, SVM): 676 | ptr = svm_ptr.handle 677 | else: 678 | ptr, _ = CL.extract_ptr_and_size(svm_ptr, 0) 679 | pattern, _ = CL.extract_ptr_and_size(pattern, 0) 680 | n = self._lib.clEnqueueSVMMemFill( 681 | self.handle, ptr, pattern, pattern_size, size, 682 | n_events, wait_list, event) 683 | if n: 684 | raise CLRuntimeError("clEnqueueSVMMemFill() failed with " 685 | "error %s" % CL.get_error_description(n), n) 686 | return Event(event[0]) if event != cl.ffi.NULL else None 687 | 688 | def flush(self): 689 | """Flushes the queue. 690 | """ 691 | n = self._lib.clFlush(self.handle) 692 | if n: 693 | raise CLRuntimeError("clFlush() failed with error %s" % 694 | CL.get_error_description(n), n) 695 | 696 | def finish(self): 697 | """Waits for all previous commands issued to this queue to end. 
698 | """ 699 | n = self._lib.clFinish(self.handle) 700 | if n: 701 | raise CLRuntimeError("clFinish() failed with error %s" % 702 | CL.get_error_description(n), n) 703 | 704 | def _release(self): 705 | if self.handle is not None: 706 | self._lib.clReleaseCommandQueue(self.handle) 707 | self._handle = None 708 | 709 | def __del__(self): 710 | if self.context.handle is None: 711 | raise SystemError("Incorrect destructor call order detected") 712 | self._release() 713 | self.context._del_ref(self) 714 | 715 | 716 | class Buffer(CL): 717 | """Holds OpenCL buffer. 718 | 719 | Attributes: 720 | context: Context object associated with this buffer. 721 | flags: flags supplied for the creation of this buffer. 722 | host_array: host array reference, such as numpy array, 723 | will be stored only if flags include CL_MEM_USE_HOST_PTR. 724 | size: size of the host array. 725 | parent: parent buffer if this one should be created as sub buffer. 726 | origin: origin of the sub buffer if parent is not None. 727 | _n_refs: reference count as a workaround for possible 728 | incorrect destructor call order, see 729 | http://bugs.python.org/issue23720 730 | (weakrefs do not help here). 
    def __init__(self, context, flags, host_array, size=None,
                 parent=None, origin=0):
        """Creates OpenCL buffer (clCreateBuffer) or, when parent is given,
        a sub buffer of it (clCreateSubBuffer).

        Parameters:
            context: Context object to associate with this buffer.
            flags: flags for the creation of this buffer.
            host_array: host array, such as numpy array, or None.
            size: size in bytes; if None, it is taken from host_array
                  by CL.extract_ptr_and_size().
            parent: parent Buffer if this one should be created
                    as sub buffer.
            origin: origin of the sub buffer within parent
                    (used only if parent is not None).

        Raises:
            CLRuntimeError: if the OpenCL creation call reports an error.
        """
        super(Buffer, self).__init__()
        # References are registered before the OpenCL object is created.
        context._add_ref(self)
        self._n_refs = 1
        self._parent = parent
        if parent is not None:
            parent._add_ref(self)
        self._context = context
        self._flags = flags
        # Keep the host array only when CL_MEM_USE_HOST_PTR is requested.
        self._host_array = (host_array if flags & cl.CL_MEM_USE_HOST_PTR != 0
                            else None)
        host_ptr, size = CL.extract_ptr_and_size(host_array, size)
        self._size = size
        self._origin = origin
        err = cl.ffi.new("cl_int *")
        if parent is None:
            self._handle = self._lib.clCreateBuffer(
                context.handle, flags, size, host_ptr, err)
        else:
            # Region descriptor passed to clCreateSubBuffer: (origin, size).
            info = cl.ffi.new("size_t[]", 2)
            info[0] = origin
            info[1] = size
            self._handle = self._lib.clCreateSubBuffer(
                parent.handle, flags, cl.CL_BUFFER_CREATE_TYPE_REGION,
                info, err)
        if err[0]:
            # Reset the handle so that _release() will not try to free it.
            self._handle = None
            raise CLRuntimeError(
                "%s failed with error %s" %
                ("clCreateBuffer()" if parent is None else "clCreateSubBuffer",
                 CL.get_error_description(err[0])), err[0])
class skip(object):
    """Marker telling Kernel.set_args to leave arguments untouched.

    Passing the class itself skips exactly one argument;
    passing an instance skip(n) skips n arguments.
    """
    def __init__(self, number):
        # Assignment goes through the validating property setter.
        self.number = number

    number = property(lambda self: self._number)

    @number.setter
    def number(self, count):
        if count < 1:
            raise ValueError("number must be greater than 0")
        self._number = count
@property
def work_group_size(self):
    """Returns the CL_KERNEL_WORK_GROUP_SIZE value, i.e. the maximum
    work-group size that can be used to execute the kernel
    on this device.
    """
    out = cl.ffi.new("size_t *")
    self._get_info(cl.CL_KERNEL_WORK_GROUP_SIZE, out)
    value = out[0]
    return int(value)
class Kernel(CL):
    """Holds OpenCL kernel.

    Attributes:
        program: Program object associated with this kernel.
        name: kernel name in the program.
    """

    def __init__(self, program, name):
        """Creates the kernel called name from the built program.

        Raises:
            CLRuntimeError: when clCreateKernel() reports an error.
        """
        super(Kernel, self).__init__()
        self._program = program
        self._name = name
        err = cl.ffi.new("cl_int *")
        ss = cl.ffi.new("char[]", name.encode("utf-8"))
        self._handle = self._lib.clCreateKernel(program.handle, ss, err)
        if err[0]:
            self._handle = None
            raise CLRuntimeError("clCreateKernel() failed with error %s" %
                                 CL.get_error_description(err[0]),
                                 err[0])

    @property
    def program(self):
        """
        Program object associated with this kernel.
        """
        return self._program

    @property
    def name(self):
        """
        kernel name in the program.
        """
        return self._name

    @property
    def reference_count(self):
        """
        Value of CL_KERNEL_REFERENCE_COUNT for this kernel.
        """
        buf = cl.ffi.new("cl_uint *")
        self._get_kernel_info(cl.CL_KERNEL_REFERENCE_COUNT, buf)
        return buf[0]

    @property
    def num_args(self):
        """
        Value of CL_KERNEL_NUM_ARGS for this kernel.
        """
        # CL_KERNEL_NUM_ARGS is returned as cl_uint; reading it through
        # a wider size_t buffer gives a wrong value on big-endian hosts.
        buf = cl.ffi.new("cl_uint *")
        self._get_kernel_info(cl.CL_KERNEL_NUM_ARGS, buf)
        return buf[0]

    @property
    def attributes(self):
        """
        Value of CL_KERNEL_ATTRIBUTES decoded as a stripped string.
        """
        buf = cl.ffi.new("char[]", 4096)
        self._get_kernel_info(cl.CL_KERNEL_ATTRIBUTES, buf)
        return cl.ffi.string(buf).decode("utf-8", "replace").strip()

    def get_work_group_info(self, device):
        """Returns WorkGroupInfo object for this kernel and the device.
        """
        return WorkGroupInfo(self, device)

    def set_arg(self, idx, vle, size=None):
        """Sets kernel argument.

        Parameters:
            idx: index of the kernel argument (zero-based).
            vle: kernel argument:
                 - for buffers should be an instance of Buffer,
                 - for scalars should be a numpy array slice
                   (k[0:1] for example),
                 - for NULL should be None,
                 - may be cffi pointer also, in such case size should be set.
            size: size of the vle (may be None for buffers and scalars).

        Raises:
            ValueError: when vle has an unsupported type or size is
                        missing for a cffi pointer.
            CLRuntimeError: when clSetKernelArg() reports an error.
        """
        if isinstance(vle, (Buffer, Pipe)):
            arg_value = cl.ffi.new("cl_mem[]", 1)
            arg_value[0] = vle.handle
            arg_size = cl.ffi.sizeof("cl_mem")
        elif hasattr(vle, "__array_interface__"):
            arg_value = cl.ffi.cast("const void*",
                                    vle.__array_interface__["data"][0])
            arg_size = vle.nbytes if size is None else size
        elif vle is None:
            arg_value = cl.ffi.NULL
            arg_size = cl.ffi.sizeof("cl_mem") if size is None else size
        elif isinstance(vle, type(cl.ffi.NULL)):  # cffi pointer
            # isinstance instead of an exact type comparison: cdata
            # objects (ffi.new() results for example) may be instances
            # of a subclass of the base cdata type.
            arg_value = cl.ffi.cast("const void*", vle)
            if size is None:
                raise ValueError("size should be set in case of cffi pointer")
            arg_size = size
        elif isinstance(vle, SVM):
            return self.set_arg_svm(idx, vle)
        else:
            raise ValueError("vle should be of type Buffer, Pipe, SVM, "
                             "numpy array, cffi pointer or None "
                             "in Kernel::set_arg()")
        n = self._lib.clSetKernelArg(self.handle, idx, arg_size, arg_value)
        if n:
            raise CLRuntimeError("clSetKernelArg(%d, %s) failed with error "
                                 "%s" % (idx, repr(vle),
                                         CL.get_error_description(n)),
                                 n)

    def set_arg_svm(self, idx, svm_ptr):
        """Sets SVM pointer as the kernel argument.

        Parameters:
            idx: index of the kernel argument (zero-based).
            svm_ptr: SVM object or numpy array or direct cffi pointer.

        Raises:
            CLRuntimeError: when clSetKernelArgSVMPointer() reports an error.
        """
        if isinstance(svm_ptr, SVM):
            ptr = svm_ptr.handle
        else:
            ptr, _size = CL.extract_ptr_and_size(svm_ptr, 0)
        err = self._lib.clSetKernelArgSVMPointer(self.handle, idx, ptr)
        if err:
            raise CLRuntimeError(
                "clSetKernelArgSVMPointer(%d, %s) failed with error %s" %
                (idx, repr(svm_ptr), CL.get_error_description(err)), err)

    def set_args(self, *args):
        """Sets several kernel arguments starting from index 0.

        Each item is either the skip class (leave one argument as is),
        a skip(n) instance (leave n arguments as is),
        a (vle, size) tuple or a single value accepted by set_arg().
        """
        i = 0
        for arg in args:
            if arg is skip:
                i += 1
                continue
            if isinstance(arg, skip):
                i += arg.number
                continue
            if isinstance(arg, tuple) and len(arg) == 2:
                self.set_arg(i, *arg)
            else:
                self.set_arg(i, arg)
            i += 1

    def _release(self):
        if self.handle is not None:
            self._lib.clReleaseKernel(self.handle)
            self._handle = None

    def _get_kernel_info(self, code, buf):
        """Fills buf with clGetKernelInfo(code) and returns the size
        reported by OpenCL.
        """
        sz = cl.ffi.new("size_t *")
        err = self._lib.clGetKernelInfo(
            self.handle, code, cl.ffi.sizeof(buf), buf, sz)
        if err:
            raise CLRuntimeError("clGetKernelInfo() failed with error %s" %
                                 CL.get_error_description(err), err)
        return sz[0]

    def __del__(self):
        self._release()

    def __repr__(self):
        # The format string must contain a placeholder: '' % name raises
        # TypeError ("not all arguments converted").
        return '<opencl4py.Kernel %s>' % (self.name,)
def __init__(self, context, devices, src, include_dirs=(), options="",
             binary=False):
    """Creates and builds the program.

    Parameters:
        context: Context object this program is registered with.
        devices: list of Device objects to build the program for.
        src: program source, or an iterable of precompiled binaries
             when binary is set.
        include_dirs: list of include dirs.
        options: additional build options.
        binary: False if the program should be created from source.
    """
    super(Program, self).__init__()
    context._add_ref(self)
    self._context = context
    self._devices = devices
    # The source text is kept (utf-8 encoded) only for source builds.
    self._src = None if binary else src.encode("utf-8")
    self._include_dirs = list(include_dirs)
    self._options = options.strip().encode("utf-8")
    self._build_logs = []
    if binary:
        self._create_program_from_binary(src)
    else:
        self._create_program_from_source()
@property
def binaries(self):
    """
    List of program binaries (bytes), one per device.
    """
    count = len(self.devices)
    sizes = cl.ffi.new("size_t[]", count)
    self._get_program_info(cl.CL_PROGRAM_BINARY_SIZES, sizes)
    pointers = cl.ffi.new("char *[]", count)
    # The list keeps the cffi arrays alive while pointers refers to them.
    storage = []
    for idx in range(count):
        chunk = cl.ffi.new("char[]", sizes[idx])
        storage.append(chunk)
        pointers[idx] = chunk
    self._get_program_info(cl.CL_PROGRAM_BINARIES, pointers)
    result = []
    for idx in range(count):
        length = sizes[idx]
        view = cl.ffi.buffer(pointers[idx], length)
        result.append(bytes(view[0:length]))
    return result
def _get_build_logs(self, device_list):
    """Refills self.build_logs with one build log per device.

    Parameters:
        device_list: iterable of cl_device_id handles the program
                     was built for.

    An empty string is stored for a device when its log is empty
    or cannot be retrieved.
    """
    del self.build_logs[:]
    sz = cl.ffi.new("size_t *")
    for dev in device_list:
        # Ask for the log size first: with a fixed-size buffer
        # clGetProgramBuildInfo() fails on longer logs and
        # the whole log would be lost.
        e = self._lib.clGetProgramBuildInfo(
            self.handle, dev, cl.CL_PROGRAM_BUILD_LOG, 0,
            cl.ffi.NULL, sz)
        if e or sz[0] <= 0:
            self.build_logs.append("")
            continue
        log = cl.ffi.new("char[]", sz[0])
        e = self._lib.clGetProgramBuildInfo(
            self.handle, dev, cl.CL_PROGRAM_BUILD_LOG, cl.ffi.sizeof(log),
            log, cl.ffi.NULL)
        if e:
            self.build_logs.append("")
            continue
        self.build_logs.append(cl.ffi.string(log).decode("utf-8",
                                                         "replace"))
def _create_program_from_binary(self, src):
    """Creates the program from precompiled binaries and builds it.

    Parameters:
        src: sized iterable of binaries, one per device
             in self.devices.

    Raises:
        ValueError: when the number of binaries differs from
                    the number of devices.
        CLRuntimeError: when clCreateProgramWithBinary() or
                        clBuildProgram() reports an error.
    """
    count = len(self.devices)
    n_binaries = len(src)
    if n_binaries != count:
        raise ValueError("You have supplied %d binaries for %d devices" %
                         (n_binaries, count))
    device_list = cl.ffi.new("cl_device_id[]", count)
    for idx, device in enumerate(self.devices):
        device_list[idx] = device.handle
    lengths = cl.ffi.new("size_t[]", count)
    binaries_ffi = cl.ffi.new("unsigned char *[]", count)
    # keepalive owns the cffi copies of the binaries so that they are
    # not garbage collected while binaries_ffi points to them.
    keepalive = []
    for idx, blob in enumerate(src):
        lengths[idx] = len(blob)
        chunk = cl.ffi.new("unsigned char[]", blob)
        keepalive.append(chunk)
        binaries_ffi[idx] = chunk
    binary_status = cl.ffi.new("cl_int[]", count)
    create_err = cl.ffi.new("cl_int *")
    self._handle = self._lib.clCreateProgramWithBinary(
        self.context.handle, count, device_list, lengths,
        binaries_ffi, binary_status, create_err)
    if create_err[0]:
        self._handle = None
        statuses = [CL.get_error_name_from_code(s) for s in binary_status]
        raise CLRuntimeError("clCreateProgramWithBinary() failed with "
                             "error %s; status %s" % (
                                 CL.get_error_description(create_err[0]),
                                 ", ".join(statuses)),
                             create_err[0])
    build_err = self._lib.clBuildProgram(
        self.handle, count, device_list,
        self.options, cl.ffi.NULL, cl.ffi.NULL)
    del keepalive
    self._get_build_logs(device_list)
    if not build_err:
        return
    raise CLRuntimeError("clBuildProgram() failed with error %s.\n"
                         "Logs are:\n%s" % (
                             CL.get_error_description(build_err),
                             "\n".join(self.build_logs)),
                         build_err)
def __init__(self, context, flags, size, alignment=0):
    """Allocates the SVM buffer with clSVMAlloc().

    Parameters:
        context: Context object this buffer is registered with.
        flags: flags for a buffer.
        size: size in bytes of the SVM buffer to be allocated.
        alignment: the minimum alignment in bytes (can be 0).

    Raises:
        CLRuntimeError: when clSVMAlloc() returns NULL.
    """
    super(SVM, self).__init__()
    context._add_ref(self)
    self._context = context
    self._flags = flags
    self._size = size
    self._alignment = alignment
    pointer = self._lib.clSVMAlloc(context.handle, flags, size, alignment)
    self._handle = pointer
    if pointer == cl.ffi.NULL:
        # clSVMAlloc() reports failure only through a NULL result,
        # so a fixed error code is attached.
        self._handle = None
        raise CLRuntimeError("clSVMAlloc() failed", cl.CL_INVALID_VALUE)
class Context(CL):
    """Holds OpenCL context.

    Attributes:
        platform: Platform object associated with this context.
        devices: list of Device object associated with this context.
        _n_refs: reference count as a workaround for possible
                 incorrect destructor call order, see
                 http://bugs.python.org/issue23720
                 (weakrefs do not help here).
    """
    def __init__(self, platform, devices):
        """Creates OpenCL context on the platform for the devices.

        Raises:
            CLRuntimeError: when clCreateContext() reports an error.
        """
        super(Context, self).__init__()
        self._n_refs = 1
        self._platform = platform
        self._devices = devices
        # Zero-terminated property list: (CL_CONTEXT_PLATFORM, handle, 0).
        properties = cl.ffi.new("cl_context_properties[]", 3)
        properties[0] = cl.CL_CONTEXT_PLATFORM
        properties[1] = cl.ffi.cast("cl_context_properties",
                                    platform.handle)
        properties[2] = 0
        count = len(devices)
        handles = cl.ffi.new("cl_device_id[]", count)
        for idx, device in enumerate(devices):
            handles[idx] = device.handle
        status = cl.ffi.new("cl_int *")
        self._handle = self._lib.clCreateContext(
            properties, count, handles, cl.ffi.NULL, cl.ffi.NULL, status)
        if status[0]:
            self._handle = None
            raise CLRuntimeError("clCreateContext() failed with error %s" %
                                 CL.get_error_description(status[0]),
                                 status[0])

    def _add_ref(self, obj):
        # NOTE(review): the increment is not done under cl.lock while
        # the decrement in _del_ref is - confirm that it is intended.
        self._n_refs += 1

    def _del_ref(self, obj):
        with cl.lock:
            self._n_refs -= 1
            remaining = self._n_refs
        if remaining > 0:
            return
        # The last reference is gone: release the OpenCL context.
        self._release()

    @property
    def platform(self):
        """
        Platform object associated with this context.
        """
        return self._platform

    @property
    def devices(self):
        """
        List of Device object associated with this context.
        """
        return self._devices

    def create_queue(self, device, flags=0, properties=None):
        """Creates Queue object for the supplied device.

        Parameters:
            device: Device object.
            flags: queue flags (for example
                   CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE).
            properties: dictionary of OpenCL 2.0 queue properties.

        Returns:
            Queue object.
        """
        queue = Queue(self, device, flags, properties)
        return queue

    def create_buffer(self, flags, host_array=None, size=None):
        """Creates Buffer object based on host_array.

        Parameters:
            flags: flags for the creation of the buffer.
            host_array: numpy array or None.
            size: size if host_array is not a numpy array.

        Returns:
            Buffer object.
        """
        buf = Buffer(self, flags, host_array, size)
        return buf

    def create_program(self, src, include_dirs=(), options="", devices=None,
                       binary=False):
        """Creates and builds OpenCL program from source
        for the supplied devices associated with this context.

        Parameters:
            src: program source (or precompiled binaries iterable
                 when binary is set).
            include_dirs: list of include directories.
            options: additional build options.
            devices: list of devices on which to build the program
                     (if None will build on all devices).
            binary: passed to Program, False to build from source.
        Returns:
            Program object.
        """
        if devices is None:
            devices = self.devices
        return Program(self, devices, src, include_dirs, options, binary)

    def create_pipe(self, flags, packet_size, max_packets):
        """Creates OpenCL 2.0 pipe.

        Parameters:
            flags: flags for a pipe;
                   as of OpenCL 2.0 only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY,
                   CL_MEM_READ_WRITE, and CL_MEM_HOST_NO_ACCESS
                   can be specified when creating a pipe object
                   (0 defaults to CL_MEM_READ_WRITE).
            packet_size: size in bytes of a pipe packet
                         (must be greater than 0).
            max_packets: maximum number of packets the pipe can hold
                         (must be greater than 0).
        """
        pipe = Pipe(self, flags, packet_size, max_packets)
        return pipe

    def svm_alloc(self, flags, size, alignment=0):
        """Allocates shared virtual memory (SVM) buffer.

        Parameters:
            flags: flags for a buffer;
                   (CL_MEM_READ_WRITE, CL_MEM_WRITE_ONLY,
                    CL_MEM_READ_ONLY, CL_MEM_SVM_FINE_GRAIN_BUFFER,
                    CL_MEM_SVM_ATOMICS).
            size: size in bytes of the SVM buffer to be allocated.
            alignment: the minimum alignment in bytes,
                       it must be a power of two up to the largest
                       data type supported by the OpenCL device,
                       0 defaults to the largest supported alignment.
        """
        svm = SVM(self, flags, size, alignment)
        return svm

    def _release(self):
        if self.handle is None:
            return
        self._lib.clReleaseContext(self.handle)
        self._handle = None

    def __del__(self):
        # Drops the reference taken in __init__ (_n_refs starts at 1).
        self._del_ref(self)
def __init__(self, handle, platform, path):
    """Wraps an existing OpenCL device handle.

    Parameters:
        handle: OpenCL device handle.
        platform: Platform object associated with this device.
        path: opencl4py device identifier.

    The CL_DEVICE_OPENCL_C_VERSION string is read once and
    its numeric part is stored as a float (0.0 if it cannot be parsed).
    """
    super(Device, self).__init__()
    self._handle = handle
    self._platform = platform
    self._path = path

    self._version_string = self._get_device_info_str(
        cl.CL_DEVICE_OPENCL_C_VERSION)
    # The number follows the "OpenCL C " prefix and ends at the next
    # space or at the end of the string. partition() handles both
    # cases; slicing up to find() == -1 would cut the last digit
    # ("OpenCL C 1.2" would be parsed as 1.0).
    prefix_len = len("OpenCL C ")
    number = self._version_string[prefix_len:].partition(" ")[0]
    try:
        self._version = float(number)
    except ValueError:
        self._version = 0.0
1664 | """ 1665 | return self.global_memsize 1666 | 1667 | @property 1668 | def memalign(self): 1669 | """ 1670 | Alignment in bytes, required by clMapBuffer. 1671 | """ 1672 | return self.mem_base_addr_align 1673 | 1674 | @property 1675 | def available(self): 1676 | return self._get_device_info_bool(cl.CL_DEVICE_AVAILABLE) 1677 | 1678 | @property 1679 | def compiler_available(self): 1680 | return self._get_device_info_bool(cl.CL_DEVICE_COMPILER_AVAILABLE) 1681 | 1682 | @property 1683 | def little_endian(self): 1684 | return self._get_device_info_bool(cl.CL_DEVICE_ENDIAN_LITTLE) 1685 | 1686 | @property 1687 | def supports_error_correction(self): 1688 | return self._get_device_info_bool( 1689 | cl.CL_DEVICE_ERROR_CORRECTION_SUPPORT) 1690 | 1691 | @property 1692 | def host_unified_memory(self): 1693 | return self._get_device_info_bool(cl.CL_DEVICE_HOST_UNIFIED_MEMORY) 1694 | 1695 | @property 1696 | def supports_images(self): 1697 | return self._get_device_info_bool(cl.CL_DEVICE_IMAGE_SUPPORT) 1698 | 1699 | @property 1700 | def linker_available(self): 1701 | return self._get_device_info_bool(cl.CL_DEVICE_LINKER_AVAILABLE) 1702 | 1703 | @property 1704 | def prefers_user_sync(self): 1705 | return self._get_device_info_bool( 1706 | cl.CL_DEVICE_PREFERRED_INTEROP_USER_SYNC) 1707 | 1708 | @property 1709 | def address_bits(self): 1710 | return self._get_device_info_int(cl.CL_DEVICE_ADDRESS_BITS) 1711 | 1712 | @property 1713 | def double_fp_config(self): 1714 | return self._get_device_info_int(cl.CL_DEVICE_DOUBLE_FP_CONFIG) 1715 | 1716 | @property 1717 | def execution_capabilities(self): 1718 | return self._get_device_info_int(cl.CL_DEVICE_EXECUTION_CAPABILITIES) 1719 | 1720 | @property 1721 | def global_mem_cache_size(self): 1722 | return self._get_device_info_int(cl.CL_DEVICE_GLOBAL_MEM_CACHE_SIZE) 1723 | 1724 | @property 1725 | def global_mem_cache_line_size(self): 1726 | return self._get_device_info_int( 1727 | cl.CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE) 1728 | 1729 | 
# Integer-valued device info properties of class Device. Each one forwards a
# single CL_DEVICE_* constant to self._get_device_info_int().

@property
def half_fp_config(self):
    """Value of CL_DEVICE_HALF_FP_CONFIG (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_HALF_FP_CONFIG)

@property
def image2d_max_height(self):
    """Value of CL_DEVICE_IMAGE2D_MAX_HEIGHT (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_IMAGE2D_MAX_HEIGHT)

@property
def image2d_max_width(self):
    """Value of CL_DEVICE_IMAGE2D_MAX_WIDTH (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_IMAGE2D_MAX_WIDTH)

@property
def image3d_max_depth(self):
    """Value of CL_DEVICE_IMAGE3D_MAX_DEPTH (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_IMAGE3D_MAX_DEPTH)

@property
def image3d_max_height(self):
    """Value of CL_DEVICE_IMAGE3D_MAX_HEIGHT (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_IMAGE3D_MAX_HEIGHT)

@property
def image3d_max_width(self):
    """Value of CL_DEVICE_IMAGE3D_MAX_WIDTH (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_IMAGE3D_MAX_WIDTH)

@property
def image_max_buffer_size(self):
    """Value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_IMAGE_MAX_BUFFER_SIZE)

@property
def image_max_array_size(self):
    """Value of CL_DEVICE_IMAGE_MAX_ARRAY_SIZE (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_IMAGE_MAX_ARRAY_SIZE)

@property
def local_memsize(self):
    """Value of CL_DEVICE_LOCAL_MEM_SIZE (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_LOCAL_MEM_SIZE)

@property
def global_memsize(self):
    """Value of CL_DEVICE_GLOBAL_MEM_SIZE (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_GLOBAL_MEM_SIZE)

@property
def max_clock_frequency(self):
    """Value of CL_DEVICE_MAX_CLOCK_FREQUENCY (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_MAX_CLOCK_FREQUENCY)

@property
def max_compute_units(self):
    """Value of CL_DEVICE_MAX_COMPUTE_UNITS (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_MAX_COMPUTE_UNITS)

@property
def max_constant_args(self):
    """Value of CL_DEVICE_MAX_CONSTANT_ARGS (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_MAX_CONSTANT_ARGS)

@property
def max_constant_buffer_size(self):
    """Value of CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE (integer)."""
    return self._get_device_info_int(
        cl.CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE)

@property
def max_mem_alloc_size(self):
    """Value of CL_DEVICE_MAX_MEM_ALLOC_SIZE (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_MAX_MEM_ALLOC_SIZE)
# Remaining methods of class Device: info properties, the clGetDeviceInfo
# helpers they rely on, and __repr__.

@property
def max_parameter_size(self):
    """Value of CL_DEVICE_MAX_PARAMETER_SIZE (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_MAX_PARAMETER_SIZE)

@property
def max_read_image_args(self):
    """Value of CL_DEVICE_MAX_READ_IMAGE_ARGS (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_MAX_READ_IMAGE_ARGS)

@property
def max_work_group_size(self):
    """Value of CL_DEVICE_MAX_WORK_GROUP_SIZE (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_MAX_WORK_GROUP_SIZE)

@property
def max_work_item_dimensions(self):
    """Value of CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS (integer)."""
    return self._get_device_info_int(
        cl.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS)

@property
def max_write_image_args(self):
    """Value of CL_DEVICE_MAX_WRITE_IMAGE_ARGS (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_MAX_WRITE_IMAGE_ARGS)

@property
def mem_base_addr_align(self):
    """Value of CL_DEVICE_MEM_BASE_ADDR_ALIGN (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_MEM_BASE_ADDR_ALIGN)

@property
def min_data_type_align_size(self):
    """Value of CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE (integer)."""
    return self._get_device_info_int(
        cl.CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE)

@property
def preferred_vector_width_char(self):
    """Value of CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR (integer)."""
    return self._get_device_info_int(
        cl.CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR)

@property
def preferred_vector_width_short(self):
    """Value of CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT (integer)."""
    return self._get_device_info_int(
        cl.CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT)

@property
def preferred_vector_width_int(self):
    """Value of CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT (integer)."""
    return self._get_device_info_int(
        cl.CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT)

@property
def preferred_vector_width_long(self):
    """Value of CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG (integer)."""
    return self._get_device_info_int(
        cl.CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG)

@property
def preferred_vector_width_float(self):
    """Value of CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT (integer)."""
    return self._get_device_info_int(
        cl.CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT)

@property
def preferred_vector_width_double(self):
    """Value of CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE (integer)."""
    return self._get_device_info_int(
        cl.CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE)

@property
def preferred_vector_width_half(self):
    """Value of CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF (integer)."""
    return self._get_device_info_int(
        cl.CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF)

@property
def printf_buffer_size(self):
    """Value of CL_DEVICE_PRINTF_BUFFER_SIZE (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_PRINTF_BUFFER_SIZE)

@property
def profiling_timer_resolution(self):
    """Value of CL_DEVICE_PROFILING_TIMER_RESOLUTION (integer)."""
    return self._get_device_info_int(
        cl.CL_DEVICE_PROFILING_TIMER_RESOLUTION)

@property
def reference_count(self):
    """Value of CL_DEVICE_REFERENCE_COUNT (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_REFERENCE_COUNT)

@property
def single_fp_config(self):
    """Value of CL_DEVICE_SINGLE_FP_CONFIG (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_SINGLE_FP_CONFIG)

@property
def built_in_kernels(self):
    """List of non-empty, stripped names from the ';'-separated
    CL_DEVICE_BUILT_IN_KERNELS string."""
    return [kernel.strip() for kernel in self._get_device_info_str(
        cl.CL_DEVICE_BUILT_IN_KERNELS).split(';')
        if kernel.strip()]

@property
def extensions(self):
    """List of non-empty, stripped names from the space-separated
    CL_DEVICE_EXTENSIONS string."""
    return [ext.strip() for ext in self._get_device_info_str(
        cl.CL_DEVICE_EXTENSIONS).split(' ')
        if ext.strip()]

@property
def profile(self):
    """Value of CL_DEVICE_PROFILE (string)."""
    return self._get_device_info_str(cl.CL_DEVICE_PROFILE)

@property
def driver_version(self):
    """Value of CL_DRIVER_VERSION (string)."""
    return self._get_device_info_str(cl.CL_DRIVER_VERSION)

@property
def max_work_item_sizes(self):
    """Value of CL_DEVICE_MAX_WORK_ITEM_SIZES as a list of integers with
    max_work_item_dimensions entries.

    Unlike the other properties, returns None instead of raising when
    clGetDeviceInfo reports an error.
    """
    value = cl.ffi.new("size_t[]", self.max_work_item_dimensions)
    err = self._lib.clGetDeviceInfo(
        self._handle, cl.CL_DEVICE_MAX_WORK_ITEM_SIZES,
        cl.ffi.sizeof(value), value, cl.ffi.NULL)
    if err:
        return None
    return list(value)

@property
def pipe_max_packet_size(self):
    """Value of CL_DEVICE_PIPE_MAX_PACKET_SIZE (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_PIPE_MAX_PACKET_SIZE)

@property
def pipe_max_active_reservations(self):
    """Value of CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS (integer)."""
    return self._get_device_info_int(
        cl.CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS)

@property
def svm_capabilities(self):
    """Value of CL_DEVICE_SVM_CAPABILITIES (integer)."""
    return self._get_device_info_int(cl.CL_DEVICE_SVM_CAPABILITIES)

@property
def preferred_platform_atomic_alignment(self):
    """Value of CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT (integer)."""
    return self._get_device_info_int(
        cl.CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT)

@property
def preferred_global_atomic_alignment(self):
    """Value of CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT (integer)."""
    return self._get_device_info_int(
        cl.CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT)

@property
def preferred_local_atomic_alignment(self):
    """Value of CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT (integer)."""
    return self._get_device_info_int(
        cl.CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT)

def _get_device_info_bool(self, name):
    """Queries clGetDeviceInfo for a cl_bool parameter.

    Parameters:
        name: integer CL_DEVICE_* parameter id.

    Returns:
        bool.

    Raises:
        CLRuntimeError: clGetDeviceInfo returned a non-zero code.
    """
    value = cl.ffi.new("cl_bool[]", 1)
    err = self._lib.clGetDeviceInfo(
        self._handle, name, cl.ffi.sizeof(value), value, cl.ffi.NULL)
    if err:
        raise CLRuntimeError("clGetDeviceInfo(%d) failed with error %s" %
                             (name, CL.get_error_description(err)), err)
    return bool(value[0])

def _get_device_info_int(self, name):
    """Queries clGetDeviceInfo for an integer parameter.

    The result is read from a single uint64_t slot that the call fills in.

    Parameters:
        name: integer CL_DEVICE_* parameter id.

    Returns:
        int.

    Raises:
        CLRuntimeError: clGetDeviceInfo returned a non-zero code.
    """
    value = cl.ffi.new("uint64_t[]", 1)
    err = self._lib.clGetDeviceInfo(
        self._handle, name, cl.ffi.sizeof(value), value, cl.ffi.NULL)
    if err:
        raise CLRuntimeError("clGetDeviceInfo(%d) failed with error %s" %
                             (name, CL.get_error_description(err)), err)
    return int(value[0])

def _get_device_info_str(self, name):
    """Queries clGetDeviceInfo for a string parameter.

    The required size is asked for first, so values longer than a fixed
    buffer (for example long extension lists) are not rejected.

    Parameters:
        name: integer CL_DEVICE_* parameter id.

    Returns:
        The value decoded as UTF-8.

    Raises:
        CLRuntimeError: clGetDeviceInfo returned a non-zero code.
    """
    # First call: NULL value pointer, only the required byte count is
    # written to size[0].
    size = cl.ffi.new("size_t[]", 1)
    err = self._lib.clGetDeviceInfo(
        self._handle, name, 0, cl.ffi.NULL, size)
    if err:
        raise CLRuntimeError("clGetDeviceInfo(%d) failed with error %s" %
                             (name, CL.get_error_description(err)), err)
    # At least one byte so an empty value still yields a valid buffer.
    value = cl.ffi.new("char[]", max(int(size[0]), 1))
    err = self._lib.clGetDeviceInfo(
        self._handle, name, cl.ffi.sizeof(value), value, cl.ffi.NULL)
    if err:
        raise CLRuntimeError("clGetDeviceInfo(%d) failed with error %s" %
                             (name, CL.get_error_description(err)), err)
    return cl.ffi.string(value).decode("utf-8")

def __repr__(self):
    """Short representation that includes the device name."""
    # The format string needs a conversion specifier; with an empty one the
    # % operator raises TypeError.
    return '<opencl4py.Device %s>' % self.name
class Platform(CL):
    """OpenCL platform.

    Attributes:
        devices: list of Device objects available on this platform.
        name: OpenCL name of the platform.
        path: opencl4py platform identifier.
    """
    def __init__(self, handle, path):
        """Reads the platform name and enumerates all of its devices.

        Parameters:
            handle: platform handle passed to clGetPlatformInfo and
                    clGetDeviceIDs.
            path: opencl4py platform identifier; device paths are built
                  as "<path>:<device index>".

        Raises:
            CLRuntimeError: clGetDeviceIDs returned a non-zero code.
        """
        super(Platform, self).__init__()
        self._handle = handle
        self._path = path

        sz = cl.ffi.new("size_t[]", 1)
        nme = cl.ffi.new("char[]", 256)
        n = self._lib.clGetPlatformInfo(handle, cl.CL_PLATFORM_NAME,
                                        256, nme, sz)
        # sz[0] is the reported size; the last byte is left out of the
        # name. A failed query leaves the name as None instead of raising.
        self._name = ((b"".join(nme[0:sz[0] - 1])).decode("utf-8")
                      if not n else None)

        # First call only asks for the device count, second one fills ids.
        nn = cl.ffi.new("cl_uint[]", 1)
        n = self._lib.clGetDeviceIDs(handle, cl.CL_DEVICE_TYPE_ALL,
                                     0, cl.ffi.NULL, nn)
        if n:
            raise CLRuntimeError("clGetDeviceIDs() failed with error %s" %
                                 CL.get_error_description(n), n)
        ids = cl.ffi.new("cl_device_id[]", nn[0])
        n = self._lib.clGetDeviceIDs(handle, cl.CL_DEVICE_TYPE_ALL,
                                     nn[0], ids, nn)
        if n:
            raise CLRuntimeError("clGetDeviceIDs() failed with error %s" %
                                 CL.get_error_description(n), n)
        self._devices = [Device(dev_id, self,
                                "%s:%d" % (self.path, dev_num))
                         for dev_num, dev_id in enumerate(ids)]

    @property
    def devices(self):
        """
        List of Device objects available on this platform.
        """
        return self._devices

    @property
    def name(self):
        """
        OpenCL name of the platform (None if it could not be queried).
        """
        return self._name

    @property
    def path(self):
        """
        opencl4py platform identifier.
        """
        return self._path

    def __iter__(self):
        """Iterates over the devices of this platform."""
        return iter(self.devices)

    def create_context(self, devices):
        """Creates OpenCL context on this platform and selected devices.

        Parameters:
            devices: list of Device objects.

        Returns:
            Context object.
        """
        return Context(self, devices)

    def __repr__(self):
        """Short representation that includes the platform name."""
        # The format string needs a conversion specifier; with an empty one
        # the % operator raises TypeError.
        return '<opencl4py.Platform %s>' % self.name
class Platforms(CL):
    """List of OpenCL platforms.

    Attributes:
        platforms: list of Platform objects.
    """
    def __init__(self):
        """Initializes the OpenCL bindings and enumerates all platforms.

        Raises:
            CLRuntimeError: clGetPlatformIDs returned a non-zero code.
        """
        cl.initialize()
        super(Platforms, self).__init__()
        # First call only asks for the platform count, second one fills ids.
        nn = cl.ffi.new("cl_uint[]", 1)
        n = self._lib.clGetPlatformIDs(0, cl.ffi.NULL, nn)
        if n:
            raise CLRuntimeError("clGetPlatformIDs() failed with error %s" %
                                 CL.get_error_description(n), n)
        ids = cl.ffi.new("cl_platform_id[]", nn[0])
        n = self._lib.clGetPlatformIDs(nn[0], ids, nn)
        if n:
            raise CLRuntimeError("clGetPlatformIDs() failed with error %s" %
                                 CL.get_error_description(n), n)
        self._platforms = [Platform(p_id, str(p_num))
                           for p_num, p_id in enumerate(ids)]

    @property
    def platforms(self):
        """
        List of Platform objects.
        """
        return self._platforms

    def __iter__(self):
        """Iterates over the platforms."""
        return iter(self.platforms)

    def dump_devices(self):
        """Returns string with information about OpenCL platforms and devices.
        """
        if not self.platforms:
            return "No OpenCL devices available."
        lines = []
        for i, platform in enumerate(self.platforms):
            # Platform.name is None when the name query failed.
            lines.append("Platform %d: %s" %
                         (i, (platform.name or "unknown").strip()))
            for j, device in enumerate(platform.devices):
                lines.append("\tDevice %d: %s (%d Mb, %d align, %s)" % (
                    j, device.name.strip(), device.memsize // (1024 * 1024),
                    device.memalign, device.version_string.strip()))
        return "\n".join(lines)

    @staticmethod
    def _parse_context_spec(spec):
        """Parses "<platform number>:<comma separated device numbers>".

        Empty fields count as 0; without ':' the whole string is the
        device list and the platform number is 0.

        Returns:
            (platform_number, list of device numbers).

        Raises:
            ValueError: a field is not an integer.
        """
        idx = spec.find(":")
        if idx >= 0:
            head = spec[:idx]
            try:
                platform_number = int(head) if head else 0
            except ValueError:
                raise ValueError("Incorrect platform number")
            spec = spec[idx + 1:]
        else:
            platform_number = 0
        try:
            device_numbers = [int(s) if s else 0 for s in spec.split(",")]
        except ValueError:
            raise ValueError("Incorrect device number")
        return platform_number, device_numbers

    def create_some_context(self):
        """Returns Context object with some OpenCL platform, devices attached.

        If there is exactly one platform with exactly one device, it is
        used without consulting the environment.

        If environment variable PYOPENCL_CTX is set and not empty,
        gets context based on it, format is:
        <platform number>:<comma separated device numbers>
        (Examples: 0:0 - first platform, first device,
                   1:0,2 - second platform, first and third devices).

        If PYOPENCL_CTX is not set and os.isatty(0) == True, then
        displays available devices and reads line from stdin in the same
        format as PYOPENCL_CTX.

        Else chooses first platform and device.

        Raises:
            ValueError: platform or device number is not an integer.
            IndexError: platform or device number is out of range.
        """
        if len(self.platforms) == 1 and len(self.platforms[0].devices) == 1:
            return self.platforms[0].create_context(self.platforms[0].devices)
        import os
        ctx = os.environ.get("PYOPENCL_CTX")
        if not ctx:
            if os.isatty(0):
                import sys
                sys.stdout.write(
                    "\nEnter "
                    "<platform number>:<comma separated device numbers> or "
                    "set PYOPENCL_CTX environment variable.\n"
                    "Examples: 0:0 - first platform, first device;\n"
                    "          1:0,2 - second platform, first and third "
                    "devices.\n"
                    "\nOpenCL devices available:\n\n%s\n\n" %
                    (self.dump_devices()))
                sys.stdout.flush()
                ctx = sys.stdin.readline().strip()
            else:
                ctx = ""
        platform_number, device_numbers = self._parse_context_spec(ctx)
        try:
            platform = self.platforms[platform_number]
        except IndexError:
            raise IndexError("Platform index is out of range")
        try:
            devices = [platform.devices[i] for i in device_numbers]
        except IndexError:
            raise IndexError("Device index is out of range")
        return platform.create_context(devices)
IndexError("Devicve index is out of range") 2140 | return platform.create_context(devices) 2141 | -------------------------------------------------------------------------------- /src/opencl4py/blas/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2014, Samsung Electronics Co.,Ltd. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 18 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | The views and conclusions contained in the software and documentation are those 26 | of the authors and should not be interpreted as representing official policies, 27 | either expressed or implied, of Samsung Electronics Co.,Ltd.. 
28 | """ 29 | 30 | """ 31 | opencl4py - OpenCL cffi bindings and helper classes. 32 | URL: https://github.com/Samsung/opencl4py 33 | Original author: Alexey Kazantsev 34 | """ 35 | 36 | """ 37 | Init module for BLAS cffi bindings and helper classes. 38 | """ 39 | 40 | from opencl4py.blas._clblas import (CLBLAS, 41 | 42 | clblasRowMajor, 43 | clblasColumnMajor, 44 | 45 | clblasNoTrans, 46 | clblasTrans, 47 | clblasConjTrans, 48 | 49 | clblasSuccess, 50 | clblasInvalidValue, 51 | clblasInvalidCommandQueue, 52 | clblasInvalidContext, 53 | clblasInvalidMemObject, 54 | clblasInvalidDevice, 55 | clblasInvalidEventWaitList, 56 | clblasOutOfResources, 57 | clblasOutOfHostMemory, 58 | clblasInvalidOperation, 59 | clblasCompilerNotAvailable, 60 | clblasBuildProgramFailure, 61 | clblasNotImplemented, 62 | clblasNotInitialized, 63 | clblasInvalidMatA, 64 | clblasInvalidMatB, 65 | clblasInvalidMatC, 66 | clblasInvalidVecX, 67 | clblasInvalidVecY, 68 | clblasInvalidDim, 69 | clblasInvalidLeadDimA, 70 | clblasInvalidLeadDimB, 71 | clblasInvalidLeadDimC, 72 | clblasInvalidIncX, 73 | clblasInvalidIncY, 74 | clblasInsufficientMemMatA, 75 | clblasInsufficientMemMatB, 76 | clblasInsufficientMemMatC, 77 | clblasInsufficientMemVecX, 78 | clblasInsufficientMemVecY) 79 | -------------------------------------------------------------------------------- /src/opencl4py/blas/_clblas.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2014, Samsung Electronics Co.,Ltd. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 2. 
Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 18 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | The views and conclusions contained in the software and documentation are those 26 | of the authors and should not be interpreted as representing official policies, 27 | either expressed or implied, of Samsung Electronics Co.,Ltd.. 28 | """ 29 | 30 | """ 31 | opencl4py - OpenCL cffi bindings and helper classes. 32 | URL: https://github.com/Samsung/opencl4py 33 | Original author: Alexey Kazantsev 34 | """ 35 | 36 | """ 37 | clBLAS cffi bindings and helper classes. 
import opencl4py._cffi as clffi
from opencl4py._py import CL, CLRuntimeError, Event
import cffi


#: ffi parser (cffi.FFI instance); stays None until _initialize() succeeds
ffi = None


#: Loaded shared library (result of ffi.dlopen); stays None until
#: _initialize() succeeds
lib = None


#: Error codes
# The first group reuses the numeric values of the OpenCL error codes.
clblasSuccess = clffi.CL_SUCCESS
clblasInvalidValue = clffi.CL_INVALID_VALUE
clblasInvalidCommandQueue = clffi.CL_INVALID_COMMAND_QUEUE
clblasInvalidContext = clffi.CL_INVALID_CONTEXT
clblasInvalidMemObject = clffi.CL_INVALID_MEM_OBJECT
clblasInvalidDevice = clffi.CL_INVALID_DEVICE
clblasInvalidEventWaitList = clffi.CL_INVALID_EVENT_WAIT_LIST
clblasOutOfResources = clffi.CL_OUT_OF_RESOURCES
clblasOutOfHostMemory = clffi.CL_OUT_OF_HOST_MEMORY
clblasInvalidOperation = clffi.CL_INVALID_OPERATION
clblasCompilerNotAvailable = clffi.CL_COMPILER_NOT_AVAILABLE
clblasBuildProgramFailure = clffi.CL_BUILD_PROGRAM_FAILURE
# The second group is clBLAS-specific and occupies -1024..-1007.
clblasNotImplemented = -1024
clblasNotInitialized = -1023
clblasInvalidMatA = -1022
clblasInvalidMatB = -1021
clblasInvalidMatC = -1020
clblasInvalidVecX = -1019
clblasInvalidVecY = -1018
clblasInvalidDim = -1017
clblasInvalidLeadDimA = -1016
clblasInvalidLeadDimB = -1015
clblasInvalidLeadDimC = -1014
clblasInvalidIncX = -1013
clblasInvalidIncY = -1012
clblasInsufficientMemMatA = -1011
clblasInsufficientMemMatB = -1010
clblasInsufficientMemMatC = -1009
clblasInsufficientMemVecX = -1008
clblasInsufficientMemVecY = -1007


#: Error descriptions
# Only the clBLAS-specific codes are listed here; _initialize() merges this
# mapping into CL.ERRORS.
ERRORS = {
    clblasNotImplemented: "Functionality is not implemented",
    clblasNotInitialized: "clblas library is not initialized yet",
    clblasInvalidMatA: "Matrix A is not a valid memory object",
    clblasInvalidMatB: "Matrix B is not a valid memory object",
    clblasInvalidMatC: "Matrix C is not a valid memory object",
    clblasInvalidVecX: "Vector X is not a valid memory object",
    clblasInvalidVecY: "Vector Y is not a valid memory object",
    clblasInvalidDim: "An input dimension (M,N,K) is invalid",
    clblasInvalidLeadDimA:
    "Leading dimension A must not be less "
    "than the size of the first dimension",
    clblasInvalidLeadDimB:
    "Leading dimension B must not be less "
    "than the size of the second dimension",
    clblasInvalidLeadDimC:
    "Leading dimension C must not be less "
    "than the size of the third dimension",
    clblasInvalidIncX: "The increment for a vector X must not be 0",
    clblasInvalidIncY: "The increment for a vector Y must not be 0",
    clblasInsufficientMemMatA: "The memory object for Matrix A is too small",
    clblasInsufficientMemMatB: "The memory object for Matrix B is too small",
    clblasInsufficientMemMatC: "The memory object for Matrix C is too small",
    clblasInsufficientMemVecX: "The memory object for Vector X is too small",
    clblasInsufficientMemVecY: "The memory object for Vector Y is too small"
}


#: clblasOrder
clblasRowMajor = 0
clblasColumnMajor = 1


#: clblasTranspose
clblasNoTrans = 0
clblasTrans = 1
clblasConjTrans = 2
# C declarations handed to cffi: the typedefs and the clBLAS entry points
# that this module calls.
_CLBLAS_CDEF = """
    typedef int32_t cl_int;
    typedef uint32_t cl_uint;
    typedef float cl_float;
    typedef double cl_double;

    typedef void* cl_mem;
    typedef void* cl_command_queue;
    typedef void* cl_event;

    typedef int clblasStatus;
    typedef int clblasOrder;
    typedef int clblasTranspose;

    clblasStatus clblasSetup();
    void clblasTeardown();

    clblasStatus clblasSgemm(clblasOrder order,
                             clblasTranspose transA,
                             clblasTranspose transB,
                             size_t M,
                             size_t N,
                             size_t K,
                             cl_float alpha,
                             const cl_mem A,
                             size_t offA,
                             size_t lda,
                             const cl_mem B,
                             size_t offB,
                             size_t ldb,
                             cl_float beta,
                             cl_mem C,
                             size_t offC,
                             size_t ldc,
                             cl_uint numCommandQueues,
                             cl_command_queue *commandQueues,
                             cl_uint numEventsInWaitList,
                             const cl_event *eventWaitList,
                             cl_event *events);
    clblasStatus clblasDgemm(clblasOrder order,
                             clblasTranspose transA,
                             clblasTranspose transB,
                             size_t M,
                             size_t N,
                             size_t K,
                             cl_double alpha,
                             const cl_mem A,
                             size_t offA,
                             size_t lda,
                             const cl_mem B,
                             size_t offB,
                             size_t ldb,
                             cl_double beta,
                             cl_mem C,
                             size_t offC,
                             size_t ldc,
                             cl_uint numCommandQueues,
                             cl_command_queue *commandQueues,
                             cl_uint numEventsInWaitList,
                             const cl_event *eventWaitList,
                             cl_event *events);
    """


def _initialize(backends):
    """Parses the declarations and loads the first library that opens.

    Does nothing when a library is already loaded. On success the module
    globals ffi and lib are set and the clBLAS error descriptions are
    merged into CL.ERRORS.

    Parameters:
        backends: iterable of library names tried in order with dlopen.

    Raises:
        OSError: none of the names could be loaded.
    """
    global ffi, lib
    if lib is not None:
        return

    parser = cffi.FFI()
    parser.cdef(_CLBLAS_CDEF)

    loaded = None
    for candidate in backends:
        try:
            loaded = parser.dlopen(candidate)
        except OSError:
            continue  # try the next name
        break
    if loaded is None:
        ffi = None
        raise OSError("Could not load clBlas library")

    ffi = parser
    lib = loaded
    CL.ERRORS.update(ERRORS)


def initialize(backends=("libclBLAS.so", "clBLAS.dll")):
    """Initializes the OpenCL bindings, then loads clBLAS once.

    Parameters:
        backends: library names to try, in order.
    """
    clffi.initialize()
    if lib is None:
        # _initialize() checks lib again once the lock is held.
        with clffi.lock:
            _initialize(backends)
class CLBLAS(object):
    """CLBLAS functions can be invoked from this class.
    """
    def __init__(self):
        """Loads the library if needed and calls clblasSetup().

        Raises:
            CLRuntimeError: clblasSetup() returned a non-zero code.
        """
        self._lib = None
        initialize()
        err = lib.clblasSetup()
        if err:
            raise CLRuntimeError("clblasSetup() failed with error %s" %
                                 CL.get_error_description(err), err)
        self._lib = lib  # to hold the reference

    def _gemm(self, func_name, queues, order, transA, transB,
              rowsCountA, columnCountB, commonSideLength,
              alpha, A, B, beta, C,
              offsetA, strideA, offsetB, strideB, offsetC, strideC,
              wait_for, need_event):
        """Shared implementation of sgemm() and dgemm().

        func_name is the library entry point to call ("clblasSgemm" or
        "clblasDgemm"); the remaining parameters are those of sgemm().
        """
        event = ffi.new("cl_event[]", 1) if need_event else clffi.ffi.NULL
        wait_list, n_events = CL.get_wait_list(wait_for)
        _queues = ffi.new("cl_command_queue[]", len(queues))
        for i, q in enumerate(queues):
            _queues[i] = q.handle
        # A zero stride means "not given": use the smallest value the
        # sgemm() docstring allows for the order/transposition in effect.
        if not strideA:
            strideA = (
                commonSideLength
                if ((transA == clblasNoTrans and order == clblasRowMajor) or
                    (transA != clblasNoTrans and order == clblasColumnMajor))
                else rowsCountA)
        if not strideB:
            strideB = (
                columnCountB
                if ((transB == clblasNoTrans and order == clblasRowMajor) or
                    (transB != clblasNoTrans and order == clblasColumnMajor))
                else commonSideLength)
        if not strideC:
            strideC = columnCountB if order == clblasRowMajor else rowsCountA
        err = getattr(self._lib, func_name)(
            order, transA, transB, rowsCountA, columnCountB, commonSideLength,
            alpha, A.handle, offsetA, strideA, B.handle, offsetB, strideB,
            beta, C.handle, offsetC, strideC, len(queues), _queues,
            n_events, wait_list, event)
        if err:
            raise CLRuntimeError("%s() failed with error %s" %
                                 (func_name, CL.get_error_description(err)),
                                 err)
        return Event(event[0]) if need_event else None

    def sgemm(self, queues, order, transA, transB,
              rowsCountA, columnCountB, commonSideLength,
              alpha, A, B, beta, C,
              offsetA=0, strideA=0,
              offsetB=0, strideB=0,
              offsetC=0, strideC=0,
              wait_for=None, need_event=False):
        """Single precision (float) GEneral Matrix Multiplication.

        C = alpha * dot(A, B) + beta * C
        C = alpha * dot(A^T, B) + beta * C
        C = alpha * dot(A, B^T) + beta * C
        C = alpha * dot(A^T, B^T) + beta * C

        Parameters:
            queues: list of the Queue objects on which this operation
                    will be enqueued.
            order: row/column order (clblasRowMajor, clblasColumnMajor).
            transA: how matrix A is to be transposed
                    (clblasNoTrans, clblasTrans, clblasConjTrans).
            transB: how matrix B is to be transposed
                    (clblasNoTrans, clblasTrans, clblasConjTrans).
            rowsCountA: number of rows in matrix A.
            columnCountB: number of columns in matrix B.
            commonSideLength: length of the common side of the matrices.
            alpha: the factor of matrix A.
            A: Buffer object storing matrix A.
            B: Buffer object storing matrix B.
            beta: the factor of matrix C.
            C: Buffer object storing matrix C.
            offsetA: offset of the first element of the matrix A
                     in the buffer object, counted in elements.
            strideA: leading dimension of matrix A:
                     ((clblasNoTrans, clblasRowMajor) or
                      (clblasTrans, clblasColumnMajor)): >= commonSideLength,
                     else: >= rowsCountA.
            offsetB: offset of the first element of the matrix B
                     in the buffer object, counted in elements.
            strideB: leading dimension of matrix B:
                     ((clblasNoTrans, clblasRowMajor) or
                      (clblasTrans, clblasColumnMajor)): >= columnCountB,
                     else: >= commonSideLength.
            offsetC: offset of the first element of the matrix C
                     in the buffer object, counted in elements.
            strideC: leading dimension of matrix C:
                     clblasRowMajor: >= columnCountB,
                     else: >= rowsCountA.
            wait_for: list of the Event objects to wait.
            need_event: return Event object or not.

        A stride of 0 selects the minimum listed above.

        Returns:
            Event object or None if need_event == False.

        Raises:
            CLRuntimeError: clblasSgemm() returned a non-zero code.
        """
        return self._gemm(
            "clblasSgemm", queues, order, transA, transB,
            rowsCountA, columnCountB, commonSideLength,
            alpha, A, B, beta, C,
            offsetA, strideA, offsetB, strideB, offsetC, strideC,
            wait_for, need_event)

    def dgemm(self, queues, order, transA, transB,
              rowsCountA, columnCountB, commonSideLength,
              alpha, A, B, beta, C,
              offsetA=0, strideA=0,
              offsetB=0, strideB=0,
              offsetC=0, strideC=0,
              wait_for=None, need_event=False):
        """Double precision (double) GEneral Matrix Multiplication.

        C = alpha * dot(A, B) + beta * C
        C = alpha * dot(A^T, B) + beta * C
        C = alpha * dot(A, B^T) + beta * C
        C = alpha * dot(A^T, B^T) + beta * C

        Parameters:
            queues: list of the Queue objects on which this operation
                    will be enqueued.
            order: row/column order (clblasRowMajor, clblasColumnMajor).
            transA: how matrix A is to be transposed
                    (clblasNoTrans, clblasTrans, clblasConjTrans).
            transB: how matrix B is to be transposed
                    (clblasNoTrans, clblasTrans, clblasConjTrans).
            rowsCountA: number of rows in matrix A.
            columnCountB: number of columns in matrix B.
            commonSideLength: length of the common side of the matrices.
            alpha: the factor of matrix A.
            A: Buffer object storing matrix A.
            B: Buffer object storing matrix B.
            beta: the factor of matrix C.
            C: Buffer object storing matrix C.
            offsetA: offset of the first element of the matrix A
                     in the buffer object, counted in elements.
            strideA: leading dimension of matrix A:
                     ((clblasNoTrans, clblasRowMajor) or
                      (clblasTrans, clblasColumnMajor)): >= commonSideLength,
                     else: >= rowsCountA.
            offsetB: offset of the first element of the matrix B
                     in the buffer object, counted in elements.
            strideB: leading dimension of matrix B:
                     ((clblasNoTrans, clblasRowMajor) or
                      (clblasTrans, clblasColumnMajor)): >= columnCountB,
                     else: >= commonSideLength.
            offsetC: offset of the first element of the matrix C
                     in the buffer object, counted in elements.
            strideC: leading dimension of matrix C:
                     clblasRowMajor: >= columnCountB,
                     else: >= rowsCountA.
            wait_for: list of the Event objects to wait.
            need_event: return Event object or not.

        A stride of 0 selects the minimum listed above.

        Returns:
            Event object or None if need_event == False.

        Raises:
            CLRuntimeError: clblasDgemm() returned a non-zero code.
        """
        return self._gemm(
            "clblasDgemm", queues, order, transA, transB,
            rowsCountA, columnCountB, commonSideLength,
            alpha, A, B, beta, C,
            offsetA, strideA, offsetB, strideB, offsetC, strideC,
            wait_for, need_event)

    def __del__(self):
        """Calls clblasTeardown() if clblasSetup() had succeeded."""
        # getattr: the attribute is missing if __init__ never ran.
        held = getattr(self, "_lib", None)
        if held is not None:
            held.clblasTeardown()
365 | 366 | Returns: 367 | Event object or None if need_event == False. 368 | """ 369 | event = ffi.new("cl_event[]", 1) if need_event else clffi.ffi.NULL 370 | wait_list, n_events = CL.get_wait_list(wait_for) 371 | _queues = ffi.new("cl_command_queue[]", len(queues)) 372 | for i, q in enumerate(queues): 373 | _queues[i] = q.handle 374 | if not strideA: 375 | strideA = ( 376 | commonSideLength 377 | if ((transA == clblasNoTrans and order == clblasRowMajor) or 378 | (transA != clblasNoTrans and order == clblasColumnMajor)) 379 | else rowsCountA) 380 | if not strideB: 381 | strideB = ( 382 | columnCountB 383 | if ((transB == clblasNoTrans and order == clblasRowMajor) or 384 | (transB != clblasNoTrans and order == clblasColumnMajor)) 385 | else commonSideLength) 386 | if not strideC: 387 | strideC = columnCountB if order == clblasRowMajor else rowsCountA 388 | err = self._lib.clblasDgemm( 389 | order, transA, transB, rowsCountA, columnCountB, commonSideLength, 390 | alpha, A.handle, offsetA, strideA, B.handle, offsetB, strideB, 391 | beta, C.handle, offsetC, strideC, len(queues), _queues, 392 | n_events, wait_list, event) 393 | if err: 394 | raise CLRuntimeError("clblasDgemm() failed with error %s" % 395 | CL.get_error_description(err), err) 396 | return Event(event[0]) if event != clffi.ffi.NULL else None 397 | 398 | def __del__(self): 399 | if self._lib is not None: 400 | self._lib.clblasTeardown() 401 | -------------------------------------------------------------------------------- /tests/test.cl: -------------------------------------------------------------------------------- 1 | __kernel __attribute__((vec_type_hint(float4))) 2 | void test(__global float *a, __global float *b, const float c) { 3 | size_t i = get_global_id(0); 4 | a[i] += b[i] * c; 5 | } 6 | -------------------------------------------------------------------------------- /tests/test_api.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 
2014, Samsung Electronics Co.,Ltd. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 18 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | The views and conclusions contained in the software and documentation are those 26 | of the authors and should not be interpreted as representing official policies, 27 | either expressed or implied, of Samsung Electronics Co.,Ltd.. 28 | """ 29 | 30 | """ 31 | opencl4py - OpenCL cffi bindings and helper classes. 32 | URL: https://github.com/Samsung/opencl4py 33 | Original author: Alexey Kazantsev 34 | """ 35 | 36 | """ 37 | Tests some of the api in opencl4py package. 
38 | """ 39 | import gc 40 | import logging 41 | import opencl4py as cl 42 | import os 43 | import unittest 44 | 45 | 46 | class Test(unittest.TestCase): 47 | def setUp(self): 48 | self.old_env = os.environ.get("PYOPENCL_CTX") 49 | if self.old_env is None: 50 | os.environ["PYOPENCL_CTX"] = "0:0" 51 | self.src_test = ( 52 | """ 53 | #include "test.cl" 54 | """) 55 | self.include_dirs = ("", os.path.dirname(__file__), ".") 56 | 57 | def tearDown(self): 58 | if self.old_env is None: 59 | del os.environ["PYOPENCL_CTX"] 60 | else: 61 | os.environ["PYOPENCL_CTX"] = self.old_env 62 | del self.old_env 63 | gc.collect() 64 | 65 | def test_constants(self): 66 | self.assertEqual(cl.CL_DEVICE_TYPE_CPU, 2) 67 | self.assertEqual(cl.CL_DEVICE_TYPE_GPU, 4) 68 | self.assertEqual(cl.CL_DEVICE_TYPE_ACCELERATOR, 8) 69 | self.assertEqual(cl.CL_DEVICE_TYPE_CUSTOM, 16) 70 | self.assertEqual(cl.CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 1) 71 | self.assertEqual(cl.CL_QUEUE_PROFILING_ENABLE, 2) 72 | self.assertEqual(cl.CL_QUEUE_ON_DEVICE, 4) 73 | self.assertEqual(cl.CL_QUEUE_ON_DEVICE_DEFAULT, 8) 74 | self.assertEqual(cl.CL_QUEUE_PROPERTIES, 0x1093) 75 | self.assertEqual(cl.CL_QUEUE_SIZE, 0x1094) 76 | self.assertEqual(cl.CL_MAP_READ, 1) 77 | self.assertEqual(cl.CL_MAP_WRITE, 2) 78 | self.assertEqual(cl.CL_MAP_WRITE_INVALIDATE_REGION, 4) 79 | self.assertEqual(cl.CL_MEM_READ_WRITE, 1) 80 | self.assertEqual(cl.CL_MEM_WRITE_ONLY, 2) 81 | self.assertEqual(cl.CL_MEM_READ_ONLY, 4) 82 | self.assertEqual(cl.CL_MEM_USE_HOST_PTR, 8) 83 | self.assertEqual(cl.CL_MEM_ALLOC_HOST_PTR, 16) 84 | self.assertEqual(cl.CL_MEM_COPY_HOST_PTR, 32) 85 | self.assertEqual(cl.CL_MEM_HOST_NO_ACCESS, 512) 86 | self.assertEqual(cl.CL_MEM_SVM_FINE_GRAIN_BUFFER, 1024) 87 | self.assertEqual(cl.CL_MEM_SVM_ATOMICS, 2048) 88 | self.assertEqual(cl.CL_DEVICE_SVM_COARSE_GRAIN_BUFFER, 1) 89 | self.assertEqual(cl.CL_DEVICE_SVM_FINE_GRAIN_BUFFER, 2) 90 | self.assertEqual(cl.CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, 4) 91 | 
self.assertEqual(cl.CL_DEVICE_SVM_ATOMICS, 8) 92 | self.assertEqual(cl.CL_PROFILING_COMMAND_QUEUED, 0x1280) 93 | self.assertEqual(cl.CL_PROFILING_COMMAND_SUBMIT, 0x1281) 94 | self.assertEqual(cl.CL_PROFILING_COMMAND_START, 0x1282) 95 | self.assertEqual(cl.CL_PROFILING_COMMAND_END, 0x1283) 96 | 97 | def test_error_codes(self): 98 | self.assertEqual(cl.CL_SUCCESS, 0) 99 | self.assertEqual(cl.CL_DEVICE_NOT_FOUND, -1) 100 | self.assertEqual(cl.CL_DEVICE_NOT_AVAILABLE, -2) 101 | self.assertEqual(cl.CL_COMPILER_NOT_AVAILABLE, -3) 102 | self.assertEqual(cl.CL_MEM_OBJECT_ALLOCATION_FAILURE, -4) 103 | self.assertEqual(cl.CL_OUT_OF_RESOURCES, -5) 104 | self.assertEqual(cl.CL_OUT_OF_HOST_MEMORY, -6) 105 | self.assertEqual(cl.CL_PROFILING_INFO_NOT_AVAILABLE, -7) 106 | self.assertEqual(cl.CL_MEM_COPY_OVERLAP, -8) 107 | self.assertEqual(cl.CL_IMAGE_FORMAT_MISMATCH, -9) 108 | self.assertEqual(cl.CL_IMAGE_FORMAT_NOT_SUPPORTED, -10) 109 | self.assertEqual(cl.CL_BUILD_PROGRAM_FAILURE, -11) 110 | self.assertEqual(cl.CL_MAP_FAILURE, -12) 111 | self.assertEqual(cl.CL_MISALIGNED_SUB_BUFFER_OFFSET, -13) 112 | self.assertEqual(cl.CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, -14) 113 | self.assertEqual(cl.CL_COMPILE_PROGRAM_FAILURE, -15) 114 | self.assertEqual(cl.CL_LINKER_NOT_AVAILABLE, -16) 115 | self.assertEqual(cl.CL_LINK_PROGRAM_FAILURE, -17) 116 | self.assertEqual(cl.CL_DEVICE_PARTITION_FAILED, -18) 117 | self.assertEqual(cl.CL_KERNEL_ARG_INFO_NOT_AVAILABLE, -19) 118 | 119 | self.assertEqual(cl.CL_INVALID_VALUE, -30) 120 | self.assertEqual(cl.CL_INVALID_DEVICE_TYPE, -31) 121 | self.assertEqual(cl.CL_INVALID_PLATFORM, -32) 122 | self.assertEqual(cl.CL_INVALID_DEVICE, -33) 123 | self.assertEqual(cl.CL_INVALID_CONTEXT, -34) 124 | self.assertEqual(cl.CL_INVALID_QUEUE_PROPERTIES, -35) 125 | self.assertEqual(cl.CL_INVALID_COMMAND_QUEUE, -36) 126 | self.assertEqual(cl.CL_INVALID_HOST_PTR, -37) 127 | self.assertEqual(cl.CL_INVALID_MEM_OBJECT, -38) 128 | 
self.assertEqual(cl.CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, -39) 129 | self.assertEqual(cl.CL_INVALID_IMAGE_SIZE, -40) 130 | self.assertEqual(cl.CL_INVALID_SAMPLER, -41) 131 | self.assertEqual(cl.CL_INVALID_BINARY, -42) 132 | self.assertEqual(cl.CL_INVALID_BUILD_OPTIONS, -43) 133 | self.assertEqual(cl.CL_INVALID_PROGRAM, -44) 134 | self.assertEqual(cl.CL_INVALID_PROGRAM_EXECUTABLE, -45) 135 | self.assertEqual(cl.CL_INVALID_KERNEL_NAME, -46) 136 | self.assertEqual(cl.CL_INVALID_KERNEL_DEFINITION, -47) 137 | self.assertEqual(cl.CL_INVALID_KERNEL, -48) 138 | self.assertEqual(cl.CL_INVALID_ARG_INDEX, -49) 139 | self.assertEqual(cl.CL_INVALID_ARG_VALUE, -50) 140 | self.assertEqual(cl.CL_INVALID_ARG_SIZE, -51) 141 | self.assertEqual(cl.CL_INVALID_KERNEL_ARGS, -52) 142 | self.assertEqual(cl.CL_INVALID_WORK_DIMENSION, -53) 143 | self.assertEqual(cl.CL_INVALID_WORK_GROUP_SIZE, -54) 144 | self.assertEqual(cl.CL_INVALID_WORK_ITEM_SIZE, -55) 145 | self.assertEqual(cl.CL_INVALID_GLOBAL_OFFSET, -56) 146 | self.assertEqual(cl.CL_INVALID_EVENT_WAIT_LIST, -57) 147 | self.assertEqual(cl.CL_INVALID_EVENT, -58) 148 | self.assertEqual(cl.CL_INVALID_OPERATION, -59) 149 | self.assertEqual(cl.CL_INVALID_GL_OBJECT, -60) 150 | self.assertEqual(cl.CL_INVALID_BUFFER_SIZE, -61) 151 | self.assertEqual(cl.CL_INVALID_MIP_LEVEL, -62) 152 | self.assertEqual(cl.CL_INVALID_GLOBAL_WORK_SIZE, -63) 153 | self.assertEqual(cl.CL_INVALID_PROPERTY, -64) 154 | self.assertEqual(cl.CL_INVALID_IMAGE_DESCRIPTOR, -65) 155 | self.assertEqual(cl.CL_INVALID_COMPILER_OPTIONS, -66) 156 | self.assertEqual(cl.CL_INVALID_LINKER_OPTIONS, -67) 157 | self.assertEqual(cl.CL_INVALID_DEVICE_PARTITION_COUNT, -68) 158 | self.assertEqual(cl.CL_INVALID_PIPE_SIZE, -69) 159 | self.assertEqual(cl.CL_INVALID_DEVICE_QUEUE, -70) 160 | 161 | def test_dump_devices(self): 162 | platforms = cl.Platforms() 163 | s = platforms.dump_devices() 164 | del s 165 | 166 | def test_create_context(self): 167 | platforms = cl.Platforms() 168 | ctx = 
cl.Context(platforms.platforms[0], 169 | platforms.platforms[0].devices[0:1]) 170 | del ctx 171 | 172 | def test_create_some_context(self): 173 | platforms = cl.Platforms() 174 | ctx = platforms.create_some_context() 175 | del ctx 176 | 177 | def test_realign_numpy_array(self): 178 | import numpy 179 | a = numpy.empty(1000, dtype=numpy.float32) 180 | a = cl.realign_array(a, 1056, numpy) 181 | self.assertEqual(a.__array_interface__["data"][0] % 1056, 0) 182 | a = numpy.empty(1024, dtype=numpy.float32) 183 | a = cl.realign_array(a, 4096, numpy) 184 | self.assertEqual(a.__array_interface__["data"][0] % 4096, 0) 185 | 186 | def test_device_info(self): 187 | platforms = cl.Platforms() 188 | ctx = platforms.create_some_context() 189 | dev = ctx.devices[0] 190 | self.assertGreater(dev.max_work_item_dimensions, 0) 191 | self.assertEqual(len(dev.max_work_item_sizes), 192 | dev.max_work_item_dimensions) 193 | for size in dev.max_work_item_sizes: 194 | self.assertGreater(size, 0) 195 | self.assertIsInstance(dev.driver_version.encode("utf-8"), bytes) 196 | self.assertGreater(len(dev.driver_version), 0) 197 | try: 198 | self.assertIsInstance(dev.built_in_kernels, list) 199 | for krn in dev.built_in_kernels: 200 | self.assertIsInstance(krn, str) 201 | self.assertGreater(len(krn), 0) 202 | except cl.CLRuntimeError as e: 203 | if dev.version >= 1.2: 204 | raise 205 | self.assertEqual(e.code, -30) 206 | self.assertIsInstance(dev.extensions, list) 207 | for ext in dev.extensions: 208 | self.assertIsInstance(ext.encode("utf-8"), bytes) 209 | self.assertGreater(len(ext), 0) 210 | self.assertGreater(dev.preferred_vector_width_int, 0) 211 | self.assertGreater(dev.max_work_group_size, 1) 212 | self.assertTrue(dev.available) 213 | try: 214 | self.assertTrue(type(dev.pipe_max_active_reservations) == int) 215 | self.assertTrue(type(dev.pipe_max_packet_size) == int) 216 | self.assertTrue(type(dev.svm_capabilities) == int) 217 | self.assertTrue( 218 | 
type(dev.preferred_platform_atomic_alignment) == int) 219 | self.assertTrue(type(dev.preferred_global_atomic_alignment) == int) 220 | self.assertTrue(type(dev.preferred_local_atomic_alignment) == int) 221 | except cl.CLRuntimeError as e: 222 | if dev.version >= 2.0: 223 | raise 224 | self.assertEqual(e.code, -30) 225 | 226 | def test_program_info(self): 227 | platforms = cl.Platforms() 228 | ctx = platforms.create_some_context() 229 | prg = ctx.create_program(self.src_test, self.include_dirs) 230 | self.assertGreater(prg.reference_count, 0) 231 | try: 232 | self.assertEqual(prg.num_kernels, 1) 233 | names = prg.kernel_names 234 | self.assertIsInstance(names, list) 235 | self.assertEqual(len(names), 1) 236 | self.assertEqual(names[0], "test") 237 | except cl.CLRuntimeError as e: 238 | if prg.devices[0].version >= 1.2: 239 | raise 240 | self.assertEqual(e.code, -30) 241 | bins = prg.binaries 242 | self.assertEqual(len(bins), 1) 243 | self.assertIsInstance(bins[0], bytes) 244 | self.assertGreater(len(bins[0]), 0) 245 | 246 | def test_kernel_info(self): 247 | platforms = cl.Platforms() 248 | ctx = platforms.create_some_context() 249 | prg = ctx.create_program(self.src_test, self.include_dirs) 250 | krn = prg.get_kernel("test") 251 | self.assertGreater(krn.reference_count, 0) 252 | self.assertEqual(krn.num_args, 3) 253 | try: 254 | self.assertEqual(krn.attributes, "vec_type_hint(float4)") 255 | except cl.CLRuntimeError as e: 256 | self.assertEqual(e.code, -30) 257 | 258 | def test_binary(self): 259 | platforms = cl.Platforms() 260 | ctx = platforms.create_some_context() 261 | prg = ctx.create_program(self.src_test, self.include_dirs) 262 | binary = prg.binaries[0] 263 | prg = ctx.create_program([binary], binary=True) 264 | krn = prg.get_kernel("test") 265 | del krn 266 | 267 | def test_set_kernel_args(self): 268 | import numpy 269 | 270 | platforms = cl.Platforms() 271 | ctx = platforms.create_some_context() 272 | prg = ctx.create_program(self.src_test, 
self.include_dirs) 273 | krn = prg.get_kernel("test") 274 | queue = ctx.create_queue(ctx.devices[0]) 275 | global_size = [12345] 276 | local_size = None 277 | 278 | krn.set_args(cl.skip(3)) 279 | self.assertRaises(cl.CLRuntimeError, 280 | queue.execute_kernel, krn, global_size, local_size) 281 | krn.set_args(cl.skip, cl.skip, cl.skip) 282 | self.assertRaises(cl.CLRuntimeError, 283 | queue.execute_kernel, krn, global_size, local_size) 284 | krn.set_args(cl.skip(1), cl.skip(1), cl.skip(1)) 285 | self.assertRaises(cl.CLRuntimeError, 286 | queue.execute_kernel, krn, global_size, local_size) 287 | krn.set_args(cl.skip(1000)) 288 | self.assertRaises(cl.CLRuntimeError, 289 | queue.execute_kernel, krn, global_size, local_size) 290 | self.assertRaises(ValueError, cl.skip, 0) 291 | self.assertRaises(ValueError, cl.skip, -1) 292 | 293 | c = numpy.array([1.2345], dtype=numpy.float32) 294 | krn.set_args(cl.skip(2), c) 295 | self.assertRaises(cl.CLRuntimeError, 296 | queue.execute_kernel, krn, global_size, local_size) 297 | krn.set_args(cl.skip, cl.skip, c) 298 | self.assertRaises(cl.CLRuntimeError, 299 | queue.execute_kernel, krn, global_size, local_size) 300 | 301 | def test_api_numpy(self): 302 | import numpy 303 | # Create platform, context, program, kernel and queue 304 | platforms = cl.Platforms() 305 | ctx = platforms.create_some_context() 306 | prg = ctx.create_program(self.src_test, self.include_dirs) 307 | krn = prg.get_kernel("test") 308 | queue = ctx.create_queue(ctx.devices[0]) 309 | 310 | # Create arrays with some values for testing 311 | a = numpy.arange(100000, dtype=numpy.float32) 312 | b = numpy.cos(a) 313 | a = numpy.sin(a) 314 | a_copy = a.copy() 315 | 316 | # Prepare arrays for use with map_buffer 317 | a = cl.realign_array(a, queue.device.memalign, numpy) 318 | b = cl.realign_array(b, queue.device.memalign, numpy) 319 | c = numpy.array([1.2345], dtype=numpy.float32) 320 | d = a + b * c[0] 321 | 322 | # Create buffers 323 | a_ = 
ctx.create_buffer(cl.CL_MEM_READ_WRITE | cl.CL_MEM_USE_HOST_PTR, 324 | a) 325 | b_ = ctx.create_buffer(cl.CL_MEM_READ_WRITE | cl.CL_MEM_USE_HOST_PTR, 326 | b) 327 | 328 | # Set kernel arguments 329 | krn.set_args(a_, b_, c[0:1]) 330 | 331 | # Execute kernel 332 | global_size = [a.size] 333 | local_size = None 334 | queue.execute_kernel(krn, global_size, local_size, need_event=False) 335 | 336 | # Get results back from the device by map_buffer 337 | ev, ptr = queue.map_buffer(a_, cl.CL_MAP_READ, a.nbytes) 338 | del ev 339 | queue.unmap_buffer(a_, ptr).wait() 340 | self.assertLess(numpy.fabs(a - d).max(), 0.0001, 341 | "Incorrect result after map_buffer") 342 | 343 | # Get results back from the device by read_buffer 344 | aa = numpy.zeros(a.shape, dtype=a.dtype) 345 | queue.read_buffer(a_, aa) 346 | self.assertLess(numpy.fabs(aa - d).max(), 0.0001, 347 | "Incorrect result after read_buffer") 348 | 349 | # Refill buffer with stored copy by map_buffer with event 350 | ev, ptr = queue.map_buffer( 351 | a_, cl.CL_MAP_WRITE if queue.device.version < 1.1999 352 | else cl.CL_MAP_WRITE_INVALIDATE_REGION, a.nbytes, 353 | blocking=False, need_event=True) 354 | ev.wait() 355 | a[:] = a_copy[:] 356 | ev = queue.unmap_buffer(a_, ptr) 357 | 358 | # Execute kernel 359 | ev = queue.execute_kernel(krn, global_size, local_size, wait_for=(ev,)) 360 | # Get results back from the device by map_buffer 361 | ev, ptr = queue.map_buffer(a_, cl.CL_MAP_READ, a.nbytes, 362 | wait_for=(ev,), need_event=True) 363 | ev.wait() 364 | queue.unmap_buffer(a_, ptr).wait() 365 | self.assertLess(numpy.fabs(a - d).max(), 0.0001, 366 | "Incorrect result after map_buffer") 367 | 368 | # Refill buffer with stored copy by write_buffer 369 | ev = queue.write_buffer(a_, a_copy, blocking=False, need_event=True) 370 | 371 | # Execute kernel 372 | ev = queue.execute_kernel(krn, global_size, local_size, wait_for=(ev,)) 373 | # Get results back from the device by map_buffer 374 | ev, ptr = queue.map_buffer(a_, 
cl.CL_MAP_READ, a.nbytes, 375 | wait_for=(ev,), need_event=True) 376 | ev.wait() 377 | queue.unmap_buffer(a_, ptr).wait() 378 | self.assertLess(numpy.fabs(a - d).max(), 0.0001, 379 | "Incorrect result after map_buffer") 380 | 381 | def test_api_nonumpy(self): 382 | import math 383 | 384 | # Create platform, context, program, kernel and queue 385 | platforms = cl.Platforms() 386 | ctx = platforms.create_some_context() 387 | prg = ctx.create_program(self.src_test, self.include_dirs) 388 | krn = prg.get_kernel("test") 389 | # Create command queue 390 | queue = ctx.create_queue(ctx.devices[0]) 391 | 392 | # Create arrays with some values for testing 393 | N = 100000 394 | ffi = cl.get_ffi() 395 | _a = ffi.new("float[]", N + queue.device.memalign) 396 | sz = int(ffi.cast("size_t", _a)) 397 | if sz % queue.device.memalign != 0: 398 | sz += queue.device.memalign - (sz % queue.device.memalign) 399 | a = ffi.cast("float*", sz) 400 | else: 401 | a = _a 402 | _b = ffi.new("float[]", N + queue.device.memalign) 403 | sz = int(ffi.cast("size_t", _b)) 404 | if sz % queue.device.memalign != 0: 405 | sz += queue.device.memalign - (sz % queue.device.memalign) 406 | b = ffi.cast("float*", sz) 407 | else: 408 | b = _b 409 | c = ffi.new("float[]", 1) 410 | c[0] = 1.2345 411 | d = ffi.new("float[]", N) 412 | sz = ffi.sizeof(d) 413 | for i, t in enumerate(d): 414 | a[i] = math.sin(i) 415 | b[i] = math.cos(i) 416 | d[i] = a[i] + b[i] * c[0] 417 | a_copy = ffi.new("float[]", N) 418 | a_copy[0:N] = a[0:N] 419 | 420 | # Create buffers 421 | a_ = ctx.create_buffer(cl.CL_MEM_READ_WRITE | cl.CL_MEM_USE_HOST_PTR, 422 | a, size=sz) 423 | b_ = ctx.create_buffer(cl.CL_MEM_READ_WRITE | cl.CL_MEM_USE_HOST_PTR, 424 | b, size=sz) 425 | 426 | # Set kernel arguments 427 | krn.set_arg(0, a_) 428 | krn.set_arg(1, b_) 429 | krn.set_arg(2, ffi.cast("const void*", c), ffi.sizeof(c)) 430 | 431 | # Execute kernel 432 | global_size = [N] 433 | local_size = None 434 | queue.execute_kernel(krn, global_size, 
local_size, need_event=False) 435 | 436 | # Get results back from the device by map_buffer 437 | ev, ptr = queue.map_buffer(a_, cl.CL_MAP_READ, sz) 438 | del ev 439 | queue.unmap_buffer(a_, ptr).wait() 440 | mx = 0 441 | for i, t in enumerate(d): 442 | mx = max(mx, math.fabs(a[i] - t)) 443 | self.assertLess(mx, 0.0001, "Incorrect result after map_buffer") 444 | 445 | # Get results back from the device by read_buffer 446 | aa = ffi.new("float[]", N) 447 | queue.read_buffer(a_, aa, size=sz) 448 | mx = 0 449 | for i, t in enumerate(d): 450 | mx = max(mx, math.fabs(aa[i] - t)) 451 | self.assertLess(mx, 0.0001, "Incorrect result after read_buffer") 452 | 453 | # Refill buffer with stored copy by map_buffer with event 454 | ev, ptr = queue.map_buffer( 455 | a_, cl.CL_MAP_WRITE if queue.device.version < 1.1999 456 | else cl.CL_MAP_WRITE_INVALIDATE_REGION, sz, 457 | blocking=False, need_event=True) 458 | ev.wait() 459 | a[0:N] = a_copy[0:N] 460 | ev = queue.unmap_buffer(a_, ptr) 461 | 462 | # Execute kernel 463 | ev = queue.execute_kernel(krn, global_size, local_size, wait_for=(ev,)) 464 | # Get results back from the device by map_buffer 465 | ev, ptr = queue.map_buffer(a_, cl.CL_MAP_READ, sz, 466 | wait_for=(ev,), need_event=True) 467 | ev.wait() 468 | queue.unmap_buffer(a_, ptr).wait() 469 | mx = 0 470 | for i, t in enumerate(d): 471 | mx = max(mx, math.fabs(a[i] - t)) 472 | self.assertLess(mx, 0.0001, "Incorrect result after map_buffer") 473 | 474 | # Refill buffer with stored copy by write_buffer 475 | ev = queue.write_buffer(a_, a_copy, size=sz, 476 | blocking=False, need_event=True) 477 | 478 | # Execute kernel 479 | ev = queue.execute_kernel(krn, global_size, local_size, wait_for=(ev,)) 480 | # Get results back from the device by map_buffer 481 | ev, ptr = queue.map_buffer(a_, cl.CL_MAP_READ, sz, 482 | wait_for=(ev,), need_event=True) 483 | ev.wait() 484 | queue.unmap_buffer(a_, ptr).wait() 485 | mx = 0 486 | for i, t in enumerate(d): 487 | mx = max(mx, 
math.fabs(a[i] - t)) 488 | self.assertLess(mx, 0.0001, "Incorrect result after map_buffer") 489 | 490 | del _b 491 | del _a 492 | 493 | def test_event_profiling(self): 494 | import numpy 495 | # Create platform, context, program, kernel and queue 496 | platforms = cl.Platforms() 497 | ctx = platforms.create_some_context() 498 | prg = ctx.create_program(self.src_test, self.include_dirs) 499 | krn = prg.get_kernel("test") 500 | queue = ctx.create_queue(ctx.devices[0], cl.CL_QUEUE_PROFILING_ENABLE) 501 | 502 | # Create arrays with some values for testing 503 | a = numpy.arange(100000, dtype=numpy.float32) 504 | b = numpy.cos(a) 505 | a = numpy.sin(a) 506 | c = numpy.array([1.2345], dtype=numpy.float32) 507 | 508 | # Create buffers 509 | a_ = ctx.create_buffer(cl.CL_MEM_READ_WRITE | cl.CL_MEM_COPY_HOST_PTR, 510 | a) 511 | b_ = ctx.create_buffer(cl.CL_MEM_READ_ONLY | cl.CL_MEM_COPY_HOST_PTR, 512 | b) 513 | 514 | # Set kernel arguments 515 | krn.set_arg(0, a_) 516 | krn.set_arg(1, b_) 517 | krn.set_arg(2, c[0:1]) 518 | 519 | # Execute kernel 520 | ev = queue.execute_kernel(krn, [a.size], None) 521 | ev.wait() 522 | 523 | try: 524 | vles, errs = ev.get_profiling_info() 525 | self.assertEqual(vles, ev.profiling_values) 526 | self.assertEqual(errs, ev.profiling_errors) 527 | except cl.CLRuntimeError: 528 | pass 529 | for name, vle in ev.profiling_values.items(): 530 | err = ev.profiling_errors[name] 531 | self.assertTrue((vle and not err) or (not vle and err)) 532 | self.assertEqual(type(vle), float) 533 | self.assertEqual(type(err), int) 534 | 535 | def test_copy_buffer(self): 536 | import numpy 537 | # Create platform, context and queue 538 | platforms = cl.Platforms() 539 | ctx = platforms.create_some_context() 540 | queue = ctx.create_queue(ctx.devices[0]) 541 | 542 | # Create arrays with some values for testing 543 | a = numpy.arange(10000, dtype=numpy.float32) 544 | b = a * 0.5 545 | c = numpy.empty_like(b) 546 | c[:] = 1.0e30 547 | 548 | # Create buffers 549 | a_ = 
ctx.create_buffer(cl.CL_MEM_READ_WRITE | cl.CL_MEM_COPY_HOST_PTR, 550 | a) 551 | b_ = ctx.create_buffer(cl.CL_MEM_READ_WRITE | cl.CL_MEM_COPY_HOST_PTR, 552 | b) 553 | 554 | # Copy some data from one buffer to another 555 | sz = a.itemsize 556 | queue.copy_buffer(a_, b_, 1000 * sz, 2000 * sz, 3000 * sz).wait() 557 | 558 | queue.read_buffer(b_, c) 559 | diff = numpy.fabs(c[2000:5000] - a[1000:4000]).max() 560 | self.assertEqual(diff, 0) 561 | 562 | def test_copy_buffer_rect(self): 563 | import numpy 564 | # Create platform, context and queue 565 | platforms = cl.Platforms() 566 | ctx = platforms.create_some_context() 567 | queue = ctx.create_queue(ctx.devices[0]) 568 | 569 | # Create arrays with some values for testing 570 | a = numpy.arange(35 * 25 * 15, dtype=numpy.float32).reshape(35, 25, 15) 571 | b = numpy.arange(37 * 27 * 17, dtype=numpy.float32).reshape(37, 27, 17) 572 | b *= 0.5 573 | c = numpy.empty_like(b) 574 | c[:] = 1.0e30 575 | 576 | # Create buffers 577 | a_ = ctx.create_buffer(cl.CL_MEM_READ_WRITE | cl.CL_MEM_COPY_HOST_PTR, 578 | a) 579 | b_ = ctx.create_buffer(cl.CL_MEM_READ_WRITE | cl.CL_MEM_COPY_HOST_PTR, 580 | b) 581 | 582 | # Copy 3D rect from one buffer to another 583 | sz = a.itemsize 584 | queue.copy_buffer_rect( 585 | a_, b_, (3 * sz, 4, 5), (6 * sz, 7, 8), (5 * sz, 10, 20), 586 | a.shape[2] * sz, a.shape[1] * a.shape[2] * sz, 587 | b.shape[2] * sz, b.shape[1] * b.shape[2] * sz).wait() 588 | 589 | queue.read_buffer(b_, c) 590 | diff = numpy.fabs(c[8:28, 7:17, 6:11] - a[5:25, 4:14, 3:8]).max() 591 | self.assertEqual(diff, 0) 592 | 593 | def test_fill_buffer(self): 594 | # Create platform, context and queue 595 | platforms = cl.Platforms() 596 | ctx = platforms.create_some_context() 597 | if ctx.devices[0].version < 1.2: 598 | return 599 | queue = ctx.create_queue(ctx.devices[0]) 600 | 601 | import numpy 602 | 603 | # Create array 604 | a = numpy.zeros(4096, dtype=numpy.int32) 605 | 606 | # Create buffer 607 | a_ = 
ctx.create_buffer(cl.CL_MEM_READ_WRITE | cl.CL_MEM_COPY_HOST_PTR, 608 | a) 609 | 610 | # Fill the buffer 611 | pattern = numpy.array([1, 2, 3, 4], dtype=numpy.int32) 612 | queue.fill_buffer(a_, pattern, pattern.nbytes, a.nbytes).wait() 613 | 614 | queue.read_buffer(a_, a) 615 | diff = 0 616 | for i in range(0, a.size, pattern.size): 617 | diff += numpy.fabs(a[i:i + pattern.size] - pattern).sum() 618 | self.assertEqual(diff, 0) 619 | 620 | def test_set_arg_None(self): 621 | import numpy 622 | # Create platform, context, program, kernel and queue 623 | platforms = cl.Platforms() 624 | ctx = platforms.create_some_context() 625 | src = """ 626 | __kernel void test(__global float *a, __global const float *b, 627 | __global const float *c) { 628 | int idx = get_global_id(0); 629 | a[idx] += b[idx] + (c ? c[idx] : 0); 630 | } 631 | """ 632 | prg = ctx.create_program(src) 633 | krn = prg.get_kernel("test") 634 | queue = ctx.create_queue(ctx.devices[0]) 635 | 636 | # Create arrays with some values for testing 637 | a = numpy.array([1, 2, 3, 4, 5], dtype=numpy.float32) 638 | b = numpy.array([6, 7, 8, 9, 10], dtype=numpy.float32) 639 | c = numpy.array([11, 12, 13, 14, 15], dtype=numpy.float32) 640 | 641 | # Create buffers 642 | a_ = ctx.create_buffer(cl.CL_MEM_READ_WRITE | cl.CL_MEM_COPY_HOST_PTR, 643 | a) 644 | b_ = ctx.create_buffer(cl.CL_MEM_READ_ONLY | cl.CL_MEM_COPY_HOST_PTR, 645 | b) 646 | c_ = ctx.create_buffer(cl.CL_MEM_READ_ONLY | cl.CL_MEM_COPY_HOST_PTR, 647 | c) 648 | 649 | # Set kernel arguments 650 | krn.set_arg(0, a_) 651 | krn.set_arg(1, b_) 652 | krn.set_arg(2, c_) 653 | 654 | # Execute kernel 655 | queue.execute_kernel(krn, [a.size], None).wait() 656 | 657 | # Get results back 658 | d = numpy.zeros_like(a) 659 | queue.read_buffer(a_, d) 660 | t = a + b + c 661 | diff = numpy.fabs(d - t).max() 662 | self.assertEqual(diff, 0) 663 | 664 | # Set arg to None 665 | krn.set_arg(2, None) 666 | 667 | # Execute kernel 668 | queue.execute_kernel(krn, [a.size], 
None).wait() 669 | 670 | # Get results back 671 | queue.read_buffer(a_, d) 672 | t += b 673 | diff = numpy.fabs(d - t).max() 674 | self.assertEqual(diff, 0) 675 | 676 | def test_create_sub_buffer(self): 677 | import numpy 678 | # Create platform, context, program, kernel and queue 679 | platforms = cl.Platforms() 680 | ctx = platforms.create_some_context() 681 | prg = ctx.create_program(self.src_test, self.include_dirs) 682 | krn = prg.get_kernel("test") 683 | queue = ctx.create_queue(ctx.devices[0]) 684 | 685 | # Create arrays with some values for testing 686 | a = numpy.arange(100000, dtype=numpy.float32) 687 | b = numpy.cos(a) 688 | a = numpy.sin(a) 689 | 690 | # Prepare arrays for use with map_buffer 691 | a = cl.realign_array(a, queue.device.memalign, numpy) 692 | b = cl.realign_array(b, queue.device.memalign, numpy) 693 | c = numpy.array([1.2345], dtype=numpy.float32) 694 | d = a[1024:1024 + 4096] + b[2048:2048 + 4096] * c[0] 695 | 696 | # Create buffers 697 | a_parent_ = ctx.create_buffer( 698 | cl.CL_MEM_READ_WRITE | cl.CL_MEM_USE_HOST_PTR, a) 699 | self.assertEqual(a_parent_._n_refs, 1) 700 | a_ = a_parent_.create_sub_buffer(4096, 16384) 701 | self.assertEqual(a_parent_._n_refs, 2) 702 | self.assertEqual(a_._n_refs, 1) 703 | b_parent_ = ctx.create_buffer( 704 | cl.CL_MEM_READ_WRITE | cl.CL_MEM_USE_HOST_PTR, b) 705 | self.assertEqual(b_parent_._n_refs, 1) 706 | b_ = b_parent_.create_sub_buffer(8192, 16384) 707 | self.assertEqual(b_parent_._n_refs, 2) 708 | self.assertEqual(b_._n_refs, 1) 709 | 710 | # Set kernel arguments 711 | krn.set_args(a_, b_, c[0:1]) 712 | 713 | # Execute kernel 714 | global_size = [4096] 715 | local_size = None 716 | queue.execute_kernel(krn, global_size, local_size, need_event=False) 717 | 718 | # Get results back from the device by map_buffer 719 | ev, ptr = queue.map_buffer(a_, cl.CL_MAP_READ, a_.size) 720 | del ev 721 | queue.unmap_buffer(a_, ptr).wait() 722 | self.assertLess(numpy.fabs(a[1024:1024 + 4096] - d).max(), 0.0001, 723 
| "Incorrect result after map_buffer") 724 | 725 | # Get results back from the device by read_buffer 726 | aa = numpy.zeros(4096, dtype=numpy.float32) 727 | queue.read_buffer(a_, aa) 728 | self.assertLess(numpy.fabs(aa - d).max(), 0.0001, 729 | "Incorrect result after read_buffer") 730 | 731 | del b_ 732 | self.assertIn(b_parent_._n_refs, (1, 2)) 733 | logging.info("test_create_sub_buffer: " 734 | "b_parent_._n_refs = %d (expected 1 or 2)", 735 | b_parent_._n_refs) 736 | del a_ 737 | self.assertIn(a_parent_._n_refs, (1, 2)) 738 | logging.info("test_create_sub_buffer: " 739 | "a_parent_._n_refs = %d (expected 1 or 2)", 740 | a_parent_._n_refs) 741 | 742 | def test_create_queue_with_properties(self): 743 | ctx = cl.Platforms().create_some_context() 744 | try: 745 | queue = ctx.create_queue( 746 | ctx.devices[0], 747 | cl.CL_QUEUE_ON_DEVICE | 748 | cl.CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 749 | properties={cl.CL_QUEUE_SIZE: 64}) 750 | del queue 751 | except cl.CLRuntimeError: 752 | if ctx.devices[0].version >= 2.0: 753 | raise 754 | return 755 | queue = ctx.create_queue( 756 | ctx.devices[0], 757 | properties={cl.CL_QUEUE_SIZE: 64, 758 | cl.CL_QUEUE_PROPERTIES: 759 | cl.CL_QUEUE_ON_DEVICE | 760 | cl.CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE}) 761 | del queue 762 | 763 | def test_work_group_info(self): 764 | ctx = cl.Platforms().create_some_context() 765 | prg = ctx.create_program(self.src_test, self.include_dirs) 766 | krn = prg.get_kernel("test") 767 | info = krn.get_work_group_info(ctx.devices[0]) 768 | 769 | self.assertRaises(cl.CLRuntimeError, getattr, info, "global_work_size") 770 | 771 | for vle in (info.compile_work_group_size,): 772 | self.assertIsInstance(vle, tuple) 773 | self.assertEqual(len(vle), 3) 774 | for x in vle: 775 | self.assertIsInstance(x, int) 776 | self.assertGreaterEqual(x, 0) 777 | 778 | for vle in (info.work_group_size, info.local_mem_size, 779 | info.preferred_work_group_size_multiple, 780 | info.private_mem_size): 781 | 
self.assertIsInstance(vle, int) 782 | self.assertGreaterEqual(vle, 0) 783 | 784 | def test_create_pipe(self): 785 | ctx = cl.Platforms().create_some_context() 786 | if ctx.devices[0].version < 2.0: 787 | return 788 | pipe = ctx.create_pipe(0, 8, 16) 789 | del pipe 790 | pipe = ctx.create_pipe(cl.CL_MEM_READ_WRITE, 8, 16) 791 | prg = ctx.create_program(""" 792 | __kernel void test(__write_only pipe int p) { 793 | int x = 0; 794 | write_pipe(p, &x); 795 | } 796 | """, options="-cl-std=CL2.0") 797 | krn = prg.get_kernel("test") 798 | krn.set_arg(0, pipe) 799 | del krn 800 | del prg 801 | del pipe 802 | 803 | def test_svm_alloc(self): 804 | ctx = cl.Platforms().create_some_context() 805 | if ctx.devices[0].version < 2.0: 806 | return 807 | svm = ctx.svm_alloc(cl.CL_MEM_READ_WRITE, 4096) 808 | svm.release() 809 | self.assertIsNone(svm.handle) 810 | del svm 811 | svm = ctx.svm_alloc(cl.CL_MEM_READ_WRITE, 4096) 812 | prg = ctx.create_program(""" 813 | __kernel void test(__global void *p) { 814 | __global int *ptr = (__global int*)p; 815 | *ptr += 1; 816 | } 817 | """, options="-cl-std=CL2.0") 818 | krn = prg.get_kernel("test") 819 | krn.set_arg(0, svm) 820 | krn.set_arg_svm(0, svm) 821 | queue = ctx.create_queue(ctx.devices[0]) 822 | queue.svm_map(svm, cl.CL_MAP_WRITE_INVALIDATE_REGION, 4) 823 | p = cl.get_ffi().cast("int*", svm.handle) 824 | p[0] = 2 825 | queue.svm_unmap(svm) 826 | queue.execute_kernel(krn, [1], None) 827 | queue.svm_map(svm, cl.CL_MAP_READ, 4) 828 | self.assertEqual(p[0], 3) 829 | # always ensure that the last unmap had completed before 830 | # the svm destructor 831 | queue.svm_unmap(svm).wait() 832 | try: 833 | import numpy 834 | a = numpy.frombuffer(svm.buffer, dtype=numpy.int32) 835 | queue.execute_kernel(krn, [1], None) 836 | queue.svm_map(svm, cl.CL_MAP_READ, 4) 837 | self.assertEqual(a[0], 4) 838 | queue.svm_unmap(svm).wait() 839 | except ImportError: 840 | pass 841 | del svm # svm destructor here 842 | 843 | def test_svm_memcpy(self): 844 | ctx 
= cl.Platforms().create_some_context() 845 | if ctx.devices[0].version < 2.0: 846 | return 847 | svm = ctx.svm_alloc(cl.CL_MEM_READ_WRITE, 4096) 848 | import numpy 849 | a = numpy.frombuffer(svm.buffer, dtype=numpy.int32) 850 | queue = ctx.create_queue(ctx.devices[0]) 851 | queue.svm_map(svm, cl.CL_MAP_WRITE_INVALIDATE_REGION, svm.size) 852 | a[:] = numpy.arange(a.size, dtype=a.dtype) 853 | queue.svm_unmap(svm) 854 | n = a.size // 2 855 | queue.svm_memcpy(a[n:], a, n * a.itemsize) 856 | queue.svm_map(svm, cl.CL_MAP_READ, svm.size) 857 | self.assertEqual(numpy.fabs(a[n:] - a[:n]).max(), 0) 858 | queue.svm_unmap(svm).wait() 859 | del svm 860 | 861 | def test_svm_memfill(self): 862 | ctx = cl.Platforms().create_some_context() 863 | if ctx.devices[0].version < 2.0: 864 | return 865 | svm = ctx.svm_alloc(cl.CL_MEM_READ_WRITE, 4096) 866 | import numpy 867 | a = numpy.frombuffer(svm.buffer, dtype=numpy.int32) 868 | queue = ctx.create_queue(ctx.devices[0]) 869 | pattern = numpy.array([1, 2, 3, 4], dtype=numpy.int32) 870 | queue.svm_memfill(a, pattern, pattern.nbytes, a.nbytes) 871 | queue.svm_map(svm, cl.CL_MAP_READ, svm.size) 872 | diff = 0 873 | for i in range(0, a.size, pattern.size): 874 | diff += numpy.fabs(a[i:i + pattern.size] - pattern).sum() 875 | self.assertEqual(diff, 0) 876 | queue.svm_unmap(svm).wait() 877 | del svm 878 | 879 | 880 | if __name__ == "__main__": 881 | logging.basicConfig(level=logging.DEBUG) 882 | unittest.main() 883 | -------------------------------------------------------------------------------- /tests/test_clblas.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2014, Samsung Electronics Co.,Ltd. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. 
Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 18 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | The views and conclusions contained in the software and documentation are those 26 | of the authors and should not be interpreted as representing official policies, 27 | either expressed or implied, of Samsung Electronics Co.,Ltd.. 28 | """ 29 | 30 | """ 31 | opencl4py - OpenCL cffi bindings and helper classes. 32 | URL: https://github.com/Samsung/opencl4py 33 | Original author: Alexey Kazantsev 34 | """ 35 | 36 | """ 37 | Tests some of the API in the opencl4py.blas._clblas module.
import unittest
import logging
import numpy
import opencl4py as cl
import opencl4py.blas as blas
import os


class Test(unittest.TestCase):
    """GEMM tests for the clBLAS wrapper exposed by ``opencl4py.blas``."""

    def setUp(self):
        """Select a default OpenCL context and create the CLBLAS object.

        ``PYOPENCL_CTX`` is set to ``"0:0"`` only when the variable is not
        already defined; its previous state is restored after the test.
        """
        self.old_env = os.environ.get("PYOPENCL_CTX")
        # Registered before anything below can raise: tearDown() is not
        # called when setUp() fails, cleanups are, so the environment is
        # restored even if blas.CLBLAS() raises.
        self.addCleanup(self._restore_env)
        if self.old_env is None:
            os.environ["PYOPENCL_CTX"] = "0:0"
        self.blas = blas.CLBLAS()

    def _restore_env(self):
        """Put ``PYOPENCL_CTX`` back to the state saved by ``setUp``."""
        if self.old_env is None:
            # pop() tolerates the variable having been removed meanwhile.
            os.environ.pop("PYOPENCL_CTX", None)
        else:
            os.environ["PYOPENCL_CTX"] = self.old_env
        del self.old_env

    def _test_gemm(self, gemm, dtype):
        """Run ``gemm`` on random matrices and compare with numpy.

        Parameters:
            gemm: bound GEMM method of the CLBLAS object
                  (``sgemm`` or ``dgemm``).
            dtype: numpy dtype of the matrices; it also selects the
                   tolerance of the comparison.

        The reference result is ``numpy.dot(a, b.transpose())``, which
        matches the ``clblasNoTrans`` / ``clblasTrans`` flags passed to
        ``gemm``.
        """
        ctx = cl.Platforms().create_some_context()
        queue = ctx.create_queue(ctx.devices[0])

        # Fixed seed keeps the inputs reproducible between runs.
        numpy.random.seed(123)
        a = numpy.random.rand(127, 353).astype(dtype)
        b = numpy.random.rand(135, a.shape[1]).astype(dtype)
        c = numpy.zeros([a.shape[0], b.shape[0]], dtype=dtype)
        gold_c = numpy.dot(a, b.transpose())

        flags = cl.CL_MEM_READ_WRITE | cl.CL_MEM_COPY_HOST_PTR
        a_buf = ctx.create_buffer(flags, a)
        b_buf = ctx.create_buffer(flags, b)
        c_buf = ctx.create_buffer(flags, c)

        gemm([queue], blas.clblasRowMajor, blas.clblasNoTrans,
             blas.clblasTrans, a.shape[0], b.shape[0], a.shape[1],
             1.0, a_buf, b_buf, 0.0, c_buf)
        queue.flush()
        queue.read_buffer(c_buf, c)

        max_diff = numpy.fabs(c - gold_c).max()
        # Double precision gets the tighter bound.
        tolerance = 0.00001 if dtype == numpy.float64 else 0.00015
        self.assertLess(max_diff, tolerance)

    def test_sgemm(self):
        """Check single precision GEMM."""
        logging.debug("ENTER: test_sgemm")
        self._test_gemm(self.blas.sgemm, numpy.float32)
        logging.debug("EXIT: test_sgemm")

    def test_dgemm(self):
        """Check double precision GEMM."""
        logging.debug("ENTER: test_dgemm")
        self._test_gemm(self.blas.dgemm, numpy.float64)
        logging.debug("EXIT: test_dgemm")


if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    unittest.main()