├── .ci
    └── hack-intel-cl-into-conda-env.sh
├── .conda-ci-build-configure.sh
├── .editorconfig
├── .github
    ├── ISSUE_TEMPLATE
    │   ├── bug_report.md
    │   ├── config.yml
    │   └── feature_request.md
    ├── dependabot.yml
    └── workflows
    │   ├── autopush.yml
    │   ├── ci.yml
    │   └── wheels.yml
├── .gitignore
├── .gitlab-ci.yml
├── .gitmodules
├── .pylintrc-local.yml
├── .test-conda-env-py3.yml
├── CITATION.cff
├── CMakeLists.txt
├── LICENSE
├── README.rst
├── TODOs
├── contrib
    ├── cldis.py
    ├── fortran-to-opencl
    │   ├── README
    │   └── translate.py
    └── pyopencl.vim
├── doc
    ├── .gitignore
    ├── Makefile
    ├── algorithm.rst
    ├── array.rst
    ├── conf.py
    ├── howto.rst
    ├── index.rst
    ├── make_constants.py
    ├── misc.rst
    ├── runtime.rst
    ├── runtime_const.rst
    ├── runtime_gl.rst
    ├── runtime_memory.rst
    ├── runtime_platform.rst
    ├── runtime_program.rst
    ├── runtime_queue.rst
    ├── subst.rst
    ├── tools.rst
    ├── types.rst
    └── upload-docs.sh
├── examples
    ├── .gitignore
    ├── black-hole-accretion.py
    ├── demo-struct-reduce.py
    ├── demo.py
    ├── demo_array.py
    ├── demo_array_svm.py
    ├── demo_elementwise.py
    ├── demo_elementwise_complex.py
    ├── demo_mandelbrot.py
    ├── demo_meta_codepy.py
    ├── demo_meta_template.py
    ├── dump-performance.py
    ├── dump-properties.py
    ├── gl_interop_demo.py
    ├── gl_particle_animation.py
    ├── image_filters_using_image2d_t.py
    ├── ipython-demo.ipynb
    ├── median-filter.py
    ├── n-body.py
    ├── narray.py
    ├── noisyImage.jpg
    ├── pi-monte-carlo.py
    ├── svm.py
    └── transpose.py
├── pyopencl
    ├── __init__.py
    ├── _cluda.py
    ├── _mymako.py
    ├── algorithm.py
    ├── array.py
    ├── bitonic_sort.py
    ├── bitonic_sort_templates.py
    ├── cache.py
    ├── capture_call.py
    ├── characterize
    │   ├── __init__.py
    │   └── performance.py
    ├── cl
    │   ├── pyopencl-airy.cl
    │   ├── pyopencl-bessel-j-complex.cl
    │   ├── pyopencl-bessel-j.cl
    │   ├── pyopencl-bessel-y.cl
    │   ├── pyopencl-complex.h
    │   ├── pyopencl-eval-tbl.cl
    │   ├── pyopencl-hankel-complex.cl
    │   └── pyopencl-random123
    │   │   ├── array.h
    │   │   ├── openclfeatures.h
    │   │   ├── philox.cl
    │   │   └── threefry.cl
    ├── clmath.py
    ├── clrandom.py
    ├── cltypes.py
    ├── elementwise.py
    ├── invoker.py
    ├── ipython_ext.py
    ├── reduction.py
    ├── scan.py
    ├── tools.py
    └── version.py
├── pyproject.toml
├── run-mypy.sh
├── run-pylint.sh
├── scripts
    ├── build-ocl-macos.sh
    ├── build-ocl-windows.sh
    └── build-ocl.sh
├── src
    ├── bitlog.cpp
    ├── bitlog.hpp
    ├── clinfo_ext.h
    ├── mempool.hpp
    ├── pyopencl_ext.h
    ├── tools.hpp
    ├── wrap_cl.cpp
    ├── wrap_cl.hpp
    ├── wrap_cl_part_1.cpp
    ├── wrap_cl_part_2.cpp
    ├── wrap_constants.cpp
    ├── wrap_helpers.hpp
    └── wrap_mempool.cpp
└── test
    ├── add-vectors-32.spv
    ├── add-vectors-64.spv
    ├── empty-header.h
    ├── test_algorithm.py
    ├── test_array.py
    ├── test_arrays_in_structs.py
    ├── test_clmath.py
    ├── test_clrandom.py
    ├── test_enqueue_copy.py
    └── test_wrapper.py


/.ci/hack-intel-cl-into-conda-env.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 | 
3 | # Swap the "- pocl" entry of the conda env file named by $CONDA_ENVIRONMENT for intel-opencl-rt, excluding 2022.2; see https://github.com/conda-forge/intel-compiler-repack-feedstock/issues/7
4 | sed -i 's/- pocl/- intel-opencl-rt!=2022.2/g' "$CONDA_ENVIRONMENT"
5 | 


--------------------------------------------------------------------------------
/.conda-ci-build-configure.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/pyopencl/b8b8d4d852e8a26356861ffda578874dc064e54c/.conda-ci-build-configure.sh


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | # https://editorconfig.org/
 2 | # https://github.com/editorconfig/editorconfig-vim
 3 | # https://github.com/editorconfig/editorconfig-emacs
 4 | 
 5 | root = true
 6 | 
 7 | [*]
 8 | indent_style = space
 9 | end_of_line = lf
10 | charset = utf-8
11 | trim_trailing_whitespace = true
12 | insert_final_newline = true
13 | 
14 | [*.py]
15 | indent_size = 4
16 | 
17 | [*.rst]
18 | indent_size = 4
19 | 
20 | [*.cpp]
21 | indent_size = 2
22 | 
23 | [*.hpp]
24 | indent_size = 2
25 | 
26 | # There may be one in doc/
27 | [Makefile]
28 | indent_style = tab
29 | 
30 | # https://github.com/microsoft/vscode/issues/1679
31 | [*.md]
32 | trim_trailing_whitespace = false
33 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help us improve
 4 | title: ''
 5 | labels: bug
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 | 
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 | 
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 | 
23 | **Environment (please complete the following information):**
24 |  - OS: [e.g. Linux]
25 |  - ICD Loader and version: [e.g. ocl-icd 2.3.1]
26 |  - ICD and version: [e.g. pocl 1.8]
27 |  - CPU/GPU: [e.g. Nvidia Titan V]
28 |  - Python version: [e.g. 3.10]
29 |  - PyOpenCL version: [e.g. 2021.1]
30 | 
31 | **Additional context**
32 | Add any other context about the problem here.
33 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: true
2 | contact_links:
3 |   - name: ❓ Question
4 |     url: https://github.com/inducer/pyopencl/discussions/categories/q-a
5 |     about: Ask and answer questions about PyOpenCL on Discussions
6 |   - name: 🔧 Troubleshooting
7 |     url: https://github.com/inducer/pyopencl/discussions/categories/troubleshooting
8 |     about: For troubleshooting help, see the Discussions
9 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | title: ''
 5 | labels: enhancement
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | updates:
 3 |     # Set update schedule for GitHub Actions
 4 |     - package-ecosystem: "github-actions"
 5 |       directory: "/"
 6 |       schedule:
 7 |           interval: "weekly"
 8 | 
 9 | # vim: sw=4
10 | 


--------------------------------------------------------------------------------
/.github/workflows/autopush.yml:
--------------------------------------------------------------------------------
 1 | name: Gitlab mirror
 2 | on:
 3 |     push:
 4 |         branches:
 5 |         - main
 6 | 
 7 | jobs:
 8 |     autopush:
 9 |         name: Automatic push to gitlab.tiker.net
10 |         if: startsWith(github.repository, 'inducer/')
11 |         runs-on: ubuntu-latest
12 |         steps:
13 |         -   uses: actions/checkout@v4
14 |         -   run: |
15 |                 curl -L -O https://tiker.net/ci-support-v0
16 |                 . ./ci-support-v0
17 |                 mirror_github_to_gitlab
18 | 
19 |             env:
20 |                 GITLAB_AUTOPUSH_KEY: ${{ secrets.GITLAB_AUTOPUSH_KEY }}
21 | 
22 | # vim: sw=4
23 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
  1 | name: CI
  2 | on:
  3 |     push:
  4 |         branches:
  5 |         - main
  6 |         tags:
  7 |         - v*
  8 |     pull_request:
  9 |     schedule:
 10 |         - cron:  '17 3 * * 0'
 11 | 
 12 | jobs:
 13 |     ruff:
 14 |         name: Ruff
 15 |         runs-on: ubuntu-latest
 16 |         steps:
 17 |         -   uses: actions/checkout@v4
 18 |             with:
 19 |                 submodules: true
 20 |         -   uses: actions/setup-python@v5
 21 |         -   name: "Main Script"
 22 |             run: |
 23 |                 pip install ruff
 24 |                 ruff check
 25 | 
 26 |     typos:
 27 |         name: Typos
 28 |         runs-on: ubuntu-latest
 29 |         steps:
 30 |         -   uses: actions/checkout@v4
 31 |         -   uses: crate-ci/typos@master
 32 | 
 33 |     pylint:
 34 |         name: Pylint
 35 |         runs-on: ubuntu-latest
 36 |         steps:
 37 |         -   uses: actions/checkout@v4
 38 |         -   name: "Main Script"
 39 |             run: |
 40 |                 CONDA_ENVIRONMENT=.test-conda-env-py3.yml
 41 |                 echo "- matplotlib" >> $CONDA_ENVIRONMENT
 42 |                 echo "- pyopengl" >> $CONDA_ENVIRONMENT
 43 |                 echo "- ipython" >> $CONDA_ENVIRONMENT
 44 | 
 45 |                 curl -L -O https://tiker.net/ci-support-v0
 46 |                 . ci-support-v0
 47 |                 build_py_project_in_conda_env
 48 | 
 49 |                 # Avoid linting local directory, where native module
 50 |                 # cannot be imported.
 51 |                 rm -Rf "$(get_proj_name)"
 52 | 
 53 |                 run_pylint "$(get_proj_name)" test/*.py
 54 | 
 55 |     mypy:
 56 |         name: Mypy
 57 |         runs-on: ubuntu-latest
 58 |         steps:
 59 |         -   uses: actions/checkout@v4
 60 |         -   name: "Main Script"
 61 |             run: |
 62 |                 curl -L -O https://tiker.net/ci-support-v0
 63 |                 . ci-support-v0
 64 |                 export CL_USE_SHIPPED_EXT=on
 65 |                 build_py_project_in_conda_env
 66 | 
 67 |                 python -m pip install mypy importlib-resources
 68 |                 ./run-mypy.sh
 69 | 
 70 |     pytest:
 71 |         name: Pytest Linux POCL
 72 |         runs-on: ubuntu-latest
 73 |         steps:
 74 |         -   uses: actions/checkout@v4
 75 |         -   name: "Main Script"
 76 |             run: |
 77 |                 curl -L -O https://tiker.net/ci-support-v0
 78 |                 . ci-support-v0
 79 |                 export CL_USE_SHIPPED_EXT=on
 80 |                 build_py_project_in_conda_env
 81 |                 test_py_project
 82 | 
 83 |     pytest_intel:
 84 |         name: Pytest Linux Intel CL
 85 |         runs-on: ubuntu-latest
 86 |         steps:
 87 |         -   uses: actions/checkout@v4
 88 |         -   name: "Main Script"
 89 |             run: |
 90 |                 export CONDA_ENVIRONMENT=.test-conda-env-py3.yml
 91 |                 .ci/hack-intel-cl-into-conda-env.sh
 92 | 
 93 |                 curl -L -O https://tiker.net/ci-support-v0
 94 |                 . ci-support-v0
 95 |                 export CL_USE_SHIPPED_EXT=on
 96 |                 build_py_project_in_conda_env
 97 |                 test_py_project
 98 | 
 99 |     pytest_win:
100 |         name: Pytest Windows Intel CL
101 |         runs-on: windows-latest
102 |         steps:
103 |         -   uses: actions/checkout@v4
104 |         -   name: "Main Script"
105 |             shell: bash
106 |             run: |
107 |                 set -x
108 |                 export CONDA_ENVIRONMENT=.test-conda-env-py3.yml
109 | 
110 |                 sed -i 's/- ocl-icd/- khronos-opencl-icd-loader/g' "$CONDA_ENVIRONMENT"
111 |                 sed -i '/- git/d' "$CONDA_ENVIRONMENT"
112 | 
113 |                 .ci/hack-intel-cl-into-conda-env.sh
114 | 
115 |                 curl -L -O https://tiker.net/ci-support-v0
116 |                 . ci-support-v0
117 |                 export CL_USE_SHIPPED_EXT=on
118 |                 build_py_project_in_conda_env
119 |                 test_py_project
120 | 
121 |     pytest_mac:
122 |         name: Pytest Mac POCL
123 |         runs-on: macos-latest
124 |         steps:
125 |         -   uses: actions/checkout@v4
126 |         -   name: "Main Script"
127 |             run: |
128 |                 export CC=gcc
129 |                 CONDA_ENVIRONMENT=.test-conda-env.yml
130 |                 grep -v ocl-icd .test-conda-env-py3.yml > $CONDA_ENVIRONMENT
131 | 
132 |                 curl -L -O https://tiker.net/ci-support-v0
133 |                 . ci-support-v0
134 |                 build_py_project_in_conda_env
135 |                 test_py_project
136 | 
137 |     docs:
138 |         name: Documentation
139 |         runs-on: ubuntu-latest
140 |         steps:
141 |         -   uses: actions/checkout@v4
142 |         -
143 |             uses: actions/setup-python@v5
144 |             with:
145 |                 python-version: '3.x'
146 |         -   name: "Main Script"
147 |             run: |
148 |                 CONDA_ENVIRONMENT=.test-conda-env-py3.yml
149 | 
150 |                 curl -L -O https://tiker.net/ci-support-v0
151 |                 . ci-support-v0
152 |                 export CL_USE_SHIPPED_EXT=on
153 |                 build_py_project_in_conda_env
154 |                 build_docs
155 | 
156 |     examples:
157 |         name: Examples
158 |         runs-on: ubuntu-latest
159 |         steps:
160 |         -   uses: actions/checkout@v4
161 |         -   name: "Main Script"
162 |             run: |
163 |                 EXTRA_INSTALL="pillow cgen mako imageio"
164 | 
165 |                 curl -L -O https://tiker.net/ci-support-v0
166 |                 . ci-support-v0
167 |                 build_py_project_in_conda_env
168 |                 (cd examples; rm -f gl_*)
169 |                 run_examples --no-require-main
170 | 
171 |     downstream_tests:
172 |         strategy:
173 |             matrix:
174 |                 downstream_project: [loopy, boxtree, meshmode]
175 |         name: Tests for downstream project ${{ matrix.downstream_project }}
176 |         runs-on: ubuntu-latest
177 |         steps:
178 |         -   uses: actions/checkout@v4
179 |         -   name: "Main Script"
180 |             env:
181 |                 DOWNSTREAM_PROJECT: ${{ matrix.downstream_project }}
182 |             run: |
183 |                 curl -L -O https://tiker.net/ci-support-v0
184 |                 . ci-support-v0
185 | 
186 |                 prepare_downstream_build "https://github.com/inducer/$DOWNSTREAM_PROJECT.git"
187 |                 sed -i 's/pyopencl/ocl-icd/' .test-conda-env-py3.yml
188 |                 build_py_project_in_conda_env
189 |                 test_py_project
190 | 
191 | # vim: sw=4
192 | 


--------------------------------------------------------------------------------
/.github/workflows/wheels.yml:
--------------------------------------------------------------------------------
 1 | name: Build and upload to PyPI
 2 | 
 3 | # Build on pushes to main, on v* tag pushes, on pull requests, and on a weekly schedule:
 4 | on:
 5 |     push:
 6 |         branches:
 7 |         - main
 8 |         tags:
 9 |         - v*
10 |     pull_request:
11 |     schedule:
12 |         - cron:  '17 3 * * 0'
13 | 
14 | jobs:
15 |   build_wheels:
16 |     name: Build wheels on ${{ matrix.os }}
17 |     runs-on: ${{ matrix.os }}
18 |     strategy:
19 |       fail-fast: false
20 |       matrix:
21 |         os: [ubuntu-latest, windows-latest, macos-13, macos-14]
22 | 
23 |     steps:
24 |       - uses: actions/checkout@v4
25 |         with:
26 |           submodules: 'true'
27 | 
28 |       - uses: actions/setup-python@v5
29 |         with:
30 |           python-version: '3.x'
31 | 
32 |       - name: Install cibuildwheel
33 |         run: python -m pip install cibuildwheel==2.22.0
34 | 
35 |       - name: Build wheels
36 |         shell: bash
37 |         run: |
38 |           set -x
39 |           if [[ ${{ matrix.os }} == windows-* ]]; then
40 |             export CL_INC_DIR="D:/a/pyopencl/pyopencl/OpenCL-Headers/install/include"
41 |             export CL_LIB_DIR="C:/Program Files/OpenCL-ICD-Loader/lib"
42 |           fi
43 |           python -m cibuildwheel --output-dir wheelhouse
44 | 
45 |       - uses: actions/upload-artifact@v4
46 |         with:
47 |           name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
48 |           path: ./wheelhouse/*.whl
49 | 
50 |   build_sdist:
51 |     name: Build source distribution
52 |     runs-on: ubuntu-latest
53 |     steps:
54 |       - uses: actions/checkout@v4
55 |         with:
56 |           submodules: 'true'
57 | 
58 |       - name: Build sdist
59 |         run: pipx run build --sdist
60 | 
61 |       - uses: actions/upload-artifact@v4
62 |         with:
63 |           name: cibw-sdist
64 |           path: dist/*.tar.gz
65 | 
66 |   upload_pypi:
67 |     needs: [build_wheels, build_sdist]
68 | 
69 |     environment: pypi
70 |     permissions:
71 |       id-token: write
72 | 
73 |     runs-on: ubuntu-latest
74 |     # upload to PyPI on every tag starting with 'v'
75 |     if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
76 |     # alternatively, to publish when a GitHub Release is created, use the following rule:
77 |     # if: github.event_name == 'release' && github.event.action == 'published'
78 |     steps:
79 |     - uses: actions/download-artifact@v4
80 |       with:
81 |         pattern: cibw-*
82 |         path: dist
83 |         merge-multiple: true
84 | 
85 |     - uses: pypa/gh-action-pypi-publish@release/v1
86 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | _skbuild
 2 | 
 3 | .pydevproject
 4 | .project
 5 | .settings
 6 | *~
 7 | .*.sw[po]
 8 | .sw[po]
 9 | *.dat
10 | *.pyc
11 | build
12 | *.prof
13 | doc/hedge-notes.pdf
14 | *.vtk
15 | *.silo
16 | *.session
17 | dump.py
18 | *.orig
19 | /Makefile
20 | *.png
21 | tags
22 | *.vtu
23 | *.pvtu
24 | *.pvd
25 | doc/user-reference
26 | doc/dev-reference
27 | *.poly
28 | *.node
29 | *.bak
30 | *.pdf
31 | *.tif
32 | *.so
33 | *.pyd
34 | *.mpeg
35 | *-journal
36 | visitlog.py
37 | *.log
38 | .figleaf
39 | dist
40 | *.egg*
41 | MANIFEST
42 | *.patch
43 | *.LOCAL.[0-9]*
44 | *.REMOTE.[0-9]*
45 | *.BASE.[0-9]*
46 | tmp
47 | temp*
48 | setuptools.pth
49 | distribute-*.tar.gz
50 | core
51 | *.sess
52 | _build
53 | __pycache__
54 | *.o
55 | .ipynb_checkpoints
56 | cscope.*
57 | 
58 | # needed by jenkins env
59 | .env
60 | virtualenv-[0-9]*
61 | pytest.xml
62 | setuptools*tar.gz
63 | build-and-test-py-project.sh
64 | 
65 | cffi_build.py
66 | 
67 | .cache
68 | .pytest_cache
69 | .idea
70 | 
71 | wheelhouse
72 | 
73 | memray-*.bin
74 | memray-*.html
75 | 
76 | .pylintrc.yml
77 | .run-pylint.py
78 | 


--------------------------------------------------------------------------------
/.gitlab-ci.yml:
--------------------------------------------------------------------------------
  1 | variables:
  2 |   GIT_SUBMODULE_STRATEGY: recursive
  3 | 
  4 | Python 3 Intel CPU:
  5 |   script: |
  6 |     source /opt/enable-intel-cl.sh
  7 |     export PYOPENCL_TEST="intel(r):pu"
  8 |     export EXTRA_INSTALL="numpy mako"
  9 | 
 10 |     curl -L -O https://tiker.net/ci-support-v0
 11 |     . ci-support-v0
 12 |     build_py_project_in_venv
 13 |     test_py_project
 14 |   tags:
 15 |   - python3
 16 |   - intel-cl-cpu
 17 |   except:
 18 |   - tags
 19 |   artifacts:
 20 |     reports:
 21 |       junit: test/pytest.xml
 22 | 
 23 | Python 3 Nvidia Titan V:
 24 |   script: |
 25 |     export PYOPENCL_TEST=nvi:titan
 26 |     export EXTRA_INSTALL="numpy mako"
 27 | 
 28 |     curl -L -O https://tiker.net/ci-support-v0
 29 |     . ci-support-v0
 30 |     build_py_project_in_venv
 31 |     test_py_project
 32 |   tags:
 33 |   - python3
 34 |   - nvidia-titan-v
 35 |   except:
 36 |   - tags
 37 |   artifacts:
 38 |     reports:
 39 |       junit: test/pytest.xml
 40 | 
 41 | Python 3 POCL:
 42 |   script: |
 43 |     export PYOPENCL_TEST=portable:cpu
 44 |     export EXTRA_INSTALL="numpy mako"
 45 | 
 46 |     curl -L -O https://tiker.net/ci-support-v0
 47 |     . ci-support-v0
 48 |     build_py_project_in_venv
 49 |     test_py_project
 50 |   tags:
 51 |   - python3
 52 |   - pocl
 53 |   except:
 54 |   - tags
 55 |   artifacts:
 56 |     reports:
 57 |       junit: test/pytest.xml
 58 | 
 59 | Python 3 POCL CL 1.1:
 60 |   script: |
 61 |     export PYOPENCL_TEST=portable:cpu
 62 |     export EXTRA_INSTALL="numpy mako"
 63 |     export PYOPENCL_PRETEND_CL_VERSION='1.1'
 64 | 
 65 |     curl -L -O https://tiker.net/ci-support-v0
 66 |     . ci-support-v0
 67 |     build_py_project_in_venv
 68 |     test_py_project
 69 |   tags:
 70 |   - python3
 71 |   - pocl
 72 |   except:
 73 |   - tags
 74 |   artifacts:
 75 |     reports:
 76 |       junit: test/pytest.xml
 77 | 
 78 | Python 3 POCL Titan V:
 79 |   script: |
 80 |     export PYOPENCL_TEST=portable:titan
 81 |     export EXTRA_INSTALL="numpy mako"
 82 | 
 83 |     curl -L -O https://tiker.net/ci-support-v0
 84 |     . ci-support-v0
 85 |     build_py_project_in_venv
 86 |     test_py_project
 87 |   tags:
 88 |   - python3
 89 |   - pocl
 90 |   - nvidia-titan-v
 91 |   except:
 92 |   - tags
 93 |   artifacts:
 94 |     reports:
 95 |       junit: test/pytest.xml
 96 | 
 97 | Python 3 POCL (+GL and special functions):
 98 |   script: |
 99 |     export PYOPENCL_TEST=portable:cpu
100 |     export EXTRA_INSTALL="numpy mako scipy pyfmmlib"
101 |     export PYOPENCL_ENABLE_GL=ON
102 | 
103 |     curl -L -O https://tiker.net/ci-support-v0
104 |     . ci-support-v0
105 |     build_py_project_in_venv
106 |     test_py_project
107 |   tags:
108 |   - python3
109 |   - pocl
110 |   except:
111 |   - tags
112 |   artifacts:
113 |     reports:
114 |       junit: test/pytest.xml
115 | 
116 | Ruff:
117 |   script: |
118 |     pipx install ruff
119 |     ruff check
120 |   tags:
121 |   - docker-runner
122 |   except:
123 |   - tags
124 | 
125 | Pylint:
126 |   script: |
127 |     export EXTRA_INSTALL="numpy mako matplotlib PyOpenGl IPython"
128 | 
129 |     curl -L -O https://tiker.net/ci-support-v0
130 |     . ci-support-v0
131 | 
132 |     build_py_project_in_venv
133 | 
134 |     # Avoid linting local directory, where native module
135 |     # cannot be imported.
136 |     rm -Rf "$(get_proj_name)"
137 | 
138 |     run_pylint "$(get_proj_name)" test/*.py
139 |   tags:
140 |   - python3
141 |   except:
142 |   - tags
143 | 
144 | Mypy:
145 |   script: |
146 |     export EXTRA_INSTALL="numpy mako mypy importlib-resources"
147 | 
148 |     curl -L -O https://tiker.net/ci-support-v0
149 |     . ci-support-v0
150 |     build_py_project_in_venv
151 |     python -m mypy --show-error-codes pyopencl test
152 |   tags:
153 |   - python3
154 |   except:
155 |   - tags
156 | 
157 | Documentation:
158 |   script: |
159 |     export EXTRA_INSTALL="numpy mako"
160 | 
161 |     curl -L -O https://tiker.net/ci-support-v0
162 |     . ci-support-v0
163 |     build_py_project_in_venv
164 |     build_docs
165 |     maybe_upload_docs
166 |   tags:
167 |   - linux
168 | 
169 | Examples:
170 |   script: |
171 |     export EXTRA_INSTALL="pillow cgen mako imageio"
172 | 
173 |     curl -L -O https://tiker.net/ci-support-v0
174 |     . ci-support-v0
175 |     build_py_project_in_venv
176 |     (cd examples; rm -f gl_*)
177 |     run_examples --no-require-main
178 |   except:
179 |   - tags
180 |   tags:
181 |   - python3
182 |   - pocl
183 | 
184 | Downstream:
185 |   parallel:
186 |     matrix:
187 |     - DOWNSTREAM_PROJECT: [loopy, boxtree, meshmode]
188 |   tags:
189 |   - large-node
190 |   - docker-runner
191 |   script: |
192 |     curl -L -O https://tiker.net/ci-support-v0
193 |     . ci-support-v0
194 | 
195 |     prepare_downstream_build "https://github.com/inducer/$DOWNSTREAM_PROJECT.git"
196 |     sed -i 's/pyopencl/ocl-icd/' .test-conda-env-py3.yml
197 |     build_py_project_in_conda_env
198 |     test_py_project
199 | 
200 | # vim: sw=2
201 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "pyopencl/compyte"]
2 | 	path = pyopencl/compyte
3 | 	url = https://github.com/inducer/compyte
4 | 


--------------------------------------------------------------------------------
/.pylintrc-local.yml:
--------------------------------------------------------------------------------
1 | - arg: ignore
2 |   val: compyte
3 | - arg: generated-members
4 |   val:
5 |     - cltypes.*
6 |     - gl_platform.*
7 |     - mako.template
8 | 


--------------------------------------------------------------------------------
/.test-conda-env-py3.yml:
--------------------------------------------------------------------------------
 1 | name: test-conda-env
 2 | channels:
 3 | - conda-forge
 4 | - nodefaults
 5 | 
 6 | dependencies:
 7 | - python=3
 8 | - git
 9 | - numpy
10 | - ocl-icd
11 | - pocl
12 | - mako
13 | 


--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
 1 | cff-version: 1.2.0
 2 | message: "If you use this software, please cite it as below."
 3 | authors:
 4 | - family-names: "Kloeckner"
 5 |   given-names: "Andreas"
 6 |   orcid: "https://orcid.org/0000-0003-1228-519X"
 7 | - family-names: "Yu"
 8 |   given-names: "Yichao"
 9 | - family-names: "Wala"
10 |   given-names: "Matt"
11 | - family-names: "Fernando"
12 |   given-names: "Isuru"
13 | - family-names: "Bencun"
14 |   given-names: "Marko"
15 | - family-names: "Kulkarni"
16 |   given-names: "Kaushik"
17 | - family-names: "Diener"
18 |   given-names: "Matthias"
19 | - family-names: "Gao"
20 |   given-names: "Hao"
21 | - family-names: "Fikl"
22 |   given-names: "Alex"
23 | - family-names: "Weiner"
24 |   given-names: "Zach"
25 | - family-names: "Weigert"
26 |   given-names: "Martin"
27 | - family-names: "Palmer"
28 |   given-names: "Rebecca"
29 | - family-names: "Latham"
30 |   given-names: "Shane"
31 | - family-names: "Magno"
32 |   given-names: "Gonçalo"
33 | - family-names: "Fuller"
34 |   given-names: "Henry"
35 | - family-names: "Mackenzie"
36 |   given-names: "Jonathan"
37 | - family-names: "Niarchos"
38 |   given-names: "Sotiris"
39 | - family-names: "Gill"
40 |   given-names: "Shahzaib"
41 | - family-names: "Gohlke"
42 |   given-names: "Christoph"
43 | - family-names: "Bhosale"
44 |   given-names: "Aditya"
45 | - family-names: "Rothberg"
46 |   given-names: "Alex"
47 | - family-names: "Ey"
48 |   given-names: "Emanuel"
49 | - family-names: "Rapp"
50 |   given-names: "Holger"
51 | - family-names: "van der Walt"
52 |   given-names: "Stefan"
53 | # Removed pending resolution of https://github.com/zenodo/zenodo/issues/2343
54 | # - alias: "gw0"
55 | - family-names: "Thalhammer"
56 |   given-names: "Gregor"
57 | - family-names: "Kieffer"
58 |   given-names: "Jerome"
59 | - family-names: "Poliarnyi"
60 |   given-names: "Nikolai"
61 | - family-names: "Bollinger"
62 |   given-names: "Drew"
63 | - family-names: "Nitz"
64 |   given-names: "Alex"
65 | - family-names: "Bokota"
66 |   given-names: "Grzegorz"
67 |   orcid: 'https://orcid.org/0000-0002-5470-1676'
68 | 
69 | title: "PyOpenCL"
70 | version: 2022.1.3
71 | doi: 10.5281/zenodo.6533956
72 | date-released: 2022-03-10
73 | url: "https://github.com/inducer/pyopencl"
74 | license: MIT
75 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | cmake_minimum_required(VERSION 3.17...3.26)
  2 | 
  3 | project(pyopencl LANGUAGES CXX VERSION ${SKBUILD_PROJECT_VERSION})
  4 | 
  5 | if(NOT SKBUILD)
  6 |   message(WARNING "\
  7 |   This CMake file is meant to be executed using 'scikit-build'. Running
  8 |   it directly will almost certainly not produce the desired result. If
  9 |   you are a user trying to install this package, please use the command
 10 |   below, which will install all necessary build dependencies, compile
 11 |   the package in an isolated environment, and then install it.
 12 |   =====================================================================
 13 |    $ pip install .
 14 |   =====================================================================
 15 |   If you are a software developer, and this is your own package, then
 16 |   it is usually much more efficient to install the build dependencies
 17 |   in your environment once and use the following command that avoids
 18 |   a costly creation of a new virtual environment at every compilation:
 19 |   =====================================================================
 20 |    $ pip install nanobind scikit-build-core[pyproject]
 21 |    $ pip install --no-build-isolation -ve .
 22 |   =====================================================================
 23 |   You may optionally add -Ceditable.rebuild=true to auto-rebuild when
 24 |   the package is imported. Otherwise, you need to re-run the above
 25 |   after editing C++ files.")
 26 | endif()
 27 | 
 28 | # {{{ Options
 29 | 
 30 | option(PYOPENCL_TRACE "Enable OpenCL tracing" $ENV{PYOPENCL_TRACE})
 31 | option(PYOPENCL_ENABLE_GL "Enable OpenGL interoperability" $ENV{PYOPENCL_ENABLE_GL})
 32 | option(PYOPENCL_USE_SHIPPED_EXT "Use shipped CL extension header" $ENV{PYOPENCL_USE_SHIPPED_EXT})
 33 | 
 34 | set(CL_INC_DIR CACHE STRING "OpenCL include directory")
 35 | set(CL_LIB_DIR CACHE STRING "OpenCL library directory")
 36 | set(CL_LIBNAME CACHE STRING "OpenCL library name")
 37 | 
 38 | set(PYOPENCL_PRETEND_CL_VERSION CACHE STRING "Pretend to be a different OpenCL version")
 39 | 
 40 | if(NOT CL_INC_DIR)  # CL_INC_DIR was not given to CMake: fall back to environment variables
 41 |   message(STATUS "CL_INC_DIR not set, trying to guess it from environment variables.")
 42 |   if(DEFINED ENV{CL_INC_DIR})
 43 |     message(STATUS "Using OpenCL include directory from environment '$ENV{CL_INC_DIR}'")
 44 |     set(CL_INC_DIR $ENV{CL_INC_DIR})
 45 |   endif()
 46 | 
 47 |   if(DEFINED ENV{CL_LIB_DIR})  # only consulted when CL_INC_DIR was not given to CMake
 48 |     message(STATUS "Using OpenCL library directory from environment '$ENV{CL_LIB_DIR}'")
 49 |     set(CL_LIB_DIR $ENV{CL_LIB_DIR})
 50 |   endif()
 51 | 
 52 |   if(DEFINED ENV{CL_LIBNAME})  # only consulted when CL_INC_DIR was not given to CMake
 53 |     message(STATUS "Using OpenCL library name from environment '$ENV{CL_LIBNAME}'")
 54 |     set(CL_LIBNAME $ENV{CL_LIBNAME})
 55 |   endif()
 56 | endif(NOT CL_INC_DIR)
 57 | 
 58 | if(NOT CL_INC_DIR)
 59 |   message(STATUS "CL_INC_DIR not set, trying to guess it from conda environment.")
 60 |   if(DEFINED ENV{CONDA_PREFIX})
 61 |     # Linux/MacOS:
 62 |     if(EXISTS $ENV{CONDA_PREFIX}/lib/libOpenCL${CMAKE_SHARED_LIBRARY_SUFFIX})
 63 |       message(STATUS "Found OpenCL in conda environment '$ENV{CONDA_PREFIX}'")
 64 |       set(CL_INC_DIR $ENV{CONDA_PREFIX}/include)
 65 |       set(CL_LIB_DIR $ENV{CONDA_PREFIX}/lib)
 66 |       set(CL_LIBNAME OpenCL)
 67 |     # Windows:
 68 |     elseif(EXISTS $ENV{CONDA_PREFIX}/Library/lib/OpenCL${CMAKE_STATIC_LIBRARY_SUFFIX})
 69 |       message(STATUS "Found OpenCL in conda environment '$ENV{CONDA_PREFIX}'")
 70 |       set(CL_INC_DIR $ENV{CONDA_PREFIX}/Library/include)
 71 |       set(CL_LIB_DIR $ENV{CONDA_PREFIX}/Library/lib)
 72 |       set(CL_LIBNAME OpenCL)
 73 |     endif()
 74 | 
 75 |   endif(DEFINED ENV{CONDA_PREFIX})
 76 | endif(NOT CL_INC_DIR)
 77 | 
 78 | if(NOT PYOPENCL_PRETEND_CL_VERSION)
 79 |   if(DEFINED ENV{PYOPENCL_PRETEND_CL_VERSION})
 80 |     set(PYOPENCL_PRETEND_CL_VERSION $ENV{PYOPENCL_PRETEND_CL_VERSION})
 81 |   endif()
 82 | endif()
 83 | 
 84 | if(PYOPENCL_PRETEND_CL_VERSION)
 85 |   # Split the dotted version string (e.g. "1.2") into a CMake list
 86 |   string(REPLACE "." ";" VERSION_LIST ${PYOPENCL_PRETEND_CL_VERSION})
 87 | 
 88 |   # Get the major and minor version numbers (both list entries must exist)
 89 |   list(GET VERSION_LIST 0 MAJOR)
 90 |   list(GET VERSION_LIST 1 MINOR)
 91 | 
 92 |   # Encode as 0x1000*MAJOR + 0x10*MINOR, e.g. 1.2 -> 0x1020
 93 |   math(EXPR ARG "0x1000*${MAJOR} + 0x10*${MINOR}")
 94 |   message(STATUS "Pretending to use OpenCL version ${PYOPENCL_PRETEND_CL_VERSION} (${ARG})")
 95 |   set(PYOPENCL_PRETEND_CL_VERSION ${ARG})  # from here on the variable holds the encoded number
 96 | endif()
 97 | 
 98 | message(STATUS "CL_INC_DIR ${CL_INC_DIR}")
 99 | message(STATUS "CL_LIB_DIR ${CL_LIB_DIR}")
100 | message(STATUS "CL_LIBNAME ${CL_LIBNAME}")
101 | 
102 | # }}}
103 | 
104 | # {{{ Get version information
105 | 
106 | find_program(GIT git)
107 | 
108 | if(GIT AND EXISTS ${CMAKE_SOURCE_DIR}/.git)
109 |   # Exact tag match => released version (git runs in the source tree, whose .git was checked above)
110 |   execute_process(COMMAND "${GIT}" describe --exact-match --dirty=*
111 |              WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
112 |              OUTPUT_VARIABLE PYOPENCL_VERSION_GIT
113 |              RESULT_VARIABLE git_result
114 |              OUTPUT_STRIP_TRAILING_WHITESPACE
115 |              ERROR_QUIET)
116 |   if(NOT ${git_result} EQUAL 0)
117 |     # No exact tag match => development version
118 |     execute_process(COMMAND "${GIT}" describe --long --always --dirty=*
119 |              WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
120 |              OUTPUT_VARIABLE PYOPENCL_VERSION_GIT
121 |              OUTPUT_STRIP_TRAILING_WHITESPACE)
122 |     set(PYOPENCL_REL "(dev)")
123 |   else()
124 |     set(PYOPENCL_REL "(release)")
125 |   endif()
126 | else()
127 |   set(PYOPENCL_VERSION_GIT "v${PROJECT_VERSION}")
128 |   set(PYOPENCL_REL "(non-git)")
129 | endif()
130 | 
131 | # }}}
132 | 
133 | find_package(Python COMPONENTS Interpreter Development.Module NumPy REQUIRED)
134 | 
135 | if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
136 |   set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
137 |   set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
138 | endif()
139 | 
140 | # {{{ Detect nanobind and import it
141 | 
142 | execute_process(  # ask the interpreter located by find_package(Python) for the nanobind version
143 |   COMMAND
144 |   "${Python_EXECUTABLE}" -c "import nanobind; print(nanobind.__version__)"
145 |   OUTPUT_VARIABLE NANOBIND_VERSION
146 |   OUTPUT_STRIP_TRAILING_WHITESPACE COMMAND_ECHO STDOUT)
147 | 
148 | execute_process(  # ... and for the directory holding nanobind's CMake package files
149 |   COMMAND
150 |   "${Python_EXECUTABLE}" -c "import nanobind; print(nanobind.cmake_dir())"
151 |   OUTPUT_VARIABLE NANOBIND_DIR
152 |   OUTPUT_STRIP_TRAILING_WHITESPACE COMMAND_ECHO STDOUT)
153 | list(APPEND CMAKE_PREFIX_PATH "${NANOBIND_DIR}")  # lets find_package(nanobind CONFIG) locate it
154 | 
155 | # }}}
156 | 
157 | link_directories(${CL_LIB_DIR})
158 | include_directories(${CL_INC_DIR} ${Python_NumPy_INCLUDE_DIRS})
159 | 
160 | find_package(nanobind CONFIG REQUIRED)
161 | 
162 | set(OpenCL_ROOT ${CL_LIB_DIR})
163 | set(OpenCL_INCLUDE_DIR ${CL_INC_DIR})
164 | set(OpenCL_LIBRARY ${CL_LIBNAME})
165 | find_package(OpenCL REQUIRED)
166 | 
167 | nanobind_add_module(
168 |   _cl
169 |   NB_STATIC # Build static libnanobind (the extension module itself remains a shared library)
170 |   LTO
171 |   NOMINSIZE
172 |   src/wrap_constants.cpp
173 |   src/wrap_cl.cpp
174 |   src/wrap_cl_part_1.cpp
175 |   src/wrap_cl_part_2.cpp
176 |   src/wrap_mempool.cpp
177 |   src/bitlog.cpp
178 | )
179 | 
180 | target_link_libraries(_cl PRIVATE ${OpenCL_LIBRARY})
181 | 
182 | target_compile_definitions(_cl
183 |   PRIVATE
184 |   PYGPU_PACKAGE=pyopencl
185 |   PYGPU_PYOPENCL
186 | )
187 | 
188 | if (PYOPENCL_PRETEND_CL_VERSION)
189 |   target_compile_definitions(
190 |     _cl PRIVATE PYOPENCL_PRETEND_CL_VERSION=${PYOPENCL_PRETEND_CL_VERSION})
191 | endif()
192 | 
193 | if (PYOPENCL_ENABLE_GL)
194 |   target_compile_definitions(_cl PRIVATE HAVE_GL=1)
195 | endif()
196 | 
197 | if (PYOPENCL_TRACE)
198 |   target_compile_definitions(_cl PRIVATE PYOPENCL_TRACE=1)
199 | endif()
200 | 
201 | if (PYOPENCL_USE_SHIPPED_EXT)
202 |   target_compile_definitions(_cl PRIVATE PYOPENCL_USE_SHIPPED_EXT=1)
203 | endif()
204 | 
205 | install(TARGETS _cl LIBRARY DESTINATION pyopencl)
206 | 
207 | 
208 | # {{{ Print configuration
209 | 
210 | message("==============================")
211 | message("PyOpenCL ${PYOPENCL_VERSION_GIT} ${PYOPENCL_REL} configuration: ")
212 | message("  PyOpenCL options: PYOPENCL_TRACE=${PYOPENCL_TRACE} PYOPENCL_ENABLE_GL=${PYOPENCL_ENABLE_GL} PYOPENCL_USE_SHIPPED_EXT=${PYOPENCL_USE_SHIPPED_EXT} PYOPENCL_PRETEND_CL_VERSION=${PYOPENCL_PRETEND_CL_VERSION}")
213 | message("  OpenCL:           ${OpenCL_LIBRARIES} [${OpenCL_VERSION_STRING}]")
214 | message("  Python:           ${Python_EXECUTABLE} [${Python_VERSION}]")
215 | message("  Build type:       ${CMAKE_BUILD_TYPE}")
216 | message("  C++ compiler:     ${CMAKE_CXX_COMPILER} [${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}]")
217 | message("  CMake:            ${CMAKE_COMMAND} [${CMAKE_VERSION}]")
218 | message("  Nanobind:         ${NANOBIND_DIR} [${NANOBIND_VERSION}]")
219 | message("  Build tool:       ${CMAKE_MAKE_PROGRAM}")
220 | message("==============================")
221 | 
222 | # }}}
223 | 
224 | # vim: foldmethod=marker:sw=2
225 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | PyOpenCL is licensed to you under the MIT/X Consortium license:
 2 | 
 3 | Copyright (c) 2009-13 Andreas Klöckner and Contributors.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person
 6 | obtaining a copy of this software and associated documentation
 7 | files (the "Software"), to deal in the Software without
 8 | restriction, including without limitation the rights to use,
 9 | copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the
11 | Software is furnished to do so, subject to the following
12 | conditions:
13 | 
14 | The above copyright notice and this permission notice shall be
15 | included in all copies or substantial portions of the Software.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 | OTHER DEALINGS IN THE SOFTWARE.
25 | 
26 | PyOpenCL includes derivatives of parts of the `Thrust
27 | <https://github.com/NVIDIA/thrust>`_ computing package (in particular the scan
28 | implementation). These parts are licensed as follows:
29 | 
30 |     Copyright 2008-2011 NVIDIA Corporation
31 | 
32 |     Licensed under the Apache License, Version 2.0 (the "License");
33 |     you may not use this file except in compliance with the License.
34 |     You may obtain a copy of the License at
35 | 
36 |         <https://www.apache.org/licenses/LICENSE-2.0>
37 | 
38 |     Unless required by applicable law or agreed to in writing, software
39 |     distributed under the License is distributed on an "AS IS" BASIS,
40 |     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
41 |     See the License for the specific language governing permissions and
42 |     limitations under the License.
43 | 
44 | .. note::
45 | 
46 |     If you use Apache-licensed parts, be aware that these may be incompatible
47 |     with software licensed exclusively under GPL2.  (Most software is licensed
48 |     as GPL2 or later, in which case this is not an issue.)
49 | 
50 | PyOpenCL includes parts of the Random123 suite of random number generators:
51 | 
52 |     Copyright 2010-2012, D. E. Shaw Research.
53 |     All rights reserved.
54 | 
55 |     Redistribution and use in source and binary forms, with or without
56 |     modification, are permitted provided that the following conditions are
57 |     met:
58 | 
59 |     * Redistributions of source code must retain the above copyright
60 |       notice, this list of conditions, and the following disclaimer.
61 | 
62 |     * Redistributions in binary form must reproduce the above copyright
63 |       notice, this list of conditions, and the following disclaimer in the
64 |       documentation and/or other materials provided with the distribution.
65 | 
66 |     * Neither the name of D. E. Shaw Research nor the names of its
67 |       contributors may be used to endorse or promote products derived from
68 |       this software without specific prior written permission.
69 | 
70 |     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
71 |     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
72 |     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
73 |     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
74 |     OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
75 |     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
76 |     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
77 |     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
78 |     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
79 |     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
80 |     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
81 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | PyOpenCL: Pythonic Access to OpenCL, with Arrays and Algorithms
 2 | ===============================================================
 3 | 
 4 | .. |badge-gitlab-ci| image:: https://gitlab.tiker.net/inducer/pyopencl/badges/main/pipeline.svg
 5 |     :alt: Gitlab Build Status
 6 |     :target: https://gitlab.tiker.net/inducer/pyopencl/commits/main
 7 | .. |badge-github-ci| image:: https://github.com/inducer/pyopencl/actions/workflows/ci.yml/badge.svg
 8 |     :alt: Github Build Status
 9 |     :target: https://github.com/inducer/pyopencl/actions/workflows/ci.yml
10 | .. |badge-pypi| image:: https://badge.fury.io/py/pyopencl.svg
11 |     :alt: Python Package Index Release Page
12 |     :target: https://pypi.org/project/pyopencl/
13 | .. |badge-zenodo| image:: https://zenodo.org/badge/1575307.svg
14 |     :alt: Zenodo DOI for latest release
15 |     :target: https://zenodo.org/badge/latestdoi/1575307
16 | 
17 | |badge-gitlab-ci| |badge-github-ci| |badge-pypi| |badge-zenodo|
18 | 
19 | PyOpenCL lets you access GPUs and other massively parallel compute
20 | devices from Python. It tries to offer computing goodness in the
21 | spirit of its sister project `PyCUDA <https://mathema.tician.de/software/pycuda>`__:
22 | 
23 | * Object cleanup tied to lifetime of objects. This idiom, often
24 |   called `RAII <https://en.wikipedia.org/wiki/Resource_Acquisition_Is_Initialization>`__
25 |   in C++, makes it much easier to write correct, leak- and
26 |   crash-free code.
27 | 
28 | * Completeness. PyOpenCL puts the full power of OpenCL's API at
29 |   your disposal, if you wish.  Every obscure ``get_info()`` query and
30 |   all CL calls are accessible.
31 | 
32 | * Automatic Error Checking. All CL errors are automatically
33 |   translated into Python exceptions.
34 | 
35 | * Speed. PyOpenCL's base layer is written in C++, so all the niceties
36 |   above are virtually free.
37 | 
38 | * Helpful and complete `Documentation <https://documen.tician.de/pyopencl>`__
39 |   as well as a `Wiki <https://wiki.tiker.net/PyOpenCL>`__.
40 | 
41 | * Liberal license. PyOpenCL is open-source under the
42 |   `MIT license <https://en.wikipedia.org/wiki/MIT_License>`__
43 |   and free for commercial, academic, and private use.
44 | 
45 | * Broad support. PyOpenCL was tested and works with Apple's, AMD's, and Nvidia's
46 |   CL implementations.
47 | 
48 | Simple 4-step `install instructions <https://documen.tician.de/pyopencl/misc.html#installation>`__
49 | using Conda on Linux and macOS (that also install a working OpenCL implementation!)
50 | can be found in the `documentation <https://documen.tician.de/pyopencl/>`__.
51 | 
52 | What you'll need if you do *not* want to use the convenient instructions above and
53 | instead build from source:
54 | 
55 | * g++/clang new enough to be compatible with nanobind (specifically, full support of C++17 is needed)
56 | * `numpy <https://numpy.org>`__, and
57 | * an OpenCL implementation. (See this `howto <https://wiki.tiker.net/OpenCLHowTo>`__
58 |   for how to get one.)
59 | 
60 | Links
61 | -----
62 | 
63 | * `Documentation <https://documen.tician.de/pyopencl>`__
64 |   (read how things work)
65 | * `Python package index <https://pypi.python.org/pypi/pyopencl>`__
66 |   (download releases, including binary wheels for Linux, macOS, Windows)
67 | * `Conda Forge <https://anaconda.org/conda-forge/pyopencl>`__
68 |   (download binary packages for Linux, macOS, Windows)
69 | * `Github <https://github.com/inducer/pyopencl>`__
70 |   (get latest source code, file bugs)
71 | 


--------------------------------------------------------------------------------
/TODOs:
--------------------------------------------------------------------------------
 1 | - *_from_int_ptr, register with metaclass
 2 | - generic_info
 3 | - Incorporate fixes in C++ stuff from after the fork
 4 | - compare and tests
 5 | - MemoryPool
 6 | - enqueue_nd_range_kernel size/offset mess
 7 | 
 8 | - CommandQueue.set_property
 9 | - GLBuffer
10 | - GLRenderBuffer
11 | - GLTexture
12 | - get_gl_context_info_khr
13 | - ?clEnqueueNativeKernel
14 | 
15 | - Buffer interface functions should really use new-style buffer interface
16 |   (old-style does not work in Py3)
17 |   https://github.com/numpy/numpy/issues/4747
18 | 


--------------------------------------------------------------------------------
/contrib/cldis.py:
--------------------------------------------------------------------------------
 1 | __copyright__ = "Copyright (C) 2022 Isuru Fernando"
 2 | 
 3 | __license__ = """
 4 | Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | of this software and associated documentation files (the "Software"), to deal
 6 | in the Software without restriction, including without limitation the rights
 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the Software is
 9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | """
22 | 
23 | """
24 | cldis.py
25 | 
26 | A script to compile and print the native code for an OpenCL kernel.
27 | 
28 | Usage: python cldis.py prog.cl <ptx/sass/asm> <build options>
29 | """
30 | 
31 | import glob
32 | import os
33 | import re
34 | import subprocess
35 | import sys
36 | import tempfile
37 | 
38 | 
39 | def main(ctx, tmp_dir, cl_str, output=None, build_options=()):  # build cl_str for ctx, print native code per binary
40 |     device = ctx.devices[0]  # only the first device of the context is inspected
41 |     platform = device.platform
42 |     if platform.name == "NVIDIA CUDA":
43 |         supported_outputs = ["ptx", "sass"]
44 |     elif platform.name == "Portable Computing Language":
45 |         if device.name.startswith("NVIDIA"):
46 |             supported_outputs = ["ptx", "sass"]
47 |         elif device.name.startswith("pthread") or device.name.startswith("cpu"):
48 |             supported_outputs = ["asm"]
49 |         else:
50 |             raise NotImplementedError(f"Unknown pocl device '{device.name}'")
51 |     else:
52 |         raise NotImplementedError(f"Unknown opencl device '{device}'")
53 |     if output is None:
54 |         output = supported_outputs[0]  # default: first format offered for this platform/device
55 |     else:
56 |         assert output in supported_outputs
57 | 
58 |     prg = cl.Program(ctx, cl_str).build(options=build_options,  # NOTE: `cl` is bound by the __main__ block, not here
59 |             cache_dir=os.path.join(tmp_dir, "cache"))
60 | 
61 |     for binary in prg.binaries:
62 |         if output in ["ptx", "sass"]:
63 |             res = binary[binary.index(b"// Generated"):].decode("utf-8")  # text from the "// Generated" marker on
64 |             if output == "sass":
65 |                 with open(os.path.join(tmp_dir, "cl.ptx"), "w") as f:
66 |                     f.write(res)
67 |                 tgt = re.findall(r".target sm_[0-9]*", res, re.MULTILINE)[0]  # first ".target sm_NN" occurrence
68 |                 gpu_name = tgt[8:]  # drop the 8-character ".target " prefix, leaving "sm_NN"
69 |                 subprocess.check_call(["ptxas", "cl.ptx", "--verbose",
70 |                     f"--gpu-name={gpu_name}", "--warn-on-spills"], cwd=tmp_dir)
71 |                 res = subprocess.check_output(["cuobjdump", "-sass", "elf.o"],  # presumably ptxas's default output name; TODO confirm
72 |                         cwd=tmp_dir).decode("utf-8")
73 | 
74 |         elif output == "asm" and platform.name == "Portable Computing Language":
75 |             so = glob.glob(f"{tmp_dir}/**/*.so", recursive=True)[0]  # first shared object found anywhere under tmp_dir
76 |             res = subprocess.check_output(["objdump", "-d", so]).decode("utf-8")
77 | 
78 |         print(res)
79 | 
80 | 
81 | if __name__ == "__main__":
82 |     with tempfile.TemporaryDirectory() as tmp_dir:  # all intermediate files live here and are removed on exit
83 |         os.environ["POCL_CACHE_DIR"] = os.path.join(tmp_dir, "pocl_cache")  # set before pyopencl is imported below
84 |         import pyopencl as cl  # binds the module-level name `cl` that main() relies on
85 |         ctx = cl.create_some_context()
86 |         cl_file = sys.argv[1]  # required: path of the OpenCL source file
87 |         with open(cl_file) as f:
88 |             cl_str = f.read()
89 |         output = sys.argv[2] if len(sys.argv) >= 3 else None  # optional output format; None lets main() pick
90 |         build_options = sys.argv[3:] if len(sys.argv) >= 4 else []  # any remaining arguments are build options
91 |         main(ctx, tmp_dir, cl_str, output, build_options)
92 | 


--------------------------------------------------------------------------------
/contrib/fortran-to-opencl/README:
--------------------------------------------------------------------------------
 1 | Experimental Fortran-to-OpenCL translator
 2 | -----------------------------------------
 3 | 
 4 | This is a highly experimental Fortran-to-OpenCL translator. Its purpose is to
 5 | translate computational kernels into OpenCL-like C. It doesn't
 6 | auto-parallelize. My purpose in writing this was to convert a few
 7 | special-function evaluators.
 8 | 
 9 | The best it can hope for at the moment is to automate most of the process so
10 | that you'll only have to fix up a few things manually afterwards. It further
11 | only deals with the subset of Fortran 77 that I needed. Quite a number of
12 | things are unimplemented.  Patches are welcome.
13 | 
14 | Andreas Kloeckner <inform@tiker.net>
15 | 
16 | Dependencies:
17 | 
18 | - cnd
19 |   http://github.com/inducer/cnd
20 | 
21 | - cgen
22 |   http://github.com/inducer/cgen
23 | 
24 | - pymbolic
25 |   http://github.com/inducer/pymbolic
26 | 
27 | - fparser
28 |   http://code.google.com/p/f2py
29 |   with fix from http://code.google.com/p/f2py/issues/detail?id=32
30 | 


--------------------------------------------------------------------------------
/contrib/pyopencl.vim:
--------------------------------------------------------------------------------
 1 | " Vim highlighting for PyOpenCL
 2 | " -----------------------------
 3 | "
 4 | " (C) Andreas Kloeckner 2011, MIT license
 5 | "
 6 | " Uses parts of mako.vim by Armin Ronacher.
 7 | "
 8 | " Installation:
 9 | " Just drop this file into ~/.vim/syntax/pyopencl.vim
10 | "
11 | " Then do 
12 | " :set filetype=pyopencl
13 | " and use 
14 | " """//CL// ...code..."""
15 | " for OpenCL code included in your Python file.
16 | "
17 | " You may also include a line
18 | " vim: filetype=pyopencl.python
19 | " at the end of your file to set the file type automatically.
20 | "
21 | " Optional: Install opencl.vim from
22 | " http://www.vim.org/scripts/script.php?script_id=3157
23 | 
24 | runtime! syntax/python.vim
25 | 
26 | unlet b:current_syntax
27 | try
28 |   syntax include @clCode syntax/opencl.vim
29 | catch
30 |   syntax include @clCode syntax/c.vim
31 | endtry
32 | 
33 | unlet b:current_syntax
34 | syn include @pythonTop syntax/python.vim
35 | 
36 | " {{{ mako
37 | 
38 | syn region clmakoLine start="^\s*%" skip="\\$" end="$"
39 | syn region clmakoVariable start=#\${# end=#}# contains=@pythonTop
40 | syn region clmakoBlock start=#<%!# end=#%># keepend contains=@pythonTop
41 | 
42 | syn match clmakoAttributeKey containedin=clmakoTag contained "[a-zA-Z_][a-zA-Z0-9_]*="
43 | syn region clmakoAttributeValue containedin=clmakoTag contained start=/"/ skip=/\\"/ end=/"/
44 | syn region clmakoAttributeValue containedin=clmakoTag contained start=/'/ skip=/\\'/ end=/'/
45 | 
46 | syn region clmakoTag start="</\?%\(def\|call\|page\|include\|namespace\|inherit\|self:[_[:alnum:]]\+\)\>" end="/\?>"
47 | 
48 | " The C highlighter's paren error detection screws up highlighting of 
49 | " Mako variables in C parens--turn it off.
50 | 
51 | syn clear cParen
52 | syn clear cParenError
53 | if !exists("c_no_bracket_error")
54 |   syn clear cBracket
55 | endif
56 | 
57 | syn cluster clmakoCode contains=clmakoLine,clmakoVariable,clmakoBlock,clmakoTag
58 | 
59 | hi link clmakoLine Preproc
60 | hi link clmakoVariable Preproc
61 | hi link clmakoBlock Preproc
62 | hi link clmakoTag Define
63 | hi link clmakoAttributeKey String
64 | hi link clmakoAttributeValue String
65 | 
66 | " }}}
67 | 
68 | syn region pythonCLString
69 |       \ start=+[uU]\=\z('''\|"""\)//CL\(:[a-zA-Z_0-9]\+\)\?//+ end="\z1" keepend
70 |       \ contains=@clCode,@clmakoCode
71 | 
72 | syn region pythonCLRawString
73 |       \ start=+[uU]\=[rR]\z('''\|"""\)//CL\(:[a-zA-Z_0-9]\+\)\?//+ end="\z1" keepend
74 |       \ contains=@clCode,@clmakoCode
75 | 
76 | " Uncomment if you still want the code highlighted as a string.
77 | " hi link pythonCLString String
78 | " hi link pythonCLRawString String
79 | 
80 | syntax sync fromstart
81 | 
82 | let b:current_syntax = "pyopencl"
83 | 
84 | " vim: foldmethod=marker
85 | 


--------------------------------------------------------------------------------
/doc/.gitignore:
--------------------------------------------------------------------------------
1 | constants.inc
2 | 


--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?= -W -n
 7 | SPHINXBUILD   ?= python $(shell which sphinx-build)
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | constants:
16 | 	python make_constants.py > constants.inc
17 | 
18 | .PHONY: help Makefile
19 | 
20 | # Catch-all target: route all unknown targets to Sphinx using the new
21 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
22 | %: Makefile constants
23 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
24 | 


--------------------------------------------------------------------------------
/doc/algorithm.rst:
--------------------------------------------------------------------------------
  1 | Parallel Algorithms
  2 | ===================
  3 | 
  4 | .. include:: subst.rst
  5 | 
  6 | Element-wise expression evaluation ("map")
  7 | ------------------------------------------
  8 | 
  9 | .. module:: pyopencl.elementwise
 10 | 
 11 | Evaluating involved expressions on :class:`pyopencl.array.Array` instances by
 12 | using overloaded operators can be somewhat inefficient, because a new temporary
 13 | is created for each intermediate result. The functionality in the module
 14 | :mod:`pyopencl.elementwise` contains tools to help generate kernels that
 15 | evaluate multi-stage expressions on one or several operands in a single pass.
 16 | 
 17 | .. autoclass:: ElementwiseKernel
 18 | 
 19 | Here's a usage example:
 20 | 
 21 | .. literalinclude:: ../examples/demo_elementwise.py
 22 | 
 23 | (You can find this example as
 24 | :download:`examples/demo_elementwise.py <../examples/demo_elementwise.py>`
 25 | in the PyOpenCL distribution.)
 26 | 
 27 | .. _custom-reductions:
 28 | 
 29 | Sums and counts ("reduce")
 30 | --------------------------
 31 | 
 32 | .. module:: pyopencl.reduction
 33 | 
 34 | .. autoclass:: ReductionKernel
 35 | 
 36 | Here's a usage example::
 37 | 
 38 |     a = pyopencl.array.arange(queue, 400, dtype=numpy.float32)
 39 |     b = pyopencl.array.arange(queue, 400, dtype=numpy.float32)
 40 | 
 41 |     krnl = ReductionKernel(ctx, numpy.float32, neutral="0",
 42 |             reduce_expr="a+b", map_expr="x[i]*y[i]",
 43 |             arguments="__global float *x, __global float *y")
 44 | 
 45 |     my_dot_prod = krnl(a, b).get()
 46 | 
 47 | .. _custom-scan:
 48 | 
 49 | Prefix Sums ("scan")
 50 | --------------------
 51 | 
 52 | .. module:: pyopencl.scan
 53 | 
 54 | .. |scan_extra_args| replace:: a list of tuples *(name, value)* specifying
 55 |     extra arguments to pass to the scan procedure. For version 2013.1,
 56 |     *value* must be of a :mod:`numpy` sized scalar type. As of version 2013.2,
 57 |     *value* may also be a :class:`pyopencl.array.Array`.
 58 | .. |preamble| replace:: A snippet of C that is inserted into the compiled kernel
 59 |     before the actual kernel function. May be used for, e.g. type definitions
 60 |     or include statements.
 61 | 
 62 | A prefix sum is a running sum of an array, as provided by
 63 | e.g. :func:`numpy.cumsum`::
 64 | 
 65 |     >>> import numpy as np
 66 |     >>> a = [1,1,1,1,1,2,2,2,2,2]
 67 |     >>> np.cumsum(a)
 68 |     array([ 1,  2,  3,  4,  5,  7,  9, 11, 13, 15])
 69 | 
 70 | This is a very simple example of what a scan can do. It turns out that scans
 71 | are significantly more versatile. They are a basic building block of many
 72 | non-trivial parallel algorithms. Many of the operations enabled by scans seem
 73 | difficult to parallelize because of loop-carried dependencies.
 74 | 
 75 | .. seealso::
 76 | 
 77 |     `Prefix sums and their applications <https://doi.org/10.1184/R1/6608579.v1>`__, by Guy Blelloch.
 78 |         This article gives an overview of some surprising applications of scans.
 79 | 
 80 |     :ref:`predefined-scans`
 81 |         These operations built into PyOpenCL are realized using
 82 |         :class:`GenericScanKernel`.
 83 | 
 84 | Usage Example
 85 | ^^^^^^^^^^^^^
 86 | 
 87 | This example illustrates the implementation of a simplified version of
 88 | :func:`pyopencl.algorithm.copy_if`,
 89 | which copies integers from an array into the (variable-size) output if they are
 90 | greater than 300::
 91 | 
 92 |     knl = GenericScanKernel(
 93 |             ctx, np.int32,
 94 |             arguments="__global int *ary, __global int *out",
 95 |             input_expr="(ary[i] > 300) ? 1 : 0",
 96 |             scan_expr="a+b", neutral="0",
 97 |             output_statement="""
 98 |                 if (prev_item != item) out[item-1] = ary[i];
 99 |                 """)
100 | 
101 |     out = a.copy()
102 |     knl(a, out)
103 | 
104 |     a_host = a.get()
105 |     out_host = a_host[a_host > 300]
106 | 
107 |     assert (out_host == out.get()[:len(out_host)]).all()
108 | 
109 | The value being scanned over is a number of flags indicating whether each array
110 | element is greater than 300. These flags are computed by *input_expr*. The
111 | prefix sum over this array gives a running count of array items greater than
112 | 300. The *output_statement* then compares ``prev_item`` (the previous item's scan
113 | result, i.e. index) to ``item`` (the current item's scan result, i.e.
114 | index). If they differ, i.e. if the predicate was satisfied at this
115 | position, then the item is stored in the output at the computed index.
116 | 
117 | This example does not make use of the following advanced features also available
118 | in PyOpenCL:
119 | 
120 | * Segmented scans
121 | 
122 | * Access to the previous item in *input_expr* (e.g. for comparisons)
123 |   See the `implementation <https://github.com/inducer/pyopencl/blob/36afe57784368e8d2505bc7cad8df964ba3c0264/pyopencl/algorithm.py#L226>`__
124 |   of :func:`pyopencl.algorithm.unique` for an example.
125 | 
126 | Making Custom Scan Kernels
127 | ^^^^^^^^^^^^^^^^^^^^^^^^^^
128 | 
129 | .. versionadded:: 2013.1
130 | 
131 | .. autoclass:: GenericScanKernel
132 | 
133 | Debugging aids
134 | ~~~~~~~~~~~~~~
135 | 
136 | .. autoclass:: GenericDebugScanKernel
137 | 
138 | .. _predefined-scans:
139 | 
140 | Simple / Legacy Interface
141 | ^^^^^^^^^^^^^^^^^^^^^^^^^
142 | 
143 | .. class:: ExclusiveScanKernel(ctx, dtype, scan_expr, neutral, name_prefix="scan", options=[], preamble="", devices=None)
144 | 
145 |     Generates a kernel that can compute a `prefix sum
146 |     <https://en.wikipedia.org/wiki/Prefix_sum>`__
147 |     using any associative operation given as *scan_expr*.
148 |     *scan_expr* uses the formal values "a" and "b" to indicate two operands of
149 |     an associative binary operation. *neutral* is the neutral element
150 |     of *scan_expr*, obeying *scan_expr(a, neutral) == a*.
151 | 
152 |     *dtype* specifies the type of the arrays being operated on.
153 |     *name_prefix* is used for kernel names to ensure recognizability
154 |     in profiles and logs. *options* is a list of compiler options to use
155 |     when building. *preamble* specifies a string of code that is
156 |     inserted before the actual kernels. *devices* may be used to restrict
157 |     the set of devices on which the kernel is meant to run. (defaults
158 |     to all devices in the context *ctx*.)
159 | 
160 |     .. method:: __call__(self, input_ary, output_ary=None, allocator=None, queue=None)
161 | 
162 | .. class:: InclusiveScanKernel(ctx, dtype, scan_expr, neutral=None, name_prefix="scan", options=[], preamble="", devices=None)
163 | 
164 |     Works like :class:`ExclusiveScanKernel`.
165 | 
166 |     .. versionchanged:: 2013.1
167 |         *neutral* is now always required.
168 | 
169 | For the array ``[1, 2, 3]``, inclusive scan results in ``[1, 3, 6]``, and exclusive
170 | scan results in ``[0, 1, 3]``.
171 | 
172 | Here's a usage example::
173 | 
174 |     knl = InclusiveScanKernel(context, np.int32, "a+b")
175 | 
176 |     n = 2**20-2**18+5
177 |     rng = np.random.default_rng(seed=42)
178 |     host_data = rng.integers(0, 10, size=n, dtype=np.int32)
179 |     dev_data = cl_array.to_device(queue, host_data)
180 | 
181 |     knl(dev_data)
182 |     assert (dev_data.get() == np.cumsum(host_data, axis=0)).all()
183 | 
184 | Predicated copies ("partition", "unique", ...)
185 | ----------------------------------------------
186 | 
187 | .. module:: pyopencl.algorithm
188 | 
189 | .. autofunction:: copy_if
190 | 
191 | .. autofunction:: remove_if
192 | 
193 | .. autofunction:: partition
194 | 
195 | .. autofunction:: unique
196 | 
197 | Sorting (radix sort)
198 | --------------------
199 | 
200 | .. autoclass:: RadixSort
201 | 
202 |     .. automethod:: __call__
203 | 
204 | Building many variable-size lists
205 | ---------------------------------
206 | 
207 | .. autoclass:: ListOfListsBuilder
208 | 
209 | Bitonic Sort
210 | ------------
211 | 
212 | .. module:: pyopencl.bitonic_sort
213 | 
214 | .. autoclass:: BitonicSort
215 | 


--------------------------------------------------------------------------------
/doc/conf.py:
--------------------------------------------------------------------------------
 1 | from urllib.request import urlopen
 2 | 
 3 | 
 4 | _conf_url = \
 5 |         "https://raw.githubusercontent.com/inducer/sphinxconfig/main/sphinxconfig.py"
 6 | with urlopen(_conf_url) as _inf:
 7 |     exec(compile(_inf.read(), _conf_url, "exec"), globals())
 8 | 
 9 | exclude_patterns = ["subst.rst"]
10 | 
11 | copyright = "2009-21, Andreas Kloeckner"
12 | 
13 | ver_dic = {}
14 | with open("../pyopencl/version.py") as ver_file:
15 |     ver_src = ver_file.read()
16 | exec(compile(ver_src, "../pyopencl/version.py", "exec"), ver_dic)
17 | version = ".".join(str(x) for x in ver_dic["VERSION"])
18 | # The full version, including alpha/beta/rc tags.
19 | release = ver_dic["VERSION_TEXT"]
20 | 
21 | intersphinx_mapping = {
22 |     "python": ("https://docs.python.org/3", None),
23 |     "numpy": ("https://numpy.org/doc/stable/", None),
24 |     "mako": ("https://docs.makotemplates.org/en/latest", None),
25 |     "pytools": ("https://documen.tician.de/pytools", None),
26 | }
27 | 


--------------------------------------------------------------------------------
/doc/howto.rst:
--------------------------------------------------------------------------------
  1 | How-tos
  2 | =======
  3 | 
  4 | How to use struct types with PyOpenCL
  5 | -------------------------------------
  6 | 
  7 | We import and initialize PyOpenCL as usual:
  8 | 
  9 | .. doctest::
 10 |     :options: +ELLIPSIS
 11 | 
 12 |     >>> import numpy as np
 13 |     >>> import pyopencl as cl
 14 |     >>> import pyopencl.tools
 15 |     >>> import pyopencl.array
 16 | 
 17 |     >>> ctx = cl.create_some_context(interactive=False)
 18 |     >>> queue = cl.CommandQueue(ctx)
 19 | 
 20 | Then, suppose we would like to declare a struct consisting of an integer and a
 21 | floating point number. We first create a :class:`numpy.dtype` along these
 22 | lines:
 23 | 
 24 | .. doctest::
 25 | 
 26 |     >>> my_struct = np.dtype([("field1", np.int32), ("field2", np.float32)])
 27 |     >>> print(my_struct)
 28 |     [('field1', '<i4'), ('field2', '<f4')]
 29 | 
 30 | .. note::
 31 | 
 32 |     Not all :mod:`numpy` dtypes are supported yet. For example strings (and
 33 |     generally things that have a shape of their own) are not supported.
 34 | 
 35 | OpenCL C may have a different opinion from :mod:`numpy` on how the struct
 36 | should be laid out, for example because of `alignment
 37 | <https://en.wikipedia.org/wiki/Data_structure_alignment>`__. So as a first step, we
 38 | match our dtype against CL's version:
 39 | 
 40 | .. doctest::
 41 | 
 42 |     >>> my_struct, my_struct_c_decl = cl.tools.match_dtype_to_c_struct(
 43 |     ...    ctx.devices[0], "my_struct", my_struct)
 44 |     >>> print(my_struct_c_decl)
 45 |     typedef struct {
 46 |       int field1;
 47 |       float field2;
 48 |     } my_struct;
 49 |     <BLANKLINE>
 50 |     <BLANKLINE>
 51 | 
 52 | We then tell PyOpenCL about our new type.
 53 | 
 54 | .. doctest::
 55 | 
 56 |     >>> my_struct = cl.tools.get_or_register_dtype("my_struct", my_struct)
 57 | 
 58 | Next, we can create some data of that type on the host and transfer it to
 59 | the device:
 60 | 
 61 | .. doctest::
 62 | 
 63 |     >>> ary_host = np.empty(20, my_struct)
 64 |     >>> ary_host["field1"].fill(217)
 65 |     >>> ary_host["field2"].fill(1000)
 66 |     >>> ary_host[13]["field2"] = 12
 67 |     >>> print(ary_host) #doctest: +NORMALIZE_WHITESPACE
 68 |     [(217,  1000.) (217,  1000.) (217,  1000.) (217,  1000.) (217,  1000.)
 69 |      (217,  1000.) (217,  1000.) (217,  1000.) (217,  1000.) (217,  1000.)
 70 |      (217,  1000.) (217,  1000.) (217,  1000.) (217,    12.) (217,  1000.)
 71 |      (217,  1000.) (217,  1000.) (217,  1000.) (217,  1000.) (217,  1000.)]
 72 | 
 73 |     >>> ary = cl.array.to_device(queue, ary_host)
 74 | 
 75 | We can then operate on the array with our own kernels:
 76 | 
 77 | .. doctest::
 78 | 
 79 |     >>> prg = cl.Program(ctx, my_struct_c_decl + """
 80 |     ...     __kernel void set_to_1(__global my_struct *a)
 81 |     ...     {
 82 |     ...         a[get_global_id(0)].field1 = 1;
 83 |     ...     }
 84 |     ...     """).build()
 85 | 
 86 |     >>> evt = prg.set_to_1(queue, ary.shape, None, ary.data)
 87 |     >>> print(ary) #doctest: +NORMALIZE_WHITESPACE
 88 |     [(1,  1000.) (1,  1000.) (1,  1000.) (1,  1000.) (1,  1000.) (1,  1000.)
 89 |      (1,  1000.) (1,  1000.) (1,  1000.) (1,  1000.) (1,  1000.) (1,  1000.)
 90 |      (1,  1000.) (1,    12.) (1,  1000.) (1,  1000.) (1,  1000.) (1,  1000.)
 91 |      (1,  1000.) (1,  1000.)]
 92 | 
 93 | as well as with PyOpenCL's built-in operations:
 94 | 
 95 | .. doctest::
 96 | 
 97 |     >>> from pyopencl.elementwise import ElementwiseKernel
 98 |     >>> elwise = ElementwiseKernel(ctx, "my_struct *a", "a[i].field1 = 2;",
 99 |     ...    preamble=my_struct_c_decl)
100 |     >>> evt = elwise(ary)
101 |     >>> print(ary) #doctest: +NORMALIZE_WHITESPACE
102 |     [(2,  1000.) (2,  1000.) (2,  1000.) (2,  1000.) (2,  1000.) (2,  1000.)
103 |      (2,  1000.) (2,  1000.) (2,  1000.) (2,  1000.) (2,  1000.) (2,  1000.)
104 |      (2,  1000.) (2,    12.) (2,  1000.) (2,  1000.) (2,  1000.) (2,  1000.)
105 |      (2,  1000.) (2,  1000.)]
106 | 


--------------------------------------------------------------------------------
/doc/index.rst:
--------------------------------------------------------------------------------
  1 | Welcome to PyOpenCL's documentation!
  2 | ====================================
  3 | 
  4 | PyOpenCL gives you easy, Pythonic access to the `OpenCL
  5 | <https://www.khronos.org/opencl/>`__ parallel computation API.
  6 | What makes PyOpenCL special?
  7 | 
  8 | * Object cleanup tied to lifetime of objects. This idiom,
  9 |   often called
 10 |   `RAII <https://en.wikipedia.org/wiki/Resource_Acquisition_Is_Initialization>`__
 11 |   in C++, makes it much easier to write correct, leak- and
 12 |   crash-free code.
 13 | 
 14 | * Completeness. PyOpenCL puts the full power of OpenCL's API at your
 15 |   disposal, if you wish. Every obscure ``get_info()`` query and
 16 |   all CL calls are accessible.
 17 | 
 18 | * Automatic Error Checking. All errors are automatically translated
 19 |   into Python exceptions.
 20 | 
 21 | * Speed. PyOpenCL's base layer is written in C++, so all the niceties above
 22 |   are virtually free.
 23 | 
 24 | * Helpful Documentation. You're looking at it. ;)
 25 | 
 26 | * Liberal license. PyOpenCL is open-source under the
 27 |   :ref:`MIT license <license>`
 28 |   and free for commercial, academic, and private use.
 29 | 
 30 | Here's an example, to give you an impression:
 31 | 
 32 | .. literalinclude:: ../examples/demo.py
 33 | 
 34 | (You can find this example as
 35 | :download:`examples/demo.py <../examples/demo.py>` in the PyOpenCL
 36 | source distribution.)
 37 | 
 38 | Tutorials
 39 | =========
 40 | 
 41 | * Gaston Hillar's `two-part article series
 42 |   <https://web.archive.org/web/20190707171427/www.drdobbs.com/open-source/easy-opencl-with-python/240162614>`__
 43 |   in Dr. Dobb's Journal provides a friendly introduction to PyOpenCL.
 44 | * `Simon McIntosh-Smith <http://people.cs.bris.ac.uk/~simonm/>`__
 45 |   and `Tom Deakin <https://www.tomdeakin.com/>`__'s course
 46 |   `Hands-on OpenCL <https://handsonopencl.github.io/>`__ contains
 47 |   both `lecture slides <https://github.com/HandsOnOpenCL/Lecture-Slides/releases>`__
 48 |   and `exercises (with solutions) <https://github.com/HandsOnOpenCL/Exercises-Solutions>`__
 49 |   (The course covers PyOpenCL as well as OpenCL's C and C++ APIs.)
 50 | * PyOpenCL course at `PASI <https://www.bu.edu/pasi>`__: Parts
 51 |   `1 <https://www.youtube.com/watch?v=X9mflbX1NL8>`__
 52 |   `2 <https://www.youtube.com/watch?v=MqvfCE_bKOg>`__
 53 |   `3 <https://www.youtube.com/watch?v=TAvKmV7CuUw>`__
 54 |   `4 <https://www.youtube.com/watch?v=SsuJ0LvZW1Q>`__
 55 |   (YouTube, 2011)
 56 | * PyOpenCL course at `DTU GPULab <http://gpulab.compute.dtu.dk/>`__ and
 57 |   `Simula <https://www.simula.no/>`__ (2011):
 58 |   `Lecture 1 <https://tiker.net/pub/simula-pyopencl-lec1.pdf>`__
 59 |   `Lecture 2 <https://tiker.net/pub/simula-pyopencl-lec2.pdf>`__
 60 |   `Problem set 1 <https://tiker.net/pub/simula-pyopencl-probset1.pdf>`__
 61 |   `Problem set 2 <https://tiker.net/pub/simula-pyopencl-probset2.pdf>`__
 62 | * Ian Johnson's `PyOpenCL tutorial <https://web.archive.org/web/20170907175053/http://enja.org:80/2011/02/22/adventures-in-pyopencl-part-1-getting-started-with-python>`__.
 63 | 
 64 | Software that works with or enhances PyOpenCL
 65 | =============================================
 66 | 
 67 | * Jon Roose's `pyclblas <https://pyclblas.readthedocs.io/en/latest/index.html>`__
 68 |   (`code <https://github.com/jroose/pyclblas>`__)
 69 |   makes BLAS in the form of `clBLAS <https://github.com/clMathLibraries/clBLAS>`__
 70 |   available from within :mod:`pyopencl` code.
 71 | 
 72 |   Two earlier wrappers continue to be available:
 73 |   one by `Eric Hunsberger <https://github.com/hunse/pyopencl_blas>`__ and one
 74 |   by `Lars Ericson <https://lists.tiker.net/pipermail/pyopencl/2015-June/001890.html>`__.
 75 | 
 76 | * Cedric Nugteren provides a wrapper for the
 77 |   `CLBlast <https://github.com/CNugteren/CLBlast>`__
 78 |   OpenCL BLAS library:
 79 |   `PyCLBlast <https://github.com/CNugteren/CLBlast/tree/master/src/pyclblast>`__.
 80 | 
 81 | * Gregor Thalhammer's `gpyfft <https://github.com/geggo/gpyfft>`__ provides a
 82 |   Python wrapper for the OpenCL FFT library clFFT from AMD.
 83 | 
 84 | * Bogdan Opanchuk's `reikna <https://pypi.org/project/reikna/>`__ offers a
 85 |   variety of GPU-based algorithms (FFT, random number generation, matrix
 86 |   multiplication) designed to work with :class:`pyopencl.array.Array` objects.
 87 | 
 88 | * Troels Henriksen, Ken Friis Larsen, and Cosmin Oancea's `Futhark
 89 |   <https://futhark-lang.org/>`__ programming language offers a nice way to code
 90 |   nested-parallel programs with reductions and scans on data in
 91 |   :class:`pyopencl.array.Array` instances.
 92 | 
 93 | * Robbert Harms and Alard Roebroeck's `MOT <https://github.com/robbert-harms/MOT>`__
 94 |   offers a variety of GPU-enabled non-linear optimization algorithms and MCMC
 95 |   sampling routines for parallel optimization and sampling of multiple problems.
 96 | 
 97 | * Vincent Favre-Nicolin's `pyvkfft <https://github.com/vincefn/pyvkfft/>`__
 98 |   makes `vkfft <https://github.com/DTolm/VkFFT>`__ accessible from PyOpenCL.
 99 | 
100 | If you know of a piece of software that you feel should be on this list, please
101 | let me know, or, even better, send a patch!
102 | 
103 | Contents
104 | ========
105 | 
106 | .. toctree::
107 |     :maxdepth: 2
108 | 
109 |     runtime
110 |     runtime_const
111 |     runtime_platform
112 |     runtime_queue
113 |     runtime_memory
114 |     runtime_program
115 |     runtime_gl
116 |     tools
117 |     array
118 |     types
119 |     algorithm
120 |     howto
121 |     misc
122 |     🚀 Github <https://github.com/inducer/pyopencl>
123 |     💾 Download Releases <https://pypi.org/project/pyopencl>
124 | 
125 | Note that this guide does not explain OpenCL programming and technology. Please
126 | refer to the official `Khronos OpenCL documentation <https://www.khronos.org/opencl/>`__
127 | for that.
128 | 
129 | PyOpenCL also has its own `web site <https://mathema.tician.de/software/pyopencl>`__,
130 | where you can find updates, new versions, documentation, and support.
131 | 
132 | Indices and tables
133 | ==================
134 | 
135 | * :ref:`genindex`
136 | * :ref:`modindex`
137 | * :ref:`search`
138 | 


--------------------------------------------------------------------------------
/doc/runtime.rst:
--------------------------------------------------------------------------------
 1 | .. _reference-doc:
 2 | 
 3 | .. include:: subst.rst
 4 | 
 5 | OpenCL Runtime: Basics
 6 | ======================
 7 | 
 8 | Version Queries
 9 | ---------------
10 | 
11 | .. module:: pyopencl
12 | .. moduleauthor:: Andreas Kloeckner <inform@tiker.net>
13 | 
14 | .. data:: VERSION
15 | 
16 |     Gives the numeric version of PyOpenCL as a variable-length tuple
17 |     of integers. Enables easy version checks such as
18 |     ``VERSION >= (0, 93)``.
19 | 
20 | .. data:: VERSION_STATUS
21 | 
22 |     A text string such as ``"rc4"`` or ``"beta"`` qualifying the status
23 |     of the release.
24 | 
25 | .. data:: VERSION_TEXT
26 | 
27 |     The full release name (such as ``"0.93rc4"``) in string form.
28 | 
29 | .. function:: get_cl_header_version()
30 | 
31 |     Return a variable-length tuple of integers representing the
32 |     version of the OpenCL header against which PyOpenCL was
33 |     compiled.
34 | 
35 |     .. versionadded:: 0.92
36 | 
37 | .. _errors:
38 | 
39 | Error Reporting
40 | ---------------
41 | 
42 | .. class:: Error
43 | 
44 |     Base class for all PyOpenCL exceptions.
45 | 
46 | .. class:: MemoryError
47 | 
48 | .. class:: LogicError
49 | 
50 | .. class:: RuntimeError
51 | 
52 | 


--------------------------------------------------------------------------------
/doc/runtime_const.rst:
--------------------------------------------------------------------------------
 1 | OpenCL Runtime: Constants
 2 | =========================
 3 | 
 4 | .. currentmodule:: pyopencl
 5 | 
 6 | .. include:: constants.inc
 7 | 
 8 | .. class:: NameVersion
 9 | 
10 |     Describes the version of a specific feature.
11 | 
12 |     .. note::
13 | 
14 |         Only available with OpenCL 3.0 or newer.
15 | 
16 |     .. versionadded:: 2020.3
17 | 
18 |     .. method:: __init__(version, name)
19 |     .. attribute:: version
20 |     .. attribute:: name
21 | 
22 | .. class:: DeviceTopologyAmd
23 | 
24 |     .. method:: __init__(bus, device, function)
25 |     .. attribute:: type
26 |     .. attribute:: bus
27 |     .. attribute:: device
28 |     .. attribute:: function
29 | 
30 | .. vim: shiftwidth=4
31 | 


--------------------------------------------------------------------------------
/doc/runtime_gl.rst:
--------------------------------------------------------------------------------
 1 | .. include:: subst.rst
 2 | 
 3 | .. _gl-interop:
 4 | 
 5 | OpenCL Runtime: OpenGL Interoperability
 6 | =======================================
 7 | 
 8 | .. currentmodule:: pyopencl
 9 | 
10 | Functionality in this section is only available when PyOpenCL is compiled
11 | with GL support. See :func:`have_gl`.
12 | 
13 | .. versionadded:: 0.91
14 | 
15 | .. function:: have_gl()
16 | 
17 |     Return *True* if PyOpenCL was compiled with OpenGL interoperability,
18 |     otherwise *False*.
19 | 
20 | .. function:: get_gl_sharing_context_properties()
21 | 
22 |     Return a :class:`list` of :class:`context_properties` that will
23 |     allow a newly created context to share the currently active GL
24 |     context.
25 | 
26 | .. function:: get_apple_cgl_share_group()
27 | 
28 |     Get share group handle for current CGL context.
29 | 
30 |     Apple OS X only.
31 | 
32 |     .. versionadded:: 2011.1
33 | 
34 | .. class:: GLBuffer(context, flags, bufobj)
35 | 
36 |     :class:`GLBuffer` inherits from :class:`MemoryObject`.
37 | 
38 |     .. attribute:: gl_object
39 | 
40 | .. class:: GLRenderBuffer(context, flags, bufobj)
41 | 
42 |     :class:`GLRenderBuffer` inherits from :class:`MemoryObject`.
43 | 
44 |     .. attribute:: gl_object
45 | 
46 | .. class:: GLTexture(context, flags, texture_target, miplevel, texture, dims)
47 | 
48 |     :class:`GLTexture` inherits from :class:`Image`. Only available in OpenCL 1.2
49 |     and newer.
50 | 
51 |     .. attribute:: gl_object
52 | 
53 |     .. method:: get_gl_texture_info(param)
54 | 
55 |         See ``gl_texture_info`` for values of *param*.  Only available when
56 |         PyOpenCL is compiled with GL support. See :func:`have_gl`.
57 | 
58 | .. function:: enqueue_acquire_gl_objects(queue, mem_objects, wait_for=None)
59 | 
60 |     *mem_objects* is a list of :class:`MemoryObject` instances.
61 |     |std-enqueue-blurb|
62 | 
63 | .. function:: enqueue_release_gl_objects(queue, mem_objects, wait_for=None)
64 | 
65 |     *mem_objects* is a list of :class:`MemoryObject` instances. |std-enqueue-blurb|
66 | 
67 | .. function:: get_gl_context_info_khr(properties, param_name, platform=None)
68 | 
69 |     Get information on which CL device corresponds to a given
70 |     GL/EGL/WGL/CGL device.
71 | 
72 |     See the :class:`Context` constructor for the meaning of
73 |     *properties* and :class:`gl_context_info` for *param_name*.
74 | 
75 | 
76 |     .. versionchanged:: 2011.2
77 |         Accepts the *platform* argument.  Using *platform* equal to None is
78 |         deprecated as of PyOpenCL 2011.2.
79 | 


--------------------------------------------------------------------------------
/doc/runtime_platform.rst:
--------------------------------------------------------------------------------
  1 | .. include:: subst.rst
  2 | 
  3 | OpenCL Runtime: Platforms, Devices and Contexts
  4 | ===============================================
  5 | 
  6 | .. currentmodule:: pyopencl
  7 | 
  8 | Platform
  9 | --------
 10 | 
 11 | .. function:: get_platforms()
 12 | 
 13 |     Return a list of :class:`Platform` instances.
 14 | 
 15 | .. class:: Platform
 16 | 
 17 |     .. attribute:: info
 18 | 
 19 |         Lower case versions of the :class:`platform_info` constants
 20 |         may be used as attributes on instances of this class
 21 |         to directly query info attributes.
 22 | 
 23 |     .. method:: get_info(param)
 24 | 
 25 |         See :class:`platform_info` for values of *param*.
 26 | 
 27 |     .. method:: get_devices(device_type=device_type.ALL)
 28 | 
 29 |         Return a list of devices matching *device_type*.
 30 |         See :class:`device_type` for values of *device_type*.
 31 | 
 32 |         .. versionchanged:: 2013.2
 33 | 
 34 |             This used to raise an exception if no matching
 35 |             devices were found. Now, it will simply return
 36 |             an empty list.
 37 | 
 38 |     .. automethod:: from_int_ptr
 39 |     .. autoattribute:: int_ptr
 40 | 
 41 |     |comparable|
 42 | 
 43 | Device
 44 | ------
 45 | 
 46 | .. class:: Device
 47 | 
 48 |     Two instances of this class may be compared using *"=="* and *"!="*.
 49 | 
 50 |     .. attribute:: info
 51 | 
 52 |         Lower case versions of the :class:`device_info` constants
 53 |         may be used as attributes on instances of this class
 54 |         to directly query info attributes.
 55 | 
 56 |     .. method:: get_info(param)
 57 | 
 58 |         See :class:`device_info` for values of *param*.
 59 | 
 60 |     .. automethod:: from_int_ptr
 61 |     .. autoattribute:: int_ptr
 62 | 
 63 |     .. attribute:: hashable_model_and_version_identifier
 64 | 
 65 |         An unspecified data type that can be used to (as precisely as possible,
 66 |         given identifying information available in OpenCL) identify a given
 67 |         model and software stack version of a compute device. Note that this
 68 |         identifier does not differentiate between different instances of the
 69 |         same device installed in a single host.
 70 | 
 71 |         The returned data type is hashable.
 72 | 
 73 |         .. versionadded:: 2020.1
 74 | 
 75 |     .. method:: create_sub_devices(properties)
 76 | 
 77 |         *properties* is an array of one (or more) of the forms::
 78 | 
 79 |             [ dpp.EQUALLY, 8]
 80 |             [ dpp.BY_COUNTS, 5, 7, 9, dpp.PARTITION_BY_COUNTS_LIST_END]
 81 |             [ dpp.BY_NAMES, 5, 7, 9, dpp.PARTITION_BY_NAMES_LIST_END]
 82 |             [ dpp.BY_AFFINITY_DOMAIN, dad.L1_CACHE]
 83 | 
 84 |         where ``dpp`` represents :class:`device_partition_property`
 85 |         and ``dad`` represents :class:`device_affinity_domain`.
 86 | 
 87 |         ``PROPERTIES_LIST_END_EXT`` is added automatically.
 88 | 
 89 |         Only available with CL 1.2.
 90 | 
 91 |         .. versionadded:: 2011.2
 92 | 
 93 |     .. method:: device_and_host_timer
 94 | 
 95 |         :returns: a tuple ``(device_timestamp, host_timestamp)``.
 96 | 
 97 |         Only available with CL 2.0.
 98 | 
 99 |         .. versionadded:: 2020.3
100 | 
101 |     .. method:: host_timer
102 | 
103 |         Only available with CL 2.0.
104 | 
105 |         .. versionadded:: 2020.3
106 | 
107 | .. autofunction:: choose_devices
108 | 
109 | Context
110 | -------
111 | 
112 | .. class:: Context(devices=None, properties=None, dev_type=None)
113 | 
114 |     Create a new context. *properties* is a list of key-value
115 |     tuples, where each key must be one of :class:`context_properties`.
116 |     At most one of *devices* and *dev_type* may be not *None*, where
117 |     *devices* is a list of :class:`Device` instances, and
118 |     *dev_type* is one of the :class:`device_type` constants.
119 |     If neither is specified, a context with a *dev_type* of
120 |     :attr:`device_type.DEFAULT` is created.
121 | 
122 |     .. note::
123 | 
124 |         Calling the constructor with no arguments may fail for
125 |         CL drivers that support the OpenCL ICD (which applies to most modern systems).
126 |         If you want similar, just-give-me-a-context-already behavior, we recommend
127 |         :func:`create_some_context`.
128 | 
129 |         See e.g. this
130 |         `explanation by AMD
131 |         <https://web.archive.org/web/20101114195033/https://developer.amd.com/support/KnowledgeBase/Lists/KnowledgeBase/DispForm.aspx?ID=71>`__:
132 | 
133 |             **What has changed?**
134 | 
135 |             In previous beta releases functions such as clGetDeviceIDs() and clCreateContext()
136 |             accepted a NULL value for the platform parameter. This release no longer
137 |             allows this - the platform must be a valid one obtained by using the platform API.
138 | 
139 |     .. note::
140 | 
141 |         Because of how OpenCL changed in order to support Installable Client
142 |         Drivers (ICDs) in OpenCL 1.1, the following will *look* reasonable
143 |         but often actually not work::
144 | 
145 |             import pyopencl as cl
146 |             ctx = cl.Context(dev_type=cl.device_type.ALL)
147 | 
148 |         Instead, make sure to choose a platform when choosing a device by type::
149 | 
150 |             import pyopencl as cl
151 | 
152 |             platforms = cl.get_platforms()
153 |             ctx = cl.Context(
154 |                     dev_type=cl.device_type.ALL,
155 |                     properties=[(cl.context_properties.PLATFORM, platforms[0])])
156 | 
157 |     .. note::
158 | 
159 |         For
160 |         ``context_properties.CL_GL_CONTEXT_KHR``,
161 |         ``context_properties.CL_EGL_DISPLAY_KHR``,
162 |         ``context_properties.CL_GLX_DISPLAY_KHR``,
163 |         ``context_properties.CL_WGL_HDC_KHR``,
164 |         ``context_properties.CL_CGL_SHAREGROUP_KHR``, and
165 |         ``context_properties.CL_CGL_SHAREGROUP_APPLE``,
166 |         the value in the key-value pair is a PyOpenGL context or display
167 |         instance.
168 | 
169 |     .. versionchanged:: 0.91.2
170 |         Constructor argument *dev_type* added.
171 | 
172 |     .. attribute:: info
173 | 
174 |         Lower case versions of the :class:`context_info` constants
175 |         may be used as attributes on instances of this class
176 |         to directly query info attributes.
177 | 
178 |     .. method:: get_info(param)
179 | 
180 |         See :class:`context_info` for values of *param*.
181 | 
182 |     .. automethod:: from_int_ptr
183 |     .. autoattribute:: int_ptr
184 | 
185 |     .. method:: set_default_device_command_queue(dev, queue)
186 | 
187 |     |comparable|
188 | 
189 | .. autofunction:: create_some_context
190 | 


--------------------------------------------------------------------------------
/doc/runtime_queue.rst:
--------------------------------------------------------------------------------
  1 | .. include:: subst.rst
  2 | 
  3 | OpenCL Runtime: Command Queues and Events
  4 | =========================================
  5 | 
  6 | .. currentmodule:: pyopencl
  7 | 
  8 | Command Queue
  9 | -------------
 10 | 
 11 | .. class:: CommandQueue(context, device=None, properties=None)
 12 | 
 13 |     Create a new command queue. *properties* is a bit field
 14 |     consisting of :class:`command_queue_properties` values.
 15 | 
 16 |     If *device* is None, one of the devices in *context* is chosen
 17 |     in an implementation-defined manner.
 18 | 
 19 |     *properties* may be a bitwise combination of values from
 20 |     :class:`queue_properties` (or *None* which is equivalent to
 21 |     passing *0*). This is compatible with both OpenCL 1.x and 2.x.
 22 | 
 23 |     For OpenCL 2.0 and above, *properties* may also be a sequence
 24 |     of keys and values from :class:`queue_properties` as accepted
 25 |     by :c:func:`clCreateCommandQueueWithProperties` (see the OpenCL
 26 |     spec for details). The trailing *0* is added automatically
 27 |     and does not need to be included.
 28 | 
 29 |     A :class:`CommandQueue` may be used as a context manager, like this::
 30 | 
 31 |         with cl.CommandQueue(self.cl_context) as queue:
 32 |             enqueue_stuff(queue, ...)
 33 | 
 34 |     :meth:`finish` is automatically called at the end of the ``with``-delimited
 35 |     context, and further operations on the queue are considered an error.
 36 | 
 37 |     .. versionadded:: 2013.1
 38 | 
 39 |         Context manager capability.
 40 | 
 41 |     .. versionchanged:: 2018.2
 42 | 
 43 |         Added the sequence-of-properties interface for OpenCL 2.
 44 | 
 45 |     .. versionchanged:: 2022.1.4
 46 | 
 47 |         Use of a command queue after its context manager completes
 48 |         is now considered an error. :mod:`pyopencl` will warn about this
 49 |         for a transitionary period and will start raising an exception
 50 |         in 2023.
 51 | 
 52 |     .. attribute:: info
 53 | 
 54 |         Lower case versions of the :class:`command_queue_info` constants
 55 |         may be used as attributes on instances of this class
 56 |         to directly query info attributes.
 57 | 
 58 |     .. method:: get_info(param)
 59 | 
 60 |         See :class:`command_queue_info` for values of *param*.
 61 | 
 62 |     .. method:: set_property(prop, enable)
 63 | 
 64 |         See :class:`command_queue_properties` for possible values of *prop*.
 65 |         *enable* is a :class:`bool`.
 66 | 
 67 |         Unavailable in OpenCL 1.1 and newer.
 68 | 
 69 |     .. method:: flush()
 70 |     .. method:: finish()
 71 | 
 72 |     .. automethod:: from_int_ptr
 73 |     .. autoattribute:: int_ptr
 74 | 
 75 |     |comparable|
 76 | 
 77 | Event
 78 | -----
 79 | 
 80 | .. class:: Event
 81 | 
 82 |     .. attribute:: info
 83 | 
 84 |         Lower case versions of the :class:`event_info` constants
 85 |         may be used as attributes on instances of this class
 86 |         to directly query info attributes.
 87 | 
 88 |     .. attribute:: profile
 89 | 
 90 |         An instance of :class:`ProfilingInfoGetter`.
 91 | 
 92 |     .. method:: get_info(param)
 93 | 
 94 |         See :class:`event_info` for values of *param*.
 95 | 
 96 |     .. method:: get_profiling_info(param)
 97 | 
 98 |         See :class:`profiling_info` for values of *param*.
 99 |         See :attr:`profile` for an easier way of obtaining
100 |         the same information.
101 | 
102 |     .. method:: wait()
103 | 
104 |     .. automethod:: from_int_ptr
105 |     .. autoattribute:: int_ptr
106 | 
107 |     .. method:: set_callback(type, cb)
108 | 
109 |         Add the callback *cb* with signature ``cb(status)`` to the callback
110 |         queue for the event status *type* (one of the values of
111 |         :class:`command_execution_status`, except :attr:`command_execution_status.QUEUED`).
112 | 
113 |         See the OpenCL specification for restrictions on what *cb* may and may not do.
114 | 
115 |         .. versionadded:: 2015.2
116 | 
117 |     |comparable|
118 | 
119 | .. class:: ProfilingInfoGetter
120 | 
121 |    .. attribute:: info
122 | 
123 |         Lower case versions of the :class:`profiling_info` constants
124 |         may be used as attributes on the attribute ``profile`` of this
125 |         class to directly query profiling info.
126 | 
127 |         For example, you may use *evt.profile.end* instead of
128 |         *evt.get_profiling_info(pyopencl.profiling_info.END)*.
129 | 
130 | Event Subclasses
131 | ----------------
132 | 
133 | .. class:: UserEvent(context)
134 | 
135 |     A subclass of :class:`Event`. Only available with OpenCL 1.1 and newer.
136 | 
137 |     .. versionadded:: 0.92
138 | 
139 |     .. method:: set_status(status)
140 | 
141 |         See :class:`command_execution_status` for possible values of *status*.
142 | 
143 | .. class:: NannyEvent
144 | 
145 |     Transfers between host and device return events of this type. They hold
146 |     a reference to the host-side buffer and wait for the transfer to complete
147 |     when they are freed. Therefore, they can safely release the reference to
148 |     the object they're guarding upon destruction.
149 | 
150 |     A subclass of :class:`Event`.
151 | 
152 |     .. versionadded:: 2011.2
153 | 
154 |     .. method:: get_ward()
155 | 
156 |     .. method:: wait()
157 | 
158 |         In addition to performing the same wait as :meth:`Event.wait()`, this
159 |         method also releases the reference to the guarded object.
160 | 
161 | Synchronization Functions
162 | -------------------------
163 | 
164 | .. function:: wait_for_events(events)
165 | 
166 | .. function:: enqueue_barrier(queue, wait_for=None)
167 | 
168 |     Enqueues a barrier operation, which ensures that all queued commands in
169 |     *queue* have finished execution. This command is a synchronization
170 |     point.
171 | 
172 |     .. versionadded:: 0.91.5
173 |     .. versionchanged:: 2011.2
174 |         Takes *wait_for* and returns an :class:`Event`
175 | 
176 | .. function:: enqueue_marker(queue, wait_for=None)
177 | 
178 |     Returns an :class:`Event`.
179 | 
180 |     .. versionchanged:: 2011.2
181 |         Takes *wait_for*.
182 | 
183 | 


--------------------------------------------------------------------------------
/doc/subst.rst:
--------------------------------------------------------------------------------
 1 | .. |comparable| replace:: Instances of this class are hashable, and two
 2 |     instances of this class may be compared using *"=="* and *"!="*.
 3 |     (Hashability was added in version 2011.2.) Two objects are considered
 4 |     the same if the underlying OpenCL object is the same, as established
 5 |     by C pointer equality.
 6 | 
 7 | .. |buf-iface| replace:: must implement the Python buffer interface
 8 |     (e.g. by being a :class:`numpy.ndarray`).
 9 | .. |explain-waitfor| replace:: *wait_for*
10 |     may either be *None* or a list of :class:`pyopencl.Event` instances for
11 |     whose completion this command waits before starting execution.
12 | .. |std-enqueue-blurb| replace:: Returns a new :class:`pyopencl.Event`. |explain-waitfor|
13 | 
14 | .. |copy-depr| replace:: **Note:** This function is deprecated as of PyOpenCL 2011.1.
15 |         Use :func:`~pyopencl.enqueue_copy` instead.
16 | 
17 | .. |glsize| replace:: *global_size* and *local_size* are tuples of identical length, with
18 |         between one and three entries. *global_size* specifies the overall size
19 |         of the computational grid: one work item will be launched for every
20 |         integer point in the grid. *local_size* specifies the workgroup size,
21 |         which must evenly divide the *global_size* in a dimension-by-dimension
22 |         manner.  *None* may be passed for local_size, in which case the
23 |         implementation will use an implementation-defined workgroup size.
24 |         If *g_times_l* is *True*, the global size will be multiplied by the
25 |         local size (which makes the behavior more like Nvidia CUDA). In this case,
26 |         *global_size* and *local_size* also do not have to have the same number
27 |         of entries.
28 | 
29 | .. |empty-nd-range| replace:: *allow_empty_ndrange* is a :class:`bool` indicating
30 |         how an empty NDRange is to be treated, where "empty" means that one or more
31 |         entries of *global_size* or *local_size* are zero. OpenCL itself does not
32 |         allow enqueueing kernels over empty NDRanges. Setting this flag to *True*
33 |         enqueues a marker with a wait list (``clEnqueueMarkerWithWaitList``)
34 |         to obtain the synchronization effects that would have resulted from
35 |         the kernel enqueue.
36 |         Setting *allow_empty_ndrange* to *True* requires OpenCL 1.2 or newer.
37 | 


--------------------------------------------------------------------------------
/doc/tools.rst:
--------------------------------------------------------------------------------
1 | Built-in Utilities
2 | ==================
3 | 
4 | .. automodule:: pyopencl.tools
5 | 


--------------------------------------------------------------------------------
/doc/types.rst:
--------------------------------------------------------------------------------
 1 | OpenCL Type Mapping
 2 | ===================
 3 | 
 4 | .. module:: pyopencl.cltypes
 5 | 
 6 | .. _type-mappings:
 7 | 
 8 | Scalar Types
 9 | ------------
10 | 
11 | For ease of use, the :mod:`pyopencl.cltypes` module provides a convenient mapping
12 | from OpenCL type names to their equivalent :mod:`numpy` types. This saves you
13 | from referring back to the OpenCL spec to see that a ``cl_long`` is a 64-bit
14 | signed integer. Use the module as follows:
15 | 
16 | .. doctest::
17 | 
18 |     >>> import numpy as np
19 |     >>> import pyopencl as cl
20 |     >>> import pyopencl.cltypes
21 |     >>> cl_uint = cl.cltypes.uint(42)   # maps to numpy.uint32
22 |     >>> cl_long = cl.cltypes.long(1235) # maps to numpy.int64
23 |     >>> floats = np.empty((128,), dtype=cl.cltypes.float) # array of numpy.float32
24 | 
25 | .. note::
26 | 
27 |     The OpenCL type ``bool`` does not have a corresponding :mod:`numpy` type
28 |     defined here, because OpenCL does not specify the in-memory representation
29 |     (or even the storage size) for this type.
30 | 
31 | Vector Types
32 | ------------
33 | 
34 | The corresponding vector types are also made available in the same package,
35 | allowing you to easily create :mod:`numpy` arrays with the appropriate memory
36 | layout.
37 | 
38 | .. doctest::
39 | 
40 |     >>> import numpy as np
41 |     >>> array_of_float16 = np.empty((128,), dtype=cl.cltypes.float16) # array of float16
42 | 
43 | 


--------------------------------------------------------------------------------
/doc/upload-docs.sh:
--------------------------------------------------------------------------------
1 | #! /bin/sh
2 | 
3 | rsync --verbose --archive --delete _build/html/ doc-upload:doc/pyopencl
4 | 


--------------------------------------------------------------------------------
/examples/.gitignore:
--------------------------------------------------------------------------------
1 | wiki-examples
2 | 


--------------------------------------------------------------------------------
/examples/demo-struct-reduce.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | import pyopencl as cl
 4 | 
 5 | 
 6 | def make_collector_dtype(device):
 7 |     dtype = np.dtype([
 8 |         ("cur_min", np.int32),
 9 |         ("cur_max", np.int32),
10 |         ("pad", np.int32),
11 |         ])
12 | 
13 |     name = "minmax_collector"
14 |     from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct
15 | 
16 |     dtype, c_decl = match_dtype_to_c_struct(device, name, dtype)
17 |     dtype = get_or_register_dtype(name, dtype)
18 | 
19 |     return dtype, c_decl
20 | 
21 | 
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

mmc_dtype, mmc_c_decl = make_collector_dtype(ctx.devices[0])

# Device-side helpers for the reduction, appended to the struct declaration:
#   mmc_neutral()      -- identity element (INT_MAX / INT_MIN, so any int32
#                         input replaces it)
#   mmc_from_scalar(x) -- wrap one int element as a collector; takes ``int``
#                         so that int32 inputs are not rounded through float
#   agg_mmc(a, b)      -- combine two collectors
preamble = mmc_c_decl + r"""//CL//

    minmax_collector mmc_neutral()
    {
        minmax_collector result;
        result.cur_min = INT_MAX;
        result.cur_max = INT_MIN;
        return result;
    }

    minmax_collector mmc_from_scalar(int x)
    {
        minmax_collector result;
        result.cur_min = x;
        result.cur_max = x;
        return result;
    }

    minmax_collector agg_mmc(minmax_collector a, minmax_collector b)
    {
        minmax_collector result = a;
        if (b.cur_min < result.cur_min)
            result.cur_min = b.cur_min;
        if (b.cur_max > result.cur_max)
            result.cur_max = b.cur_max;
        return result;
    }

    """

from pyopencl.clrandom import rand as clrand


# 20000 random int32 values on the device, plus a host copy for checking.
a_gpu = clrand(queue, (20000,), dtype=np.int32, a=0, b=10**6)
a = a_gpu.get()

from pyopencl.reduction import ReductionKernel


red = ReductionKernel(ctx, mmc_dtype,
        neutral="mmc_neutral()",
        reduce_expr="agg_mmc(a, b)", map_expr="mmc_from_scalar(x[i])",
        arguments="__global int *x", preamble=preamble)

minmax = red(a_gpu).get()

# The data are integers, so the device result must match numpy exactly.
assert minmax["cur_min"] == np.min(a)
assert minmax["cur_max"] == np.max(a)
76 | 


--------------------------------------------------------------------------------
/examples/demo.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import numpy as np
 4 | 
 5 | import pyopencl as cl
 6 | 
 7 | 
 8 | rng = np.random.default_rng()
 9 | a_np = rng.random(50000, dtype=np.float32)
10 | b_np = rng.random(50000, dtype=np.float32)
11 | 
12 | ctx = cl.create_some_context()
13 | queue = cl.CommandQueue(ctx)
14 | 
15 | mf = cl.mem_flags
16 | a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
17 | b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
18 | 
19 | prg = cl.Program(ctx, """
20 | __kernel void sum(
21 |     __global const float *a_g, __global const float *b_g, __global float *res_g)
22 | {
23 |   int gid = get_global_id(0);
24 |   res_g[gid] = a_g[gid] + b_g[gid];
25 | }
26 | """).build()
27 | 
28 | res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)
29 | knl = prg.sum  # Use this Kernel object for repeated calls
30 | knl(queue, a_np.shape, None, a_g, b_g, res_g)
31 | 
32 | res_np = np.empty_like(a_np)
33 | cl.enqueue_copy(queue, res_np, res_g)
34 | 
35 | # Check on CPU with Numpy:
36 | error_np = res_np - (a_np + b_np)
37 | print(f"Error:\n{error_np}")
38 | print(f"Norm: {np.linalg.norm(error_np):.16e}")
39 | assert np.allclose(res_np, a_np + b_np)
40 | 


--------------------------------------------------------------------------------
/examples/demo_array.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import numpy.linalg as la
 3 | 
 4 | import pyopencl as cl
 5 | import pyopencl.array as cl_array
 6 | 
 7 | 
 8 | rng = np.random.default_rng()
 9 | a = rng.random(50000, dtype=np.float32)
10 | b = rng.random(50000, dtype=np.float32)
11 | 
12 | ctx = cl.create_some_context()
13 | queue = cl.CommandQueue(ctx)
14 | 
15 | a_dev = cl_array.to_device(queue, a)
16 | b_dev = cl_array.to_device(queue, b)
17 | dest_dev = cl_array.empty_like(a_dev)
18 | 
19 | prg = cl.Program(ctx, """
20 |     __kernel void sum(__global const float *a,
21 |     __global const float *b, __global float *c)
22 |     {
23 |       int gid = get_global_id(0);
24 |       c[gid] = a[gid] + b[gid];
25 |     }
26 |     """).build()
27 | 
28 | knl = prg.sum  # Use this Kernel object for repeated calls
29 | knl(queue, a.shape, None, a_dev.data, b_dev.data, dest_dev.data)
30 | 
31 | print(la.norm((dest_dev - (a_dev+b_dev)).get()))
32 | assert np.allclose(dest_dev.get(), (a_dev + b_dev).get())
33 | 


--------------------------------------------------------------------------------
/examples/demo_array_svm.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | import pyopencl as cl
 4 | import pyopencl.array as cl_array
 5 | from pyopencl.tools import SVMAllocator, SVMPool
 6 | 
 7 | 
 8 | n = 50000
 9 | 
10 | rng = np.random.default_rng()
11 | a = rng.random(n, dtype=np.float32)
12 | b = rng.random(n, dtype=np.float32)
13 | 
14 | ctx = cl.create_some_context()
15 | queue = cl.CommandQueue(ctx)
16 | 
17 | alloc = SVMAllocator(ctx, alignment=0, queue=queue)
18 | alloc = SVMPool(alloc)
19 | 
20 | a_dev = cl_array.to_device(queue, a, allocator=alloc)
21 | b_dev = cl_array.to_device(queue, b, allocator=alloc)
22 | dest_dev = cl_array.empty_like(a_dev)
23 | 
24 | prg = cl.Program(ctx, """
25 |     __kernel void sum(__global const float *a,
26 |     __global const float *b, __global float *c)
27 |     {
28 |       int gid = get_global_id(0);
29 |       c[gid] = a[gid] + b[gid];
30 |     }
31 |     """).build()
32 | 
33 | knl = prg.sum
34 | knl(queue, a.shape, None, a_dev.data, b_dev.data, dest_dev.data)
35 | 
36 | print(np.linalg.norm((dest_dev - (a_dev + b_dev)).get()))
37 | assert np.allclose(dest_dev.get(), (a_dev + b_dev).get())
38 | 


--------------------------------------------------------------------------------
/examples/demo_elementwise.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | import pyopencl as cl
 4 | import pyopencl.array
 5 | from pyopencl.elementwise import ElementwiseKernel
 6 | 
 7 | 
 8 | n = 10
 9 | 
10 | rng = np.random.default_rng()
11 | a_np = rng.random(n, dtype=np.float32)
12 | b_np = rng.random(n, dtype=np.float32)
13 | 
14 | ctx = cl.create_some_context()
15 | queue = cl.CommandQueue(ctx)
16 | 
17 | a_g = cl.array.to_device(queue, a_np)
18 | b_g = cl.array.to_device(queue, b_np)
19 | 
20 | lin_comb = ElementwiseKernel(ctx,
21 |     "float k1, float *a_g, float k2, float *b_g, float *res_g",
22 |     "res_g[i] = k1 * a_g[i] + k2 * b_g[i]",
23 |     "lin_comb")
24 | 
25 | res_g = cl.array.empty_like(a_g)
26 | lin_comb(2, a_g, 3, b_g, res_g)
27 | 
28 | # Check on GPU with PyOpenCL Array:
29 | print((res_g - (2 * a_g + 3 * b_g)).get())
30 | 
31 | # Check on CPU with Numpy:
32 | res_np = res_g.get()
33 | print(res_np - (2 * a_np + 3 * b_np))
34 | print(np.linalg.norm(res_np - (2 * a_np + 3 * b_np)))
35 | 


--------------------------------------------------------------------------------
/examples/demo_elementwise_complex.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import numpy.linalg as la
 3 | 
 4 | import pyopencl as cl
 5 | import pyopencl.array as cl_array
 6 | from pyopencl.elementwise import ElementwiseKernel
 7 | 
 8 | 
 9 | ctx = cl.create_some_context()
10 | queue = cl.CommandQueue(ctx)
11 | 
12 | n = 10
13 | 
14 | rng = np.random.default_rng()
15 | a_gpu = cl_array.to_device(queue,
16 |         rng.standard_normal(n, dtype=np.float32)
17 |         + 1j*rng.standard_normal(n, dtype=np.float32))
18 | b_gpu = cl_array.to_device(queue,
19 |         rng.standard_normal(n, dtype=np.float32)
20 |         + 1j*rng.standard_normal(n, dtype=np.float32))
21 | 
22 | complex_prod = ElementwiseKernel(ctx,
23 |         "float a, "
24 |         "cfloat_t *x, "
25 |         "cfloat_t *y, "
26 |         "cfloat_t *z",
27 |         "z[i] = cfloat_rmul(a, cfloat_mul(x[i], y[i]))",
28 |         "complex_prod",
29 |         preamble="#include <pyopencl-complex.h>")
30 | 
31 | complex_add = ElementwiseKernel(ctx,
32 |         "cfloat_t *x, "
33 |         "cfloat_t *y, "
34 |         "cfloat_t *z",
35 |         "z[i] = cfloat_add(x[i], y[i])",
36 |         "complex_add",
37 |         preamble="#include <pyopencl-complex.h>")
38 | 
39 | real_part = ElementwiseKernel(ctx,
40 |         "cfloat_t *x, float *z",
41 |         "z[i] = cfloat_real(x[i])",
42 |         "real_part",
43 |         preamble="#include <pyopencl-complex.h>")
44 | 
45 | c_gpu = cl_array.empty_like(a_gpu)
46 | complex_prod(5, a_gpu, b_gpu, c_gpu)
47 | 
48 | c_gpu_real = cl_array.empty(queue, len(a_gpu), dtype=np.float32)
49 | real_part(c_gpu, c_gpu_real)
50 | print(c_gpu.get().real - c_gpu_real.get())
51 | 
52 | print(la.norm(c_gpu.get() - (5*a_gpu.get()*b_gpu.get())))
53 | assert la.norm(c_gpu.get() - (5*a_gpu.get()*b_gpu.get())) < 1e-5
54 | 


--------------------------------------------------------------------------------
/examples/demo_mandelbrot.py:
--------------------------------------------------------------------------------
  1 | # I found this example for PyCuda here:
  2 | # http://wiki.tiker.net/PyCuda/Examples/Mandelbrot
  3 | #
  4 | # An improved sequential/pure Python code was contributed
  5 | # by CRVSADER//KY <crusaderky@gmail.com>.
  6 | #
  7 | # I adapted it for PyOpenCL. Hopefully it is useful to someone.
  8 | # July 2010, HolgerRapp@gmx.net
  9 | #
 10 | # Original readme below these lines.
 11 | 
 12 | # Mandelbrot calculate using GPU, Serial numpy and faster numpy
 13 | # Use to show the speed difference between CPU and GPU calculations
 14 | # ian@ianozsvald.com March 2010
 15 | 
 16 | # Based on vegaseat's TKinter/numpy example code from 2006
 17 | # http://www.daniweb.com/code/snippet216851.html#
 18 | # with minor changes to move to numpy from the obsolete Numeric
 19 | 
 20 | import time
 21 | 
 22 | import numpy as np
 23 | from PIL import Image
 24 | 
 25 | import pyopencl as cl
 26 | 
 27 | 
 28 | # You can choose a calculation routine below (calc_fractal), uncomment
 29 | # one of the three lines to test the three variations
 30 | # Speed notes are listed in the same place
 31 | 
 32 | # set width and height of window, more pixels take longer to calculate
 33 | w = 2048
 34 | h = 2048
 35 | 
 36 | 
 37 | def calc_fractal_opencl(q, maxiter):
 38 |     ctx = cl.create_some_context()
 39 |     queue = cl.CommandQueue(ctx)
 40 | 
 41 |     output = np.empty(q.shape, dtype=np.uint16)
 42 | 
 43 |     mf = cl.mem_flags
 44 |     q_opencl = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=q)
 45 |     output_opencl = cl.Buffer(ctx, mf.WRITE_ONLY, output.nbytes)
 46 | 
 47 |     prg = cl.Program(
 48 |         ctx,
 49 |         """
 50 |     #pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
 51 |     __kernel void mandelbrot(__global float2 *q,
 52 |                      __global ushort *output, ushort const maxiter)
 53 |     {
 54 |         int gid = get_global_id(0);
 55 |         float nreal, real = 0;
 56 |         float imag = 0;
 57 | 
 58 |         output[gid] = 0;
 59 | 
 60 |         for(int curiter = 0; curiter < maxiter; curiter++) {
 61 |             nreal = real*real - imag*imag + q[gid].x;
 62 |             imag = 2* real*imag + q[gid].y;
 63 |             real = nreal;
 64 | 
 65 |             if (real*real + imag*imag > 4.0f) {
 66 |                  output[gid] = curiter;
 67 |                  break;
 68 |             }
 69 |         }
 70 |     }
 71 |     """,
 72 |     ).build()
 73 | 
 74 |     prg.mandelbrot(
 75 |         queue, output.shape, None, q_opencl, output_opencl, np.uint16(maxiter)
 76 |     )
 77 | 
 78 |     cl.enqueue_copy(queue, output, output_opencl).wait()
 79 | 
 80 |     return output
 81 | 
 82 | 
 83 | def calc_fractal_serial(q, maxiter):
 84 |     # calculate z using pure python on a numpy array
 85 |     # note that, unlike the other two implementations,
 86 |     # the number of iterations per point is NOT constant
 87 |     z = np.zeros(q.shape, complex)
 88 |     output = np.resize(
 89 |         np.array(
 90 |             0,
 91 |         ),
 92 |         q.shape,
 93 |     )
 94 |     for i in range(len(q)):
 95 |         for iter in range(maxiter):
 96 |             z[i] = z[i] * z[i] + q[i]
 97 |             if abs(z[i]) > 2.0:
 98 |                 output[i] = iter
 99 |                 break
100 |     return output
101 | 
102 | 
def calc_fractal_numpy(q, maxiter):
    """Compute Mandelbrot escape iterations with whole-array numpy operations.

    This is the original routine from vegaseat's URL. Every point is iterated
    *maxiter* times; once a point escapes (``abs(z) > 2``) both its ``z`` and
    its ``q`` entry are reset to zero so that it stays put afterwards.

    Returns an integer array shaped like *q* holding the iteration index at
    which each point escaped, or 0 for points that never escaped. The
    caller's *q* array is not modified (only the local name is rebound).
    """
    output = np.zeros(q.shape, dtype=int)
    z = np.zeros(q.shape, np.complex64)

    for it in range(maxiter):
        z = z * z + q
        done = np.abs(z) > 2.0
        # Park escaped points at the origin so they are not flagged again.
        q = np.where(done, 0 + 0j, q)
        z = np.where(done, 0 + 0j, z)
        output = np.where(done, it, output)
    return output
121 | 
122 | 
123 | # choose your calculation routine here by uncommenting one of the options
124 | calc_fractal = calc_fractal_opencl
125 | # calc_fractal = calc_fractal_serial
126 | # calc_fractal = calc_fractal_numpy
127 | 
128 | 
class Mandelbrot:
    """Render the Mandelbrot set into a PIL image and optionally show it in Tk.

    Relies on the module-level ``w``/``h`` image size and on the selected
    ``calc_fractal`` routine. The Tk methods additionally need the names
    ``tk`` and ``ImageTk`` to have been imported at module level.
    """

    def draw(self, x1, x2, y1, y2, maxiter=30):
        """Compute the fractal on ``[x1, x2) x (y1, y2]`` into ``self.mandel``.

        The result is an ``(h, w)`` ``uint8`` array in which the largest
        escape iteration maps to 255.
        """
        # linspace(..., endpoint=False) yields exactly w (resp. h) samples
        # with the same spacing as the former float-step arange, which could
        # produce one sample too many and break the reshape below.
        xx = np.linspace(x1, x2, w, endpoint=False)
        yy = np.linspace(y2, y1, h, endpoint=False) * 1j
        q = np.ravel(xx + yy[:, np.newaxis]).astype(np.complex64)

        start_main = time.time()
        output = calc_fractal(q, maxiter)
        end_main = time.time()

        secs = end_main - start_main
        print("Main took", secs)

        # If no point escaped, the maximum is 0; avoid dividing by zero and
        # produce an all-black image instead.
        peak = float(output.max()) or 1.0
        self.mandel = (output.reshape((h, w)) / peak * 255.0).astype(np.uint8)

    def create_image(self):
        """Create the palettized image ``self.im`` from the draw() result."""
        # you can experiment with these x and y ranges
        self.draw(-2.13, 0.77, -1.3, 1.3)
        self.im = Image.fromarray(self.mandel)
        # Red ramp with one entry for each of the 256 possible pixel values
        # (pixel value 255 always occurs, since draw() scales the maximum to it).
        palette = []
        for level in range(256):
            palette.extend((level, 0, 0))
        self.im.putpalette(palette)

    def create_label(self):
        """Put the image on a label widget inside ``self.root``."""
        # Keep the PhotoImage on self: the label is given this same object.
        self.image = ImageTk.PhotoImage(self.im)
        self.label = tk.Label(self.root, image=self.image)
        self.label.pack()

    def run_tk(self):
        """Open a Tk window showing the fractal and run its event loop."""
        self.root = tk.Tk()
        self.root.title("Mandelbrot Set")
        self.create_image()
        self.create_label()
        # start event loop
        self.root.mainloop()
170 | 
171 | 
if __name__ == "__main__":
    test = Mandelbrot()

    # Tk is optional: without it, only compute the image.
    try:
        import tkinter as tk
    except ModuleNotFoundError:
        tk = None

    if tk is None:
        test.create_image()
    else:
        from PIL import ImageTk

        # Fall back to computing the image when Tk raises TclError.
        try:
            test.run_tk()
        except tk.TclError:
            test.create_image()
184 | 


--------------------------------------------------------------------------------
/examples/demo_meta_codepy.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import numpy.linalg as la
 3 | 
 4 | from cgen import (
 5 |     POD,
 6 |     Assign,
 7 |     Block,
 8 |     Const,
 9 |     FunctionBody,
10 |     FunctionDeclaration,
11 |     Initializer,
12 |     Module,
13 |     Pointer,
14 |     Value,
15 | )
16 | from cgen.opencl import CLGlobal, CLKernel, CLRequiredWorkGroupSize
17 | 
18 | import pyopencl as cl
19 | 
20 | 
# Problem layout: each of the `macroblock_count` work groups has `local_size`
# work items, and each work item handles `thread_strides` elements.
local_size = 256
thread_strides = 32
macroblock_count = 33
dtype = np.float32
total_size = local_size*thread_strides*macroblock_count

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

rng = np.random.default_rng()
a, b = (rng.standard_normal(total_size, dtype=dtype) for _ in range(2))

mf = cl.mem_flags
input_flags = mf.READ_ONLY | mf.COPY_HOST_PTR
a_buf = cl.Buffer(ctx, input_flags, hostbuf=a)
b_buf = cl.Buffer(ctx, input_flags, hostbuf=b)
c_buf = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes)

# Kernel signature: void add(tgt, op1, op2) with a required work-group size.
kernel_decl = CLKernel(CLRequiredWorkGroupSize((local_size,),
    FunctionDeclaration(
        Value("void", "add"),
        arg_decls=[CLGlobal(Pointer(Const(POD(dtype, arg_name))))
            for arg_name in ["tgt", "op1", "op2"]])))

# First element handled by this work item.
idx_init = Initializer(POD(np.int32, "idx"),
    "get_local_id(0) + %d * get_group_id(0)"
    % (local_size*thread_strides))

# One generated assignment per stride, each `local_size` elements apart.
stride_assigns = [
    Assign(
        "tgt[idx+%d]" % offset,
        "op1[idx+%d] + op2[idx+%d]" % (offset, offset))
    for offset in (o*local_size for o in range(thread_strides))]

mod = Module([
    FunctionBody(kernel_decl, Block([idx_init] + stride_assigns))])

knl = cl.Program(ctx, str(mod)).build().add

knl(queue, (local_size*macroblock_count,), (local_size,),
        c_buf, a_buf, b_buf)

c = np.empty_like(a)
copy_event = cl.enqueue_copy(queue, c, c_buf)
copy_event.wait()

assert la.norm(c-(a+b)) == 0
67 | 


--------------------------------------------------------------------------------
/examples/demo_meta_template.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import numpy.linalg as la
 3 | from mako.template import Template
 4 | 
 5 | import pyopencl as cl
 6 | 
 7 | 
 8 | local_size = 256
 9 | thread_strides = 32
10 | macroblock_count = 33
11 | dtype = np.float32
12 | total_size = local_size*thread_strides*macroblock_count
13 | 
14 | ctx = cl.create_some_context()
15 | queue = cl.CommandQueue(ctx)
16 | 
17 | rng = np.random.default_rng()
18 | a = rng.standard_normal(total_size, dtype=dtype)
19 | b = rng.standard_normal(total_size, dtype=dtype)
20 | 
21 | mf = cl.mem_flags
22 | a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a)
23 | b_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b)
24 | c_buf = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes)
25 | 
26 | tpl = Template("""
27 |     __kernel void add(
28 |             __global ${ type_name } *tgt,
29 |             __global const ${ type_name } *op1,
30 |             __global const ${ type_name } *op2)
31 |     {
32 |       int idx = get_local_id(0)
33 |         + ${ local_size } * ${ thread_strides }
34 |         * get_group_id(0);
35 | 
36 |       % for i in range(thread_strides):
37 |           <% offset = i*local_size %>
38 |           tgt[idx + ${ offset }] =
39 |             op1[idx + ${ offset }]
40 |             + op2[idx + ${ offset } ];
41 |       % endfor
42 |     }""")
43 | 
44 | rendered_tpl = tpl.render(type_name="float",
45 |     local_size=local_size, thread_strides=thread_strides)
46 | 
47 | knl = cl.Program(ctx, str(rendered_tpl)).build().add
48 | 
49 | knl(queue, (local_size*macroblock_count,), (local_size,),
50 |         c_buf, a_buf, b_buf)
51 | 
52 | c = np.empty_like(a)
53 | cl.enqueue_copy(queue, c, c_buf).wait()
54 | 
55 | assert la.norm(c-(a+b)) == 0
56 | 


--------------------------------------------------------------------------------
/examples/dump-performance.py:
--------------------------------------------------------------------------------
 1 | import pyopencl as cl
 2 | import pyopencl.characterize.performance as perf
 3 | 
 4 | 
 5 | def main():
 6 |     ctx = cl.create_some_context()
 7 | 
 8 |     prof_overhead, latency = perf.get_profiling_overhead(ctx)
 9 |     print("command latency: %g s" % latency)
10 |     print("profiling overhead: {:g} s -> {:.1f} %".format(
11 |             prof_overhead, 100*prof_overhead/latency))
12 |     queue = cl.CommandQueue(
13 |             ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)
14 | 
15 |     print("empty kernel: %g s" % perf.get_empty_kernel_time(queue))
16 |     print("float32 add: %g GOps/s" % (perf.get_add_rate(queue)/1e9))
17 | 
18 |     for tx_type in [
19 |             perf.HostToDeviceTransfer,
20 |             perf.DeviceToHostTransfer,
21 |             perf.DeviceToDeviceTransfer]:
22 |         print("----------------------------------------")
23 |         print(tx_type.__name__)
24 |         print("----------------------------------------")
25 | 
26 |         print("latency: %g s" % perf.transfer_latency(queue, tx_type))
27 |         for i in range(6, 31, 2):
28 |             bs = 1 << i
29 |             try:
30 |                 result = "%g GB/s" % (
31 |                         perf.transfer_bandwidth(queue, tx_type, bs)/1e9)
32 |             except Exception as e:
33 |                 result = "exception: %s" % e.__class__.__name__
34 |             print("bandwidth @ %d bytes: %s" % (bs, result))
35 | 
36 | 
37 | if __name__ == "__main__":
38 |     main()
39 | 


--------------------------------------------------------------------------------
/examples/dump-properties.py:
--------------------------------------------------------------------------------
 1 | from optparse import OptionParser
 2 | 
 3 | import pyopencl as cl
 4 | 
 5 | 
 6 | parser = OptionParser()
 7 | parser.add_option("-s", "--short", action="store_true",
 8 |                   help="don't print all device properties")
 9 | 
10 | (options, args) = parser.parse_args()
11 | 
12 | 
def print_info(obj, info_cls):
    """Print ``name: value`` for every public attribute of *info_cls*.

    Each attribute of *info_cls* whose name does not start with ``_`` (and is
    not ``to_string``) is passed to ``obj.get_info``; a failing query is shown
    as ``<error>``. Names are processed in sorted order.
    """
    public_names = (
        name for name in sorted(dir(info_cls))
        if not name.startswith("_") and name != "to_string")

    for info_name in public_names:
        info = getattr(info_cls, info_name)
        try:
            info_value = obj.get_info(info)
        except Exception:
            info_value = "<error>"

        is_partition_list = (
            info_cls == cl.device_info
            and info_name == "PARTITION_TYPES_EXT"
            and isinstance(info_value, list))

        if is_partition_list:
            # Show the partition properties by name rather than by number.
            partition_names = [
                cl.device_partition_property_ext.to_string(v,
                    "<unknown device partition property %d>")
                for v in info_value]
            print("{}: {}".format(info_name, partition_names))
            continue

        try:
            print(f"{info_name}: {info_value}")
        except Exception:
            # Formatting the value itself may raise.
            print("%s: <error>" % info_name)
33 | 
34 | 
def _str_chd_type(chdtype):
    """Return an abbreviated name for an image channel data type."""
    result = cl.channel_type.to_string(chdtype,
            "<unknown channel data type %d>")

    # Order matters: "UNSIGNED" must be shortened before "SIGNED".
    for long_form, short_form in [
            ("_INT", ""),
            ("UNSIGNED", "U"),
            ("SIGNED", "S"),
            ("NORM", "N"),
            ("FLOAT", "F"),
            ]:
        result = result.replace(long_form, short_form)
    return result


# For every platform and device: print a banner and, unless --short was
# given, all info properties plus the supported 2D/3D image formats.
for platform in cl.get_platforms():
    print(75*"=")
    print(platform)
    print(75*"=")
    if not options.short:
        print_info(platform, cl.platform_info)

    for device in platform.get_devices():
        if not options.short:
            print(75*"-")
        print(device)
        if not options.short:
            print(75*"-")
            print_info(device, cl.device_info)
            ctx = cl.Context([device])
            for mf in [
                    cl.mem_flags.READ_ONLY,
                    # cl.mem_flags.READ_WRITE,
                    # cl.mem_flags.WRITE_ONLY
                    ]:
                for itype in [
                        cl.mem_object_type.IMAGE2D,
                        cl.mem_object_type.IMAGE3D
                        ]:
                    try:
                        formats = cl.get_supported_image_formats(ctx, mf, itype)
                    except Exception:
                        formats = "<error>"
                    else:
                        formats = ", ".join(
                                "{}-{}".format(
                                    cl.channel_order.to_string(iform.channel_order,
                                        "<unknown channel order 0x%x>"),
                                    _str_chd_type(iform.channel_data_type))
                                for iform in formats)

                    print("{} {} FORMATS: {}\n".format(
                            cl.mem_object_type.to_string(itype),
                            cl.mem_flags.to_string(mf),
                            formats))
            del ctx
87 | 


--------------------------------------------------------------------------------
/examples/gl_interop_demo.py:
--------------------------------------------------------------------------------
 1 | from OpenGL.GL import *
 2 | from OpenGL.GLUT import *
 3 | from OpenGL.raw.GL.VERSION.GL_1_5 import glBufferData as rawGlBufferData
 4 | 
 5 | import pyopencl as cl
 6 | 
 7 | 
 8 | n_vertices = 10000
 9 | 
10 | src = """
11 | 
12 | __kernel void generate_sin(__global float2* a)
13 | {
14 |     int id = get_global_id(0);
15 |     int n = get_global_size(0);
16 |     float r = (float)id / (float)n;
17 |     float x = r * 16.0f * 3.1415f;
18 |     a[id].x = r * 2.0f - 1.0f;
19 |     a[id].y = native_sin(x);
20 | }
21 | 
22 | """
23 | 
def initialize():
    """Create the CL/GL shared context and fill the vertex buffer once.

    Builds an OpenCL context with GL sharing properties on the first
    platform, allocates a GL vertex buffer sized for ``n_vertices`` pairs of
    4-byte floats, wraps it as a :class:`pyopencl.GLBuffer`, and runs the
    ``generate_sin`` kernel over it.
    """
    platform = cl.get_platforms()[0]

    import sys

    from pyopencl.tools import get_gl_sharing_context_properties
    if sys.platform == "darwin":
        ctx = cl.Context(properties=get_gl_sharing_context_properties(),
                devices=[])
    else:
        # Some OSs prefer clCreateContextFromType, some prefer
        # clCreateContext. Try both.
        properties = ([(cl.context_properties.PLATFORM, platform)]
                + get_gl_sharing_context_properties())
        try:
            ctx = cl.Context(properties=properties)
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt/SystemExit must still
            # propagate instead of triggering the fallback.
            ctx = cl.Context(properties=properties,
                    devices=[platform.get_devices()[0]])

    glClearColor(1, 1, 1, 1)
    glColor(0, 0, 1)
    vbo = glGenBuffers(1)
    glBindBuffer(GL_ARRAY_BUFFER, vbo)
    # n_vertices entries of two 4-byte floats each (the kernel's float2).
    rawGlBufferData(GL_ARRAY_BUFFER, n_vertices * 2 * 4, None, GL_STATIC_DRAW)
    glEnableClientState(GL_VERTEX_ARRAY)
    glVertexPointer(2, GL_FLOAT, 0, None)
    coords_dev = cl.GLBuffer(ctx, cl.mem_flags.READ_WRITE, int(vbo))
    prog = cl.Program(ctx, src).build()
    queue = cl.CommandQueue(ctx)
    # The kernel runs between acquiring and releasing the GL buffer.
    cl.enqueue_acquire_gl_objects(queue, [coords_dev])
    prog.generate_sin(queue, (n_vertices,), None, coords_dev)
    cl.enqueue_release_gl_objects(queue, [coords_dev])
    queue.finish()
    glFlush()
61 | 
62 | def display():
63 |     glClear(GL_COLOR_BUFFER_BIT)
64 |     glDrawArrays(GL_LINE_STRIP, 0, n_vertices)
65 |     glFlush()
66 | 
67 | def reshape(w, h):
68 |     glViewport(0, 0, w, h)
69 |     glMatrixMode(GL_PROJECTION)
70 |     glLoadIdentity()
71 |     glMatrixMode(GL_MODELVIEW)
72 | 
73 | if __name__ == '__main__':
74 |     import sys
75 |     glutInit(sys.argv)
76 |     if len(sys.argv) > 1:
77 |         n_vertices = int(sys.argv[1])
78 |     glutInitWindowSize(800, 160)
79 |     glutInitWindowPosition(0, 0)
80 |     glutCreateWindow('OpenCL/OpenGL Interop Tutorial: Sin Generator')
81 |     glutDisplayFunc(display)
82 |     glutReshapeFunc(reshape)
83 |     initialize()
84 |     glutMainLoop()
85 | 


--------------------------------------------------------------------------------
/examples/gl_particle_animation.py:
--------------------------------------------------------------------------------
  1 | # Visualization of particles with gravity
  2 | # Source: http://enja.org/2010/08/27/adventures-in-opencl-part-2-particles-with-opengl/
  3 | 
  4 | import sys
  5 | 
  6 | import numpy as np
  7 | from OpenGL import GL, GLU, GLUT
  8 | from OpenGL.arrays import vbo
  9 | from OpenGL.GL import (
 10 |     GL_ARRAY_BUFFER, GL_BLEND, GL_COLOR_ARRAY, GL_COLOR_BUFFER_BIT,
 11 |     GL_DEPTH_BUFFER_BIT, GL_DYNAMIC_DRAW, GL_FLOAT, GL_MODELVIEW,
 12 |     GL_ONE_MINUS_SRC_ALPHA, GL_POINT_SMOOTH, GL_POINTS, GL_PROJECTION, GL_SRC_ALPHA,
 13 |     GL_VERTEX_ARRAY)
 14 | from OpenGL.GLUT import GLUT_DEPTH, GLUT_DOUBLE, GLUT_RGBA
 15 | 
 16 | import pyopencl as cl
 17 | from pyopencl.tools import get_gl_sharing_context_properties
 18 | 
 19 | 
 20 | mf = cl.mem_flags
 21 | 
 22 | width = 800
 23 | height = 600
 24 | num_particles = 100000
 25 | time_step = 0.005
 26 | mouse_down = False
 27 | mouse_old = {"x": 0.0, "y": 0.0}
 28 | rotate = {"x": 0.0, "y": 0.0, "z": 0.0}
 29 | translate = {"x": 0.0, "y": 0.0, "z": 0.0}
 30 | initial_translate = {"x": 0.0, "y": 0.0, "z": -2.5}
 31 | 
 32 | 
 33 | def glut_window():
 34 |     GLUT.glutInit(sys.argv)
 35 |     GLUT.glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE | GLUT_DEPTH)
 36 |     GLUT.glutInitWindowSize(width, height)
 37 |     GLUT.glutInitWindowPosition(0, 0)
 38 |     window = GLUT.glutCreateWindow("Particle Simulation")
 39 | 
 40 |     GLUT.glutDisplayFunc(on_display)  # Called by GLUT every frame
 41 |     GLUT.glutKeyboardFunc(on_key)
 42 |     GLUT.glutMouseFunc(on_click)
 43 |     GLUT.glutMotionFunc(on_mouse_move)
 44 |     GLUT.glutTimerFunc(10, on_timer, 10)  # First redraw tick after 10 ms; on_timer re-arms itself
 45 | 
 46 |     GL.glViewport(0, 0, width, height)
 47 |     GL.glMatrixMode(GL_PROJECTION)
 48 |     GL.glLoadIdentity()
 49 |     GLU.gluPerspective(60.0, width / float(height), 0.1, 1000.0)
 50 | 
 51 |     return window
 52 | 
 53 | 
 54 | def initial_buffers(num_particles):
 55 |     rng = np.random.default_rng()
 56 | 
 57 |     np_position = np.empty((num_particles, 4), dtype=np.float32)
 58 |     np_color = np.empty((num_particles, 4), dtype=np.float32)
 59 |     np_velocity = np.empty((num_particles, 4), dtype=np.float32)
 60 | 
 61 |     np_position[:, 0] = np.sin(
 62 |         np.arange(0.0, num_particles) * 2.001 * np.pi / num_particles
 63 |     )
 64 |     np_position[:, 0] *= rng.integers(num_particles) / 3.0 + 0.2
 65 |     np_position[:, 1] = np.cos(
 66 |         np.arange(0.0, num_particles) * 2.001 * np.pi / num_particles
 67 |     )
 68 |     np_position[:, 1] *= rng.integers(num_particles) / 3.0 + 0.2
 69 |     np_position[:, 2] = 0.0
 70 |     np_position[:, 3] = 1.0
 71 | 
 72 |     np_color[:, :] = [1.0, 1.0, 1.0, 1.0]  # White particles
 73 | 
 74 |     np_velocity[:, 0] = np_position[:, 0] * 2.0
 75 |     np_velocity[:, 1] = np_position[:, 1] * 2.0
 76 |     np_velocity[:, 2] = 3.0
 77 |     np_velocity[:, 3] = rng.integers(num_particles)
 78 | 
 79 |     gl_position = vbo.VBO(
 80 |         data=np_position, usage=GL_DYNAMIC_DRAW, target=GL_ARRAY_BUFFER
 81 |     )
 82 |     gl_position.bind()
 83 |     gl_color = vbo.VBO(data=np_color, usage=GL_DYNAMIC_DRAW, target=GL_ARRAY_BUFFER)
 84 |     gl_color.bind()
 85 | 
 86 |     return (np_position, np_velocity, gl_position, gl_color)
 87 | 
 88 | 
 89 | def on_timer(t):
 90 |     GLUT.glutTimerFunc(t, on_timer, t)  # re-arm: call on_timer again in t ms with the same t
 91 |     GLUT.glutPostRedisplay()  # ask GLUT to redraw the window
 92 | 
 93 | 
 94 | def on_key(*args):
 95 |     if args[0] == "\033" or args[0] == "q":
 96 |         sys.exit()
 97 | 
 98 | 
 99 | def on_click(button, state, x, y):
100 |     mouse_old["x"] = x  # remember the pointer position; on_mouse_move computes deltas from it
101 |     mouse_old["y"] = y
102 | 
103 | 
104 | def on_mouse_move(x, y):
105 |     rotate["x"] += (y - mouse_old["y"]) * 0.2  # vertical drag drives rotation about the x axis
106 |     rotate["y"] += (x - mouse_old["x"]) * 0.2  # horizontal drag drives rotation about the y axis
107 | 
108 |     mouse_old["x"] = x
109 |     mouse_old["y"] = y
110 | 
111 | 
112 | def on_display():
113 |     """Render the particles"""
114 |     # Update our particle positions by calling the OpenCL kernel
115 |     cl.enqueue_acquire_gl_objects(queue, [cl_gl_position, cl_gl_color])
116 |     kernelargs = (
117 |         cl_gl_position,
118 |         cl_gl_color,
119 |         cl_velocity,
120 |         cl_start_position,
121 |         cl_start_velocity,
122 |         np.float32(time_step),
123 |     )
124 |     program.particle_fountain(queue, (num_particles,), None, *(kernelargs))
125 |     cl.enqueue_release_gl_objects(queue, [cl_gl_position, cl_gl_color])
126 |     queue.finish()
127 |     GL.glFlush()
128 | 
129 |     GL.glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
130 |     GL.glMatrixMode(GL_MODELVIEW)
131 |     GL.glLoadIdentity()
132 | 
133 |     # Handle mouse transformations
134 |     GL.glTranslatef(initial_translate["x"], initial_translate["y"], initial_translate["z"])
135 |     GL.glRotatef(rotate["x"], 1, 0, 0)
136 |     GL.glRotatef(rotate["y"], 0, 1, 0)  # we switched around the axis so make this rotate_z
137 |     GL.glTranslatef(translate["x"], translate["y"], translate["z"])
138 | 
139 |     # Render the particles
140 |     GL.glEnable(GL_POINT_SMOOTH)
141 |     GL.glPointSize(2)
142 |     GL.glEnable(GL_BLEND)
143 |     GL.glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
144 | 
145 |     # Set up the VBOs
146 |     gl_color.bind()
147 |     GL.glColorPointer(4, GL_FLOAT, 0, gl_color)
148 |     gl_position.bind()
149 |     GL.glVertexPointer(4, GL_FLOAT, 0, gl_position)
150 |     GL.glEnableClientState(GL_VERTEX_ARRAY)
151 |     GL.glEnableClientState(GL_COLOR_ARRAY)
152 | 
153 |     # Draw the VBOs
154 |     GL.glDrawArrays(GL_POINTS, 0, num_particles)
155 | 
156 |     GL.glDisableClientState(GL_COLOR_ARRAY)
157 |     GL.glDisableClientState(GL_VERTEX_ARRAY)
158 | 
159 |     GL.glDisable(GL_BLEND)
160 | 
161 |     GLUT.glutSwapBuffers()
162 | 
163 | 
164 | window = glut_window()
165 | 
166 | (np_position, np_velocity, gl_position, gl_color) = initial_buffers(num_particles)
167 | 
168 | platform = cl.get_platforms()[0]
169 | context = cl.Context(
170 |     properties=[(cl.context_properties.PLATFORM, platform)]
171 |     + get_gl_sharing_context_properties()
172 | )
173 | queue = cl.CommandQueue(context)
174 | 
175 | cl_velocity = cl.Buffer(context, mf.COPY_HOST_PTR, hostbuf=np_velocity)
176 | cl_start_position = cl.Buffer(
177 |     context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np_position
178 | )
179 | cl_start_velocity = cl.Buffer(
180 |     context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np_velocity
181 | )
182 | 
183 | cl_gl_position = cl.GLBuffer(context, mf.READ_WRITE, int(gl_position))
184 | cl_gl_color = cl.GLBuffer(context, mf.READ_WRITE, int(gl_color))
185 | 
186 | kernel = """__kernel void particle_fountain(__global float4* position,
187 |                                             __global float4* color,
188 |                                             __global float4* velocity,
189 |                                             __global float4* start_position,
190 |                                             __global float4* start_velocity,
191 |                                             float time_step)
192 | {
193 |     unsigned int i = get_global_id(0);
194 |     float4 p = position[i];
195 |     float4 v = velocity[i];
196 |     float life = velocity[i].w;
197 |     life -= time_step;
198 |     if (life <= 0.f)
199 |     {
200 |         p = start_position[i];
201 |         v = start_velocity[i];
202 |         life = 1.0f;
203 |     }
204 | 
205 |     v.z -= 9.8f*time_step;
206 |     p.x += v.x*time_step;
207 |     p.y += v.y*time_step;
208 |     p.z += v.z*time_step;
209 |     v.w = life;
210 | 
211 |     position[i] = p;
212 |     velocity[i] = v;
213 | 
214 |     color[i].w = life; /* Fade points as life decreases */
215 | }"""
216 | program = cl.Program(context, kernel).build()
217 | 
218 | GLUT.glutMainLoop()
219 | 


--------------------------------------------------------------------------------
/examples/ipython-demo.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "id": "cc7d0709",
  7 |    "metadata": {
  8 |     "collapsed": false,
  9 |     "jupyter": {
 10 |      "outputs_hidden": false
 11 |     }
 12 |    },
 13 |    "outputs": [],
 14 |    "source": [
 15 |     "import numpy as np\n",
 16 |     "\n",
 17 |     "import pyopencl as cl\n",
 18 |     "import pyopencl.array"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "markdown",
 23 |    "id": "8ac8d7bb",
 24 |    "metadata": {},
 25 |    "source": [
 26 |     "Load the PyOpenCL IPython extension:"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": null,
 32 |    "id": "7023ca2f",
 33 |    "metadata": {
 34 |     "collapsed": false,
 35 |     "jupyter": {
 36 |      "outputs_hidden": false
 37 |     }
 38 |    },
 39 |    "outputs": [],
 40 |    "source": [
 41 |     "%load_ext pyopencl.ipython_ext"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "markdown",
 46 |    "id": "9544b53c",
 47 |    "metadata": {},
 48 |    "source": [
 49 |     "Create an OpenCL context and a command queue:"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": null,
 55 |    "id": "fac17999",
 56 |    "metadata": {
 57 |     "collapsed": false,
 58 |     "jupyter": {
 59 |      "outputs_hidden": false
 60 |     }
 61 |    },
 62 |    "outputs": [],
 63 |    "source": [
 64 |     "ctx = cl.create_some_context(interactive=True)\n",
 65 |     "queue = cl.CommandQueue(ctx)"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "markdown",
 70 |    "id": "a29daf04",
 71 |    "metadata": {},
 72 |    "source": [
 73 |     "-----\n",
 74 |     "\n",
 75 |     "Define an OpenCL kernel using the `%%cl_kernel` magic:"
 76 |    ]
 77 |   },
 78 |   {
 79 |    "cell_type": "code",
 80 |    "execution_count": null,
 81 |    "id": "65c7e6c9",
 82 |    "metadata": {
 83 |     "collapsed": false,
 84 |     "jupyter": {
 85 |      "outputs_hidden": false
 86 |     }
 87 |    },
 88 |    "outputs": [],
 89 |    "source": [
 90 |     "%%cl_kernel -o \"-cl-fast-relaxed-math\"\n",
 91 |     "\n",
 92 |     "__kernel void sum_vector(__global const float *a,\n",
 93 |     "__global const float *b, __global float *c)\n",
 94 |     "{\n",
 95 |     "  int gid = get_global_id(0);\n",
 96 |     "  c[gid] = a[gid] + b[gid];\n",
 97 |     "}"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "markdown",
102 |    "id": "cfb57357",
103 |    "metadata": {},
104 |    "source": [
105 |     "This looks for `cl_ctx` or `ctx` in the user namespace to find a PyOpenCL context.\n",
106 |     "\n",
107 |     "Kernel names are automatically injected into the user namespace, so we can just use `sum_vector` from Python below.\n",
108 |     "\n",
109 |     "Now create some data to work on:"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "code",
114 |    "execution_count": null,
115 |    "id": "1d80ff38",
116 |    "metadata": {
117 |     "collapsed": false,
118 |     "jupyter": {
119 |      "outputs_hidden": false
120 |     }
121 |    },
122 |    "outputs": [],
123 |    "source": [
124 |     "n = 10000\n",
125 |     "\n",
126 |     "a = cl.array.empty(queue, n, dtype=np.float32)\n",
127 |     "a.fill(15)\n",
128 |     "\n",
129 |     "rng = np.random.default_rng()\n",
130 |     "b_host = rng.normal(size=n).astype(np.float32)\n",
131 |     "b = cl.array.to_device(queue, b_host)\n",
132 |     "\n",
133 |     "c = cl.array.empty_like(a)"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "markdown",
138 |    "id": "61fccb61",
139 |    "metadata": {},
140 |    "source": [
141 |     "Run the kernel:"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "code",
146 |    "execution_count": null,
147 |    "id": "2ba991b3",
148 |    "metadata": {
149 |     "collapsed": false,
150 |     "jupyter": {
151 |      "outputs_hidden": false
152 |     }
153 |    },
154 |    "outputs": [],
155 |    "source": [
156 |     "sum_vector(queue, (n,), None, a.data, b.data, c.data)  # noqa: F821"
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "markdown",
161 |    "id": "11a55b38",
162 |    "metadata": {},
163 |    "source": [
164 |     "Check the result using `numpy` operations:"
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "code",
169 |    "execution_count": null,
170 |    "id": "ee3560c1",
171 |    "metadata": {
172 |     "collapsed": false,
173 |     "jupyter": {
174 |      "outputs_hidden": false
175 |     }
176 |    },
177 |    "outputs": [],
178 |    "source": [
179 |     "assert (c.get() == b_host + 15).all()"
180 |    ]
181 |   }
182 |  ],
183 |  "metadata": {
184 |   "kernelspec": {
185 |    "display_name": "Python 3 (ipykernel)",
186 |    "language": "python",
187 |    "name": "python3"
188 |   },
189 |   "language_info": {
190 |    "codemirror_mode": {
191 |     "name": "ipython",
192 |     "version": 3
193 |    },
194 |    "file_extension": ".py",
195 |    "mimetype": "text/x-python",
196 |    "name": "python",
197 |    "nbconvert_exporter": "python",
198 |    "pygments_lexer": "ipython3",
199 |    "version": "3.12.4"
200 |   }
201 |  },
202 |  "nbformat": 4,
203 |  "nbformat_minor": 5
204 | }
205 | 


--------------------------------------------------------------------------------
/examples/median-filter.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from imageio import imread, imsave
  3 | 
  4 | import pyopencl as cl
  5 | 
  6 | 
  7 | # Read in image
  8 | img = imread("noisyImage.jpg").astype(np.float32)
  9 | print(img.shape)
 10 | 
 11 | img = np.mean(img, axis=2)
 12 | print(img.shape)
 13 | 
 14 | ctx = cl.create_some_context()
 15 | queue = cl.CommandQueue(ctx)
 16 | 
 17 | mf = cl.mem_flags
 18 | 
 19 | # Kernel function
 20 | src = """
 21 | void sort(int *a, int *b, int *c) {
 22 |    int swap;
 23 |    if(*a > *b) {
 24 |       swap = *a;
 25 |       *a = *b;
 26 |       *b = swap;
 27 |    }
 28 |    if(*a > *c) {
 29 |       swap = *a;
 30 |       *a = *c;
 31 |       *c = swap;
 32 |    }
 33 |    if(*b > *c) {
 34 |       swap = *b;
 35 |       *b = *c;
 36 |       *c = swap;
 37 |    }
 38 | }
 39 | __kernel void medianFilter(
 40 |     __global float *img, __global float *result, __global int *width, __global
 41 |     int *height)
 42 | {
 43 |     int w = *width;
 44 |     int h = *height;
 45 |     int posx = get_global_id(1);
 46 |     int posy = get_global_id(0);
 47 |     int i = w*posy + posx;
 48 |     // Keeping the edge pixels the same
 49 |     if( posx == 0 || posy == 0 || posx == w-1 || posy == h-1 )
 50 |     {
 51 |         result[i] = img[i];
 52 |     }
 53 |     else
 54 |     {
 55 |         int pixel00, pixel01, pixel02, pixel10, pixel11, pixel12, pixel20,
 56 |             pixel21, pixel22;
 57 |         pixel00 = img[i - 1 - w];
 58 |         pixel01 = img[i- w];
 59 |         pixel02 = img[i + 1 - w];
 60 |         pixel10 = img[i - 1];
 61 |         pixel11 = img[i];
 62 |         pixel12 = img[i + 1];
 63 |         pixel20 = img[i - 1 + w];
 64 |         pixel21 = img[i + w];
 65 |         pixel22 = img[i + 1 + w];
 66 |         //sort the rows
 67 |         sort( &(pixel00), &(pixel01), &(pixel02) );
 68 |         sort( &(pixel10), &(pixel11), &(pixel12) );
 69 |         sort( &(pixel20), &(pixel21), &(pixel22) );
 70 |         //sort the columns
 71 |         sort( &(pixel00), &(pixel10), &(pixel20) );
 72 |         sort( &(pixel01), &(pixel11), &(pixel21) );
 73 |         sort( &(pixel02), &(pixel12), &(pixel22) );
 74 |         //sort the diagonal
 75 |         sort( &(pixel00), &(pixel11), &(pixel22) );
 76 |         // median is the the middle value of the diagonal
 77 |         result[i] = pixel11;
 78 |     }
 79 | }
 80 | """
 81 | 
 82 | # Kernel function instantiation
 83 | prg = cl.Program(ctx, src).build()
 84 | # Allocate memory for variables on the device
 85 | img_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=img)
 86 | result_g = cl.Buffer(ctx, mf.WRITE_ONLY, img.nbytes)
 87 | width_g = cl.Buffer(
 88 |     ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np.int32(img.shape[1])
 89 | )
 90 | height_g = cl.Buffer(
 91 |     ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np.int32(img.shape[0])
 92 | )
 93 | # Call Kernel. Automatically takes care of block/grid distribution
 94 | prg.medianFilter(queue, img.shape, None, img_g, result_g, width_g, height_g)
 95 | result = np.empty_like(img)
 96 | cl.enqueue_copy(queue, result, result_g)
 97 | 
 98 | # Save the median-filtered image
 99 | imsave("medianFilter-OpenCL.jpg", result, mode="RGB")
100 | 


--------------------------------------------------------------------------------
/examples/narray.py:
--------------------------------------------------------------------------------
 1 | # example by Roger Pau Monné
 2 | import numpy as np
 3 | 
 4 | import pyopencl as cl
 5 | 
 6 | 
 7 | demo_r = np.empty((500, 5), dtype=np.uint32)
 8 | ctx = cl.create_some_context()
 9 | queue = cl.CommandQueue(ctx)
10 | 
11 | mf = cl.mem_flags
12 | demo_buf = cl.Buffer(ctx, mf.WRITE_ONLY, demo_r.nbytes)
13 | 
14 | prg = cl.Program(ctx,
15 | """
16 | __kernel void demo(__global uint *demo)
17 | {
18 |     int i;
19 |     int gid = get_global_id(0);
20 |     for(i=0; i<5;i++)
21 |     {
22 |         demo[gid*5+i] = (uint) 1;
23 |     }
24 | }""")
25 | 
26 | try:
27 |     prg.build()
28 | except Exception:
29 |     print("Error:")
30 |     print(prg.get_build_info(ctx.devices[0], cl.program_build_info.LOG))
31 |     raise
32 | 
33 | prg.demo(queue, (500,), None, demo_buf)
34 | cl.enqueue_copy(queue, demo_r, demo_buf).wait()
35 | 
36 | for res in demo_r:
37 |     print(res)
38 | 


--------------------------------------------------------------------------------
/examples/noisyImage.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/pyopencl/b8b8d4d852e8a26356861ffda578874dc064e54c/examples/noisyImage.jpg


--------------------------------------------------------------------------------
/examples/svm.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import numpy as np
 4 | 
 5 | import pyopencl as cl
 6 | from pyopencl.characterize import (
 7 |     has_coarse_grain_buffer_svm,
 8 |     has_fine_grain_buffer_svm,
 9 |     has_fine_grain_system_svm,
10 | )
11 | 
12 | 
13 | ctx = cl.create_some_context()
14 | queue = cl.CommandQueue(ctx)
15 | 
16 | dev = queue.device
17 | 
18 | print(
19 |     f"Device '{dev.name}' on platform '{dev.platform.name} ({dev.platform.version})'"
20 |     " has the following SVM features:\n"
21 |     f"  Coarse-grained buffer SVM: {has_coarse_grain_buffer_svm(dev)}\n"
22 |     f"  Fine-grained buffer SVM:   {has_fine_grain_buffer_svm(dev)}\n"
23 |     f"  Fine-grained system SVM:   {has_fine_grain_system_svm(dev)}"
24 |     )
25 | 
26 | prg = cl.Program(ctx, """
27 | __kernel void twice(
28 |     __global float *a_g)
29 | {
30 |   int gid = get_global_id(0);
31 |   a_g[gid] = 2*a_g[gid];
32 | }
33 | """).build()
34 | 
35 | 
36 | if has_coarse_grain_buffer_svm(dev):
37 |     print("Testing coarse-grained buffer SVM...", end="")
38 | 
39 |     svm_ary = cl.SVM(cl.csvm_empty(ctx, 10, np.float32))
40 |     assert isinstance(svm_ary.mem, np.ndarray)
41 | 
42 |     with svm_ary.map_rw(queue) as ary:
43 |         ary.fill(17)  # use from host
44 |         orig_ary = ary.copy()
45 | 
46 |     prg.twice(queue, svm_ary.mem.shape, None, svm_ary)
47 |     queue.finish()
48 | 
49 |     with svm_ary.map_ro(queue) as ary:
50 |         assert np.array_equal(orig_ary*2, ary)
51 | 
52 |     print(" done.")
53 | 
54 | if has_fine_grain_buffer_svm(dev):
55 |     print("Testing fine-grained buffer SVM...", end="")
56 | 
57 |     ary = cl.fsvm_empty(ctx, 10, np.float32)
58 |     assert isinstance(ary.base, cl.SVMAllocation)
59 | 
60 |     ary.fill(17)
61 |     orig_ary = ary.copy()
62 | 
63 |     prg.twice(queue, ary.shape, None, cl.SVM(ary))
64 |     queue.finish()
65 | 
66 |     assert np.array_equal(orig_ary*2, ary)
67 | 
68 |     print(" done.")
69 | 
70 | if has_fine_grain_system_svm(dev):
71 |     print("Testing fine-grained system SVM...", end="")
72 | 
73 |     ary = np.zeros(10, np.float32)
74 |     assert isinstance(ary, np.ndarray)
75 | 
76 |     ary.fill(17)
77 |     orig_ary = ary.copy()
78 | 
79 |     prg.twice(queue, ary.shape, None, cl.SVM(ary))
80 |     queue.finish()
81 | 
82 |     assert np.array_equal(orig_ary*2, ary)
83 | 
84 |     print(" done.")
85 | 


--------------------------------------------------------------------------------
/examples/transpose.py:
--------------------------------------------------------------------------------
  1 | # Transposition of a matrix
  2 | # originally for PyCUDA by Hendrik Riedmann <riedmann@dam.brown.edu>
  3 | 
  4 | import numpy as np
  5 | import numpy.linalg as la
  6 | 
  7 | import pyopencl as cl
  8 | 
  9 | 
 10 | block_size = 16
 11 | 
 12 | 
 13 | class NaiveTranspose:
 14 |     def __init__(self, ctx):
 15 |         self.kernel = (
 16 |             cl.Program(
 17 |                 ctx,
 18 |                 """
 19 |         __kernel void transpose(
 20 |           __global float *a_t, __global float *a,
 21 |           unsigned a_width, unsigned a_height)
 22 |         {
 23 |           int read_idx = get_global_id(0) + get_global_id(1) * a_width;
 24 |           int write_idx = get_global_id(1) + get_global_id(0) * a_height;
 25 | 
 26 |           a_t[write_idx] = a[read_idx];
 27 |         }
 28 |         """,)
 29 |             .build()
 30 |             .transpose
 31 |         )
 32 | 
 33 |     def __call__(self, queue, tgt, src, shape):
 34 |         w, h = shape
 35 |         assert w % block_size == 0
 36 |         assert h % block_size == 0
 37 | 
 38 |         return self.kernel(
 39 |             queue,
 40 |             (w, h),
 41 |             (block_size, block_size),
 42 |             tgt,
 43 |             src,
 44 |             np.uint32(w),
 45 |             np.uint32(h),
 46 |         )
 47 | 
 48 | 
 49 | class SillyTranspose(NaiveTranspose):
 50 |     def __call__(self, queue, tgt, src, shape):
 51 |         w, h = shape
 52 |         assert w % block_size == 0
 53 |         assert h % block_size == 0
 54 | 
 55 |         return self.kernel(
 56 |             queue, (w, h), None, tgt, src, np.uint32(w), np.uint32(h)
 57 |         )
 58 | 
 59 | 
 60 | class TransposeWithLocal:
 61 |     def __init__(self, ctx):
 62 |         self.kernel = (
 63 |             cl.Program(
 64 |                 ctx,
 65 |                 """
 66 |         #define BLOCK_SIZE %(block_size)d
 67 |         #define A_BLOCK_STRIDE (BLOCK_SIZE * a_width)
 68 |         #define A_T_BLOCK_STRIDE (BLOCK_SIZE * a_height)
 69 | 
 70 |         __kernel __attribute__((reqd_work_group_size(BLOCK_SIZE, BLOCK_SIZE, 1)))
 71 |         void transpose(
 72 |           __global float *a_t, __global float *a,
 73 |           unsigned a_width, unsigned a_height,
 74 |           __local float *a_local)
 75 |         {
 76 |           int base_idx_a   =
 77 |             get_group_id(0) * BLOCK_SIZE +
 78 |             get_group_id(1) * A_BLOCK_STRIDE;
 79 |           int base_idx_a_t =
 80 |             get_group_id(1) * BLOCK_SIZE +
 81 |             get_group_id(0) * A_T_BLOCK_STRIDE;
 82 | 
 83 |           int glob_idx_a   =
 84 |             base_idx_a + get_local_id(0) + a_width * get_local_id(1);
 85 |           int glob_idx_a_t =
 86 |             base_idx_a_t + get_local_id(0) + a_height * get_local_id(1);
 87 | 
 88 |           a_local[get_local_id(1)*BLOCK_SIZE+get_local_id(0)] = a[glob_idx_a];
 89 | 
 90 |           barrier(CLK_LOCAL_MEM_FENCE);
 91 | 
 92 |           a_t[glob_idx_a_t] = a_local[get_local_id(0)*BLOCK_SIZE+get_local_id(1)];
 93 |         }
 94 |         """
 95 |                 % {"block_size": block_size},
 96 |             )
 97 |             .build()
 98 |             .transpose
 99 |         )
100 | 
101 |     def __call__(self, queue, tgt, src, shape):
102 |         w, h = shape
103 |         assert w % block_size == 0
104 |         assert h % block_size == 0
105 | 
106 |         return self.kernel(
107 |             queue,
108 |             (w, h),
109 |             (block_size, block_size),
110 |             tgt,
111 |             src,
112 |             np.uint32(w),
113 |             np.uint32(h),
114 |             cl.LocalMemory(4 * block_size * (block_size + 1)),
115 |         )
116 | 
117 | 
118 | def transpose_using_cl(ctx, queue, cpu_src, cls):
119 |     mf = cl.mem_flags
120 |     a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=cpu_src)
121 |     a_t_buf = cl.Buffer(ctx, mf.WRITE_ONLY, size=cpu_src.nbytes)
122 |     cls(ctx)(queue, a_t_buf, a_buf, cpu_src.shape)
123 | 
124 |     w, h = cpu_src.shape
125 |     result = np.empty((h, w), dtype=cpu_src.dtype)
126 |     cl.enqueue_copy(queue, result, a_t_buf).wait()
127 | 
128 |     a_buf.release()
129 |     a_t_buf.release()
130 | 
131 |     return result
132 | 
133 | 
134 | def check_transpose():
135 |     for cls in [NaiveTranspose, SillyTranspose, TransposeWithLocal]:
136 |         print("checking", cls.__name__)
137 |         ctx = cl.create_some_context()
138 | 
139 |         for dev in ctx.devices:
140 |             assert dev.local_mem_size > 0
141 | 
142 |         queue = cl.CommandQueue(ctx)
143 | 
144 |         for i in np.arange(10, 13, 0.125):
145 |             size = int(((2 ** i) // 32) * 32)
146 |             print(size)
147 | 
148 |             rng = np.random.default_rng()
149 |             source = rng.random((size, size), dtype=np.float32)
150 |             result = transpose_using_cl(ctx, queue, source, NaiveTranspose)
151 | 
152 |             err = source.T - result
153 |             err_norm = la.norm(err)
154 | 
155 |             assert err_norm == 0, (size, err_norm)
156 | 
157 | 
158 | def benchmark_transpose():
159 |     ctx = cl.create_some_context()
160 | 
161 |     for dev in ctx.devices:
162 |         assert dev.local_mem_size > 0
163 | 
164 |     queue = cl.CommandQueue(
165 |         ctx, properties=cl.command_queue_properties.PROFILING_ENABLE
166 |     )
167 | 
168 |     sizes = [int(((2 ** i) // 32) * 32) for i in np.arange(10, 13, 0.125)]
169 |     # for i in np.arange(10, 10.5, 0.125)]
170 | 
171 |     mem_bandwidths = {}
172 | 
173 |     methods = [SillyTranspose, NaiveTranspose, TransposeWithLocal]
174 |     for cls in methods:
175 |         name = cls.__name__.replace("Transpose", "")
176 | 
177 |         mem_bandwidths[cls] = meth_mem_bws = []
178 | 
179 |         for size in sizes:
180 |             rng = np.random.default_rng()
181 |             source = rng.random((size, size), dtype=np.float32)
182 | 
183 |             mf = cl.mem_flags
184 |             a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=source)
185 |             a_t_buf = cl.Buffer(ctx, mf.WRITE_ONLY, size=source.nbytes)
186 |             method = cls(ctx)
187 | 
188 |             for _i in range(4):
189 |                 method(queue, a_t_buf, a_buf, source.shape)
190 | 
191 |             count = 12
192 |             events = []
193 |             for _i in range(count):
194 |                 events.append(method(queue, a_t_buf, a_buf, source.shape))
195 | 
196 |             events[-1].wait()
197 |             time = sum(evt.profile.end - evt.profile.start for evt in events)
198 | 
199 |             mem_bw = 2 * source.nbytes * count / (time * 1e-9)
200 |             print("benchmarking", name, size, mem_bw / 1e9, "GB/s")
201 |             meth_mem_bws.append(mem_bw)
202 | 
203 |             a_buf.release()
204 |             a_t_buf.release()
205 | 
206 |     try:
207 |         from matplotlib.pyplot import clf, grid, legend, plot, savefig, xlabel, ylabel
208 |     except ModuleNotFoundError:
209 |         pass
210 |     else:
211 |         for i in range(len(methods)):
212 |             clf()
213 |             for j in range(i + 1):
214 |                 method = methods[j]
215 |                 name = method.__name__.replace("Transpose", "")
216 |                 plot(sizes, np.array(mem_bandwidths[method]) / 1e9, "o-",
217 |                         label=name)
218 | 
219 |             xlabel("Matrix width/height $N$")
220 |             ylabel("Memory Bandwidth [GB/s]")
221 |             legend(loc="best")
222 |             grid()
223 | 
224 |             savefig("transpose-benchmark-%d.pdf" % i)
225 | 
226 | 
227 | check_transpose()
228 | benchmark_transpose()
229 | 


--------------------------------------------------------------------------------
/pyopencl/_cluda.py:
--------------------------------------------------------------------------------
 1 | __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
 2 | 
 3 | __license__ = """
 4 | Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | of this software and associated documentation files (the "Software"), to deal
 6 | in the Software without restriction, including without limitation the rights
 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the Software is
 9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | """
22 | 
# Block of OpenCL C preprocessor definitions that gives short, uniform names
# to OpenCL qualifiers (KERNEL, GLOBAL_MEM, LOCAL_MEM, ...) and to the
# work-item index queries (LID_n = local id, GID_n = group id,
# LDIM_n = local size, GDIM_n = number of groups), each cast to ptrdiff_t.
#
# NOTE(review): the "% if double_support:" / "% endif" lines look like Mako
# control lines, so this string is presumably rendered as a template with a
# "double_support" variable before being handed to the compiler -- confirm
# at the call site.
CLUDA_PREAMBLE = """
#define local_barrier() barrier(CLK_LOCAL_MEM_FENCE);

#define WITHIN_KERNEL /* empty */
#define KERNEL __kernel
#define GLOBAL_MEM __global
#define LOCAL_MEM __local
#define LOCAL_MEM_ARG __local
#define REQD_WG_SIZE(X,Y,Z) __attribute__((reqd_work_group_size(X, Y, Z)))

#define LID_0 ((ptrdiff_t) get_local_id(0))
#define LID_1 ((ptrdiff_t) get_local_id(1))
#define LID_2 ((ptrdiff_t) get_local_id(2))

#define GID_0 ((ptrdiff_t) get_group_id(0))
#define GID_1 ((ptrdiff_t) get_group_id(1))
#define GID_2 ((ptrdiff_t) get_group_id(2))

#define LDIM_0 ((ptrdiff_t) get_local_size(0))
#define LDIM_1 ((ptrdiff_t) get_local_size(1))
#define LDIM_2 ((ptrdiff_t) get_local_size(2))

#define GDIM_0 ((ptrdiff_t) get_num_groups(0))
#define GDIM_1 ((ptrdiff_t) get_num_groups(1))
#define GDIM_2 ((ptrdiff_t) get_num_groups(2))

% if double_support:
    #if __OPENCL_C_VERSION__ < 120
    #pragma OPENCL EXTENSION cl_khr_fp64: enable
    #endif
% endif
"""
55 | 


--------------------------------------------------------------------------------
/pyopencl/_mymako.py:
--------------------------------------------------------------------------------
# Import Mako up front so that a missing installation is reported with
# installation hints instead of a bare ImportError; the original error is
# kept as the cause via "from err".
try:
    import mako.template  # noqa: F401
except ImportError as err:
    raise ImportError(
            "Some of PyOpenCL's facilities require the Mako templating engine.\n"
            "You or a piece of software you have used has tried to call such a\n"
            "part of PyOpenCL, but there was a problem importing Mako.\n\n"
            "You may install mako now by typing one of:\n"
            "- easy_install Mako\n"
            "- pip install Mako\n"
            "- aptitude install python-mako\n"
            "\nor whatever else is appropriate for your system.") from err

# Re-export the public names of the top-level mako package from this module.
from mako import *  # noqa: F403
15 | 


--------------------------------------------------------------------------------
/pyopencl/capture_call.py:
--------------------------------------------------------------------------------
  1 | __copyright__ = "Copyright (C) 2013 Andreas Kloeckner"
  2 | 
  3 | __license__ = """
  4 | Permission is hereby granted, free of charge, to any person obtaining a copy
  5 | of this software and associated documentation files (the "Software"), to deal
  6 | in the Software without restriction, including without limitation the rights
  7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 | copies of the Software, and to permit persons to whom the Software is
  9 | furnished to do so, subject to the following conditions:
 10 | 
 11 | The above copyright notice and this permission notice shall be included in
 12 | all copies or substantial portions of the Software.
 13 | 
 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 20 | THE SOFTWARE.
 21 | """
 22 | 
 23 | 
 24 | import numpy as np
 25 | 
 26 | from pytools.py_codegen import Indentation, PythonCodeGenerator
 27 | 
 28 | import pyopencl as cl
 29 | 
 30 | 
 31 | def capture_kernel_call(kernel, output_file, queue, g_size, l_size, *args, **kwargs):
 32 |     try:
 33 |         source = kernel._source
 34 |     except AttributeError as err:
 35 |         raise RuntimeError("cannot capture call, kernel source not available") from err
 36 | 
 37 |     if source is None:
 38 |         raise RuntimeError("cannot capture call, kernel source not available")
 39 | 
 40 |     cg = PythonCodeGenerator()
 41 | 
 42 |     cg("# generated by pyopencl.capture_call")
 43 |     cg("")
 44 |     cg("import numpy as np")
 45 |     cg("import pyopencl as cl")
 46 |     cg("from base64 import b64decode")
 47 |     cg("from zlib import decompress")
 48 |     cg("mf = cl.mem_flags")
 49 |     cg("")
 50 | 
 51 |     cg('CODE = r"""//CL//')
 52 |     for line in source.split("\n"):
 53 |         cg(line)
 54 |     cg('"""')
 55 | 
 56 |     # {{{ invocation
 57 | 
 58 |     arg_data = []
 59 | 
 60 |     cg("")
 61 |     cg("")
 62 |     cg("def main():")
 63 |     with Indentation(cg):
 64 |         cg("ctx = cl.create_some_context()")
 65 |         cg("queue = cl.CommandQueue(ctx)")
 66 |         cg("")
 67 | 
 68 |         kernel_args = []
 69 | 
 70 |         for i, arg in enumerate(args):
 71 |             if isinstance(arg, cl.Buffer):
 72 |                 buf = bytearray(arg.size)
 73 |                 cl.enqueue_copy(queue, buf, arg)
 74 |                 arg_data.append(("arg%d_data" % i, buf))
 75 |                 cg("arg%d = cl.Buffer(ctx, "
 76 |                         "mf.READ_WRITE | cl.mem_flags.COPY_HOST_PTR,"
 77 |                         % i)
 78 |                 cg("    hostbuf=decompress(b64decode(arg%d_data)))"
 79 |                         % i)
 80 |                 kernel_args.append("arg%d" % i)
 81 |             elif isinstance(arg, (int, float)):
 82 |                 kernel_args.append(repr(arg))
 83 |             elif isinstance(arg, np.integer):
 84 |                 kernel_args.append("np.{}({})".format(
 85 |                     arg.dtype.type.__name__, repr(int(arg))))
 86 |             elif isinstance(arg, np.floating):
 87 |                 kernel_args.append("np.{}({})".format(
 88 |                     arg.dtype.type.__name__, repr(float(arg))))
 89 |             elif isinstance(arg, np.complexfloating):
 90 |                 kernel_args.append("np.{}({})".format(
 91 |                     arg.dtype.type.__name__, repr(complex(arg))))
 92 |             else:
 93 |                 try:
 94 |                     arg_buf = memoryview(arg)
 95 |                 except Exception as err:
 96 |                     raise RuntimeError("cannot capture: "
 97 |                             "unsupported arg nr %d (0-based)" % i) from err
 98 | 
 99 |                 arg_data.append(("arg%d_data" % i, arg_buf))
100 |                 kernel_args.append("decompress(b64decode(arg%d_data))" % i)
101 | 
102 |         cg("")
103 | 
104 |         g_times_l = kwargs.get("g_times_l", False)
105 |         if g_times_l:
106 |             dim = max(len(g_size), len(l_size))
107 |             l_size = l_size + (1,) * (dim-len(l_size))
108 |             g_size = g_size + (1,) * (dim-len(g_size))
109 |             g_size = tuple(
110 |                     gs*ls for gs, ls in zip(g_size, l_size))
111 | 
112 |         global_offset = kwargs.get("global_offset", None)
113 |         if global_offset is not None:
114 |             kernel_args.append("global_offset=%s" % repr(global_offset))
115 | 
116 |         cg("prg = cl.Program(ctx, CODE).build()")
117 |         cg("knl = prg.%s" % kernel.function_name)
118 |         if hasattr(kernel, "_scalar_arg_dtypes"):
119 |             def strify_dtype(d):
120 |                 if d is None:
121 |                     return "None"
122 | 
123 |                 d = np.dtype(d)
124 |                 s = repr(d)
125 |                 if s.startswith("dtype"):
126 |                     s = "np."+s
127 | 
128 |                 return s
129 | 
130 |             cg("knl.set_scalar_arg_dtypes((%s,))"
131 |                     % ", ".join(
132 |                         strify_dtype(dt) for dt in kernel._scalar_arg_dtypes))
133 | 
134 |         cg("knl(queue, {}, {},".format(repr(g_size), repr(l_size)))
135 |         cg("    %s)" % ", ".join(kernel_args))
136 |         cg("")
137 |         cg("queue.finish()")
138 | 
139 |     # }}}
140 | 
141 |     # {{{ data
142 | 
143 |     from base64 import b64encode
144 |     from zlib import compress
145 |     cg("")
146 |     line_len = 70
147 | 
148 |     for name, val in arg_data:
149 |         cg("%s = (" % name)
150 |         with Indentation(cg):
151 |             val = b64encode(compress(memoryview(val))).decode()
152 |             i = 0
153 |             while i < len(val):
154 |                 cg(repr(val[i:i+line_len]))
155 |                 i += line_len
156 | 
157 |             cg(")")
158 | 
159 |     # }}}
160 | 
161 |     # {{{ file trailer
162 | 
163 |     cg("")
164 |     cg('if __name__ == "__main__":')
165 |     with Indentation(cg):
166 |         cg("main()")
167 |     cg("")
168 | 
169 |     cg("# vim: filetype=pyopencl")
170 | 
171 |     # }}}
172 | 
173 |     if isinstance(output_file, str):
174 |         with open(output_file, "w") as outf:
175 |             outf.write(cg.get())
176 |     else:
177 |         output_file.write(cg.get())
178 | 


--------------------------------------------------------------------------------
/pyopencl/characterize/performance.py:
--------------------------------------------------------------------------------
  1 | __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
  2 | 
  3 | __license__ = """
  4 | Permission is hereby granted, free of charge, to any person obtaining a copy
  5 | of this software and associated documentation files (the "Software"), to deal
  6 | in the Software without restriction, including without limitation the rights
  7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 | copies of the Software, and to permit persons to whom the Software is
  9 | furnished to do so, subject to the following conditions:
 10 | 
 11 | The above copyright notice and this permission notice shall be included in
 12 | all copies or substantial portions of the Software.
 13 | 
 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 20 | THE SOFTWARE.
 21 | """
 22 | 
 23 | import numpy as np
 24 | 
 25 | import pyopencl as cl
 26 | 
 27 | 
 28 | # {{{ timing helpers
 29 | 
 30 | class Timer:
 31 |     def __init__(self, queue):
 32 |         self.queue = queue
 33 | 
 34 |     def start(self):
 35 |         pass
 36 | 
 37 |     def stop(self):
 38 |         pass
 39 | 
 40 |     def add_event(self, evt):
 41 |         pass
 42 | 
 43 |     def get_elapsed(self):
 44 |         pass
 45 | 
 46 | 
 47 | class WallTimer(Timer):
 48 |     def start(self):
 49 |         from time import time
 50 |         self.queue.finish()
 51 |         self.start_time = time()
 52 | 
 53 |     def stop(self):
 54 |         from time import time
 55 |         self.queue.finish()
 56 |         self.end_time = time()
 57 | 
 58 |     def get_elapsed(self):
 59 |         return self.end_time-self.start_time
 60 | 
 61 | 
 62 | def _get_time(queue, f, timer_factory=None, desired_duration=0.1,
 63 |         warmup_rounds=3):
 64 | 
 65 |     if timer_factory is None:
 66 |         timer_factory = WallTimer
 67 | 
 68 |     count = 1
 69 | 
 70 |     while True:
 71 |         timer = timer_factory(queue)
 72 | 
 73 |         for _i in range(warmup_rounds):
 74 |             f()
 75 |         warmup_rounds = 0
 76 | 
 77 |         timer.start()
 78 |         for _i in range(count):
 79 |             timer.add_event(f())
 80 |         timer.stop()
 81 | 
 82 |         elapsed = timer.get_elapsed()
 83 |         if elapsed < desired_duration:
 84 |             if elapsed == 0:
 85 |                 count *= 5
 86 |             else:
 87 |                 new_count = int(desired_duration/elapsed)
 88 | 
 89 |                 new_count = max(2*count, new_count)
 90 |                 new_count = min(10*count, new_count)
 91 |                 count = new_count
 92 | 
 93 |         else:
 94 |             return elapsed/count
 95 | 
 96 | # }}}
 97 | 
 98 | 
 99 | # {{{ transfer measurements
100 | 
class HostDeviceTransferBase:
    """Holds a host array and a device buffer of *block_size* bytes each."""

    def __init__(self, queue, block_size):
        mem_flags = cl.mem_flags

        self.queue = queue
        # uninitialized byte array on the host side
        self.host_buf = np.empty(block_size, dtype=np.uint8)
        self.dev_buf = cl.Buffer(
                queue.context, mem_flags.READ_WRITE, block_size)
106 | 
107 | 
class HostToDeviceTransfer(HostDeviceTransferBase):
    """Transfer that copies the host array into the device buffer."""

    def do(self):
        """Enqueue the host-to-device copy and return its result."""
        destination = self.dev_buf
        origin = self.host_buf
        return cl.enqueue_copy(self.queue, destination, origin)
111 | 
112 | 
class DeviceToHostTransfer(HostDeviceTransferBase):
    """Transfer that copies the device buffer into the host array."""

    def do(self):
        """Enqueue the device-to-host copy and return its result."""
        destination = self.host_buf
        origin = self.dev_buf
        return cl.enqueue_copy(self.queue, destination, origin)
116 | 
117 | 
class DeviceToDeviceTransfer:
    """Transfer between two device buffers of *block_size* bytes each."""

    def __init__(self, queue, block_size):
        self.queue = queue
        read_write = cl.mem_flags.READ_WRITE
        self.dev_buf_1 = cl.Buffer(queue.context, read_write, block_size)
        self.dev_buf_2 = cl.Buffer(queue.context, read_write, block_size)

    def do(self):
        """Enqueue a copy from buffer 1 to buffer 2 and return its result."""
        destination = self.dev_buf_2
        origin = self.dev_buf_1
        return cl.enqueue_copy(self.queue, destination, origin)
127 | 
128 | 
def transfer_latency(queue, transfer_type, timer_factory=None):
    """Return the average time of a one-byte transfer of *transfer_type*.

    *transfer_type* is called as ``transfer_type(queue, 1)`` and must yield
    an object with a ``do`` method.
    """
    one_byte_transfer = transfer_type(queue, 1)
    return _get_time(
            queue, one_byte_transfer.do, timer_factory=timer_factory)
132 | 
133 | 
def transfer_bandwidth(queue, transfer_type, block_size, timer_factory=None):
    """Measures one-sided bandwidth.

    Returns *block_size* divided by the average time of one transfer of
    *block_size* bytes.
    """

    block_transfer = transfer_type(queue, block_size)
    seconds_per_block = _get_time(
            queue, block_transfer.do, timer_factory=timer_factory)
    return block_size/seconds_per_block
139 | 
140 | # }}}
141 | 
142 | 
def get_profiling_overhead(ctx, timer_factory=None):
    """Compare one-byte device copies on a plain and a profiling queue.

    Returns a tuple ``(prof_time - no_prof_time, prof_time)`` of the
    per-copy time difference and the per-copy time on the profiling queue.
    """
    def time_one_byte_copy(queue):
        copy = DeviceToDeviceTransfer(queue, 1)
        return _get_time(queue, copy.do, timer_factory=timer_factory)

    plain_queue = cl.CommandQueue(ctx)
    plain_time = time_one_byte_copy(plain_queue)

    profiling_queue = cl.CommandQueue(
            ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)
    profiled_time = time_one_byte_copy(profiling_queue)

    return profiled_time - plain_time, profiled_time
154 | 
155 | 
def get_empty_kernel_time(queue, timer_factory=None):
    """Return the average time of launching a kernel with an empty body.

    The kernel is launched with a global size of one work item and no
    explicit local size.
    """
    prg = cl.Program(queue.context, """
        __kernel void empty()
        { }
        """).build()

    knl = prg.empty

    def f():
        # Hand the launch result back: _get_time passes it on to
        # timer.add_event(), which would otherwise always receive None.
        return knl(queue, (1,), None)

    return _get_time(queue, f, timer_factory=timer_factory)
168 | 
169 | 
def _get_full_machine_kernel_rate(queue, src, args, name="benchmark",
        timer_factory=None):
    """Return the best observed work-item rate for kernel *name* of *src*.

    The kernel is timed at a global size starting at four times the
    device's compute-unit count and doubled after every attempt. Doubling
    continues while the rate (global size divided by elapsed time) improves
    by more than 5% over the best seen so far, and through at most three
    consecutive dips below 90% of the best. The maximum rate is returned.
    """
    prg = cl.Program(queue.context, src).build()

    knl = getattr(prg, name)

    dev = queue.device
    global_size = 4 * dev.max_compute_units

    def f():
        # Hand the launch result back: _get_time passes it on to
        # timer.add_event(), which would otherwise always receive None.
        return knl(queue, (global_size,), None, *args)

    rates = []
    num_dips = 0

    while True:
        elapsed = _get_time(queue, f, timer_factory=timer_factory)
        rate = global_size/elapsed

        # the first measurement has nothing to compare against
        keep_trying = not rates

        if rates:
            best_so_far = max(rates)

            if rate > 1.05*best_so_far:  # big improvement
                keep_trying = True
                num_dips = 0

            if rate < 0.9*best_so_far and num_dips < 3:  # big dip
                keep_trying = True
                num_dips += 1

        rates.append(rate)

        if not keep_trying:
            return max(rates)

        global_size *= 2
205 | 
206 | 
def get_add_rate(queue, type="float", timer_factory=None):
    """Return an estimate of additions per second for the OpenCL type *type*.

    The benchmark kernel performs 50 additions per loop iteration over 10
    iterations per work item; the measured work-item rate is scaled by that
    count. *timer_factory* is forwarded to the timing routine.
    """
    return 50*10*_get_full_machine_kernel_rate(queue, """
        typedef %(op_t)s op_t;
        __kernel void benchmark()
        {
            local op_t tgt[1024];
            op_t val = get_global_id(0);

            for (int i = 0; i < 10; ++i)
            {
                val += val; val += val; val += val; val += val; val += val;
                val += val; val += val; val += val; val += val; val += val;

                val += val; val += val; val += val; val += val; val += val;
                val += val; val += val; val += val; val += val; val += val;

                val += val; val += val; val += val; val += val; val += val;
                val += val; val += val; val += val; val += val; val += val;

                val += val; val += val; val += val; val += val; val += val;
                val += val; val += val; val += val; val += val; val += val;

                val += val; val += val; val += val; val += val; val += val;
                val += val; val += val; val += val; val += val; val += val;
            }
            tgt[get_local_id(0)] = val;
        }
        """ % {"op_t": type}, (), timer_factory=timer_factory)
235 | 
236 | 
237 | # vim: foldmethod=marker:filetype=pyopencl
238 | 


--------------------------------------------------------------------------------
/pyopencl/cl/pyopencl-bessel-j-complex.cl:
--------------------------------------------------------------------------------
  1 | /*
  2 | Evaluate Bessel J function J_v(z) and J_{v+1}(z) with v a nonnegative integer
  3 | and z anywhere in the complex plane.
  4 | 
  5 | Copyright (C) Vladimir Rokhlin
  6 | Copyright (C) 2010-2012 Leslie Greengard and Zydrunas Gimbutas
  7 | Copyright (C) 2015 Shidong Jiang, Andreas Kloeckner
  8 | 
  9 | Manually translated from
 10 | https://github.com/zgimbutas/fmmlib2d/blob/master/src/cdjseval2d.f
 11 | 
 12 | Originally licensed under GPL, permission to license under MIT granted via email
 13 | by Vladimir Rokhlin on May 25, 2015 and by Zydrunas Gimbutas on May 17, 2015.
 14 | 
 15 | Permission is hereby granted, free of charge, to any person obtaining a copy
 16 | of this software and associated documentation files (the "Software"), to deal
 17 | in the Software without restriction, including without limitation the rights
 18 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 19 | copies of the Software, and to permit persons to whom the Software is
 20 | furnished to do so, subject to the following conditions:
 21 | 
 22 | The above copyright notice and this permission notice shall be included in
 23 | all copies or substantial portions of the Software.
 24 | 
 25 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 26 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 27 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 28 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 29 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 30 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 31 | THE SOFTWARE.
 32 | 
 33 | */
 34 | 
// Evaluate the Bessel functions J_v(z) and J_{v+1}(z) for a nonnegative
// integer order v and complex z; results are stored in *j_v and *j_vp1.
//
// Two regimes are used:
//  - a power series truncated after kmax terms when |z| < small, or when
//    v > 12 and |z| < median;
//  - otherwise an upward recurrence that only determines a starting index,
//    followed by a downward recurrence whose values are normalized at the
//    end. If the upward recurrence runs past nmax, both outputs become NaN.
void bessel_j_complex(int v, cdouble_t z, cdouble_t *j_v, cdouble_t *j_vp1)
{
  int n;
  // upper limit on the recurrence index before giving up
  int nmax = 10000;

  int k;
  // number of power series terms that are summed
  int kmax=8;

  // number of rescalings that happened after j_v / j_vp1 were recorded
  int vscale, vp1scale;
  double vscaling, vp1scaling;

  // |z| thresholds selecting the power series branch
  const double small = 2e-1;
  const double median = 1.0e0;

  // magnitude-squared threshold that ends the upward recurrence and
  // triggers a rescale in the downward recurrence
  const double upbound = 1e40;
  const double upbound_inv = 1e-40;

  // NOTE(review): dd appears to be unused in this function
  double dd;
  double k_factorial_inv, kv_factorial_inv, kvp1_factorial_inv;

  cdouble_t z_half, mz_half2, mz_half_2k, z_half_v, z_half_vp1;

  cdouble_t ima = cdouble_new(0, 1);
  cdouble_t neg_ima = cdouble_new(0, -1);

  cdouble_t zinv, ztmp;
  cdouble_t j_nm1, j_n, j_np1;

  cdouble_t psi, zsn, zmul, zmulinv;
  cdouble_t unscaled_j_n, unscaled_j_nm1, unscaled_j_np1;
  cdouble_t unscaled_j_v, unscaled_j_vp1;
  cdouble_t scaling;

  // assert( v >= 0 );

  // Disabled special case for very small |z|; note that `tiny` is not
  // declared anywhere in this function.
#if 0
  if (cdouble_abs(z) < tiny)
  {
    if (v == 0)
    {
      *j_v = cdouble_new(1, 0);
      *j_vp1 = cdouble_new(0, 0);
    } else
    {
      *j_v = cdouble_new(0, 0);
      *j_vp1 = cdouble_new(0, 0);
    }
    return;
  }
#endif

  // {{{ power series for (small z) or (large v and median z)
  if ( (cdouble_abs(z) < small) || ( (v>12) && (cdouble_abs(z) < median)))
  {
    // Sums (z/2)^m * sum_k (-(z/2)^2)^k / (k! (k+m)!) for m = v and v+1.
    z_half = cdouble_divider(z,2.0);

    mz_half2 = cdouble_neg(cdouble_mul(z_half, z_half));

    z_half_v = cdouble_powr(z_half, v);
    z_half_vp1 = cdouble_mul(z_half_v, z_half);


    // compute 1/v!
    kv_factorial_inv = 1.0;
    for ( k = 1; k <= v; k++)
    {
      kv_factorial_inv /= k;
    }

    kvp1_factorial_inv = kv_factorial_inv / (v+1);

    k_factorial_inv = 1.0;

    // compute the power series of bessel j function
    mz_half_2k = cdouble_new(1.0, 0);

    *j_v = cdouble_new(0, 0);
    *j_vp1 = cdouble_new(0, 0);

    for ( k = 0; k < kmax; k++ )
    {
      *j_v = cdouble_add(
          *j_v,
          cdouble_mulr(mz_half_2k, kv_factorial_inv*k_factorial_inv));
      *j_vp1 = cdouble_add(*j_vp1,
          cdouble_mulr(mz_half_2k, kvp1_factorial_inv*k_factorial_inv));

      // advance to the next term: power of -(z/2)^2 and the inverse
      // factorials 1/(k+1)!, 1/(k+1+v)!, 1/(k+2+v)!
      mz_half_2k = cdouble_mul(mz_half_2k, mz_half2);
      k_factorial_inv /= (k+1);
      kv_factorial_inv /= (k+v+1);
      kvp1_factorial_inv /= (k+v+2);
    }

    *j_v = cdouble_mul(*j_v, z_half_v );
    *j_vp1 = cdouble_mul(*j_vp1, z_half_vp1 );

    return;
  }

  // }}}

  // {{{ use recurrence for large z

  // Upward pass: start from (0, 1) at index v and apply
  // j_{n+1} = (2n/z) j_n - j_{n-1} until |j_n|^2 exceeds upbound. Only the
  // final index n is used afterwards.
  j_nm1 = cdouble_new(0, 0);
  j_n = cdouble_new(1, 0);

  n = v;

  zinv = cdouble_rdivide(1,z);

  while (true)
  {
    j_np1 = cdouble_sub(
        cdouble_mul(cdouble_rmul(2*n, zinv), j_n),
        j_nm1);

    n += 1;
    j_nm1 = j_n;
    j_n = j_np1;

    // give up and signal failure with NaN outputs
    if (n > nmax)
    {
      *j_v = cdouble_new(nan(0x8e55e1u), 0);
      *j_vp1 = cdouble_new(nan(0x8e55e1u), 0);
      return;
    }

    if (cdouble_abs_squared(j_n) > upbound)
      break;
  }

  // downward recursion, account for rescalings
  // Record the number of times of the missed rescalings
  // for j_v and j_vp1.

  unscaled_j_np1 = cdouble_new(0, 0);
  unscaled_j_n = cdouble_new(1, 0);

  // Use normalization condition http://dlmf.nist.gov/10.12#E5
  psi = cdouble_new(0, 0);

  // choose +i or -i depending on the sign of Im(z); the same choice is
  // made again below when the exponential for the normalization is formed
  if (cdouble_imag(z) <= 0)
    zmul = ima;
  else
    zmul = neg_ima;

  // zsn tracks zmul^n; since zmul is +-i, only n modulo 4 matters
  zsn = cdouble_powr(zmul, n%4);

  zmulinv = cdouble_rdivide(1, zmul);

  vscale = 0;
  vp1scale = 0;

  while (n > 0)
  {
    // unscaled_j_{n-1} = (2n/z) unscaled_j_n - unscaled_j_{n+1}
    ztmp = cdouble_sub(
        cdouble_mul(cdouble_rmul(2*n, zinv), unscaled_j_n),
        unscaled_j_np1);

    unscaled_j_nm1 = ztmp;


    // accumulate the normalization sum with the current term
    psi = cdouble_add(psi, cdouble_mul(unscaled_j_n, zsn));
    zsn = cdouble_mul(zsn, zmulinv);

    n -= 1;
    unscaled_j_np1 = unscaled_j_n;
    unscaled_j_n = unscaled_j_nm1;

    // Rescale everything still in flight to keep magnitudes bounded.
    // Values recorded earlier (index already below v resp. v+1) miss this
    // rescale, so count it to be applied to them at the end.
    if (cdouble_abs_squared(ztmp) > upbound)
    {
      unscaled_j_np1 = cdouble_rmul(upbound_inv, unscaled_j_np1);
      unscaled_j_n = cdouble_rmul(upbound_inv, unscaled_j_n);
      psi = cdouble_rmul(upbound_inv,psi);
      if (n < v) vscale++;
      if (n < v+1) vp1scale++;
    }

    // record the (possibly just rescaled) values at the requested orders
    if (n == v)
      unscaled_j_v = unscaled_j_n;
    if (n == v+1)
      unscaled_j_vp1 = unscaled_j_n;

  }

  // the n = 0 term enters the normalization sum once, all others twice
  psi = cdouble_add(cdouble_rmul(2, psi), unscaled_j_n);

  if ( cdouble_imag(z) <= 0 )
  {
    scaling = cdouble_divide( cdouble_exp( cdouble_mul(ima,z) ), psi);
  } else
  {
    scaling = cdouble_divide( cdouble_exp( cdouble_mul(neg_ima,z) ), psi);
  }
  // apply the rescalings that the recorded values missed
  vscaling = pow(upbound_inv, (double) vscale);
  vp1scaling = pow(upbound_inv, (double) vp1scale);

  *j_v = cdouble_mul(unscaled_j_v, cdouble_mulr(scaling, vscaling));
  *j_vp1 = cdouble_mul(unscaled_j_vp1, cdouble_mulr(scaling,vp1scaling));

  // }}}
}
237 | 
238 | // vim: fdm=marker
239 | 


--------------------------------------------------------------------------------
/pyopencl/cl/pyopencl-eval-tbl.cl:
--------------------------------------------------------------------------------
  1 | //  Pieced together from Boost C++ and Cephes by
  2 | //  Andreas Kloeckner (C) 2012
  3 | //
  4 | //  Pieces from:
  5 | //
  6 | //  Copyright (c) 2006 Xiaogang Zhang, John Maddock
  7 | //  Use, modification and distribution are subject to the
  8 | //  Boost Software License, Version 1.0. (See
  9 | //  http://www.boost.org/LICENSE_1_0.txt)
 10 | //
 11 | // Cephes Math Library Release 2.8:  June, 2000
 12 | // Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier
 13 | // What you see here may be used freely, but it comes with no support or
 14 | // guarantee.
 15 | 
 16 | #pragma once
 17 | 
 18 | typedef double special_func_scalar_type;
 19 | 
 20 | // {{{ cephes_polevl
 21 | 
 22 | /*
 23 |  * DESCRIPTION:
 24 |  *
 25 |  * Evaluates polynomial of degree N:
 26 |  *
 27 |  *                     2          N
 28 |  * y  =  C  + C x + C x  +...+ C x
 29 |  *        0    1     2          N
 30 |  *
 31 |  * Coefficients are stored in reverse order:
 32 |  *
 33 |  * coef[0] = C  , ..., coef[N] = C  .
 34 |  *            N                   0
 35 |  *
 36 |  *  The function p1evl() assumes that coef[N] = 1.0 and is
 37 |  * omitted from the array.  Its calling arguments are
 38 |  * otherwise the same as polevl().
 39 |  *
 40 |  */
 41 | 
 42 | special_func_scalar_type cephes_polevl(special_func_scalar_type x, __constant const special_func_scalar_type *coef, int N)
 43 | {
 44 |   special_func_scalar_type ans;
 45 |   int i;
 46 |   __constant const special_func_scalar_type *p;
 47 | 
 48 |   p = coef;
 49 |   ans = *p++;
 50 |   i = N;
 51 | 
 52 |   do
 53 |     ans = ans * x  +  *p++;
 54 |   while( --i );
 55 | 
 56 |   return( ans );
 57 | }
 58 | 
 59 | // }}}
 60 | 
 61 | // {{{ cephes_p1evl
 62 | 
 63 | special_func_scalar_type cephes_p1evl( special_func_scalar_type x, __constant const special_func_scalar_type *coef, int N )
 64 | {
 65 |   special_func_scalar_type ans;
 66 |   __constant const special_func_scalar_type *p;
 67 |   int i;
 68 | 
 69 |   p = coef;
 70 |   ans = x + *p++;
 71 |   i = N-1;
 72 | 
 73 |   do
 74 |     ans = ans * x  + *p++;
 75 |   while( --i );
 76 | 
 77 |   return( ans );
 78 | }
 79 | 
 80 | // }}}
 81 | 
 82 | // {{{ boost_evaluate_rational
 83 | 
 84 | special_func_scalar_type boost_evaluate_rational_backend(__constant const special_func_scalar_type* num, __constant const special_func_scalar_type* denom, special_func_scalar_type z, int count)
 85 | {
 86 |    special_func_scalar_type s1, s2;
 87 |    if(z <= 1)
 88 |    {
 89 |       s1 = num[count-1];
 90 |       s2 = denom[count-1];
 91 |       for(int i = (int)count - 2; i >= 0; --i)
 92 |       {
 93 |          s1 *= z;
 94 |          s2 *= z;
 95 |          s1 += num[i];
 96 |          s2 += denom[i];
 97 |       }
 98 |    }
 99 |    else
100 |    {
101 |       z = 1 / z;
102 |       s1 = num[0];
103 |       s2 = denom[0];
104 |       for(unsigned i = 1; i < count; ++i)
105 |       {
106 |          s1 *= z;
107 |          s2 *= z;
108 |          s1 += num[i];
109 |          s2 += denom[i];
110 |       }
111 |    }
112 |    return s1 / s2;
113 | }
114 | 
115 | #define boost_evaluate_rational(num, denom, z) \
116 |   boost_evaluate_rational_backend(num, denom, z, sizeof(num)/sizeof(special_func_scalar_type))
117 | 
118 | // }}}
119 | 
120 | // vim: fdm=marker
121 | 


--------------------------------------------------------------------------------
/pyopencl/cl/pyopencl-random123/openclfeatures.h:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2010-2011, D. E. Shaw Research.
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are
 7 | met:
 8 | 
 9 | * Redistributions of source code must retain the above copyright
10 |   notice, this list of conditions, and the following disclaimer.
11 | 
12 | * Redistributions in binary form must reproduce the above copyright
13 |   notice, this list of conditions, and the following disclaimer in the
14 |   documentation and/or other materials provided with the distribution.
15 | 
16 | * Neither the name of D. E. Shaw Research nor the names of its
17 |   contributors may be used to endorse or promote products derived from
18 |   this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | */
// Feature configuration for building the Random123 headers as OpenCL C.
// Every setting is wrapped in #ifndef, so a definition made before this
// header is included takes precedence over the default chosen here.
#ifndef __openclfeatures_dot_hpp
#define __openclfeatures_dot_hpp

#ifndef R123_STATIC_INLINE
#define R123_STATIC_INLINE inline
#endif

#ifndef R123_FORCE_INLINE
#define R123_FORCE_INLINE(decl) decl __attribute__((always_inline))
#endif

// expands to nothing here
#ifndef R123_CUDA_DEVICE
#define R123_CUDA_DEVICE
#endif

// assertions expand to nothing here
#ifndef R123_ASSERT
#define R123_ASSERT(x)
#endif

// the likelihood hint is dropped; only the expression remains
#ifndef R123_BUILTIN_EXPECT
#define R123_BUILTIN_EXPECT(expr,likely) expr
#endif

#ifndef R123_USE_GNU_UINT128
#define R123_USE_GNU_UINT128 0
#endif

#ifndef R123_USE_MULHILO64_ASM
#define R123_USE_MULHILO64_ASM 0
#endif

#ifndef R123_USE_MULHILO64_MSVC_INTRIN
#define R123_USE_MULHILO64_MSVC_INTRIN 0
#endif

#ifndef R123_USE_MULHILO64_CUDA_INTRIN
#define R123_USE_MULHILO64_CUDA_INTRIN 0
#endif

// The OpenCL intrinsic path is enabled by default and disabled when
// PYOPENCL_USING_OCLGRIND is defined.
#ifndef R123_USE_MULHILO64_OPENCL_INTRIN
#ifdef PYOPENCL_USING_OCLGRIND
#define R123_USE_MULHILO64_OPENCL_INTRIN 0
#else
#define R123_USE_MULHILO64_OPENCL_INTRIN 1
#endif
#endif

#ifndef R123_USE_AES_NI
#define R123_USE_AES_NI 0
#endif

// XXX ATI APP SDK 2.4 clBuildProgram SEGVs if one uses uint64_t instead of
// ulong to mul_hi.  And gets lots of complaints from stdint.h
// on some machines.
// But these typedefs mean we cannot include stdint.h with
// these headers?  Do we need R123_64T, R123_32T, R123_8T?
typedef ulong uint64_t;
typedef uint  uint32_t;
typedef uchar uint8_t;
// builds a ulong constant by pasting a UL suffix onto the literal
#define UINT64_C(x) ((ulong)(x##UL))

#endif
94 | 


--------------------------------------------------------------------------------
/pyopencl/cltypes.py:
--------------------------------------------------------------------------------
  1 | __copyright__ = "Copyright (C) 2016 Jonathan Mackenzie"
  2 | 
  3 | __license__ = """
  4 | Permission is hereby granted, free of charge, to any person obtaining a copy
  5 | of this software and associated documentation files (the "Software"), to deal
  6 | in the Software without restriction, including without limitation the rights
  7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 | copies of the Software, and to permit persons to whom the Software is
  9 | furnished to do so, subject to the following conditions:
 10 | The above copyright notice and this permission notice shall be included in
 11 | all copies or substantial portions of the Software.
 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 13 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 14 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 15 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 16 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 17 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 18 | THE SOFTWARE.
 19 | """
 20 | 
 21 | import warnings
 22 | 
 23 | import numpy as np
 24 | 
 25 | from pyopencl.tools import get_or_register_dtype
 26 | 
 27 | 
# Emit the notice below only when the path of the file being executed ends
# in "array.py"; under the name "cltypes.py" this branch is not taken.
# No warning category is passed, so the default category applies.
if __file__.endswith("array.py"):
    warnings.warn(
        "pyopencl.array.vec is deprecated. Please use pyopencl.cltypes.",
        stacklevel=2)
 32 | 
 33 | """
 34 | This file provides a type mapping from OpenCL type names to their numpy equivalents.
 35 | """
 36 | 
# Scalar OpenCL type names, each bound to a numpy scalar type.
# NOTE: ``int`` and ``float`` below shadow the Python builtins of the same
# name for the rest of this module.

# 8-, 16-, 32- and 64-bit integers, signed and unsigned
char = np.int8
uchar = np.uint8
short = np.int16
ushort = np.uint16
int = np.int32
uint = np.uint32
long = np.int64
ulong = np.uint64

# 16-, 32- and 64-bit floating point
half = np.float16
float = np.float32
double = np.float64
 48 | 
 49 | 
 50 | # {{{ vector types
 51 | 
 52 | def _create_vector_types():
 53 |     mapping = [(k, globals()[k]) for k in
 54 |                 ["char", "uchar", "short", "ushort", "int",
 55 |                  "uint", "long", "ulong", "float", "double"]]
 56 | 
 57 |     def set_global(key, val):
 58 |         globals()[key] = val
 59 | 
 60 |     vec_types = {}
 61 |     vec_type_to_scalar_and_count = {}
 62 | 
 63 |     field_names = ["x", "y", "z", "w"]
 64 | 
 65 |     counts = [2, 3, 4, 8, 16]
 66 | 
 67 |     for base_name, base_type in mapping:
 68 |         for count in counts:
 69 |             name = "%s%d" % (base_name, count)
 70 | 
 71 |             titles = field_names[:count]
 72 | 
 73 |             padded_count = count
 74 |             if count == 3:
 75 |                 padded_count = 4
 76 | 
 77 |             names = ["s%d" % i for i in range(count)]
 78 |             while len(names) < padded_count:
 79 |                 names.append("padding%d" % (len(names) - count))
 80 | 
 81 |             if len(titles) < len(names):
 82 |                 titles.extend((len(names) - len(titles)) * [None])
 83 | 
 84 |             try:
 85 |                 dtype = np.dtype({
 86 |                     "names": names,
 87 |                     "formats": [base_type] * padded_count,
 88 |                     "titles": titles})
 89 |             except NotImplementedError:
 90 |                 try:
 91 |                     dtype = np.dtype([((n, title), base_type)
 92 |                                       for (n, title) in zip(names, titles)])
 93 |                 except TypeError:
 94 |                     dtype = np.dtype([(n, base_type) for (n, title)
 95 |                                       in zip(names, titles)])
 96 | 
 97 |             get_or_register_dtype(name, dtype)
 98 | 
 99 |             set_global(name, dtype)
100 | 
101 |             def create_array(dtype, count, padded_count, *args, **kwargs):
102 |                 if len(args) < count:
103 |                     from warnings import warn
104 |                     warn("default values for make_xxx are deprecated;"
105 |                          " instead specify all parameters or use"
106 |                          " cltypes.zeros_xxx",
107 |                          DeprecationWarning, stacklevel=4)
108 | 
109 |                 padded_args = tuple(list(args) + [0] * (padded_count - len(args)))
110 |                 array = eval("array(padded_args, dtype=dtype)",
111 |                              {"array": np.array,
112 |                               "padded_args": padded_args,
113 |                               "dtype": dtype})
114 |                 for key, val in list(kwargs.items()):
115 |                     array[key] = val
116 |                 return array
117 | 
118 |             set_global("make_" + name, eval(
119 |                 "lambda *args, **kwargs: create_array(dtype, %i, %i, "
120 |                 "*args, **kwargs)" % (count, padded_count),
121 |                 {"create_array": create_array, "dtype": dtype}))
122 |             set_global("filled_" + name, eval(
123 |                 "lambda val: make_%s(*[val]*%i)" % (name, count)))
124 |             set_global("zeros_" + name, eval("lambda: filled_%s(0)" % (name)))
125 |             set_global("ones_" + name, eval("lambda: filled_%s(1)" % (name)))
126 | 
127 |             vec_types[np.dtype(base_type), count] = dtype
128 |             vec_type_to_scalar_and_count[dtype] = np.dtype(base_type), count
129 | 
130 |     return vec_types, vec_type_to_scalar_and_count
131 | 
132 | 
133 | vec_types, vec_type_to_scalar_and_count = _create_vector_types()
134 | 
135 | # }}}
136 | 
137 | # vim: foldmethod=marker
138 | 


--------------------------------------------------------------------------------
/pyopencl/ipython_ext.py:
--------------------------------------------------------------------------------
 1 | from IPython.core.magic import Magics, cell_magic, line_magic, magics_class
 2 | 
 3 | import pyopencl as cl
 4 | 
 5 | 
 6 | @magics_class
 7 | class PyOpenCLMagics(Magics):
 8 |     def _run_kernel(self, kernel, options):
 9 |         try:
10 |             ctx = self.shell.user_ns["cl_ctx"]
11 |         except KeyError:
12 |             ctx = None
13 | 
14 |         if not isinstance(ctx, cl.Context):
15 |             ctx = None
16 | 
17 |         if ctx is None:
18 |             try:
19 |                 ctx = self.shell.user_ns["ctx"]
20 |             except KeyError:
21 |                 ctx = None
22 | 
23 |         if ctx is None or not isinstance(ctx, cl.Context):
24 |             raise RuntimeError("unable to locate cl context, which must be "
25 |                     "present in namespace as 'cl_ctx' or 'ctx'")
26 | 
27 |         prg = cl.Program(ctx, kernel).build(options=options.split())
28 | 
29 |         for knl in prg.all_kernels():
30 |             self.shell.user_ns[knl.function_name] = knl
31 | 
32 |     @cell_magic
33 |     def cl_kernel(self, line, cell):
34 |         kernel = cell
35 | 
36 |         opts, _args = self.parse_options(line, "o:")
37 |         build_options = opts.get("o", "")
38 | 
39 |         self._run_kernel(kernel, build_options)
40 | 
41 |     def _load_kernel_and_options(self, line):
42 |         opts, args = self.parse_options(line, "o:f:")
43 | 
44 |         build_options = opts.get("o")
45 |         kernel = self.shell.find_user_code(opts.get("f") or args)
46 | 
47 |         return kernel, build_options
48 | 
49 |     @line_magic
50 |     def cl_kernel_from_file(self, line):
51 |         kernel, build_options = self._load_kernel_and_options(line)
52 |         self._run_kernel(kernel, build_options)
53 | 
54 |     @line_magic
55 |     def cl_load_edit_kernel(self, line):
56 |         kernel, build_options = self._load_kernel_and_options(line)
57 |         header = "%%cl_kernel"
58 | 
59 |         if build_options:
60 |             header = f'{header} -o "{build_options}"'
61 | 
62 |         content = f"{header}\n\n{kernel}"
63 | 
64 |         self.shell.set_next_input(content)
65 | 
66 | 
def load_ipython_extension(ip):
    """Register :class:`PyOpenCLMagics` on the IPython shell object *ip*."""
    ip.register_magics(PyOpenCLMagics)
69 | 


--------------------------------------------------------------------------------
/pyopencl/version.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from importlib import metadata
 3 | 
 4 | 
 5 | VERSION_TEXT = metadata.version("pyopencl")
 6 | _match = re.match(r"^([0-9.]+)([a-z0-9]*?)$", VERSION_TEXT)
 7 | assert _match is not None
 8 | VERSION_STATUS = _match.group(2)
 9 | VERSION = tuple(int(nr) for nr in _match.group(1).split("."))
10 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
  1 | [build-system]
  2 | build-backend = "scikit_build_core.build"
  3 | requires = [
  4 |     "scikit-build-core >=0.9.3",
  5 |     "nanobind >=1.9.2",
  6 |     # https://numpy.org/doc/stable/dev/depending_on_numpy.html#build-time-dependency
  7 |     # Just depending on numpy will automatically expose the oldest supported ABI.
  8 |     # - Retrieved 2024-06-24, AK
  9 |     "numpy",
 10 | ]
 11 | 
 12 | [project]
 13 | name = "pyopencl"
 14 | version = "2025.1"
 15 | description = "Python wrapper for OpenCL"
 16 | readme = "README.rst"
 17 | license = "MIT"
 18 | authors = [
 19 |     { name = "Andreas Kloeckner", email = "inform@tiker.net" },
 20 | ]
 21 | requires-python = "~=3.8"
 22 | classifiers = [
 23 |     "Development Status :: 5 - Production/Stable",
 24 |     "Environment :: Console",
 25 |     "Intended Audience :: Developers",
 26 |     "Intended Audience :: Other Audience",
 27 |     "Intended Audience :: Science/Research",
 28 |     "Natural Language :: English",
 29 |     "Programming Language :: C++",
 30 |     "Programming Language :: Python",
 31 |     "Programming Language :: Python :: 3 :: Only",
 32 |     "Topic :: Scientific/Engineering",
 33 |     "Topic :: Scientific/Engineering :: Mathematics",
 34 |     "Topic :: Scientific/Engineering :: Physics",
 35 | ]
 36 | dependencies = [
 37 |     "importlib-resources; python_version<'3.9'",
 38 |     "numpy",
 39 |     "platformdirs>=2.2",
 40 |     "pytools>=2024.1.5",
 41 | ]
 42 | 
 43 | [project.optional-dependencies]
 44 | oclgrind = [
 45 |     "oclgrind-binary-distribution>=18.3",
 46 | ]
 47 | pocl = [
 48 |     "pocl-binary-distribution>=1.2",
 49 | ]
 50 | test = [
 51 |     "ruff",
 52 |     "mako",
 53 |     "mypy",
 54 |     "pylint",
 55 |     "pytest>=7",
 56 | ]
 57 | 
 58 | [project.urls]
 59 | Documentation = "https://documen.tician.de/pyopencl"
 60 | Homepage = "https://mathema.tician.de/software/pyopencl"
 61 | Repository = "https://github.com/inducer/pyopencl"
 62 | 
 63 | [tool.scikit-build]
 64 | sdist.exclude = [
 65 |     ".mypy_cache",
 66 |     ".ci",
 67 |     ".github",
 68 |     ".conda-ci-build-configure.sh",
 69 |     "doc/upload-docs.sh",
 70 |     ".editorconfig",
 71 |     "TODOs",
 72 |     "run-*.sh",
 73 | ]
 74 | 
 75 | [tool.inducer-ci-support]
 76 | disable-editable-pip-install = true
 77 | 
 78 | [tool.ruff.lint]
 79 | preview = true
 80 | extend-select = [
 81 |     "B",    # flake8-bugbear
 82 |     "C",    # flake8-comprehensions
 83 |     "E",    # pycodestyle
 84 |     "F",    # pyflakes
 85 |     "G",    # flake8-logging-format
 86 |     "I",    # flake8-isort
 87 |     "N",    # pep8-naming
 88 |     "NPY",  # numpy
 89 |     "Q",    # flake8-quotes
 90 |     "RUF",  # ruff
 91 |     "UP",   # pyupgrade
 92 |     "W",    # pycodestyle
 93 | ]
 94 | extend-ignore = [
 95 |     "E226", # missing whitespace around arithmetic operator
 96 |     "E241", # multiple spaces after comma
 97 |     "E402", # module level import not at the top of file
 98 |     "C90",  # McCabe complexity
 99 |     "UP031", # use f-strings instead of %
100 |     "UP032", # use f-strings instead of .format
101 | ]
102 | exclude = [
103 |     "examples/gl_interop_demo.py",
104 |     "examples/gl_particle_animation.py",
105 |     "pyopencl/compyte/**/*.py",
106 | ]
107 | 
108 | [tool.ruff.lint.per-file-ignores]
109 | "examples/pi-monte-carlo.py" = ["N", "B", "F841"]
110 | "examples/black-hole-accretion.py" = ["N", "E501", "B"]
111 | "examples/n-body.py" = ["N", "E501"]
112 | "pyopencl/__init__.py" = ["I001"]
113 | "contrib/fortran-to-opencl/translate.py" = ["N802", "N815", "B"]
114 | 
115 | [tool.ruff.lint.flake8-quotes]
116 | inline-quotes = "double"
117 | docstring-quotes = "double"
118 | multiline-quotes = "double"
119 | 
120 | [tool.ruff.lint.isort]
121 | known-first-party = ["pytools", "pymbolic", "cgen"]
122 | known-local-folder = ["pyopencl"]
123 | lines-after-imports = 2
124 | combine-as-imports = true
125 | 
126 | [tool.pytest.ini_options]
127 | markers = [
128 |     "bitonic: tests involving bitonic sort"
129 | ]
130 | 
131 | [tool.mypy]
132 | warn_unused_ignores = true
133 | exclude = ["pyopencl/compyte"]
134 | 
135 | [[tool.mypy.overrides]]
136 | module = [
137 |     "IPython.*",
138 |     "OpenGL.*",
139 |     "mako.*",
140 |     "matplotlib.*",
141 |     "pyfmmlib.*",
142 |     "pyopencl._cl.*",
143 |     "pytest.*",
144 |     "scipy.*",
145 | ]
146 | ignore_missing_imports = true
147 | 
148 | [[tool.mypy.overrides]]
149 | module = ["pyopencl.compyte.*"]
150 | follow_imports = "skip"
151 | 
152 | [tool.cibuildwheel]
153 | test-command = "pytest {project}/test"
154 | test-extras = [
155 |     "test",
156 | ]
157 | environment-pass = [
158 |     "CL_INC_DIR",
159 |     "CL_LIB_DIR",
160 | ]
161 | test-skip = [
162 |     "*-macosx_*:arm64",
163 |     "*-macosx_arm64",
164 | ]
165 | 
166 | [tool.cibuildwheel.linux]
167 | skip = [
168 |     "pp*",
169 |     "cp36-*",
170 |     "cp37-*",
171 |     "*_i686",
172 | ]
173 | test-command = ""
174 | before-all = [
175 |     "yum install -y git openssl-devel ruby",
176 |     "bash {package}/scripts/build-ocl.sh",
177 | ]
178 | before-build = [
179 |     "pip install numpy -Csetup-args=-Dallow-noblas=true",
180 | ]
181 | repair-wheel-command = "auditwheel repair -w {dest_dir} --lib-sdir=/.libs {wheel}"
182 | 
183 | [[tool.cibuildwheel.overrides]]
184 | select = "*-musllinux*"
185 | before-all = [
186 |     "apk add ruby git openssl-dev libtool",
187 |     "bash {package}/scripts/build-ocl.sh",
188 | ]
189 | repair-wheel-command = "auditwheel repair -w {dest_dir} --lib-sdir=/.libs {wheel}"
190 | 
191 | [tool.cibuildwheel.macos]
192 | skip = [
193 |     "pp*",
194 |     "cp36-*",
195 |     "cp37-*",
196 | ]
197 | before-all = "bash {package}/scripts/build-ocl-macos.sh"
198 | test-command = "pytest {project}/test/test_array.py" # same limitation as conda-forge
199 | archs = "x86_64 arm64"
200 | 
201 | # https://github.com/conda-forge/pyopencl-feedstock/blob/6f3c5de59b18c9518abba3cb94f6ae92964553f8/recipe/meta.yaml#L62-L63
202 | 
203 | [tool.cibuildwheel.macos.environment]
204 | # Needed for full C++17 support
205 | MACOSX_DEPLOYMENT_TARGET = "10.14"
206 | 
207 | [tool.cibuildwheel.windows]
208 | skip = [
209 |     "*-win32",
210 |     "pp*",
211 |     "cp36-*",
212 |     "cp37-*",
213 | ]
214 | test-command = ""
215 | before-all = "bash {package}/scripts/build-ocl-windows.sh"
216 | 
217 | [tool.typos.default]
218 | extend-ignore-re = [
219 |   "(?Rm)^.*(#|//)\\s*spellchecker:\\s*disable-line$"
220 | ]
221 | 
222 | [tool.typos.default.extend-words]
223 | # for ND Range
224 | ND = "ND"
225 | nd = "nd"
226 | 
227 | # level-of-detail
228 | LOD = "LOD"
229 | 
230 | # short for 'series'
231 |  "ser" = "ser"
232 | 
233 | # like the numpy function
234 |  "arange" = "arange"
235 | 
236 | [tool.typos.files]
237 | extend-exclude = [
238 | # No thanks, hex IDs in JSON should not be spellchecked.
239 | "examples/*.ipynb",
240 | # Copied from upstream
241 | "pyopencl/cl/pyopencl-random123/*",
242 | # This one has comments in French
243 | "examples/black-hole-accretion.py"
244 | ]
245 | 
246 | [tool.basedpyright]
247 | reportImplicitStringConcatenation = "none"
248 | reportUnnecessaryIsInstance = "none"
249 | reportUnusedCallResult = "none"
250 | reportExplicitAny = "none"
251 | reportUnreachable = "hint"
252 | 
253 | # This reports even cycles that are qualified by 'if TYPE_CHECKING'. Not what
254 | # we care about at this moment.
255 | # https://github.com/microsoft/pyright/issues/746
256 | reportImportCycles = "none"
257 | pythonVersion = "3.10"
258 | pythonPlatform = "All"
259 | 
260 | [[tool.basedpyright.executionEnvironments]]
261 | root = "test"
262 | reportUnknownArgumentType = "hint"
263 | reportPrivateUsage = "none"
264 | 
265 | 


--------------------------------------------------------------------------------
/run-mypy.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 | 
3 | python -m mypy pyopencl test
4 | 


--------------------------------------------------------------------------------
/run-pylint.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -o errexit -o nounset
 4 | 
 5 | ci_support="https://gitlab.tiker.net/inducer/ci-support/raw/main"
 6 | 
 7 | if [[ ! -f .pylintrc.yml ]]; then
 8 |     curl -o .pylintrc.yml "${ci_support}/.pylintrc-default.yml"
 9 | fi
10 | 
11 | 
12 | if [[ ! -f .run-pylint.py ]]; then
13 |     curl -L -o .run-pylint.py "${ci_support}/run-pylint.py"
14 | fi
15 | 
16 | 
17 | PYLINT_RUNNER_ARGS="--jobs=4 --yaml-rcfile=.pylintrc.yml"
18 | 
19 | if [[ -f .pylintrc-local.yml ]]; then
20 |     PYLINT_RUNNER_ARGS+=" --yaml-rcfile=.pylintrc-local.yml"
21 | fi
22 | 
23 | python .run-pylint.py $PYLINT_RUNNER_ARGS $(basename $PWD) test/*.py "$@"
24 | 


--------------------------------------------------------------------------------
/scripts/build-ocl-macos.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 3 | 
 4 | set -o xtrace
 5 | 
 6 | git clone --branch v2022.01.04 https://github.com/KhronosGroup/OpenCL-ICD-Loader
 7 | git clone --branch v2022.01.04 https://github.com/KhronosGroup/OpenCL-Headers
 8 | 
 9 | 
10 | 
11 | cmake -D CMAKE_INSTALL_PREFIX=./OpenCL-Headers/install -S ./OpenCL-Headers -B ./OpenCL-Headers/build
12 | cmake --build ./OpenCL-Headers/build --target install
13 | 
14 | cmake -D CMAKE_PREFIX_PATH=${PWD}/OpenCL-Headers/install -D OPENCL_ICD_LOADER_HEADERS_DIR=${PWD}/OpenCL-Headers/install/include -D CMAKE_INSTALL_PREFIX=./OpenCL-ICD-Loader/install -S ./OpenCL-ICD-Loader -B ./OpenCL-ICD-Loader/build 
15 | cmake --build ./OpenCL-ICD-Loader/build --target install --config Release
16 | 
17 | echo "PyOpenCL wheel includes Khronos Group OpenCL-ICD-Loader which is licensed as below" >> ${SCRIPT_DIR}/../LICENSE
18 | cat ./OpenCL-ICD-Loader/LICENSE >> ${SCRIPT_DIR}/../LICENSE
19 | 


--------------------------------------------------------------------------------
/scripts/build-ocl-windows.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 3 | 
 4 | set -o xtrace
 5 | 
 6 | git clone --branch v2022.01.04 https://github.com/KhronosGroup/OpenCL-ICD-Loader
 7 | 
 8 | git clone --branch v2022.01.04 https://github.com/KhronosGroup/OpenCL-Headers
 9 | 
10 | 
11 | cmake -D CMAKE_INSTALL_PREFIX=./OpenCL-Headers/install -S ./OpenCL-Headers -B ./OpenCL-Headers/build
12 | cmake --build ./OpenCL-Headers/build --target install
13 | 
14 | # if someone would like to try to create win32 wheels the below lines may be useful
15 | # cmake -D CMAKE_PREFIX_PATH=${PWD}/OpenCL-Headers/install -DOPENCL_ICD_LOADER_HEADERS_DIR=${PWD}/OpenCL-Headers/install/include -S ./OpenCL-ICD-Loader -B ./OpenCL-ICD-Loader/build
16 | # cmake --build ./OpenCL-ICD-Loader/build --target install --config Release
17 | 
18 | cmake -D CMAKE_PREFIX_PATH=${PWD}/OpenCL-Headers/install -D OPENCL_ICD_LOADER_HEADERS_DIR=${PWD}/OpenCL-Headers/install/include -S ./OpenCL-ICD-Loader -B ./OpenCL-ICD-Loader/build2 -A x64
19 | cmake --build ./OpenCL-ICD-Loader/build2 --target install --config Release
20 | 
21 | echo "PyOpenCL wheel includes Khronos Group OpenCL-ICD-Loader which is licensed as below:" >> ${SCRIPT_DIR}/../LICENSE
22 | cat ./OpenCL-ICD-Loader/LICENSE >> ${SCRIPT_DIR}/../LICENSE
23 | 


--------------------------------------------------------------------------------
/scripts/build-ocl.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 3 | 
 4 | set -e -x
 5 | 
 6 | mkdir -p ~/deps
 7 | cd ~/deps
 8 | 
 9 | git clone --branch v2.3.1 https://github.com/OCL-dev/ocl-icd
10 | cd ocl-icd
11 | curl -L -O https://raw.githubusercontent.com/conda-forge/ocl-icd-feedstock/e2c03e3ddb1ff86630ccf80dc7b87a81640025ea/recipe/install-headers.patch
12 | git apply install-headers.patch
13 | curl -L -O https://github.com/isuruf/ocl-icd/commit/307f2267100a2d1383f0c4a77344b127c0857588.patch
14 | git apply 307f2267100a2d1383f0c4a77344b127c0857588.patch
15 | autoreconf -i
16 | chmod +x configure
17 | ./configure --prefix=/usr
18 | make -j4
19 | make install
20 | 
21 | # Bundle license files
22 | echo "PyOpenCL wheel includes ocl-icd which is licensed as below" >> ${SCRIPT_DIR}/../LICENSE
23 | cat ~/deps/ocl-icd/COPYING >> ${SCRIPT_DIR}/../LICENSE


--------------------------------------------------------------------------------
/src/bitlog.cpp:
--------------------------------------------------------------------------------
 1 | // Base-2 logarithm bithack
 2 | //
 3 | // Copyright (C) 2009 Andreas Kloeckner
 4 | // Copyright (C) Sean Eron Anderson (in the public domain)
 5 | //
 6 | // Permission is hereby granted, free of charge, to any person
 7 | // obtaining a copy of this software and associated documentation
 8 | // files (the "Software"), to deal in the Software without
 9 | // restriction, including without limitation the rights to use,
10 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | // copies of the Software, and to permit persons to whom the
12 | // Software is furnished to do so, subject to the following
13 | // conditions:
14 | //
15 | // The above copyright notice and this permission notice shall be
16 | // included in all copies or substantial portions of the Software.
17 | //
18 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 | // OTHER DEALINGS IN THE SOFTWARE.
26 | 
27 | 
28 | #include "bitlog.hpp"
29 | 
30 | 
// Lookup table with 256 entries: for i in 1..255, entry i is the position of
// the highest set bit of i (i.e. floor(log2(i))); entry 0 is 0.
const char pyopencl::log_table_8[] =
{
  0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
};
50 | 
51 | 
52 | 


--------------------------------------------------------------------------------
/src/bitlog.hpp:
--------------------------------------------------------------------------------
 1 | // Base-2 logarithm bithack.
 2 | //
 3 | // Copyright (C) 2009 Andreas Kloeckner
 4 | // Copyright (C) Sean Eron Anderson (in the public domain)
 5 | //
 6 | // Permission is hereby granted, free of charge, to any person
 7 | // obtaining a copy of this software and associated documentation
 8 | // files (the "Software"), to deal in the Software without
 9 | // restriction, including without limitation the rights to use,
10 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | // copies of the Software, and to permit persons to whom the
12 | // Software is furnished to do so, subject to the following
13 | // conditions:
14 | //
15 | // The above copyright notice and this permission notice shall be
16 | // included in all copies or substantial portions of the Software.
17 | //
18 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 | // OTHER DEALINGS IN THE SOFTWARE.
26 | 
27 | 
28 | #ifndef _AFJDFJSDFSD_PYOPENCL_HEADER_SEEN_BITLOG_HPP
29 | #define _AFJDFJSDFSD_PYOPENCL_HEADER_SEEN_BITLOG_HPP
30 | 
31 | 
32 | #include <climits>
33 | #include <cstdint>
34 | 
35 | 
namespace pyopencl
{
  /* from http://graphics.stanford.edu/~seander/bithacks.html */

  // Byte-indexed lookup table consulted by bitlog2_16 below.
  extern const char log_table_8[];

  // Table lookup on the upper byte of v if that byte is non-zero (adding 8
  // for its position), otherwise on v itself.
  inline unsigned bitlog2_16(uint16_t v)
  {
    const unsigned long upper_byte = v >> 8;
    if (upper_byte == 0)
      return log_table_8[v];
    return 8+log_table_8[upper_byte];
  }

  // Same scheme one level up: defer to bitlog2_16 on whichever 16-bit half
  // holds the highest set bit.
  inline unsigned bitlog2_32(uint32_t v)
  {
    const uint16_t upper_half = v >> 16;
    if (upper_half == 0)
      return bitlog2_16(v);
    return 16+bitlog2_16(upper_half);
  }

#if defined(UINT64_MAX)
  // 64-bit entry point, available when <cstdint> provides UINT64_MAX.
  inline unsigned bitlog2(uint64_t v)
  {
    const uint32_t upper_word = v >> 32;
    if (upper_word == 0)
      return bitlog2_32(v);
    return 32+bitlog2_32(upper_word);
  }
#else
  // Fallback entry point taking unsigned long; the upper word is only
  // examined when unsigned long is wider than 32 bits.
  inline unsigned bitlog2(unsigned long v)
  {
#if (ULONG_MAX != 4294967295)
    const uint32_t upper_word = v >> 32;
    if (upper_word != 0)
      return 32+bitlog2_32(upper_word);
#endif
    return bitlog2_32(v);
  }
#endif
}
78 | 
79 | 
80 | 
81 | 
82 | 
83 | #endif
84 | 


--------------------------------------------------------------------------------
/src/clinfo_ext.h:
--------------------------------------------------------------------------------
  1 | /* Include OpenCL header, and define OpenCL extensions, since what is and is not
  2 |  * available in the official headers is very system-dependent */
  3 | 
  4 | #ifndef _EXT_H
  5 | #define _EXT_H
  6 | 
  7 | #if (defined(__APPLE__) && !defined(PYOPENCL_APPLE_USE_CL_H))
  8 | #include <OpenCL/opencl.h>
  9 | #else
 10 | #include <CL/cl.h>
 11 | #endif
 12 | 
 13 | /* These two defines were introduced in the 1.2 headers
 14 |  * on 2012-11-30, so earlier versions don't have them
 15 |  * (e.g. Debian wheezy)
 16 |  */
 17 | 
 18 | #ifndef CL_DEVICE_IMAGE_PITCH_ALIGNMENT
 19 | #define CL_DEVICE_IMAGE_PITCH_ALIGNMENT                 0x104A
 20 | #define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT          0x104B
 21 | #endif
 22 | 
 23 | /*
 24 |  * Extensions
 25 |  */
 26 | 
 27 | /* cl_khr_icd */
 28 | #define CL_PLATFORM_ICD_SUFFIX_KHR			0x0920
 29 | #define CL_PLATFORM_NOT_FOUND_KHR			-1001
 30 | 
 31 | 
 32 | /* cl_khr_fp64 */
 33 | #define CL_DEVICE_DOUBLE_FP_CONFIG			0x1032
 34 | 
 35 | /* cl_khr_fp16 */
 36 | #define CL_DEVICE_HALF_FP_CONFIG			0x1033
 37 | 
 38 | /* cl_khr_terminate_context */
 39 | #define CL_DEVICE_TERMINATE_CAPABILITY_KHR		0x200F
 40 | 
 41 | /* cl_nv_device_attribute_query */
 42 | #define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV		0x4000
 43 | #define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV		0x4001
 44 | #define CL_DEVICE_REGISTERS_PER_BLOCK_NV		0x4002
 45 | #define CL_DEVICE_WARP_SIZE_NV				0x4003
 46 | #define CL_DEVICE_GPU_OVERLAP_NV			0x4004
 47 | #define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV		0x4005
 48 | #define CL_DEVICE_INTEGRATED_MEMORY_NV			0x4006
 49 | #define CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV	0x4007
 50 | #define CL_DEVICE_PCI_BUS_ID_NV				0x4008
 51 | #define CL_DEVICE_PCI_SLOT_ID_NV			0x4009
 52 | #define CL_DEVICE_PCI_DOMAIN_ID_NV          0x400A
 53 | 
 54 | /* cl_ext_atomic_counters_{32,64} */
 55 | #define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT		0x4032
 56 | 
 57 | /* cl_amd_device_attribute_query */
 58 | #define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD		0x4036
 59 | #define CL_DEVICE_TOPOLOGY_AMD				0x4037
 60 | #define CL_DEVICE_BOARD_NAME_AMD			0x4038
 61 | #define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD		0x4039
 62 | #define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD		0x4040
 63 | #define CL_DEVICE_SIMD_WIDTH_AMD			0x4041
 64 | #define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD		0x4042
 65 | #define CL_DEVICE_WAVEFRONT_WIDTH_AMD			0x4043
 66 | #define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD		0x4044
 67 | #define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD		0x4045
 68 | #define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD	0x4046
 69 | #define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD	0x4047
 70 | #define CL_DEVICE_LOCAL_MEM_BANKS_AMD			0x4048
 71 | #define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD		0x4049
 72 | #define CL_DEVICE_GFXIP_MAJOR_AMD			0x404A
 73 | #define CL_DEVICE_GFXIP_MINOR_AMD			0x404B
 74 | #define CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD		0x404C
 75 | #define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD         0x4030
 76 | #define CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD               0x4031
 77 | #define CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD    0x4033
 78 | #define CL_DEVICE_PCIE_ID_AMD                           0x4034
 79 | 
/* Fallback declaration of the topology type used with
 * CL_DEVICE_TOPOLOGY_AMD; only emitted when
 * CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD has not been defined already. */
#ifndef CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD
#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD		1

typedef union
{
	/* generic view: a type tag followed by five cl_uint words */
	struct { cl_uint type; cl_uint data[5]; } raw;
	/* PCIe view: the same type tag, 17 unused bytes, then bus, device
	 * and function as one cl_char each */
	struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie;
} cl_device_topology_amd;
#endif
 89 | 
 90 | /* cl_amd_offline_devices */
 91 | #define CL_CONTEXT_OFFLINE_DEVICES_AMD			0x403F
 92 | 
 93 | /* cl_ext_device_fission */
 94 | #define cl_ext_device_fission				1
 95 | 
 96 | typedef cl_ulong  cl_device_partition_property_ext;
 97 | 
 98 | #define CL_DEVICE_PARTITION_EQUALLY_EXT			0x4050
 99 | #define CL_DEVICE_PARTITION_BY_COUNTS_EXT		0x4051
100 | #define CL_DEVICE_PARTITION_BY_NAMES_EXT		0x4052
101 | #define CL_DEVICE_PARTITION_BY_NAMES_INTEL		0x4052 /* cl_intel_device_partition_by_names */
102 | #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT	0x4053
103 | 
104 | #define CL_DEVICE_PARENT_DEVICE_EXT			0x4054
105 | #define CL_DEVICE_PARTITION_TYPES_EXT			0x4055
106 | #define CL_DEVICE_AFFINITY_DOMAINS_EXT			0x4056
107 | #define CL_DEVICE_REFERENCE_COUNT_EXT			0x4057
108 | #define CL_DEVICE_PARTITION_STYLE_EXT			0x4058
109 | 
110 | #define CL_AFFINITY_DOMAIN_L1_CACHE_EXT			0x1
111 | #define CL_AFFINITY_DOMAIN_L2_CACHE_EXT			0x2
112 | #define CL_AFFINITY_DOMAIN_L3_CACHE_EXT			0x3
113 | #define CL_AFFINITY_DOMAIN_L4_CACHE_EXT			0x4
114 | #define CL_AFFINITY_DOMAIN_NUMA_EXT			0x10
115 | #define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT		0x100
116 | 
117 | /* cl_intel_advanced_motion_estimation */
118 | #define CL_DEVICE_ME_VERSION_INTEL			0x407E
119 | 
120 | /* cl_qcom_ext_host_ptr */
121 | #define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM		0x40A0
122 | #define CL_DEVICE_PAGE_SIZE_QCOM			0x40A1
123 | 
124 | /* cl_khr_spir */
125 | #define CL_DEVICE_SPIR_VERSIONS				0x40E0
126 | 
127 | /* cl_altera_device_temperature */
128 | #define CL_DEVICE_CORE_TEMPERATURE_ALTERA		0x40F3
129 | 
130 | /* cl_intel_simultaneous_sharing */
131 | #define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL		0x4104
132 | #define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL	0x4105
133 | 
134 | #endif
135 | 


--------------------------------------------------------------------------------
/src/pyopencl_ext.h:
--------------------------------------------------------------------------------
 1 | #ifndef _PYOPENCL_EXT_H
 2 | #define _PYOPENCL_EXT_H
 3 | 
 4 | #ifdef PYOPENCL_USE_SHIPPED_EXT
 5 | 
 6 | #include "clinfo_ext.h"
 7 | 
 8 | #else
 9 | 
10 | #if (defined(__APPLE__) && !defined(PYOPENCL_APPLE_USE_CL_H))
11 | 
12 | #include <OpenCL/opencl.h>
13 | 
14 | #else
15 | 
16 | #include <CL/cl.h>
17 | #include <CL/cl_ext.h>
18 | 
19 | #endif
20 | 
/* Fallback for headers that do not provide the AMD device-topology
 * definitions: the #ifndef skips this block whenever
 * CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD is already defined. */
#ifndef CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD
#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD        1

/* Two overlapping views of the same storage. */
typedef union
{
    /* untyped view: a type tag followed by five cl_uint words */
    struct { cl_uint type; cl_uint data[5]; } raw;
    /* PCIe view: the same type tag, 17 unused bytes, then bus/device/function */
    struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie;
} cl_device_topology_amd;
#endif
30 | 
/* Fallback for headers that do not define CL_DEVICE_P2P_DEVICES_AMD:
 * supplies the constant together with the clEnqueueCopyBufferP2PAMD_fn
 * function-pointer type. Skipped when the macro is already defined. */
#ifndef CL_DEVICE_P2P_DEVICES_AMD
#define CL_DEVICE_P2P_DEVICES_AMD               0x4089

typedef CL_API_ENTRY cl_int
(CL_API_CALL * clEnqueueCopyBufferP2PAMD_fn)(cl_command_queue /*command_queue*/,
                                             cl_mem /*src_buffer*/,
                                             cl_mem /*dst_buffer*/,
                                             size_t /*src_offset*/,
                                             size_t /*dst_offset*/,
                                             size_t /*cb*/,
                                             cl_uint /*num_events_in_wait_list*/,
                                             const cl_event* /*event_wait_list*/,
                                             cl_event* /*event*/);
#endif
45 | 
/* {{{ these NV defines are often missing from the system headers */

/* Each constant below is guarded individually, so a value supplied by the
 * included headers always takes precedence over the one given here. */

#ifndef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV                0x4005
#endif
#ifndef CL_DEVICE_INTEGRATED_MEMORY_NV
#define CL_DEVICE_INTEGRATED_MEMORY_NV                  0x4006
#endif

#ifndef CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV
#define CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV       0x4007
#endif

#ifndef CL_DEVICE_PCI_BUS_ID_NV
#define CL_DEVICE_PCI_BUS_ID_NV                         0x4008
#endif

#ifndef CL_DEVICE_PCI_SLOT_ID_NV
#define CL_DEVICE_PCI_SLOT_ID_NV                        0x4009
#endif

#ifndef CL_DEVICE_PCI_DOMAIN_ID_NV
#define CL_DEVICE_PCI_DOMAIN_ID_NV                      0x400A
#endif

/* }}} */
72 | 
73 | #endif
74 | 
75 | #endif
76 | 
77 | /* vim: foldmethod=marker */
78 | 


--------------------------------------------------------------------------------
/src/tools.hpp:
--------------------------------------------------------------------------------
 1 | // Various odds and ends
 2 | //
 3 | // Copyright (C) 2009 Andreas Kloeckner
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person
 6 | // obtaining a copy of this software and associated documentation
 7 | // files (the "Software"), to deal in the Software without
 8 | // restriction, including without limitation the rights to use,
 9 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | // copies of the Software, and to permit persons to whom the
11 | // Software is furnished to do so, subject to the following
12 | // conditions:
13 | //
14 | // The above copyright notice and this permission notice shall be
15 | // included in all copies or substantial portions of the Software.
16 | //
17 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 | // OTHER DEALINGS IN THE SOFTWARE.
25 | 
26 | 
27 | #ifndef _ASDFDAFVVAFF_PYCUDA_HEADER_SEEN_TOOLS_HPP
28 | #define _ASDFDAFVVAFF_PYCUDA_HEADER_SEEN_TOOLS_HPP
29 | 
30 | 
31 | #include <nanobind/nanobind.h>
32 | 
33 | #include <numeric>
34 | #include <numpy/arrayobject.h>
35 | 
36 | 
37 | 
38 | namespace pyopencl
39 | {
40 |   inline
41 |   npy_intp size_from_dims(int ndim, const npy_intp *dims)
42 |   {
43 |     if (ndim != 0)
44 |       return std::accumulate(dims, dims+ndim, 1, std::multiplies<npy_intp>());
45 |     else
46 |       return 1;
47 |   }
48 | 
49 | 
50 | 
51 | 
52 |   inline void run_python_gc()
53 |   {
54 |     namespace py = nanobind;
55 | 
56 |     py::module_::import_("gc").attr("collect")();
57 |   }
58 | 
59 | 
60 |   // https://stackoverflow.com/a/28139075
61 |   template <typename T>
62 |   struct reversion_wrapper { T& iterable; };
63 | 
64 |   template <typename T>
65 |   auto begin (reversion_wrapper<T> w) { return w.iterable.rbegin(); }
66 | 
67 |   template <typename T>
68 |   auto end (reversion_wrapper<T> w) { return w.iterable.rend(); }
69 | 
70 |   template <typename T>
71 |   reversion_wrapper<T> reverse (T&& iterable) { return { iterable }; }
72 | 
73 | 
74 |   // https://stackoverflow.com/a/44175911
75 |   class noncopyable {
76 |   public:
77 |     noncopyable() = default;
78 |     ~noncopyable() = default;
79 | 
80 |   private:
81 |     noncopyable(const noncopyable&) = delete;
82 |     noncopyable& operator=(const noncopyable&) = delete;
83 |   };
84 | }
85 | 
86 | 
87 | 
88 | 
89 | 
90 | #endif
91 | 


--------------------------------------------------------------------------------
/src/wrap_cl.cpp:
--------------------------------------------------------------------------------
 1 | // PyOpenCL-flavored C++ wrapper of the CL API
 2 | //
 3 | // Copyright (C) 2009 Andreas Kloeckner
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person
 6 | // obtaining a copy of this software and associated documentation
 7 | // files (the "Software"), to deal in the Software without
 8 | // restriction, including without limitation the rights to use,
 9 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | // copies of the Software, and to permit persons to whom the
11 | // Software is furnished to do so, subject to the following
12 | // conditions:
13 | //
14 | // The above copyright notice and this permission notice shall be
15 | // included in all copies or substantial portions of the Software.
16 | //
17 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 | // OTHER DEALINGS IN THE SOFTWARE.
25 | 
26 | 
27 | #define PY_ARRAY_UNIQUE_SYMBOL pyopencl_ARRAY_API
28 | 
29 | #include "wrap_cl.hpp"
30 | #include <nanobind/intrusive/counter.inl>
31 | 
32 | 
33 | 
34 | 
35 | using namespace pyopencl;
36 | 
37 | 
38 | 
39 | 
40 | extern void pyopencl_expose_constants(py::module_ &m);
41 | extern void pyopencl_expose_part_1(py::module_ &m);
42 | extern void pyopencl_expose_part_2(py::module_ &m);
43 | extern void pyopencl_expose_mempool(py::module_ &m);
44 | 
// Import NumPy's C API for this extension module.
// Returns true on success. import_array1(false) expands to code that
// returns its argument (here: false) from this function when the import
// fails, which is why the call is wrapped in a helper of its own.
static bool import_numpy_helper()
{
  import_array1(false);
  return true;
}
50 | 
// Entry point of the ``_cl`` extension module: installs the intrusive
// reference-counting hooks, imports NumPy and registers all bindings on *m*.
NB_MODULE(_cl, m)
{
  // Increment/decrement hooks for nanobind's intrusive reference counter;
  // each one acquires the GIL before touching the Python reference count.
  py::intrusive_init(
    [](PyObject *o) noexcept {
        py::gil_scoped_acquire guard;
        Py_INCREF(o);
    },
    [](PyObject *o) noexcept {
        py::gil_scoped_acquire guard;
        Py_DECREF(o);
    });

  // A failed NumPy import is reported to Python as an exception.
  if (!import_numpy_helper())
    throw py::python_error();

  // These functions are only declared (extern) in this file; each one
  // receives the module object to add its part of the bindings.
  pyopencl_expose_constants(m);
  pyopencl_expose_part_1(m);
  pyopencl_expose_part_2(m);
  pyopencl_expose_mempool(m);

#ifdef NDEBUG
  // Leak warnings are switched off only when NDEBUG is defined.
  // See https://github.com/inducer/pyopencl/issues/758 for context.
  py::set_leak_warnings(false);
#endif
}
76 | 
77 | // vim: foldmethod=marker
78 | 


--------------------------------------------------------------------------------
/src/wrap_helpers.hpp:
--------------------------------------------------------------------------------
  1 | // Wrapper-helping odds and ends
  2 | //
  3 | // Copyright (C) 2009 Andreas Kloeckner
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person
  6 | // obtaining a copy of this software and associated documentation
  7 | // files (the "Software"), to deal in the Software without
  8 | // restriction, including without limitation the rights to use,
  9 | // copy, modify, merge, publish, distribute, sublicense, and/or sell
 10 | // copies of the Software, and to permit persons to whom the
 11 | // Software is furnished to do so, subject to the following
 12 | // conditions:
 13 | //
 14 | // The above copyright notice and this permission notice shall be
 15 | // included in all copies or substantial portions of the Software.
 16 | //
 17 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 18 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 19 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 20 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 21 | // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 22 | // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 23 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 24 | // OTHER DEALINGS IN THE SOFTWARE.
 25 | 
 26 | 
 27 | #ifndef PYCUDA_WRAP_HELPERS_HEADER_SEEN
 28 | #define PYCUDA_WRAP_HELPERS_HEADER_SEEN
 29 | 
 30 | 
 31 | #include <nanobind/nanobind.h>
 32 | #include <nanobind/stl/string.h>
 33 | #include <nanobind/intrusive/counter.h>
 34 | #include <nanobind/intrusive/ref.h>
 35 | #include <nanobind/ndarray.h>
 36 | 
 37 | 
 38 | namespace py = nanobind;
 39 | 
 40 | 
// Shorthand for use inside a binding chain: registers NAME under its own
// spelling (via stringification) with the value NAME.
#define ENUM_VALUE(NAME) \
  value(#NAME, NAME)

// {{{ DEF_SIMPLE_XXX

// The macros in this group expose an entity under its C++ name. The
// member-oriented ones expect a type named ``cls`` at the expansion site;
// the function-oriented ones expect a module object named ``m``.

#define DEF_SIMPLE_METHOD(NAME) \
  def(#NAME, &cls::NAME)

#define DEF_SIMPLE_STATIC_METHOD(NAME) \
  def_static(#NAME, &cls::NAME)

// NOTE(review): this still refers to boost::python, which this header does
// not include; it looks like a leftover from an earlier binding layer and
// would presumably not compile if expanded -- confirm whether it is used.
#define DEF_SIMPLE_METHOD_WITH_ARGS(NAME, ARGS) \
  def(#NAME, &cls::NAME, boost::python::args ARGS)

#define DEF_SIMPLE_FUNCTION(NAME) \
  m.def(#NAME, &NAME)

#define DEF_SIMPLE_FUNCTION_WITH_ARGS(NAME, ARGS) \
  m.def(#NAME, &NAME, py::args ARGS)

// The member macros bind the data member ``m_NAME`` under the name NAME.
// NOTE(review): def_readonly/def_readwrite look like pybind11 spellings
// (nanobind's equivalents are presumably def_ro/def_rw) -- confirm.
#define DEF_SIMPLE_RO_MEMBER(NAME) \
  def_readonly(#NAME, &cls::m_##NAME)

#define DEF_SIMPLE_RW_MEMBER(NAME) \
  def_readwrite(#NAME, &cls::m_##NAME)

// }}}
 68 | 
 69 | // {{{ COPY_PY_XXX
 70 | 
// Append every item of the Python iterable ``py_NAME`` to the container
// ``NAME``, converting each item with py::cast<TYPE>.
#define COPY_PY_LIST(TYPE, NAME) \
  { \
    for (auto it: py_##NAME) \
      NAME.push_back(py::cast<TYPE>(it)); \
  }

// Copy the items of the Python iterable ``py_NAME`` into the already sized
// container ``NAME`` and leave the number of copied items in COUNTER.
// Throws error(FUNC_NAME, CL_INVALID_VALUE, ...) as soon as an item would
// not fit into the NAME.size() available slots.
#define COPY_PY_ARRAY(FUNC_NAME, TYPE, NAME, COUNTER) \
  { \
    COUNTER = 0; \
    for (auto it: py_##NAME) \
    { \
      if (COUNTER == NAME.size()) \
        throw error(FUNC_NAME, \
            CL_INVALID_VALUE, "too many entries in " #NAME " argument"); \
      NAME[COUNTER++] = py::cast<TYPE>(it); \
    } \
  }
 88 | 
 89 | #define COPY_PY_COORD_TRIPLE(NAME) \
 90 |   size_t NAME[3] = {0, 0, 0}; \
 91 |   { \
 92 |     py::sequence py_seq_##NAME = py::cast<py::sequence>(py_##NAME); \
 93 |     size_t my_len = len(py_seq_##NAME); \
 94 |     if (my_len > 3) \
 95 |       throw error("transfer", CL_INVALID_VALUE, #NAME "has too many components"); \
 96 |     for (size_t i = 0; i < my_len; ++i) \
 97 |       NAME[i] = py::cast<size_t>(py_seq_##NAME[i]); \
 98 |   }
 99 | 
// Declare ``size_t NAME[2]`` initialized to {0, 0} and, unless ``py_NAME``
// is None, fill it from that Python sequence. Sequences with fewer than two
// entries leave the remaining components at 0; longer ones raise
// CL_INVALID_VALUE.
// (The message now has a space between the argument name and the text.)
#define COPY_PY_PITCH_TUPLE(NAME) \
  size_t NAME[2] = {0, 0}; \
  if (py_##NAME.ptr() != Py_None) \
  { \
    py::sequence py_seq_##NAME = py::cast<py::sequence>(py_##NAME); \
    size_t my_len = len(py_seq_##NAME); \
    if (my_len > 2) \
      throw error("transfer", CL_INVALID_VALUE, #NAME " has too many components"); \
    for (size_t i = 0; i < my_len; ++i) \
      NAME[i] = py::cast<size_t>(py_seq_##NAME[i]); \
  }
111 | 
// Declare ``size_t NAME[3]`` initialized to {1, 1, 1} and fill it from the
// Python sequence ``py_NAME``. Sequences with fewer than three entries leave
// the remaining components at 1; longer ones raise CL_INVALID_VALUE.
// (The message now has a space between the argument name and the text.)
#define COPY_PY_REGION_TRIPLE(NAME) \
  size_t NAME[3] = {1, 1, 1}; \
  { \
    py::sequence py_seq_##NAME = py::cast<py::sequence>(py_##NAME); \
    size_t my_len = len(py_seq_##NAME); \
    if (my_len > 3) \
      throw error("transfer", CL_INVALID_VALUE, #NAME " has too many components"); \
    for (size_t i = 0; i < my_len; ++i) \
      NAME[i] = py::cast<size_t>(py_seq_##NAME[i]); \
  }
122 | 
123 | // }}}
124 | 
// Parse a NumPy array specification from the Python objects ``dtype``,
// ``py_shape``, ``py_order`` and ``py_strides``, which must be in scope at
// the expansion site. Declares these locals:
//   tp_descr   descriptor converted from ``dtype``
//   shape      ``py_shape`` as a single integer or as a sequence of them
//   order      converted ``py_order`` (defaults to NPY_CORDER)
//   ary_flags  NPY_ARRAY_FARRAY or NPY_ARRAY_CARRAY, depending on ``order``
//   strides    copied from ``py_strides`` unless that is None
//
// A failing order conversion is now propagated as the Python exception it
// set instead of being ignored, and the descriptor reference is released
// before throwing from the order-related error paths.
// NOTE(review): an exception thrown while copying shape or strides would
// still leave tp_descr unreleased -- confirm whether that matters to callers.
#define PYOPENCL_PARSE_NUMPY_ARRAY_SPEC \
    PyArray_Descr *tp_descr; \
    if (PyArray_DescrConverter(dtype.ptr(), &tp_descr) != NPY_SUCCEED) \
      throw py::python_error(); \
    \
    std::vector<npy_intp> shape; \
    try \
    { \
      shape.push_back(py::cast<npy_intp>(py_shape)); \
    } \
    catch (py::cast_error &) \
    { \
      COPY_PY_LIST(npy_intp, shape); \
    } \
    \
    NPY_ORDER order = NPY_CORDER; \
    if (PyArray_OrderConverter(py_order.ptr(), &order) != NPY_SUCCEED) \
    { \
      Py_DECREF(tp_descr); \
      throw py::python_error(); \
    } \
    \
    int ary_flags = 0; \
    if (order == NPY_FORTRANORDER) \
      ary_flags |= NPY_ARRAY_FARRAY; \
    else if (order == NPY_CORDER) \
      ary_flags |= NPY_ARRAY_CARRAY; \
    else \
    { \
      Py_DECREF(tp_descr); \
      throw std::runtime_error("unrecognized order specifier"); \
    } \
    \
    std::vector<npy_intp> strides; \
    if (py_strides.ptr() != Py_None) \
    { \
      COPY_PY_LIST(npy_intp, strides); \
    }
156 | 
// Return, from the function in which the macro is expanded, a py::list to
// which every element of NAME (iterated as ITEMTYPE) has been appended.
#define PYOPENCL_RETURN_VECTOR(ITEMTYPE, NAME) \
  { \
    py::list pyopencl_result; \
    for (ITEMTYPE item: NAME) \
      pyopencl_result.append(item); \
    return pyopencl_result; \
  }
164 | 
165 | namespace
166 | {
167 |   template <typename T>
168 |   inline py::object handle_from_new_ptr(T *ptr)
169 |   {
170 |     return py::cast(ptr, py::rv_policy::take_ownership);
171 |   }
172 | 
173 |   template <typename T, typename ClType>
174 |   inline T *from_int_ptr(intptr_t obj_ref, bool retain)
175 |   {
176 |     ClType clobj = (ClType) obj_ref;
177 |     return new T(clobj, retain);
178 |   }
179 | 
180 |   template <typename T>
181 |   inline intptr_t to_int_ptr(T const &obj)
182 |   {
183 |     return (intptr_t) obj.data();
184 |   }
185 | }
186 | 
// Adds the static method ``from_int_ptr`` and the read-only property
// ``int_ptr`` to a class binding chain. Expects a type named ``cls`` at the
// expansion site; CL_TYPENAME is the handle type and is also spelled out in
// the generated docstrings.
// The docstring now has a space after "True." so the two sentences no
// longer run together, and the macro no longer ends in a line continuation.
#define PYOPENCL_EXPOSE_TO_FROM_INT_PTR(CL_TYPENAME) \
  .def_static("from_int_ptr", from_int_ptr<cls, CL_TYPENAME>, \
      py::arg("int_ptr_value"), \
      py::arg("retain")=true, \
      "(static method) Return a new Python object referencing the C-level " \
      ":c:type:`" #CL_TYPENAME "` object at the location pointed to " \
      "by *int_ptr_value*. The relevant ``clRetain*`` function " \
      "will be called if *retain* is True. " \
      "If the previous owner of the object will *not* release the reference, " \
      "*retain* should be set to *False*, to effectively transfer ownership to " \
      ":mod:`pyopencl`." \
      "\n\n.. versionadded:: 2013.2\n" \
      "\n\n.. versionchanged:: 2016.1\n\n    *retain* added.") \
  .def_prop_ro("int_ptr", to_int_ptr<cls>, \
      "Return an integer corresponding to the pointer value " \
      "of the underlying :c:type:`" #CL_TYPENAME "`. " \
      "Use :meth:`from_int_ptr` to turn back into a Python object." \
      "\n\n.. versionadded:: 2013.2\n")
205 | 
// Adds ``__eq__`` to a class binding chain (expects a type named ``cls`` at
// the expansion site). The first overload compares two cls instances with
// operator==; the second accepts any object, None included, and returns
// false.
#define PYOPENCL_EXPOSE_EQUALITY_TESTS \
    /* this relies on nanobind overload resolution going in order of registration */ \
    .def("__eq__", [](cls const &self, cls const &other) { return self == other; }) \
    .def("__eq__", [](cls const &self, py::object obj) { return false; }, py::arg("obj").none())
210 | 
211 | 
212 | #endif
213 | 
214 | // vim: foldmethod=marker
215 | 


--------------------------------------------------------------------------------
/test/add-vectors-32.spv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/pyopencl/b8b8d4d852e8a26356861ffda578874dc064e54c/test/add-vectors-32.spv


--------------------------------------------------------------------------------
/test/add-vectors-64.spv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/pyopencl/b8b8d4d852e8a26356861ffda578874dc064e54c/test/add-vectors-64.spv


--------------------------------------------------------------------------------
/test/empty-header.h:
--------------------------------------------------------------------------------
1 | /* what did you expect? */
2 | 


--------------------------------------------------------------------------------
/test/test_arrays_in_structs.py:
--------------------------------------------------------------------------------
  1 | __copyright__ = "Copyright (C) 2020 Sotiris Niarchos"
  2 | 
  3 | __license__ = """
  4 | Permission is hereby granted, free of charge, to any person obtaining a copy
  5 | of this software and associated documentation files (the "Software"), to deal
  6 | in the Software without restriction, including without limitation the rights
  7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 | copies of the Software, and to permit persons to whom the Software is
  9 | furnished to do so, subject to the following conditions:
 10 | 
 11 | The above copyright notice and this permission notice shall be included in
 12 | all copies or substantial portions of the Software.
 13 | 
 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 20 | THE SOFTWARE.
 21 | """
 22 | 
 23 | import numpy as np
 24 | 
 25 | import pyopencl as cl
 26 | import pyopencl.cltypes as cltypes
 27 | import pyopencl.tools as cl_tools
 28 | from pyopencl import mem_flags
 29 | from pyopencl.tools import (
 30 |     pytest_generate_tests_for_pyopencl as pytest_generate_tests,  # noqa: F401
 31 | )
 32 | 
 33 | 
 34 | def test_struct_with_array_fields(ctx_factory):
 35 |     #
 36 |     # typedef struct {
 37 |     #     uint x[2];
 38 |     #     float y;
 39 |     #     uint z[3][4];
 40 |     # } my_struct;
 41 |     #
 42 |     cl_ctx = ctx_factory()
 43 |     device = cl_ctx.devices[0]
 44 |     queue = cl.CommandQueue(cl_ctx)
 45 | 
 46 |     my_struct = np.dtype([
 47 |         ("x", cltypes.uint, 2),
 48 |         ("y", cltypes.int),
 49 |         ("z", cltypes.uint, (3, 4))
 50 |     ])
 51 |     my_struct, cdecl = cl_tools.match_dtype_to_c_struct(
 52 |         device, "my_struct", my_struct
 53 |     )
 54 | 
 55 |     # a random buffer of 4 structs
 56 |     my_struct_arr = np.array([
 57 |         ([81, 24], -57, [[15, 28, 45,  7], [71, 95, 65, 84], [2, 11, 59,  9]]),
 58 |         ([5, 20],  47, [[15, 53,  7, 59], [73, 22, 27, 86], [59,  6, 39, 49]]),
 59 |         ([11, 99], -32, [[73, 83,  4, 65], [19, 21, 22, 27], [1, 55,  6, 64]]),
 60 |         ([57, 38], -54, [[74, 90, 38, 67], [77, 30, 99, 18], [91,  3, 63, 67]])
 61 |     ], dtype=my_struct)
 62 | 
 63 |     expected_res = []
 64 |     for x in my_struct_arr:
 65 |         expected_res.append(int(np.sum(x[0]) + x[1] + np.sum(x[2])))
 66 |     expected_res = np.array(expected_res, dtype=cltypes.int)
 67 | 
 68 |     kernel_src = """%s
 69 |     // this kernel sums every number contained in each struct
 70 |     __kernel void array_structs(__global my_struct *structs, __global int *res) {
 71 |         int i = get_global_id(0);
 72 |         my_struct s = structs[i];
 73 |         res[i] = s.x[0] + s.x[1] + s.y;
 74 |         for (int r = 0; r < 3; r++)
 75 |             for (int c = 0; c < 4; c++)
 76 |                 res[i] += s.z[r][c];
 77 |     }""" % cdecl
 78 | 
 79 |     mem_flags1 = mem_flags.READ_ONLY | mem_flags.COPY_HOST_PTR
 80 |     mem_flags2 = mem_flags.WRITE_ONLY
 81 | 
 82 |     my_struct_buf = cl.Buffer(cl_ctx, mem_flags1, hostbuf=my_struct_arr)
 83 |     res_buf = cl.Buffer(cl_ctx, mem_flags2, size=expected_res.nbytes)
 84 | 
 85 |     program = cl.Program(cl_ctx, kernel_src).build()
 86 |     kernel = program.array_structs
 87 |     kernel(queue, (4,), None, my_struct_buf, res_buf)
 88 | 
 89 |     res = np.empty_like(expected_res)
 90 |     cl.enqueue_copy(queue, res, res_buf)
 91 | 
 92 |     assert (res == expected_res).all()
 93 | 
 94 | 
if __name__ == "__main__":
    import sys
    if len(sys.argv) > 1:
        # Execute the first command-line argument as Python code in this
        # module's namespace.
        # NOTE(review): exec() on argv is only acceptable for trusted,
        # developer-supplied input.
        exec(sys.argv[1])
    else:
        # Without an argument, hand this file to pytest.
        from pytest import main
        main([__file__])
102 | 


--------------------------------------------------------------------------------
/test/test_clrandom.py:
--------------------------------------------------------------------------------
 1 | __copyright__ = "Copyright (C) 2018 Matt Wala"
 2 | 
 3 | __license__ = """
 4 | Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | of this software and associated documentation files (the "Software"), to deal
 6 | in the Software without restriction, including without limitation the rights
 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the Software is
 9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | """
22 | 
23 | import numpy as np
24 | import pytest
25 | 
26 | import pyopencl as cl
27 | import pyopencl.clrandom as clrandom
28 | import pyopencl.cltypes as cltypes
29 | from pyopencl.characterize import has_double_support
30 | from pyopencl.tools import (
31 |     pytest_generate_tests_for_pyopencl as pytest_generate_tests,  # noqa: F401
32 | )
33 | 
34 | 
# Enable faulthandler for this test module when it can be imported; a
# missing module is silently tolerated.
# NOTE(review): faulthandler is part of the standard library on current
# Python versions, so the ImportError branch is presumably never taken.
try:
    import faulthandler
except ImportError:
    pass
else:
    faulthandler.enable()
41 | 
42 | 
@pytest.mark.parametrize("rng_class", [
    clrandom.PhiloxGenerator,
    clrandom.ThreefryGenerator])
@pytest.mark.parametrize("dtype", [
    np.int32,
    np.int64,
    np.float32,
    np.float64,
    cltypes.float2,         # type: ignore[attr-defined]
    cltypes.float3,         # type: ignore[attr-defined]
    cltypes.float4,         # type: ignore[attr-defined]
    ])
def test_clrandom_dtypes(ctx_factory, rng_class, dtype):
    """Call ``uniform`` for every dtype and ``normal`` for the non-integer
    ones on each generator class; the test only checks that the calls run."""
    context = ctx_factory()

    wants_double = dtype == np.float64
    if wants_double and not has_double_support(context.devices[0]):
        pytest.skip("double precision not supported on this device")

    generator = rng_class(context)
    n_samples = 10
    integer_dtypes = (np.int32, np.int64)

    with cl.CommandQueue(context) as queue:
        generator.uniform(queue, n_samples, dtype)

        # normal() is only exercised for the non-integer dtypes
        if dtype not in integer_dtypes:
            generator.normal(queue, n_samples, dtype)
69 | 
if __name__ == "__main__":
    import sys
    if len(sys.argv) > 1:
        # Execute the first command-line argument as Python code in this
        # module's namespace.
        # NOTE(review): exec() on argv is only acceptable for trusted,
        # developer-supplied input.
        exec(sys.argv[1])
    else:
        # Without an argument, hand this file to pytest.
        from pytest import main
        main([__file__])
77 | 


--------------------------------------------------------------------------------