├── .gitignore
├── docs
    ├── svmoverview.pdf
    ├── images
    │   ├── opencl_memory.png
    │   ├── example_opencl_system.svg
    │   ├── opencl_memory_objects.svg
    │   └── opencl_context_objects.svg
    ├── uml
    │   ├── opencl_memory_objects.puml
    │   ├── opencl_context_objects.puml
    │   ├── example_opencl_system.puml
    │   └── opencl_app_sequence.puml
    └── opencl_description.md
├── .github
    └── workflows
    │   └── rust.yml
├── CODE_OF_CONDUCT.md
├── examples
    ├── clinfo.rs
    ├── opencl2svm.rs
    ├── basic.rs
    ├── opencl2serde.rs
    └── opencl2image.rs
├── Cargo.toml
├── CONTRIBUTING.md
├── src
    ├── event.rs
    ├── platform.rs
    ├── lib.rs
    ├── command_buffer.rs
    └── context.rs
├── LICENSE
├── RELEASES.md
├── tests
    ├── opencl2_kernel_test.rs
    └── integration_test.rs
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | .vscode/
3 | /src/.vscode/*
4 | **/*.rs.bk
5 | Cargo.lock


--------------------------------------------------------------------------------
/docs/svmoverview.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kenba/opencl3/HEAD/docs/svmoverview.pdf


--------------------------------------------------------------------------------
/docs/images/opencl_memory.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kenba/opencl3/HEAD/docs/images/opencl_memory.png


--------------------------------------------------------------------------------
/docs/uml/opencl_memory_objects.puml:
--------------------------------------------------------------------------------
 1 | @startuml
 2 | 
 3 | 'Copyright (c) 2021 Via Technology Ltd. All Rights Reserved.
 4 | 
 5 | ' title via::opencl Memory classes
 6 | 
 7 | Buffer --|> Memory
 8 | ' Sampler --o Image
 9 | Image --|> Memory
10 | Pipe --|> Memory
11 | Memory o- SVM
12 | 
13 | @enduml


--------------------------------------------------------------------------------
/docs/uml/opencl_context_objects.puml:
--------------------------------------------------------------------------------
 1 | @startuml
 2 | 
 3 | 'Copyright (c) 2021 Via Technology Ltd. All Rights Reserved.
 4 | 
 5 | ' title via::opencl Context objects
 6 | 
 7 | object cl_device_id
 8 | 
 9 | Context o-- "1..*" cl_device_id
10 | cl_device_id o- "1..*" CommandQueue
11 | Context *-- "1..*" CommandQueue
12 | 
13 | Context *-- "1..*" Program
14 | Program *- "1..*" Kernel
15 | Context *-- "1..*" Kernel
16 | 
17 | Context *-- "0..*" SubDevice
18 | 
19 | @enduml
20 | 


--------------------------------------------------------------------------------
/docs/uml/example_opencl_system.puml:
--------------------------------------------------------------------------------
 1 | @startuml
 2 | 
 3 | 'Copyright (c) 2017 Via Technology Ltd. All Rights Reserved.
 4 | 
 5 | 'title OpenCL Compute System Objects
 6 | 
 7 | object system
 8 | 
 9 | object platform_1
10 | object platform_2
11 | object platform_3
12 | 
13 | object device_1
14 | object device_2
15 | object device_3
16 | object device_4
17 | 
18 | system *-- platform_1
19 | system *-- platform_2
20 | system *-- platform_3
21 | 
22 | platform_1 *-- device_1
23 | platform_1 *-- device_2
24 | 
25 | platform_2 *-- device_2
26 | 
27 | platform_3 *-- device_2
28 | platform_3 *-- device_3
29 | platform_3 *-- device_4
30 | 
31 | @enduml


--------------------------------------------------------------------------------
/.github/workflows/rust.yml:
--------------------------------------------------------------------------------
 1 | name: Rust
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ main, develop ]
 6 |   pull_request:
 7 |     branches: [ develop ]
 8 | 
 9 | env:
10 |   CARGO_TERM_COLOR: always
11 | 
12 | jobs:
13 |   check:
14 |     name: Check
15 |     runs-on: ubuntu-latest
16 |     steps:
17 |       - uses: actions/checkout@v4
18 |       - name: Install Rust
19 |         run: rustup update stable
20 |       - name: Check
21 |         run: cargo check
22 | 
23 |   build:
24 |     name: Build
25 |     runs-on: ubuntu-latest
26 | 
27 |     steps:
28 |     - name: Load OpenCL
29 |       run: |
30 |         wget -qO - https://repositories.intel.com/graphics/intel-graphics.key |
31 |           sudo apt-key add -
32 |         sudo add-apt-repository \
33 |           'deb [arch=amd64] https://repositories.intel.com/graphics/ubuntu focal main'
34 |         sudo apt-get update
35 |         sudo apt-get install \
36 |           intel-opencl-icd \
37 |           intel-level-zero-gpu level-zero \
38 |           intel-media-va-driver-non-free libmfx1
39 |         sudo apt-get install ocl-icd-opencl-dev
40 | 
41 |     - uses: actions/checkout@v4
42 |     - name: Install Rust
43 |       run: rustup update stable
44 |     - name: Build
45 |       run: cargo build --verbose
46 |     # Running the tests needs a GPU hosted runner ("runs-on:"), which requires an enterprise subscription...
47 |     # - name: Run tests
48 |     #   run: cargo test -- --test-threads=1
49 | 
50 |   clippy:
51 |     name: Clippy
52 |     runs-on: ubuntu-latest
53 |     env:
54 |       RUSTFLAGS: "-Dwarnings"
55 |     steps:
56 |       - uses: actions/checkout@v4
57 |       - name: Install Rust
58 |         run: rustup update stable
59 |       - name: Run Clippy
60 |         run: cargo clippy --all-features
61 | 


--------------------------------------------------------------------------------
/docs/uml/opencl_app_sequence.puml:
--------------------------------------------------------------------------------
 1 | @startuml
 2 | 
 3 | 'Copyright (c) 2021 Via Technology Ltd. All Rights Reserved.
 4 | 
 5 | 'title OpenCL Application Sequence
 6 | 
 7 | actor "OpenCL Application" as user
 8 | 
 9 | participant "OpenCL\nplatforms" as platforms
10 | 
11 | participant "OpenCL\ndevices" as devices
12 | 
13 | participant "OpenCL\nContext" as opencl_context
14 | 
15 | participant "OpenCL\nCommandQueue" as opencl_queue
16 | 
17 | participant "OpenCL\nProgram" as opencl_program
18 | 
19 | participant "OpenCL\nKernel" as opencl_kernel
20 | 
21 | participant "OpenCL\nMemory" as opencl_memory
22 | 
23 | group Query
24 |   user -> platforms : Query OpenCL platforms.
25 |   user <- platforms : available OpenCL platforms
26 | 
27 |   user -> devices : Query OpenCL devices.
28 |   user <- devices : available OpenCL devices
29 | end
30 | 
31 | group Initialisation
32 |   user -> user : Choose most appropriate\nplatform and device(s)
33 | 
34 |   user -> opencl_context : Construct OpenCL Context for platform and device(s)
35 | 
36 |   devices -> opencl_context : Context devices
37 | 
38 |   opencl_context -> opencl_queue : Create device\ncommand_queue(s)
39 | 
40 |   user -> opencl_context : Construct and build programs: source, binary and/or IL
41 |   opencl_context -> opencl_program : Build program
42 |   opencl_context -> opencl_kernel : Construct kernels for program
43 | 
44 |   user -> opencl_context : Create OpenCL memory
45 |   opencl_context -> opencl_memory : Create: buffers, images, samplers, pipes, svm, etc.
46 | end
47 | 
48 | group Compute Data
49 |   user -> opencl_context : get kernel(s)
50 | 
51 |   group loop
52 |     user -> opencl_memory : Write input data to OpenCL memory
53 |     user -> opencl_queue : execute kernel(s)
54 |     user <- opencl_memory : Read output data from OpenCL memory
55 |     user <- opencl_queue : wait for events or queue finished
56 |   end
57 | end
58 | 
59 | group Clean Up
60 |   user -> opencl_context : drop context
61 | 
62 |   opencl_context -> opencl_memory : release: buffers, images, samplers, pipes, svm, etc.
63 |   opencl_context -> opencl_kernel : release kernels
64 |   opencl_context -> opencl_program : release programs
65 |   opencl_context -> opencl_queue : release command_queues
66 |   opencl_context -> opencl_context : release context
67 | end
68 | 
69 | @enduml
70 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Contributor Covenant Code of Conduct
 2 | 
 3 | ## Our Pledge
 4 | 
 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation.
 6 | 
 7 | ## Our Standards
 8 | 
 9 | Examples of behaviour that contributes to a positive environment for our
10 | community include:
11 | 
12 | - Using welcoming and inclusive language
13 | - Being respectful of differing viewpoints and experiences
14 | - Gracefully accepting constructive criticism
15 | - Focusing on what is best for the community
16 | - Showing empathy towards other community members
17 | 
18 | Examples of unacceptable behaviour include:
19 | 
20 | - The use of sexualized language or imagery and unwelcome sexual attention or advances
21 | - Trolling, insulting/derogatory comments, and personal or political attacks
22 | - Public or private harassment
23 | - Publishing others' private information, such as a physical or electronic address, without explicit permission
24 | - Other conduct which could reasonably be considered inappropriate in a professional setting
25 | 
26 | ## Our Responsibilities
27 | 
28 | Project maintainers are responsible for clarifying the standards of acceptable behaviour and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour.
29 | 
30 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviours that they deem inappropriate, threatening, offensive, or harmful.
31 | 
32 | ## Enforcement
33 | 
34 | Instances of abusive, harassing, or otherwise unacceptable behaviour may be
35 | reported to the community leaders responsible for enforcement at:
36 | [codeofconduct@via-technology.aero](mailto:codeofconduct@via-technology.aero).  
37 | All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
38 | 
39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
40 | 
41 | ## Attribution
42 | 
43 | This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/),
44 | version 2.1, available at
45 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html).
46 | 


--------------------------------------------------------------------------------
/examples/clinfo.rs:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Via Technology Ltd. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //    http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | use opencl3::Result;
16 | use opencl3::device::{CL_DEVICE_TYPE_ALL, Device, device_type_text, vendor_id_text};
17 | 
18 | /// Prints a report of every OpenCL platform on the system and each platform's devices.
19 | ///
20 | /// Platform details correspond to `clGetPlatformInfo` queries and device details to
21 | /// `clGetDeviceInfo` queries; devices are listed with `CL_DEVICE_TYPE_ALL`. The first
22 | /// failing query ends the program, because its error is returned from `main` via `?`.
23 | fn main() -> Result<()> {
24 |     let platforms = opencl3::platform::get_platforms()?;
25 |     println!("Number of platforms: {}", platforms.len());
26 | 
27 |     for platform in platforms {
28 |         println!("CL_PLATFORM_VENDOR: {}", platform.vendor()?);
29 |         println!("CL_PLATFORM_NAME: {}", platform.name()?);
30 |         println!("CL_PLATFORM_VERSION: {}", platform.version()?);
31 |         println!("CL_PLATFORM_PROFILE: {}", platform.profile()?);
32 |         println!("CL_PLATFORM_EXTENSIONS: {}", platform.extensions()?);
33 | 
34 |         let devices = platform.get_devices(CL_DEVICE_TYPE_ALL)?;
35 |         println!("Number of devices: {}", devices.len());
36 |         println!();
37 | 
38 |         for device_id in devices {
39 |             let device = Device::new(device_id);
40 |             println!("\tCL_DEVICE_VENDOR: {}", device.vendor()?);
41 |             let vendor_id = device.vendor_id()?;
42 |             println!(
43 |                 "\tCL_DEVICE_VENDOR_ID: {:X}, {}",
44 |                 vendor_id,
45 |                 vendor_id_text(vendor_id)
46 |             );
47 |             println!("\tCL_DEVICE_NAME: {}", device.name()?);
48 |             println!("\tCL_DEVICE_VERSION: {}", device.version()?);
49 |             let device_type = device.dev_type()?;
50 |             println!(
51 |                 "\tCL_DEVICE_TYPE: {:X}, {}",
52 |                 device_type,
53 |                 device_type_text(device_type)
54 |             );
55 |             println!("\tCL_DEVICE_PROFILE: {}", device.profile()?);
56 |             println!("\tCL_DEVICE_EXTENSIONS: {}", device.extensions()?);
57 |             println!(
58 |                 "\tCL_DEVICE_OPENCL_C_VERSION: {:?}",
59 |                 device.opencl_c_version()?
60 |             );
61 |             println!(
62 |                 "\tCL_DEVICE_BUILT_IN_KERNELS: {}",
63 |                 device.built_in_kernels()?
64 |             );
65 |             println!(
66 |                 "\tCL_DEVICE_SVM_CAPABILITIES: {:X}",
67 |                 device.svm_mem_capability()
68 |             );
69 |             println!();
70 |         }
71 |     }
72 | 
73 |     Ok(())
74 | }
75 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "opencl3"
 3 | version = "0.12.1"
 4 | authors = ["Ken Barker <ken.barker@via-technology.aero>"]
 5 | description = "A Rust implementation of the Khronos OpenCL 3.0 API and extensions."
 6 | documentation = "https://docs.rs/opencl3/"
 7 | homepage = "https://github.com/kenba/opencl3"
 8 | repository = "https://github.com/kenba/opencl3"
 9 | readme = "README.md"
10 | keywords = ["opencl", "gpu", "gpgpu"]
11 | categories = ["api-bindings", "asynchronous"]
12 | license = "Apache-2.0"
13 | edition = "2024"
14 | 
15 | [features]
16 | 
17 | static  = ["cl3/static"]
18 | 
19 | CL_VERSION_1_1 = ["cl3/CL_VERSION_1_1"]
20 | CL_VERSION_1_2 = ["cl3/CL_VERSION_1_2"]
21 | CL_VERSION_2_0 = ["cl3/CL_VERSION_2_0"]
22 | CL_VERSION_2_1 = ["cl3/CL_VERSION_2_1"]
23 | CL_VERSION_2_2 = ["cl3/CL_VERSION_2_2"]
24 | CL_VERSION_3_0 = ["cl3/CL_VERSION_3_0"]
25 | 
26 | cl_apple_setmemobjectdestructor = ["cl3/cl_apple_setmemobjectdestructor"]
27 | cl_apple_contextloggingfunctions = ["cl3/cl_apple_contextloggingfunctions"]
28 | 
29 | cl_khr_icd = ["cl3/cl_khr_icd"]
30 | cl_khr_il_program = ["cl3/cl_khr_il_program"]
31 | cl_khr_terminate_context = ["cl3/cl_khr_terminate_context"]
32 | cl_khr_create_command_queue = ["cl3/cl_khr_create_command_queue"]
33 | cl_ext_device_fission = ["cl3/cl_ext_device_fission"]
34 | cl_ext_migrate_memobject = ["cl3/cl_ext_migrate_memobject"]
35 | cl_qcom_ext_host_ptr = ["cl3/cl_qcom_ext_host_ptr"]
36 | cl_img_use_gralloc_ptr = ["cl3/cl_img_use_gralloc_ptr"]
37 | cl_img_generate_mipmap = ["cl3/cl_img_generate_mipmap"]
38 | cl_khr_subgroups = ["cl3/cl_khr_subgroups"]
39 | cl_khr_suggested_local_work_size = ["cl3/cl_khr_suggested_local_work_size"]
40 | cl_khr_external_memory = ["cl3/cl_khr_external_memory"]
41 | cl_khr_external_semaphore = ["cl3/cl_khr_external_semaphore"]
42 | cl_khr_external_semaphore_sync_fd = ["cl3/cl_khr_external_semaphore_sync_fd"]
43 | cl_khr_semaphore = ["cl3/cl_khr_semaphore"]
44 | cl_arm_import_memory = ["cl3/cl_arm_import_memory"]
45 | cl_arm_shared_virtual_memory = ["cl3/cl_arm_shared_virtual_memory"]
46 | cl_intel_accelerator = ["cl3/cl_intel_accelerator"]
47 | cl_intel_unified_shared_memory = ["cl3/cl_intel_unified_shared_memory"]
48 | cl_intel_create_buffer_with_properties = ["cl3/cl_intel_create_buffer_with_properties"]
49 | cl_intel_program_scope_host_pipe = ["cl3/cl_intel_program_scope_host_pipe"]
50 | cl_ext_image_requirements_info = ["cl3/cl_ext_image_requirements_info"]
51 | cl_khr_command_buffer = ["cl3/cl_khr_command_buffer"]
52 | cl_khr_command_buffer_multi_device = ["cl3/cl_khr_command_buffer_multi_device"]
53 | cl_khr_command_buffer_mutable_dispatch = ["cl3/cl_khr_command_buffer_mutable_dispatch"]
54 | 
55 | cl_khr_gl_sharing = ["cl3/cl_khr_gl_sharing"]
56 | cl_khr_gl_event = ["cl3/cl_khr_gl_event"]
57 | cl_khr_egl_image = ["cl3/cl_khr_egl_image"]
58 | cl_khr_egl_event = ["cl3/cl_khr_egl_event"]
59 | 
60 | cl_khr_dx9_media_sharing = ["cl3/cl_khr_dx9_media_sharing"]
61 | cl_intel_dx9_media_sharing = ["cl3/cl_intel_dx9_media_sharing"]
62 | cl_khr_d3d10_sharing = ["cl3/cl_khr_d3d10_sharing"]
63 | cl_khr_d3d11_sharing = ["cl3/cl_khr_d3d11_sharing"]
64 | 
65 | cl_loader_info = ["cl3/cl_loader_info"]
66 | cl_pocl_content_size = ["cl3/cl_pocl_content_size"]
67 | cl_ext_buffer_device_address = ["cl3/cl_ext_buffer_device_address"]
68 | cl_loader_layers = ["cl3/cl_loader_layers"]
69 | cl_img_cancel_command = ["cl3/cl_img_cancel_command"]
70 | cl_qcom_perf_hint = ["cl3/cl_qcom_perf_hint"]
71 | 
72 | # Use dynamic linking instead of static linking
73 | dynamic = ["cl3/dynamic"]
74 | 
75 | # Default features:
76 | default = ["dynamic"]
77 | 
78 | [dependencies]
79 | libc = "0.2"
80 | cl3 = { version = "0.13", default-features = false }
81 | serde = { version = "1.0", optional = true }
82 | 
83 | [dev-dependencies]
84 | serde_json = "1.0"
85 | opencl3 = { path = ".", features = ["dynamic", "serde"] }
86 | 
87 | [lints.clippy]
88 | enum_glob_use = "deny"
89 | nursery = "deny"
90 | unwrap_used = "deny"
91 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
  1 | # How to contribute
  2 | 
  3 | First off, I'm really glad you're reading this, because we need volunteer developers to help improve this project and make it more useful to other OpenCL and Rust developers.
  4 | 
  5 | The following is a set of guidelines for contributing to `opencl3` and its packages, which are hosted in the `opencl3` repository on GitHub. These are mostly guidelines, not rules. Use your best judgment, and feel free to propose changes to this document in a pull request.
  6 | 
  7 | ## Where do I go from here?
  8 | 
  9 | If you've noticed a bug or have a feature request then please raise a [new issue](https://github.com/kenba/opencl3/issues/new).
 10 | It's generally best to check the [issues](https://github.com/kenba/opencl3/issues) and [pull requests](https://github.com/kenba/opencl3/pulls) (open and closed) to ensure that someone else has not noticed it before you. I recommend that you wait for confirmation of your bug or approval for your feature request in this way before starting to code.
 11 | 
 12 | Note: many OpenCL issues are hardware specific, so it is often useful to describe your setup, i.e.:
 13 | 
 14 | - `opencl3` features, e.g. ["serde", "CL_VERSION_1_2", "CL_VERSION_2_0", "CL_VERSION_2_1", "CL_VERSION_2_2"] or default
 15 | - OpenCL target device vendor and version
 16 | - OpenCL ICD loader vendor and version
 17 | - Rust version `rustc --version`
 18 | - operating system
 19 | - and any other relevant information.
 20 | 
 21 | Please abide by our [Code of Conduct](CODE_OF_CONDUCT.md) in all issues and pull requests.
 22 | 
 23 | ## Fork & create a branch
 24 | 
 25 | If the issue is something you think that you can fix, then [fork opencl3](https://docs.github.com/en/get-started/quickstart/fork-a-repo) and create a branch from `develop` with a descriptive name.  
 26 | E.g. a good branch name would be (where issue #42 is the issue you're working on):
 27 | 
 28 | ```shell
 29 | git checkout develop
 30 | git checkout -b 42-fix-some-bug
 31 | ```
 32 | 
 33 | ## Get the test suite running
 34 | 
 35 | Run the unit tests:
 36 | 
 37 | ```shell
 38 | cargo test -- --test-threads=1 --show-output
 39 | ```
 40 | 
 41 | and integration tests:
 42 | 
 43 | ```shell
 44 | cargo test -- --test-threads=1 --show-output --ignored
 45 | ```
 46 | 
 47 | Run both to ensure that you haven't broken anything.
 48 | Please feel free to add tests, especially where the new test(s) demonstrates a bug that you noticed.
 49 | 
 50 | Note: a new test that demonstrates a bug that you've described in an issue is always welcome in a PR, even if you haven't developed the code to fix it yet.
 51 | 
 52 | ## Implement your fix or feature
 53 | 
 54 | At this point, you're ready to make your changes!  
 55 | Feel free to ask for help; everyone is a beginner at first.
 56 | 
 57 | ## Get the style right
 58 | 
 59 | Your patch should follow the same conventions & pass the same code quality checks as the rest of the project.  
 60 | I recommend installing and running `clippy`:
 61 | 
 62 | ```shell
 63 | cargo clippy --all-features
 64 | ```
 65 | 
 66 | and `fmt`:
 67 | 
 68 | ```shell
 69 | cargo fmt
 70 | ```
 71 | 
 72 | ## Make a Pull Request
 73 | 
 74 | At this point, you should switch back to your develop branch and make sure it's up to date with opencl3's `develop` branch:
 75 | 
 76 | ```shell
 77 | git remote add upstream git@github.com:kenba/opencl3.git
 78 | git checkout develop
 79 | git pull upstream develop
 80 | ```
 81 | 
 82 | Then update your feature branch from your local copy of `develop`, and push it!
 83 | 
 84 | ```shell
 85 | git checkout 42-fix-some-bug
 86 | git rebase develop
 87 | git push --set-upstream origin 42-fix-some-bug
 88 | ```
 89 | 
 90 | Finally, go to GitHub and make a [Pull Request](https://docs.github.com/en/github/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request).
 91 | 
 92 | GitHub Actions will then build your PR.
 93 | 
 94 | ## Merging a Pull Request
 95 | 
 96 | A maintainer will review your PR and determine whether it's Ok to merge it into the `develop` branch.
 97 | 
 98 | If it is, they will approve and merge the PR. If not, they may comment on the PR to request changes before they are willing to approve and merge it.
 99 | Note: at this stage you should only change the PR to resolve the maintainer's comments.
100 | You should *not* introduce a fantastic new feature that you've just thought of! That should be raised as a new issue instead.
101 | 
102 | ## Rebasing a Pull Request
103 | 
104 | If a maintainer asks you to "rebase" your PR, they're saying that a lot of code has changed, and that you need to update your branch so it's easier to merge.
105 | 
106 | GitHub has a good guide about [rebasing in Git](https://docs.github.com/en/get-started/using-git/about-git-rebase); here's our suggested workflow:
107 | 
108 | ```shell
109 | git checkout 42-fix-some-bug
110 | git pull --rebase upstream develop
111 | git push --force-with-lease origin 42-fix-some-bug
112 | ```
113 | 


--------------------------------------------------------------------------------
/examples/opencl2svm.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2021-2023 Via Technology Ltd. All Rights Reserved.
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //    http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | use opencl3::Result;
 16 | use opencl3::command_queue::{CL_QUEUE_PROFILING_ENABLE, CommandQueue};
 17 | use opencl3::context::Context;
 18 | use opencl3::device::{CL_DEVICE_TYPE_GPU, Device};
 19 | use opencl3::error_codes::cl_int;
 20 | use opencl3::kernel::{ExecuteKernel, Kernel};
 21 | use opencl3::memory::{CL_MAP_READ, CL_MAP_WRITE};
 22 | use opencl3::program::{CL_STD_2_0, Program};
 23 | use opencl3::svm::SvmVec;
 24 | use opencl3::types::CL_BLOCKING;
 25 | 
 26 | const PROGRAM_SOURCE: &str = r#"
 27 | kernel void inclusive_scan_int (global int* output,
 28 |                                 global int const* values)
 29 | {
 30 |     int sum = 0;
 31 |     size_t lid = get_local_id(0);
 32 |     size_t lsize = get_local_size(0);
 33 | 
 34 |     size_t num_groups = get_num_groups(0);
 35 |     for (size_t i = 0u; i < num_groups; ++i)
 36 |     {
 37 |         size_t lidx = i * lsize + lid;
 38 |         int value = work_group_scan_inclusive_add(values[lidx]);
 39 |         output[lidx] = sum + value;
 40 | 
 41 |         sum += work_group_broadcast(value, lsize - 1);
 42 |     }
 43 | }"#;
 44 | 
 45 | const KERNEL_NAME: &str = "inclusive_scan_int";
 46 | 
 47 | fn main() -> Result<()> {
 48 |     // Use the first GPU device of the first OpenCL platform; panics if none is found
 49 |     let platforms = opencl3::platform::get_platforms()?;
 50 |     let platform = platforms.first().expect("no OpenCL platforms");
 51 |     let device = *platform
 52 |         .get_devices(CL_DEVICE_TYPE_GPU)?
 53 |         .first()
 54 |         .expect("no device found in platform");
 55 |     let device = Device::new(device);
 56 | 
 57 |     // Create a Context on an OpenCL device
 58 |     let context = Context::from_device(&device).expect("Context::from_device failed");
 59 | 
 60 |     // Build the OpenCL program source (passing CL_STD_2_0) and create the kernel.
 61 |     let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, CL_STD_2_0)
 62 |         .expect("Program::create_and_build_from_source failed");
 63 |     let kernel = Kernel::create(&program, KERNEL_NAME).expect("Kernel::create failed");
 64 | 
 65 |     // Create a command_queue on the Context's device
 66 |     let queue =
 67 |         CommandQueue::create_default_with_properties(&context, CL_QUEUE_PROFILING_ENABLE, 0)
 68 |             .expect("CommandQueue::create_default_with_properties failed");
 69 | 
 70 |     // The input data
 71 |     const ARRAY_SIZE: usize = 8;
 72 |     let value_array: [cl_int; ARRAY_SIZE] = [3, 2, 5, 9, 7, 1, 4, 2];
 73 | 
 74 |     // Create an OpenCL SVM vector
 75 |     let mut test_values =
 76 |         SvmVec::<cl_int>::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed");
 77 | 
 78 |     // Map test_values for writing (blocking) if not a CL_MEM_SVM_FINE_GRAIN_BUFFER
 79 |     if !test_values.is_fine_grained() {
 80 |         unsafe { queue.enqueue_svm_map(CL_BLOCKING, CL_MAP_WRITE, &mut test_values, &[])? };
 81 |     }
 82 | 
 83 |     // Copy input data into the OpenCL SVM vector
 84 |     test_values.clone_from_slice(&value_array);
 85 | 
 86 |     // Rebind test_values without `mut`, it is only read from here on
 87 |     let test_values = test_values;
 88 | 
 89 |     // Unmap test_values if not a CL_MEM_SVM_FINE_GRAIN_BUFFER and wait for the unmap
 90 |     if !test_values.is_fine_grained() {
 91 |         let unmap_test_values_event = unsafe { queue.enqueue_svm_unmap(&test_values, &[])? };
 92 |         unmap_test_values_event.wait()?;
 93 |     }
 94 | 
 95 |     // The output data, an OpenCL SVM vector
 96 |     let mut results =
 97 |         SvmVec::<cl_int>::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed");
 98 | 
 99 |     // Run the kernel with results as its output and test_values as its input
100 |     let kernel_event = unsafe {
101 |         ExecuteKernel::new(&kernel)
102 |             .set_arg_svm(results.as_mut_ptr())
103 |             .set_arg_svm(test_values.as_ptr())
104 |             .set_global_work_size(ARRAY_SIZE)
105 |             .enqueue_nd_range(&queue)?
106 |     };
107 | 
108 |     // Wait for the kernel to complete execution on the device
109 |     kernel_event.wait()?;
110 | 
111 |     // Map results for reading (blocking) if not a CL_MEM_SVM_FINE_GRAIN_BUFFER
112 |     if !results.is_fine_grained() {
113 |         unsafe { queue.enqueue_svm_map(CL_BLOCKING, CL_MAP_READ, &mut results, &[])? };
114 |     }
115 | 
116 |     // results can now be read directly by the host, no separate read command is needed
117 |     println!("sum results: {:?}", results);
118 | 
119 |     // Unmap results if not a CL_MEM_SVM_FINE_GRAIN_BUFFER
120 |     if !results.is_fine_grained() {
121 |         let unmap_results_event = unsafe { queue.enqueue_svm_unmap(&results, &[])? };
122 |         unmap_results_event.wait()?;
123 |     }
124 | 
125 |     Ok(())
126 | }
127 | 


--------------------------------------------------------------------------------
/examples/basic.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2021 Via Technology Ltd. All Rights Reserved.
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //    http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | use opencl3::Result;
 16 | use opencl3::command_queue::{CL_QUEUE_PROFILING_ENABLE, CommandQueue};
 17 | use opencl3::context::Context;
 18 | use opencl3::device::{CL_DEVICE_TYPE_GPU, Device, get_all_devices};
 19 | use opencl3::kernel::{ExecuteKernel, Kernel};
 20 | use opencl3::memory::{Buffer, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY};
 21 | use opencl3::program::Program;
 22 | use opencl3::types::{CL_BLOCKING, CL_NON_BLOCKING, cl_event, cl_float};
 23 | use std::ptr;
 24 | 
 25 | const PROGRAM_SOURCE: &str = r#"
 26 | kernel void saxpy_float (global float* z,
 27 |     global float const* x,
 28 |     global float const* y,
 29 |     float a)
 30 | {
 31 |     const size_t i = get_global_id(0);
 32 |     z[i] = a*x[i] + y[i];
 33 | }"#;
 34 | 
 35 | const KERNEL_NAME: &str = "saxpy_float";
 36 | 
 37 | /// Runs the `saxpy_float` kernel (`z[i] = a*x[i] + y[i]`) on the first GPU device found.
 38 | ///
 39 | /// Writes two input arrays to device buffers, enqueues the kernel, reads the result
 40 | /// back and prints its first and last values plus the profiled kernel duration.
 41 | fn main() -> Result<()> {
 42 |     // Find a usable device for this application
 43 |     let device_id = *get_all_devices(CL_DEVICE_TYPE_GPU)?
 44 |         .first()
 45 |         .expect("no device found in platform");
 46 |     let device = Device::new(device_id);
 47 | 
 48 |     // Create a Context on an OpenCL device
 49 |     let context = Context::from_device(&device).expect("Context::from_device failed");
 50 | 
 51 |     // Create a command_queue on the Context's device
 52 |     let queue = CommandQueue::create_default(&context, CL_QUEUE_PROFILING_ENABLE)
 53 |         .expect("CommandQueue::create_default failed");
 54 | 
 55 |     // Build the OpenCL program source and create the kernel.
 56 |     let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, "")
 57 |         .expect("Program::create_and_build_from_source failed");
 58 |     let kernel = Kernel::create(&program, KERNEL_NAME).expect("Kernel::create failed");
 59 | 
 60 |     /////////////////////////////////////////////////////////////////////
 61 |     // Compute data
 62 | 
 63 |     // The input data: `ones` is all 1.0, `sums` holds 1.0 + i for every index i
 64 |     const ARRAY_SIZE: usize = 1000;
 65 |     let ones: [cl_float; ARRAY_SIZE] = [1.0; ARRAY_SIZE];
 66 |     let sums: [cl_float; ARRAY_SIZE] = std::array::from_fn(|i| 1.0 + 1.0 * i as cl_float);
 67 | 
 68 |     // Create OpenCL device buffers
 69 |     let mut x = unsafe {
 70 |         Buffer::<cl_float>::create(&context, CL_MEM_READ_ONLY, ARRAY_SIZE, ptr::null_mut())?
 71 |     };
 72 |     let mut y = unsafe {
 73 |         Buffer::<cl_float>::create(&context, CL_MEM_READ_ONLY, ARRAY_SIZE, ptr::null_mut())?
 74 |     };
 75 |     let z = unsafe {
 76 |         Buffer::<cl_float>::create(&context, CL_MEM_WRITE_ONLY, ARRAY_SIZE, ptr::null_mut())?
 77 |     };
 78 | 
 79 |     // Blocking write
 80 |     let _x_write_event = unsafe { queue.enqueue_write_buffer(&mut x, CL_BLOCKING, 0, &ones, &[])? };
 81 | 
 82 |     // Non-blocking write; the kernel enqueued below waits for y_write_event
 83 |     let y_write_event =
 84 |         unsafe { queue.enqueue_write_buffer(&mut y, CL_NON_BLOCKING, 0, &sums, &[])? };
 85 | 
 86 |     // a value for the kernel function
 87 |     let a: cl_float = 300.0;
 88 | 
 89 |     // Use the ExecuteKernel builder to set the kernel buffer and
 90 |     // cl_float value arguments, before setting the one dimensional
 91 |     // global_work_size for the call to enqueue_nd_range.
 92 |     // `?` propagates any error, otherwise yields the kernel execution event.
 93 |     let kernel_event = unsafe {
 94 |         ExecuteKernel::new(&kernel)
 95 |             .set_arg(&z)
 96 |             .set_arg(&x)
 97 |             .set_arg(&y)
 98 |             .set_arg(&a)
 99 |             .set_global_work_size(ARRAY_SIZE)
100 |             .set_wait_event(&y_write_event)
101 |             .enqueue_nd_range(&queue)?
102 |     };
103 | 
104 |     let events: Vec<cl_event> = vec![kernel_event.get()];
105 | 
106 |     // Create a results array to hold the results from the OpenCL device
107 |     // and enqueue a read command to read the device buffer into the array
108 |     // after the kernel event completes.
109 |     let mut results: [cl_float; ARRAY_SIZE] = [0.0; ARRAY_SIZE];
110 |     let read_event =
111 |         unsafe { queue.enqueue_read_buffer(&z, CL_NON_BLOCKING, 0, &mut results, &events)? };
112 | 
113 |     // Wait for the read_event to complete.
114 |     read_event.wait()?;
115 | 
116 |     // Output the first and last results
117 |     println!("results front: {}", results[0]);
118 |     println!("results back: {}", results[ARRAY_SIZE - 1]);
119 | 
120 |     // Calculate the kernel duration, from the kernel_event
121 |     let start_time = kernel_event.profiling_command_start()?;
122 |     let end_time = kernel_event.profiling_command_end()?;
123 |     let duration = end_time - start_time;
124 |     println!("kernel execution duration (ns): {}", duration);
125 | 
126 |     Ok(())
127 | }
128 | 


--------------------------------------------------------------------------------
/examples/opencl2serde.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2021 Via Technology Ltd. All Rights Reserved.
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //    http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | use opencl3::Result;
 16 | use opencl3::command_queue::CommandQueue;
 17 | use opencl3::context::Context;
 18 | use opencl3::device::{CL_DEVICE_TYPE_GPU, Device, get_all_devices};
 19 | use opencl3::error_codes::cl_int;
 20 | use opencl3::kernel::{ExecuteKernel, Kernel};
 21 | use opencl3::memory::{CL_MAP_READ, CL_MAP_WRITE};
 22 | use opencl3::program::{CL_STD_2_0, Program};
 23 | use opencl3::svm::{ExtendSvmVec, SvmVec};
 24 | use opencl3::types::CL_BLOCKING;
 25 | use serde::de::DeserializeSeed;
 26 | use std::ptr;
 27 | 
 28 | const PROGRAM_SOURCE: &str = r#"
 29 | kernel void inclusive_scan_int (global int* output,
 30 |     global int const* values)
 31 | {
 32 |     int sum = 0;
 33 |     size_t lid = get_local_id(0);
 34 |     size_t lsize = get_local_size(0);
 35 | 
 36 |     size_t num_groups = get_num_groups(0);
 37 |     for (size_t i = 0u; i < num_groups; ++i)
 38 |     {
 39 |         size_t lidx = i * lsize + lid;
 40 |         int value = work_group_scan_inclusive_add(values[lidx]);
 41 |         output[lidx] = sum + value;
 42 | 
 43 |         sum += work_group_broadcast(value, lsize - 1);
 44 |     }
 45 | }"#; // OpenCL C source; main builds it with the CL_STD_2_0 option
 46 | 
 47 | const KERNEL_NAME: &str = "inclusive_scan_int"; // name of the kernel function in PROGRAM_SOURCE
 48 | 
 49 | /// Deserializes a JSON array into an OpenCL SVM vector, runs the `inclusive_scan_int` kernel
 50 | /// on it using the first SVM capable GPU device, and prints the results as JSON.
 51 | fn main() -> Result<()> {
 52 |     // Find a suitable device for this application
 53 |     let devices = get_all_devices(CL_DEVICE_TYPE_GPU)?;
 54 |     assert!(!devices.is_empty());
 55 | 
 56 |     // Find an OpenCL SVM device: the first one reporting any SVM capability bit
 57 |     let mut device_id = ptr::null_mut();
 58 |     let mut is_svm_capable: bool = false;
 59 |     for dev_id in devices {
 60 |         is_svm_capable = 0 < Device::new(dev_id).svm_mem_capability();
 61 |         if is_svm_capable {
 62 |             device_id = dev_id;
 63 |             break;
 64 |         }
 65 |     }
 66 | 
 67 |     if is_svm_capable {
 68 |         // Create OpenCL context from the OpenCL svm device
 69 |         let device = Device::new(device_id);
 70 |         let vendor = device.vendor()?;
 71 |         let vendor_id = device.vendor_id()?;
 72 |         println!("OpenCL device vendor name: {}", vendor);
 73 |         println!("OpenCL device vendor id: {:X}", vendor_id);
 74 | 
 75 |         /////////////////////////////////////////////////////////////////////
 76 |         // Initialise OpenCL compute environment
 77 | 
 78 |         // Create a Context on the OpenCL svm device
 79 |         let context = Context::from_device(&device).expect("Context::from_device failed");
 80 | 
 81 |         // Build the OpenCL program source and create the kernel.
 82 |         let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, CL_STD_2_0)
 83 |             .expect("Program::create_and_build_from_source failed");
 84 | 
 85 |         let kernel = Kernel::create(&program, KERNEL_NAME).expect("Kernel::create failed");
 86 | 
 87 |         // Create a command_queue on the Context's device
 88 |         let queue = CommandQueue::create_default_with_properties(&context, 0, 0)
 89 |             .expect("CommandQueue::create_default_with_properties failed");
 90 | 
 91 |         // The input data
 92 |         const ARRAY_SIZE: usize = 8;
 93 |         const VALUE_ARRAY: &str = "[3,2,5,9,7,1,4,2]";
 94 | 
 95 |         // Deserialize into an OpenCL SVM vector
 96 |         let mut test_values = SvmVec::<cl_int>::new(&context);
 97 | 
 98 |         let mut deserializer = serde_json::Deserializer::from_str(VALUE_ARRAY);
 99 | 
100 |         // Handle test_values if device only supports CL_DEVICE_SVM_COARSE_GRAIN_BUFFER
101 |         if !test_values.is_fine_grained() {
102 |             // SVM_COARSE_GRAIN_BUFFER needs to know the size of the data to allocate the SVM
103 |             test_values =
104 |                 SvmVec::<cl_int>::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed");
105 |             // Map the SVM for a SVM_COARSE_GRAIN_BUFFER
106 |             unsafe { queue.enqueue_svm_map(CL_BLOCKING, CL_MAP_WRITE, &mut test_values, &[])? };
107 |             // Clear the SVM for the deserializer
108 |             test_values.clear();
109 |         }
110 | 
111 |         ExtendSvmVec(&mut test_values)
112 |             .deserialize(&mut deserializer)
113 |             .expect("Error deserializing the VALUE_ARRAY JSON string.");
114 | 
115 |         // Make test_values SVM vector immutable
116 |         let test_values = test_values;
117 | 
118 |         // Unmap test_values if not a CL_MEM_SVM_FINE_GRAIN_BUFFER
119 |         if !test_values.is_fine_grained() {
120 |             let unmap_test_values_event = unsafe { queue.enqueue_svm_unmap(&test_values, &[])? };
121 |             unmap_test_values_event.wait()?;
122 |         }
123 | 
124 |         // The output data, an OpenCL SVM vector
125 |         let mut results =
126 |             SvmVec::<cl_int>::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed");
127 | 
128 |         // Run the sum kernel on the input data
129 |         let sum_kernel_event = unsafe {
130 |             ExecuteKernel::new(&kernel)
131 |                 .set_arg_svm(results.as_mut_ptr())
132 |                 .set_arg_svm(test_values.as_ptr())
133 |                 .set_global_work_size(ARRAY_SIZE)
134 |                 .enqueue_nd_range(&queue)?
135 |         };
136 | 
137 |         // Wait for the kernel to complete execution on the device
138 |         sum_kernel_event.wait()?;
139 | 
140 |         // Map results if not a CL_MEM_SVM_FINE_GRAIN_BUFFER
141 |         if !results.is_fine_grained() {
142 |             unsafe { queue.enqueue_svm_map(CL_BLOCKING, CL_MAP_READ, &mut results, &[])? };
143 |         }
144 | 
145 |         // Convert SVM results to json
146 |         let json_results = serde_json::to_string(&results).unwrap();
147 |         println!("json results: {}", json_results);
148 | 
149 |         // Unmap results if not a CL_MEM_SVM_FINE_GRAIN_BUFFER
150 |         if !results.is_fine_grained() {
151 |             let unmap_results_event = unsafe { queue.enqueue_svm_unmap(&results, &[])? };
152 |             unmap_results_event.wait()?;
153 |         }
154 |     } else {
155 |         println!("OpenCL fine grained system SVM device not found")
156 |     }
157 | 
158 |     Ok(())
159 | }
160 | 


--------------------------------------------------------------------------------
/examples/opencl2image.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2023 Via Technology Ltd. All Rights Reserved.
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //    http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | use cl3::ext::CL_IMAGE_FORMAT_NOT_SUPPORTED;
 16 | use cl3::memory::{CL_MEM_OBJECT_IMAGE2D, CL_MEM_WRITE_ONLY, CL_RGBA, CL_UNSIGNED_INT8};
 17 | use cl3::types::{CL_NON_BLOCKING, cl_image_desc, cl_image_format};
 18 | use libc::c_void;
 19 | use opencl3::Result;
 20 | use opencl3::command_queue::{CL_QUEUE_PROFILING_ENABLE, CommandQueue};
 21 | use opencl3::context::Context;
 22 | use opencl3::device::{CL_DEVICE_TYPE_GPU, Device};
 23 | use opencl3::kernel::{ExecuteKernel, Kernel};
 24 | use opencl3::memory::Image;
 25 | use opencl3::program::{CL_STD_2_0, Program};
 26 | use opencl3::types::cl_event;
 27 | 
 28 | const PROGRAM_SOURCE: &str = r#"
 29 | kernel void colorize(write_only image2d_t image)
 30 | {
 31 |     const size_t x = get_global_id(0);
 32 |     const size_t y = get_global_id(1);
 33 |     write_imageui(image, (int2)(x, y), (uint4)(x, y, 0, 255));
 34 | }"#; // OpenCL C source; main builds it with the CL_STD_2_0 option
 35 | 
 36 | const KERNEL_NAME: &str = "colorize"; // name of the kernel function in PROGRAM_SOURCE
 37 | 
 38 | /// Writes a 2D RGBA image with the `colorize` kernel, fills its middle with a solid color,
 39 | /// then reads the image back from the first GPU device of the first platform and prints it.
 40 | fn main() -> Result<()> {
 41 |     // Image dimensions, shared by the image description, the kernel range and the read back
 42 |     const WIDTH: usize = 10;
 43 |     const HEIGHT: usize = 10;
 44 | 
 45 |     // Find a usable platform and device for this application
 46 |     let platforms = opencl3::platform::get_platforms()?;
 47 |     let platform = platforms.first().expect("no OpenCL platforms");
 48 |     let device = *platform
 49 |         .get_devices(CL_DEVICE_TYPE_GPU)?
 50 |         .first()
 51 |         .expect("no device found in platform");
 52 |     let device = Device::new(device);
 53 | 
 54 |     // Create a Context on an OpenCL device
 55 |     let context = Context::from_device(&device).expect("Context::from_device failed");
 56 | 
 57 |     // Print some information about the device
 58 |     println!(
 59 |         "CL_DEVICE_IMAGE_SUPPORT: {:?}",
 60 |         device.image_support().unwrap()
 61 |     );
 62 |     println!(
 63 |         "CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS: {:?}",
 64 |         device.max_read_write_image_args().unwrap()
 65 |     );
 66 |     println!(
 67 |         "CL_DEVICE_MAX_READ_IMAGE_ARGS: {:?}",
 68 |         device.max_read_image_args().unwrap()
 69 |     );
 70 |     println!(
 71 |         "CL_DEVICE_MAX_WRITE_IMAGE_ARGS: {:?}",
 72 |         device.max_write_image_args().unwrap()
 73 |     );
 74 |     println!(
 75 |         "CL_DEVICE_MAX_SAMPLERS: {:?}",
 76 |         device.max_device_samples().unwrap()
 77 |     );
 78 |     let supported_formats =
 79 |         context.get_supported_image_formats(CL_MEM_WRITE_ONLY, CL_MEM_OBJECT_IMAGE2D)?;
 80 |     let rgba8_supported = supported_formats.iter().any(|f| {
 81 |         f.image_channel_order == CL_RGBA && f.image_channel_data_type == CL_UNSIGNED_INT8
 82 |     });
 83 |     if !rgba8_supported {
 84 |         println!("Device does not support CL_RGBA with CL_UNSIGNED_INT8 for CL_MEM_WRITE_ONLY!");
 85 |         return Err(CL_IMAGE_FORMAT_NOT_SUPPORTED.into());
 86 |     }
 87 | 
 88 |     // Build the OpenCL program source and create the kernel.
 89 |     let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, CL_STD_2_0)
 90 |         .expect("Program::create_and_build_from_source failed");
 91 |     let kernel = Kernel::create(&program, KERNEL_NAME).expect("Kernel::create failed");
 92 | 
 93 |     // Create a command_queue on the Context's device
 94 |     let queue =
 95 |         CommandQueue::create_default_with_properties(&context, CL_QUEUE_PROFILING_ENABLE, 0)
 96 |             .expect("CommandQueue::create_default_with_properties failed");
 97 | 
 98 |     // Create an image
 99 |     let mut image = unsafe {
100 |         Image::create(
101 |             &context,
102 |             CL_MEM_WRITE_ONLY,
103 |             &cl_image_format {
104 |                 image_channel_order: CL_RGBA,
105 |                 image_channel_data_type: CL_UNSIGNED_INT8,
106 |             },
107 |             &cl_image_desc {
108 |                 image_type: CL_MEM_OBJECT_IMAGE2D,
109 |                 image_width: WIDTH,
110 |                 image_height: HEIGHT,
111 |                 image_depth: 1,
112 |                 image_array_size: 1,
113 |                 image_row_pitch: 0,
114 |                 image_slice_pitch: 0,
115 |                 num_mip_levels: 0,
116 |                 num_samples: 0,
117 |                 buffer: std::ptr::null_mut(),
118 |             },
119 |             std::ptr::null_mut(),
120 |         )
121 |         .expect("Image::create failed")
122 |     };
123 | 
124 |     // Run the kernel on the input data, one work item per pixel
125 |     let kernel_event = unsafe {
126 |         ExecuteKernel::new(&kernel)
127 |             .set_arg(&image)
128 |             .set_global_work_sizes(&[WIDTH, HEIGHT])
129 |             .enqueue_nd_range(&queue)?
130 |     };
131 | 
132 |     let events: Vec<cl_event> = vec![kernel_event.get()];
133 | 
134 |     // Fill the middle of the image with a solid color, after the kernel event
135 |     let fill_color = [11u32, 22u32, 33u32, 44u32];
136 |     let fill_event = unsafe {
137 |         queue.enqueue_fill_image(
138 |             &mut image,
139 |             fill_color.as_ptr() as *const c_void,
140 |             &[3usize, 3usize, 0usize] as *const usize,
141 |             &[4usize, 4usize, 1usize] as *const usize,
142 |             &events,
143 |         )?
144 |     };
145 | 
146 |     let events: Vec<cl_event> = vec![fill_event.get()];
147 | 
148 |     // Read the image data from the device, 4 bytes per pixel
149 |     let mut image_data = [0u8; WIDTH * HEIGHT * 4];
150 |     let read_event = unsafe {
151 |         queue.enqueue_read_image(
152 |             &image,
153 |             CL_NON_BLOCKING,
154 |             &[0usize, 0usize, 0usize] as *const usize,
155 |             &[WIDTH, HEIGHT, 1usize] as *const usize,
156 |             0,
157 |             0,
158 |             image_data.as_mut_ptr() as *mut c_void,
159 |             &events,
160 |         )?
161 |     };
162 | 
163 |     // Wait for the read_event to complete.
164 |     read_event.wait()?;
165 | 
166 |     // Print the image data
167 |     println!("image_data: ");
168 |     for y in 0..HEIGHT {
169 |         for x in 0..WIDTH {
170 |             let offset = (y * WIDTH + x) * 4;
171 |             print!(
172 |                 "({:>3}, {:>3}, {:>3}, {:>3}) ",
173 |                 image_data[offset],
174 |                 image_data[offset + 1],
175 |                 image_data[offset + 2],
176 |                 image_data[offset + 3]
177 |             );
178 |         }
179 |         println!();
180 |     }
181 | 
182 |     Ok(())
183 | }
184 | 


--------------------------------------------------------------------------------
/docs/images/example_opencl_system.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" contentScriptType="application/ecmascript" contentStyleType="text/css" height="246px" preserveAspectRatio="none" style="width:398px;height:246px;" version="1.1" viewBox="0 0 398 246" width="398px" zoomAndPan="magnify"><defs><filter height="300%" id="f1mp00u0tw38d4" width="300%" x="-1" y="-1"><feGaussianBlur result="blurOut" stdDeviation="2.0"/><feColorMatrix in="blurOut" result="blurOut2" type="matrix" values="0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 .4 0"/><feOffset dx="4.0" dy="4.0" in="blurOut2" result="blurOut3"/><feBlend in="SourceGraphic" in2="blurOut3" mode="normal"/></filter></defs><g><rect fill="#FEFECE" filter="url(#f1mp00u0tw38d4)" height="35.0938" style="stroke:#A80036;stroke-width:1.5;" width="54" x="124.5" y="7"/><text fill="#000000" font-family="sans-serif" font-size="12" lengthAdjust="spacingAndGlyphs" textLength="40" x="131.5" y="21.457">system</text><line style="stroke:#A80036;stroke-width:1.5;" x1="125.5" x2="177.5" y1="26.0938" y2="26.0938"/><rect fill="#FEFECE" filter="url(#f1mp00u0tw38d4)" height="35.0938" style="stroke:#A80036;stroke-width:1.5;" width="73" x="7" y="102"/><text fill="#000000" font-family="sans-serif" font-size="12" lengthAdjust="spacingAndGlyphs" textLength="59" x="14" y="116.457">platform_1</text><line style="stroke:#A80036;stroke-width:1.5;" x1="8" x2="79" y1="121.0938" y2="121.0938"/><rect fill="#FEFECE" filter="url(#f1mp00u0tw38d4)" height="35.0938" style="stroke:#A80036;stroke-width:1.5;" width="73" x="115" y="102"/><text fill="#000000" font-family="sans-serif" font-size="12" lengthAdjust="spacingAndGlyphs" textLength="59" x="122" y="116.457">platform_2</text><line style="stroke:#A80036;stroke-width:1.5;" x1="116" x2="187" y1="121.0938" y2="121.0938"/><rect fill="#FEFECE" filter="url(#f1mp00u0tw38d4)" height="35.0938" style="stroke:#A80036;stroke-width:1.5;" width="73" x="223" 
y="102"/><text fill="#000000" font-family="sans-serif" font-size="12" lengthAdjust="spacingAndGlyphs" textLength="59" x="230" y="116.457">platform_3</text><line style="stroke:#A80036;stroke-width:1.5;" x1="224" x2="295" y1="121.0938" y2="121.0938"/><rect fill="#FEFECE" filter="url(#f1mp00u0tw38d4)" height="35.0938" style="stroke:#A80036;stroke-width:1.5;" width="63" x="12" y="197"/><text fill="#000000" font-family="sans-serif" font-size="12" lengthAdjust="spacingAndGlyphs" textLength="49" x="19" y="211.457">device_1</text><line style="stroke:#A80036;stroke-width:1.5;" x1="13" x2="74" y1="216.0938" y2="216.0938"/><rect fill="#FEFECE" filter="url(#f1mp00u0tw38d4)" height="35.0938" style="stroke:#A80036;stroke-width:1.5;" width="63" x="120" y="197"/><text fill="#000000" font-family="sans-serif" font-size="12" lengthAdjust="spacingAndGlyphs" textLength="49" x="127" y="211.457">device_2</text><line style="stroke:#A80036;stroke-width:1.5;" x1="121" x2="182" y1="216.0938" y2="216.0938"/><rect fill="#FEFECE" filter="url(#f1mp00u0tw38d4)" height="35.0938" style="stroke:#A80036;stroke-width:1.5;" width="63" x="223" y="197"/><text fill="#000000" font-family="sans-serif" font-size="12" lengthAdjust="spacingAndGlyphs" textLength="49" x="230" y="211.457">device_3</text><line style="stroke:#A80036;stroke-width:1.5;" x1="224" x2="285" y1="216.0938" y2="216.0938"/><rect fill="#FEFECE" filter="url(#f1mp00u0tw38d4)" height="35.0938" style="stroke:#A80036;stroke-width:1.5;" width="63" x="321" y="197"/><text fill="#000000" font-family="sans-serif" font-size="12" lengthAdjust="spacingAndGlyphs" textLength="49" x="328" y="211.457">device_4</text><line style="stroke:#A80036;stroke-width:1.5;" x1="322" x2="383" y1="216.0938" y2="216.0938"/><!--MD5=[f68154fc02dd051a67f6788e70091099]
 2 | reverse link system to platform_1--><path codeLine="17" d="M122.08,50.83 C103.3,67.01 79.47,87.52 63.01,101.7 " fill="none" id="system-backto-platform_1" style="stroke:#A80036;stroke-width:1.0;"/><polygon fill="#A80036" points="132.19,42.13,125.0332,43.0131,123.0957,49.959,130.2525,49.0759,132.19,42.13" style="stroke:#A80036;stroke-width:1.0;"/><!--MD5=[accf8706614c2e87c26a5787e06a2853]
 3 | reverse link system to platform_2--><path codeLine="18" d="M151.5,55.21 C151.5,70.58 151.5,88.78 151.5,101.7 " fill="none" id="system-backto-platform_2" style="stroke:#A80036;stroke-width:1.0;"/><polygon fill="#A80036" points="151.5,42.13,147.5,48.13,151.5,54.13,155.5,48.13,151.5,42.13" style="stroke:#A80036;stroke-width:1.0;"/><!--MD5=[8148989fc4f59313db12e0b3a9db7d1c]
 4 | reverse link system to platform_3--><path codeLine="19" d="M180.92,50.83 C199.7,67.01 223.53,87.52 239.99,101.7 " fill="none" id="system-backto-platform_3" style="stroke:#A80036;stroke-width:1.0;"/><polygon fill="#A80036" points="170.81,42.13,172.7475,49.0759,179.9043,49.959,177.9668,43.0131,170.81,42.13" style="stroke:#A80036;stroke-width:1.0;"/><!--MD5=[9d6dcf9b6b14aab93a436374bdf5d14f]
 5 | reverse link platform_1 to device_1--><path codeLine="21" d="M43.5,150.21 C43.5,165.58 43.5,183.78 43.5,196.7 " fill="none" id="platform_1-backto-device_1" style="stroke:#A80036;stroke-width:1.0;"/><polygon fill="#A80036" points="43.5,137.13,39.5,143.13,43.5,149.13,47.5,143.13,43.5,137.13" style="stroke:#A80036;stroke-width:1.0;"/><!--MD5=[4f8efefcf068dea61a13bd83048f7e72]
 6 | reverse link platform_1 to device_2--><path codeLine="22" d="M72.92,145.83 C91.7,162.01 115.53,182.52 131.99,196.7 " fill="none" id="platform_1-backto-device_2" style="stroke:#A80036;stroke-width:1.0;"/><polygon fill="#A80036" points="62.81,137.13,64.7475,144.0759,71.9043,144.959,69.9668,138.0131,62.81,137.13" style="stroke:#A80036;stroke-width:1.0;"/><!--MD5=[b50eded65ef27003c16259a0f05a05c0]
 7 | reverse link platform_2 to device_2--><path codeLine="24" d="M151.5,150.21 C151.5,165.58 151.5,183.78 151.5,196.7 " fill="none" id="platform_2-backto-device_2" style="stroke:#A80036;stroke-width:1.0;"/><polygon fill="#A80036" points="151.5,137.13,147.5,143.13,151.5,149.13,155.5,143.13,151.5,137.13" style="stroke:#A80036;stroke-width:1.0;"/><!--MD5=[5140a152020fdd078b42955f728fcbb8]
 8 | reverse link platform_3 to device_2--><path codeLine="26" d="M230.08,145.83 C211.3,162.01 187.47,182.52 171.01,196.7 " fill="none" id="platform_3-backto-device_2" style="stroke:#A80036;stroke-width:1.0;"/><polygon fill="#A80036" points="240.19,137.13,233.0332,138.0131,231.0957,144.959,238.2525,144.0759,240.19,137.13" style="stroke:#A80036;stroke-width:1.0;"/><!--MD5=[a36420b1a6587a04b7972aee33dd10e9]
 9 | reverse link platform_3 to device_3--><path codeLine="27" d="M257.9,150.21 C257.08,165.58 256.1,183.78 255.4,196.7 " fill="none" id="platform_3-backto-device_3" style="stroke:#A80036;stroke-width:1.0;"/><polygon fill="#A80036" points="258.61,137.13,254.2928,142.906,257.9641,149.1126,262.2812,143.3366,258.61,137.13" style="stroke:#A80036;stroke-width:1.0;"/><!--MD5=[d367a14c37c37cf22dd66db82a8d1269]
10 | reverse link platform_3 to device_4--><path codeLine="28" d="M285.51,146.51 C301.58,162.58 321.72,182.72 335.7,196.7 " fill="none" id="platform_3-backto-device_4" style="stroke:#A80036;stroke-width:1.0;"/><polygon fill="#A80036" points="276.13,137.13,277.5442,144.2011,284.6153,145.6153,283.2011,138.5442,276.13,137.13" style="stroke:#A80036;stroke-width:1.0;"/><!--MD5=[40023d9955e529f13d7bfd3cb12cdc64]
11 | @startuml
12 | 
13 | 'Copyright (c) 2017 Via Technology Ltd. All Rights Reserved.
14 | 
15 | 'title OpenCL Compute System Objects
16 | 
17 | object system
18 | 
19 | object platform_1
20 | object platform_2
21 | object platform_3
22 | 
23 | object device_1
24 | object device_2
25 | object device_3
26 | object device_4
27 | 
28 | system *- - platform_1
29 | system *- - platform_2
30 | system *- - platform_3
31 | 
32 | platform_1 *- - device_1
33 | platform_1 *- - device_2
34 | 
35 | platform_2 *- - device_2
36 | 
37 | platform_3 *- - device_2
38 | platform_3 *- - device_3
39 | platform_3 *- - device_4
40 | 
41 | @enduml
42 | 
43 | @startuml
44 | 
45 | 
46 | 
47 | object system
48 | 
49 | object platform_1
50 | object platform_2
51 | object platform_3
52 | 
53 | object device_1
54 | object device_2
55 | object device_3
56 | object device_4
57 | 
58 | system *- - platform_1
59 | system *- - platform_2
60 | system *- - platform_3
61 | 
62 | platform_1 *- - device_1
63 | platform_1 *- - device_2
64 | 
65 | platform_2 *- - device_2
66 | 
67 | platform_3 *- - device_2
68 | platform_3 *- - device_3
69 | platform_3 *- - device_4
70 | 
71 | @enduml
72 | 
73 | PlantUML version 1.2020.26(Mon Dec 21 17:45:07 GMT 2020)
74 | (GPL source distribution)
75 | Java Runtime: Java(TM) SE Runtime Environment
76 | JVM: Java HotSpot(TM) Client VM
77 | Default Encoding: Cp1252
78 | Language: en
79 | Country: GB
80 | --></g></svg>


--------------------------------------------------------------------------------
/src/event.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2020-2024 Via Technology Ltd. All Rights Reserved.
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //    http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | pub use cl3::event::*;
 16 | 
 17 | use super::Result;
 18 | use libc::c_void;
 19 | 
 20 | /// An OpenCL event object: a wrapper around a raw `cl_event` handle.
 21 | ///
 22 | /// Has methods to return information from calls to clGetEventInfo and
 23 | /// clGetEventProfilingInfo with the appropriate parameters.
 24 | /// Implements the Drop trait to call release_event when the object is dropped.
 25 | #[derive(Debug)]
 26 | pub struct Event {
 27 |     event: cl_event, // the wrapped handle; passed to release_event on drop
 28 | }
 29 | 
 30 | impl From<cl_event> for Event {
 31 |     fn from(raw: cl_event) -> Self {
 32 |         Self::new(raw) // wraps the handle as given; Drop later calls release_event on it
 33 |     }
 34 | }
 35 | 
 36 | impl From<Event> for cl_event {
 37 |     fn from(value: Event) -> Self {
 38 |         std::mem::ManuallyDrop::new(value).event // no Drop: the caller now owns the handle
 39 |     }
 40 | }
 41 | 
 42 | impl Drop for Event {
 43 |     fn drop(&mut self) {
 44 |         unsafe { release_event(self.get()) }.expect("Error: clReleaseEvent");
 45 |     }
 46 | }
 47 | 
 48 | unsafe impl Send for Event {} // NOTE(review): assumes a cl_event may move across threads — confirm
 49 | unsafe impl Sync for Event {} // NOTE(review): assumes shared use of a cl_event is thread-safe — confirm
 50 | 
 51 | impl Event {
 52 |     /// Create an Event from an OpenCL cl_event.
 53 |     ///
 54 |     /// * `event` - a valid OpenCL cl_event.
 55 |     ///
 56 |     /// returns the new Event
 57 |     pub const fn new(event: cl_event) -> Self {
 58 |         Self { event }
 59 |     }
 60 | 
 61 |     /// Get the underlying OpenCL cl_event.
 62 |     pub const fn get(&self) -> cl_event {
 63 |         self.event
 64 |     }
 65 | 
 66 |     /// Wait for the event to complete.
 67 |     pub fn wait(&self) -> Result<()> {
 68 |         let events = [self.get()];
 69 |         Ok(wait_for_events(&events)?)
 70 |     }
 71 | 
 72 |     pub fn command_execution_status(&self) -> Result<CommandExecutionStatus> {
 73 |         Ok(CommandExecutionStatus(
 74 |             get_event_info(self.event, CL_EVENT_COMMAND_EXECUTION_STATUS)?.into(),
 75 |         ))
 76 |     }
 77 | 
 78 |     pub fn command_type(&self) -> Result<EventCommandType> {
 79 |         Ok(EventCommandType(
 80 |             get_event_info(self.event, CL_EVENT_COMMAND_TYPE)?.into(),
 81 |         ))
 82 |     }
 83 | 
 84 |     pub fn reference_count(&self) -> Result<cl_uint> {
 85 |         Ok(get_event_info(self.event, CL_EVENT_REFERENCE_COUNT)?.into())
 86 |     }
 87 | 
 88 |     pub fn command_queue(&self) -> Result<cl_command_queue> {
 89 |         Ok(isize::from(get_event_info(self.event, CL_EVENT_COMMAND_QUEUE)?) as cl_command_queue)
 90 |     }
 91 | 
 92 |     pub fn context(&self) -> Result<cl_context> {
 93 |         Ok(isize::from(get_event_info(self.event, CL_EVENT_CONTEXT)?) as cl_context)
 94 |     }
 95 | 
 96 |     /// Get data about an OpenCL event.
 97 |     /// Calls clGetEventInfo to get the desired data about the event.
 98 |     pub fn get_data(&self, param_name: cl_event_info) -> Result<Vec<u8>> {
 99 |         Ok(get_event_data(self.event, param_name)?)
100 |     }
101 | 
102 |     pub fn set_callback(
103 |         &self,
104 |         command_exec_callback_type: cl_int,
105 |         pfn_notify: extern "C" fn(cl_event, cl_int, *mut c_void),
106 |         user_data: *mut c_void,
107 |     ) -> Result<()> {
108 |         Ok(set_event_callback(
109 |             self.event,
110 |             command_exec_callback_type,
111 |             pfn_notify,
112 |             user_data,
113 |         )?)
114 |     }
115 | 
116 |     pub fn profiling_command_queued(&self) -> Result<cl_ulong> {
117 |         Ok(get_event_profiling_info(self.event, CL_PROFILING_COMMAND_QUEUED)?.into())
118 |     }
119 | 
120 |     pub fn profiling_command_submit(&self) -> Result<cl_ulong> {
121 |         Ok(get_event_profiling_info(self.event, CL_PROFILING_COMMAND_SUBMIT)?.into())
122 |     }
123 | 
124 |     pub fn profiling_command_start(&self) -> Result<cl_ulong> {
125 |         Ok(cl_ulong::from(get_event_profiling_info(self.event, CL_PROFILING_COMMAND_START)?))
126 |     }
127 | 
128 |     pub fn profiling_command_end(&self) -> Result<cl_ulong> {
129 |         Ok(cl_ulong::from(get_event_profiling_info(self.event, CL_PROFILING_COMMAND_END)?))
130 |     }
131 | 
132 |     /// CL_VERSION_2_0 profiling query.
133 |     pub fn profiling_command_complete(&self) -> Result<cl_ulong> {
134 |         Ok(cl_ulong::from(get_event_profiling_info(self.event, CL_PROFILING_COMMAND_COMPLETE)?))
135 |     }
136 | 
137 |     /// Retrieve raw profiling data about this OpenCL event for the given `param_name`.
138 |     /// Calls clGetEventProfilingInfo (through `get_event_profiling_data`) and returns the bytes.
139 |     pub fn profiling_data(&self, param_name: cl_profiling_info) -> Result<Vec<u8>> {
140 |         get_event_profiling_data(self.event, param_name).map_err(Into::into)
141 |     }
142 | }
143 | 
144 | #[cfg(test)]
145 | mod tests {
146 |     use super::*;
147 |     use crate::command_queue::{CL_QUEUE_PROFILING_ENABLE, CommandQueue};
148 |     use crate::context::Context;
149 |     use crate::device::{CL_DEVICE_TYPE_GPU, Device};
150 |     use crate::memory::{Buffer, CL_MEM_READ_ONLY};
151 |     use crate::platform::get_platforms;
152 |     use crate::types::{CL_NON_BLOCKING, cl_float};
153 |     use std::ptr;
154 | 
155 |     extern "C" fn event_callback_function(
156 |         _event: cl_event,
157 |         event_command_status: cl_int,
158 |         _user_data: *mut c_void,
159 |     ) {
160 |         println!(
161 |             "OpenCL event callback command status: {}",
162 |             event_command_status
163 |         );
164 |     }
165 | 
166 |     #[test]
167 |     fn test_event() {
168 |         let platforms = get_platforms().unwrap();
169 |         assert!(0 < platforms.len());
170 | 
171 |         // Use the first platform that was found
172 |         let platform = &platforms[0];
173 | 
174 |         let devices = platform.get_devices(CL_DEVICE_TYPE_GPU).unwrap();
175 |         assert!(0 < devices.len());
176 | 
177 |         // Use the first GPU device reported by that platform
178 |         let device = Device::new(devices[0]);
179 |         let context = Context::from_device(&device).unwrap();
180 | 
181 |         // Create a command_queue with profiling enabled on the Context's default device
182 |         let queue = CommandQueue::create_default(&context, CL_QUEUE_PROFILING_ENABLE)
183 |             .expect("CommandQueue::create_default failed");
184 | 
185 |         const ARRAY_SIZE: usize = 1024;
186 |         let ones: [cl_float; ARRAY_SIZE] = [1.0; ARRAY_SIZE];
187 | 
188 |         let mut buffer = unsafe {
189 |             Buffer::<cl_float>::create(&context, CL_MEM_READ_ONLY, ARRAY_SIZE, ptr::null_mut())
190 |                 .unwrap()
191 |         };
192 | 
193 |         let events: Vec<cl_event> = Vec::default();
194 | 
195 |         // Non-blocking write with an empty wait list; the returned event is waited on below
196 |         let event = unsafe {
197 |             queue
198 |                 .enqueue_write_buffer(&mut buffer, CL_NON_BLOCKING, 0, &ones, &events)
199 |                 .unwrap()
200 |         };
201 | 
202 |         // Set a callback function for the CL_COMPLETE status of the event (i.e. the write).
203 |         event
204 |             .set_callback(CL_COMPLETE, event_callback_function, ptr::null_mut())
205 |             .unwrap();
206 | 
207 |         let value = event.command_execution_status().unwrap();
208 |         println!("event.command_execution_status(): {}", value);
209 |         // assert_eq!(CL_QUEUED, value.0);
210 | 
211 |         let value = event.command_type().unwrap();
212 |         println!("event.command_type(): {}", value);
213 |         assert_eq!(CL_COMMAND_WRITE_BUFFER, value.0);
214 | 
215 |         let value = event.reference_count().unwrap();
216 |         println!("event.reference_count(): {}", value);
217 |         // assert_eq!(1, value);
218 | 
219 |         let value = event.command_queue().unwrap();
220 |         assert!(queue.get() == value);
221 | 
222 |         let value = event.context().unwrap();
223 |         assert!(context.get() == value);
224 | 
225 |         event.wait().unwrap();
226 | 
227 |         let value = event.command_execution_status().unwrap();
228 |         println!("event.command_execution_status(): {}", value);
229 |         assert_eq!(CL_COMPLETE, value.0);
230 | 
231 |         let value = event.profiling_command_queued().unwrap();
232 |         println!("event.profiling_command_queued(): {}", value);
233 |         assert!(0 < value);
234 | 
235 |         let value = event.profiling_command_submit().unwrap();
236 |         println!("event.profiling_command_submit(): {}", value);
237 |         assert!(0 < value);
238 | 
239 |         let value = event.profiling_command_start().unwrap();
240 |         println!("event.profiling_command_start(): {}", value);
241 |         assert!(0 < value);
242 | 
243 |         let value = event.profiling_command_end().unwrap();
244 |         println!("event.profiling_command_end(): {}", value);
245 |         assert!(0 < value);
246 | 
247 |         // CL_VERSION_2_0 query: an error is only printed, it does not fail the test
248 |         match event.profiling_command_complete() {
249 |             Ok(value) => println!("event.profiling_command_complete(): {}", value),
250 |             Err(e) => println!("OpenCL error, event.profiling_command_complete(): {}", e),
251 |         }
252 |     }
253 | }
254 | 


--------------------------------------------------------------------------------
/src/platform.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2020-2024 Via Technology Ltd.
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //    http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #![allow(clippy::missing_safety_doc)]
 16 | 
 17 | pub use cl3::platform;
 18 | 
 19 | use super::Result;
 20 | use cl3::device;
 21 | #[allow(unused_imports)]
 22 | use cl3::dx9_media_sharing;
 23 | #[allow(unused_imports)]
 24 | use cl3::ext;
 25 | #[allow(unused_imports)]
 26 | use cl3::program;
 27 | #[allow(unused_imports)]
 28 | use cl3::types::{
 29 |     cl_device_id, cl_device_type, cl_name_version, cl_platform_id, cl_platform_info, cl_uint,
 30 |     cl_ulong, cl_version,
 31 | };
 32 | #[allow(unused_imports)]
 33 | use libc::{c_void, intptr_t};
 34 | 
 35 | /// An OpenCL platform id and methods to query it.
 36 | ///
 37 | /// The query methods call clGetPlatformInfo with the relevant param_name, see:
 38 | /// [Platform Queries](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#platform-queries-table).
 39 | #[derive(Copy, Clone, Debug)]
 40 | pub struct Platform {
 41 |     id: intptr_t,
 42 | }
 43 | 
 44 | impl From<cl_platform_id> for Platform {
 45 |     fn from(value: cl_platform_id) -> Self {
 46 |         // The raw id is held as a pointer-sized integer.
 47 |         let id = value as intptr_t;
 48 |         Self { id }
 49 |     }
 50 | }
 51 | 
 52 | impl From<Platform> for cl_platform_id {
 53 |     fn from(value: Platform) -> Self {
 54 |         value.id()
 55 |     }
 56 | }
 57 | 
 58 | unsafe impl Send for Platform {}
 59 | unsafe impl Sync for Platform {}
 60 | 
 61 | impl Platform {
 62 |     pub fn new(id: cl_platform_id) -> Self {
 63 |         Self { id: id as intptr_t }
 64 |     }
 65 | 
 66 |     /// Returns the underlying OpenCL platform id.
 67 |     pub const fn id(&self) -> cl_platform_id {
 68 |         self.id as cl_platform_id
 69 |     }
 70 | 
 71 |     /// Get the ids of the available devices of the given type on the Platform.
 72 |     /// # Examples
 73 |     /// ```
 74 |     /// use opencl3::platform::get_platforms;
 75 |     /// use cl3::device::CL_DEVICE_TYPE_GPU;
 76 |     ///
 77 |     /// let platforms = get_platforms().unwrap();
 78 |     /// assert!(0 < platforms.len());
 79 |     ///
 80 |     /// // Choose the first platform
 81 |     /// let platform = &platforms[0];
 82 |     /// let device_ids = platform.get_devices(CL_DEVICE_TYPE_GPU).unwrap();
 83 |     /// println!("CL_DEVICE_TYPE_GPU count: {}", device_ids.len());
 84 |     /// assert!(0 < device_ids.len());
 85 |     /// ```
 86 |     pub fn get_devices(&self, device_type: cl_device_type) -> Result<Vec<cl_device_id>> {
 87 |         device::get_device_ids(self.id(), device_type).map_err(Into::into)
 88 |     }
 89 | 
 90 |     #[cfg(any(feature = "cl_khr_dx9_media_sharing", feature = "dynamic"))]
 91 |     pub unsafe fn get_device_ids_from_dx9_intel(
 92 |         &self,
 93 |         dx9_device_source: dx9_media_sharing::cl_dx9_device_source_intel,
 94 |         dx9_object: *mut c_void,
 95 |         dx9_device_set: dx9_media_sharing::cl_dx9_device_set_intel,
 96 |     ) -> Result<Vec<cl_device_id>> {
 97 |         unsafe {
 98 |             Ok(dx9_media_sharing::get_device_ids_from_dx9_intel(
 99 |                 self.id(),
100 |                 dx9_device_source,
101 |                 dx9_object,
102 |                 dx9_device_set,
103 |             )?)
104 |         }
105 |     }
106 | 
107 |     /// The OpenCL profile that the Platform supports,
108 |     /// which can be FULL_PROFILE or EMBEDDED_PROFILE.
109 |     pub fn profile(&self) -> Result<String> {
110 |         Ok(String::from(platform::get_platform_info(self.id(), platform::CL_PLATFORM_PROFILE)?))
111 |     }
112 | 
113 |     /// The OpenCL version that the Platform supports,
114 |     /// e.g. OpenCL 1.2, OpenCL 2.0, OpenCL 2.1, etc.
115 |     pub fn version(&self) -> Result<String> {
116 |         Ok(String::from(platform::get_platform_info(self.id(), platform::CL_PLATFORM_VERSION)?))
117 |     }
118 | 
119 |     /// The name string of the OpenCL Platform.
120 |     pub fn name(&self) -> Result<String> {
121 |         Ok(String::from(platform::get_platform_info(self.id(), platform::CL_PLATFORM_NAME)?))
122 |     }
123 | 
124 |     /// The vendor string of the OpenCL Platform.
125 |     pub fn vendor(&self) -> Result<String> {
126 |         Ok(String::from(platform::get_platform_info(self.id(), platform::CL_PLATFORM_VENDOR)?))
127 |     }
128 | 
129 |     /// A space separated list of the extension names that the Platform supports.
130 |     pub fn extensions(&self) -> Result<String> {
131 |         Ok(String::from(platform::get_platform_info(self.id(), platform::CL_PLATFORM_EXTENSIONS)?))
132 |     }
133 | 
134 |     /// The resolution of the host timer in nanoseconds, as used by
135 |     /// clGetDeviceAndHostTimer.  
136 |     /// CL_VERSION_2_1
137 |     pub fn host_timer_resolution(&self) -> Result<cl_ulong> {
138 |         let param_name = platform::CL_PLATFORM_HOST_TIMER_RESOLUTION;
139 |         let info = platform::get_platform_info(self.id(), param_name)?;
140 | 
141 |         Ok(info.into())
142 |     }
143 | 
144 |     /// The detailed (major, minor, patch) version that the platform supports.  
145 |     /// CL_VERSION_3_0
146 |     pub fn numeric_version(&self) -> Result<cl_version> {
147 |         Ok(platform::get_platform_info(self.id(), platform::CL_PLATFORM_NUMERIC_VERSION)?.into())
148 |     }
149 | 
150 |     /// An array of description (name and version) structures listing all of
151 |     /// the extensions that the platform supports.  
152 |     /// CL_VERSION_3_0
153 |     pub fn extensions_with_version(&self) -> Result<Vec<cl_name_version>> {
154 |         let param_name = platform::CL_PLATFORM_EXTENSIONS_WITH_VERSION;
155 |         let info = platform::get_platform_info(self.id(), param_name)?;
156 | 
157 |         Ok(info.into())
158 |     }
159 | 
160 |     /// cl_khr_external_memory
161 |     pub fn platform_external_memory_import_handle_types_khr(&self) -> Result<Vec<cl_name_version>> {
162 |         let param_name = ext::CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR;
163 |         let info = platform::get_platform_info(self.id(), param_name)?;
164 | 
165 |         // Convert the queried info into the returned vector type.
166 |         Ok(info.into())
167 |     }
168 | 
169 |     /// cl_khr_external_semaphore
170 |     pub fn platform_semaphore_import_handle_types_khr(&self) -> Result<Vec<cl_name_version>> {
171 |         let param_name = ext::CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR;
172 |         let info = platform::get_platform_info(self.id(), param_name)?;
173 | 
174 |         // Convert the queried info into the returned vector type.
175 |         Ok(info.into())
176 |     }
177 | 
178 |     /// cl_khr_external_semaphore
179 |     pub fn platform_semaphore_export_handle_types_khr(&self) -> Result<Vec<cl_name_version>> {
180 |         let param_name = ext::CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR;
181 |         let info = platform::get_platform_info(self.id(), param_name)?;
182 | 
183 |         // Convert the queried info into the returned vector type.
184 |         Ok(info.into())
185 |     }
186 | 
187 |     /// cl_khr_semaphore
188 |     pub fn platform_semaphore_types_khr(&self) -> Result<Vec<cl_name_version>> {
189 |         Ok(platform::get_platform_info(self.id(), ext::CL_PLATFORM_SEMAPHORE_TYPES_KHR)?.into())
190 |     }
191 | 
192 |     /// Retrieve raw data about this OpenCL platform for the given `param_name`.
193 |     /// Calls clGetPlatformInfo (through `get_platform_data`) and returns the bytes.
194 |     pub fn get_data(&self, param_name: cl_platform_info) -> Result<Vec<u8>> {
195 |         platform::get_platform_data(self.id(), param_name).map_err(Into::into)
196 |     }
197 | 
198 |     /// Unload the OpenCL compiler for this platform.
199 |     /// CL_VERSION_1_2
200 |     ///
201 |     /// # Safety
202 |     ///
203 |     /// Compiling is unsafe after the compiler has been unloaded.
204 |     #[cfg(any(feature = "CL_VERSION_1_2", feature = "dynamic"))]
205 |     pub unsafe fn unload_compiler(&self) -> Result<()> {
206 |         unsafe { program::unload_platform_compiler(self.id()).map_err(Into::into) }
207 |     }
208 | }
209 | 
210 | /// Query the OpenCL platforms that are available.
211 | /// Returns a Result containing a vector of the available Platforms,
212 | /// or the error from the underlying OpenCL C API function.
213 | /// # Examples
214 | /// ```
215 | /// use opencl3::platform::get_platforms;
216 | ///
217 | /// let platforms = get_platforms().unwrap();
218 | /// println!("Number of OpenCL platforms: {}", platforms.len());
219 | /// assert!(0 < platforms.len());
220 | /// ```
221 | pub fn get_platforms() -> Result<Vec<Platform>> {
222 |     let raw_ids = platform::get_platform_ids()?;
223 | 
224 |     // Wrap each raw platform id in a Platform.
225 |     let platforms = raw_ids.iter().copied().map(Platform::new).collect();
226 |     Ok(platforms)
227 | }
228 | 
229 | #[cfg(any(feature = "cl_khr_icd", feature = "dynamic"))]
230 | pub fn icd_get_platform_ids_khr() -> Result<Vec<Platform>> {
231 |     let raw_ids = ext::icd_get_platform_ids_khr()?;
232 | 
233 |     // Wrap each raw platform id in a Platform.
234 |     let platforms = raw_ids.iter().copied().map(Platform::new).collect();
235 |     Ok(platforms)
236 | }
237 | 
238 | #[cfg(test)]
239 | mod tests {
240 |     use super::*;
241 | 
242 |     #[test]
243 |     fn test_get_platforms() {
244 |         let available = get_platforms().unwrap();
245 |         println!("Number of platforms: {}", available.len());
246 |         assert!(!available.is_empty());
247 | 
248 |         for platform in available {
249 |             println!("Platform Debug Trait: {:?}", platform);
250 |             println!("CL_PLATFORM_NAME: {}", platform.name().unwrap());
251 |             println!("CL_PLATFORM_PROFILE: {}", platform.profile().unwrap());
252 | 
253 |             let version = platform.version().unwrap();
254 |             println!("CL_PLATFORM_VERSION: {:?}", version);
255 | 
256 |             println!("CL_PLATFORM_VENDOR: {}", platform.vendor().unwrap());
257 | 
258 |             let extensions = platform.extensions().unwrap();
259 |             println!("CL_PLATFORM_EXTENSIONS: {:?}", extensions);
260 | 
261 |             // CL_VERSION_2_1 value: the query may not be supported,
262 |             // so an error is printed instead of failing the test.
263 |             match platform.host_timer_resolution() {
264 |                 Ok(resolution) => {
265 |                     println!("CL_PLATFORM_HOST_TIMER_RESOLUTION: {}", resolution)
266 |                 }
267 |                 Err(error) => println!(
268 |                     "OpenCL error, CL_PLATFORM_HOST_TIMER_RESOLUTION: {:?}, {}",
269 |                     error, error
270 |                 ),
271 |             };
272 | 
273 |             println!();
274 |         }
275 |     }
276 | }
277 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2020-2021 Via Technology Ltd. All Rights Reserved.
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //    http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | //! [![crates.io](https://img.shields.io/crates/v/opencl3.svg)](https://crates.io/crates/opencl3)
 16 | //! [![docs.io](https://docs.rs/opencl3/badge.svg)](https://docs.rs/opencl3/)
 17 | //! [![OpenCL 3.0](https://img.shields.io/badge/OpenCL-3.0-blue.svg)](https://www.khronos.org/registry/OpenCL/)
 18 | //! [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
 19 | //!
 20 | //! A Rust implementation of the Khronos [OpenCL](https://www.khronos.org/registry/OpenCL/)
 21 | //! API.
 22 | //!
 23 | //! # Description
 24 | //!
 25 | //! This crate provides a relatively simple, object based model of the OpenCL 3.0
 26 | //! [API](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html).  
 27 | //! It is built upon the [cl3](https://crates.io/crates/cl3) crate, which
 28 | //! provides a functional interface to the OpenCL API.  
 29 | //!
 30 | //! **OpenCL** (Open Computing Language) is a framework for general purpose
 31 | //! parallel programming across heterogeneous devices including: CPUs, GPUs,
 32 | //! DSPs, FPGAs and other processors or hardware accelerators.
 33 | //!
 34 | //! It is often considered as an open-source alternative to Nvidia's proprietary
 35 | //! Compute Unified Device Architecture [CUDA](https://developer.nvidia.com/cuda-zone)
 36 | //! for performing General-purpose computing on GPUs, see
 37 | //! [GPGPU](https://en.wikipedia.org/wiki/General-purpose_computing_on_graphics_processing_units).
 38 | //!
 39 | //! The [OpenCL Specification](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_the_opencl_architecture)
 40 | //! has evolved over time and not all device vendors support all OpenCL features.
 41 | //!
 42 | //! [OpenCL 3.0](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html)
 43 | //! is a unified specification that adds little new functionality to previous OpenCL versions.  
 44 | //! It specifies that all **OpenCL 1.2** features are **mandatory**, while all
 45 | //! OpenCL 2.x and OpenCL 3.0 features are now optional.
 46 | //!
 47 | //! See [OpenCL Description](https://github.com/kenba/opencl3/blob/main/docs/opencl_description.md).
 48 | //!
 49 | //! # OpenCL Architecture
 50 | //!
 51 | //! The [OpenCL Specification](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_the_opencl_architecture)
 52 | //! considers OpenCL as four models:
 53 | //!
 54 | //! * **Platform Model**  
 55 | //!   The physical OpenCL hardware: a *host* containing one or more OpenCL [platform]s,
 56 | //!   each connected to one or more OpenCL [device]s.  
 57 | //!   An OpenCL application running on the *host*, creates an OpenCL environment
 58 | //!   called a [context] on a single [platform] to process data on one or more
 59 | //!   of the OpenCL [device]s connected to the [platform].
 60 | //!
 61 | //! * **Programming Model**  
 62 | //!   An OpenCL [program] consists of OpenCL [kernel] functions that can run
 63 | //!   on OpenCL [device]s within a [context].  
 64 | //!   OpenCL [program]s must be created (and most must be built) for a [context]
 65 | //!   before their OpenCL [kernel] functions can be created from them,
 66 | //!   the exception being "built-in" [kernel]s which don't need to be built
 67 | //!   (or compiled and linked).  
 68 | //!   OpenCL [kernel]s are controlled by an OpenCL application that runs on the
 69 | //!   *host*, see **Execution Model**.
 70 | //!
 71 | //! * **Memory Model**  
 72 | //!   **OpenCL 1.2** memory is divided into two fundamental memory regions:
 73 | //!   **host memory** and **device memory**.  
 74 | //!   OpenCL [kernel]s run on **device memory**; an OpenCL application must write
 75 | //!   **host memory** to **device memory** for OpenCL [kernel]s to process.
 76 | //!   An OpenCL application must also read results from **device memory** to
 77 | //!   **host memory** after a [kernel] has completed execution.  
 78 | //!   **OpenCL 2.0** shared virtual memory ([svm]) is shared between the host
 79 | //!   and device(s) and synchronised by OpenCL; eliminating the explicit transfer
 80 | //!   of memory between host and device(s) memory regions.
 81 | //!
 82 | //! * **Execution Model**  
 83 | //!   An OpenCL application creates at least one OpenCL [command_queue] for each
 84 | //!   OpenCL [device] (or *sub-device*) within its OpenCL [context].  
 85 | //!   OpenCL [kernel] executions and **OpenCL 1.2** memory reads and writes are
 86 | //!   "enqueued" by the OpenCL application on each [command_queue].
 87 | //!   An application can wait for all "enqueued" commands to finish on a
 88 | //!   [command_queue] or it can wait for specific [event]s to complete.
 89 | //!   Normally [command_queue]s run commands in the order that they are given.
 90 | //!   However, [event]s can be used to execute [kernel]s out-of-order.
 91 | //!
 92 | //! # OpenCL Objects
 93 | //!
 94 | //! [Platform]: platform/struct.Platform.html
 95 | //! [Device]: device/struct.Device.html
 96 | //! [SubDevice]: device/struct.SubDevice.html
 97 | //! [Context]: context/struct.Context.html
 98 | //! [Program]: program/struct.Program.html
 99 | //! [Kernel]: kernel/struct.Kernel.html
100 | //! [Buffer]: memory/struct.Buffer.html
101 | //! [Image]: memory/struct.Image.html
102 | //! [Sampler]: memory/struct.Sampler.html
103 | //! [SvmVec]: svm/struct.SvmVec.html
104 | //! [Pipe]: memory/struct.Pipe.html
105 | //! [CommandQueue]: command_queue/struct.CommandQueue.html
106 | //! [Event]: event/struct.Event.html
107 | //!
108 | //! ## Platform Model
109 | //!
110 | //! The platform model has three objects:
111 | //! * [Platform]
112 | //! * [Device]
113 | //! * [Context]
114 | //!
115 | //! Of these three objects, the OpenCL [Context] is by *far* the most important.
116 | //! Each application must create a [Context] from the most appropriate [Device]s
117 | //! available on one of the [Platform]s on the *host* system that the application
118 | //! is running on.
119 | //!
120 | //! Most example OpenCL applications just choose the first available [Platform]
121 | //! and [Device] for their [Context]. However, since many systems have multiple
122 | //! platforms and devices, the first [Platform] and [Device] are unlikely to
123 | //! provide the best performance.  
124 | //! For example, on a system with an APU (combined CPU and GPU, e.g. Intel i7)
125 | //! and a discrete graphics card (e.g. Nvidia GTX 1070) OpenCL may find
126 | //! either the integrated GPU or the GPU on the graphics card first.
127 | //!
128 | //! OpenCL applications often require the performance of discrete graphics cards
129 | //! or specific OpenCL features, such as [svm] or double/half floating point
130 | //! precision. In such cases, it is necessary to query the [Platform]s and
131 | //! [Device]s to choose the most appropriate [Device]s for the application before
132 | //! creating the [Context].
133 | //!
134 | //! The [Platform] and [Device] modules contain structures and methods to simplify
135 | //! querying the host system [Platform]s and [Device]s to create a [Context].
136 | //!
137 | //! ## Programming Model
138 | //!
139 | //! The OpenCL programming model has two objects:
140 | //! * [Program]
141 | //! * [Kernel]
142 | //!
143 | //! OpenCL [Kernel] functions are contained in OpenCL [Program]s.  
144 | //!
145 | //! Kernels are usually defined as functions in OpenCL [Program] source code,
146 | //! however OpenCL [Device]s may contain built-in [Kernel]s,
147 | //! e.g.: some Intel GPUs have built-in motion estimation kernels.
148 | //!
149 | //! OpenCL [Program] objects can be created from OpenCL source code,
150 | //! built-in kernels, binaries and intermediate language binaries.
151 | //! Depending upon how an OpenCL [Program] object was created, it may need to
152 | //! be built (or compiled and linked) before the [Kernel]s in them can be
153 | //! created.
154 | //!
155 | //! All the [Kernel]s in a [Program] can be created together or they can be
156 | //! created individually, by name.
157 | //!
158 | //! ## Memory Model
159 | //!
160 | //! The OpenCL memory model consists of five objects:
161 | //! * [Buffer]
162 | //! * [Image]
163 | //! * [Sampler]
164 | //! * [SvmVec]
165 | //! * [Pipe]
166 | //!
167 | //! [Buffer], [Image] and [Sampler] are OpenCL 1.2 (i.e. **mandatory**) objects,  
168 | //! [svm] and [Pipe] are OpenCL 2.0 (i.e. optional) objects.
169 | //!
170 | //! A [Buffer] is a contiguous block of memory used for general purpose data.  
171 | //! An [Image] holds data for one, two or three dimensional images.  
172 | //! A [Sampler] describes how a [Kernel] is to sample an [Image], see
173 | //! [Sampler objects](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_sampler_objects).  
174 | //!
175 | //! [Shared Virtual Memory](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#shared-virtual-memory)
176 | //! enables the host and kernels executing on devices to directly share data
177 | //! without explicitly transferring it.
178 | //!
179 | //! [Pipes](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_pipes)
180 | //! store memory as FIFOs between [Kernel]s. [Pipe]s are not accessible from the host.
181 | //!
182 | //! ## Execution Model
183 | //!
184 | //! The OpenCL execution model has two objects:
185 | //! * [CommandQueue]
186 | //! * [Event]
187 | //!
188 | //! OpenCL commands to transfer memory and execute kernels on devices are
189 | //! performed via [CommandQueue]s.
190 | //!
191 | //! Each OpenCL device (and sub-device) must have at least one command_queue
192 | //! associated with it, so that commands may be enqueued on to the device.
193 | //!
194 | //! There are several OpenCL [CommandQueue] "enqueue_" methods to transfer
195 | //! data between host and device memory, map SVM memory and execute kernels.
196 | //! All the "enqueue_" methods accept an event_wait_list parameter and return
197 | //! an [Event] that can be used to monitor and control *out-of-order* execution
198 | //! of kernels on a [CommandQueue], see
199 | //! [Event Objects](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#event-objects).
200 | 
201 | extern crate cl3;
202 | 
203 | #[cfg(any(feature = "cl_khr_command_buffer", feature = "dynamic"))]
204 | pub mod command_buffer;
205 | pub mod command_queue;
206 | pub mod context;
207 | pub mod device;
208 | pub mod event;
209 | pub mod kernel;
210 | pub mod memory;
211 | pub mod platform;
212 | pub mod program;
213 | #[cfg(any(feature = "CL_VERSION_2_0", feature = "dynamic"))]
214 | pub mod svm;
215 | 
216 | pub mod error_codes {
217 |     pub use cl3::error_codes::*;
218 | }
219 | pub mod types {
220 |     pub use cl3::types::*;
221 | }
222 | 
223 | use std::result;
224 | /// Custom Result type to output OpenCL error text.
225 | pub type Result<T> = result::Result<T, error_codes::ClError>;
226 | 


--------------------------------------------------------------------------------
/docs/images/opencl_memory_objects.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" contentScriptType="application/ecmascript" contentStyleType="text/css" height="177px" preserveAspectRatio="none" style="width:283px;height:177px;" version="1.1" viewBox="0 0 283 177" width="283px" zoomAndPan="magnify"><defs><filter height="300%" id="fxmsuq4diur7d" width="300%" x="-1" y="-1"><feGaussianBlur result="blurOut" stdDeviation="2.0"/><feColorMatrix in="blurOut" result="blurOut2" type="matrix" values="0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 .4 0"/><feOffset dx="4.0" dy="4.0" in="blurOut2" result="blurOut3"/><feBlend in="SourceGraphic" in2="blurOut3" mode="normal"/></filter></defs><g><!--MD5=[9e9737bed61a6b1d6b48b2052db5d4e5]
 2 | class Buffer--><rect fill="#FEFECE" filter="url(#fxmsuq4diur7d)" height="48" id="Buffer" style="stroke:#A80036;stroke-width:1.5;" width="64" x="7" y="7"/><ellipse cx="22" cy="23" fill="#ADD1B2" rx="11" ry="11" style="stroke:#A80036;stroke-width:1.0;"/><path d="M24.7656,18.875 Q24.9219,18.6563 25.1094,18.5469 Q25.2969,18.4375 25.5156,18.4375 Q25.8906,18.4375 26.125,18.6953 Q26.3594,18.9531 26.3594,19.5625 L26.3594,21.0156 Q26.3594,21.625 26.125,21.8906 Q25.8906,22.1563 25.5156,22.1563 Q25.1719,22.1563 24.9688,21.9531 Q24.7656,21.7656 24.6563,21.25 Q24.6094,20.8906 24.4219,20.7031 Q24.0938,20.3281 23.4844,20.1094 Q22.875,19.8906 22.25,19.8906 Q21.4844,19.8906 20.8516,20.2188 Q20.2188,20.5469 19.7266,21.2969 Q19.2344,22.0469 19.2344,23.0781 L19.2344,24.1719 Q19.2344,25.4063 20.125,26.2266 Q21.0156,27.0469 22.6094,27.0469 Q23.5469,27.0469 24.2031,26.7969 Q24.5938,26.6406 25.0156,26.2031 Q25.2813,25.9375 25.4297,25.8594 Q25.5781,25.7813 25.7813,25.7813 Q26.1094,25.7813 26.3672,26.0391 Q26.625,26.2969 26.625,26.6406 Q26.625,26.9844 26.2813,27.3906 Q25.7813,27.9688 24.9844,28.2969 Q23.9063,28.75 22.6094,28.75 Q21.0938,28.75 19.8906,28.125 Q18.9063,27.625 18.2188,26.5547 Q17.5313,25.4844 17.5313,24.2031 L17.5313,23.0469 Q17.5313,21.7188 18.1484,20.5703 Q18.7656,19.4219 19.8594,18.8047 Q20.9531,18.1875 22.1875,18.1875 Q22.9219,18.1875 23.5703,18.3516 Q24.2188,18.5156 24.7656,18.875 Z " fill="#000000"/><text fill="#000000" font-family="sans-serif" font-size="12" lengthAdjust="spacingAndGlyphs" textLength="32" x="36" y="27.9102">Buffer</text><line style="stroke:#A80036;stroke-width:1.5;" x1="8" x2="70" y1="39" y2="39"/><line style="stroke:#A80036;stroke-width:1.5;" x1="8" x2="70" y1="47" y2="47"/><!--MD5=[a40b17eea413225aac43454b95ec0b4d]
 3 | class Memory--><rect fill="#FEFECE" filter="url(#fxmsuq4diur7d)" height="48" id="Memory" style="stroke:#A80036;stroke-width:1.5;" width="75" x="102.5" y="115"/><ellipse cx="117.5" cy="131" fill="#ADD1B2" rx="11" ry="11" style="stroke:#A80036;stroke-width:1.0;"/><path d="M120.2656,126.875 Q120.4219,126.6563 120.6094,126.5469 Q120.7969,126.4375 121.0156,126.4375 Q121.3906,126.4375 121.625,126.6953 Q121.8594,126.9531 121.8594,127.5625 L121.8594,129.0156 Q121.8594,129.625 121.625,129.8906 Q121.3906,130.1563 121.0156,130.1563 Q120.6719,130.1563 120.4688,129.9531 Q120.2656,129.7656 120.1563,129.25 Q120.1094,128.8906 119.9219,128.7031 Q119.5938,128.3281 118.9844,128.1094 Q118.375,127.8906 117.75,127.8906 Q116.9844,127.8906 116.3516,128.2188 Q115.7188,128.5469 115.2266,129.2969 Q114.7344,130.0469 114.7344,131.0781 L114.7344,132.1719 Q114.7344,133.4063 115.625,134.2266 Q116.5156,135.0469 118.1094,135.0469 Q119.0469,135.0469 119.7031,134.7969 Q120.0938,134.6406 120.5156,134.2031 Q120.7813,133.9375 120.9297,133.8594 Q121.0781,133.7813 121.2813,133.7813 Q121.6094,133.7813 121.8672,134.0391 Q122.125,134.2969 122.125,134.6406 Q122.125,134.9844 121.7813,135.3906 Q121.2813,135.9688 120.4844,136.2969 Q119.4063,136.75 118.1094,136.75 Q116.5938,136.75 115.3906,136.125 Q114.4063,135.625 113.7188,134.5547 Q113.0313,133.4844 113.0313,132.2031 L113.0313,131.0469 Q113.0313,129.7188 113.6484,128.5703 Q114.2656,127.4219 115.3594,126.8047 Q116.4531,126.1875 117.6875,126.1875 Q118.4219,126.1875 119.0703,126.3516 Q119.7188,126.5156 120.2656,126.875 Z " fill="#000000"/><text fill="#000000" font-family="sans-serif" font-size="12" lengthAdjust="spacingAndGlyphs" textLength="43" x="131.5" y="135.9102">Memory</text><line style="stroke:#A80036;stroke-width:1.5;" x1="103.5" x2="176.5" y1="147" y2="147"/><line style="stroke:#A80036;stroke-width:1.5;" x1="103.5" x2="176.5" y1="155" y2="155"/><!--MD5=[964f074a2dd77d134c21b891bb53e381]
 4 | class Image--><rect fill="#FEFECE" filter="url(#fxmsuq4diur7d)" height="48" id="Image" style="stroke:#A80036;stroke-width:1.5;" width="67" x="106.5" y="7"/><ellipse cx="121.5" cy="23" fill="#ADD1B2" rx="11" ry="11" style="stroke:#A80036;stroke-width:1.0;"/><path d="M124.2656,18.875 Q124.4219,18.6563 124.6094,18.5469 Q124.7969,18.4375 125.0156,18.4375 Q125.3906,18.4375 125.625,18.6953 Q125.8594,18.9531 125.8594,19.5625 L125.8594,21.0156 Q125.8594,21.625 125.625,21.8906 Q125.3906,22.1563 125.0156,22.1563 Q124.6719,22.1563 124.4688,21.9531 Q124.2656,21.7656 124.1563,21.25 Q124.1094,20.8906 123.9219,20.7031 Q123.5938,20.3281 122.9844,20.1094 Q122.375,19.8906 121.75,19.8906 Q120.9844,19.8906 120.3516,20.2188 Q119.7188,20.5469 119.2266,21.2969 Q118.7344,22.0469 118.7344,23.0781 L118.7344,24.1719 Q118.7344,25.4063 119.625,26.2266 Q120.5156,27.0469 122.1094,27.0469 Q123.0469,27.0469 123.7031,26.7969 Q124.0938,26.6406 124.5156,26.2031 Q124.7813,25.9375 124.9297,25.8594 Q125.0781,25.7813 125.2813,25.7813 Q125.6094,25.7813 125.8672,26.0391 Q126.125,26.2969 126.125,26.6406 Q126.125,26.9844 125.7813,27.3906 Q125.2813,27.9688 124.4844,28.2969 Q123.4063,28.75 122.1094,28.75 Q120.5938,28.75 119.3906,28.125 Q118.4063,27.625 117.7188,26.5547 Q117.0313,25.4844 117.0313,24.2031 L117.0313,23.0469 Q117.0313,21.7188 117.6484,20.5703 Q118.2656,19.4219 119.3594,18.8047 Q120.4531,18.1875 121.6875,18.1875 Q122.4219,18.1875 123.0703,18.3516 Q123.7188,18.5156 124.2656,18.875 Z " fill="#000000"/><text fill="#000000" font-family="sans-serif" font-size="12" lengthAdjust="spacingAndGlyphs" textLength="35" x="135.5" y="27.9102">Image</text><line style="stroke:#A80036;stroke-width:1.5;" x1="107.5" x2="172.5" y1="39" y2="39"/><line style="stroke:#A80036;stroke-width:1.5;" x1="107.5" x2="172.5" y1="47" y2="47"/><!--MD5=[4dfa3bfde5407297cdf87f3e284398fb]
 5 | class Pipe--><rect fill="#FEFECE" filter="url(#fxmsuq4diur7d)" height="48" id="Pipe" style="stroke:#A80036;stroke-width:1.5;" width="57" x="208.5" y="7"/><ellipse cx="223.5" cy="23" fill="#ADD1B2" rx="11" ry="11" style="stroke:#A80036;stroke-width:1.0;"/><path d="M226.2656,18.875 Q226.4219,18.6563 226.6094,18.5469 Q226.7969,18.4375 227.0156,18.4375 Q227.3906,18.4375 227.625,18.6953 Q227.8594,18.9531 227.8594,19.5625 L227.8594,21.0156 Q227.8594,21.625 227.625,21.8906 Q227.3906,22.1563 227.0156,22.1563 Q226.6719,22.1563 226.4688,21.9531 Q226.2656,21.7656 226.1563,21.25 Q226.1094,20.8906 225.9219,20.7031 Q225.5938,20.3281 224.9844,20.1094 Q224.375,19.8906 223.75,19.8906 Q222.9844,19.8906 222.3516,20.2188 Q221.7188,20.5469 221.2266,21.2969 Q220.7344,22.0469 220.7344,23.0781 L220.7344,24.1719 Q220.7344,25.4063 221.625,26.2266 Q222.5156,27.0469 224.1094,27.0469 Q225.0469,27.0469 225.7031,26.7969 Q226.0938,26.6406 226.5156,26.2031 Q226.7813,25.9375 226.9297,25.8594 Q227.0781,25.7813 227.2813,25.7813 Q227.6094,25.7813 227.8672,26.0391 Q228.125,26.2969 228.125,26.6406 Q228.125,26.9844 227.7813,27.3906 Q227.2813,27.9688 226.4844,28.2969 Q225.4063,28.75 224.1094,28.75 Q222.5938,28.75 221.3906,28.125 Q220.4063,27.625 219.7188,26.5547 Q219.0313,25.4844 219.0313,24.2031 L219.0313,23.0469 Q219.0313,21.7188 219.6484,20.5703 Q220.2656,19.4219 221.3594,18.8047 Q222.4531,18.1875 223.6875,18.1875 Q224.4219,18.1875 225.0703,18.3516 Q225.7188,18.5156 226.2656,18.875 Z " fill="#000000"/><text fill="#000000" font-family="sans-serif" font-size="12" lengthAdjust="spacingAndGlyphs" textLength="25" x="237.5" y="27.9102">Pipe</text><line style="stroke:#A80036;stroke-width:1.5;" x1="209.5" x2="264.5" y1="39" y2="39"/><line style="stroke:#A80036;stroke-width:1.5;" x1="209.5" x2="264.5" y1="47" y2="47"/><!--MD5=[ee004750a09daee7b8886fa6abb8ade9]
 6 | class SVM--><rect fill="#FEFECE" filter="url(#fxmsuq4diur7d)" height="48" id="SVM" style="stroke:#A80036;stroke-width:1.5;" width="56" x="213" y="115"/><ellipse cx="228" cy="131" fill="#ADD1B2" rx="11" ry="11" style="stroke:#A80036;stroke-width:1.0;"/><path d="M230.7656,126.875 Q230.9219,126.6563 231.1094,126.5469 Q231.2969,126.4375 231.5156,126.4375 Q231.8906,126.4375 232.125,126.6953 Q232.3594,126.9531 232.3594,127.5625 L232.3594,129.0156 Q232.3594,129.625 232.125,129.8906 Q231.8906,130.1563 231.5156,130.1563 Q231.1719,130.1563 230.9688,129.9531 Q230.7656,129.7656 230.6563,129.25 Q230.6094,128.8906 230.4219,128.7031 Q230.0938,128.3281 229.4844,128.1094 Q228.875,127.8906 228.25,127.8906 Q227.4844,127.8906 226.8516,128.2188 Q226.2188,128.5469 225.7266,129.2969 Q225.2344,130.0469 225.2344,131.0781 L225.2344,132.1719 Q225.2344,133.4063 226.125,134.2266 Q227.0156,135.0469 228.6094,135.0469 Q229.5469,135.0469 230.2031,134.7969 Q230.5938,134.6406 231.0156,134.2031 Q231.2813,133.9375 231.4297,133.8594 Q231.5781,133.7813 231.7813,133.7813 Q232.1094,133.7813 232.3672,134.0391 Q232.625,134.2969 232.625,134.6406 Q232.625,134.9844 232.2813,135.3906 Q231.7813,135.9688 230.9844,136.2969 Q229.9063,136.75 228.6094,136.75 Q227.0938,136.75 225.8906,136.125 Q224.9063,135.625 224.2188,134.5547 Q223.5313,133.4844 223.5313,132.2031 L223.5313,131.0469 Q223.5313,129.7188 224.1484,128.5703 Q224.7656,127.4219 225.8594,126.8047 Q226.9531,126.1875 228.1875,126.1875 Q228.9219,126.1875 229.5703,126.3516 Q230.2188,126.5156 230.7656,126.875 Z " fill="#000000"/><text fill="#000000" font-family="sans-serif" font-size="12" lengthAdjust="spacingAndGlyphs" textLength="24" x="242" y="135.9102">SVM</text><line style="stroke:#A80036;stroke-width:1.5;" x1="214" x2="268" y1="147" y2="147"/><line style="stroke:#A80036;stroke-width:1.5;" x1="214" x2="268" y1="155" y2="155"/><!--MD5=[92f76e6e7e3ebbea79efd1107ffa7361]
 7 | link Buffer to Memory--><path codeLine="6" d="M60.92,55 C73.4,68.11 89.37,84.86 103.68,99.88 " fill="none" id="Buffer-to-Memory" style="stroke:#A80036;stroke-width:1.0;"/><polygon fill="none" points="109.05,95.37,117.78,114.68,98.91,105.03,109.05,95.37" style="stroke:#A80036;stroke-width:1.0;"/><!--MD5=[b618517008d869bda4758aae16ba04c8]
 8 | link Image to Memory--><path codeLine="8" d="M140,55 C140,66.59 140,81.03 140,94.6 " fill="none" id="Image-to-Memory" style="stroke:#A80036;stroke-width:1.0;"/><polygon fill="none" points="147,94.68,140,114.68,133,94.68,147,94.68" style="stroke:#A80036;stroke-width:1.0;"/><!--MD5=[9a58687ea7b2c26dca099038837b60c8]
 9 | link Pipe to Memory--><path codeLine="9" d="M215.95,55 C203.96,68.11 188.63,84.86 174.88,99.88 " fill="none" id="Pipe-to-Memory" style="stroke:#A80036;stroke-width:1.0;"/><polygon fill="none" points="180.01,104.65,161.34,114.68,169.68,95.2,180.01,104.65" style="stroke:#A80036;stroke-width:1.0;"/><!--MD5=[67157ba05333537e883002c5109f808a]
10 | reverse link Memory to SVM--><path codeLine="10" d="M191.01,139 C198.3,139 205.59,139 212.89,139 " fill="none" id="Memory-backto-SVM" style="stroke:#A80036;stroke-width:1.0;"/><polygon fill="#FFFFFF" points="177.88,139,183.8844,142.9934,189.88,138.9868,183.8756,134.9934,177.88,139" style="stroke:#A80036;stroke-width:1.0;"/><!--MD5=[3cdd10b2a878239e1bf84a1f31597818]
11 | @startuml
12 | 
13 | 'Copyright (c) 2021 Via Technology Ltd. All Rights Reserved.
14 | 
15 | ' title via::opencl Memory classes
16 | 
17 | Buffer - -|> Memory
18 | ' Sampler - -o Image
19 | Image - -|> Memory
20 | Pipe - -|> Memory
21 | Memory o- SVM
22 | 
23 | @enduml
24 | 
25 | @startuml
26 | 
27 | 
28 | 
29 | Buffer - -|> Memory
30 | Image - -|> Memory
31 | Pipe - -|> Memory
32 | Memory o- SVM
33 | 
34 | @enduml
35 | 
36 | PlantUML version 1.2020.26(Mon Dec 21 17:45:07 GMT 2020)
37 | (GPL source distribution)
38 | Java Runtime: Java(TM) SE Runtime Environment
39 | JVM: Java HotSpot(TM) Client VM
40 | Default Encoding: Cp1252
41 | Language: en
42 | Country: GB
43 | --></g></svg>


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright 2020 Via Technology Ltd.
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/RELEASES.md:
--------------------------------------------------------------------------------
  1 | # Releases
  2 | 
  3 | ## Version 0.12.1 (2025-09-22)
  4 | 
  5 | * PR [#72](https://github.com/kenba/opencl3/pull/72) fix static linking by disabling default features in cl3.
  6 | 
  7 | ## Version 0.12.0 (2025-04-19)
  8 | 
  9 | ### Changes
 10 | 
 11 | * Update for Rust edition = "2024"
 12 | 
 13 | ### New Features
 14 | 
 15 | * Add `cl_qcom_perf_hint` feature.
 16 | * Add `cl_ext_buffer_device_address` feature.
 17 | 
 18 | ## Version 0.11.0 (2025-02-19)
 19 | 
 20 | ### Changes
 21 | 
 22 | * Up to date with OpenCL-Headers tag v2024.10.24.
 23 | 
 24 | ## Version 0.10.0 (2024-12-21)
 25 | 
 26 | ### Bug fixes
 27 | 
 28 | * Issue [#69](https://github.com/kenba/opencl3/issues/69) Unsound issue in SvmVec.
 29 | 
 30 | ### New Features
 31 | 
 32 | * Issue [#70](https://github.com/kenba/opencl3/issues/70) Support dynamic linking.
 33 | 
 34 | ## Version 0.9.5 (2023-12-22)
 35 | 
 36 | ### New Features
 37 | 
 38 | * Issue [#64](https://github.com/kenba/opencl3/issues/64) Update with new features in OpenCL-Headers repo.
 39 | 
 40 | ## Version 0.9.4 (2023-11-05)
 41 | 
 42 | ### New Features
 43 | 
 44 | * Issue [#64](https://github.com/kenba/opencl3/issues/64) Update with new functions in OpenCL-Headers repo.
 45 | 
 46 | ## Version 0.9.3 (2023-05-11)
 47 | 
 48 | ### New Features
 49 | 
 50 | * PR [#60](https://github.com/kenba/opencl3/pull/60) Support querying PCIE information for Intel devices.
 51 | 
 52 | ## Version 0.9.2 (2022-12-30)
 53 | 
 54 | ### Bug fixes
 55 | 
 56 | * Issue [#58](https://github.com/kenba/opencl3/issues/58) Compile failure due to unmarked unsafe function call in program.rs.
 57 | 
 58 | ## Version 0.9.1 (2022-09-20)
 59 | 
 60 | ### Bug fixes
 61 | 
 62 | * PR [#54](https://github.com/kenba/opencl3/pull/54) Unshadow device re-exports.
 63 | 
 64 | ### New Features
 65 | 
 66 | * Issue [#55](https://github.com/kenba/opencl3/issues/55) Add Khronos `cl_khr_command_buffer_mutable_dispatch` extension.
 67 | 
 68 | ## Version 0.9.0 (2022-09-10)
 69 | 
 70 | ### Breaking Changes
 71 | 
 72 | * Issue [#51](https://github.com/kenba/opencl3/issues/51) Undefined behaviour when using underlying OpenCL pointers without any unsafe.
 73 | * Issue [#52](https://github.com/kenba/opencl3/issues/52) `CL_MEM_USE_HOST_PTR` can result in undefined behaviour.
 74 | 
 75 | ### New Features
 76 | 
 77 | * PR [#53](https://github.com/kenba/opencl3/pull/53) added `sync` for all threadsafe OpenCL objects.
 78 | 
 79 | ## Version 0.8.1 (2022-07-23)
 80 | 
 81 | ### Bug fixes
 82 | 
 83 | * Issue [#49](https://github.com/kenba/opencl3/issues/49) Better Error messages for
 84 | `ExecuteKernel::set_arg`.
 85 | 
 86 | ### New Features
 87 | 
 88 | * Update for OpenCL extension `cl_ext_image_requirements_info`.
 89 | 
 90 | * PR [#50](https://github.com/kenba/opencl3/issues/50) Introduce `track_caller` for panicking methods of `ExecuteKernel`.
 91 | 
 92 | ## Version 0.8.0 (2022-05-02)
 93 | 
 94 | ### Breaking Changes
 95 | 
 96 | * Issue [#47](https://github.com/kenba/opencl3/issues/47) Base `opencl3` on [opencl-sys](https://crates.io/crates/opencl-sys) crate.
 97 | * Issue [#48](https://github.com/kenba/opencl3/issues/48) Put deprecation notices behind conditional attributes.
 98 | 
 99 | ## Version 0.7.0 (2022-04-10)
100 | 
101 | ### Breaking Changes
102 | 
103 | * Transition to Rust 2021 Edition.
104 | 
105 | ## Version 0.6.3 (2021-12-30)
106 | 
107 | ### New Features
108 | 
109 | * Issue [#44](https://github.com/kenba/opencl3/issues/44) Add provisional command-buffer extension. See: [cl_khr_command_buffer](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_command_buffer).
110 | 
111 | ## Version 0.6.2 (2021-12-19)
112 | 
113 | ### New Features
114 | 
115 | * Issue [#37](https://github.com/kenba/opencl3/issues/37) Implement DeserializeSeed for ExtendSvmVec to enable `serde` to deserialize directly into an `SvmVec`.
116 | 
117 | ### Bug fixes
118 | 
119 | * Issue [#45](https://github.com/kenba/opencl3/issues/45) Examples and tests have `CL_MEM_READ_ONLY` and `CL_MEM_WRITE_ONLY` swapped over.
120 | 
121 | ## Version 0.6.1 (2021-11-12)
122 | 
123 | ### New Features
124 | 
125 | * Issue [#43](https://github.com/kenba/opencl3/issues/43) Update for new OpenCL extensions: `cl_khr_external_memory`, `cl_khr_external_semaphore` and `cl_khr_semaphore`.
126 | 
127 | ## Version 0.6.0 (2021-10-16)
128 | 
129 | ### Breaking Changes
130 | 
131 | * Issue [#41](https://github.com/kenba/opencl3/issues/41) Remove cl3 Info enums to support new OpenCL versions and extensions.
132 | * Add UUID and LUID types. See cl3 Issue [#13](https://github.com/kenba/cl3/issues/13) Remove Info enums to support new OpenCL versions and extensions.
133 | * Remove example from README.md.
134 | 
135 | ## Version 0.5.3 (2021-10-10)
136 | 
137 | ### New Features
138 | 
139 | * Issue [#38](https://github.com/kenba/opencl3/issues/38) Add SVM fine grain system support.
140 | * Issue [#40](https://github.com/kenba/opencl3/issues/40) Replace all calls to `to_string` with `from` or `into`.
141 | * Issue [#42](https://github.com/kenba/opencl3/issues/42) add `From` traits.
142 | * Add `get_all_devices` function.
143 | 
144 | ## Version 0.5.2 (2021-09-19)
145 | 
146 | ```toml
147 | [dependencies]
148 | libc = "0.2"
149 | cl3 = { version = "0.4", default-features = false }
150 | serde = { version = "1.0", optional = true }
151 | ```
152 | 
153 | ### New Features
154 | 
155 | * Issue [#39](https://github.com/kenba/opencl3/issues/39) Update for latest OpenCL-Headers.
156 | * Add CONTRIBUTING and CODE_OF_CONDUCT documents.
157 | 
158 | ## Version 0.5.1 (2021-09-17)
159 | 
160 | ```toml
161 | [dependencies]
162 | libc = "0.2"
163 | cl3 = { version = "0.4.2", default-features = false }
164 | serde = { version = "1.0", optional = true }
165 | ```
166 | 
167 | ### New Features
168 | 
169 | * Issue [#37](https://github.com/kenba/opencl3/issues/37) Implement Serde's Serialize, Deserialize for SvmVec.
170 | 
171 | ### Bug fixes
172 | 
173 | * Issue [#32](https://github.com/kenba/opencl3/issues/32) Example from readme has zero output on GTX 1060 Max-Q.
174 | * Issue [#35](https://github.com/kenba/opencl3/issues/35) Superfluous/Misleading generic parameter in `ExecuteKernel::set_arg_local_buffer`.
175 | 
176 | ## Version 0.5.0 (2021-09-12)
177 | 
178 | ### Breaking Changes
179 | 
180 | * Improve `SVM` interface and documentation.
181 | * Remove svm_capabilities parameter from `SvmVec` methods.
182 | 
183 | ### Bug fixes
184 | 
185 | * Issue [#33](https://github.com/kenba/opencl3/issues/33) Coarse-grained SVM has to be mapped before usage!
186 | 
187 | ## Version 0.4.1 (2021-08-21)
188 | 
189 | Depends on:  
190 | `cl3 = { version = "0.4.2", default-features = false }`
191 | 
192 | ### New Features
193 | 
194 | * Issue [#30](https://github.com/kenba/opencl3/issues/30) opencl3 cannot be compiled with OpenCL 1.2 features only.
195 | 
196 | ## Version 0.4.0 (2021-08-20)
197 | 
198 | Depends on `cl3` = "0.4.2".
199 | 
200 | ### Breaking Changes
201 | 
202 | * Issue [#26](https://github.com/kenba/opencl3/issues/26) Should `CommandQueue::enqueue_write_buffer` take a mutable buffer reference?
203 | * PR [#27](https://github.com/kenba/opencl3/pull/27) Make mutability explicit.
204 | 
205 | ### New Features
206 | 
207 | * Issue [#25](https://github.com/kenba/opencl3/issues/25) Using `set_event_callback`.
208 | 
209 | ## Version 0.3.1 (2021-08-06)
210 | 
211 | Depends on `cl3` = "0.4.1".
212 | 
213 | ### New Features
214 | 
215 | * Add Device method for `cl_khr_integer_dot_product` extension.
216 | 
217 | ## Version 0.3.0 (2021-07-10)
218 | 
219 | ### Breaking Changes
220 | 
221 | * Issue [#21](https://github.com/kenba/opencl3/issues/21) `Device::available()` should return a boolean.
222 | * PR [#22](https://github.com/kenba/opencl3/pull/22) Return booleans for device information where applicable.
223 | * Issue [#24](https://github.com/kenba/opencl3/issues/24) Use `bool` instead of `cl_bool`.
224 | * Use CL_BLOCKING and CL_NON_BLOCKING in enqueue calls.
225 | 
226 | ## Version 0.2.4 (2021-07-03)
227 | 
228 | ### New Features
229 | 
230 | * Issue [#18](https://github.com/kenba/opencl3/issues/18) Return UUID as array.
231 | * PR [#19](https://github.com/kenba/opencl3/pull/19) Export sizes of UUID and LUID.
232 | 
233 | ### Bug fixes
234 | 
235 | * Issue [#20](https://github.com/kenba/opencl3/issues/20) Restore `c_void` to program.rs.
236 | 
237 | ## Version 0.2.3 (2021-05-30)
238 | 
239 | Depends on `cl3` = "0.4.0".
240 | 
241 | ### New Features
242 | 
243 | * Issue [#15](https://github.com/kenba/opencl3/issues/15) It's safe to implement `Send` for most of the types.
244 | * PR [#16](https://github.com/kenba/opencl3/pull/16) Implement Send for most of the types.
245 | * PR [#17](https://github.com/kenba/opencl3/pull/17) Implement Send for some of the types.
246 | 
247 | ## Version 0.2.2 (2021-05-22)
248 | 
249 | Depends on `cl3` = "0.3.1".
250 | 
251 | ### New Features
252 | 
253 | * Issue [#13](https://github.com/kenba/opencl3/issues/13) Higher level create_sub_buffer call.
254 | * Issue [#14](https://github.com/kenba/opencl3/issues/14) Adding Debug derives.
255 | * Add OpenCL `cl_ext.h` functions.
256 | * Add `Direct3D` extension methods.
257 | * Add feature `cl_apple_setmemobjectdestructor` for `cl3`.
258 | 
259 | ## Version 0.2.1 (2021-05-16)
260 | 
261 | Depends on `cl3` = "0.3".
262 | 
263 | ### New Features
264 | 
265 | * Add extension `device_info` values.
266 | * Add `OpenGL` extension functions.
267 | * Add `OpenGL ES` extension functions.
268 | 
269 | ## Version 0.2.0 (2021-04-18)
270 | 
271 | Depends on `cl3` = "0.2".
272 | 
273 | ### Breaking Changes
274 | 
275 | * Issue [#10](https://github.com/kenba/opencl3/issues/10) Change the API to use String instead of ffi::CString.
276 | * Change `set_wait_event` to take `Event` reference.
277 | 
278 | ### New Features
279 | 
280 | * Issue [#9](https://github.com/kenba/opencl3/issues/9) Support running multiple instances of the same kernel simultaneously.
281 | * Issue [#12](https://github.com/kenba/opencl3/issues/12) Improve OpenCL error handling.
282 | * Add `from_device_type` method for `Context`.
283 | * Add `ClMem` trait object.
284 | * Add `CommandExecutionStatus` and `EventCommandType`.
285 | 
286 | ## Version 0.1.4 (2021-03-26)
287 | 
288 | ### Changes
289 | 
290 | * PR [#4](https://github.com/kenba/opencl3/pull/4) Implement Clone for CommandQueue.
291 | * Issue [#5](https://github.com/kenba/opencl3/issues/5) Consider replacing unwrap with expect for error handling.
292 | * PR [#6](https://github.com/kenba/opencl3/pull/6) Make types Send and Sync where applicable.
293 | * PR [#7](https://github.com/kenba/opencl3/pull/7) Implement Clone for most of the types.
294 | * Issue [#8](https://github.com/kenba/opencl3/issues/8) Retrieving a program build log might be impossible.
295 | * PR [#10](https://github.com/kenba/opencl3/pull/10) Replace calls to to_str with to_string for issue [#10](https://github.com/kenba/opencl3/issues/10).
296 | 
297 | ## Version 0.1.3 (2021-01-16)
298 | 
299 | ### Changes
300 | 
301 | * PR [#1](https://github.com/kenba/opencl3/pull/1) Add Buffer type field as PhantomData.
302 | * Issue [#2](https://github.com/kenba/opencl3/issues/2) Consider adding PhantomData to Image and Pipe memory objects.
303 | * PR [#3](https://github.com/kenba/opencl3/pull/3) Remove Buffer cast method.
304 | * Remove unnecessary templates from methods.
305 | 
306 | ## Version 0.1.2 (2021-01-12)
307 | 
308 | ### Changes
309 | 
310 | * Remove `event_wait_list` from the `enqueue_nd_range` method.
311 | * Add `wait` method to `event`.
312 | * Add `opencl2_kernel_test.rs`.
313 | * Add example to README.
314 | * Don't raise error in `integration_test` if device is not SVM capable
315 | 
316 | ## Version 0.1.1 (2021-01-04)
317 | 
318 | ### Bug fixes
319 | 
320 | * Fix build on OpenCL 2.0 ICD.
321 | * Fix integration tests on Intel Skylake.
322 | * Get the max_work_item_dimensions from the device CommandQueue.
323 | 
324 | ## Version 0.1.0 (2020-12-31)
325 | 
326 | Depends on `cl3` = "0.1".
327 | 
328 | ### Features
329 | 
330 | * OpenCL objects implemented by Rust structs that manage their resources by implementing the `Drop` trait to perform [RAII](https://doc.rust-lang.org/rust-by-example/scope/raii.html), e.g. Context, Program, CommandQueue, etc.
331 | * `safe` Rust functions that call OpenCL C API functions and return Rust Result types.
332 | * A `Vec` implemented using OpenCL Shared Virtual Memory (SVM), see [svm](src/svm.rs).
333 | 


--------------------------------------------------------------------------------
/tests/opencl2_kernel_test.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2021-2024 Via Technology Ltd. All Rights Reserved.
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //    http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #[cfg(any(feature = "CL_VERSION_2_0", feature = "dynamic"))]
 16 | extern crate opencl3;
 17 | 
 18 | use cl3::device::{
 19 |     CL_DEVICE_SVM_FINE_GRAIN_BUFFER, CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, CL_DEVICE_TYPE_ALL,
 20 |     CL_DEVICE_TYPE_GPU,
 21 | };
 22 | use opencl3::Result;
 23 | use opencl3::command_queue::CommandQueue;
 24 | use opencl3::context::Context;
 25 | use opencl3::device::Device;
 26 | use opencl3::kernel::{ExecuteKernel, Kernel, create_program_kernels};
 27 | use opencl3::platform::get_platforms;
 28 | use opencl3::program::{CL_STD_2_0, Program};
 29 | use opencl3::svm::SvmVec;
 30 | use opencl3::types::cl_int;
 31 | use std::ptr;
 32 | 
 33 | // The OpenCL kernels in PROGRAM_SOURCE below use the built-in sub-group functions:
 34 | // sub_group_reduce_add, sub_group_scan_inclusive_add and sub_group_broadcast.
 35 | // NOTE(review): confirm which OpenCL version/extension a device needs for these built-ins.
 36 | const PROGRAM_SOURCE: &str = r#"
 37 | kernel void sum_int (global int* sums,
 38 |                     global int const* values)
 39 | {
 40 |     int value = sub_group_reduce_add(values[get_global_id(0)]);
 41 | 
 42 |     if (0u == get_local_id(0))
 43 |         sums[get_group_id(0)] = value;
 44 | }
 45 | 
 46 | kernel void inclusive_scan_int (global int* output,
 47 |                                 global int const* values)
 48 | {
 49 |     int sum = 0;
 50 |     size_t lid = get_local_id(0);
 51 |     size_t lsize = get_local_size(0);
 52 | 
 53 |     size_t num_groups = get_num_groups(0);
 54 |     for (size_t i = 0u; i < num_groups; ++i)
 55 |     {
 56 |         size_t lidx = i * lsize + lid;
 57 |         int value = sub_group_scan_inclusive_add(values[lidx]);
 58 |         output[lidx] = sum + value;
 59 | 
 60 |         sum += sub_group_broadcast(value, lsize - 1);
 61 |     }
 62 | }"#;
 63 | 
 64 | const SUM_KERNEL_NAME: &str = "sum_int";
 65 | const INCLUSIVE_SCAN_KERNEL_NAME: &str = "inclusive_scan_int";
 66 | 
 67 | /// Runs the `sum_int` and `inclusive_scan_int` kernels on the first GPU device that
 68 | /// supports fine grained buffer SVM and checks the results read directly from SVM.
 69 | /// If no such device is found, the test only prints a message and returns Ok.
 70 | #[test]
 71 | #[ignore]
 72 | fn test_opencl_2_kernel_example() -> Result<()> {
 73 |     let platforms = get_platforms()?;
 74 |     assert!(!platforms.is_empty());
 75 | 
 76 |     /////////////////////////////////////////////////////////////////////
 77 |     // Query OpenCL compute environment
 78 |     let opencl_2: &str = "OpenCL 2";
 79 |     let opencl_3: &str = "OpenCL 3";
 80 | 
 81 |     // Find an OpenCL fine grained SVM, platform and device
 82 |     let mut device_id = ptr::null_mut();
 83 |     let mut is_fine_grained_svm: bool = false;
 84 |     'platforms: for p in platforms {
 85 |         let platform_version = p.version()?;
 86 |         if platform_version.contains(opencl_2) || platform_version.contains(opencl_3) {
 87 |             let devices = p
 88 |                 .get_devices(CL_DEVICE_TYPE_GPU)
 89 |                 .expect("Platform::get_devices failed");
 90 | 
 91 |             for dev_id in devices {
 92 |                 let svm_mem_capability = Device::new(dev_id).svm_mem_capability();
 93 |                 if 0 != svm_mem_capability & CL_DEVICE_SVM_FINE_GRAIN_BUFFER {
 94 |                     // Stop searching all platforms at the first match, so that a later
 95 |                     // device cannot reset the flag after device_id has been chosen.
 96 |                     is_fine_grained_svm = true;
 97 |                     device_id = dev_id;
 98 |                     break 'platforms;
 99 |                 }
100 |             }
101 |         }
102 |     }
103 | 
104 |     if is_fine_grained_svm {
105 |         // Create OpenCL context from the OpenCL svm device
106 |         let device = Device::new(device_id);
107 |         let vendor = device.vendor()?;
108 |         let vendor_id = device.vendor_id()?;
109 |         println!("OpenCL device vendor name: {}", vendor);
110 |         println!("OpenCL device vendor id: {:X}", vendor_id);
111 | 
112 |         /////////////////////////////////////////////////////////////////////
113 |         // Initialise OpenCL compute environment
114 | 
115 |         // Create a Context on the OpenCL device
116 |         let context = Context::from_device(&device).expect("Context::from_device failed");
117 | 
118 |         // Build the OpenCL program source.
119 |         let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, CL_STD_2_0)
120 |             .expect("Program::create_and_build_from_source failed");
121 | 
122 |         // Create the kernels from the OpenCL program source, both are indexed below.
123 |         let kernels = create_program_kernels(&program)?;
124 |         assert!(2 <= kernels.len());
125 | 
126 |         let kernel_0_name = kernels[0].function_name()?;
127 |         println!("OpenCL kernel_0_name: {}", kernel_0_name);
128 | 
129 |         let (sum_kernel, inclusive_scan_kernel) = if SUM_KERNEL_NAME == kernel_0_name {
130 |             (&kernels[0], &kernels[1])
131 |         } else {
132 |             (&kernels[1], &kernels[0])
133 |         };
134 | 
135 |         // Create a command_queue on the Context's device
136 |         let queue = CommandQueue::create_default_with_properties(&context, 0, 0)
137 |             .expect("CommandQueue::create_default_with_properties failed");
138 | 
139 |         // Get the svm capability of all the devices in the context.
140 |         let svm_capability = context.get_svm_mem_capability();
141 |         assert!(0 < svm_capability);
142 | 
143 |         // Create SVM vectors for the input and output data
144 | 
145 |         // The input data
146 |         const ARRAY_SIZE: usize = 8;
147 |         let value_array: [cl_int; ARRAY_SIZE] = [3, 2, 5, 9, 7, 1, 4, 2];
148 | 
149 |         // Copy into an OpenCL SVM vector
150 |         let mut test_values =
151 |             SvmVec::<cl_int>::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed");
152 |         test_values.copy_from_slice(&value_array);
153 | 
154 |         // Make test_values immutable
155 |         let test_values = test_values;
156 | 
157 |         // The output data, an OpenCL SVM vector
158 |         let mut results =
159 |             SvmVec::<cl_int>::allocate_zeroed(&context, ARRAY_SIZE).expect("SVM allocation failed");
160 | 
161 |         // Run the sum kernel on the input data
162 |         // SAFETY: both arguments are SVM vectors of ARRAY_SIZE `cl_int`s, the global work size.
163 |         let sum_kernel_event = unsafe {
164 |             ExecuteKernel::new(sum_kernel)
165 |                 .set_arg_svm(results.as_mut_ptr())
166 |                 .set_arg_svm(test_values.as_ptr())
167 |                 .set_global_work_size(ARRAY_SIZE)
168 |                 .enqueue_nd_range(&queue)?
169 |         };
170 | 
171 |         // Wait for the kernel to complete execution on the device
172 |         sum_kernel_event.wait()?;
173 | 
174 |         // Can access OpenCL SVM directly, no need to map or read the results
175 |         println!("sum results: {:?}", results);
176 |         assert_eq!(33, results[0]);
177 |         assert_eq!(0, results[ARRAY_SIZE - 1]);
178 | 
179 |         // Run the inclusive scan kernel on the input data
180 |         // SAFETY: both arguments are SVM vectors of ARRAY_SIZE `cl_int`s, the global work size.
181 |         let kernel_event = unsafe {
182 |             ExecuteKernel::new(inclusive_scan_kernel)
183 |                 .set_arg_svm(results.as_mut_ptr())
184 |                 .set_arg_svm(test_values.as_ptr())
185 |                 .set_global_work_size(ARRAY_SIZE)
186 |                 .enqueue_nd_range(&queue)?
187 |         };
188 | 
189 |         kernel_event.wait()?;
190 | 
191 |         println!("inclusive_scan results: {:?}", results);
192 |         assert_eq!(value_array[0], results[0]);
193 |         assert_eq!(33, results[ARRAY_SIZE - 1]);
194 |     } else {
195 |         println!("OpenCL fine grained SVM capable device not found");
196 |     }
197 | 
198 |     Ok(())
199 | }
200 | 
201 | /// Runs the `sum_int` kernel on the first device that supports fine grained system SVM
202 | /// and checks the results; it only prints a message if no such device is found.
203 | #[test]
204 | #[ignore]
205 | fn test_opencl_2_system_svm_example() -> Result<()> {
206 |     let platforms = get_platforms()?;
207 |     assert!(!platforms.is_empty());
208 | 
209 |     /////////////////////////////////////////////////////////////////////
210 |     // Query OpenCL compute environment
211 |     let opencl_2: &str = "OpenCL 2";
212 |     let opencl_3: &str = "OpenCL 3";
213 | 
214 |     // Find an OpenCL fine grained system SVM, platform and device
215 |     let mut device_id = ptr::null_mut();
216 |     let mut is_fine_grained_system_svm: bool = false;
217 |     'platforms: for p in platforms {
218 |         let platform_version = p.version()?;
219 |         if platform_version.contains(opencl_2) || platform_version.contains(opencl_3) {
220 |             let devices = p
221 |                 .get_devices(CL_DEVICE_TYPE_ALL)
222 |                 .expect("Platform::get_devices failed");
223 | 
224 |             for dev_id in devices {
225 |                 let svm_mem_capability = Device::new(dev_id).svm_mem_capability();
226 |                 if 0 != svm_mem_capability & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM {
227 |                     // Stop at the first match so that a later device cannot reset the flag.
228 |                     is_fine_grained_system_svm = true;
229 |                     device_id = dev_id;
230 |                     break 'platforms;
231 |                 }
232 |             }
233 |         }
234 |     }
235 | 
236 |     if is_fine_grained_system_svm {
237 |         // Create OpenCL context from the OpenCL svm device
238 |         let device = Device::new(device_id);
239 |         let vendor = device.vendor().expect("Device.vendor failed");
240 |         let vendor_id = device.vendor_id().expect("Device.vendor_id failed");
241 |         println!("OpenCL device vendor name: {}", vendor);
242 |         println!("OpenCL device vendor id: {:X}", vendor_id);
243 | 
244 |         /////////////////////////////////////////////////////////////////////
245 |         // Initialise OpenCL compute environment
246 | 
247 |         // Create a Context on the OpenCL svm device
248 |         let context = Context::from_device(&device).expect("Context::from_device failed");
249 | 
250 |         // Build the OpenCL program source as OpenCL C 2.0 and create the kernel.
251 |         let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, CL_STD_2_0)
252 |             .expect("Program::create_and_build_from_source failed");
253 |         let kernel = Kernel::create(&program, SUM_KERNEL_NAME).expect("Kernel::create failed");
254 | 
255 |         // Create a command_queue on the Context's device
256 |         let queue = CommandQueue::create_default_with_properties(&context, 0, 0)
257 |             .expect("CommandQueue::create_default_with_properties failed");
258 | 
259 |         // The input data
260 |         const ARRAY_SIZE: usize = 8;
261 |         let value_array: [cl_int; ARRAY_SIZE] = [3, 2, 5, 9, 7, 1, 4, 2];
262 | 
263 |         // Copy into an OpenCL SVM vector
264 |         let mut test_values =
265 |             SvmVec::<cl_int>::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed");
266 |         test_values.copy_from_slice(&value_array);
267 | 
268 |         // Make test_values immutable
269 |         let test_values = test_values;
270 | 
271 |         // The output data, an OpenCL SVM vector
272 |         let mut results =
273 |             SvmVec::<cl_int>::allocate_zeroed(&context, ARRAY_SIZE).expect("SVM allocation failed");
274 | 
275 |         // Run the sum kernel on the input data
276 |         // SAFETY: both arguments are SVM vectors of ARRAY_SIZE `cl_int`s, the global work size.
277 |         let sum_kernel_event = unsafe {
278 |             ExecuteKernel::new(&kernel)
279 |                 .set_arg_svm(results.as_mut_ptr())
280 |                 .set_arg_svm(test_values.as_ptr())
281 |                 .set_global_work_size(ARRAY_SIZE)
282 |                 .enqueue_nd_range(&queue)?
283 |         };
284 | 
285 |         // Wait for the kernel to complete execution on the device
286 |         sum_kernel_event.wait()?;
287 | 
288 |         // Can access OpenCL SVM directly, no need to map or read the results
289 |         println!("sum results: {:?}", results);
290 |         assert_eq!(33, results[0]);
291 |         assert_eq!(0, results[ARRAY_SIZE - 1]);
292 |     } else {
293 |         println!("OpenCL fine grained system SVM device not found")
294 |     }
295 | 
296 |     Ok(())
297 | }
298 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # opencl3
  2 | 
  3 | [![crates.io](https://img.shields.io/crates/v/opencl3.svg)](https://crates.io/crates/opencl3)
  4 | [![docs.io](https://docs.rs/opencl3/badge.svg)](https://docs.rs/opencl3/)
  5 | [![OpenCL 3.0](https://img.shields.io/badge/OpenCL-3.0-blue.svg)](https://www.khronos.org/registry/OpenCL/)
  6 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
  7 | [![Rust](https://github.com/kenba/opencl3/workflows/Rust/badge.svg)](https://github.com/kenba/opencl3/actions)
  8 | 
  9 | A Rust implementation of the Khronos [OpenCL](https://www.khronos.org/registry/OpenCL/) API.
 10 | 
 11 | ## Description
 12 | 
 13 | A relatively simple, object based model of the OpenCL 3.0
 14 | [API](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html).  
 15 | It is built upon the [cl3](https://crates.io/crates/cl3) crate, which
 16 | provides a functional interface to the OpenCL [C API](https://github.com/KhronosGroup/OpenCL-Headers/blob/master/CL/cl.h).  
 17 | 
 18 | [OpenCL](https://www.khronos.org/opencl/) (Open Computing Language) is a framework for general purpose parallel programming across heterogeneous devices including: CPUs, GPUs, DSPs, FPGAs and other processors or hardware accelerators. It is often considered as an open-source alternative to Nvidia's proprietary
 19 | Compute Unified Device Architecture [CUDA](https://developer.nvidia.com/cuda-zone)
 20 | for performing General-purpose computing on GPUs, see
 21 | [GPGPU](https://en.wikipedia.org/wiki/General-purpose_computing_on_graphics_processing_units).
 22 | 
 23 | [OpenCL 3.0](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html)
 24 | is a unified specification that adds little new functionality to previous OpenCL versions.  
 25 | It specifies that all **OpenCL 1.2** features are **mandatory**, while all
 26 | OpenCL 2.x and 3.0 features are now optional.
 27 | 
 28 | ### Features
 29 | 
 30 | This library has:
 31 | 
 32 | * A simple API, enabling most OpenCL objects to be created with a single function call.
 33 | * Automatic OpenCL resource management using the [Drop trait](https://doc.rust-lang.org/book/ch15-03-drop.html) to implement [RAII](https://en.wikipedia.org/wiki/Resource_acquisition_is_initialization).
 34 | * Support for [directed acyclic graph](https://en.wikipedia.org/wiki/Directed_acyclic_graph) OpenCL control flow execution using event wait lists.
 35 | * Support for Shared Virtual Memory (SVM) with an [SvmVec](src/svm.rs) object that can be serialized and deserialized by [serde](https://serde.rs/).
 36 | * Support for OpenCL extensions, see [OpenCL Extensions](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html).
 37 | * Support for multithreading with [Send and Sync](https://doc.rust-lang.org/nomicon/send-and-sync.html) traits.
 38 | 
 39 | ## Design
 40 | 
 41 | The library is object based with most OpenCL objects represented by rust structs.
 42 | For example, an OpenCL `cl_device_id` is represented by [Device](src/device.rs) with methods to get information about the device instead of calling `clGetDeviceInfo` with the relevant `cl_device_info` value.  
 43 | 
 44 | ![OpenCL Context](docs/images/opencl_context_objects.svg)  
 45 | *OpenCL Context Class Diagram*
 46 | 
 47 | The struct methods are simpler to use than their equivalent standalone functions in [cl3](https://github.com/kenba/cl3) because they convert the `InfoType` enum into the correct underlying type returned by the `clGetDeviceInfo` call for the `cl_device_info` value.
 48 | 
 49 | Nearly all the structs implement the `Drop` trait to release their corresponding
 50 | OpenCL objects. The exceptions are `Platform` and `Device` which don't need to be released. See the crate [documentation](https://docs.rs/opencl3/).
 51 | 
 52 | The APIs for OpenCL versions and extensions are controlled by Rust features such as "CL_VERSION_2_0" and "cl_khr_gl_sharing". To enable an OpenCL version, the feature for that version and **all** previous OpenCL versions must be enabled, e.g. for "CL_VERSION_2_0"; "CL_VERSION_1_1" and "CL_VERSION_1_2" must also be enabled.
 53 | 
 54 | The default features are "CL_VERSION_1_1", "CL_VERSION_1_2" and "CL_VERSION_2_0".
 55 | 
 56 | Rust deprecation warnings are given for OpenCL API functions that are deprecated by an enabled OpenCL version e.g., `clCreateCommandQueue` is deprecated whenever "CL_VERSION_2_0" is enabled.
 57 | 
 58 | ## Use
 59 | 
 60 | Ensure that an OpenCL Installable Client Driver (ICD) and the appropriate OpenCL
 61 | hardware driver(s) are installed, see
 62 | [OpenCL Installation](https://github.com/kenba/cl3/tree/main/docs/opencl_installation.md).
 63 | 
 64 | `opencl3` supports OpenCL 1.2 and 2.0 ICD loaders by default. If you have an
 65 | OpenCL 2.0 ICD loader then just add the following to your project's `Cargo.toml`:
 66 | 
 67 | ```toml
 68 | [dependencies]
 69 | opencl3 = "0.12"
 70 | ```
 71 | 
 72 | If your OpenCL ICD loader supports higher versions of OpenCL then add the
 73 | appropriate features to opencl3, e.g. for an OpenCL 3.0 ICD loader add the
 74 | following to your project's `Cargo.toml` instead:
 75 | 
 76 | ```toml
 77 | [dependencies.opencl3]
 78 | version = "0.12"
 79 | features = ["CL_VERSION_2_1", "CL_VERSION_2_2", "CL_VERSION_3_0"]
 80 | ```
 81 | 
 82 | OpenCL extensions and `serde` support can also be enabled by adding their features, e.g.:
 83 | 
 84 | ```toml
 85 | [dependencies.opencl3]
 86 | version = "0.12"
 87 | features = ["cl_khr_gl_sharing", "cl_khr_dx9_media_sharing", "serde"]
 88 | ```
 89 | 
 90 | See the [OpenCL Guide](https://github.com/KhronosGroup/OpenCL-Guide) and [OpenCL Description](https://github.com/kenba/opencl3/tree/main/docs/opencl_description.md) for background on using OpenCL.
 91 | 
 92 | ## Examples
 93 | 
 94 | There are examples in the [examples](https://github.com/kenba/opencl3/tree/main/examples/) directory.
 95 | The tests also provide examples of how the crate may be used, e.g. see:
 96 | [platform](https://github.com/kenba/opencl3/tree/main/src/platform.rs),
 97 | [device](https://github.com/kenba/opencl3/tree/main/src/device.rs),
 98 | [context](https://github.com/kenba/opencl3/tree/main/src/context.rs),
 99 | [integration_test](https://github.com/kenba/opencl3/tree/main/tests/integration_test.rs) and
100 | [opencl2_kernel_test](https://github.com/kenba/opencl3/tree/main/tests/opencl2_kernel_test.rs).
101 | 
102 | The library is designed to support events and OpenCL 2 features such as Shared Virtual Memory (SVM) and kernel built-in work-group functions.
103 | It also has optional support for `serde` e.g.:
104 | 
105 | ```rust no_run
106 | const PROGRAM_SOURCE: &str = r#"
107 | kernel void inclusive_scan_int (global int* output,
108 |                                 global int const* values)
109 | {
110 |     int sum = 0;
111 |     size_t lid = get_local_id(0);
112 |     size_t lsize = get_local_size(0);
113 | 
114 |     size_t num_groups = get_num_groups(0);
115 |     for (size_t i = 0u; i < num_groups; ++i)
116 |     {
117 |         size_t lidx = i * lsize + lid;
118 |         int value = work_group_scan_inclusive_add(values[lidx]);
119 |         output[lidx] = sum + value;
120 | 
121 |         sum += work_group_broadcast(value, lsize - 1);
122 |     }
123 | }"#;
124 | 
125 | const KERNEL_NAME: &str = "inclusive_scan_int";
126 | 
127 | // Create a Context on an OpenCL device
128 | let context = Context::from_device(&device).expect("Context::from_device failed");
129 | 
130 | // Build the OpenCL program source and create the kernel.
131 | let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, CL_STD_2_0)
132 |     .expect("Program::create_and_build_from_source failed");
133 | 
134 | let kernel = Kernel::create(&program, KERNEL_NAME).expect("Kernel::create failed");
135 | 
136 | // Create a command_queue on the Context's device
137 | let queue = CommandQueue::create_default_with_properties(
138 |     &context,
139 |     CL_QUEUE_PROFILING_ENABLE,
140 |     0,
141 | )
142 | .expect("CommandQueue::create_default_with_properties failed");
143 | 
144 | // The input data
145 | const ARRAY_SIZE: usize = 8;
146 | const VALUE_ARRAY: &str = "[3,2,5,9,7,1,4,2]";
147 | 
148 | // Create an OpenCL SVM vector
149 | let mut test_values = SvmVec::<cl_int>::new(&context);
150 | 
151 | // Handle test_values if device only supports CL_DEVICE_SVM_COARSE_GRAIN_BUFFER
152 | if !test_values.is_fine_grained() {
153 |     // SVM_COARSE_GRAIN_BUFFER needs to know the size of the data to allocate the SVM
154 |     test_values = SvmVec::<cl_int>::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed");
155 |     // Map the SVM for a SVM_COARSE_GRAIN_BUFFER
156 |     unsafe { queue.enqueue_svm_map(CL_BLOCKING, CL_MAP_WRITE, &mut test_values, &[])? };
157 |     // Clear the SVM for the deserializer
158 |     test_values.clear();
159 | }
160 | 
161 | ExtendSvmVec(&mut test_values)
162 |     .deserialize(&mut deserializer)
163 |     .expect("Error deserializing the VALUE_ARRAY JSON string.");
164 | 
165 | // Make test_values immutable
166 | let test_values = test_values;
167 | 
168 | // Unmap test_values if not a CL_MEM_SVM_FINE_GRAIN_BUFFER
169 | if !test_values.is_fine_grained() {
170 |     let unmap_test_values_event = unsafe { queue.enqueue_svm_unmap(&test_values, &[])? };
171 |     unmap_test_values_event.wait()?;
172 | }
173 | 
174 | // The output data, an OpenCL SVM vector
175 | let mut results =
176 |     SvmVec::<cl_int>::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed");
177 | 
178 | // Run the kernel on the input data
179 | let kernel_event = unsafe {
180 |     ExecuteKernel::new(&kernel)
181 |         .set_arg_svm(results.as_mut_ptr())
182 |         .set_arg_svm(test_values.as_ptr())
183 |         .set_global_work_size(ARRAY_SIZE)
184 |         .enqueue_nd_range(&queue)?
185 | };
186 | 
187 | // Wait for the kernel to complete execution on the device
188 | kernel_event.wait()?;
189 | 
190 | // Map results if not a CL_MEM_SVM_FINE_GRAIN_BUFFER
191 | if !results.is_fine_grained() {
192 |     unsafe { queue.enqueue_svm_map(CL_BLOCKING, CL_MAP_READ, &mut results, &[])? };
193 | }
194 | 
195 | // Convert SVM results to json
196 | let json_results = serde_json::to_string(&results).unwrap();
197 | println!("json results: {}", json_results);
198 | 
199 | // Unmap results if not a CL_MEM_SVM_FINE_GRAIN_BUFFER
200 | if !results.is_fine_grained() {
201 |     let unmap_results_event = unsafe { queue.enqueue_svm_unmap(&results, &[])? };
202 |     unmap_results_event.wait()?;
203 | }
204 | ```
205 | 
206 | The example above was taken from:
207 | [opencl2serde.rs](https://github.com/kenba/opencl3/tree/main/examples/opencl2serde.rs).
208 | 
209 | ## Tests
210 | 
211 | The crate contains unit, documentation and integration tests.  
212 | The tests run the platform and device info functions (among others) so they
213 | can provide useful information about OpenCL capabilities of the system.
214 | 
215 | It is recommended to run the tests in single-threaded mode, since some of
216 | them can interfere with each other when run multi-threaded, e.g.:
217 | 
218 | ```shell
219 | cargo test -- --test-threads=1 --show-output
220 | ```
221 | 
222 | The integration tests are marked `ignore` so use the following command to
223 | run them:
224 | 
225 | ```shell
226 | cargo test -- --test-threads=1 --show-output --ignored
227 | ```
228 | 
229 | ## Recent changes
230 | 
231 | The API has changed considerably since version `0.1` of the library, with the
232 | aim of making the library more consistent and easier to use.
233 | 
234 | [SvmVec](src/svm.rs) was changed recently to provide support for `serde` deserialization.
235 | It also changed in version 0.5.0 to provide better support for
236 | coarse grain buffer Shared Virtual Memory now that Nvidia is supporting it,
237 | see [Nvidia OpenCL](https://developer.nvidia.com/opencl).
238 | 
239 | In version 0.6.0 the Info enums were removed from the underlying [cl3](https://crates.io/crates/cl3) crate and this crate so that data can be read from OpenCL devices in the future using new values that are currently undefined.
240 | 
241 | In version 0.8.0 deprecation warnings are given for OpenCL API functions that are deprecated by an enabled OpenCL version e.g., `clCreateCommandQueue` is deprecated whenever "CL_VERSION_2_0" is enabled.
242 | 
243 | In version 0.9.0 many OpenCL API functions are declared `unsafe` since they may cause undefined behaviour if called incorrectly.
244 | 
245 | For information on other changes, see [Releases](RELEASES.md).
246 | 
247 | ## Contribution
248 | 
249 | If you want to contribute through code or documentation, the [Contributing](CONTRIBUTING.md) guide is the best place to start. If you have any questions, please feel free to ask.
250 | Just please abide by our [Code of Conduct](CODE_OF_CONDUCT.md).
251 | 
252 | ## License
253 | 
254 | Licensed under the Apache License, Version 2.0, as per Khronos Group OpenCL.  
255 | You may obtain a copy of the License at: <http://www.apache.org/licenses/LICENSE-2.0>
256 | 
257 | Any contribution intentionally submitted for inclusion in the work by you shall be licensed  as defined in the Apache-2.0 license above, without any additional terms or conditions, unless you explicitly state otherwise.
258 | 
259 | OpenCL and the OpenCL logo are trademarks of Apple Inc. used by permission by Khronos.
260 | 


--------------------------------------------------------------------------------
/docs/opencl_description.md:
--------------------------------------------------------------------------------
  1 | # OpenCL Description
  2 | 
  3 | **OpenCL** (Open Computing Language) is a framework for parallel programming on
  4 | heterogeneous devices.
  5 | It is designed to harness the compute performance of GPUs, DSPs, FPGAs, etc.
  6 | to improve the throughput and latency of computationally intensive workloads.
  7 | 
  8 | ## OpenCL Performance
  9 | 
 10 | A well designed OpenCL application running on appropriate hardware can
 11 | significantly outperform an equivalent application running on multiple CPUs.
 12 | However, a poorly designed OpenCL application or an OpenCL application
 13 | running on inappropriate hardware and/or with the wrong kind of data can be
 14 | *slower* than an equivalent application running on CPUs. There are several
 15 | performance overheads inherent to performing computational tasks off-board modern
 16 | CPUs to be considered before embarking on an OpenCL solution.
 17 | 
 18 | Parallel computing latency is governed by [Amdahl's law](https://en.wikipedia.org/wiki/Amdahl%27s_law), i.e. the minimum execution time of a parallelised process can
 19 | not be less than the parts of the process that *cannot* be parallelised.
 20 | Where OpenCL is concerned, the parts of the process that cannot be parallelised are:
 21 | * OpenCL Initialisation
 22 | * and data transfer between **host memory** and **device memory**.
 23 | 
 24 | Both OpenCL initialisation and data transfer can take longer than processing
 25 | the solution on modern CPUs, especially where the OpenCL programs/kernels to be
 26 | compiled are relatively large and/or using a compute device accessed via a
 27 | relatively slow mechanism, such as a [PCIe](https://en.wikipedia.org/wiki/PCI_Express)
 28 | bus to a discrete graphics card.
 29 | 
 30 | ### Small Tasks
 31 | 
 32 | Modern multi-core CPUs can share cached data "on-chip" instead of transferring it
 33 | via shared memory, let alone down a data-bus. Also, since application "kernels"
 34 | (i.e. functions) are compiled into an application, they do not need to be loaded
 35 | and compiled again before the application can run.
 36 | 
 37 | A well-designed parallel processing application using a [work-stealing](https://en.wikipedia.org/wiki/Work_stealing) task scheduler such as Intel's Threading
 38 | Building Blocks ([TBB](https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onetbb.html)) library often out-performs OpenCL applications for
 39 | "one off" tasks with a significant kernel compilation time and/or relatively
 40 | small data sets.
 41 | 
 42 | ### CPU Devices
 43 | 
 44 | Note: the OpenCL data transfer overhead can be significantly reduced by using
 45 | a CPU device, since it can share the same memory (and maybe even the same cache)
 46 | as the OpenCL application's host. An OpenCL CPU device may provide the optimum
 47 | solution where the OpenCL kernels can utilise [SIMD](https://en.wikipedia.org/wiki/SIMD)
 48 | instructions on CPUs.
 49 | 
 50 | # OpenCL Lifecycle
 51 | 
 52 | Figure 1 shows the typical lifecycle of an OpenCL application.  
 53 | It can be considered as consisting of 4 phases:
 54 | * Query
 55 | * Initialisation
 56 | * Compute
 57 | * Clean-up
 58 | 
 59 | ![OpenCL Application Lifecycle](images/opencl_app_sequence.svg)  
 60 | *Figure 1 OpenCL Application Lifecycle*
 61 | 
 62 | ## Query
 63 | 
 64 | In the Query phase the OpenCL application queries the system that it's running on
 65 | to determine what features it supports and which is (are) the best device(s) to
 66 | run on.
 67 | 
 68 | Where an OpenCL application is designed to run on specific hardware, this simply
 69 | involves discovering which OpenCL device(s) correspond to the required hardware.
 70 | 
 71 | However, where an OpenCL application is designed to run almost anywhere (like
 72 | the tests in this library) then it must query the available platforms and
 73 | devices to find the most appropriate platform and device(s).  
 74 | 
 75 | This is not a trivial task, since any system with a discrete graphics card is
 76 | likely to have more than one platform and each platform is likely to be connected
 77 | to more than one device. Furthermore, each device may be connected to more than one platform, see Figure 2.
 78 | 
 79 | ![Example OpenCL System](images/example_opencl_system.svg)  
 80 | *Figure 2 An Example OpenCL System*
 81 | 
 82 | The [OpenCL 3.0](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html) API specification has new queries to simplify this task.
 83 | 
 84 | ## Initialisation
 85 | 
 86 | After the most suitable platform and device(s) have been found it is necessary
 87 | to create an OpenCL context for them.
 88 | 
 89 | ### Context
 90 | 
 91 | An OpenCL application must have at least one context.
 92 | An OpenCL application may create more than one context but OpenCL does not support synchronisation between multiple contexts.
 93 | 
 94 | An OpenCL context can be created for more than one device; however, the devices
 95 | must all be connected to the same platform. Where an OpenCL context has multiple
 96 | devices, OpenCL **does** provide synchronisation between the devices.
 97 | 
 98 | ![OpenCL Context](images/opencl_context_objects.svg)  
 99 | *Figure 3 Context Class Diagram*
100 | 
101 | The `opencl3` [Context](../src/context.rs#Context) manages OpenCL objects that
102 | are required to compute data on OpenCL devices, i.e.:
103 | 
104 | * Command Queues
105 | * Programs
106 | * Kernels
107 | * Sub Devices
108 | * Memory
109 | 
110 | ### Command Queue(s)
111 | 
112 | In order to execute OpenCL kernels on the context device(s), it is necessary to
113 | create at least one command queue for each device. OpenCL permits more than
114 | one command queue per device and also enables applications to split devices into
115 | sub-devices, each of which requires its own command queue(s).
116 | 
117 | ### Programs and Kernels
118 | 
119 | Also, in order to execute OpenCL kernels, the program(s) in which they are
120 | defined need to be created and built for all the devices in the context
121 | before the kernels themselves can be constructed.
122 | 
123 | OpenCL programs can be built from source code, Intermediate Language
124 | (IL, e.g. [SPIR](https://www.khronos.org/spir/) or [SPIR-V](https://www.khronos.org/registry/spir-v/)) or binaries. Building from source or IL can take many seconds
125 | for complex kernels; it is therefore tempting to load binary programs, especially
126 | if the application is designed to run on specific hardware.
127 | 
128 | Note: some devices have built-in kernels, e.g. [Intel Motion Estimation](https://software.intel.com/content/www/us/en/develop/articles/intro-to-advanced-motion-estimation-extension-for-opencl.html). These can also be
129 | built into the context for the device(s) that have them.
130 | 
131 | `opencl3` creates all the OpenCL kernels in the programs managed by `Context`
132 | and stores them in a HashMap in `Context` using the kernel name (in a `CString`)
133 | as the key.
134 | 
135 | ### Sub Devices
136 | 
137 | OpenCL devices can be partitioned into sub-devices, see [Partitioning a Device](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_partitioning_a_device). Unlike normal OpenCL devices that are connected
138 | to a platform, sub-devices can be retained and released.
139 | `opencl3` has a [SubDevice](../src/device.rs#SubDevice) struct to release
140 | sub-devices when they are dropped.
141 | 
142 | As with normal OpenCL devices, each sub-device requires at least one
143 | command queue to compute data on it.
144 | 
145 | Device partitioning is not supported by all OpenCL devices.
146 | For more information see: [OpenCL Device Fission for CPU Performance](https://software.intel.com/content/www/us/en/develop/articles/opencl-device-fission-for-cpu-performance.html).
147 | 
148 | ### Memory
149 | 
150 | Finally, the OpenCL kernels require memory from which to read input data and
151 | write output data. Unless using host Shared Virtual Memory (SVM), the OpenCL
152 | device memory (buffers, images and device SVM) must be created before data can be
153 | transferred between the host and the OpenCL device memory, see Figure 4.
154 | 
155 | ![OpenCL Memory Model](images/opencl_memory.png)  
156 | *Figure 4 An OpenCL Context's Memory Model*
157 | 
158 | OpenCL defines 4 different types of Memory Objects, see Figure 5:
159 | 
160 | * Buffer
161 | * Image
162 | * Pipe (*OpenCL 2*)
163 | * Shared Virtual Memory (*OpenCL 2*)
164 | 
165 | ![OpenCL Memory Objects](images/opencl_memory_objects.svg)  
166 | *Figure 5 OpenCL Memory Objects*
167 | 
168 | The OpenCL API describes the Memory Objects in detail in the OpenCL [Memory Model](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_memory_model).
169 | 
170 | #### Sampler Objects
171 | 
172 | OpenCL also has Sampler Objects to specify how a kernel shall sample an image
173 | when the image is read in the kernel, see [Sampler Objects](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_sampler_objects).
174 | 
175 | ## Compute
176 | 
177 | Now that the OpenCL compute environment has been set up, data can be processed
178 | by the OpenCL kernel(s) on the device(s).
179 | 
180 | The library stores OpenCL kernels by name in a HashMap in the Context struct.
181 | An application can get OpenCL kernels it requires by name from the Context.
182 | 
183 | As shown in Figure 1, the application:
184 | * writes input data from the Host Memory to the Global Memory shown in Figure 4;
185 | * executes the kernel(s);
186 | * reads output data from the Global Memory to the Host Memory shown in Figure 4;
187 | * waits for the data read to finish.
188 | 
189 | ### Data Transfer
190 | 
191 | An OpenCL 1.2 application (i.e. *without* SVM) performs all the tasks above via
192 | a command_queue. OpenCL command queues have "enqueue" commands for: writing memory
193 | buffers and images, executing kernels, and reading memory buffers and images.
194 | 
195 | An OpenCL 2.0 application with *coarse* SVM also performs all the tasks above via
196 | a command_queue. However, instead of writing and reading memory to and from
197 | context Global Memory, the application `maps` and `unmaps` the svm memory.
198 | 
199 | An OpenCL 2.0 application with *host* or *fine device* SVM does not need to
200 | enqueue any commands to transfer data between Host Memory and Global Memory:
201 | either no transfer is needed or it is handled by OpenCL "under the covers".
202 | 
203 | The OpenCL API describes [SVM data transfer](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#shared-virtual-memory)
204 | in more detail.
205 | 
206 | ### Kernel Execution
207 | 
208 | Before a kernel can be "enqueued" on a command_queue, its arguments must be
209 | initialised, i.e. the kernel function arguments must be set to the locations
210 | of the input and output memory and any other data it may require.
211 | 
212 | After its arguments have been set, a kernel can be executed on a command queue
213 | with the OpenCL `clEnqueueNDRangeKernel` function. This function controls *how*
214 | kernels execute, see [Mapping work-items onto an NDRange](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_mapping_work_items_onto_an_ndrange).
215 | 
216 | This library contains an `ExecuteKernel` struct that implements the
217 | [builder pattern](https://doc.rust-lang.org/1.0.0/style/ownership/builders.html)
218 | to simplify setting up the arguments and `NDRange` when executing a kernel,
219 | see: [kernel.rs](../src/kernel.rs).
220 | 
221 | ### Events
222 | 
223 | All command_queue "enqueue_" commands can return an optional Event object and
224 | take an event_wait_list as a parameter.
225 | 
226 | OpenCL events can be used to time how long OpenCL commands take to
227 | execute by creating command queues with `CL_QUEUE_PROFILING_ENABLE` and
228 | getting profile info from the events with `CL_PROFILING_COMMAND_START` and
229 | `CL_PROFILING_COMMAND_END`.
230 | 
231 | OpenCL events can also be used to control "enqueue_" commands by supplying
232 | events from previous "enqueue_" commands in the event_wait_list of the
233 | command to be controlled.
234 | 
235 | Furthermore, if an OpenCL device supports out-of-order command execution and its
236 | command queue was created with `CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE` then the
237 | OpenCL events can be used to control out-of-order command execution, enabling
238 | commands to run concurrently on the device. Together, the events and event_wait_lists
239 | effectively create [directed acyclic graphs](https://en.wikipedia.org/wiki/Directed_acyclic_graph) to control execution on the device.
240 | 
241 | An application can wait for events to complete by calling `wait_for_events` with
242 | an event_wait_list instead of calling `finish` on a command_queue.
243 | `wait_for_events` enables an application to wait for events to complete
244 | simultaneously on multiple queues, for example, see
245 | [OpenCL events](http://people.cs.bris.ac.uk/~simonm/workshops/BSC_2013/opencl:course:bsc/Slides/OpenCL_events.pdf).
246 | 
247 | ## Clean-up
248 | 
249 | A well-behaved OpenCL application should clean up after itself by freeing memory
250 | and releasing OpenCL objects, i.e.:
251 | * events,
252 | * samplers,
253 | * images,
254 | * buffers,
255 | * pipes,
256 | * svm,
257 | * kernels,
258 | * programs,
259 | * command queues,
260 | * and the context(s).
261 | 
262 | This library handles OpenCL clean-up automatically using [RAII](https://en.wikipedia.org/wiki/Resource_acquisition_is_initialization) by wrapping OpenCL objects in
263 | structs that implement the [Drop trait](https://doc.rust-lang.org/book/ch15-03-drop.html).
264 | 


--------------------------------------------------------------------------------
/docs/images/opencl_context_objects.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" contentScriptType="application/ecmascript" contentStyleType="text/css" height="177px" preserveAspectRatio="none" style="width:610px;height:177px;" version="1.1" viewBox="0 0 610 177" width="610px" zoomAndPan="magnify"><defs><filter height="300%" id="f1fqqfcb7b1wkx" width="300%" x="-1" y="-1"><feGaussianBlur result="blurOut" stdDeviation="2.0"/><feColorMatrix in="blurOut" result="blurOut2" type="matrix" values="0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 .4 0"/><feOffset dx="4.0" dy="4.0" in="blurOut2" result="blurOut3"/><feBlend in="SourceGraphic" in2="blurOut3" mode="normal"/></filter></defs><g><rect fill="#FEFECE" filter="url(#f1fqqfcb7b1wkx)" height="35.0938" style="stroke:#A80036;stroke-width:1.5;" width="82" x="7" y="121.5"/><text fill="#000000" font-family="sans-serif" font-size="12" lengthAdjust="spacingAndGlyphs" textLength="68" x="14" y="135.957">cl_device_id</text><line style="stroke:#A80036;stroke-width:1.5;" x1="8" x2="88" y1="140.5938" y2="140.5938"/><!--MD5=[6754354365917abf8f1a56af1b7e344c]
 2 | class Context--><rect fill="#FEFECE" filter="url(#f1fqqfcb7b1wkx)" height="48" id="Context" style="stroke:#A80036;stroke-width:1.5;" width="73" x="290.5" y="7"/><ellipse cx="305.5" cy="23" fill="#ADD1B2" rx="11" ry="11" style="stroke:#A80036;stroke-width:1.0;"/><path d="M308.2656,18.875 Q308.4219,18.6563 308.6094,18.5469 Q308.7969,18.4375 309.0156,18.4375 Q309.3906,18.4375 309.625,18.6953 Q309.8594,18.9531 309.8594,19.5625 L309.8594,21.0156 Q309.8594,21.625 309.625,21.8906 Q309.3906,22.1563 309.0156,22.1563 Q308.6719,22.1563 308.4688,21.9531 Q308.2656,21.7656 308.1563,21.25 Q308.1094,20.8906 307.9219,20.7031 Q307.5938,20.3281 306.9844,20.1094 Q306.375,19.8906 305.75,19.8906 Q304.9844,19.8906 304.3516,20.2188 Q303.7188,20.5469 303.2266,21.2969 Q302.7344,22.0469 302.7344,23.0781 L302.7344,24.1719 Q302.7344,25.4063 303.625,26.2266 Q304.5156,27.0469 306.1094,27.0469 Q307.0469,27.0469 307.7031,26.7969 Q308.0938,26.6406 308.5156,26.2031 Q308.7813,25.9375 308.9297,25.8594 Q309.0781,25.7813 309.2813,25.7813 Q309.6094,25.7813 309.8672,26.0391 Q310.125,26.2969 310.125,26.6406 Q310.125,26.9844 309.7813,27.3906 Q309.2813,27.9688 308.4844,28.2969 Q307.4063,28.75 306.1094,28.75 Q304.5938,28.75 303.3906,28.125 Q302.4063,27.625 301.7188,26.5547 Q301.0313,25.4844 301.0313,24.2031 L301.0313,23.0469 Q301.0313,21.7188 301.6484,20.5703 Q302.2656,19.4219 303.3594,18.8047 Q304.4531,18.1875 305.6875,18.1875 Q306.4219,18.1875 307.0703,18.3516 Q307.7188,18.5156 308.2656,18.875 Z " fill="#000000"/><text fill="#000000" font-family="sans-serif" font-size="12" lengthAdjust="spacingAndGlyphs" textLength="41" x="319.5" y="27.9102">Context</text><line style="stroke:#A80036;stroke-width:1.5;" x1="291.5" x2="362.5" y1="39" y2="39"/><line style="stroke:#A80036;stroke-width:1.5;" x1="291.5" x2="362.5" y1="47" y2="47"/><!--MD5=[8ad5f43107e1b4d7c118456659769e46]
 3 | class CommandQueue--><rect fill="#FEFECE" filter="url(#f1fqqfcb7b1wkx)" height="48" id="CommandQueue" style="stroke:#A80036;stroke-width:1.5;" width="128" x="124" y="115"/><ellipse cx="139" cy="131" fill="#ADD1B2" rx="11" ry="11" style="stroke:#A80036;stroke-width:1.0;"/><path d="M141.7656,126.875 Q141.9219,126.6563 142.1094,126.5469 Q142.2969,126.4375 142.5156,126.4375 Q142.8906,126.4375 143.125,126.6953 Q143.3594,126.9531 143.3594,127.5625 L143.3594,129.0156 Q143.3594,129.625 143.125,129.8906 Q142.8906,130.1563 142.5156,130.1563 Q142.1719,130.1563 141.9688,129.9531 Q141.7656,129.7656 141.6563,129.25 Q141.6094,128.8906 141.4219,128.7031 Q141.0938,128.3281 140.4844,128.1094 Q139.875,127.8906 139.25,127.8906 Q138.4844,127.8906 137.8516,128.2188 Q137.2188,128.5469 136.7266,129.2969 Q136.2344,130.0469 136.2344,131.0781 L136.2344,132.1719 Q136.2344,133.4063 137.125,134.2266 Q138.0156,135.0469 139.6094,135.0469 Q140.5469,135.0469 141.2031,134.7969 Q141.5938,134.6406 142.0156,134.2031 Q142.2813,133.9375 142.4297,133.8594 Q142.5781,133.7813 142.7813,133.7813 Q143.1094,133.7813 143.3672,134.0391 Q143.625,134.2969 143.625,134.6406 Q143.625,134.9844 143.2813,135.3906 Q142.7813,135.9688 141.9844,136.2969 Q140.9063,136.75 139.6094,136.75 Q138.0938,136.75 136.8906,136.125 Q135.9063,135.625 135.2188,134.5547 Q134.5313,133.4844 134.5313,132.2031 L134.5313,131.0469 Q134.5313,129.7188 135.1484,128.5703 Q135.7656,127.4219 136.8594,126.8047 Q137.9531,126.1875 139.1875,126.1875 Q139.9219,126.1875 140.5703,126.3516 Q141.2188,126.5156 141.7656,126.875 Z " fill="#000000"/><text fill="#000000" font-family="sans-serif" font-size="12" lengthAdjust="spacingAndGlyphs" textLength="96" x="153" y="135.9102">CommandQueue</text><line style="stroke:#A80036;stroke-width:1.5;" x1="125" x2="251" y1="147" y2="147"/><line style="stroke:#A80036;stroke-width:1.5;" x1="125" x2="251" y1="155" y2="155"/><!--MD5=[175f95e045c1a44aed0b30df3abe0e5c]
 4 | class Program--><rect fill="#FEFECE" filter="url(#f1fqqfcb7b1wkx)" height="48" id="Program" style="stroke:#A80036;stroke-width:1.5;" width="80" x="287" y="115"/><ellipse cx="302" cy="131" fill="#ADD1B2" rx="11" ry="11" style="stroke:#A80036;stroke-width:1.0;"/><path d="M304.7656,126.875 Q304.9219,126.6563 305.1094,126.5469 Q305.2969,126.4375 305.5156,126.4375 Q305.8906,126.4375 306.125,126.6953 Q306.3594,126.9531 306.3594,127.5625 L306.3594,129.0156 Q306.3594,129.625 306.125,129.8906 Q305.8906,130.1563 305.5156,130.1563 Q305.1719,130.1563 304.9688,129.9531 Q304.7656,129.7656 304.6563,129.25 Q304.6094,128.8906 304.4219,128.7031 Q304.0938,128.3281 303.4844,128.1094 Q302.875,127.8906 302.25,127.8906 Q301.4844,127.8906 300.8516,128.2188 Q300.2188,128.5469 299.7266,129.2969 Q299.2344,130.0469 299.2344,131.0781 L299.2344,132.1719 Q299.2344,133.4063 300.125,134.2266 Q301.0156,135.0469 302.6094,135.0469 Q303.5469,135.0469 304.2031,134.7969 Q304.5938,134.6406 305.0156,134.2031 Q305.2813,133.9375 305.4297,133.8594 Q305.5781,133.7813 305.7813,133.7813 Q306.1094,133.7813 306.3672,134.0391 Q306.625,134.2969 306.625,134.6406 Q306.625,134.9844 306.2813,135.3906 Q305.7813,135.9688 304.9844,136.2969 Q303.9063,136.75 302.6094,136.75 Q301.0938,136.75 299.8906,136.125 Q298.9063,135.625 298.2188,134.5547 Q297.5313,133.4844 297.5313,132.2031 L297.5313,131.0469 Q297.5313,129.7188 298.1484,128.5703 Q298.7656,127.4219 299.8594,126.8047 Q300.9531,126.1875 302.1875,126.1875 Q302.9219,126.1875 303.5703,126.3516 Q304.2188,126.5156 304.7656,126.875 Z " fill="#000000"/><text fill="#000000" font-family="sans-serif" font-size="12" lengthAdjust="spacingAndGlyphs" textLength="48" x="316" y="135.9102">Program</text><line style="stroke:#A80036;stroke-width:1.5;" x1="288" x2="366" y1="147" y2="147"/><line style="stroke:#A80036;stroke-width:1.5;" x1="288" x2="366" y1="155" y2="155"/><!--MD5=[daeea0df713f5a1e6085e9f07acfb9b8]
 5 | class Kernel--><rect fill="#FEFECE" filter="url(#f1fqqfcb7b1wkx)" height="48" id="Kernel" style="stroke:#A80036;stroke-width:1.5;" width="68" x="402" y="115"/><ellipse cx="417" cy="131" fill="#ADD1B2" rx="11" ry="11" style="stroke:#A80036;stroke-width:1.0;"/><path d="M419.7656,126.875 Q419.9219,126.6563 420.1094,126.5469 Q420.2969,126.4375 420.5156,126.4375 Q420.8906,126.4375 421.125,126.6953 Q421.3594,126.9531 421.3594,127.5625 L421.3594,129.0156 Q421.3594,129.625 421.125,129.8906 Q420.8906,130.1563 420.5156,130.1563 Q420.1719,130.1563 419.9688,129.9531 Q419.7656,129.7656 419.6563,129.25 Q419.6094,128.8906 419.4219,128.7031 Q419.0938,128.3281 418.4844,128.1094 Q417.875,127.8906 417.25,127.8906 Q416.4844,127.8906 415.8516,128.2188 Q415.2188,128.5469 414.7266,129.2969 Q414.2344,130.0469 414.2344,131.0781 L414.2344,132.1719 Q414.2344,133.4063 415.125,134.2266 Q416.0156,135.0469 417.6094,135.0469 Q418.5469,135.0469 419.2031,134.7969 Q419.5938,134.6406 420.0156,134.2031 Q420.2813,133.9375 420.4297,133.8594 Q420.5781,133.7813 420.7813,133.7813 Q421.1094,133.7813 421.3672,134.0391 Q421.625,134.2969 421.625,134.6406 Q421.625,134.9844 421.2813,135.3906 Q420.7813,135.9688 419.9844,136.2969 Q418.9063,136.75 417.6094,136.75 Q416.0938,136.75 414.8906,136.125 Q413.9063,135.625 413.2188,134.5547 Q412.5313,133.4844 412.5313,132.2031 L412.5313,131.0469 Q412.5313,129.7188 413.1484,128.5703 Q413.7656,127.4219 414.8594,126.8047 Q415.9531,126.1875 417.1875,126.1875 Q417.9219,126.1875 418.5703,126.3516 Q419.2188,126.5156 419.7656,126.875 Z " fill="#000000"/><text fill="#000000" font-family="sans-serif" font-size="12" lengthAdjust="spacingAndGlyphs" textLength="36" x="431" y="135.9102">Kernel</text><line style="stroke:#A80036;stroke-width:1.5;" x1="403" x2="469" y1="147" y2="147"/><line style="stroke:#A80036;stroke-width:1.5;" x1="403" x2="469" y1="155" y2="155"/><!--MD5=[5a085a4c8838457085fbed9f488cfeb1]
 6 | class SubDevice--><rect fill="#FEFECE" filter="url(#f1fqqfcb7b1wkx)" height="48" id="SubDevice" style="stroke:#A80036;stroke-width:1.5;" width="91" x="505.5" y="115"/><ellipse cx="520.5" cy="131" fill="#ADD1B2" rx="11" ry="11" style="stroke:#A80036;stroke-width:1.0;"/><path d="M523.2656,126.875 Q523.4219,126.6563 523.6094,126.5469 Q523.7969,126.4375 524.0156,126.4375 Q524.3906,126.4375 524.625,126.6953 Q524.8594,126.9531 524.8594,127.5625 L524.8594,129.0156 Q524.8594,129.625 524.625,129.8906 Q524.3906,130.1563 524.0156,130.1563 Q523.6719,130.1563 523.4688,129.9531 Q523.2656,129.7656 523.1563,129.25 Q523.1094,128.8906 522.9219,128.7031 Q522.5938,128.3281 521.9844,128.1094 Q521.375,127.8906 520.75,127.8906 Q519.9844,127.8906 519.3516,128.2188 Q518.7188,128.5469 518.2266,129.2969 Q517.7344,130.0469 517.7344,131.0781 L517.7344,132.1719 Q517.7344,133.4063 518.625,134.2266 Q519.5156,135.0469 521.1094,135.0469 Q522.0469,135.0469 522.7031,134.7969 Q523.0938,134.6406 523.5156,134.2031 Q523.7813,133.9375 523.9297,133.8594 Q524.0781,133.7813 524.2813,133.7813 Q524.6094,133.7813 524.8672,134.0391 Q525.125,134.2969 525.125,134.6406 Q525.125,134.9844 524.7813,135.3906 Q524.2813,135.9688 523.4844,136.2969 Q522.4063,136.75 521.1094,136.75 Q519.5938,136.75 518.3906,136.125 Q517.4063,135.625 516.7188,134.5547 Q516.0313,133.4844 516.0313,132.2031 L516.0313,131.0469 Q516.0313,129.7188 516.6484,128.5703 Q517.2656,127.4219 518.3594,126.8047 Q519.4531,126.1875 520.6875,126.1875 Q521.4219,126.1875 522.0703,126.3516 Q522.7188,126.5156 523.2656,126.875 Z " fill="#000000"/><text fill="#000000" font-family="sans-serif" font-size="12" lengthAdjust="spacingAndGlyphs" textLength="59" x="534.5" y="135.9102">SubDevice</text><line style="stroke:#A80036;stroke-width:1.5;" x1="506.5" x2="595.5" y1="147" y2="147"/><line style="stroke:#A80036;stroke-width:1.5;" x1="506.5" x2="595.5" y1="155" y2="155"/><!--MD5=[7de028eb4269b1f6a1a3b34322269b70]
 7 | reverse link Context to cl_device_id--><path codeLine="8" d="M277.88,50.16 C233.03,66.8 164.94,92.25 106,115 C100.54,117.11 94.81,119.35 89.15,121.57 " fill="none" id="Context-backto-cl_device_id" style="stroke:#A80036;stroke-width:1.0;"/><polygon fill="#FFFFFF" points="290.16,45.6,283.143,43.938,278.91,49.7759,285.927,51.4379,290.16,45.6" style="stroke:#A80036;stroke-width:1.0;"/><text fill="#000000" font-family="sans-serif" font-size="13" lengthAdjust="spacingAndGlyphs" textLength="20" x="79.4684" y="110.6841">1..*</text><!--MD5=[64d49767c51577c53c2045b7d6ea0a0a]
 8 | reverse link cl_device_id to CommandQueue--><path codeLine="9" d="M102.07,139 C109.32,139 116.56,139 123.81,139 " fill="none" id="cl_device_id-backto-CommandQueue" style="stroke:#A80036;stroke-width:1.0;"/><polygon fill="#FFFFFF" points="89.02,139,95.02,143,101.02,139,95.02,135,89.02,139" style="stroke:#A80036;stroke-width:1.0;"/><text fill="#000000" font-family="sans-serif" font-size="13" lengthAdjust="spacingAndGlyphs" textLength="20" x="96.1592" y="135.6882">1..*</text><!--MD5=[58695bd4f72c9dba5e61912a086b19ef]
 9 | reverse link Context to CommandQueue--><path codeLine="10" d="M286.31,63.03 C264.66,79.54 238.48,99.5 218.58,114.68 " fill="none" id="Context-backto-CommandQueue" style="stroke:#A80036;stroke-width:1.0;"/><polygon fill="#A80036" points="296.84,55,289.6437,55.4617,287.3021,62.2821,294.4984,61.8203,296.84,55" style="stroke:#A80036;stroke-width:1.0;"/><text fill="#000000" font-family="sans-serif" font-size="13" lengthAdjust="spacingAndGlyphs" textLength="20" x="203.5964" y="104.34">1..*</text><!--MD5=[f385cb0276476e773e02ec29138691f2]
10 | reverse link Context to Program--><path codeLine="12" d="M327,68.34 C327,83.68 327,101.1 327,114.68 " fill="none" id="Context-backto-Program" style="stroke:#A80036;stroke-width:1.0;"/><polygon fill="#A80036" points="327,55,323,61,327,67,331,61,327,55" style="stroke:#A80036;stroke-width:1.0;"/><text fill="#000000" font-family="sans-serif" font-size="13" lengthAdjust="spacingAndGlyphs" textLength="20" x="304.5625" y="104.34">1..*</text><!--MD5=[cdc35776229e961d278ce1aa3cef2c3c]
11 | reverse link Program to Kernel--><path codeLine="13" d="M380.06,139 C387.3,139 394.54,139 401.78,139 " fill="none" id="Program-backto-Kernel" style="stroke:#A80036;stroke-width:1.0;"/><polygon fill="#A80036" points="367.02,139,373.02,143,379.02,139,373.02,135,367.02,139" style="stroke:#A80036;stroke-width:1.0;"/><text fill="#000000" font-family="sans-serif" font-size="13" lengthAdjust="spacingAndGlyphs" textLength="20" x="373.9033" y="135.1014">1..*</text><!--MD5=[39cfa9c45a7b2d8817314e3a153d9b79]
12 | reverse link Context to Kernel--><path codeLine="14" d="M360.08,64.17 C376.82,80.45 396.77,99.85 412.02,114.68 " fill="none" id="Context-backto-Kernel" style="stroke:#A80036;stroke-width:1.0;"/><polygon fill="#A80036" points="350.65,55,352.1684,62.0494,359.2596,63.3591,357.7412,56.3097,350.65,55" style="stroke:#A80036;stroke-width:1.0;"/><text fill="#000000" font-family="sans-serif" font-size="13" lengthAdjust="spacingAndGlyphs" textLength="20" x="383.7374" y="104.34">1..*</text><!--MD5=[ba5fd4c84a27ff9b35bc22e4ba232c38]
13 | reverse link Context to SubDevice--><path codeLine="16" d="M375.94,55.16 C414.48,73.4 467.8,98.63 505.43,116.44 " fill="none" id="Context-backto-SubDevice" style="stroke:#A80036;stroke-width:1.0;"/><polygon fill="#A80036" points="363.99,49.5,367.6991,55.6841,374.834,54.6387,371.1249,48.4547,363.99,49.5" style="stroke:#A80036;stroke-width:1.0;"/><text fill="#000000" font-family="sans-serif" font-size="13" lengthAdjust="spacingAndGlyphs" textLength="20" x="477.6155" y="109.6623">0..*</text><!--MD5=[f3ab354c9ad3340febefe3ac3bea5226]
14 | @startuml
15 | 
16 | 'Copyright (c) 2021 Via Technology Ltd. All Rights Reserved.
17 | 
18 | ' title via::opencl Context objects
19 | 
20 | object cl_device_id
21 | 
22 | Context o- - "1..*" cl_device_id
23 | cl_device_id o- "1..*" CommandQueue
24 | Context *- - "1..*" CommandQueue
25 | 
26 | Context *- - "1..*" Program
27 | Program *- "1..*" Kernel
28 | Context *- - "1..*" Kernel
29 | 
30 | Context *- - "0..*" SubDevice
31 | 
32 | @enduml
33 | 
34 | @startuml
35 | 
36 | 
37 | 
38 | object cl_device_id
39 | 
40 | Context o- - "1..*" cl_device_id
41 | cl_device_id o- "1..*" CommandQueue
42 | Context *- - "1..*" CommandQueue
43 | 
44 | Context *- - "1..*" Program
45 | Program *- "1..*" Kernel
46 | Context *- - "1..*" Kernel
47 | 
48 | Context *- - "0..*" SubDevice
49 | 
50 | @enduml
51 | 
52 | PlantUML version 1.2020.26(Mon Dec 21 17:45:07 GMT 2020)
53 | (GPL source distribution)
54 | Java Runtime: Java(TM) SE Runtime Environment
55 | JVM: Java HotSpot(TM) Client VM
56 | Default Encoding: Cp1252
57 | Language: en
58 | Country: GB
59 | --></g></svg>


--------------------------------------------------------------------------------
/tests/integration_test.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2020-2021 Via Technology Ltd. All Rights Reserved.
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //    http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | extern crate opencl3;
 16 | 
 17 | use cl3::device::CL_DEVICE_TYPE_GPU;
 18 | use opencl3::Result;
 19 | use opencl3::command_queue::{CL_QUEUE_PROFILING_ENABLE, CommandQueue};
 20 | use opencl3::context::Context;
 21 | use opencl3::device::Device;
 22 | use opencl3::kernel::{ExecuteKernel, Kernel};
 23 | use opencl3::memory::{Buffer, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY};
 24 | use opencl3::platform::get_platforms;
 25 | use opencl3::program::Program;
 26 | use opencl3::types::{CL_BLOCKING, CL_NON_BLOCKING, cl_event, cl_float};
 27 | use std::ptr;
 28 | 
 29 | const PROGRAM_SOURCE: &str = r#"
 30 | kernel void saxpy_float (global float* z,
 31 |     global float const* x,
 32 |     global float const* y,
 33 |     float a)
 34 | {
 35 | size_t i = get_global_id(0);
 36 | z[i] = a*x[i] + y[i];
 37 | }"#; // OpenCL C source of one kernel: each work-item computes z[i] = a*x[i] + y[i]
 38 | 
 39 | const KERNEL_NAME: &str = "saxpy_float"; // name of the kernel function defined in PROGRAM_SOURCE
 40 | 
 41 | #[test] // End-to-end saxpy run (z = a*x + y) on the first GPU device of the first platform
 42 | #[ignore] // NOTE(review): presumably ignored because it needs an OpenCL GPU at run time - confirm
 43 | fn test_opencl_1_2_example() -> Result<()> {
 44 |     let platforms = get_platforms()?;
 45 |     assert!(0 < platforms.len());
 46 | 
 47 |     // Get the first platform (the assert above guarantees there is at least one)
 48 |     let platform = &platforms[0];
 49 | 
 50 |     let devices = platform
 51 |         .get_devices(CL_DEVICE_TYPE_GPU)
 52 |         .expect("Platform::get_devices failed");
 53 |     assert!(0 < devices.len());
 54 | 
 55 |     let platform_name = platform.name()?;
 56 |     println!("Platform Name: {:?}", platform_name);
 57 | 
 58 |     // Wrap the first GPU device id in a Device and print its vendor details
 59 |     let device = Device::new(devices[0]);
 60 |     let vendor = device.vendor().expect("Device.vendor failed");
 61 |     let vendor_id = device.vendor_id().expect("Device.vendor_id failed");
 62 |     println!("OpenCL device vendor name: {}", vendor);
 63 |     println!("OpenCL device vendor id: {:X}", vendor_id);
 64 | 
 65 |     /////////////////////////////////////////////////////////////////////
 66 |     // Initialise OpenCL compute environment
 67 | 
 68 |     // Create a Context on the OpenCL device
 69 |     let context = Context::from_device(&device).expect("Context::from_device failed");
 70 | 
 71 |     // Build the OpenCL program source and create the kernel.
 72 |     let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, "")
 73 |         .expect("Program::create_and_build_from_source failed");
 74 | 
 75 |     let kernel = Kernel::create(&program, KERNEL_NAME).expect("Kernel::create failed");
 76 | 
 77 |     // Create a command_queue on the Context's device, with profiling enabled
 78 |     let queue = CommandQueue::create_default(&context, CL_QUEUE_PROFILING_ENABLE)
 79 |         .expect("CommandQueue::create_default failed");
 80 | 
 81 |     /////////////////////////////////////////////////////////////////////
 82 |     // Compute data
 83 | 
 84 |     // The input data: `ones` is all 1.0 and sums[i] = 1 + i
 85 |     const ARRAY_SIZE: usize = 1000;
 86 |     let ones: [cl_float; ARRAY_SIZE] = [1.0; ARRAY_SIZE];
 87 |     let mut sums: [cl_float; ARRAY_SIZE] = [0.0; ARRAY_SIZE];
 88 |     for i in 0..ARRAY_SIZE {
 89 |         sums[i] = 1.0 + 1.0 * i as cl_float;
 90 |     }
 91 | 
 92 |     // Create OpenCL device buffers; a null pointer is passed, the inputs are written below
 93 |     let mut x = unsafe {
 94 |         Buffer::<cl_float>::create(&context, CL_MEM_READ_ONLY, ARRAY_SIZE, ptr::null_mut())?
 95 |     };
 96 |     let mut y = unsafe {
 97 |         Buffer::<cl_float>::create(&context, CL_MEM_READ_ONLY, ARRAY_SIZE, ptr::null_mut())?
 98 |     };
 99 |     let z = unsafe {
100 |         Buffer::<cl_float>::create(&context, CL_MEM_WRITE_ONLY, ARRAY_SIZE, ptr::null_mut())?
101 |     };
102 | 
103 |     // Blocking write of `ones` into x
104 |     let _x_write_event = unsafe { queue.enqueue_write_buffer(&mut x, CL_BLOCKING, 0, &ones, &[])? };
105 | 
106 |     // Non-blocking write of `sums` into y; the kernel below waits on y_write_event
107 |     let y_write_event =
108 |         unsafe { queue.enqueue_write_buffer(&mut y, CL_NON_BLOCKING, 0, &sums, &[])? };
109 | 
110 |     // a value for the kernel function
111 |     let a: cl_float = 300.0;
112 | 
113 |     // Use the ExecuteKernel builder to set the kernel buffer and
114 |     // cl_float value arguments, before setting the one dimensional
115 |     // global_work_size for the call to enqueue_nd_range.
116 |     // Unwraps the Result to get the kernel execution event.
117 |     let kernel_event = unsafe {
118 |         ExecuteKernel::new(&kernel)
119 |             .set_arg(&z)
120 |             .set_arg(&x)
121 |             .set_arg(&y)
122 |             .set_arg(&a)
123 |             .set_global_work_size(ARRAY_SIZE)
124 |             .set_wait_event(&y_write_event)
125 |             .enqueue_nd_range(&queue)?
126 |     };
127 | 
128 |     let mut events: Vec<cl_event> = Vec::default();
129 |     events.push(kernel_event.get());
130 | 
131 |     // Create a results array to hold the results from the OpenCL device
132 |     // and enqueue a read command to read the device buffer into the array
133 |     // after the kernel event completes.
134 |     let mut results: [cl_float; ARRAY_SIZE] = [0.0; ARRAY_SIZE];
135 |     let _event =
136 |         unsafe { queue.enqueue_read_buffer(&z, CL_NON_BLOCKING, 0, &mut results, &events)? };
137 | 
138 |     // Block until all commands on the queue have completed
139 |     queue.finish()?;
140 | 
141 |     assert_eq!(1300.0, results[ARRAY_SIZE - 1]); // 300 * 1 + (1 + 999) = 1300
142 |     println!("results back: {}", results[ARRAY_SIZE - 1]);
143 | 
144 |     // Calculate the kernel duration from the kernel_event's profiling start and end values
145 |     let start_time = kernel_event.profiling_command_start()?;
146 |     let end_time = kernel_event.profiling_command_end()?;
147 |     let duration = end_time - start_time;
148 |     println!("kernel execution duration (ns): {}", duration);
149 | 
150 |     Ok(())
151 | }
152 | 
153 | #[cfg(any(feature = "CL_VERSION_2_0", feature = "dynamic"))]
154 | #[test]
155 | #[ignore]
156 | fn test_opencl_svm_example() -> Result<()> {
157 |     use cl3::device::{CL_DEVICE_SVM_COARSE_GRAIN_BUFFER, CL_DEVICE_SVM_FINE_GRAIN_BUFFER};
158 |     use opencl3::command_queue::CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
159 |     use opencl3::memory::{CL_MAP_READ, CL_MAP_WRITE};
160 |     use opencl3::svm::SvmVec;
161 | 
162 |     let platforms = get_platforms()?;
163 |     assert!(!platforms.is_empty());
164 | 
165 |     /////////////////////////////////////////////////////////////////////
166 |     // Query OpenCL compute environment
167 |     let opencl_2: &str = "OpenCL 2";
168 |     let opencl_3: &str = "OpenCL 3";
169 | 
170 |     // Find the first SVM capable GPU; stop there so later platforms cannot reset is_svm_capable
171 |     let mut device_id = ptr::null_mut();
172 |     let mut is_svm_capable: bool = false;
173 |     'platforms: for p in platforms {
174 |         let platform_version = p.version()?;
175 |         if platform_version.contains(&opencl_2) || platform_version.contains(&opencl_3) {
176 |             let devices = p
177 |                 .get_devices(CL_DEVICE_TYPE_GPU)
178 |                 .expect("Platform::get_devices failed");
179 | 
180 |             for dev_id in devices {
181 |                 let device = Device::new(dev_id);
182 |                 let svm_mem_capability = device.svm_mem_capability();
183 |                 is_svm_capable = 0 < svm_mem_capability
184 |                     & (CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER);
185 |                 if is_svm_capable {
186 |                     device_id = dev_id;
187 |                     break 'platforms;
188 |                 }
189 |             }
190 |         }
191 |     }
192 | 
193 |     if is_svm_capable {
194 |         // Create OpenCL context from the OpenCL svm device
195 |         let device = Device::new(device_id);
196 |         let vendor = device.vendor().expect("Device.vendor failed");
197 |         let vendor_id = device.vendor_id().expect("Device.vendor_id failed");
198 |         println!("OpenCL device vendor name: {}", vendor);
199 |         println!("OpenCL device vendor id: {:X}", vendor_id);
200 | 
201 |         /////////////////////////////////////////////////////////////////////
202 |         // Initialise OpenCL compute environment
203 | 
204 |         // Create a Context on the OpenCL svm device
205 |         let context = Context::from_device(&device).expect("Context::from_device failed");
206 | 
207 |         // Build the OpenCL program source and create the kernel.
208 |         let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, "")
209 |             .expect("Program::create_and_build_from_source failed");
210 | 
211 |         let kernel = Kernel::create(&program, KERNEL_NAME).expect("Kernel::create failed");
212 | 
213 |         // Create a command_queue on the Context's device
214 |         let queue = CommandQueue::create_default_with_properties(
215 |             &context,
216 |             CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
217 |             0,
218 |         )
219 |         .expect("CommandQueue::create_default_with_properties failed");
220 | 
221 |         /////////////////////////////////////////////////////////////////////
222 |         // Compute data
223 | 
224 |         // Get the svm capability of all the devices in the context.
225 |         let svm_capability = context.get_svm_mem_capability();
226 |         assert!(0 < svm_capability);
227 | 
228 |         let is_fine_grained_svm: bool = 0 < svm_capability & CL_DEVICE_SVM_FINE_GRAIN_BUFFER;
229 |         println!("OpenCL SVM is fine grained: {}", is_fine_grained_svm);
230 | 
231 |         // Create SVM vectors for the data
232 | 
233 |         // The SVM vectors
234 |         const ARRAY_SIZE: usize = 1000;
235 |         let mut ones =
236 |             SvmVec::<cl_float>::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed");
237 |         let mut sums =
238 |             SvmVec::<cl_float>::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed");
239 |         let mut results =
240 |             SvmVec::<cl_float>::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed");
241 | 
242 |         let a: cl_float = 300.0;
243 |         if is_fine_grained_svm {
244 |             // The input data
245 |             for i in 0..ARRAY_SIZE {
246 |                 ones[i] = 1.0;
247 |             }
248 | 
249 |             for i in 0..ARRAY_SIZE {
250 |                 sums[i] = 1.0 + 1.0 * i as cl_float;
251 |             }
252 | 
253 |             // Make ones and sums immutable
254 |             let ones = ones;
255 |             let sums = sums;
256 | 
257 |             // Use the ExecuteKernel builder to set the kernel buffer and
258 |             // cl_float value arguments, before setting the one dimensional
259 |             // global_work_size for the call to enqueue_nd_range.
260 |             // Unwraps the Result to get the kernel execution event.
261 |             let kernel_event = unsafe {
262 |                 ExecuteKernel::new(&kernel)
263 |                     .set_arg_svm(results.as_mut_ptr())
264 |                     .set_arg_svm(ones.as_ptr())
265 |                     .set_arg_svm(sums.as_ptr())
266 |                     .set_arg(&a)
267 |                     .set_global_work_size(ARRAY_SIZE)
268 |                     .enqueue_nd_range(&queue)?
269 |             };
270 | 
271 |             // Wait for the kernel_event to complete
272 |             kernel_event.wait()?;
273 | 
274 |             assert_eq!(1300.0, results[ARRAY_SIZE - 1]);
275 |             println!("results back: {}", results[ARRAY_SIZE - 1]);
276 | 
277 |             // Calculate the kernel duration, from the kernel_event
278 |             let start_time = kernel_event.profiling_command_start()?;
279 |             let end_time = kernel_event.profiling_command_end()?;
280 |             let duration = end_time - start_time;
281 |             println!("kernel execution duration (ns): {}", duration);
282 |         } else {
283 |             // !is_fine_grained_svm
284 | 
285 |             // Resize and map the input SVM vectors, before setting their data
286 |             unsafe {
287 |                 ones.set_len(ARRAY_SIZE)?;
288 |                 sums.set_len(ARRAY_SIZE)?;
289 |                 queue.enqueue_svm_map(CL_BLOCKING, CL_MAP_WRITE, &mut ones, &[])?;
290 |                 queue.enqueue_svm_map(CL_BLOCKING, CL_MAP_WRITE, &mut sums, &[])?;
291 |             }
292 |             // The input data
293 |             for i in 0..ARRAY_SIZE {
294 |                 ones[i] = 1.0;
295 |             }
296 | 
297 |             for i in 0..ARRAY_SIZE {
298 |                 sums[i] = 1.0 + 1.0 * i as cl_float;
299 |             }
300 | 
301 |             // Make ones and sums immutable
302 |             let ones = ones;
303 |             let sums = sums;
304 | 
305 |             let mut events: Vec<cl_event> = Vec::default();
306 |             let unmap_sums_event = unsafe { queue.enqueue_svm_unmap(&sums, &[])? };
307 |             let unmap_ones_event = unsafe { queue.enqueue_svm_unmap(&ones, &[])? };
308 |             events.push(unmap_sums_event.get());
309 |             events.push(unmap_ones_event.get());
310 | 
311 |             // Use the ExecuteKernel builder to set the kernel buffer and
312 |             // cl_float value arguments, before setting the one dimensional
313 |             // global_work_size for the call to enqueue_nd_range.
314 |             // Unwraps the Result to get the kernel execution event.
315 |             let kernel_event = unsafe {
316 |                 ExecuteKernel::new(&kernel)
317 |                     .set_arg_svm(results.as_mut_ptr())
318 |                     .set_arg_svm(ones.as_ptr())
319 |                     .set_arg_svm(sums.as_ptr())
320 |                     .set_arg(&a)
321 |                     .set_global_work_size(ARRAY_SIZE)
322 |                     .set_event_wait_list(&events)
323 |                     .enqueue_nd_range(&queue)?
324 |             };
325 | 
326 |             // Wait for the kernel_event to complete
327 |             kernel_event.wait()?;
328 | 
329 |             // Map SVM results before reading them
330 |             let _map_results_event =
331 |                 unsafe { queue.enqueue_svm_map(CL_BLOCKING, CL_MAP_READ, &mut results, &[])? };
332 | 
333 |             assert_eq!(1300.0, results[ARRAY_SIZE - 1]);
334 |             println!("results back: {}", results[ARRAY_SIZE - 1]);
335 | 
336 |             // Calculate the kernel duration from the kernel_event
337 |             let start_time = kernel_event.profiling_command_start()?;
338 |             let end_time = kernel_event.profiling_command_end()?;
339 |             let duration = end_time - start_time;
340 |             println!("kernel execution duration (ns): {}", duration);
341 | 
342 |             /////////////////////////////////////////////////////////////////////
343 |             // Clean up
344 |             let unmap_results_event = unsafe { queue.enqueue_svm_unmap(&results, &[])? };
345 |             unmap_results_event.wait()?;
346 |             println!("SVM buffers unmapped");
347 |         }
348 |     } else {
349 |         println!("OpenCL SVM capable device not found")
350 |     }
351 | 
352 |     Ok(())
353 | }
354 | 


--------------------------------------------------------------------------------
/src/command_buffer.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2021-2024 Via Technology Ltd.
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //    http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | //! OpenCL Command Buffers extension. Enable with feature: cl_khr_command_buffer.
 16 | 
 17 | #![allow(clippy::too_many_arguments, clippy::missing_safety_doc)]
 18 | 
 19 | use super::Result;
 20 | use super::event::Event;
 21 | use super::memory::*;
 22 | 
 23 | #[allow(unused_imports)]
 24 | use cl3::ext::{
 25 |     CL_COMMAND_BUFFER_NUM_QUEUES_KHR, CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR,
 26 |     CL_COMMAND_BUFFER_QUEUES_KHR, CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR,
 27 |     CL_COMMAND_BUFFER_STATE_KHR, cl_bool, cl_command_buffer_info_khr, cl_command_buffer_khr,
 28 |     cl_command_buffer_properties_khr, cl_command_properties_khr, cl_mutable_command_khr,
 29 |     cl_sync_point_khr, command_barrier_with_wait_list_khr, command_copy_buffer_khr,
 30 |     command_copy_buffer_rect_khr, command_copy_buffer_to_image_khr, command_copy_image_khr,
 31 |     command_copy_image_to_buffer_khr, command_fill_buffer_khr, command_fill_image_khr,
 32 |     command_nd_range_kernel_khr, command_svm_mem_fill_khr, command_svm_memcpy_khr,
 33 |     create_command_buffer_khr, enqueue_command_buffer_khr, finalize_command_buffer_khr,
 34 |     get_command_buffer_data_khr, get_command_buffer_info_khr,
 35 |     get_command_buffer_mutable_dispatch_data, release_command_buffer_khr,
 36 | };
 37 | #[allow(unused_imports)]
 38 | use cl3::types::{cl_command_queue, cl_event, cl_kernel, cl_mem, cl_uint};
 39 | use libc::{c_void, size_t};
 40 | use std::mem;
 41 | use std::ptr;
 42 | 
 43 | /// An OpenCL command-buffer.
 44 | ///
 45 | /// This extension adds the ability to record and replay buffers of OpenCL commands.  
 46 | /// See [cl_khr_command_buffer](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_command_buffer)
 47 | ///
 48 | /// The struct wraps a raw `cl_command_buffer_khr` handle; its `Drop` implementation
 49 | /// calls `release_command_buffer_khr` on that handle.
 50 | #[derive(Debug)]
 51 | pub struct CommandBuffer {
 52 |     // The raw OpenCL command-buffer handle.
 53 |     buffer: cl_command_buffer_khr,
 54 | }
 51 | 
 52 | impl From<CommandBuffer> for cl_command_buffer_khr {
 53 |     /// Consume the wrapper and hand its raw handle to the caller.
 54 |     ///
 55 |     /// The wrapper's `Drop` is suppressed, so the returned handle keeps the
 56 |     /// reference that the wrapper held. The caller becomes responsible for
 57 |     /// releasing it.
 58 |     fn from(value: CommandBuffer) -> Self {
 59 |         // Letting `value` drop here would release the very handle being returned.
 60 |         let value = std::mem::ManuallyDrop::new(value);
 61 |         value.buffer
 62 |     }
 63 | }
 57 | 
 58 | impl Drop for CommandBuffer {
 59 |     /// Release the wrapped command-buffer handle, panicking if the release fails.
 60 |     fn drop(&mut self) {
 61 |         let released = unsafe { release_command_buffer_khr(self.buffer) };
 62 |         released.expect("Error: clReleaseCommandBufferKHR");
 63 |     }
 64 | }
 65 | 
 66 | // NOTE(review): these impls assert that the raw handle may be moved to and shared
 67 | // between threads; nothing in this file enforces that - confirm against the OpenCL
 68 | // thread-safety rules for command-buffers.
 69 | unsafe impl Send for CommandBuffer {}
 70 | unsafe impl Sync for CommandBuffer {}
 68 | 
 69 | impl CommandBuffer {
 70 |     /// Wrap a raw `cl_command_buffer_khr` handle; the handle is stored as given.
 71 |     const fn new(buffer: cl_command_buffer_khr) -> Self {
 72 |         Self { buffer }
 73 |     }
 73 | 
 74 |     /// Get the underlying OpenCL cl_command_buffer_khr.
 75 |     ///
 76 |     /// Only a copy of the handle is returned; this object keeps its own copy
 77 |     /// and still releases it when dropped.
 78 |     pub const fn get(&self) -> cl_command_buffer_khr {
 79 |         self.buffer
 80 |     }
 78 | 
 79 |     /// Create a command-buffer that can record commands to the specified queues.
 80 |     ///
 81 |     /// * `queues` - the command-queues that the command-buffer can record to.
 82 |     /// * `properties` - a zero terminated list of command-buffer properties,
 83 |     ///   or an empty slice when there are no properties.
 84 |     ///
 85 |     /// returns a Result containing the new command-buffer
 86 |     /// or the error code from the OpenCL C API function.
 87 |     pub fn create(
 88 |         queues: &[cl_command_queue],
 89 |         properties: &[cl_command_buffer_properties_khr],
 90 |     ) -> Result<Self> {
 91 |         // An empty slice has a dangling, non-null pointer: pass null instead so
 92 |         // that OpenCL does not try to read a property list from it.
 93 |         let properties_ptr = if properties.is_empty() {
 94 |             ptr::null()
 95 |         } else {
 96 |             properties.as_ptr()
 97 |         };
 98 |         let buffer = create_command_buffer_khr(queues, properties_ptr)?;
 99 |         Ok(Self::new(buffer))
100 |     }
 87 | 
 88 |     /// Finalizes command recording ready for enqueuing the command-buffer on a command-queue.
 89 |     ///
 90 |     /// returns an empty Result or the error code from the OpenCL C API function.
 91 |     pub fn finalize(&self) -> Result<()> {
 92 |         finalize_command_buffer_khr(self.buffer)?;
 93 |         Ok(())
 94 |     }
 92 | 
 93 |     /// Enqueues a command-buffer to execute on command-queues specified by queues,
 94 |     /// or on default command-queues used during recording if queues is empty.
 95 |     ///
 96 |     /// * `queues` - the command-queues to execute on, or an empty slice to use
 97 |     ///   the queues used during recording.
 98 |     /// * `event_wait_list` - events that must complete before the command-buffer runs.
 99 |     ///
100 |     /// returns a Result containing the new OpenCL event
101 |     /// or the error code from the OpenCL C API function.
102 |     pub unsafe fn enqueue(
103 |         &self,
104 |         queues: &mut [cl_command_queue],
105 |         event_wait_list: &[cl_event],
106 |     ) -> Result<Event> {
107 |         // An empty slice still has a non-null (dangling) pointer, while OpenCL
108 |         // requires a null pointer whenever the matching count is zero.
109 |         let queues_ptr = if queues.is_empty() {
110 |             ptr::null_mut()
111 |         } else {
112 |             queues.as_mut_ptr()
113 |         };
114 |         let wait_list_ptr = if event_wait_list.is_empty() {
115 |             ptr::null()
116 |         } else {
117 |             event_wait_list.as_ptr()
118 |         };
119 |         unsafe {
120 |             let event = enqueue_command_buffer_khr(
121 |                 queues.len() as cl_uint,
122 |                 queues_ptr,
123 |                 self.buffer,
124 |                 event_wait_list.len() as cl_uint,
125 |                 wait_list_ptr,
126 |             )?;
127 |             Ok(Event::new(event))
128 |         }
129 |     }
115 | 
116 |     /// Records a barrier operation used as a synchronization point.
117 |     ///
118 |     /// The arguments are forwarded unchanged to `command_barrier_with_wait_list_khr`,
119 |     /// with a null pointer as its final argument.
120 |     ///
121 |     /// returns a Result containing the sync point written by the call
122 |     /// or the error code from the OpenCL C API function.
123 |     pub unsafe fn command_barrier_with_wait_list(
124 |         &self,
125 |         queue: cl_command_queue,
126 |         properties: *const cl_command_properties_khr,
127 |         sync_point_wait_list: &[cl_sync_point_khr],
128 |     ) -> Result<cl_sync_point_khr> {
129 |         unsafe {
130 |             let mut barrier_point = 0;
131 |             command_barrier_with_wait_list_khr(
132 |                 self.buffer,
133 |                 queue,
134 |                 properties,
135 |                 sync_point_wait_list,
136 |                 &mut barrier_point,
137 |                 ptr::null_mut(),
138 |             )?;
139 |             Ok(barrier_point)
140 |         }
141 |     }
136 | 
137 |     /// Records a command to copy from one buffer object to another.
138 |     ///
139 |     /// The buffer handles, offsets and `size` are forwarded unchanged to
140 |     /// `command_copy_buffer_khr`, with a null pointer as its final argument.
141 |     ///
142 |     /// returns a Result containing the sync point written by the call
143 |     /// or the error code from the OpenCL C API function.
144 |     pub unsafe fn copy_buffer<T>(
145 |         &self,
146 |         queue: cl_command_queue,
147 |         properties: *const cl_command_properties_khr,
148 |         src_buffer: &Buffer<T>,
149 |         dst_buffer: &mut Buffer<T>,
150 |         src_offset: size_t,
151 |         dst_offset: size_t,
152 |         size: size_t,
153 |         sync_point_wait_list: &[cl_sync_point_khr],
154 |     ) -> Result<cl_sync_point_khr> {
155 |         let mut recorded = 0;
156 |         unsafe {
157 |             command_copy_buffer_khr(
158 |                 self.buffer,
159 |                 queue,
160 |                 properties,
161 |                 src_buffer.get(),
162 |                 dst_buffer.get_mut(),
163 |                 src_offset,
164 |                 dst_offset,
165 |                 size,
166 |                 sync_point_wait_list,
167 |                 &mut recorded,
168 |                 ptr::null_mut(),
169 |             )?;
170 |         }
171 |         Ok(recorded)
172 |     }
167 | 
168 |     /// Records a command to copy a rectangular region from a buffer object to another buffer object.
169 |     ///
170 |     /// The origins, region and pitches are forwarded unchanged to
171 |     /// `command_copy_buffer_rect_khr`, with a null pointer as its final argument.
172 |     ///
173 |     /// returns a Result containing the sync point written by the call
174 |     /// or the error code from the OpenCL C API function.
175 |     pub unsafe fn copy_buffer_rect<T>(
176 |         &self,
177 |         queue: cl_command_queue,
178 |         properties: *const cl_command_properties_khr,
179 |         src_buffer: &Buffer<T>,
180 |         dst_buffer: &mut Buffer<T>,
181 |         src_origin: *const size_t,
182 |         dst_origin: *const size_t,
183 |         region: *const size_t,
184 |         src_row_pitch: size_t,
185 |         src_slice_pitch: size_t,
186 |         dst_row_pitch: size_t,
187 |         dst_slice_pitch: size_t,
188 |         sync_point_wait_list: &[cl_sync_point_khr],
189 |     ) -> Result<cl_sync_point_khr> {
190 |         let mut recorded = 0;
191 |         unsafe {
192 |             command_copy_buffer_rect_khr(
193 |                 self.buffer,
194 |                 queue,
195 |                 properties,
196 |                 src_buffer.get(),
197 |                 dst_buffer.get_mut(),
198 |                 src_origin,
199 |                 dst_origin,
200 |                 region,
201 |                 src_row_pitch,
202 |                 src_slice_pitch,
203 |                 dst_row_pitch,
204 |                 dst_slice_pitch,
205 |                 sync_point_wait_list,
206 |                 &mut recorded,
207 |                 ptr::null_mut(),
208 |             )?;
209 |         }
210 |         Ok(recorded)
211 |     }
206 | 
207 |     /// Records a command to copy a buffer object to an image object.
208 |     ///
209 |     /// The source offset, destination origin and region are forwarded unchanged to
210 |     /// `command_copy_buffer_to_image_khr`, with a null pointer as its final argument.
211 |     ///
212 |     /// returns a Result containing the sync point written by the call
213 |     /// or the error code from the OpenCL C API function.
214 |     pub unsafe fn copy_buffer_to_image<T>(
215 |         &self,
216 |         queue: cl_command_queue,
217 |         properties: *const cl_command_properties_khr,
218 |         src_buffer: &Buffer<T>,
219 |         dst_image: &mut Image,
220 |         src_offset: size_t,
221 |         dst_origin: *const size_t,
222 |         region: *const size_t,
223 |         sync_point_wait_list: &[cl_sync_point_khr],
224 |     ) -> Result<cl_sync_point_khr> {
225 |         let mut recorded = 0;
226 |         unsafe {
227 |             command_copy_buffer_to_image_khr(
228 |                 self.buffer,
229 |                 queue,
230 |                 properties,
231 |                 src_buffer.get(),
232 |                 dst_image.get_mut(),
233 |                 src_offset,
234 |                 dst_origin,
235 |                 region,
236 |                 sync_point_wait_list,
237 |                 &mut recorded,
238 |                 ptr::null_mut(),
239 |             )?;
240 |         }
241 |         Ok(recorded)
242 |     }
237 | 
238 |     /// Records a command to copy image objects.
239 |     ///
240 |     /// The origins and region are forwarded unchanged to `command_copy_image_khr`,
241 |     /// with a null pointer as its final argument.
242 |     ///
243 |     /// NOTE(review): `src_image` is taken by value, so it is dropped when this
244 |     /// function returns, and the type parameter `T` is not used by any argument,
245 |     /// so callers have to name it explicitly. Both differ from the sibling
246 |     /// `copy_image_to_buffer`; confirm whether this is intended.
247 |     ///
248 |     /// returns a Result containing the sync point written by the call
249 |     /// or the error code from the OpenCL C API function.
250 |     pub unsafe fn copy_image<T>(
251 |         &self,
252 |         queue: cl_command_queue,
253 |         properties: *const cl_command_properties_khr,
254 |         src_image: Image,
255 |         dst_image: &mut Image,
256 |         src_origin: *const size_t,
257 |         dst_origin: *const size_t,
258 |         region: *const size_t,
259 |         sync_point_wait_list: &[cl_sync_point_khr],
260 |     ) -> Result<cl_sync_point_khr> {
261 |         let mut recorded = 0;
262 |         unsafe {
263 |             command_copy_image_khr(
264 |                 self.buffer,
265 |                 queue,
266 |                 properties,
267 |                 src_image.get(),
268 |                 dst_image.get_mut(),
269 |                 src_origin,
270 |                 dst_origin,
271 |                 region,
272 |                 sync_point_wait_list,
273 |                 &mut recorded,
274 |                 ptr::null_mut(),
275 |             )?;
276 |         }
277 |         Ok(recorded)
278 |     }
268 | 
269 |     /// Records a command to copy an image object to a buffer object.
270 |     ///
271 |     /// The source origin, region and destination offset are forwarded unchanged to
272 |     /// `command_copy_image_to_buffer_khr`, with a null pointer as its final argument.
273 |     ///
274 |     /// returns a Result containing the sync point written by the call
275 |     /// or the error code from the OpenCL C API function.
276 |     pub unsafe fn copy_image_to_buffer<T>(
277 |         &self,
278 |         queue: cl_command_queue,
279 |         properties: *const cl_command_properties_khr,
280 |         src_image: &Image,
281 |         dst_buffer: &mut Buffer<T>,
282 |         src_origin: *const size_t,
283 |         region: *const size_t,
284 |         dst_offset: size_t,
285 |         sync_point_wait_list: &[cl_sync_point_khr],
286 |     ) -> Result<cl_sync_point_khr> {
287 |         let mut recorded = 0;
288 |         unsafe {
289 |             command_copy_image_to_buffer_khr(
290 |                 self.buffer,
291 |                 queue,
292 |                 properties,
293 |                 src_image.get(),
294 |                 dst_buffer.get_mut(),
295 |                 src_origin,
296 |                 region,
297 |                 dst_offset,
298 |                 sync_point_wait_list,
299 |                 &mut recorded,
300 |                 ptr::null_mut(),
301 |             )?;
302 |         }
303 |         Ok(recorded)
304 |     }
299 | 
300 |     /// Records a command to fill a buffer object with a pattern of a given pattern size.
301 |     ///
302 |     /// The pattern size passed to `command_fill_buffer_khr` is the size in bytes of
303 |     /// the whole `pattern` slice; a null pointer is passed as the final argument.
304 |     ///
305 |     /// returns a Result containing the sync point written by the call
306 |     /// or the error code from the OpenCL C API function.
307 |     #[allow(clippy::as_ptr_cast_mut)]
308 |     pub unsafe fn fill_buffer<T>(
309 |         &self,
310 |         queue: cl_command_queue,
311 |         properties: *const cl_command_properties_khr,
312 |         buffer: &mut Buffer<T>,
313 |         pattern: &[T],
314 |         offset: size_t,
315 |         size: size_t,
316 |         sync_point_wait_list: &[cl_sync_point_khr],
317 |     ) -> Result<cl_sync_point_khr> {
318 |         let pattern_ptr = pattern.as_ptr() as cl_mem;
319 |         let pattern_size = mem::size_of_val(pattern);
320 |         let mut recorded = 0;
321 |         unsafe {
322 |             command_fill_buffer_khr(
323 |                 self.buffer,
324 |                 queue,
325 |                 properties,
326 |                 buffer.get_mut(),
327 |                 pattern_ptr,
328 |                 pattern_size,
329 |                 offset,
330 |                 size,
331 |                 sync_point_wait_list,
332 |                 &mut recorded,
333 |                 ptr::null_mut(),
334 |             )?;
335 |         }
336 |         Ok(recorded)
337 |     }
330 | 
331 |     /// Records a command to fill an image object with a specified color.
332 |     ///
333 |     /// `fill_color`, `origin` and `region` are forwarded unchanged to
334 |     /// `command_fill_image_khr`, with a null pointer as its final argument.
335 |     ///
336 |     /// NOTE(review): the type parameter `T` is not used by any argument, so
337 |     /// callers have to name it explicitly; confirm whether it is still needed.
338 |     ///
339 |     /// returns a Result containing the sync point written by the call
340 |     /// or the error code from the OpenCL C API function.
341 |     pub unsafe fn fill_image<T>(
342 |         &self,
343 |         queue: cl_command_queue,
344 |         properties: *const cl_command_properties_khr,
345 |         image: &mut Image,
346 |         fill_color: *const c_void,
347 |         origin: *const size_t,
348 |         region: *const size_t,
349 |         sync_point_wait_list: &[cl_sync_point_khr],
350 |     ) -> Result<cl_sync_point_khr> {
351 |         let mut recorded = 0;
352 |         unsafe {
353 |             command_fill_image_khr(
354 |                 self.buffer,
355 |                 queue,
356 |                 properties,
357 |                 image.get_mut(),
358 |                 fill_color,
359 |                 origin,
360 |                 region,
361 |                 sync_point_wait_list,
362 |                 &mut recorded,
363 |                 ptr::null_mut(),
364 |             )?;
365 |         }
366 |         Ok(recorded)
367 |     }
359 | 
360 |     /// Records a command to execute a kernel on a device.
361 |     ///
362 |     /// The kernel handle, work dimension and work size pointers are forwarded unchanged
363 |     /// to `command_nd_range_kernel_khr`, with a null pointer as its final argument.
364 |     ///
365 |     /// returns a Result containing the sync point written by the call
366 |     /// or the error code from the OpenCL C API function.
367 |     pub unsafe fn nd_range_kernel(
368 |         &self,
369 |         queue: cl_command_queue,
370 |         properties: *const cl_command_properties_khr,
371 |         kernel: cl_kernel,
372 |         work_dim: cl_uint,
373 |         global_work_offsets: *const size_t,
374 |         global_work_sizes: *const size_t,
375 |         local_work_sizes: *const size_t,
376 |         sync_point_wait_list: &[cl_sync_point_khr],
377 |     ) -> Result<cl_sync_point_khr> {
378 |         let mut recorded = 0;
379 |         unsafe {
380 |             command_nd_range_kernel_khr(
381 |                 self.buffer,
382 |                 queue,
383 |                 properties,
384 |                 kernel,
385 |                 work_dim,
386 |                 global_work_offsets,
387 |                 global_work_sizes,
388 |                 local_work_sizes,
389 |                 sync_point_wait_list,
390 |                 &mut recorded,
391 |                 ptr::null_mut(),
392 |             )?;
393 |         }
394 |         Ok(recorded)
395 |     }
390 | 
391 |     /// Records a command through `command_svm_memcpy_khr`.
392 |     ///
393 |     /// `dst_ptr`, `src_ptr`, `size` and `mutable_handle` are forwarded unchanged.
394 |     /// Presumably this copies `size` bytes between SVM pointers - confirm against
395 |     /// the extension specification.
396 |     ///
397 |     /// returns a Result containing the sync point written by the call
398 |     /// or the error code from the OpenCL C API function.
399 |     pub unsafe fn svm_memcpy(
400 |         &self,
401 |         queue: cl_command_queue,
402 |         properties: *const cl_command_properties_khr,
403 |         dst_ptr: *mut c_void,
404 |         src_ptr: *const c_void,
405 |         size: size_t,
406 |         sync_point_wait_list: &[cl_sync_point_khr],
407 |         mutable_handle: *mut cl_mutable_command_khr,
408 |     ) -> Result<cl_sync_point_khr> {
409 |         let mut recorded = 0;
410 |         unsafe {
411 |             command_svm_memcpy_khr(
412 |                 self.buffer,
413 |                 queue,
414 |                 properties,
415 |                 dst_ptr,
416 |                 src_ptr,
417 |                 size,
418 |                 sync_point_wait_list,
419 |                 &mut recorded,
420 |                 mutable_handle,
421 |             )?;
422 |         }
423 |         Ok(recorded)
424 |     }
417 | 
418 |     /// Records a command through `command_svm_mem_fill_khr`.
419 |     ///
420 |     /// `svm_ptr`, `pattern`, `pattern_size`, `size` and `mutable_handle` are
421 |     /// forwarded unchanged. Presumably this fills SVM memory with the pattern -
422 |     /// confirm against the extension specification.
423 |     ///
424 |     /// returns a Result containing the sync point written by the call
425 |     /// or the error code from the OpenCL C API function.
426 |     pub unsafe fn svm_mem_fill(
427 |         &self,
428 |         queue: cl_command_queue,
429 |         properties: *const cl_command_properties_khr,
430 |         svm_ptr: *mut c_void,
431 |         pattern: *const c_void,
432 |         pattern_size: size_t,
433 |         size: size_t,
434 |         sync_point_wait_list: &[cl_sync_point_khr],
435 |         mutable_handle: *mut cl_mutable_command_khr,
436 |     ) -> Result<cl_sync_point_khr> {
437 |         let mut recorded = 0;
438 |         unsafe {
439 |             command_svm_mem_fill_khr(
440 |                 self.buffer,
441 |                 queue,
442 |                 properties,
443 |                 svm_ptr,
444 |                 pattern,
445 |                 pattern_size,
446 |                 size,
447 |                 sync_point_wait_list,
448 |                 &mut recorded,
449 |                 mutable_handle,
450 |             )?;
451 |         }
452 |         Ok(recorded)
453 |     }
446 | 
447 |     /// Query `CL_COMMAND_BUFFER_NUM_QUEUES_KHR` for this command-buffer.
448 |     pub fn num_queues(&self) -> Result<cl_uint> {
449 |         let info = get_command_buffer_info_khr(self.buffer, CL_COMMAND_BUFFER_NUM_QUEUES_KHR)?;
450 |         Ok(info.into())
451 |     }
450 | 
451 |     /// Query `CL_COMMAND_BUFFER_QUEUES_KHR` for this command-buffer.
452 |     ///
453 |     /// Each returned integer stands for a cl_command_queue.
454 |     pub fn queues(&self) -> Result<Vec<isize>> {
455 |         let info = get_command_buffer_info_khr(self.buffer, CL_COMMAND_BUFFER_QUEUES_KHR)?;
456 |         Ok(info.into())
457 |     }
455 | 
456 |     /// Query `CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR` for this command-buffer.
457 |     pub fn reference_count(&self) -> Result<cl_uint> {
458 |         let info =
459 |             get_command_buffer_info_khr(self.buffer, CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR)?;
460 |         Ok(info.into())
461 |     }
459 | 
460 |     /// Query `CL_COMMAND_BUFFER_STATE_KHR` for this command-buffer.
461 |     pub fn buffer_state(&self) -> Result<cl_uint> {
462 |         let info = get_command_buffer_info_khr(self.buffer, CL_COMMAND_BUFFER_STATE_KHR)?;
463 |         Ok(info.into())
464 |     }
463 | 
464 |     /// Query `CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR` for this command-buffer.
465 |     pub fn properties_array(&self) -> Result<Vec<cl_command_buffer_properties_khr>> {
466 |         let info =
467 |             get_command_buffer_info_khr(self.buffer, CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR)?;
468 |         Ok(info.into())
469 |     }
470 | 
471 |     /// Query the info value named by `param_name` and return its raw bytes.
472 |     pub fn get_data(&self, param_name: cl_command_buffer_info_khr) -> Result<Vec<u8>> {
473 |         let data = get_command_buffer_data_khr(self.buffer, param_name)?;
474 |         Ok(data)
475 |     }
474 | }
475 | 


--------------------------------------------------------------------------------
/src/context.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2020-2025 Via Technology Ltd.
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //    http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #![allow(clippy::missing_safety_doc)]
 16 | 
 17 | pub use cl3::context;
 18 | 
 19 | use super::Result;
 20 | use super::device::Device;
 21 | #[cfg(any(feature = "CL_VERSION_1_2", feature = "dynamic"))]
 22 | use super::device::SubDevice;
 23 | 
 24 | #[allow(unused_imports)]
 25 | use cl3::dx9_media_sharing;
 26 | 
 27 | #[cfg(any(feature = "cl_khr_d3d10_sharing", feature = "dynamic"))]
 28 | #[allow(unused_imports)]
 29 | use cl3::d3d10;
 30 | 
 31 | #[cfg(any(feature = "cl_khr_d3d11_sharing", feature = "dynamic"))]
 32 | #[allow(unused_imports)]
 33 | use cl3::d3d11;
 34 | 
 35 | #[allow(unused_imports)]
 36 | use cl3::egl;
 37 | #[allow(unused_imports)]
 38 | use cl3::ext;
 39 | #[allow(unused_imports)]
 40 | use cl3::gl;
 41 | #[allow(unused_imports)]
 42 | use cl3::types::{
 43 |     cl_context, cl_context_info, cl_context_properties, cl_device_id, cl_device_svm_capabilities,
 44 |     cl_device_type, cl_event, cl_image_format, cl_mem, cl_mem_flags, cl_mem_object_type, cl_uint,
 45 | };
 46 | use libc::{c_char, c_void, intptr_t, size_t};
 47 | use std::ptr;
 48 | 
 49 | /// Get the current device used by an OpenGL context.
 50 | ///
 51 | /// * `properties` - the OpenCL context properties.
 52 | ///
 53 | /// returns a Result containing the device
 54 | /// or the error code from the OpenCL C API function.
 55 | #[cfg(any(feature = "cl_khr_gl_sharing", feature = "dynamic"))]
 56 | #[allow(clippy::as_ptr_cast_mut)]
 57 | pub fn get_current_device_for_gl_context_khr(
 58 |     properties: &[cl_context_properties],
 59 | ) -> Result<cl_device_id> {
 60 |     // Cast to the mutable pointer type that get_gl_context_info_khr is called with.
 61 |     let properties_ptr = properties.as_ptr() as *mut cl_context_properties;
 62 |     let info =
 63 |         gl::get_gl_context_info_khr(properties_ptr, gl::CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR)?;
 64 |     // The info value is converted to a pointer sized integer, then to a device id.
 65 |     Ok(intptr_t::from(info) as cl_device_id)
 66 | }
 66 | 
 67 | /// Get the devices for an OpenGL context.
 68 | ///
 69 | /// * `properties` - the OpenCL context properties.
 70 | ///
 71 | /// returns a Result containing the devices
 72 | /// or the error code from the OpenCL C API function.
 73 | #[cfg(any(feature = "cl_khr_gl_sharing", feature = "dynamic"))]
 74 | #[allow(clippy::as_ptr_cast_mut)]
 75 | pub fn get_devices_for_gl_context_khr(
 76 |     properties: &[cl_context_properties],
 77 | ) -> Result<Vec<cl_device_id>> {
 78 |     // Cast to the mutable pointer type that get_gl_context_info_khr is called with.
 79 |     let properties_ptr = properties.as_ptr() as *mut cl_context_properties;
 80 |     let handles: Vec<intptr_t> =
 81 |         gl::get_gl_context_info_khr(properties_ptr, gl::CL_DEVICES_FOR_GL_CONTEXT_KHR)?.into();
 82 |     // Each pointer sized integer is cast to a device id.
 83 |     Ok(handles
 84 |         .into_iter()
 85 |         .map(|handle| handle as cl_device_id)
 86 |         .collect())
 87 | }
 89 | 
 90 | /// An OpenCL context object.
 91 | /// Implements the Drop trait to call release_context when the object is dropped.
 92 | #[derive(Debug)]
 93 | pub struct Context {
 94 |     // The raw OpenCL context handle.
 95 |     context: cl_context,
 96 |     // A copy of the device ids that were passed to `Context::new`.
 97 |     devices: Vec<cl_device_id>,
 98 | }
 97 | 
 98 | impl From<Context> for cl_context {
 99 |     /// Consume the wrapper and hand its raw context handle to the caller.
100 |     ///
101 |     /// The wrapper's `Drop` is suppressed, so the returned handle keeps the
102 |     /// reference that the wrapper held. The caller becomes responsible for
103 |     /// releasing it.
104 |     fn from(value: Context) -> Self {
105 |         // Letting `value` drop here would release the very context being returned.
106 |         let mut value = std::mem::ManuallyDrop::new(value);
107 |         // Drop is suppressed, so free the device list by hand to avoid leaking it.
108 |         drop(std::mem::take(&mut value.devices));
109 |         value.context
110 |     }
111 | }
103 | 
104 | impl Drop for Context {
105 |     /// Clear the device list, then release the OpenCL context, panicking if the release fails.
106 |     fn drop(&mut self) {
107 |         self.devices.clear();
108 |         let released = unsafe { context::release_context(self.context) };
109 |         released.expect("Error: clReleaseContext");
110 |     }
111 | }
110 | 
// NOTE(review): `Context` holds raw OpenCL handles, so Send and Sync have to be
// asserted manually. This presumably relies on the OpenCL runtime allowing a
// context to be shared between threads - confirm against the OpenCL specification.
unsafe impl Send for Context {}
unsafe impl Sync for Context {}
113 | 
114 | impl Context {
115 |     fn new(context: cl_context, devices: &[cl_device_id]) -> Self {
116 |         Self {
117 |             context,
118 |             devices: devices.to_vec(),
119 |         }
120 |     }
121 | 
122 |     /// Get the underlying OpenCL cl_context.
123 |     pub const fn get(&self) -> cl_context {
124 |         self.context
125 |     }
126 | 
127 |     /// Create a Context from a slice of cl_device_ids.  
128 |     ///
129 |     /// * `devices` - a slice of cl_device_ids for an OpenCL Platform.
130 |     /// * `properties` - a null terminated list of cl_context_properties, see
131 |     ///   [Context Properties](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#context-properties-table).
132 |     /// * `pfn_notify` - an optional callback function that can be registered by the application.
133 |     /// * `user_data` - passed as the user_data argument when pfn_notify is called.
134 |     ///
135 |     /// returns a Result containing the new OpenCL context
136 |     /// or the error code from the OpenCL C API function.
137 |     pub fn from_devices(
138 |         devices: &[cl_device_id],
139 |         properties: &[cl_context_properties],
140 |         pfn_notify: Option<unsafe extern "C" fn(*const c_char, *const c_void, size_t, *mut c_void)>,
141 |         user_data: *mut c_void,
142 |     ) -> Result<Self> {
143 |         let properties_ptr = if !properties.is_empty() {
144 |             properties.as_ptr()
145 |         } else {
146 |             ptr::null()
147 |         };
148 |         let context = context::create_context(devices, properties_ptr, pfn_notify, user_data)?;
149 |         Ok(Self::new(context, devices))
150 |     }
151 | 
152 |     /// Create a Context from a [Device].
153 |     ///
154 |     /// * `device` - a [Device].
155 |     ///
156 |     /// returns a Result containing the new OpenCL context
157 |     /// or the error code from the OpenCL C API function.
158 |     pub fn from_device(device: &Device) -> Result<Self> {
159 |         let devices: Vec<cl_device_id> = vec![device.id()];
160 |         let properties = Vec::<cl_context_properties>::default();
161 |         Self::from_devices(&devices, &properties, None, ptr::null_mut())
162 |     }
163 | 
164 |     /// Create a Context from a slice of SubDevices.  
165 |     ///
166 |     /// * `devices` - a slice of SubDevices for an OpenCL Platform.
167 |     /// * `properties` - a null terminated list of cl_context_properties, see
168 |     ///   [Context Properties](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#context-properties-table).
169 |     /// * `pfn_notify` - an optional callback function that can be registered by the application.
170 |     /// * `user_data` - passed as the user_data argument when pfn_notify is called.
171 |     ///
172 |     /// returns a Result containing the new OpenCL context
173 |     /// or the error code from the OpenCL C API function.
174 |     #[cfg(any(feature = "CL_VERSION_1_2", feature = "dynamic"))]
175 |     pub fn from_sub_devices(
176 |         sub_devices: &[SubDevice],
177 |         properties: &[cl_context_properties],
178 |         pfn_notify: Option<unsafe extern "C" fn(*const c_char, *const c_void, size_t, *mut c_void)>,
179 |         user_data: *mut c_void,
180 |     ) -> Result<Self> {
181 |         let devices = sub_devices
182 |             .iter()
183 |             .map(|dev| dev.id())
184 |             .collect::<Vec<cl_device_id>>();
185 |         Self::from_devices(&devices, properties, pfn_notify, user_data)
186 |     }
187 | 
188 |     /// Create a Context from a cl_device_type.  
189 |     ///
190 |     /// * `device_type` - the cl_device_type to create a Context for.
191 |     /// * `properties` - a null terminated list of cl_context_properties, see
192 |     ///   [Context Properties](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#context-properties-table).
193 |     /// * `pfn_notify` - an optional callback function that can be registered by the application.
194 |     /// * `user_data` - passed as the user_data argument when pfn_notify is called.
195 |     ///
196 |     /// returns a Result containing the new OpenCL context
197 |     /// or the error code from the OpenCL C API function.
198 |     pub fn from_device_type(
199 |         device_type: cl_device_type,
200 |         properties: &[cl_context_properties],
201 |         pfn_notify: Option<unsafe extern "C" fn(*const c_char, *const c_void, size_t, *mut c_void)>,
202 |         user_data: *mut c_void,
203 |     ) -> Result<Self> {
204 |         let properties_ptr = if !properties.is_empty() {
205 |             properties.as_ptr()
206 |         } else {
207 |             ptr::null()
208 |         };
209 |         let context =
210 |             context::create_context_from_type(device_type, properties_ptr, pfn_notify, user_data)?;
211 |         let dev_ptrs: Vec<intptr_t> =
212 |             context::get_context_info(context, context::CL_CONTEXT_DEVICES)?.into();
213 |         let devices = dev_ptrs
214 |             .iter()
215 |             .map(|ptr| *ptr as cl_device_id)
216 |             .collect::<Vec<cl_device_id>>();
217 |         Ok(Self::new(context, &devices))
218 |     }
219 | 
220 |     /// Get the common Shared Virtual Memory (SVM) capabilities of the
221 |     /// devices in the Context.
222 |     pub fn get_svm_mem_capability(&self) -> cl_device_svm_capabilities {
223 |         let device = Device::new(self.devices[0]);
224 |         let mut svm_capability = device.svm_mem_capability();
225 | 
226 |         for index in 1..self.devices.len() {
227 |             let device = Device::new(self.devices[index]);
228 |             svm_capability &= device.svm_mem_capability();
229 |         }
230 | 
231 |         svm_capability
232 |     }
233 | 
234 |     /// Get the list of image formats supported by the Context for an image type,
235 |     /// and allocation information.
236 |     ///
237 |     /// Calls clGetSupportedImageFormats to get the desired information about the context.
238 |     ///
239 |     /// * `flags` - a bit-field used to specify allocation and usage information
240 |     ///   about the image memory object being created, see:
241 |     ///   [Memory Flags](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#memory-flags-table).
242 |     /// * `image_type` - describes the image type.
243 |     ///
244 |     /// returns a Result containing the desired information in an InfoType enum
245 |     /// or the error code from the OpenCL C API function.
246 |     pub fn get_supported_image_formats(
247 |         &self,
248 |         flags: cl_mem_flags,
249 |         image_type: cl_mem_object_type,
250 |     ) -> Result<Vec<cl_image_format>> {
251 |         Ok(cl3::memory::get_supported_image_formats(
252 |             self.context,
253 |             flags,
254 |             image_type,
255 |         )?)
256 |     }
257 | 
258 |     #[cfg(any(feature = "cl_arm_import_memory", feature = "dynamic"))]
259 |     pub unsafe fn import_memory_arm(
260 |         &self,
261 |         flags: cl_mem_flags,
262 |         properties: *const ext::cl_import_properties_arm,
263 |         memory: *mut c_void,
264 |         size: size_t,
265 |     ) -> Result<cl_mem> {
266 |         unsafe {
267 |             Ok(ext::import_memory_arm(
268 |                 self.context,
269 |                 flags,
270 |                 properties,
271 |                 memory,
272 |                 size,
273 |             )?)
274 |         }
275 |     }
276 | 
277 |     #[allow(clippy::missing_const_for_fn)]
278 |     pub fn devices(&self) -> &[cl_device_id] {
279 |         &self.devices
280 |     }
281 | 
282 |     pub fn default_device(&self) -> cl_device_id {
283 |         self.devices[0]
284 |     }
285 | 
286 |     pub const fn num_devices(&self) -> cl_uint {
287 |         self.devices.len() as cl_uint
288 |     }
289 | 
290 |     #[cfg(any(feature = "CL_VERSION_3_0", feature = "dynamic"))]
291 |     #[inline]
292 |     pub fn set_destructor_callback(
293 |         &self,
294 |         pfn_notify: Option<unsafe extern "C" fn(cl_context, *mut c_void)>,
295 |         user_data: *mut c_void,
296 |     ) -> Result<()> {
297 |         context::set_context_destructor_callback(self.context, pfn_notify, user_data)
298 |             .map_err(Into::into)
299 |     }
300 | 
301 |     pub fn reference_count(&self) -> Result<cl_uint> {
302 |         Ok(context::get_context_info(self.context, context::CL_CONTEXT_REFERENCE_COUNT)?.into())
303 |     }
304 | 
305 |     pub fn properties(&self) -> Result<Vec<intptr_t>> {
306 |         Ok(context::get_context_info(self.context, context::CL_CONTEXT_PROPERTIES)?.into())
307 |     }
308 | 
309 |     /// Get data about an OpenCL context.
310 |     /// Calls clGetContextInfo to get the desired data about the context.
311 |     pub fn get_data(&self, param_name: cl_context_info) -> Result<Vec<u8>> {
312 |         Ok(context::get_context_data(self.context, param_name)?)
313 |     }
314 | 
315 |     #[cfg(any(feature = "cl_khr_terminate_context", feature = "dynamic"))]
316 |     pub unsafe fn terminate(&self) -> Result<()> {
317 |         unsafe { Ok(ext::terminate_context_khr(self.context)?) }
318 |     }
319 | 
320 |     /// Create a cl_event linked to an OpenGL sync object.  
321 |     /// Requires the cl_khr_gl_event extension
322 |     ///
323 |     /// * `sync` - the sync object in the GL share group associated with context.  
324 |     ///
325 |     /// returns a Result containing the new OpenCL event
326 |     /// or the error code from the OpenCL C API function.
327 |     #[cfg(any(feature = "cl_khr_gl_sharing", feature = "dynamic"))]
328 |     pub fn create_event_from_gl_sync_khr(&self, sync: gl::cl_GLsync) -> Result<cl_event> {
329 |         Ok(gl::create_event_from_gl_sync_khr(self.context, sync)?)
330 |     }
331 | 
332 |     /// Create an event object linked to an EGL fence sync object.  
333 |     /// Requires the cl_khr_egl_event extension
334 |     ///
335 |     /// * `sync` - the handle to an EGLSync object.  
336 |     /// * `display` - the handle to an EGLDisplay.  
337 |     ///
338 |     /// returns a Result containing the new OpenCL event
339 |     /// or the error code from the OpenCL C API function.
340 |     #[cfg(any(feature = "cl_khr_egl_event", feature = "dynamic"))]
341 |     pub unsafe fn create_event_from_egl_sync_khr(
342 |         &self,
343 |         sync: egl::CLeglSyncKHR,
344 |         display: egl::CLeglDisplayKHR,
345 |     ) -> Result<cl_event> {
346 |         unsafe {
347 |             Ok(egl::create_event_from_egl_sync_khr(
348 |                 self.context,
349 |                 sync,
350 |                 display,
351 |             )?)
352 |         }
353 |     }
354 | 
355 |     #[cfg(any(feature = "cl_khr_semaphore", feature = "dynamic"))]
356 |     pub fn create_semaphore_with_properties_khr(
357 |         &self,
358 |         sema_props: *const ext::cl_semaphore_properties_khr,
359 |     ) -> Result<cl_mem> {
360 |         Ok(ext::create_semaphore_with_properties_khr(
361 |             self.context,
362 |             sema_props,
363 |         )?)
364 |     }
365 | 
366 |     #[cfg(any(
367 |         feature = "cl_khr_dx9_media_sharing",
368 |         feature = "cl_intel_dx9_media_sharing",
369 |         feature = "dynamic"
370 |     ))]
371 |     pub fn get_supported_dx9_media_surface_formats_intel(
372 |         &self,
373 |         flags: cl_mem_flags,
374 |         image_type: cl_mem_object_type,
375 |         plane: cl_uint,
376 |     ) -> Result<Vec<cl_uint>> {
377 |         Ok(unsafe {
378 |             dx9_media_sharing::get_supported_dx9_media_surface_formats_intel(
379 |                 self.context,
380 |                 flags,
381 |                 image_type,
382 |                 plane,
383 |             )
384 |         }?)
385 |     }
386 | 
387 |     #[cfg(any(feature = "cl_khr_d3d10_sharing", feature = "dynamic"))]
388 |     pub fn get_supported_d3d10_texture_formats_intel(
389 |         &self,
390 |         flags: cl_mem_flags,
391 |         image_type: cl_mem_object_type,
392 |     ) -> Result<Vec<cl_uint>> {
393 |         Ok(unsafe {
394 |             d3d10::get_supported_d3d10_texture_formats_intel(self.context, flags, image_type)
395 |         }?)
396 |     }
397 | 
398 |     #[cfg(any(feature = "cl_khr_d3d11_sharing", feature = "dynamic"))]
399 |     pub fn get_supported_d3d11_texture_formats_intel(
400 |         &self,
401 |         flags: cl_mem_flags,
402 |         image_type: cl_mem_object_type,
403 |         plane: cl_uint,
404 |     ) -> Result<Vec<cl_uint>> {
405 |         Ok(unsafe {
406 |             d3d11::get_supported_d3d11_texture_formats_intel(self.context, flags, image_type, plane)
407 |         }?)
408 |     }
409 | }
410 | 
#[cfg(test)]
mod tests {
    use super::*;
    use crate::device::Device;
    use crate::platform::get_platforms;
    use cl3::device::CL_DEVICE_TYPE_GPU;
    use cl3::info_type::InfoType;
    use cl3::memory::{CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE};

    /// Builds a Context from the first GPU device of the first platform and
    /// prints the results of its query methods.
    #[test]
    fn test_context() {
        let platforms = get_platforms().unwrap();
        assert!(0 < platforms.len());

        // Only the first platform is exercised.
        let first_platform = &platforms[0];

        let devices = first_platform.get_devices(CL_DEVICE_TYPE_GPU).unwrap();
        assert!(0 < devices.len());

        // Only the first device is exercised.
        let first_device = Device::new(devices[0]);
        let context = Context::from_device(&first_device).unwrap();

        let svm_capabilities = context.get_svm_mem_capability();
        println!("CL_DEVICE_SVM_CAPABILITIES: {:X}", svm_capabilities);

        let image_formats = context
            .get_supported_image_formats(CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D)
            .unwrap();
        println!(
            "clGetSupportedImageFormats:\norder: data_type {}",
            InfoType::VecImageFormat(image_formats)
        );

        let reference_count = context.reference_count().unwrap();
        println!("CL_CONTEXT_REFERENCE_COUNT: {}", reference_count);

        let context_properties = context.properties().unwrap();
        println!("CL_CONTEXT_PROPERTIES: {:?}", context_properties);
    }

    /// Creates a Context for all GPU devices; a creation error is printed
    /// rather than failing the test.
    #[test]
    fn test_context_from_device_type() {
        match Context::from_device_type(CL_DEVICE_TYPE_GPU, &[], None, ptr::null_mut()) {
            Ok(gpu_context) => println!("Context num devices: {}", gpu_context.num_devices()),
            Err(error) => println!("OpenCL error, Context::from_device_type: {}", error),
        }
    }
}
471 | 


--------------------------------------------------------------------------------