├── .gitignore ├── docs ├── svmoverview.pdf ├── images │ ├── opencl_memory.png │ ├── example_opencl_system.svg │ ├── opencl_memory_objects.svg │ └── opencl_context_objects.svg ├── uml │ ├── opencl_memory_objects.puml │ ├── opencl_context_objects.puml │ ├── example_opencl_system.puml │ └── opencl_app_sequence.puml └── opencl_description.md ├── .github └── workflows │ └── rust.yml ├── CODE_OF_CONDUCT.md ├── examples ├── clinfo.rs ├── opencl2svm.rs ├── basic.rs ├── opencl2serde.rs └── opencl2image.rs ├── Cargo.toml ├── CONTRIBUTING.md ├── src ├── event.rs ├── platform.rs ├── lib.rs ├── command_buffer.rs └── context.rs ├── LICENSE ├── RELEASES.md ├── tests ├── opencl2_kernel_test.rs └── integration_test.rs └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | .vscode/ 3 | /src/.vscode/* 4 | **/*.rs.bk 5 | Cargo.lock -------------------------------------------------------------------------------- /docs/svmoverview.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kenba/opencl3/HEAD/docs/svmoverview.pdf -------------------------------------------------------------------------------- /docs/images/opencl_memory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kenba/opencl3/HEAD/docs/images/opencl_memory.png -------------------------------------------------------------------------------- /docs/uml/opencl_memory_objects.puml: -------------------------------------------------------------------------------- 1 | @startuml 2 | 3 | 'Copyright (c) 2021 Via Technology Ltd. All Rights Reserved. 
4 | 5 | ' title via::opencl Memory classes 6 | 7 | Buffer --|> Memory 8 | ' Sampler --o Image 9 | Image --|> Memory 10 | Pipe --|> Memory 11 | Memory o- SVM 12 | 13 | @enduml -------------------------------------------------------------------------------- /docs/uml/opencl_context_objects.puml: -------------------------------------------------------------------------------- 1 | @startuml 2 | 3 | 'Copyright (c) 2021 Via Technology Ltd. All Rights Reserved. 4 | 5 | ' title via::opencl Context objects 6 | 7 | object cl_device_id 8 | 9 | Context o-- "1..*" cl_device_id 10 | cl_device_id o- "1..*" CommandQueue 11 | Context *-- "1..*" CommandQueue 12 | 13 | Context *-- "1..*" Program 14 | Program *- "1..*" Kernel 15 | Context *-- "1..*" Kernel 16 | 17 | Context *-- "0..*" SubDevice 18 | 19 | @enduml 20 | -------------------------------------------------------------------------------- /docs/uml/example_opencl_system.puml: -------------------------------------------------------------------------------- 1 | @startuml 2 | 3 | 'Copyright (c) 2017 Via Technology Ltd. All Rights Reserved. 
4 | 5 | 'title OpenCL Compute System Objects 6 | 7 | object system 8 | 9 | object platform_1 10 | object platform_2 11 | object platform_3 12 | 13 | object device_1 14 | object device_2 15 | object device_3 16 | object device_4 17 | 18 | system *-- platform_1 19 | system *-- platform_2 20 | system *-- platform_3 21 | 22 | platform_1 *-- device_1 23 | platform_1 *-- device_2 24 | 25 | platform_2 *-- device_2 26 | 27 | platform_3 *-- device_2 28 | platform_3 *-- device_3 29 | platform_3 *-- device_4 30 | 31 | @enduml -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ main, develop ] 6 | pull_request: 7 | branches: [ develop ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | check: 14 | name: Check 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v4 18 | - name: Install Rust 19 | run: rustup update stable 20 | - name: Unit test 21 | run: cargo check 22 | 23 | build: 24 | name: Build 25 | runs-on: ubuntu-latest 26 | 27 | steps: 28 | - name: Load OpenCL 29 | run: | 30 | wget -qO - https://repositories.intel.com/graphics/intel-graphics.key | 31 | sudo apt-key add - 32 | sudo add-apt-repository \ 33 | 'deb [arch=amd64] https://repositories.intel.com/graphics/ubuntu focal main' 34 | sudo apt-get update 35 | sudo apt-get install \ 36 | intel-opencl-icd \ 37 | intel-level-zero-gpu level-zero \ 38 | intel-media-va-driver-non-free libmfx1 39 | sudo apt-get install ocl-icd-opencl-dev 40 | 41 | - uses: actions/checkout@v4 42 | - name: Install Rust 43 | run: rustup update stable 44 | - name: Build 45 | run: cargo build --verbose 46 | # Tests "runs-on:" a GPU hosted runner which requires an enterprise subscription... 
47 | # - name: Run tests 48 | # run: cargo test -- --test-threads=1 49 | 50 | clippy: 51 | name: Clippy 52 | runs-on: ubuntu-latest 53 | env: 54 | RUSTFLAGS: "-Dwarnings" 55 | steps: 56 | - uses: actions/checkout@v4 57 | - name: Install Rust 58 | run: rustup update stable 59 | - name: Run Clippy 60 | run: cargo clippy --all-features 61 | -------------------------------------------------------------------------------- /docs/uml/opencl_app_sequence.puml: -------------------------------------------------------------------------------- 1 | @startuml 2 | 3 | 'Copyright (c) 2021 Via Technology Ltd. All Rights Reserved. 4 | 5 | 'title OpenCL Application Sequence 6 | 7 | actor "OpenCL Application" as user 8 | 9 | participant "OpenCL\nplatforms" as platforms 10 | 11 | participant "OpenCL\ndevices" as devices 12 | 13 | participant "OpenCL\nContext" as opencl_context 14 | 15 | participant "OpenCL\nCommandQueue" as opencl_queue 16 | 17 | participant "OpenCL\nProgram" as opencl_program 18 | 19 | participant "OpenCL\nKernel" as opencl_kernel 20 | 21 | participant "OpenCL\nMemory" as opencl_memory 22 | 23 | group Query 24 | user -> platforms : Query OpenCL platforms. 25 | user <- platforms : available OpenCL platforms 26 | 27 | user -> devices : Query OpenCL devices. 
28 | user <- devices : available OpenCL devices 29 | end 30 | 31 | group Initialisation 32 | user -> user : Choose most appropriate\nplatform and device(s) 33 | 34 | user -> opencl_context : Construct OpenCL Context for platform and device(s) 35 | 36 | devices -> opencl_context : Context devices 37 | 38 | opencl_context -> opencl_queue : Create device\ncommand_queue(s) 39 | 40 | user -> opencl_context : Construct and build programs: source, binary and/or IL 41 | opencl_context -> opencl_program : Build program 42 | opencl_context -> opencl_kernel : Construct kernels for program 43 | 44 | user -> opencl_context : Create OpenCL memory 45 | opencl_context -> opencl_memory : Create: buffers, images, samplers, pipes, svm, etc. 46 | end 47 | 48 | group Compute Data 49 | user -> opencl_context : get kernel(s) 50 | 51 | group loop 52 | user -> opencl_memory : Write input data to OpenCL memory 53 | user -> opencl_queue : execute kernel(s) 54 | user <- opencl_memory : Read output data from OpenCL memory 55 | user <- opencl_queue : wait for events or queue finished 56 | end 57 | end 58 | 59 | group Clean Up 60 | user -> opencl_context : drop context 61 | 62 | opencl_context -> opencl_memory : release: buffers, images, samplers, pipes, svm, etc. 
63 | opencl_context -> opencl_kernel : release kernels 64 | opencl_context -> opencl_program : release programs 65 | opencl_context -> opencl_queue : release command_queues 66 | opencl_context -> opencl_context : release context 67 | end 68 | 69 | @enduml 70 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behaviour that contributes to a positive environment for our 10 | community include: 11 | 12 | - Using welcoming and inclusive language 13 | - Being respectful of differing viewpoints and experiences 14 | - Gracefully accepting constructive criticism 15 | - Focusing on what is best for the community 16 | - Showing empathy towards other community members 17 | 18 | Examples of unacceptable behaviour include: 19 | 20 | - The use of sexualized language or imagery and unwelcome sexual attention or advances 21 | - Trolling, insulting/derogatory comments, and personal or political attacks 22 | - Public or private harassment 23 | - Publishing others' private information, such as a physical or electronic address, without explicit permission 24 | - Other conduct which could reasonably be considered inappropriate in a professional setting 25 | 26 | ## Our Responsibilities 27 | 28 | Project maintainers are responsible for clarifying the standards of acceptable behaviour and are expected to take 
appropriate and fair corrective action in response to any instances of unacceptable behaviour. 29 | 30 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviours that they deem inappropriate, threatening, offensive, or harmful. 31 | 32 | ## Enforcement 33 | 34 | Instances of abusive, harassing, or otherwise unacceptable behaviour may be 35 | reported to the community leaders responsible for enforcement at: 36 | [codeofconduct@via-technology.aero](mailto:codeofconduct@via-technology.aero). 37 | All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), 44 | version 2.1, available at 45 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html). 46 | -------------------------------------------------------------------------------- /examples/clinfo.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 Via Technology Ltd. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use opencl3::Result; 16 | use opencl3::device::{CL_DEVICE_TYPE_ALL, Device, device_type_text, vendor_id_text}; 17 | 18 | /// Finds all the OpenCL platforms and devices on a system. 19 | /// 20 | /// It displays OpenCL platform information from `clGetPlatformInfo` and 21 | /// OpenCL device information from `clGetDeviceInfo` for all the platforms and 22 | /// devices. 23 | fn main() -> Result<()> { 24 | let platforms = opencl3::platform::get_platforms()?; 25 | println!("Number of platforms: {}", platforms.len()); 26 | 27 | for platform in platforms { 28 | println!("CL_PLATFORM_VENDOR: {}", platform.vendor()?); 29 | println!("CL_PLATFORM_NAME: {}", platform.name()?); 30 | println!("CL_PLATFORM_VERSION: {}", platform.version()?); 31 | println!("CL_PLATFORM_PROFILE: {}", platform.profile()?); 32 | println!("CL_PLATFORM_EXTENSIONS: {}", platform.extensions()?); 33 | 34 | let devices = platform.get_devices(CL_DEVICE_TYPE_ALL)?; 35 | println!("Number of devices: {}", devices.len()); 36 | println!(); 37 | 38 | for device_id in devices { 39 | let device = Device::new(device_id); 40 | println!("\tCL_DEVICE_VENDOR: {}", device.vendor()?); 41 | let vendor_id = device.vendor_id()?; 42 | println!( 43 | "\tCL_DEVICE_VENDOR_ID: {:X}, {}", 44 | vendor_id, 45 | vendor_id_text(vendor_id) 46 | ); 47 | println!("\tCL_DEVICE_NAME: {}", device.name()?); 48 | println!("\tCL_DEVICE_VERSION: {}", device.version()?); 49 | let device_type = device.dev_type()?; 50 | println!( 51 | "\tCL_DEVICE_TYPE: {:X}, {}", 52 | device_type, 53 | 
device_type_text(device_type) 54 | ); 55 | println!("\tCL_DEVICE_PROFILE: {}", device.profile()?); 56 | println!("\tCL_DEVICE_EXTENSIONS: {}", device.extensions()?); 57 | println!( 58 | "\tCL_DEVICE_OPENCL_C_VERSION: {:?}", 59 | device.opencl_c_version()? 60 | ); 61 | println!( 62 | "\tCL_DEVICE_BUILT_IN_KERNELS: {}", 63 | device.built_in_kernels()? 64 | ); 65 | println!( 66 | "\tCL_DEVICE_SVM_CAPABILITIES: {:X}", 67 | device.svm_mem_capability() 68 | ); 69 | println!(); 70 | } 71 | } 72 | 73 | Ok(()) 74 | } 75 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "opencl3" 3 | version = "0.12.1" 4 | authors = ["Ken Barker "] 5 | description = "A Rust implementation of the Khronos OpenCL 3.0 API and extensions." 6 | documentation = "https://docs.rs/opencl3/" 7 | homepage = "https://github.com/kenba/opencl3" 8 | repository = "https://github.com/kenba/opencl3" 9 | readme = "README.md" 10 | keywords = ["opencl", "gpu", "gpgpu"] 11 | categories = ["api-bindings", "asynchronous"] 12 | license = "Apache-2.0" 13 | edition = "2024" 14 | 15 | [features] 16 | 17 | static = ["cl3/static"] 18 | 19 | CL_VERSION_1_1 = ["cl3/CL_VERSION_1_1"] 20 | CL_VERSION_1_2 = ["cl3/CL_VERSION_1_2"] 21 | CL_VERSION_2_0 = ["cl3/CL_VERSION_2_0"] 22 | CL_VERSION_2_1 = ["cl3/CL_VERSION_2_1"] 23 | CL_VERSION_2_2 = ["cl3/CL_VERSION_2_2"] 24 | CL_VERSION_3_0 = ["cl3/CL_VERSION_3_0"] 25 | 26 | cl_apple_setmemobjectdestructor = ["cl3/cl_apple_setmemobjectdestructor"] 27 | cl_apple_contextloggingfunctions = ["cl3/cl_apple_contextloggingfunctions"] 28 | 29 | cl_khr_icd = ["cl3/cl_khr_icd"] 30 | cl_khr_il_program = ["cl3/cl_khr_il_program"] 31 | cl_khr_terminate_context = ["cl3/cl_khr_terminate_context"] 32 | cl_khr_create_command_queue = ["cl3/cl_khr_create_command_queue"] 33 | cl_ext_device_fission = ["cl3/cl_ext_device_fission"] 34 | 
cl_ext_migrate_memobject = ["cl3/cl_ext_migrate_memobject"] 35 | cl_qcom_ext_host_ptr = ["cl3/cl_qcom_ext_host_ptr"] 36 | cl_img_use_gralloc_ptr = ["cl3/cl_img_use_gralloc_ptr"] 37 | cl_img_generate_mipmap = ["cl3/cl_img_generate_mipmap"] 38 | cl_khr_subgroups = ["cl3/cl_khr_subgroups"] 39 | cl_khr_suggested_local_work_size = ["cl3/cl_khr_suggested_local_work_size"] 40 | cl_khr_external_memory = ["cl3/cl_khr_external_memory"] 41 | cl_khr_external_semaphore = ["cl3/cl_khr_external_semaphore"] 42 | cl_khr_external_semaphore_sync_fd = ["cl3/cl_khr_external_semaphore_sync_fd"] 43 | cl_khr_semaphore = ["cl3/cl_khr_semaphore"] 44 | cl_arm_import_memory = ["cl3/cl_arm_import_memory"] 45 | cl_arm_shared_virtual_memory = ["cl3/cl_arm_shared_virtual_memory"] 46 | cl_intel_accelerator = ["cl3/cl_intel_accelerator"] 47 | cl_intel_unified_shared_memory = ["cl3/cl_intel_unified_shared_memory"] 48 | cl_intel_create_buffer_with_properties = ["cl3/cl_intel_create_buffer_with_properties"] 49 | cl_intel_program_scope_host_pipe = ["cl3/cl_intel_program_scope_host_pipe"] 50 | cl_ext_image_requirements_info = ["cl3/cl_ext_image_requirements_info"] 51 | cl_khr_command_buffer = ["cl3/cl_khr_command_buffer"] 52 | cl_khr_command_buffer_multi_device = ["cl3/cl_khr_command_buffer_multi_device"] 53 | cl_khr_command_buffer_mutable_dispatch = ["cl3/cl_khr_command_buffer_mutable_dispatch"] 54 | 55 | cl_khr_gl_sharing = ["cl3/cl_khr_gl_sharing"] 56 | cl_khr_gl_event = ["cl3/cl_khr_gl_event"] 57 | cl_khr_egl_image = ["cl3/cl_khr_egl_image"] 58 | cl_khr_egl_event = ["cl3/cl_khr_egl_event"] 59 | 60 | cl_khr_dx9_media_sharing = ["cl3/cl_khr_dx9_media_sharing"] 61 | cl_intel_dx9_media_sharing = ["cl3/cl_intel_dx9_media_sharing"] 62 | cl_khr_d3d10_sharing = ["cl3/cl_khr_d3d10_sharing"] 63 | cl_khr_d3d11_sharing = ["cl3/cl_khr_d3d11_sharing"] 64 | 65 | cl_loader_info = ["cl3/cl_loader_info"] 66 | cl_pocl_content_size = ["cl3/cl_pocl_content_size"] 67 | cl_ext_buffer_device_address = 
["cl3/cl_ext_buffer_device_address"] 68 | cl_loader_layers = ["cl3/cl_loader_layers"] 69 | cl_img_cancel_command = ["cl3/cl_img_cancel_command"] 70 | cl_qcom_perf_hint = ["cl3/cl_qcom_perf_hint"] 71 | 72 | # Use dynamic linking instead of static linking 73 | dynamic = ["cl3/dynamic"] 74 | 75 | # Default features: 76 | default = ["dynamic"] 77 | 78 | [dependencies] 79 | libc = "0.2" 80 | cl3 = { version = "0.13", default-features = false } 81 | serde = { version = "1.0", optional = true } 82 | 83 | [dev-dependencies] 84 | serde_json = "1.0" 85 | opencl3 = { path = ".", features = ["dynamic", "serde"] } 86 | 87 | [lints.clippy] 88 | enum_glob_use = "deny" 89 | nursery = "deny" 90 | unwrap_used = "deny" 91 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to contribute 2 | 3 | First off, I'm really glad you're reading this, because we need volunteer developers to help improve this project and make it more useful to other OpenCL and Rust developers. 4 | 5 | The following is a set of guidelines for contributing to `opencl3` and its packages, which are hosted in the `opencl3` repository on GitHub. These are mostly guidelines, not rules. Use your best judgment, and feel free to propose changes to this document in a pull request. 6 | 7 | ## Where do I go from here? 8 | 9 | If you've noticed a bug or have a feature request then please raise a [new issue](https://github.com/kenba/opencl3/issues/new). 10 | It's generally best to check the [issues](https://github.com/kenba/opencl3/issues) and [pull requests](https://github.com/kenba/opencl3/pulls) (open and closed) to ensure that someone else has not noticed it before you. I recommend that you wait for confirmation of your bug or approval for your feature request in this way before starting to code. 
11 | 12 | Note: many OpenCL issues are hardware specific, so it is often useful to describe your setup, i.e.: 13 | 14 | - `opencl3` features, e.g. ["serde", "CL_VERSION_1_2", "CL_VERSION_2_0", "CL_VERSION_2_1", "CL_VERSION_2_2"] or default 15 | - OpenCL target device vendor and version 16 | - OpenCL ICD loader vendor and version 17 | - Rust version `rustc --version` 18 | - operating system 19 | - and any other relevant information. 20 | 21 | Please abide by our [Code of Conduct](CODE_OF_CONDUCT.md) in all issues and pull requests. 22 | 23 | ## Fork & create a branch 24 | 25 | If the issue is something you think that you can fix, then [fork opencl3](https://docs.github.com/en/get-started/quickstart/fork-a-repo) and create a branch from `develop` with a descriptive name. 26 | E.g. a good branch name would be (where issue #42 is the issue you're working on): 27 | 28 | ```shell 29 | git checkout develop 30 | git checkout -b 42-fix-some-bug 31 | ``` 32 | 33 | ## Get the test suite running 34 | 35 | Run the unit tests: 36 | 37 | ```shell 38 | cargo test -- --test-threads=1 --show-output 39 | ``` 40 | 41 | and integration tests: 42 | 43 | ```shell 44 | cargo test -- --test-threads=1 --show-output --ignored 45 | ``` 46 | 47 | to ensure that you haven't broken anything. 48 | Please feel free to add tests, especially where the new test(s) demonstrates a bug that you noticed. 49 | 50 | Note: a new test that demonstrates a bug that you've described in an issue is always welcome in a PR, even if you haven't developed the code to fix it yet. 51 | 52 | ## Implement your fix or feature 53 | 54 | At this point, you're ready to make your changes! 55 | Feel free to ask for help; everyone is a beginner at first. 56 | 57 | ## Get the style right 58 | 59 | Your patch should follow the same conventions & pass the same code quality checks as the rest of the project. 
60 | I recommend installing and running `clippy`: 61 | 62 | ```shell 63 | cargo clippy --all-features 64 | ``` 65 | 66 | and `fmt`: 67 | 68 | ```shell 69 | cargo fmt 70 | ``` 71 | 72 | ## Make a Pull Request 73 | 74 | At this point, you should switch back to your develop branch and make sure it's up to date with opencl3's `develop` branch: 75 | 76 | ```shell 77 | git remote add upstream git@github.com:kenba/opencl3.git 78 | git checkout develop 79 | git pull upstream develop 80 | ``` 81 | 82 | Then update your feature branch from your local copy of `develop`, and push it! 83 | 84 | ```shell 85 | git checkout 42-fix-some-bug 86 | git rebase develop 87 | git push --set-upstream origin 42-fix-some-bug 88 | ``` 89 | 90 | Finally, go to GitHub and make a [Pull Request](https://docs.github.com/en/github/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request). 91 | 92 | GitHub Actions will then build your PR. 93 | 94 | ## Merging a Pull Request 95 | 96 | A maintainer will review your PR and determine whether it's OK to merge it into the `develop` branch. 97 | 98 | If it is, they will approve and merge the PR. If not, they may comment on the PR to request changes before they are willing to approve and merge it. 99 | Note: at this stage you should only change the PR to resolve the maintainer's comments. 100 | You should *not* introduce a fantastic new feature that you've just thought of! That should be raised as a new issue instead. 101 | 102 | ## Rebasing a Pull Request 103 | 104 | If a maintainer asks you to "rebase" your PR, they're saying that a lot of code has changed, and that you need to update your branch so it's easier to merge. 
105 | 106 | GitHub has a good guide about [rebasing in Git](https://docs.github.com/en/get-started/using-git/about-git-rebase); here's our suggested workflow: 107 | 108 | ```shell 109 | git checkout 42-fix-some-bug 110 | git pull --rebase upstream develop 111 | git push --force-with-lease origin 42-fix-some-bug 112 | ``` 113 | -------------------------------------------------------------------------------- /examples/opencl2svm.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021-2023 Via Technology Ltd. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use opencl3::Result; 16 | use opencl3::command_queue::{CL_QUEUE_PROFILING_ENABLE, CommandQueue}; 17 | use opencl3::context::Context; 18 | use opencl3::device::{CL_DEVICE_TYPE_GPU, Device}; 19 | use opencl3::error_codes::cl_int; 20 | use opencl3::kernel::{ExecuteKernel, Kernel}; 21 | use opencl3::memory::{CL_MAP_READ, CL_MAP_WRITE}; 22 | use opencl3::program::{CL_STD_2_0, Program}; 23 | use opencl3::svm::SvmVec; 24 | use opencl3::types::CL_BLOCKING; 25 | 26 | const PROGRAM_SOURCE: &str = r#" 27 | kernel void inclusive_scan_int (global int* output, 28 | global int const* values) 29 | { 30 | int sum = 0; 31 | size_t lid = get_local_id(0); 32 | size_t lsize = get_local_size(0); 33 | 34 | size_t num_groups = get_num_groups(0); 35 | for (size_t i = 0u; i < num_groups; ++i) 36 | { 37 | size_t lidx = i * lsize + lid; 38 | int value = work_group_scan_inclusive_add(values[lidx]); 39 | output[lidx] = sum + value; 40 | 41 | sum += work_group_broadcast(value, lsize - 1); 42 | } 43 | }"#; 44 | 45 | const KERNEL_NAME: &str = "inclusive_scan_int"; 46 | 47 | fn main() -> Result<()> { 48 | // Find a usable platform and device for this application 49 | let platforms = opencl3::platform::get_platforms()?; 50 | let platform = platforms.first().expect("no OpenCL platforms"); 51 | let device = *platform 52 | .get_devices(CL_DEVICE_TYPE_GPU)? 53 | .first() 54 | .expect("no device found in platform"); 55 | let device = Device::new(device); 56 | 57 | // Create a Context on an OpenCL device 58 | let context = Context::from_device(&device).expect("Context::from_device failed"); 59 | 60 | // Build the OpenCL program source and create the kernel. 
61 | let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, CL_STD_2_0) 62 | .expect("Program::create_and_build_from_source failed"); 63 | let kernel = Kernel::create(&program, KERNEL_NAME).expect("Kernel::create failed"); 64 | 65 | // Create a command_queue on the Context's device 66 | let queue = 67 | CommandQueue::create_default_with_properties(&context, CL_QUEUE_PROFILING_ENABLE, 0) 68 | .expect("CommandQueue::create_default_with_properties failed"); 69 | 70 | // The input data 71 | const ARRAY_SIZE: usize = 8; 72 | let value_array: [cl_int; ARRAY_SIZE] = [3, 2, 5, 9, 7, 1, 4, 2]; 73 | 74 | // Create an OpenCL SVM vector 75 | let mut test_values = 76 | SvmVec::::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed"); 77 | 78 | // Map test_values if not a CL_MEM_SVM_FINE_GRAIN_BUFFER 79 | if !test_values.is_fine_grained() { 80 | unsafe { queue.enqueue_svm_map(CL_BLOCKING, CL_MAP_WRITE, &mut test_values, &[])? }; 81 | } 82 | 83 | // Copy input data into the OpenCL SVM vector 84 | test_values.clone_from_slice(&value_array); 85 | 86 | // Make test_values immutable 87 | let test_values = test_values; 88 | 89 | // Unmap test_values if not a CL_MEM_SVM_FINE_GRAIN_BUFFER 90 | if !test_values.is_fine_grained() { 91 | let unmap_test_values_event = unsafe { queue.enqueue_svm_unmap(&test_values, &[])? }; 92 | unmap_test_values_event.wait()?; 93 | } 94 | 95 | // The output data, an OpenCL SVM vector 96 | let mut results = 97 | SvmVec::::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed"); 98 | 99 | // Run the kernel on the input data 100 | let kernel_event = unsafe { 101 | ExecuteKernel::new(&kernel) 102 | .set_arg_svm(results.as_mut_ptr()) 103 | .set_arg_svm(test_values.as_ptr()) 104 | .set_global_work_size(ARRAY_SIZE) 105 | .enqueue_nd_range(&queue)? 
106 | }; 107 | 108 | // Wait for the kernel to complete execution on the device 109 | kernel_event.wait()?; 110 | 111 | // Map results if not a CL_MEM_SVM_FINE_GRAIN_BUFFER 112 | if !results.is_fine_grained() { 113 | unsafe { queue.enqueue_svm_map(CL_BLOCKING, CL_MAP_READ, &mut results, &[])? }; 114 | } 115 | 116 | // Can access OpenCL SVM directly, no need to map or read the results 117 | println!("sum results: {:?}", results); 118 | 119 | // Unmap results if not a CL_MEM_SVM_FINE_GRAIN_BUFFER 120 | if !results.is_fine_grained() { 121 | let unmap_results_event = unsafe { queue.enqueue_svm_unmap(&results, &[])? }; 122 | unmap_results_event.wait()?; 123 | } 124 | 125 | Ok(()) 126 | } 127 | -------------------------------------------------------------------------------- /examples/basic.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 Via Technology Ltd. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use opencl3::Result; 16 | use opencl3::command_queue::{CL_QUEUE_PROFILING_ENABLE, CommandQueue}; 17 | use opencl3::context::Context; 18 | use opencl3::device::{CL_DEVICE_TYPE_GPU, Device, get_all_devices}; 19 | use opencl3::kernel::{ExecuteKernel, Kernel}; 20 | use opencl3::memory::{Buffer, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY}; 21 | use opencl3::program::Program; 22 | use opencl3::types::{CL_BLOCKING, CL_NON_BLOCKING, cl_event, cl_float}; 23 | use std::ptr; 24 | 25 | const PROGRAM_SOURCE: &str = r#" 26 | kernel void saxpy_float (global float* z, 27 | global float const* x, 28 | global float const* y, 29 | float a) 30 | { 31 | const size_t i = get_global_id(0); 32 | z[i] = a*x[i] + y[i]; 33 | }"#; 34 | 35 | const KERNEL_NAME: &str = "saxpy_float"; 36 | 37 | fn main() -> Result<()> { 38 | // Find a usable device for this application 39 | let device_id = *get_all_devices(CL_DEVICE_TYPE_GPU)? 40 | .first() 41 | .expect("no device found in platform"); 42 | let device = Device::new(device_id); 43 | 44 | // Create a Context on an OpenCL device 45 | let context = Context::from_device(&device).expect("Context::from_device failed"); 46 | 47 | // Create a command_queue on the Context's device 48 | let queue = CommandQueue::create_default(&context, CL_QUEUE_PROFILING_ENABLE) 49 | .expect("CommandQueue::create_default failed"); 50 | 51 | // Build the OpenCL program source and create the kernel. 
52 | let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, "") 53 | .expect("Program::create_and_build_from_source failed"); 54 | let kernel = Kernel::create(&program, KERNEL_NAME).expect("Kernel::create failed"); 55 | 56 | ///////////////////////////////////////////////////////////////////// 57 | // Compute data 58 | 59 | // The input data 60 | const ARRAY_SIZE: usize = 1000; 61 | let ones: [cl_float; ARRAY_SIZE] = [1.0; ARRAY_SIZE]; 62 | let mut sums: [cl_float; ARRAY_SIZE] = [0.0; ARRAY_SIZE]; 63 | for i in 0..ARRAY_SIZE { 64 | sums[i] = 1.0 + 1.0 * i as cl_float; 65 | } 66 | 67 | // Create OpenCL device buffers 68 | let mut x = unsafe { 69 | Buffer::::create(&context, CL_MEM_READ_ONLY, ARRAY_SIZE, ptr::null_mut())? 70 | }; 71 | let mut y = unsafe { 72 | Buffer::::create(&context, CL_MEM_READ_ONLY, ARRAY_SIZE, ptr::null_mut())? 73 | }; 74 | let z = unsafe { 75 | Buffer::::create(&context, CL_MEM_WRITE_ONLY, ARRAY_SIZE, ptr::null_mut())? 76 | }; 77 | 78 | // Blocking write 79 | let _x_write_event = unsafe { queue.enqueue_write_buffer(&mut x, CL_BLOCKING, 0, &ones, &[])? }; 80 | 81 | // Non-blocking write, wait for y_write_event 82 | let y_write_event = 83 | unsafe { queue.enqueue_write_buffer(&mut y, CL_NON_BLOCKING, 0, &sums, &[])? }; 84 | 85 | // a value for the kernel function 86 | let a: cl_float = 300.0; 87 | 88 | // Use the ExecuteKernel builder to set the kernel buffer and 89 | // cl_float value arguments, before setting the one dimensional 90 | // global_work_size for the call to enqueue_nd_range. 91 | // Unwraps the Result to get the kernel execution event. 92 | let kernel_event = unsafe { 93 | ExecuteKernel::new(&kernel) 94 | .set_arg(&z) 95 | .set_arg(&x) 96 | .set_arg(&y) 97 | .set_arg(&a) 98 | .set_global_work_size(ARRAY_SIZE) 99 | .set_wait_event(&y_write_event) 100 | .enqueue_nd_range(&queue)? 
101 | }; 102 | 103 | let mut events: Vec = Vec::default(); 104 | events.push(kernel_event.get()); 105 | 106 | // Create a results array to hold the results from the OpenCL device 107 | // and enqueue a read command to read the device buffer into the array 108 | // after the kernel event completes. 109 | let mut results: [cl_float; ARRAY_SIZE] = [0.0; ARRAY_SIZE]; 110 | let read_event = 111 | unsafe { queue.enqueue_read_buffer(&z, CL_NON_BLOCKING, 0, &mut results, &events)? }; 112 | 113 | // Wait for the read_event to complete. 114 | read_event.wait()?; 115 | 116 | // Output the first and last results 117 | println!("results front: {}", results[0]); 118 | println!("results back: {}", results[ARRAY_SIZE - 1]); 119 | 120 | // Calculate the kernel duration, from the kernel_event 121 | let start_time = kernel_event.profiling_command_start()?; 122 | let end_time = kernel_event.profiling_command_end()?; 123 | let duration = end_time - start_time; 124 | println!("kernel execution duration (ns): {}", duration); 125 | 126 | Ok(()) 127 | } 128 | -------------------------------------------------------------------------------- /examples/opencl2serde.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 Via Technology Ltd. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use opencl3::Result; 16 | use opencl3::command_queue::CommandQueue; 17 | use opencl3::context::Context; 18 | use opencl3::device::{CL_DEVICE_TYPE_GPU, Device, get_all_devices}; 19 | use opencl3::error_codes::cl_int; 20 | use opencl3::kernel::{ExecuteKernel, Kernel}; 21 | use opencl3::memory::{CL_MAP_READ, CL_MAP_WRITE}; 22 | use opencl3::program::{CL_STD_2_0, Program}; 23 | use opencl3::svm::{ExtendSvmVec, SvmVec}; 24 | use opencl3::types::CL_BLOCKING; 25 | use serde::de::DeserializeSeed; 26 | use std::ptr; 27 | 28 | const PROGRAM_SOURCE: &str = r#" 29 | kernel void inclusive_scan_int (global int* output, 30 | global int const* values) 31 | { 32 | int sum = 0; 33 | size_t lid = get_local_id(0); 34 | size_t lsize = get_local_size(0); 35 | 36 | size_t num_groups = get_num_groups(0); 37 | for (size_t i = 0u; i < num_groups; ++i) 38 | { 39 | size_t lidx = i * lsize + lid; 40 | int value = work_group_scan_inclusive_add(values[lidx]); 41 | output[lidx] = sum + value; 42 | 43 | sum += work_group_broadcast(value, lsize - 1); 44 | } 45 | }"#; 46 | 47 | const KERNEL_NAME: &str = "inclusive_scan_int"; 48 | 49 | fn main() -> Result<()> { 50 | // Find a suitable device for this application 51 | let devices = get_all_devices(CL_DEVICE_TYPE_GPU)?; 52 | assert!(0 < devices.len()); 53 | 54 | // Find an OpenCL SVM device 55 | let mut device_id = ptr::null_mut(); 56 | let mut is_svm_capable: bool = false; 57 | for dev_id in devices { 58 | let device = Device::new(dev_id); 59 | let svm_mem_capability = device.svm_mem_capability(); 60 | is_svm_capable = 0 < svm_mem_capability; 61 | if is_svm_capable { 62 | device_id = dev_id; 63 | break; 64 | } 65 | } 66 | 67 | if is_svm_capable { 68 | // Create OpenCL context from the OpenCL svm device 69 | let device = Device::new(device_id); 70 | let vendor = device.vendor()?; 71 | let vendor_id = device.vendor_id()?; 72 | println!("OpenCL device vendor name: {}", vendor); 73 | println!("OpenCL device vendor id: {:X}", vendor_id); 74 | 75 
| ///////////////////////////////////////////////////////////////////// 76 | // Initialise OpenCL compute environment 77 | 78 | // Create a Context on the OpenCL svm device 79 | let context = Context::from_device(&device).expect("Context::from_device failed"); 80 | 81 | // Build the OpenCL program source and create the kernel. 82 | let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, CL_STD_2_0) 83 | .expect("Program::create_and_build_from_source failed"); 84 | 85 | let kernel = Kernel::create(&program, KERNEL_NAME).expect("Kernel::create failed"); 86 | 87 | // Create a command_queue on the Context's device 88 | let queue = CommandQueue::create_default_with_properties(&context, 0, 0) 89 | .expect("CommandQueue::create_default_with_properties failed"); 90 | 91 | // The input data 92 | const ARRAY_SIZE: usize = 8; 93 | const VALUE_ARRAY: &str = "[3,2,5,9,7,1,4,2]"; 94 | 95 | // Deserialize into an OpenCL SVM vector 96 | let mut test_values = SvmVec::::new(&context); 97 | 98 | let mut deserializer = serde_json::Deserializer::from_str(&VALUE_ARRAY); 99 | 100 | // Handle test_values if device only supports CL_DEVICE_SVM_COARSE_GRAIN_BUFFER 101 | if !test_values.is_fine_grained() { 102 | // SVM_COARSE_GRAIN_BUFFER needs to know the size of the data to allocate the SVM 103 | test_values = 104 | SvmVec::::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed"); 105 | // Map the SVM for a SVM_COARSE_GRAIN_BUFFER 106 | unsafe { queue.enqueue_svm_map(CL_BLOCKING, CL_MAP_WRITE, &mut test_values, &[])? 
}; 107 | // Clear the SVM for the deserializer 108 | test_values.clear(); 109 | } 110 | 111 | ExtendSvmVec(&mut test_values) 112 | .deserialize(&mut deserializer) 113 | .expect("Error deserializing the VALUE_ARRAY JSON string."); 114 | 115 | // Make test_values SVM vector immutable 116 | let test_values = test_values; 117 | 118 | // Unmap test_values if not a CL_MEM_SVM_FINE_GRAIN_BUFFER 119 | if !test_values.is_fine_grained() { 120 | let unmap_test_values_event = unsafe { queue.enqueue_svm_unmap(&test_values, &[])? }; 121 | unmap_test_values_event.wait()?; 122 | } 123 | 124 | // The output data, an OpenCL SVM vector 125 | let mut results = 126 | SvmVec::::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed"); 127 | 128 | // Run the sum kernel on the input data 129 | let sum_kernel_event = unsafe { 130 | ExecuteKernel::new(&kernel) 131 | .set_arg_svm(results.as_mut_ptr()) 132 | .set_arg_svm(test_values.as_ptr()) 133 | .set_global_work_size(ARRAY_SIZE) 134 | .enqueue_nd_range(&queue)? 135 | }; 136 | 137 | // Wait for the kernel to complete execution on the device 138 | sum_kernel_event.wait()?; 139 | 140 | // Map results if not a CL_MEM_SVM_FINE_GRAIN_BUFFER 141 | if !results.is_fine_grained() { 142 | unsafe { queue.enqueue_svm_map(CL_BLOCKING, CL_MAP_READ, &mut results, &[])? }; 143 | } 144 | 145 | // Convert SVM results to json 146 | let json_results = serde_json::to_string(&results).unwrap(); 147 | println!("json results: {}", json_results); 148 | 149 | // Unmap results if not a CL_MEM_SVM_FINE_GRAIN_BUFFER 150 | if !results.is_fine_grained() { 151 | let unmap_results_event = unsafe { queue.enqueue_svm_unmap(&results, &[])? 
}; 152 | unmap_results_event.wait()?; 153 | } 154 | } else { 155 | println!("OpenCL fine grained system SVM device not found") 156 | } 157 | 158 | Ok(()) 159 | } 160 | -------------------------------------------------------------------------------- /examples/opencl2image.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 Via Technology Ltd. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use cl3::ext::CL_IMAGE_FORMAT_NOT_SUPPORTED; 16 | use cl3::memory::{CL_MEM_OBJECT_IMAGE2D, CL_MEM_WRITE_ONLY, CL_RGBA, CL_UNSIGNED_INT8}; 17 | use cl3::types::{CL_NON_BLOCKING, cl_image_desc, cl_image_format}; 18 | use libc::c_void; 19 | use opencl3::Result; 20 | use opencl3::command_queue::{CL_QUEUE_PROFILING_ENABLE, CommandQueue}; 21 | use opencl3::context::Context; 22 | use opencl3::device::{CL_DEVICE_TYPE_GPU, Device}; 23 | use opencl3::kernel::{ExecuteKernel, Kernel}; 24 | use opencl3::memory::Image; 25 | use opencl3::program::{CL_STD_2_0, Program}; 26 | use opencl3::types::cl_event; 27 | 28 | const PROGRAM_SOURCE: &str = r#" 29 | kernel void colorize(write_only image2d_t image) 30 | { 31 | const size_t x = get_global_id(0); 32 | const size_t y = get_global_id(1); 33 | write_imageui(image, (int2)(x, y), (uint4)(x, y, 0, 255)); 34 | }"#; 35 | 36 | const KERNEL_NAME: &str = "colorize"; 37 | 38 | fn main() -> Result<()> { 39 | // Find a usable platform and device for this application 40 | let platforms = opencl3::platform::get_platforms()?; 41 | let platform = platforms.first().expect("no OpenCL platforms"); 42 | let device = *platform 43 | .get_devices(CL_DEVICE_TYPE_GPU)? 
44 | .first() 45 | .expect("no device found in platform"); 46 | let device = Device::new(device); 47 | 48 | // Create a Context on an OpenCL device 49 | let context = Context::from_device(&device).expect("Context::from_device failed"); 50 | 51 | // Print some information about the device 52 | println!( 53 | "CL_DEVICE_IMAGE_SUPPORT: {:?}", 54 | device.image_support().unwrap() 55 | ); 56 | println!( 57 | "CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS: {:?}", 58 | device.max_read_write_image_args().unwrap() 59 | ); 60 | println!( 61 | "CL_DEVICE_MAX_READ_IMAGE_ARGS: {:?}", 62 | device.max_read_image_args().unwrap() 63 | ); 64 | println!( 65 | "CL_DEVICE_MAX_WRITE_IMAGE_ARGS: {:?}", 66 | device.max_write_image_args().unwrap() 67 | ); 68 | println!( 69 | "CL_DEVICE_MAX_SAMPLERS: {:?}", 70 | device.max_device_samples().unwrap() 71 | ); 72 | let supported_formats = 73 | context.get_supported_image_formats(CL_MEM_WRITE_ONLY, CL_MEM_OBJECT_IMAGE2D)?; 74 | if supported_formats 75 | .iter() 76 | .filter(|f| { 77 | f.image_channel_order == CL_RGBA && f.image_channel_data_type == CL_UNSIGNED_INT8 78 | }) 79 | .count() 80 | <= 0 81 | { 82 | println!("Device does not support CL_RGBA with CL_UNSIGNED_INT8 for CL_MEM_WRITE_ONLY!"); 83 | return Err(CL_IMAGE_FORMAT_NOT_SUPPORTED.into()); 84 | } 85 | 86 | // Build the OpenCL program source and create the kernel. 
87 | let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, CL_STD_2_0) 88 | .expect("Program::create_and_build_from_source failed"); 89 | let kernel = Kernel::create(&program, KERNEL_NAME).expect("Kernel::create failed"); 90 | 91 | // Create a command_queue on the Context's device 92 | let queue = 93 | CommandQueue::create_default_with_properties(&context, CL_QUEUE_PROFILING_ENABLE, 0) 94 | .expect("CommandQueue::create_default_with_properties failed"); 95 | 96 | // Create an image 97 | let mut image = unsafe { 98 | Image::create( 99 | &context, 100 | CL_MEM_WRITE_ONLY, 101 | &cl_image_format { 102 | image_channel_order: CL_RGBA, 103 | image_channel_data_type: CL_UNSIGNED_INT8, 104 | }, 105 | &cl_image_desc { 106 | image_type: CL_MEM_OBJECT_IMAGE2D, 107 | image_width: 10 as usize, 108 | image_height: 10 as usize, 109 | image_depth: 1, 110 | image_array_size: 1, 111 | image_row_pitch: 0, 112 | image_slice_pitch: 0, 113 | num_mip_levels: 0, 114 | num_samples: 0, 115 | buffer: std::ptr::null_mut(), 116 | }, 117 | std::ptr::null_mut(), 118 | ) 119 | .expect("Image::create failed") 120 | }; 121 | 122 | // Run the kernel on the input data 123 | let kernel_event = unsafe { 124 | ExecuteKernel::new(&kernel) 125 | .set_arg(&image) 126 | .set_global_work_sizes(&[10usize, 10usize]) 127 | .enqueue_nd_range(&queue)? 128 | }; 129 | 130 | let mut events: Vec = Vec::default(); 131 | events.push(kernel_event.get()); 132 | 133 | // Fill the middle of the image with a solid color 134 | let fill_color = [11u32, 22u32, 33u32, 44u32]; 135 | let fill_event = unsafe { 136 | queue.enqueue_fill_image( 137 | &mut image, 138 | fill_color.as_ptr() as *const c_void, 139 | &[3usize, 3usize, 0usize] as *const usize, 140 | &[4usize, 4usize, 1usize] as *const usize, 141 | &events, 142 | )? 
143 | }; 144 | 145 | let mut events: Vec = Vec::default(); 146 | events.push(fill_event.get()); 147 | 148 | // Read the image data from the device 149 | let mut image_data = [0u8; 10 * 10 * 4]; 150 | let read_event = unsafe { 151 | queue.enqueue_read_image( 152 | &image, 153 | CL_NON_BLOCKING, 154 | &[0usize, 0usize, 0usize] as *const usize, 155 | &[10usize, 10usize, 1usize] as *const usize, 156 | 0, 157 | 0, 158 | image_data.as_mut_ptr() as *mut c_void, 159 | &events, 160 | )? 161 | }; 162 | 163 | // Wait for the read_event to complete. 164 | read_event.wait()?; 165 | 166 | // Print the image data 167 | println!("image_data: "); 168 | for y in 0..10 { 169 | for x in 0..10 { 170 | let offset = (y * 10 + x) * 4; 171 | print!( 172 | "({:>3}, {:>3}, {:>3}, {:>3}) ", 173 | image_data[offset], 174 | image_data[offset + 1], 175 | image_data[offset + 2], 176 | image_data[offset + 3] 177 | ); 178 | } 179 | println!(); 180 | } 181 | 182 | Ok(()) 183 | } 184 | -------------------------------------------------------------------------------- /docs/images/example_opencl_system.svg: -------------------------------------------------------------------------------- 1 | systemplatform_1platform_2platform_3device_1device_2device_3device_4 -------------------------------------------------------------------------------- /src/event.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2024 Via Technology Ltd. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | pub use cl3::event::*; 16 | 17 | use super::Result; 18 | use libc::c_void; 19 | 20 | /// An OpenCL event object. 21 | /// 22 | /// Has methods to return information from calls to clGetEventInfo and 23 | /// clGetEventProfilingInfo with the appropriate parameters. 24 | /// Implements the Drop trait to call release_event when the object is dropped. 25 | #[derive(Debug)] 26 | pub struct Event { 27 | event: cl_event, 28 | } 29 | 30 | impl From for Event { 31 | fn from(event: cl_event) -> Self { 32 | Self { event } 33 | } 34 | } 35 | 36 | impl From for cl_event { 37 | fn from(value: Event) -> Self { 38 | value.event as Self 39 | } 40 | } 41 | 42 | impl Drop for Event { 43 | fn drop(&mut self) { 44 | unsafe { release_event(self.event).expect("Error: clReleaseEvent") }; 45 | } 46 | } 47 | 48 | unsafe impl Send for Event {} 49 | unsafe impl Sync for Event {} 50 | 51 | impl Event { 52 | /// Create an Event from an OpenCL cl_event. 53 | /// 54 | /// * `event` - a valid OpenCL cl_event. 55 | /// 56 | /// returns the new Event 57 | pub const fn new(event: cl_event) -> Self { 58 | Self { event } 59 | } 60 | 61 | /// Get the underlying OpenCL cl_event. 62 | pub const fn get(&self) -> cl_event { 63 | self.event 64 | } 65 | 66 | /// Wait for the event to complete. 67 | pub fn wait(&self) -> Result<()> { 68 | let events = [self.get()]; 69 | Ok(wait_for_events(&events)?) 
70 | } 71 | 72 | pub fn command_execution_status(&self) -> Result { 73 | Ok(CommandExecutionStatus( 74 | get_event_info(self.event, CL_EVENT_COMMAND_EXECUTION_STATUS)?.into(), 75 | )) 76 | } 77 | 78 | pub fn command_type(&self) -> Result { 79 | Ok(EventCommandType( 80 | get_event_info(self.event, CL_EVENT_COMMAND_TYPE)?.into(), 81 | )) 82 | } 83 | 84 | pub fn reference_count(&self) -> Result { 85 | Ok(get_event_info(self.event, CL_EVENT_REFERENCE_COUNT)?.into()) 86 | } 87 | 88 | pub fn command_queue(&self) -> Result { 89 | Ok(isize::from(get_event_info(self.event, CL_EVENT_COMMAND_QUEUE)?) as cl_command_queue) 90 | } 91 | 92 | pub fn context(&self) -> Result { 93 | Ok(isize::from(get_event_info(self.event, CL_EVENT_CONTEXT)?) as cl_context) 94 | } 95 | 96 | /// Get data about an OpenCL event. 97 | /// Calls clGetEventInfo to get the desired data about the event. 98 | pub fn get_data(&self, param_name: cl_event_info) -> Result> { 99 | Ok(get_event_data(self.event, param_name)?) 100 | } 101 | 102 | pub fn set_callback( 103 | &self, 104 | command_exec_callback_type: cl_int, 105 | pfn_notify: extern "C" fn(cl_event, cl_int, *mut c_void), 106 | user_data: *mut c_void, 107 | ) -> Result<()> { 108 | Ok(set_event_callback( 109 | self.event, 110 | command_exec_callback_type, 111 | pfn_notify, 112 | user_data, 113 | )?) 
114 | } 115 | 116 | pub fn profiling_command_queued(&self) -> Result { 117 | Ok(get_event_profiling_info(self.event, CL_PROFILING_COMMAND_QUEUED)?.into()) 118 | } 119 | 120 | pub fn profiling_command_submit(&self) -> Result { 121 | Ok(get_event_profiling_info(self.event, CL_PROFILING_COMMAND_SUBMIT)?.into()) 122 | } 123 | 124 | pub fn profiling_command_start(&self) -> Result { 125 | Ok(get_event_profiling_info(self.event, CL_PROFILING_COMMAND_START)?.into()) 126 | } 127 | 128 | pub fn profiling_command_end(&self) -> Result { 129 | Ok(get_event_profiling_info(self.event, CL_PROFILING_COMMAND_END)?.into()) 130 | } 131 | 132 | /// CL_VERSION_2_0 133 | pub fn profiling_command_complete(&self) -> Result { 134 | Ok(get_event_profiling_info(self.event, CL_PROFILING_COMMAND_COMPLETE)?.into()) 135 | } 136 | 137 | /// Get profiling data about an OpenCL event. 138 | /// Calls clGetEventProfilingInfo to get the desired profiling data about the event. 139 | pub fn profiling_data(&self, param_name: cl_profiling_info) -> Result> { 140 | Ok(get_event_profiling_data(self.event, param_name)?) 
141 | } 142 | } 143 | 144 | #[cfg(test)] 145 | mod tests { 146 | use super::*; 147 | use crate::command_queue::{CL_QUEUE_PROFILING_ENABLE, CommandQueue}; 148 | use crate::context::Context; 149 | use crate::device::{CL_DEVICE_TYPE_GPU, Device}; 150 | use crate::memory::{Buffer, CL_MEM_READ_ONLY}; 151 | use crate::platform::get_platforms; 152 | use crate::types::{CL_NON_BLOCKING, cl_float}; 153 | use std::ptr; 154 | 155 | extern "C" fn event_callback_function( 156 | _event: cl_event, 157 | event_command_status: cl_int, 158 | _user_data: *mut c_void, 159 | ) { 160 | println!( 161 | "OpenCL event callback command status: {}", 162 | event_command_status 163 | ); 164 | } 165 | 166 | #[test] 167 | fn test_event() { 168 | let platforms = get_platforms().unwrap(); 169 | assert!(0 < platforms.len()); 170 | 171 | // Get the first platform 172 | let platform = &platforms[0]; 173 | 174 | let devices = platform.get_devices(CL_DEVICE_TYPE_GPU).unwrap(); 175 | assert!(0 < devices.len()); 176 | 177 | // Get the first device 178 | let device = Device::new(devices[0]); 179 | let context = Context::from_device(&device).unwrap(); 180 | 181 | // Create a command_queue on the Context's default device 182 | let queue = CommandQueue::create_default(&context, CL_QUEUE_PROFILING_ENABLE) 183 | .expect("CommandQueue::create_default failed"); 184 | 185 | const ARRAY_SIZE: usize = 1024; 186 | let ones: [cl_float; ARRAY_SIZE] = [1.0; ARRAY_SIZE]; 187 | 188 | let mut buffer = unsafe { 189 | Buffer::::create(&context, CL_MEM_READ_ONLY, ARRAY_SIZE, ptr::null_mut()) 190 | .unwrap() 191 | }; 192 | 193 | let events: Vec = Vec::default(); 194 | 195 | // Non-blocking write, wait for event 196 | let event = unsafe { 197 | queue 198 | .enqueue_write_buffer(&mut buffer, CL_NON_BLOCKING, 0, &ones, &events) 199 | .unwrap() 200 | }; 201 | 202 | // Set a callback_function on the event (i.e. write) being completed. 
203 | event 204 | .set_callback(CL_COMPLETE, event_callback_function, ptr::null_mut()) 205 | .unwrap(); 206 | 207 | let value = event.command_execution_status().unwrap(); 208 | println!("event.command_execution_status(): {}", value); 209 | // assert_eq!(CL_QUEUED, value.0); 210 | 211 | let value = event.command_type().unwrap(); 212 | println!("event.command_type(): {}", value); 213 | assert_eq!(CL_COMMAND_WRITE_BUFFER, value.0); 214 | 215 | let value = event.reference_count().unwrap(); 216 | println!("event.reference_count(): {}", value); 217 | // assert_eq!(1, value); 218 | 219 | let value = event.command_queue().unwrap(); 220 | assert!(queue.get() == value); 221 | 222 | let value = event.context().unwrap(); 223 | assert!(context.get() == value); 224 | 225 | event.wait().unwrap(); 226 | 227 | let value = event.command_execution_status().unwrap(); 228 | println!("event.command_execution_status(): {}", value); 229 | assert_eq!(CL_COMPLETE, value.0); 230 | 231 | let value = event.profiling_command_queued().unwrap(); 232 | println!("event.profiling_command_queued(): {}", value); 233 | assert!(0 < value); 234 | 235 | let value = event.profiling_command_submit().unwrap(); 236 | println!("event.profiling_command_submit(): {}", value); 237 | assert!(0 < value); 238 | 239 | let value = event.profiling_command_start().unwrap(); 240 | println!("event.profiling_command_start(): {}", value); 241 | assert!(0 < value); 242 | 243 | let value = event.profiling_command_end().unwrap(); 244 | println!("event.profiling_command_end(): {}", value); 245 | assert!(0 < value); 246 | 247 | // CL_VERSION_2_0 248 | match event.profiling_command_complete() { 249 | Ok(value) => println!("event.profiling_command_complete(): {}", value), 250 | Err(e) => println!("OpenCL error, event.profiling_command_complete(): {}", e), 251 | } 252 | } 253 | } 254 | -------------------------------------------------------------------------------- /src/platform.rs: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2024 Via Technology Ltd. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #![allow(clippy::missing_safety_doc)] 16 | 17 | pub use cl3::platform; 18 | 19 | use super::Result; 20 | use cl3::device; 21 | #[allow(unused_imports)] 22 | use cl3::dx9_media_sharing; 23 | #[allow(unused_imports)] 24 | use cl3::ext; 25 | #[allow(unused_imports)] 26 | use cl3::program; 27 | #[allow(unused_imports)] 28 | use cl3::types::{ 29 | cl_device_id, cl_device_type, cl_name_version, cl_platform_id, cl_platform_info, cl_uint, 30 | cl_ulong, cl_version, 31 | }; 32 | #[allow(unused_imports)] 33 | use libc::{c_void, intptr_t}; 34 | 35 | /// An OpenCL platform id and methods to query it. 36 | /// 37 | /// The query methods call clGetPlatformInfo with the relevant param_name, see: 38 | /// [Platform Queries](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#platform-queries-table).
39 | #[derive(Copy, Clone, Debug)] 40 | pub struct Platform { 41 | id: intptr_t, 42 | } 43 | 44 | impl From for Platform { 45 | fn from(value: cl_platform_id) -> Self { 46 | Self { 47 | id: value as intptr_t, 48 | } 49 | } 50 | } 51 | 52 | impl From for cl_platform_id { 53 | fn from(value: Platform) -> Self { 54 | value.id as Self 55 | } 56 | } 57 | 58 | unsafe impl Send for Platform {} 59 | unsafe impl Sync for Platform {} 60 | 61 | impl Platform { 62 | pub fn new(id: cl_platform_id) -> Self { 63 | Self { id: id as intptr_t } 64 | } 65 | 66 | /// Accessor for the underlying platform id. 67 | pub const fn id(&self) -> cl_platform_id { 68 | self.id as cl_platform_id 69 | } 70 | 71 | /// Get the ids of available devices of the given type on the Platform. 72 | /// # Examples 73 | /// ``` 74 | /// use opencl3::platform::get_platforms; 75 | /// use cl3::device::CL_DEVICE_TYPE_GPU; 76 | /// 77 | /// let platforms = get_platforms().unwrap(); 78 | /// assert!(0 < platforms.len()); 79 | /// 80 | /// // Choose a the first platform 81 | /// let platform = &platforms[0]; 82 | /// let device_ids = platform.get_devices(CL_DEVICE_TYPE_GPU).unwrap(); 83 | /// println!("CL_DEVICE_TYPE_GPU count: {}", device_ids.len()); 84 | /// assert!(0 < device_ids.len()); 85 | /// ``` 86 | pub fn get_devices(&self, device_type: cl_device_type) -> Result> { 87 | Ok(device::get_device_ids(self.id(), device_type)?) 88 | } 89 | 90 | #[cfg(any(feature = "cl_khr_dx9_media_sharing", feature = "dynamic"))] 91 | pub unsafe fn get_device_ids_from_dx9_intel( 92 | &self, 93 | dx9_device_source: dx9_media_sharing::cl_dx9_device_source_intel, 94 | dx9_object: *mut c_void, 95 | dx9_device_set: dx9_media_sharing::cl_dx9_device_set_intel, 96 | ) -> Result> { 97 | unsafe { 98 | Ok(dx9_media_sharing::get_device_ids_from_dx9_intel( 99 | self.id(), 100 | dx9_device_source, 101 | dx9_object, 102 | dx9_device_set, 103 | )?) 
104 | } 105 | } 106 | 107 | /// The OpenCL profile supported by the Platform, 108 | /// it can be FULL_PROFILE or EMBEDDED_PROFILE. 109 | pub fn profile(&self) -> Result { 110 | Ok(platform::get_platform_info(self.id(), platform::CL_PLATFORM_PROFILE)?.into()) 111 | } 112 | 113 | /// The OpenCL profile version supported by the Platform, 114 | /// e.g. OpenCL 1.2, OpenCL 2.0, OpenCL 2.1, etc. 115 | pub fn version(&self) -> Result { 116 | Ok(platform::get_platform_info(self.id(), platform::CL_PLATFORM_VERSION)?.into()) 117 | } 118 | 119 | /// The OpenCL Platform name string. 120 | pub fn name(&self) -> Result { 121 | Ok(platform::get_platform_info(self.id(), platform::CL_PLATFORM_NAME)?.into()) 122 | } 123 | 124 | /// The OpenCL Platform vendor string. 125 | pub fn vendor(&self) -> Result { 126 | Ok(platform::get_platform_info(self.id(), platform::CL_PLATFORM_VENDOR)?.into()) 127 | } 128 | 129 | /// A space separated list of extension names supported by the Platform. 130 | pub fn extensions(&self) -> Result { 131 | Ok(platform::get_platform_info(self.id(), platform::CL_PLATFORM_EXTENSIONS)?.into()) 132 | } 133 | 134 | /// The resolution of the host timer in nanoseconds as used by 135 | /// clGetDeviceAndHostTimer. 136 | /// CL_VERSION_2_1 137 | pub fn host_timer_resolution(&self) -> Result { 138 | Ok( 139 | platform::get_platform_info(self.id(), platform::CL_PLATFORM_HOST_TIMER_RESOLUTION)? 140 | .into(), 141 | ) 142 | } 143 | 144 | /// The detailed (major, minor, patch) version supported by the platform. 145 | /// CL_VERSION_3_0 146 | pub fn numeric_version(&self) -> Result { 147 | Ok(platform::get_platform_info(self.id(), platform::CL_PLATFORM_NUMERIC_VERSION)?.into()) 148 | } 149 | 150 | /// An array of description (name and version) structures that lists all the 151 | /// extensions supported by the platform. 
152 | /// CL_VERSION_3_0 153 | pub fn extensions_with_version(&self) -> Result> { 154 | Ok( 155 | platform::get_platform_info(self.id(), platform::CL_PLATFORM_EXTENSIONS_WITH_VERSION)? 156 | .into(), 157 | ) 158 | } 159 | 160 | /// cl_khr_external_memory 161 | pub fn platform_external_memory_import_handle_types_khr(&self) -> Result> { 162 | Ok(platform::get_platform_info( 163 | self.id(), 164 | ext::CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR, 165 | )? 166 | .into()) 167 | } 168 | 169 | /// cl_khr_external_semaphore 170 | pub fn platform_semaphore_import_handle_types_khr(&self) -> Result> { 171 | Ok(platform::get_platform_info( 172 | self.id(), 173 | ext::CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, 174 | )? 175 | .into()) 176 | } 177 | 178 | /// cl_khr_external_semaphore 179 | pub fn platform_semaphore_export_handle_types_khr(&self) -> Result> { 180 | Ok(platform::get_platform_info( 181 | self.id(), 182 | ext::CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, 183 | )? 184 | .into()) 185 | } 186 | 187 | /// cl_khr_semaphore 188 | pub fn platform_semaphore_types_khr(&self) -> Result> { 189 | Ok(platform::get_platform_info(self.id(), ext::CL_PLATFORM_SEMAPHORE_TYPES_KHR)?.into()) 190 | } 191 | 192 | /// Get data about an OpenCL platform. 193 | /// Calls clGetPlatformInfo to get the desired data about the platform. 194 | pub fn get_data(&self, param_name: cl_platform_info) -> Result> { 195 | Ok(platform::get_platform_data(self.id(), param_name)?) 196 | } 197 | 198 | /// Unload an OpenCL compiler for a platform. 199 | /// CL_VERSION_1_2 200 | /// 201 | /// # Safety 202 | /// 203 | /// Compiling is unsafe after the compiler has been unloaded. 204 | #[cfg(any(feature = "CL_VERSION_1_2", feature = "dynamic"))] 205 | pub unsafe fn unload_compiler(&self) -> Result<()> { 206 | unsafe { Ok(program::unload_platform_compiler(self.id())?) } 207 | } 208 | } 209 | 210 | /// Get the available OpenCL platforms. 
211 | /// # Examples 212 | /// ``` 213 | /// use opencl3::platform::get_platforms; 214 | /// 215 | /// let platforms = get_platforms().unwrap(); 216 | /// println!("Number of OpenCL platforms: {}", platforms.len()); 217 | /// assert!(0 < platforms.len()); 218 | /// ``` 219 | /// returns a Result containing a vector of available Platforms 220 | /// or the error code from the OpenCL C API function. 221 | pub fn get_platforms() -> Result> { 222 | let platform_ids = platform::get_platform_ids()?; 223 | Ok(platform_ids 224 | .iter() 225 | .map(|id| Platform::new(*id)) 226 | .collect::>()) 227 | } 228 | 229 | #[cfg(any(feature = "cl_khr_icd", feature = "dynamic"))] 230 | pub fn icd_get_platform_ids_khr() -> Result> { 231 | let platform_ids = ext::icd_get_platform_ids_khr()?; 232 | Ok(platform_ids 233 | .iter() 234 | .map(|id| Platform::new(*id)) 235 | .collect::>()) 236 | } 237 | 238 | #[cfg(test)] 239 | mod tests { 240 | use super::*; 241 | 242 | #[test] 243 | fn test_get_platforms() { 244 | let platforms = get_platforms().unwrap(); 245 | println!("Number of platforms: {}", platforms.len()); 246 | assert!(0 < platforms.len()); 247 | 248 | for platform in platforms { 249 | println!("Platform Debug Trait: {:?}", platform); 250 | println!("CL_PLATFORM_NAME: {}", platform.name().unwrap()); 251 | println!("CL_PLATFORM_PROFILE: {}", platform.profile().unwrap()); 252 | 253 | let value = platform.version().unwrap(); 254 | println!("CL_PLATFORM_VERSION: {:?}", value); 255 | 256 | println!("CL_PLATFORM_VENDOR: {}", platform.vendor().unwrap()); 257 | println!( 258 | "CL_PLATFORM_EXTENSIONS: {:?}", 259 | platform.extensions().unwrap() 260 | ); 261 | 262 | // CL_VERSION_2_1 value, may not be supported 263 | match platform.host_timer_resolution() { 264 | Ok(value) => { 265 | println!("CL_PLATFORM_HOST_TIMER_RESOLUTION: {}", value) 266 | } 267 | Err(e) => println!( 268 | "OpenCL error, CL_PLATFORM_HOST_TIMER_RESOLUTION: {:?}, {}", 269 | e, e 270 | ), 271 | }; 272 | 273 | println!(); 
274 | } 275 | } 276 | } 277 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2021 Via Technology Ltd. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | //! [![crates.io](https://img.shields.io/crates/v/opencl3.svg)](https://crates.io/crates/opencl3) 16 | //! [![docs.io](https://docs.rs/opencl3/badge.svg)](https://docs.rs/opencl3/) 17 | //! [![OpenCL 3.0](https://img.shields.io/badge/OpenCL-3.0-blue.svg)](https://www.khronos.org/registry/OpenCL/) 18 | //! [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 19 | //! 20 | //! A Rust implementation of the Khronos [OpenCL](https://www.khronos.org/registry/OpenCL/) 21 | //! API. 22 | //! 23 | //! # Description 24 | //! 25 | //! This crate provides a relatively simple, object based model of the OpenCL 3.0 26 | //! [API](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html). 27 | //! It is built upon the [cl3](https://crates.io/crates/cl3) crate, which 28 | //! provides a functional interface to the OpenCL API. 29 | //! 30 | //! **OpenCL** (Open Computing Language) is a framework for general purpose 31 | //! parallel programming across heterogeneous devices including: CPUs, GPUs, 32 | //!
DSPs, FPGAs and other processors or hardware accelerators. 33 | //! 34 | //! It is often considered as an open-source alternative to Nvidia's proprietary 35 | //! Compute Unified Device Architecture [CUDA](https://developer.nvidia.com/cuda-zone) 36 | //! for performing General-purpose computing on GPUs, see 37 | //! [GPGPU](https://en.wikipedia.org/wiki/General-purpose_computing_on_graphics_processing_units). 38 | //! 39 | //! The [OpenCL Specification](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_the_opencl_architecture) 40 | //! has evolved over time and not all device vendors support all OpenCL features. 41 | //! 42 | //! [OpenCL 3.0](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html) 43 | //! is a unified specification that adds little new functionality to previous OpenCL versions. 44 | //! It specifies that all **OpenCL 1.2** features are **mandatory**, while all 45 | //! OpenCL 2.x and OpenCL 3.0 features are now optional. 46 | //! 47 | //! See [OpenCL Description](https://github.com/kenba/opencl3/blob/main/docs/opencl_description.md). 48 | //! 49 | //! # OpenCL Architecture 50 | //! 51 | //! The [OpenCL Specification](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_the_opencl_architecture) 52 | //! considers OpenCL as four models: 53 | //! 54 | //! * **Platform Model** 55 | //! The physical OpenCL hardware: a *host* containing one or more OpenCL [platform]s, 56 | //! each connected to one or more OpenCL [device]s. 57 | //! An OpenCL application running on the *host*, creates an OpenCL environment 58 | //! called a [context] on a single [platform] to process data on one or more 59 | //! of the OpenCL [device]s connected to the [platform]. 60 | //! 61 | //! * **Programming Model** 62 | //! An OpenCL [program] consists of OpenCL [kernel] functions that can run 63 | //! on OpenCL [device]s within a [context]. 64 | //! 
OpenCL [program]s must be created (and most must be built) for a [context] 65 | //! before their OpenCL [kernel] functions can be created from them, 66 | //! the exception being "built-in" [kernel]s which don't need to be built 67 | //! (or compiled and linked). 68 | //! OpenCL [kernel]s are controlled by an OpenCL application that runs on the 69 | //! *host*, see **Execution Model**. 70 | //! 71 | //! * **Memory Model** 72 | //! **OpenCL 1.2** memory is divided into two fundamental memory regions: 73 | //! **host memory** and **device memory**. 74 | //! OpenCL [kernel]s run on **device memory**; an OpenCL application must write 75 | //! **host memory** to **device memory** for OpenCL [kernel]s to process. 76 | //! An OpenCL application must also read results from **device memory** to 77 | //! **host memory** after a [kernel] has completed execution. 78 | //! **OpenCL 2.0** shared virtual memory ([svm]) is shared between the host 79 | //! and device(s) and synchronised by OpenCL; eliminating the explicit transfer 80 | //! of memory between host and device(s) memory regions. 81 | //! 82 | //! * **Execution Model** 83 | //! An OpenCL application creates at least one OpenCL [command_queue] for each 84 | //! OpenCL [device] (or *sub-device*) within its OpenCL [context]. 85 | //! OpenCL [kernel] executions and **OpenCL 1.2** memory reads and writes are 86 | //! "enqueued" by the OpenCL application on each [command_queue]. 87 | //! An application can wait for all "enqueued" commands to finish on a 88 | //! [command_queue] or it can wait for specific [event]s to complete. 89 | //! Normally [command_queue]s run commands in the order that they are given. 90 | //! However, [event]s can be used to execute [kernel]s out-of-order. 91 | //! 92 | //! # OpenCL Objects 93 | //! 94 | //! [Platform]: platform/struct.Platform.html 95 | //! [Device]: device/struct.Device.html 96 | //! [SubDevice]: device/struct.SubDevice.html 97 | //! [Context]: context/struct.Context.html 98 | //! 
[Program]: program/struct.Program.html 99 | //! [Kernel]: kernel/struct.Kernel.html 100 | //! [Buffer]: memory/struct.Buffer.html 101 | //! [Image]: memory/struct.Image.html 102 | //! [Sampler]: memory/struct.Sampler.html 103 | //! [SvmVec]: svm/struct.SvmVec.html 104 | //! [Pipe]: memory/struct.Pipe.html 105 | //! [CommandQueue]: command_queue/struct.CommandQueue.html 106 | //! [Event]: event/struct.Event.html 107 | //! 108 | //! ## Platform Model 109 | //! 110 | //! The platform model has three objects: 111 | //! * [Platform] 112 | //! * [Device] 113 | //! * [Context] 114 | //! 115 | //! Of these three objects, the OpenCL [Context] is by *far* the most important. 116 | //! Each application must create a [Context] from the most appropriate [Device]s 117 | //! available on one of the [Platform]s on the *host* system that the application 118 | //! is running on. 119 | //! 120 | //! Most example OpenCL applications just choose the first available [Platform] 121 | //! and [Device] for their [Context]. However, since many systems have multiple 122 | //! platforms and devices, the first [Platform] and [Device] are unlikely to 123 | //! provide the best performance. 124 | //! For example, on a system with an APU (combined CPU and GPU, e.g. Intel i7) 125 | //! and a discrete graphics card (e.g. Nvidia GTX 1070) OpenCL may find 126 | //! either the integrated GPU or the GPU on the graphics card first. 127 | //! 128 | //! OpenCL applications often require the performance of discrete graphics cards 129 | //! or specific OpenCL features, such as [svm] or double/half floating point 130 | //! precision. In such cases, it is necessary to query the [Platform]s and 131 | //! [Device]s to choose the most appropriate [Device]s for the application before 132 | //! creating the [Context]. 133 | //! 134 | //! The [Platform] and [Device] modules contain structures and methods to simplify 135 | //! querying the host system [Platform]s and [Device]s to create a [Context]. 136 | //! 
137 | //! ## Programming Model 138 | //! 139 | //! The OpenCL programming model has two objects: 140 | //! * [Program] 141 | //! * [Kernel] 142 | //! 143 | //! OpenCL [Kernel] functions are contained in OpenCL [Program]s. 144 | //! 145 | //! Kernels are usually defined as functions in OpenCL [Program] source code, 146 | //! however OpenCL [Device]s may contain built-in [Kernel]s, 147 | //! e.g.: some Intel GPUs have built-in motion estimation kernels. 148 | //! 149 | //! OpenCL [Program] objects can be created from OpenCL source code, 150 | //! built-in kernels, binaries and intermediate language binaries. 151 | //! Depending upon how an OpenCL [Program] object was created, it may need to 152 | //! be built (or compiled and linked) before the [Kernel]s in them can be 153 | //! created. 154 | //! 155 | //! All the [Kernel]s in a [Program] can be created together or they can be 156 | //! created individually, by name. 157 | //! 158 | //! ## Memory Model 159 | //! 160 | //! The OpenCL memory model consists of five objects: 161 | //! * [Buffer] 162 | //! * [Image] 163 | //! * [Sampler] 164 | //! * [SvmVec] 165 | //! * [Pipe] 166 | //! 167 | //! [Buffer], [Image] and [Sampler] are OpenCL 1.2 (i.e. **mandatory**) objects, 168 | //! [svm] and [Pipe] are OpenCL 2.0 (i.e. optional) objects. 169 | //! 170 | //! A [Buffer] is a contiguous block of memory used for general purpose data. 171 | //! An [Image] holds data for one, two or three dimensional images. 172 | //! A [Sampler] describes how a [Kernel] is to sample an [Image], see 173 | //! [Sampler objects](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_sampler_objects). 174 | //! 175 | //! [Shared Virtual Memory](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#shared-virtual-memory) 176 | //! enables the host and kernels executing on devices to directly share data 177 | //! without explicitly transferring it. 178 | //! 179 | //! 
[Pipes](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_pipes) 180 | //! store memory as FIFOs between [Kernel]s. [Pipe]s are not accessible from the host. 181 | //! 182 | //! ## Execution Model 183 | //! 184 | //! The OpenCL execution model has two objects: 185 | //! * [CommandQueue] 186 | //! * [Event] 187 | //! 188 | //! OpenCL commands to transfer memory and execute kernels on devices are 189 | //! performed via [CommandQueue]s. 190 | //! 191 | //! Each OpenCL device (and sub-device) must have at least one command_queue 192 | //! associated with it, so that commands may be enqueued on to the device. 193 | //! 194 | //! There are several OpenCL [CommandQueue] "enqueue_" methods to transfer 195 | //! data between host and device memory, map SVM memory and execute kernels. 196 | //! All the "enqueue_" methods accept an event_wait_list parameter and return 197 | //! an [Event] that can be used to monitor and control *out-of-order* execution 198 | //! of kernels on a [CommandQueue], see 199 | //! [Event Objects](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#event-objects). 200 | 201 | extern crate cl3; 202 | 203 | #[cfg(any(feature = "cl_khr_command_buffer", feature = "dynamic"))] 204 | pub mod command_buffer; 205 | pub mod command_queue; 206 | pub mod context; 207 | pub mod device; 208 | pub mod event; 209 | pub mod kernel; 210 | pub mod memory; 211 | pub mod platform; 212 | pub mod program; 213 | #[cfg(any(feature = "CL_VERSION_2_0", feature = "dynamic"))] 214 | pub mod svm; 215 | 216 | pub mod error_codes { 217 | pub use cl3::error_codes::*; 218 | } 219 | pub mod types { 220 | pub use cl3::types::*; 221 | } 222 | 223 | use std::result; 224 | /// Custom Result type to output OpenCL error text. 
225 | pub type Result = result::Result; 226 | -------------------------------------------------------------------------------- /docs/images/opencl_memory_objects.svg: -------------------------------------------------------------------------------- 1 | BufferMemoryImagePipeSVM -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2020 Via Technology Ltd. 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /RELEASES.md: -------------------------------------------------------------------------------- 1 | # Releases 2 | 3 | ## Version 0.12.1 (2025-09-22) 4 | 5 | * PR [#72](https://github.com/kenba/opencl3/pull/72) fix static linking by disabling default features in cl3. 6 | 7 | ## Version 0.12.0 (2025-04-19) 8 | 9 | ### Changes 10 | 11 | * Update for Rust edition = "2024" 12 | 13 | ### New Features 14 | 15 | * Add `cl_qcom_perf_hint` feature. 16 | * Add `cl_ext_buffer_device_address` feature. 17 | 18 | ## Version 0.11.0 (2025-02-19) 19 | 20 | ### Changes 21 | 22 | * Up to date with OpenCL-Headers tag v2024.10.24. 23 | 24 | ## Version 0.10.0 (2024-12-21) 25 | 26 | ### Bug fixes 27 | 28 | * Issue [#69](https://github.com/kenba/opencl3/issues/69) Unsound issue in SvmVec. 29 | 30 | ### New Features 31 | 32 | * Issue [#70](https://github.com/kenba/opencl3/issues/70) Support dynamic linking. 
33 | 34 | ## Version 0.9.5 (2023-12-22) 35 | 36 | ### New Features 37 | 38 | * Issue [#64](https://github.com/kenba/opencl3/issues/64) Update with new features in OpenCL-Headers repo. 39 | 40 | ## Version 0.9.4 (2023-11-05) 41 | 42 | ### New Features 43 | 44 | * Issue [#64](https://github.com/kenba/opencl3/issues/64) Update with new functions in OpenCL-Headers repo. 45 | 46 | ## Version 0.9.3 (2023-05-11) 47 | 48 | ### New Features 49 | 50 | * PR [#60](https://github.com/kenba/opencl3/pull/60) Support querying PCIE information for Intel devices. 51 | 52 | ## Version 0.9.2 (2022-12-30) 53 | 54 | ### Bug fixes 55 | 56 | * Issue [#58](https://github.com/kenba/opencl3/issues/58) Compile failure due to unmarked unsafe function call in program.rs. 57 | 58 | ## Version 0.9.1 (2022-09-20) 59 | 60 | ### Bug fixes 61 | 62 | * PR [#54](https://github.com/kenba/opencl3/pull/54) Unshadow device re-exports. 63 | 64 | ### New Features 65 | 66 | * Issue [#55](https://github.com/kenba/opencl3/issues/55) Add Khronos `cl_khr_command_buffer_mutable_dispatch` extension. 67 | 68 | ## Version 0.9.0 (2022-09-10) 69 | 70 | ### Breaking Changes 71 | 72 | * Issue [#51](https://github.com/kenba/opencl3/issues/51) Undefined behaviour when using underlying OpenCL pointers without any unsafe. 73 | * Issue [#52](https://github.com/kenba/opencl3/issues/52) `CL_MEM_USE_HOST_PTR` can result in undefined behaviour. 74 | 75 | ### New Features 76 | 77 | * PR [#53](https://github.com/kenba/opencl3/pull/53) added `sync` for all threadsafe OpenCL objects. 78 | 79 | ## Version 0.8.1 (2022-07-23) 80 | 81 | ### Bug fixes 82 | 83 | * Issue [#49](https://github.com/kenba/opencl3/issues/49) Better Error messages for 84 | `ExecuteKernel::set_arg`. 85 | 86 | ### New Features 87 | 88 | * Update for OpenCL extension `cl_ext_image_requirements_info`. 89 | 90 | * PR [#50](https://github.com/kenba/opencl3/issues/50) Introduce `track_caller` for panicing methods of `ExecuteKernel`. 
91 | 92 | ## Version 0.8.0 (2022-05-02) 93 | 94 | ### Breaking Changes 95 | 96 | * Issue [#47](https://github.com/kenba/opencl3/issues/47) Base `opencl3` on [opencl-sys](https://crates.io/crates/opencl-sys) crate. 97 | * Issue [#48](https://github.com/kenba/opencl3/issues/48) Put deprecation notices behind conditional attributes. 98 | 99 | ## Version 0.7.0 (2022-04-10) 100 | 101 | ### Breaking Changes 102 | 103 | * Transition to Rust 2021 Edition. 104 | 105 | ## Version 0.6.3 (2021-12-30) 106 | 107 | ### New Features 108 | 109 | * Issue [#44](https://github.com/kenba/opencl3/issues/44) Add provisional command-buffer extension. See: [cl_khr_command_buffer](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_command_buffer). 110 | 111 | ## Version 0.6.2 (2021-12-19) 112 | 113 | ### New Features 114 | 115 | * Issue [#37](https://github.com/kenba/opencl3/issues/37) Implement DeserializeSeed for ExtendSvmVec to enable `serde` to deserialize directly into an `SvmVec`. 116 | 117 | ### Bug fixes 118 | 119 | * Issue [#45](https://github.com/kenba/opencl3/issues/45) Examples and tests have `CL_MEM_READ_ONLY` and `CL_MEM_WRITE_ONLY` swapped over. 120 | 121 | ## Version 0.6.1 (2021-11-12) 122 | 123 | ### New Features 124 | 125 | * Issue [#43](https://github.com/kenba/opencl3/issues/43) Update for new OpenCL extensions: `cl_khr_external_memory`, `cl_khr_external_semaphore` and `cl_khr_semaphore`. 126 | 127 | ## Version 0.6.0 (2021-10-16) 128 | 129 | ### Breaking Changes 130 | 131 | * Issue [#41](https://github.com/kenba/opencl3/issues/41) Remove cl3 Info enums to support new OpenCL versions and extensions. 132 | * Add UUID and LUID types. See cl3 Issue [#13](https://github.com/kenba/cl3/issues/13) Remove Info enums to support new OpenCL versions and extensions. 133 | * Remove example from README.md. 
134 | 135 | ## Version 0.5.3 (2021-10-10) 136 | 137 | ### New Features 138 | 139 | * Issue [#38](https://github.com/kenba/opencl3/issues/38) Add SVM fine grain system support. 140 | * Issue [#40](https://github.com/kenba/opencl3/issues/40) Replace all calls to `to_string` with `from` or `into`. 141 | * Issue [#42](https://github.com/kenba/opencl3/issues/42) add `From` traits. 142 | * Add `get_all_devices` function. 143 | 144 | ## Version 0.5.2 (2021-09-19) 145 | 146 | ```toml 147 | [dependencies] 148 | libc = "0.2" 149 | cl3 = { version = "0.4", default-features = false } 150 | serde = { version = "1.0", optional = true } 151 | ``` 152 | 153 | ### New Features 154 | 155 | * Issue [#39](https://github.com/kenba/opencl3/issues/39) Update for latest OpenCL-Headers. 156 | * Add CONTRIBUTING and CODE_OF_CONDUCT documents. 157 | 158 | ## Version 0.5.1 (2021-09-17) 159 | 160 | ```toml 161 | [dependencies] 162 | libc = "0.2" 163 | cl3 = { version = "0.4.2", default-features = false } 164 | serde = { version = "1.0", optional = true } 165 | ``` 166 | 167 | ### New Features 168 | 169 | * Issue [#37](https://github.com/kenba/opencl3/issues/37) Implement Serde's Serialize, Deserialize for SvmVec. 170 | 171 | ### Bug fixes 172 | 173 | * Issue [#32](https://github.com/kenba/opencl3/issues/32) Example from readme has zero output on GTX 1060 Max-Q. 174 | * Issue [#35](https://github.com/kenba/opencl3/issues/35) Superfluous/Misleading generic parameter in `ExecuteKernel::set_arg_local_buffer`. 175 | 176 | ## Version 0.5.0 (2021-09-12) 177 | 178 | ### Breaking Changes 179 | 180 | * Improve `SVM` interface and documentation. 181 | * Remove svm_capabilities parameter from `SvmVec` methods. 182 | 183 | ### Bug fixes 184 | 185 | * Issue [#33](https://github.com/kenba/opencl3/issues/33) Coarse-grained SVM has to be mapped before usage! 
186 | 187 | ## Version 0.4.1 (2021-08-21) 188 | 189 | Depends on: 190 | `cl3 = { version = "0.4.2", default-features = false }` 191 | 192 | ### New Features 193 | 194 | * Issue [#30](https://github.com/kenba/opencl3/issues/30) opencl3 cannot be compiled with OpenCl 1.2 features only. 195 | 196 | ## Version 0.4.0 (2021-08-20) 197 | 198 | Depends on `cl3` = "0.4.2". 199 | 200 | ### Breaking Changes 201 | 202 | * Issue [#26](https://github.com/kenba/opencl3/issues/26) Should `CommandQueue.html::enqueue_write_buffer` take a mutable buffer reference. 203 | * PR [#27](https://github.com/kenba/opencl3/pull/27) Make mutability explicit. 204 | 205 | ### New Features 206 | 207 | * Issue [#25](https://github.com/kenba/opencl3/issues/25) Using `set_event_callback`. 208 | 209 | ## Version 0.3.1 (2021-08-06) 210 | 211 | Depends on `cl3` = "0.4.1". 212 | 213 | ### New Features 214 | 215 | * Add Device method for `cl_khr_integer_dot_product` extension. 216 | 217 | ## Version 0.3.0 (2021-07-10) 218 | 219 | ### Breaking Changes 220 | 221 | * Issue [#21](https://github.com/kenba/opencl3/issues/21) `Device::available()` should return a boolean. 222 | * PR [#22](https://github.com/kenba/opencl3/pull/22) Return booleans for device information where applicable. 223 | * Issue [#24](https://github.com/kenba/opencl3/issues/24) Use `bool` instead of `cl_bool`. 224 | * Use CL_BLOCKING and CL_NON_BLOCKING in enqueue calls. 225 | 226 | ## Version 0.2.4 (2021-07-03) 227 | 228 | ### New Features 229 | 230 | * Issue [#18](https://github.com/kenba/opencl3/issues/18) Return UUID as array. 231 | * PR [#19](https://github.com/kenba/opencl3/pull/19) Export sizes of UUID and LUID. 232 | 233 | ### Bug fixes 234 | 235 | * Issue [#20](https://github.com/kenba/opencl3/issues/20) Restore `c_void` to program.rs. 236 | 237 | ## Version 0.2.3 (2021-05-30) 238 | 239 | Depends on `cl3` = "0.4.0". 
240 | 241 | ### New Features 242 | 243 | * Issue [#15](https://github.com/kenba/opencl3/issues/15) It's safe to implement `Send` for most of the types. 244 | * PR [#16](https://github.com/kenba/opencl3/pull/16) Implement Send for most of the types. 245 | * PR [#17](https://github.com/kenba/opencl3/pull/17) Implement Send for some of the types. 246 | 247 | ## Version 0.2.2 (2021-05-22) 248 | 249 | Depends on `cl3` = "0.3.1". 250 | 251 | ### New Features 252 | 253 | * Issue [#13](https://github.com/kenba/opencl3/issues/13) Higher level create_sub_buffer call. 254 | * Issue [#14](https://github.com/kenba/opencl3/issues/14) Adding Debug derives. 255 | * Add OpenCL `cl_ext.h` functions. 256 | * Add `Direct3D` extension methods. 257 | * Add feature `cl_apple_setmemobjectdestructor` for `cl3`. 258 | 259 | ## Version 0.2.1 (2021-05-16) 260 | 261 | Depends on `cl3` = "0.3". 262 | 263 | ### New Features 264 | 265 | * Add extension `device_info` values. 266 | * Add `OpenGL` extension functions. 267 | * Add `OpenGL ES` extension functions. 268 | 269 | ## Version 0.2.0 (2021-04-18) 270 | 271 | Depends on `cl3` = "0.2". 272 | 273 | ### Breaking Changes 274 | 275 | * Issue [#10](https://github.com/kenba/opencl3/issues/10) Change the API to use String instead of ffi::CString. 276 | * Change `set_wait_event` to take `Event` reference. 277 | 278 | ### New Features 279 | 280 | * Issue [#9](https://github.com/kenba/opencl3/issues/9) Support running multiple instances of the same kernel simultaneously. 281 | * Issue [#12](https://github.com/kenba/opencl3/issues/12) Improve OpenCL error handling. 282 | * Add `from_device_type` method for `Context`. 283 | * Add `ClMem` trait object. 284 | * Add `CommandExecutionStatus` and `EventCommandType`. 
285 | 286 | ## Version 0.1.4 (2021-03-26) 287 | 288 | ### Changes 289 | 290 | * PR [#4](https://github.com/kenba/opencl3/pull/4) Implement Clone for CommandQueue 291 | * Issue [#5](https://github.com/kenba/opencl3/issues/5) Consider replacing unwrap with expect for error handling. 292 | * PR [#6](https://github.com/kenba/opencl3/pull/6) Make types Send and Sync where applicable. 293 | * PR [#7](https://github.com/kenba/opencl3/pull/7) Implement Clone for most of the types. 294 | * Issue [#8](https://github.com/kenba/opencl3/issues/8) Retrieving a program build log might be impossible. 295 | * PR [#10](https://github.com/kenba/opencl3/pull/10) Replace calls to to_str with to_string for issue [#10](https://github.com/kenba/opencl3/issues/10). 296 | 297 | ## Version 0.1.3 (2021-01-16) 298 | 299 | ### Changes 300 | 301 | * PR [#1](https://github.com/kenba/opencl3/pull/1) Add Buffer type field as PhantomData. 302 | * Issue [#2](https://github.com/kenba/opencl3/issues/2) Consider adding PhantomData to Image and Pipe memory objects. 303 | * PR [#3](https://github.com/kenba/opencl3/pull/3) Remove Buffer cast method. 304 | * Remove unnecessary templates from methods. 305 | 306 | ## Version 0.1.2 (2021-01-12) 307 | 308 | ### Changes 309 | 310 | * Remove `event_wait_list` from the `enqueue_nd_range` method. 311 | * Add `wait` method to `event`. 312 | * Add `opencl2_kernel_test.rs`. 313 | * Add example to README. 314 | * Don't raise error in `integration_test` if device is not SVM capable 315 | 316 | ## Version 0.1.1 (2021-01-04) 317 | 318 | ### Bug fixes 319 | 320 | * Fix build on OpenCL 2.0 ICD. 321 | * Fix integration tests on Intel Skylake. 322 | * Get the max_work_item_dimensions from the device CommandQueue. 323 | 324 | ## Version 0.1.0 (2020-12-31) 325 | 326 | Depends on `cl3` = "0.1". 
327 | 328 | ### Features 329 | 330 | * OpenCL objects implemented by Rust structs that manage their resources by implementing the `Drop` trait to perform [RAII](https://doc.rust-lang.org/rust-by-example/scope/raii.html), e.g. Context, Program, CommandQueue, etc. 331 | * `safe` Rust functions that call OpenCL C API functions and return Rust Result types. 332 | * A `Vec` implemented using OpenCL Shared Virtual Memory (SVM), see [svm](src/svm.rs). 333 | -------------------------------------------------------------------------------- /tests/opencl2_kernel_test.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021-2024 Via Technology Ltd. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #[cfg(any(feature = "CL_VERSION_2_0", feature = "dynamic"))] 16 | extern crate opencl3; 17 | 18 | use cl3::device::{ 19 | CL_DEVICE_SVM_FINE_GRAIN_BUFFER, CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, CL_DEVICE_TYPE_ALL, 20 | CL_DEVICE_TYPE_GPU, 21 | }; 22 | use opencl3::Result; 23 | use opencl3::command_queue::CommandQueue; 24 | use opencl3::context::Context; 25 | use opencl3::device::Device; 26 | use opencl3::kernel::{ExecuteKernel, Kernel, create_program_kernels}; 27 | use opencl3::platform::get_platforms; 28 | use opencl3::program::{CL_STD_2_0, Program}; 29 | use opencl3::svm::SvmVec; 30 | use opencl3::types::cl_int; 31 | use std::ptr; 32 | 33 | // The OpenCL kernels in PROGRAM_SOURCE below use built-in sub-group functions: 34 | // sub_group_reduce_add, sub_group_scan_inclusive_add and sub_group_broadcast. 35 | // NOTE(review): presumably these need device sub-group support (cl_khr_subgroups) - confirm. 36 | const PROGRAM_SOURCE: &str = r#" 37 | kernel void sum_int (global int* sums, 38 | global int const* values) 39 | { 40 | int value = sub_group_reduce_add(values[get_global_id(0)]); 41 | 42 | if (0u == get_local_id(0)) 43 | sums[get_group_id(0)] = value; 44 | } 45 | 46 | kernel void inclusive_scan_int (global int* output, 47 | global int const* values) 48 | { 49 | int sum = 0; 50 | size_t lid = get_local_id(0); 51 | size_t lsize = get_local_size(0); 52 | 53 | size_t num_groups = get_num_groups(0); 54 | for (size_t i = 0u; i < num_groups; ++i) 55 | { 56 | size_t lidx = i * lsize + lid; 57 | int value = sub_group_scan_inclusive_add(values[lidx]); 58 | output[lidx] = sum + value; 59 | 60 | sum += sub_group_broadcast(value, lsize - 1); 61 | } 62 | }"#; 63 | 64 | const SUM_KERNEL_NAME: &str = "sum_int"; 65 | const INCLUSIVE_SCAN_KERNEL_NAME: &str = "inclusive_scan_int"; 66 | /// Runs the sum and inclusive scan kernels on eight integers held in SVM, using the first GPU device that reports fine grained buffer SVM; only prints a message when no such device is found. 67 | #[test] 68 | #[ignore] 69 | fn test_opencl_2_kernel_example() -> Result<()> { 70 | let platforms = get_platforms()?; 71 | assert!(0 < platforms.len()); 72 | 73 | ///////////////////////////////////////////////////////////////////// 74 | //
Query OpenCL compute environment 75 | let opencl_2: &str = "OpenCL 2"; 76 | let opencl_3: &str = "OpenCL 3"; 77 | 78 | // Find the first OpenCL 2/3 platform and GPU device with fine grained buffer SVM; leave both loops as soon as one is found so later devices cannot reset the flag 79 | let mut device_id = ptr::null_mut(); 80 | let mut is_fine_grained_svm: bool = false; 81 | 'platforms: for p in platforms { 82 | let platform_version = p.version()?; 83 | if platform_version.contains(&opencl_2) || platform_version.contains(&opencl_3) { 84 | let devices = p 85 | .get_devices(CL_DEVICE_TYPE_GPU) 86 | .expect("Platform::get_devices failed"); 87 | 88 | for dev_id in devices { 89 | let device = Device::new(dev_id); 90 | let svm_mem_capability = device.svm_mem_capability(); 91 | is_fine_grained_svm = 0 < svm_mem_capability & CL_DEVICE_SVM_FINE_GRAIN_BUFFER; 92 | if is_fine_grained_svm { 93 | device_id = dev_id; 94 | break 'platforms; 95 | } 96 | } 97 | } 98 | } 99 | 100 | if is_fine_grained_svm { 101 | // Create OpenCL context from the OpenCL svm device 102 | let device = Device::new(device_id); 103 | let vendor = device.vendor()?; 104 | let vendor_id = device.vendor_id()?; 105 | println!("OpenCL device vendor name: {}", vendor); 106 | println!("OpenCL device vendor id: {:X}", vendor_id); 107 | 108 | ///////////////////////////////////////////////////////////////////// 109 | // Initialise OpenCL compute environment 110 | 111 | // Create a Context on the OpenCL device 112 | let context = Context::from_device(&device).expect("Context::from_device failed"); 113 | 114 | // Build the OpenCL program source. 115 | let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, CL_STD_2_0) 116 | .expect("Program::create_and_build_from_source failed"); 117 | 118 | // Create the kernels from the OpenCL program source.
119 | let kernels = create_program_kernels(&program)?; 120 | assert!(0 < kernels.len()); 121 | 122 | let kernel_0_name = kernels[0].function_name()?; 123 | println!("OpenCL kernel_0_name: {}", kernel_0_name); 124 | 125 | let sum_kernel = if SUM_KERNEL_NAME == kernel_0_name { 126 | &kernels[0] 127 | } else { 128 | &kernels[1] 129 | }; 130 | 131 | let inclusive_scan_kernel = if INCLUSIVE_SCAN_KERNEL_NAME == kernel_0_name { 132 | &kernels[0] 133 | } else { 134 | &kernels[1] 135 | }; 136 | 137 | // Create a command_queue on the Context's device 138 | let queue = CommandQueue::create_default_with_properties(&context, 0, 0) 139 | .expect("CommandQueue::create_default_with_properties failed"); 140 | 141 | // Get the svm capability of all the devices in the context. 142 | let svm_capability = context.get_svm_mem_capability(); 143 | assert!(0 < svm_capability); 144 | 145 | // Create SVM vectors for the input and output data 146 | 147 | // The input data 148 | const ARRAY_SIZE: usize = 8; 149 | let value_array: [cl_int; ARRAY_SIZE] = [3, 2, 5, 9, 7, 1, 4, 2]; 150 | 151 | // Copy into an OpenCL SVM vector 152 | let mut test_values = 153 | SvmVec::<cl_int>::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed"); 154 | test_values.copy_from_slice(&value_array); 155 | 156 | // Make test_values immutable 157 | let test_values = test_values; 158 | 159 | // The output data, an OpenCL SVM vector 160 | let mut results = 161 | SvmVec::<cl_int>::allocate_zeroed(&context, ARRAY_SIZE).expect("SVM allocation failed"); 162 | 163 | // Run the sum kernel on the input data 164 | let sum_kernel_event = unsafe { 165 | ExecuteKernel::new(sum_kernel) 166 | .set_arg_svm(results.as_mut_ptr()) 167 | .set_arg_svm(test_values.as_ptr()) 168 | .set_global_work_size(ARRAY_SIZE) 169 | .enqueue_nd_range(&queue)?
170 | }; 171 | 172 | // Wait for the kernel to complete execution on the device 173 | sum_kernel_event.wait()?; 174 | 175 | // Can access OpenCL SVM directly, no need to map or read the results 176 | println!("sum results: {:?}", results); 177 | assert_eq!(33, results[0]); 178 | assert_eq!(0, results[ARRAY_SIZE - 1]); 179 | 180 | // Run the inclusive scan kernel on the input data 181 | let kernel_event = unsafe { 182 | ExecuteKernel::new(inclusive_scan_kernel) 183 | .set_arg_svm(results.as_mut_ptr()) 184 | .set_arg_svm(test_values.as_ptr()) 185 | .set_global_work_size(ARRAY_SIZE) 186 | .enqueue_nd_range(&queue)? 187 | }; 188 | 189 | kernel_event.wait()?; 190 | 191 | println!("inclusive_scan results: {:?}", results); 192 | assert_eq!(value_array[0], results[0]); 193 | assert_eq!(33, results[ARRAY_SIZE - 1]); 194 | } else { 195 | println!("OpenCL fine grained SVM capable device not found"); 196 | } 197 | 198 | Ok(()) 199 | } 200 | /// Runs the sum kernel on eight integers held in SVM, using the first device of any type that reports fine grained system SVM; only prints a message when no such device is found. 201 | #[test] 202 | #[ignore] 203 | fn test_opencl_2_system_svm_example() -> Result<()> { 204 | let platforms = get_platforms()?; 205 | assert!(0 < platforms.len()); 206 | 207 | ///////////////////////////////////////////////////////////////////// 208 | // Query OpenCL compute environment 209 | let opencl_2: &str = "OpenCL 2"; 210 | let opencl_3: &str = "OpenCL 3"; 211 | 212 | // Find the first OpenCL 2/3 platform and device with fine grained system SVM; leave both loops as soon as one is found so later devices cannot reset the flag 213 | let mut device_id = ptr::null_mut(); 214 | let mut is_fine_grained_system_svm: bool = false; 215 | 'platforms: for p in platforms { 216 | let platform_version = p.version()?; 217 | 218 | if platform_version.contains(&opencl_2) || platform_version.contains(&opencl_3) { 219 | let devices = p 220 | .get_devices(CL_DEVICE_TYPE_ALL) 221 | .expect("Platform::get_devices failed"); 222 | 223 | for dev_id in devices { 224 | let device = Device::new(dev_id); 225 | let svm_mem_capability = device.svm_mem_capability(); 226 | is_fine_grained_system_svm = 227 | 0 < svm_mem_capability & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM; 228 | if
is_fine_grained_system_svm { 229 | device_id = dev_id; 230 | break 'platforms; 231 | } 232 | } 233 | } 234 | } 235 | 236 | if is_fine_grained_system_svm { 237 | // Create OpenCL context from the OpenCL svm device 238 | let device = Device::new(device_id); 239 | let vendor = device.vendor().expect("Device.vendor failed"); 240 | let vendor_id = device.vendor_id().expect("Device.vendor_id failed"); 241 | println!("OpenCL device vendor name: {}", vendor); 242 | println!("OpenCL device vendor id: {:X}", vendor_id); 243 | 244 | ///////////////////////////////////////////////////////////////////// 245 | // Initialise OpenCL compute environment 246 | 247 | // Create a Context on the OpenCL svm device 248 | let context = Context::from_device(&device).expect("Context::from_device failed"); 249 | 250 | // Build the OpenCL program source and create the kernel. 251 | let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, "") 252 | .expect("Program::create_and_build_from_source failed"); 253 | 254 | let kernel = Kernel::create(&program, SUM_KERNEL_NAME).expect("Kernel::create failed"); 255 | 256 | // Create a command_queue on the Context's device 257 | let queue = CommandQueue::create_default_with_properties(&context, 0, 0) 258 | .expect("CommandQueue::create_default_with_properties failed"); 259 | 260 | // The input data 261 | const ARRAY_SIZE: usize = 8; 262 | let value_array: [cl_int; ARRAY_SIZE] = [3, 2, 5, 9, 7, 1, 4, 2]; 263 | 264 | // Copy into an OpenCL SVM vector 265 | let mut test_values = 266 | SvmVec::<cl_int>::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed"); 267 | test_values.copy_from_slice(&value_array); 268 | 269 | // Make test_values immutable 270 | let test_values = test_values; 271 | 272 | // The output data, an OpenCL SVM vector 273 | let mut results = 274 | SvmVec::<cl_int>::allocate_zeroed(&context, ARRAY_SIZE).expect("SVM allocation failed"); 275 | 276 | // Run the sum kernel on the input data 277 | let sum_kernel_event = unsafe { 278 |
ExecuteKernel::new(&kernel) 279 | .set_arg_svm(results.as_mut_ptr()) 280 | .set_arg_svm(test_values.as_ptr()) 281 | .set_global_work_size(ARRAY_SIZE) 282 | .enqueue_nd_range(&queue)? 283 | }; 284 | 285 | // Wait for the kernel to complete execution on the device 286 | sum_kernel_event.wait()?; 287 | 288 | // Can access OpenCL SVM directly, no need to map or read the results 289 | println!("sum results: {:?}", results); 290 | assert_eq!(33, results[0]); 291 | assert_eq!(0, results[ARRAY_SIZE - 1]); 292 | } else { 293 | println!("OpenCL fine grained system SVM device not found"); 294 | } 295 | 296 | Ok(()) 297 | } 298 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # opencl3 2 | 3 | [![crates.io](https://img.shields.io/crates/v/opencl3.svg)](https://crates.io/crates/opencl3) 4 | [![docs.io](https://docs.rs/opencl3/badge.svg)](https://docs.rs/opencl3/) 5 | [![OpenCL 3.0](https://img.shields.io/badge/OpenCL-3.0-blue.svg)](https://www.khronos.org/registry/OpenCL/) 6 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 7 | [![Rust](https://github.com/kenba/opencl3/workflows/Rust/badge.svg)](https://github.com/kenba/opencl3/actions) 8 | 9 | A Rust implementation of the Khronos [OpenCL](https://www.khronos.org/registry/OpenCL/) API. 10 | 11 | ## Description 12 | 13 | A relatively simple, object based model of the OpenCL 3.0 14 | [API](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html). 15 | It is built upon the [cl3](https://crates.io/crates/cl3) crate, which 16 | provides a functional interface to the OpenCL [C API](https://github.com/KhronosGroup/OpenCL-Headers/blob/master/CL/cl.h).
17 | 18 | [OpenCL](https://www.khronos.org/opencl/) (Open Computing Language) is a framework for general-purpose parallel programming across heterogeneous devices including: CPUs, GPUs, DSPs, FPGAs and other processors or hardware accelerators. It is often considered as an open-source alternative to Nvidia's proprietary 19 | Compute Unified Device Architecture [CUDA](https://developer.nvidia.com/cuda-zone) 20 | for performing General-purpose computing on GPUs, see 21 | [GPGPU](https://en.wikipedia.org/wiki/General-purpose_computing_on_graphics_processing_units). 22 | 23 | [OpenCL 3.0](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html) 24 | is a unified specification that adds little new functionality to previous OpenCL versions. 25 | It specifies that all **OpenCL 1.2** features are **mandatory**, while all 26 | OpenCL 2.x and 3.0 features are now optional. 27 | 28 | ### Features 29 | 30 | This library has: 31 | 32 | * A simple API, enabling most OpenCL objects to be created with a single function call. 33 | * Automatic OpenCL resource management using the [Drop trait](https://doc.rust-lang.org/book/ch15-03-drop.html) to implement [RAII](https://en.wikipedia.org/wiki/Resource_acquisition_is_initialization). 34 | * Support for [directed acyclic graph](https://en.wikipedia.org/wiki/Directed_acyclic_graph) OpenCL control flow execution using event wait lists. 35 | * Support for Shared Virtual Memory (SVM) with an [SvmVec](src/svm.rs) object that can be serialized and deserialized by [serde](https://serde.rs/). 36 | * Support for OpenCL extensions, see [OpenCL Extensions](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html). 37 | * Support for multithreading with [Send and Sync](https://doc.rust-lang.org/nomicon/send-and-sync.html) traits. 38 | 39 | ## Design 40 | 41 | The library is object based with most OpenCL objects represented by Rust structs.
42 | For example, an OpenCL `cl_device_id` is represented by [Device](src/device.rs) with methods to get information about the device instead of calling `clGetDeviceInfo` with the relevant `cl_device_info` value. 43 | 44 | ![OpenCL Context](docs/images/opencl_context_objects.svg) 45 | *OpenCL Context Class Diagram* 46 | 47 | The struct methods are simpler to use than their equivalent standalone functions in [cl3](https://github.com/kenba/cl3) because they convert the `InfoType` enum into the correct underlying type returned by the `clGetDeviceInfo` call for the `cl_device_info` value. 48 | 49 | Nearly all the structs implement the `Drop` trait to release their corresponding 50 | OpenCL objects. The exceptions are `Platform` and `Device` which don't need to be released. See the crate [documentation](https://docs.rs/opencl3/). 51 | 52 | The API for OpenCL versions and extensions are controlled by Rust features such as "CL_VERSION_2_0" and "cl_khr_gl_sharing". To enable an OpenCL version, the feature for that version and **all** previous OpenCL versions must be enabled, e.g. for "CL_VERSION_2_0"; "CL_VERSION_1_1" and "CL_VERSION_1_2" must also be enabled. 53 | 54 | The default features are "CL_VERSION_1_1", "CL_VERSION_1_2" and "CL_VERSION_2_0". 55 | 56 | Rust deprecation warnings are given for OpenCL API functions that are deprecated by an enabled OpenCL version e.g., `clCreateCommandQueue` is deprecated whenever "CL_VERSION_2_0" is enabled. 57 | 58 | ## Use 59 | 60 | Ensure that an OpenCL Installable Client Driver (ICD) and the appropriate OpenCL 61 | hardware driver(s) are installed, see 62 | [OpenCL Installation](https://github.com/kenba/cl3/tree/main/docs/opencl_installation.md). 63 | 64 | `opencl3` supports OpenCL 1.2 and 2.0 ICD loaders by default. 
If you have an 65 | OpenCL 2.0 ICD loader then just add the following to your project's `Cargo.toml`: 66 | 67 | ```toml 68 | [dependencies] 69 | opencl3 = "0.12" 70 | ``` 71 | 72 | If your OpenCL ICD loader supports higher versions of OpenCL then add the 73 | appropriate features to opencl3, e.g. for an OpenCL 3.0 ICD loader add the 74 | following to your project's `Cargo.toml` instead: 75 | 76 | ```toml 77 | [dependencies.opencl3] 78 | version = "0.12" 79 | features = ["CL_VERSION_2_1", "CL_VERSION_2_2", "CL_VERSION_3_0"] 80 | ``` 81 | 82 | OpenCL extensions and `serde` support can also be enabled by adding their features, e.g.: 83 | 84 | ```toml 85 | [dependencies.opencl3] 86 | version = "0.12" 87 | features = ["cl_khr_gl_sharing", "cl_khr_dx9_media_sharing", "serde"] 88 | ``` 89 | 90 | See the [OpenCL Guide](https://github.com/KhronosGroup/OpenCL-Guide) and [OpenCL Description](https://github.com/kenba/opencl3/tree/main/docs/opencl_description.md) for background on using OpenCL. 91 | 92 | ## Examples 93 | 94 | There are examples in the [examples](https://github.com/kenba/opencl3/tree/main/examples/) directory. 95 | The tests also provide examples of how the crate may be used, e.g. see: 96 | [platform](https://github.com/kenba/opencl3/tree/main/src/platform.rs), 97 | [device](https://github.com/kenba/opencl3/tree/main/src/device.rs), 98 | [context](https://github.com/kenba/opencl3/tree/main/src/context.rs), 99 | [integration_test](https://github.com/kenba/opencl3/tree/main/tests/integration_test.rs) and 100 | [opencl2_kernel_test](https://github.com/kenba/opencl3/tree/main/tests/opencl2_kernel_test.rs). 101 | 102 | The library is designed to support events and OpenCL 2 features such as Shared Virtual Memory (SVM) and kernel built-in work-group functions. 
103 | It also has optional support for `serde` e.g.: 104 | 105 | ```rust no-run 106 | const PROGRAM_SOURCE: &str = r#" 107 | kernel void inclusive_scan_int (global int* output, 108 | global int const* values) 109 | { 110 | int sum = 0; 111 | size_t lid = get_local_id(0); 112 | size_t lsize = get_local_size(0); 113 | 114 | size_t num_groups = get_num_groups(0); 115 | for (size_t i = 0u; i < num_groups; ++i) 116 | { 117 | size_t lidx = i * lsize + lid; 118 | int value = work_group_scan_inclusive_add(values[lidx]); 119 | output[lidx] = sum + value; 120 | 121 | sum += work_group_broadcast(value, lsize - 1); 122 | } 123 | }"#; 124 | 125 | const KERNEL_NAME: &str = "inclusive_scan_int"; 126 | 127 | // Create a Context on an OpenCL device 128 | let context = Context::from_device(&device).expect("Context::from_device failed"); 129 | 130 | // Build the OpenCL program source and create the kernel. 131 | let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, CL_STD_2_0) 132 | .expect("Program::create_and_build_from_source failed"); 133 | 134 | let kernel = Kernel::create(&program, KERNEL_NAME).expect("Kernel::create failed"); 135 | 136 | // Create a command_queue on the Context's device 137 | let queue = CommandQueue::create_default_with_properties( 138 | &context, 139 | CL_QUEUE_PROFILING_ENABLE, 140 | 0, 141 | ) 142 | .expect("CommandQueue::create_default_with_properties failed"); 143 | 144 | // The input data 145 | const ARRAY_SIZE: usize = 8; 146 | const VALUE_ARRAY: &str = "[3,2,5,9,7,1,4,2]"; 147 | 148 | // Create an OpenCL SVM vector 149 | let mut test_values = SvmVec::<cl_int>::new(&context); 150 | 151 | // Handle test_values if device only supports CL_DEVICE_SVM_COARSE_GRAIN_BUFFER 152 | if !test_values.is_fine_grained() { 153 | // SVM_COARSE_GRAIN_BUFFER needs to know the size of the data to allocate the SVM 154 | test_values = SvmVec::<cl_int>::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed"); 155 | // Map the SVM for a SVM_COARSE_GRAIN_BUFFER
156 | unsafe { queue.enqueue_svm_map(CL_BLOCKING, CL_MAP_WRITE, &mut test_values, &[])? }; 157 | // Clear the SVM for the deserializer 158 | test_values.clear(); 159 | } 160 | 161 | ExtendSvmVec(&mut test_values) 162 | .deserialize(&mut deserializer) 163 | .expect("Error deserializing the VALUE_ARRAY JSON string."); 164 | 165 | // Make test_values immutable 166 | let test_values = test_values; 167 | 168 | // Unmap test_values if not a CL_MEM_SVM_FINE_GRAIN_BUFFER 169 | if !test_values.is_fine_grained() { 170 | let unmap_test_values_event = unsafe { queue.enqueue_svm_unmap(&test_values, &[])? }; 171 | unmap_test_values_event.wait()?; 172 | } 173 | 174 | // The output data, an OpenCL SVM vector 175 | let mut results = 176 | SvmVec::<cl_int>::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed"); 177 | 178 | // Run the kernel on the input data 179 | let kernel_event = unsafe { 180 | ExecuteKernel::new(&kernel) 181 | .set_arg_svm(results.as_mut_ptr()) 182 | .set_arg_svm(test_values.as_ptr()) 183 | .set_global_work_size(ARRAY_SIZE) 184 | .enqueue_nd_range(&queue)? 185 | }; 186 | 187 | // Wait for the kernel to complete execution on the device 188 | kernel_event.wait()?; 189 | 190 | // Map results if not a CL_MEM_SVM_FINE_GRAIN_BUFFER 191 | if !results.is_fine_grained() { 192 | unsafe { queue.enqueue_svm_map(CL_BLOCKING, CL_MAP_READ, &mut results, &[])? }; 193 | } 194 | 195 | // Convert SVM results to json 196 | let json_results = serde_json::to_string(&results).unwrap(); 197 | println!("json results: {}", json_results); 198 | 199 | // Unmap results if not a CL_MEM_SVM_FINE_GRAIN_BUFFER 200 | if !results.is_fine_grained() { 201 | let unmap_results_event = unsafe { queue.enqueue_svm_unmap(&results, &[])? }; 202 | unmap_results_event.wait()?; 203 | } 204 | ``` 205 | 206 | The example above was taken from: 207 | [opencl2serde.rs](https://github.com/kenba/opencl3/tree/main/examples/opencl2serde.rs).
208 | 209 | ## Tests 210 | 211 | The crate contains unit, documentation and integration tests. 212 | The tests run the platform and device info functions (among others) so they 213 | can provide useful information about OpenCL capabilities of the system. 214 | 215 | It is recommended to run the tests in single-threaded mode, since some of 216 | them can interfere with each other when run multi-threaded, e.g.: 217 | 218 | ```shell 219 | cargo test -- --test-threads=1 --show-output 220 | ``` 221 | 222 | The integration tests are marked `ignore` so use the following command to 223 | run them: 224 | 225 | ```shell 226 | cargo test -- --test-threads=1 --show-output --ignored 227 | ``` 228 | 229 | ## Recent changes 230 | 231 | The API has changed considerably since version `0.1` of the library, with the 232 | aim of making the library more consistent and easier to use. 233 | 234 | [SvmVec](src/svm.rs) was changed recently to provide support for `serde` deserialization. 235 | It also changed in version 0.5.0 to provide better support for 236 | coarse grain buffer Shared Virtual Memory now that Nvidia is supporting it, 237 | see [Nvidia OpenCL](https://developer.nvidia.com/opencl). 238 | 239 | In version 0.6.0 the Info enums were removed from the underlying [cl3](https://crates.io/crates/cl3) crate and this crate so that data can be read from OpenCL devices in the future using new values that are currently undefined. 240 | 241 | In version 0.8.0 deprecation warnings are given for OpenCL API functions that are deprecated by an enabled OpenCL version e.g., `clCreateCommandQueue` is deprecated whenever "CL_VERSION_2_0" is enabled. 242 | 243 | In version 0.9.0 many OpenCL API functions are declared `unsafe` since they may cause undefined behaviour if called incorrectly. 244 | 245 | For information on other changes, see [Releases](RELEASES.md). 
246 | 247 | ## Contribution 248 | 249 | If you want to contribute through code or documentation, the [Contributing](CONTRIBUTING.md) guide is the best place to start. If you have any questions, please feel free to ask. 250 | Just please abide by our [Code of Conduct](CODE_OF_CONDUCT.md). 251 | 252 | ## License 253 | 254 | Licensed under the Apache License, Version 2.0, as per Khronos Group OpenCL. 255 | You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 256 | 257 | Any contribution intentionally submitted for inclusion in the work by you shall be licensed as defined in the Apache-2.0 license above, without any additional terms or conditions, unless you explicitly state otherwise. 258 | 259 | OpenCL and the OpenCL logo are trademarks of Apple Inc. used by permission by Khronos. 260 | -------------------------------------------------------------------------------- /docs/opencl_description.md: -------------------------------------------------------------------------------- 1 | # OpenCL Description 2 | 3 | **OpenCL** (Open Computing Language) is a framework for parallel programming on 4 | heterogeneous devices. 5 | It is designed to harness the compute performance of GPUs, DSPs, FPGAs, etc. 6 | to improve the throughput and latency of computationally intensive workloads. 7 | 8 | ## OpenCL Performance 9 | 10 | A well designed OpenCL application running on appropriate hardware can 11 | significantly outperform an equivalent application running on multiple CPUs. 12 | However, a poorly designed OpenCL application or an OpenCL application 13 | running on inappropriate hardware and/or with the wrong kind of data can be 14 | *slower* than an equivalent application running on CPUs. There are several 15 | performance overheads inherent to performing computational tasks off-board modern 16 | CPUs to be considered before embarking on an OpenCL solution. 17 | 18 | Parallel computing latency is governed by [Amdahl's law](https://en.wikipedia.org/wiki/Amdahl%27s_law), i.e.
the minimum execution time of a parallelised process can 19 | not be less than the parts of the process that *cannot* be parallelised. 20 | Where OpenCL is concerned, the parts of the process that cannot be parallelised are: 21 | * OpenCL Initialisation 22 | * and data transfer between **host memory** and **device memory**. 23 | 24 | Both OpenCL initialisation and data transfer can take longer than processing 25 | the solution on modern CPUs, especially where the OpenCL programs/kernels to be 26 | compiled are relatively large and/or using a compute device accessed via a 27 | relatively slow mechanism, such as a [PCIe](https://en.wikipedia.org/wiki/PCI_Express) 28 | bus to a discrete graphics card. 29 | 30 | ### Small Tasks 31 | 32 | Modern multi-core CPUs can share cached data "on-chip" instead of transferring it 33 | via shared memory let alone down a data-bus and application "kernels" (i.e. functions) 34 | are compiled into an application, they do not need to be loaded and compiled again 35 | before the application can run. 36 | 37 | A well-designed parallel processing application using a [work-stealing](https://en.wikipedia.org/wiki/Work_stealing) task scheduler such as Intel's Threading 38 | Building Blocks ([TBB](https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onetbb.html)) library often out-performs OpenCL applications for 39 | "one off" tasks with a significant kernel compilation time and/or relatively 40 | small data sets. 41 | 42 | ### CPU Devices 43 | 44 | Note: the OpenCL data transfer overhead can be significantly reduced by using 45 | a CPU device, since it can share the same memory (and maybe even the same cache) 46 | as the OpenCL application's host. An OpenCL CPU device may provide the optimum 47 | solution where the OpenCL kernels can utilise [SIMD](https://en.wikipedia.org/wiki/SIMD) 48 | instructions on CPUs. 49 | 50 | # OpenCL Lifecycle 51 | 52 | Figure 1 shows the typical lifecycle of an OpenCL application. 
53 | It can be considered as consisting of 4 phases: 54 | * Query 55 | * Initialisation 56 | * Compute 57 | * Clean-up 58 | 59 | ![OpenCL Application Lifecycle](images/opencl_app_sequence.svg) 60 | *Figure 1 OpenCL Application Lifecycle* 61 | 62 | ## Query 63 | 64 | In the Query phase the OpenCL application queries the system that it's running on 65 | to determine what features it supports and which is (are) the best device(s) to 66 | run on. 67 | 68 | Where an OpenCL application is designed to run on specific hardware, this simply 69 | involves discovering which OpenCL device(s) correspond to the required hardware. 70 | 71 | However, where an OpenCL application is designed to run almost anywhere (like 72 | the tests in this library) then it must query the available platforms and 73 | devices to find the most appropriate platform and device(s). 74 | 75 | This is not a trivial task, since any system with a discrete graphics card is 76 | likely to have more than one platform and each platform is likely to be connected 77 | to more than one device. Furthermore, each device may be connected to more than one platform, see Figure 2. 78 | 79 | ![Example OpenCL System](images/example_opencl_system.svg) 80 | *Figure 2 An Example OpenCL System* 81 | 82 | The [OpenCL 3.0](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html) API specification has new queries to simplify this task. 83 | 84 | ## Initialisation 85 | 86 | After the most suitable platform and device(s) have been found it is necessary 87 | to create an OpenCL context for them. 88 | 89 | ### Context 90 | 91 | An OpenCL application must have at least one context. 92 | An OpenCL application may create more than one context but OpenCL does not support synchronisation between multiple contexts. 93 | 94 | An OpenCL context can be created for more than one device, however the devices 95 | must all be connected to the same platform.
Where an OpenCL context has multiple 96 | devices, OpenCL **does** provide synchronisation between the devices. 97 | 98 | ![OpenCL Context](images/opencl_context_objects.svg) 99 | *Figure 3 Context Class Diagram* 100 | 101 | The `opencl3` [Context](../src/context.rs#Context) manages OpenCL objects that 102 | are required to compute data on OpenCL devices, i.e.: 103 | 104 | * Command Queues 105 | * Programs 106 | * Kernels 107 | * Sub Devices 108 | * Memory 109 | 110 | ### Command Queue(s) 111 | 112 | In order to execute OpenCL kernels on the context device(s), it is necessary to 113 | create at least one command queue for each device. OpenCL permits more than 114 | one command queue per device and also enables applications to split devices into 115 | sub-devices, each of which requires its own command queue(s). 116 | 117 | ### Programs and Kernels 118 | 119 | Also, in order to execute OpenCL kernels, the program(s) in which they are 120 | defined need to be created and built for all the devices in the context 121 | before the kernels themselves can be constructed. 122 | 123 | OpenCL programs can be built from source code, Intermediate Language 124 | (IL, e.g. [SPIR](https://www.khronos.org/spir/) or [SPIR-V](https://www.khronos.org/registry/spir-v/)) or binaries. Building from source or IL can take many seconds 125 | for complex kernels, therefore it is tempting to load binary programs especially 126 | if the application is designed to run on specific hardware. 127 | 128 | Note: some devices have built-in kernels, e.g. [Intel Motion Estimation](https://software.intel.com/content/www/us/en/develop/articles/intro-to-advanced-motion-estimation-extension-for-opencl.html). These can also be 129 | built into the context for the device(s) that have them. 130 | 131 | `opencl3` creates all the OpenCL kernels in the programs managed by `Context` 132 | and stores them in a HashMap in `Context` using the kernel name (in a `CString`) 133 | as the key.
134 | 135 | ### Sub Devices 136 | 137 | OpenCL devices can be partitioned into sub-devices, see [Partitioning a Device](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_partitioning_a_device). Unlike normal OpenCL devices that are connected 138 | to a platform, sub-devices can be retained and released. 139 | `opencl3` has a [SubDevice](../src/device.rs#SubDevice) struct to release 140 | sub-devices when they are dropped. 141 | 142 | As with normal OpenCL devices, each sub-device requires at least one 143 | command queue to compute data on it. 144 | 145 | Device partitioning is not supported by all OpenCL devices. 146 | For more information see: [OpenCL Device Fission for CPU Performance](https://software.intel.com/content/www/us/en/develop/articles/opencl-device-fission-for-cpu-performance.html). 147 | 148 | ### Memory 149 | 150 | Finally, the OpenCL kernels require memory from which to read input data and 151 | write output data. Unless using host Shared Virtual Memory (SVM), the OpenCL 152 | device memory (buffers, images and device SVM) must be created before data can be 153 | transferred to and from the host to the OpenCL device memory, see Figure 4. 154 | 155 | ![OpenCL Memory Model](images/opencl_memory.png) 156 | *Figure 4 An OpenCL Context's Memory Model* 157 | 158 | OpenCL defines 4 different types of Memory Objects, see Figure 5: 159 | 160 | * Buffer 161 | * Image 162 | * Pipe (*OpenCL 2*) 163 | * Shared Virtual Memory (*OpenCL 2*) 164 | 165 | ![OpenCL Memory Objects](images/opencl_memory_objects.svg) 166 | *Figure 5 OpenCL Memory Objects* 167 | 168 | The OpenCL API describes the Memory Objects in detail in the OpenCL [Memory Model](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_memory_model). 
169 | 170 | #### Sampler Objects 171 | 172 | OpenCL also has Sampler Objects to specify how a kernel shall sample an image 173 | when the image is read in the kernel, see [Sampler Objects](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_sampler_objects). 174 | 175 | ## Compute 176 | 177 | Now that the OpenCL compute environment has been set up, data can be processed 178 | by the OpenCL kernel(s) on the device(s). 179 | 180 | The library stores OpenCL kernels by name in a HashMap in the Context struct. 181 | An application can get OpenCL kernels it requires by name from the Context. 182 | 183 | As shown in Figure 1, the application: 184 | * writes input data from the Host Memory to the Global Memory shown in Figure 4; 185 | * executes the kernel(s); 186 | * reads output data from the Global Memory to the Host Memory shown in Figure 4; 187 | * waits for the data read to finish. 188 | 189 | ### Data Transfer 190 | 191 | An OpenCL 1.2 application (i.e. *without* SVM) performs all the tasks above via 192 | a command_queue. OpenCL command queues have "enqueue" commands for: writing memory 193 | buffers and images, executing kernels, and reading memory buffers and images. 194 | 195 | An OpenCL 2.0 application with *coarse* SVM also performs all the tasks above via 196 | a command_queue. However, instead of writing and reading memory to and from 197 | context Global Memory, the application `maps` and `unmaps` the SVM memory. 198 | 199 | An OpenCL 2.0 application with *host* or *fine device* SVM does not need to 200 | enqueue any commands to transfer data between Host Memory and Global Memory: 201 | either no transfer is needed or it is handled by OpenCL "under the covers". 202 | 203 | The OpenCL API describes [SVM data transfer](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#shared-virtual-memory) 204 | in more detail.
205 | 206 | ### Kernel Execution 207 | 208 | Before a kernel can be "enqueued" on a command_queue, its arguments must be 209 | initialised, i.e. the kernel function arguments must be set to the locations 210 | of the input and output memory and any other data it may require. 211 | 212 | After its arguments have been set, a kernel can be executed on a command queue 213 | with the OpenCL `clEnqueueNDRangeKernel` function. This function controls *how* 214 | kernels execute, see [Mapping work-items onto an NDRange](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_mapping_work_items_onto_an_ndrange). 215 | 216 | This library contains an `ExecuteKernel` struct that implements the 217 | [builder pattern](https://doc.rust-lang.org/1.0.0/style/ownership/builders.html) 218 | to simplify setting up the arguments and `NDRange` when executing a kernel, 219 | see: [kernel.rs](../src/kernel.rs). 220 | 221 | ### Events 222 | 223 | All command_queue "enqueue_" commands can return an optional Event object and 224 | take an event_wait_list as a parameter. 225 | 226 | OpenCL events can be used to time how long OpenCL commands take to 227 | execute by creating command queues with `CL_QUEUE_PROFILING_ENABLE` and 228 | getting profile info from the events with `CL_PROFILING_COMMAND_START` and 229 | `CL_PROFILING_COMMAND_END`. 230 | 231 | OpenCL events can also be used to control "enqueue_" commands by supplying 232 | events from previous "enqueue_" commands in the event_wait_list of the 233 | command to be controlled. 234 | 235 | Furthermore, if an OpenCL device supports out-of-order command execution and its 236 | command queue was created with `CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE` then the 237 | OpenCL events can be used to control out-of-order command execution, enabling 238 | commands to run concurrently on the device. 
Together, the events and event_wait_lists 239 | effectively create [directed acyclic graphs](https://en.wikipedia.org/wiki/Directed_acyclic_graph) to control execution on the device. 240 | 241 | An application can wait for events to complete by calling `wait_for_events` with 242 | an event_wait_list instead of calling `finish` on a command_queue. 243 | `wait_for_events` enables an application to wait for events to complete 244 | simultaneously on multiple queues, for example, see 245 | [OpenCL events](http://people.cs.bris.ac.uk/~simonm/workshops/BSC_2013/opencl:course:bsc/Slides/OpenCL_events.pdf). 246 | 247 | ## Clean-up 248 | 249 | A well-behaved OpenCL application should clean-up after itself by freeing memory 250 | and releasing OpenCL objects, i.e.: 251 | * events, 252 | * samplers, 253 | * images, 254 | * buffers, 255 | * pipes, 256 | * svm, 257 | * kernels, 258 | * programs, 259 | * command queues, 260 | * and the context(s). 261 | 262 | This library handles OpenCL clean-up automatically using [RAII](https://en.wikipedia.org/wiki/Resource_acquisition_is_initialization) by wrapping OpenCL objects in 263 | structs that implement the [Drop trait](https://doc.rust-lang.org/book/ch15-03-drop.html). 264 | -------------------------------------------------------------------------------- /docs/images/opencl_context_objects.svg: -------------------------------------------------------------------------------- 1 | cl_device_idContextCommandQueueProgramKernelSubDevice1..*1..*1..*1..*1..*1..*0..* -------------------------------------------------------------------------------- /tests/integration_test.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2021 Via Technology Ltd. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | extern crate opencl3; 16 | 17 | use cl3::device::CL_DEVICE_TYPE_GPU; 18 | use opencl3::Result; 19 | use opencl3::command_queue::{CL_QUEUE_PROFILING_ENABLE, CommandQueue}; 20 | use opencl3::context::Context; 21 | use opencl3::device::Device; 22 | use opencl3::kernel::{ExecuteKernel, Kernel}; 23 | use opencl3::memory::{Buffer, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY}; 24 | use opencl3::platform::get_platforms; 25 | use opencl3::program::Program; 26 | use opencl3::types::{CL_BLOCKING, CL_NON_BLOCKING, cl_event, cl_float}; 27 | use std::ptr; 28 | 29 | const PROGRAM_SOURCE: &str = r#" 30 | kernel void saxpy_float (global float* z, 31 | global float const* x, 32 | global float const* y, 33 | float a) 34 | { 35 | size_t i = get_global_id(0); 36 | z[i] = a*x[i] + y[i]; 37 | }"#; 38 | 39 | const KERNEL_NAME: &str = "saxpy_float"; 40 | 41 | #[test] 42 | #[ignore] 43 | fn test_opencl_1_2_example() -> Result<()> { 44 | let platforms = get_platforms()?; 45 | assert!(0 < platforms.len()); 46 | 47 | // Get the first platform 48 | let platform = &platforms[0]; 49 | 50 | let devices = platform 51 | .get_devices(CL_DEVICE_TYPE_GPU) 52 | .expect("Platform::get_devices failed"); 53 | assert!(0 < devices.len()); 54 | 55 | let platform_name = platform.name()?; 56 | println!("Platform Name: {:?}", platform_name); 57 | 58 | // Create OpenCL context from the first device 59 | let device = Device::new(devices[0]); 60 | let vendor = device.vendor().expect("Device.vendor failed"); 61 | let vendor_id = 
device.vendor_id().expect("Device.vendor_id failed"); 62 | println!("OpenCL device vendor name: {}", vendor); 63 | println!("OpenCL device vendor id: {:X}", vendor_id); 64 | 65 | ///////////////////////////////////////////////////////////////////// 66 | // Initialise OpenCL compute environment 67 | 68 | // Create a Context on the OpenCL device 69 | let context = Context::from_device(&device).expect("Context::from_device failed"); 70 | 71 | // Build the OpenCL program source and create the kernel. 72 | let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, "") 73 | .expect("Program::create_and_build_from_source failed"); 74 | 75 | let kernel = Kernel::create(&program, KERNEL_NAME).expect("Kernel::create failed"); 76 | 77 | // Create a command_queue on the Context's device 78 | let queue = CommandQueue::create_default(&context, CL_QUEUE_PROFILING_ENABLE) 79 | .expect("CommandQueue::create_default failed"); 80 | 81 | ///////////////////////////////////////////////////////////////////// 82 | // Compute data 83 | 84 | // The input data 85 | const ARRAY_SIZE: usize = 1000; 86 | let ones: [cl_float; ARRAY_SIZE] = [1.0; ARRAY_SIZE]; 87 | let mut sums: [cl_float; ARRAY_SIZE] = [0.0; ARRAY_SIZE]; 88 | for i in 0..ARRAY_SIZE { 89 | sums[i] = 1.0 + 1.0 * i as cl_float; 90 | } 91 | 92 | // Create OpenCL device buffers 93 | let mut x = unsafe { 94 | Buffer::::create(&context, CL_MEM_READ_ONLY, ARRAY_SIZE, ptr::null_mut())? 95 | }; 96 | let mut y = unsafe { 97 | Buffer::::create(&context, CL_MEM_READ_ONLY, ARRAY_SIZE, ptr::null_mut())? 98 | }; 99 | let z = unsafe { 100 | Buffer::::create(&context, CL_MEM_WRITE_ONLY, ARRAY_SIZE, ptr::null_mut())? 101 | }; 102 | 103 | // Blocking write 104 | let _x_write_event = unsafe { queue.enqueue_write_buffer(&mut x, CL_BLOCKING, 0, &ones, &[])? }; 105 | 106 | // Non-blocking write, wait for y_write_event 107 | let y_write_event = 108 | unsafe { queue.enqueue_write_buffer(&mut y, CL_NON_BLOCKING, 0, &sums, &[])? 
}; 109 | 110 | // a value for the kernel function 111 | let a: cl_float = 300.0; 112 | 113 | // Use the ExecuteKernel builder to set the kernel buffer and 114 | // cl_float value arguments, before setting the one dimensional 115 | // global_work_size for the call to enqueue_nd_range. 116 | // Unwraps the Result to get the kernel execution event. 117 | let kernel_event = unsafe { 118 | ExecuteKernel::new(&kernel) 119 | .set_arg(&z) 120 | .set_arg(&x) 121 | .set_arg(&y) 122 | .set_arg(&a) 123 | .set_global_work_size(ARRAY_SIZE) 124 | .set_wait_event(&y_write_event) 125 | .enqueue_nd_range(&queue)? 126 | }; 127 | 128 | let mut events: Vec = Vec::default(); 129 | events.push(kernel_event.get()); 130 | 131 | // Create a results array to hold the results from the OpenCL device 132 | // and enqueue a read command to read the device buffer into the array 133 | // after the kernel event completes. 134 | let mut results: [cl_float; ARRAY_SIZE] = [0.0; ARRAY_SIZE]; 135 | let _event = 136 | unsafe { queue.enqueue_read_buffer(&z, CL_NON_BLOCKING, 0, &mut results, &events)? 
}; 137 | 138 | // Block until all commands on the queue have completed 139 | queue.finish()?; 140 | 141 | assert_eq!(1300.0, results[ARRAY_SIZE - 1]); 142 | println!("results back: {}", results[ARRAY_SIZE - 1]); 143 | 144 | // Calculate the kernel duration, from the kernel_event 145 | let start_time = kernel_event.profiling_command_start()?; 146 | let end_time = kernel_event.profiling_command_end()?; 147 | let duration = end_time - start_time; 148 | println!("kernel execution duration (ns): {}", duration); 149 | 150 | Ok(()) 151 | } 152 | 153 | #[cfg(any(feature = "CL_VERSION_2_0", feature = "dynamic"))] 154 | #[test] 155 | #[ignore] 156 | fn test_opencl_svm_example() -> Result<()> { 157 | use cl3::device::{CL_DEVICE_SVM_COARSE_GRAIN_BUFFER, CL_DEVICE_SVM_FINE_GRAIN_BUFFER}; 158 | use opencl3::command_queue::CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; 159 | use opencl3::memory::{CL_MAP_READ, CL_MAP_WRITE}; 160 | use opencl3::svm::SvmVec; 161 | 162 | let platforms = get_platforms()?; 163 | assert!(0 < platforms.len()); 164 | 165 | ///////////////////////////////////////////////////////////////////// 166 | // Query OpenCL compute environment 167 | let opencl_2: &str = "OpenCL 2"; 168 | let opencl_3: &str = "OpenCL 3"; 169 | 170 | // Find the first OpenCL 2 or 3 platform with an SVM capable GPU device 171 | let mut device_id = ptr::null_mut(); 172 | let mut is_svm_capable: bool = false; 173 | 'platforms: for p in platforms { 174 | let platform_version = p.version()?; 175 | if platform_version.contains(&opencl_2) || platform_version.contains(&opencl_3) { 176 | let devices = p 177 | .get_devices(CL_DEVICE_TYPE_GPU) 178 | .expect("Platform::get_devices failed"); 179 | 180 | for dev_id in devices { 181 | let device = Device::new(dev_id); 182 | let svm_mem_capability = device.svm_mem_capability(); 183 | is_svm_capable = 0 < svm_mem_capability 184 | & (CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER); 185 | if is_svm_capable { 186 | device_id = dev_id; 187 | break 'platforms; // leave both loops: a later platform must not reset is_svm_capable 188 | } 189 | } 190 | } 191 | }
192 | 193 | if is_svm_capable { 194 | // Create OpenCL context from the OpenCL svm device 195 | let device = Device::new(device_id); 196 | let vendor = device.vendor().expect("Device.vendor failed"); 197 | let vendor_id = device.vendor_id().expect("Device.vendor_id failed"); 198 | println!("OpenCL device vendor name: {}", vendor); 199 | println!("OpenCL device vendor id: {:X}", vendor_id); 200 | 201 | ///////////////////////////////////////////////////////////////////// 202 | // Initialise OpenCL compute environment 203 | 204 | // Create a Context on the OpenCL svm device 205 | let context = Context::from_device(&device).expect("Context::from_device failed"); 206 | 207 | // Build the OpenCL program source and create the kernel. 208 | let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, "") 209 | .expect("Program::create_and_build_from_source failed"); 210 | 211 | let kernel = Kernel::create(&program, KERNEL_NAME).expect("Kernel::create failed"); 212 | 213 | // Create a command_queue on the Context's device 214 | let queue = CommandQueue::create_default_with_properties( 215 | &context, 216 | CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 217 | 0, 218 | ) 219 | .expect("CommandQueue::create_default_with_properties failed"); 220 | 221 | ///////////////////////////////////////////////////////////////////// 222 | // Compute data 223 | 224 | // Get the svm capability of all the devices in the context. 
225 | let svm_capability = context.get_svm_mem_capability(); 226 | assert!(0 < svm_capability); 227 | 228 | let is_fine_grained_svm: bool = 0 < svm_capability & CL_DEVICE_SVM_FINE_GRAIN_BUFFER; 229 | println!("OpenCL SVM is fine grained: {}", is_fine_grained_svm); 230 | 231 | // Create SVM vectors for the data 232 | 233 | // The SVM vectors 234 | const ARRAY_SIZE: usize = 1000; 235 | let mut ones = 236 | SvmVec::::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed"); 237 | let mut sums = 238 | SvmVec::::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed"); 239 | let mut results = 240 | SvmVec::::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed"); 241 | 242 | let a: cl_float = 300.0; 243 | if is_fine_grained_svm { 244 | // The input data 245 | for i in 0..ARRAY_SIZE { 246 | ones[i] = 1.0; 247 | } 248 | 249 | for i in 0..ARRAY_SIZE { 250 | sums[i] = 1.0 + 1.0 * i as cl_float; 251 | } 252 | 253 | // Make ones and sums immutable 254 | let ones = ones; 255 | let sums = sums; 256 | 257 | // Use the ExecuteKernel builder to set the kernel buffer and 258 | // cl_float value arguments, before setting the one dimensional 259 | // global_work_size for the call to enqueue_nd_range. 260 | // Unwraps the Result to get the kernel execution event. 261 | let kernel_event = unsafe { 262 | ExecuteKernel::new(&kernel) 263 | .set_arg_svm(results.as_mut_ptr()) 264 | .set_arg_svm(ones.as_ptr()) 265 | .set_arg_svm(sums.as_ptr()) 266 | .set_arg(&a) 267 | .set_global_work_size(ARRAY_SIZE) 268 | .enqueue_nd_range(&queue)? 
269 | }; 270 | 271 | // Wait for the kernel_event to complete 272 | kernel_event.wait()?; 273 | 274 | assert_eq!(1300.0, results[ARRAY_SIZE - 1]); 275 | println!("results back: {}", results[ARRAY_SIZE - 1]); 276 | 277 | // Calculate the kernel duration, from the kernel_event 278 | let start_time = kernel_event.profiling_command_start()?; 279 | let end_time = kernel_event.profiling_command_end()?; 280 | let duration = end_time - start_time; 281 | println!("kernel execution duration (ns): {}", duration); 282 | } else { 283 | // !is_fine_grained_svm 284 | 285 | // Resize and map the input SVM vectors, before setting their data 286 | unsafe { 287 | ones.set_len(ARRAY_SIZE)?; 288 | sums.set_len(ARRAY_SIZE)?; 289 | queue.enqueue_svm_map(CL_BLOCKING, CL_MAP_WRITE, &mut ones, &[])?; 290 | queue.enqueue_svm_map(CL_BLOCKING, CL_MAP_WRITE, &mut sums, &[])?; 291 | } 292 | // The input data 293 | for i in 0..ARRAY_SIZE { 294 | ones[i] = 1.0; 295 | } 296 | 297 | for i in 0..ARRAY_SIZE { 298 | sums[i] = 1.0 + 1.0 * i as cl_float; 299 | } 300 | 301 | // Make ones and sums immutable 302 | let ones = ones; 303 | let sums = sums; 304 | 305 | let mut events: Vec = Vec::default(); 306 | let unmap_sums_event = unsafe { queue.enqueue_svm_unmap(&sums, &[])? }; 307 | let unmap_ones_event = unsafe { queue.enqueue_svm_unmap(&ones, &[])? }; 308 | events.push(unmap_sums_event.get()); 309 | events.push(unmap_ones_event.get()); 310 | 311 | // Use the ExecuteKernel builder to set the kernel buffer and 312 | // cl_float value arguments, before setting the one dimensional 313 | // global_work_size for the call to enqueue_nd_range. 314 | // Unwraps the Result to get the kernel execution event. 315 | let kernel_event = unsafe { 316 | ExecuteKernel::new(&kernel) 317 | .set_arg_svm(results.as_mut_ptr()) 318 | .set_arg_svm(ones.as_ptr()) 319 | .set_arg_svm(sums.as_ptr()) 320 | .set_arg(&a) 321 | .set_global_work_size(ARRAY_SIZE) 322 | .set_event_wait_list(&events) 323 | .enqueue_nd_range(&queue)? 
324 | }; 325 | 326 | // Wait for the kernel_event to complete 327 | kernel_event.wait()?; 328 | 329 | // Map SVM results before reading them 330 | let _map_results_event = 331 | unsafe { queue.enqueue_svm_map(CL_BLOCKING, CL_MAP_READ, &mut results, &[])? }; 332 | 333 | assert_eq!(1300.0, results[ARRAY_SIZE - 1]); 334 | println!("results back: {}", results[ARRAY_SIZE - 1]); 335 | 336 | // Calculate the kernel duration from the kernel_event 337 | let start_time = kernel_event.profiling_command_start()?; 338 | let end_time = kernel_event.profiling_command_end()?; 339 | let duration = end_time - start_time; 340 | println!("kernel execution duration (ns): {}", duration); 341 | 342 | ///////////////////////////////////////////////////////////////////// 343 | // Clean up 344 | let unmap_results_event = unsafe { queue.enqueue_svm_unmap(&results, &[])? }; 345 | unmap_results_event.wait()?; 346 | println!("SVM buffers unmapped"); 347 | } 348 | } else { 349 | println!("OpenCL SVM capable device not found") 350 | } 351 | 352 | Ok(()) 353 | } 354 | -------------------------------------------------------------------------------- /src/command_buffer.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021-2024 Via Technology Ltd. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | //! OpenCL Command Buffers extension. Enable with feature: cl_khr_command_buffer. 
16 | 17 | #![allow(clippy::too_many_arguments, clippy::missing_safety_doc)] 18 | 19 | use super::Result; 20 | use super::event::Event; 21 | use super::memory::*; 22 | 23 | #[allow(unused_imports)] 24 | use cl3::ext::{ 25 | CL_COMMAND_BUFFER_NUM_QUEUES_KHR, CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR, 26 | CL_COMMAND_BUFFER_QUEUES_KHR, CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR, 27 | CL_COMMAND_BUFFER_STATE_KHR, cl_bool, cl_command_buffer_info_khr, cl_command_buffer_khr, 28 | cl_command_buffer_properties_khr, cl_command_properties_khr, cl_mutable_command_khr, 29 | cl_sync_point_khr, command_barrier_with_wait_list_khr, command_copy_buffer_khr, 30 | command_copy_buffer_rect_khr, command_copy_buffer_to_image_khr, command_copy_image_khr, 31 | command_copy_image_to_buffer_khr, command_fill_buffer_khr, command_fill_image_khr, 32 | command_nd_range_kernel_khr, command_svm_mem_fill_khr, command_svm_memcpy_khr, 33 | create_command_buffer_khr, enqueue_command_buffer_khr, finalize_command_buffer_khr, 34 | get_command_buffer_data_khr, get_command_buffer_info_khr, 35 | get_command_buffer_mutable_dispatch_data, release_command_buffer_khr, 36 | }; 37 | #[allow(unused_imports)] 38 | use cl3::types::{cl_command_queue, cl_event, cl_kernel, cl_mem, cl_uint}; 39 | use libc::{c_void, size_t}; 40 | use std::mem; 41 | use std::ptr; 42 | 43 | /// An OpenCL command-buffer. 44 | /// 45 | /// This extension adds the ability to record and replay buffers of OpenCL commands. 
46 | /// See [cl_khr_command_buffer](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_command_buffer) 47 | #[derive(Debug)] 48 | pub struct CommandBuffer { 49 | buffer: cl_command_buffer_khr, 50 | } 51 | 52 | impl From<CommandBuffer> for cl_command_buffer_khr { 53 | fn from(value: CommandBuffer) -> Self { 54 | mem::ManuallyDrop::new(value).buffer // skip Drop: it would release the handle being returned to the caller 55 | } 56 | } 57 | 58 | impl Drop for CommandBuffer { 59 | fn drop(&mut self) { 60 | unsafe { 61 | release_command_buffer_khr(self.buffer).expect("Error: clReleaseCommandBufferKHR") 62 | }; 63 | } 64 | } 65 | 66 | unsafe impl Send for CommandBuffer {} 67 | unsafe impl Sync for CommandBuffer {} 68 | 69 | impl CommandBuffer { 70 | const fn new(buffer: cl_command_buffer_khr) -> Self { 71 | Self { buffer } 72 | } 73 | 74 | /// Get the underlying OpenCL cl_command_buffer_khr. 75 | pub const fn get(&self) -> cl_command_buffer_khr { 76 | self.buffer 77 | } 78 | 79 | /// Create a command-buffer that can record commands to the specified queues; an empty `properties` slice is passed to OpenCL as NULL. 80 | pub fn create( 81 | queues: &[cl_command_queue], 82 | properties: &[cl_command_buffer_properties_khr], 83 | ) -> Result<Self> { 84 | let buffer = create_command_buffer_khr(queues, if properties.is_empty() { ptr::null() } else { properties.as_ptr() })?; // as_ptr() of an empty slice is dangling, not NULL 85 | Ok(Self::new(buffer)) 86 | } 87 | 88 | /// Finalizes command recording ready for enqueuing the command-buffer on a command-queue. 89 | pub fn finalize(&self) -> Result<()> { 90 | Ok(finalize_command_buffer_khr(self.buffer)?) 91 | } 92 | 93 | /// Enqueues a command-buffer to execute on command-queues specified by queues, 94 | /// or on default command-queues used during recording if queues is empty.
95 | pub unsafe fn enqueue( 96 | &self, 97 | queues: &mut [cl_command_queue], 98 | event_wait_list: &[cl_event], 99 | ) -> Result { 100 | unsafe { 101 | let event = enqueue_command_buffer_khr( 102 | queues.len() as cl_uint, 103 | queues.as_mut_ptr(), 104 | self.buffer, 105 | event_wait_list.len() as cl_uint, 106 | if !event_wait_list.is_empty() { 107 | event_wait_list.as_ptr() 108 | } else { 109 | ptr::null() 110 | }, 111 | )?; 112 | Ok(Event::new(event)) 113 | } 114 | } 115 | 116 | /// Records a barrier operation used as a synchronization point. 117 | pub unsafe fn command_barrier_with_wait_list( 118 | &self, 119 | queue: cl_command_queue, 120 | properties: *const cl_command_properties_khr, 121 | sync_point_wait_list: &[cl_sync_point_khr], 122 | ) -> Result { 123 | let mut sync_point = 0; 124 | unsafe { 125 | command_barrier_with_wait_list_khr( 126 | self.buffer, 127 | queue, 128 | properties, 129 | sync_point_wait_list, 130 | &mut sync_point, 131 | ptr::null_mut(), 132 | )? 133 | }; 134 | Ok(sync_point) 135 | } 136 | 137 | /// Records a command to copy from one buffer object to another. 138 | pub unsafe fn copy_buffer( 139 | &self, 140 | queue: cl_command_queue, 141 | properties: *const cl_command_properties_khr, 142 | src_buffer: &Buffer, 143 | dst_buffer: &mut Buffer, 144 | src_offset: size_t, 145 | dst_offset: size_t, 146 | size: size_t, 147 | sync_point_wait_list: &[cl_sync_point_khr], 148 | ) -> Result { 149 | unsafe { 150 | let mut sync_point = 0; 151 | command_copy_buffer_khr( 152 | self.buffer, 153 | queue, 154 | properties, 155 | src_buffer.get(), 156 | dst_buffer.get_mut(), 157 | src_offset, 158 | dst_offset, 159 | size, 160 | sync_point_wait_list, 161 | &mut sync_point, 162 | ptr::null_mut(), 163 | )?; 164 | Ok(sync_point) 165 | } 166 | } 167 | 168 | /// Records a command to copy a rectangular region from a buffer object to another buffer object. 
169 | pub unsafe fn copy_buffer_rect( 170 | &self, 171 | queue: cl_command_queue, 172 | properties: *const cl_command_properties_khr, 173 | src_buffer: &Buffer, 174 | dst_buffer: &mut Buffer, 175 | src_origin: *const size_t, 176 | dst_origin: *const size_t, 177 | region: *const size_t, 178 | src_row_pitch: size_t, 179 | src_slice_pitch: size_t, 180 | dst_row_pitch: size_t, 181 | dst_slice_pitch: size_t, 182 | sync_point_wait_list: &[cl_sync_point_khr], 183 | ) -> Result { 184 | unsafe { 185 | let mut sync_point = 0; 186 | command_copy_buffer_rect_khr( 187 | self.buffer, 188 | queue, 189 | properties, 190 | src_buffer.get(), 191 | dst_buffer.get_mut(), 192 | src_origin, 193 | dst_origin, 194 | region, 195 | src_row_pitch, 196 | src_slice_pitch, 197 | dst_row_pitch, 198 | dst_slice_pitch, 199 | sync_point_wait_list, 200 | &mut sync_point, 201 | ptr::null_mut(), 202 | )?; 203 | Ok(sync_point) 204 | } 205 | } 206 | 207 | /// Records a command to copy a buffer object to an image object. 208 | pub unsafe fn copy_buffer_to_image( 209 | &self, 210 | queue: cl_command_queue, 211 | properties: *const cl_command_properties_khr, 212 | src_buffer: &Buffer, 213 | dst_image: &mut Image, 214 | src_offset: size_t, 215 | dst_origin: *const size_t, 216 | region: *const size_t, 217 | sync_point_wait_list: &[cl_sync_point_khr], 218 | ) -> Result { 219 | unsafe { 220 | let mut sync_point = 0; 221 | command_copy_buffer_to_image_khr( 222 | self.buffer, 223 | queue, 224 | properties, 225 | src_buffer.get(), 226 | dst_image.get_mut(), 227 | src_offset, 228 | dst_origin, 229 | region, 230 | sync_point_wait_list, 231 | &mut sync_point, 232 | ptr::null_mut(), 233 | )?; 234 | Ok(sync_point) 235 | } 236 | } 237 | 238 | /// Records a command to copy image objects. 
239 | pub unsafe fn copy_image( 240 | &self, 241 | queue: cl_command_queue, 242 | properties: *const cl_command_properties_khr, 243 | src_image: Image, 244 | dst_image: &mut Image, 245 | src_origin: *const size_t, 246 | dst_origin: *const size_t, 247 | region: *const size_t, 248 | sync_point_wait_list: &[cl_sync_point_khr], 249 | ) -> Result { 250 | unsafe { 251 | let mut sync_point = 0; 252 | command_copy_image_khr( 253 | self.buffer, 254 | queue, 255 | properties, 256 | src_image.get(), 257 | dst_image.get_mut(), 258 | src_origin, 259 | dst_origin, 260 | region, 261 | sync_point_wait_list, 262 | &mut sync_point, 263 | ptr::null_mut(), 264 | )?; 265 | Ok(sync_point) 266 | } 267 | } 268 | 269 | /// Records a command to copy an image object to a buffer object. 270 | pub unsafe fn copy_image_to_buffer( 271 | &self, 272 | queue: cl_command_queue, 273 | properties: *const cl_command_properties_khr, 274 | src_image: &Image, 275 | dst_buffer: &mut Buffer, 276 | src_origin: *const size_t, 277 | region: *const size_t, 278 | dst_offset: size_t, 279 | sync_point_wait_list: &[cl_sync_point_khr], 280 | ) -> Result { 281 | unsafe { 282 | let mut sync_point = 0; 283 | command_copy_image_to_buffer_khr( 284 | self.buffer, 285 | queue, 286 | properties, 287 | src_image.get(), 288 | dst_buffer.get_mut(), 289 | src_origin, 290 | region, 291 | dst_offset, 292 | sync_point_wait_list, 293 | &mut sync_point, 294 | ptr::null_mut(), 295 | )?; 296 | Ok(sync_point) 297 | } 298 | } 299 | 300 | /// Records a command to fill a buffer object with a pattern of a given pattern size. 
301 | #[allow(clippy::as_ptr_cast_mut)] 302 | pub unsafe fn fill_buffer( 303 | &self, 304 | queue: cl_command_queue, 305 | properties: *const cl_command_properties_khr, 306 | buffer: &mut Buffer, 307 | pattern: &[T], 308 | offset: size_t, 309 | size: size_t, 310 | sync_point_wait_list: &[cl_sync_point_khr], 311 | ) -> Result { 312 | unsafe { 313 | let mut sync_point = 0; 314 | command_fill_buffer_khr( 315 | self.buffer, 316 | queue, 317 | properties, 318 | buffer.get_mut(), 319 | pattern.as_ptr() as cl_mem, 320 | mem::size_of_val(pattern), 321 | offset, 322 | size, 323 | sync_point_wait_list, 324 | &mut sync_point, 325 | ptr::null_mut(), 326 | )?; 327 | Ok(sync_point) 328 | } 329 | } 330 | 331 | /// Records a command to fill an image object with a specified color. 332 | pub unsafe fn fill_image( 333 | &self, 334 | queue: cl_command_queue, 335 | properties: *const cl_command_properties_khr, 336 | image: &mut Image, 337 | fill_color: *const c_void, 338 | origin: *const size_t, 339 | region: *const size_t, 340 | sync_point_wait_list: &[cl_sync_point_khr], 341 | ) -> Result { 342 | unsafe { 343 | let mut sync_point = 0; 344 | command_fill_image_khr( 345 | self.buffer, 346 | queue, 347 | properties, 348 | image.get_mut(), 349 | fill_color, 350 | origin, 351 | region, 352 | sync_point_wait_list, 353 | &mut sync_point, 354 | ptr::null_mut(), 355 | )?; 356 | Ok(sync_point) 357 | } 358 | } 359 | 360 | /// Records a command to execute a kernel on a device. 
361 | pub unsafe fn nd_range_kernel( 362 | &self, 363 | queue: cl_command_queue, 364 | properties: *const cl_command_properties_khr, 365 | kernel: cl_kernel, 366 | work_dim: cl_uint, 367 | global_work_offsets: *const size_t, 368 | global_work_sizes: *const size_t, 369 | local_work_sizes: *const size_t, 370 | sync_point_wait_list: &[cl_sync_point_khr], 371 | ) -> Result { 372 | unsafe { 373 | let mut sync_point = 0; 374 | command_nd_range_kernel_khr( 375 | self.buffer, 376 | queue, 377 | properties, 378 | kernel, 379 | work_dim, 380 | global_work_offsets, 381 | global_work_sizes, 382 | local_work_sizes, 383 | sync_point_wait_list, 384 | &mut sync_point, 385 | ptr::null_mut(), 386 | )?; 387 | Ok(sync_point) 388 | } 389 | } 390 | 391 | pub unsafe fn svm_memcpy( 392 | &self, 393 | queue: cl_command_queue, 394 | properties: *const cl_command_properties_khr, 395 | dst_ptr: *mut c_void, 396 | src_ptr: *const c_void, 397 | size: size_t, 398 | sync_point_wait_list: &[cl_sync_point_khr], 399 | mutable_handle: *mut cl_mutable_command_khr, 400 | ) -> Result { 401 | unsafe { 402 | let mut sync_point = 0; 403 | command_svm_memcpy_khr( 404 | self.buffer, 405 | queue, 406 | properties, 407 | dst_ptr, 408 | src_ptr, 409 | size, 410 | sync_point_wait_list, 411 | &mut sync_point, 412 | mutable_handle, 413 | )?; 414 | Ok(sync_point) 415 | } 416 | } 417 | 418 | pub unsafe fn svm_mem_fill( 419 | &self, 420 | queue: cl_command_queue, 421 | properties: *const cl_command_properties_khr, 422 | svm_ptr: *mut c_void, 423 | pattern: *const c_void, 424 | pattern_size: size_t, 425 | size: size_t, 426 | sync_point_wait_list: &[cl_sync_point_khr], 427 | mutable_handle: *mut cl_mutable_command_khr, 428 | ) -> Result { 429 | unsafe { 430 | let mut sync_point = 0; 431 | command_svm_mem_fill_khr( 432 | self.buffer, 433 | queue, 434 | properties, 435 | svm_ptr, 436 | pattern, 437 | pattern_size, 438 | size, 439 | sync_point_wait_list, 440 | &mut sync_point, 441 | mutable_handle, 442 | )?; 443 | 
Ok(sync_point) 444 | } 445 | } 446 | 447 | pub fn num_queues(&self) -> Result { 448 | Ok(get_command_buffer_info_khr(self.buffer, CL_COMMAND_BUFFER_NUM_QUEUES_KHR)?.into()) 449 | } 450 | 451 | pub fn queues(&self) -> Result> { 452 | // cl_command_queue 453 | Ok(get_command_buffer_info_khr(self.buffer, CL_COMMAND_BUFFER_QUEUES_KHR)?.into()) 454 | } 455 | 456 | pub fn reference_count(&self) -> Result { 457 | Ok(get_command_buffer_info_khr(self.buffer, CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR)?.into()) 458 | } 459 | 460 | pub fn buffer_state(&self) -> Result { 461 | Ok(get_command_buffer_info_khr(self.buffer, CL_COMMAND_BUFFER_STATE_KHR)?.into()) 462 | } 463 | 464 | pub fn properties_array(&self) -> Result> { 465 | Ok( 466 | get_command_buffer_info_khr(self.buffer, CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR)? 467 | .into(), 468 | ) 469 | } 470 | 471 | pub fn get_data(&self, param_name: cl_command_buffer_info_khr) -> Result> { 472 | Ok(get_command_buffer_data_khr(self.buffer, param_name)?) 473 | } 474 | } 475 | -------------------------------------------------------------------------------- /src/context.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2025 Via Technology Ltd. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #![allow(clippy::missing_safety_doc)] 16 | 17 | pub use cl3::context; 18 | 19 | use super::Result; 20 | use super::device::Device; 21 | #[cfg(any(feature = "CL_VERSION_1_2", feature = "dynamic"))] 22 | use super::device::SubDevice; 23 | 24 | #[allow(unused_imports)] 25 | use cl3::dx9_media_sharing; 26 | 27 | #[cfg(any(feature = "cl_khr_d3d10_sharing", feature = "dynamic"))] 28 | #[allow(unused_imports)] 29 | use cl3::d3d10; 30 | 31 | #[cfg(any(feature = "cl_khr_d3d11_sharing", feature = "dynamic"))] 32 | #[allow(unused_imports)] 33 | use cl3::d3d11; 34 | 35 | #[allow(unused_imports)] 36 | use cl3::egl; 37 | #[allow(unused_imports)] 38 | use cl3::ext; 39 | #[allow(unused_imports)] 40 | use cl3::gl; 41 | #[allow(unused_imports)] 42 | use cl3::types::{ 43 | cl_context, cl_context_info, cl_context_properties, cl_device_id, cl_device_svm_capabilities, 44 | cl_device_type, cl_event, cl_image_format, cl_mem, cl_mem_flags, cl_mem_object_type, cl_uint, 45 | }; 46 | use libc::{c_char, c_void, intptr_t, size_t}; 47 | use std::ptr; 48 | 49 | /// Get the current device used by an OpenGL context. 50 | /// 51 | /// * `properties` - the OpenCL context properties. 52 | /// 53 | /// returns a Result containing the device 54 | /// or the error code from the OpenCL C API function. 55 | #[cfg(any(feature = "cl_khr_gl_sharing", feature = "dynamic"))] 56 | #[allow(clippy::as_ptr_cast_mut)] 57 | pub fn get_current_device_for_gl_context_khr( 58 | properties: &[cl_context_properties], 59 | ) -> Result { 60 | let device = intptr_t::from(gl::get_gl_context_info_khr( 61 | properties.as_ptr() as *mut cl_context_properties, 62 | gl::CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR, 63 | )?) as cl_device_id; 64 | Ok(device) 65 | } 66 | 67 | /// Get the devices for an OpenGL context. 68 | /// 69 | /// * `properties` - the OpenCL context properties. 70 | /// 71 | /// returns a Result containing the devices 72 | /// or the error code from the OpenCL C API function. 
73 | #[cfg(any(feature = "cl_khr_gl_sharing", feature = "dynamic"))] 74 | #[allow(clippy::as_ptr_cast_mut)] 75 | pub fn get_devices_for_gl_context_khr( 76 | properties: &[cl_context_properties], 77 | ) -> Result> { 78 | let dev_ptrs: Vec = gl::get_gl_context_info_khr( 79 | properties.as_ptr() as *mut cl_context_properties, 80 | gl::CL_DEVICES_FOR_GL_CONTEXT_KHR, 81 | )? 82 | .into(); 83 | let devices = dev_ptrs 84 | .iter() 85 | .map(|ptr| *ptr as cl_device_id) 86 | .collect::>(); 87 | Ok(devices) 88 | } 89 | 90 | /// An OpenCL context object. 91 | /// Implements the Drop trait to call release_context when the object is dropped. 92 | #[derive(Debug)] 93 | pub struct Context { 94 | context: cl_context, 95 | devices: Vec, 96 | } 97 | 98 | impl From for cl_context { 99 | fn from(value: Context) -> Self { 100 | value.context 101 | } 102 | } 103 | 104 | impl Drop for Context { 105 | fn drop(&mut self) { 106 | self.devices.clear(); 107 | unsafe { context::release_context(self.context).expect("Error: clReleaseContext") }; 108 | } 109 | } 110 | 111 | unsafe impl Send for Context {} 112 | unsafe impl Sync for Context {} 113 | 114 | impl Context { 115 | fn new(context: cl_context, devices: &[cl_device_id]) -> Self { 116 | Self { 117 | context, 118 | devices: devices.to_vec(), 119 | } 120 | } 121 | 122 | /// Get the underlying OpenCL cl_context. 123 | pub const fn get(&self) -> cl_context { 124 | self.context 125 | } 126 | 127 | /// Create a Context from a slice of cl_device_ids. 128 | /// 129 | /// * `devices` - a slice of cl_device_ids for an OpenCL Platform. 130 | /// * `properties` - a null terminated list of cl_context_properties, see 131 | /// [Context Properties](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#context-properties-table). 132 | /// * `pfn_notify` - an optional callback function that can be registered by the application. 133 | /// * `user_data` - passed as the user_data argument when pfn_notify is called. 
134 | /// 135 | /// returns a Result containing the new OpenCL context 136 | /// or the error code from the OpenCL C API function. 137 | pub fn from_devices( 138 | devices: &[cl_device_id], 139 | properties: &[cl_context_properties], 140 | pfn_notify: Option, 141 | user_data: *mut c_void, 142 | ) -> Result { 143 | let properties_ptr = if !properties.is_empty() { 144 | properties.as_ptr() 145 | } else { 146 | ptr::null() 147 | }; 148 | let context = context::create_context(devices, properties_ptr, pfn_notify, user_data)?; 149 | Ok(Self::new(context, devices)) 150 | } 151 | 152 | /// Create a Context from a [Device]. 153 | /// 154 | /// * `device` - a [Device]. 155 | /// 156 | /// returns a Result containing the new OpenCL context 157 | /// or the error code from the OpenCL C API function. 158 | pub fn from_device(device: &Device) -> Result { 159 | let devices: Vec = vec![device.id()]; 160 | let properties = Vec::::default(); 161 | Self::from_devices(&devices, &properties, None, ptr::null_mut()) 162 | } 163 | 164 | /// Create a Context from a slice of SubDevices. 165 | /// 166 | /// * `devices` - a slice of SubDevices for an OpenCL Platform. 167 | /// * `properties` - a null terminated list of cl_context_properties, see 168 | /// [Context Properties](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#context-properties-table). 169 | /// * `pfn_notify` - an optional callback function that can be registered by the application. 170 | /// * `user_data` - passed as the user_data argument when pfn_notify is called. 171 | /// 172 | /// returns a Result containing the new OpenCL context 173 | /// or the error code from the OpenCL C API function. 
174 | #[cfg(any(feature = "CL_VERSION_1_2", feature = "dynamic"))] 175 | pub fn from_sub_devices( 176 | sub_devices: &[SubDevice], 177 | properties: &[cl_context_properties], 178 | pfn_notify: Option, 179 | user_data: *mut c_void, 180 | ) -> Result { 181 | let devices = sub_devices 182 | .iter() 183 | .map(|dev| dev.id()) 184 | .collect::>(); 185 | Self::from_devices(&devices, properties, pfn_notify, user_data) 186 | } 187 | 188 | /// Create a Context from a cl_device_type. 189 | /// 190 | /// * `device_type` - the cl_device_type to create a Context for. 191 | /// * `properties` - a null terminated list of cl_context_properties, see 192 | /// [Context Properties](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#context-properties-table). 193 | /// * `pfn_notify` - an optional callback function that can be registered by the application. 194 | /// * `user_data` - passed as the user_data argument when pfn_notify is called. 195 | /// 196 | /// returns a Result containing the new OpenCL context 197 | /// or the error code from the OpenCL C API function. 198 | pub fn from_device_type( 199 | device_type: cl_device_type, 200 | properties: &[cl_context_properties], 201 | pfn_notify: Option, 202 | user_data: *mut c_void, 203 | ) -> Result { 204 | let properties_ptr = if !properties.is_empty() { 205 | properties.as_ptr() 206 | } else { 207 | ptr::null() 208 | }; 209 | let context = 210 | context::create_context_from_type(device_type, properties_ptr, pfn_notify, user_data)?; 211 | let dev_ptrs: Vec = 212 | context::get_context_info(context, context::CL_CONTEXT_DEVICES)?.into(); 213 | let devices = dev_ptrs 214 | .iter() 215 | .map(|ptr| *ptr as cl_device_id) 216 | .collect::>(); 217 | Ok(Self::new(context, &devices)) 218 | } 219 | 220 | /// Get the common Shared Virtual Memory (SVM) capabilities of the 221 | /// devices in the Context. 
222 | pub fn get_svm_mem_capability(&self) -> cl_device_svm_capabilities { 223 | let device = Device::new(self.devices[0]); 224 | let mut svm_capability = device.svm_mem_capability(); 225 | 226 | for index in 1..self.devices.len() { 227 | let device = Device::new(self.devices[index]); 228 | svm_capability &= device.svm_mem_capability(); 229 | } 230 | 231 | svm_capability 232 | } 233 | 234 | /// Get the list of image formats supported by the Context for an image type, 235 | /// and allocation information. 236 | /// 237 | /// Calls clGetSupportedImageFormats to get the desired information about the context. 238 | /// 239 | /// * `flags` - a bit-field used to specify allocation and usage information 240 | /// about the image memory object being created, see: 241 | /// [Memory Flags](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#memory-flags-table). 242 | /// * `image_type` - describes the image type. 243 | /// 244 | /// returns a Result containing the desired information in an InfoType enum 245 | /// or the error code from the OpenCL C API function. 246 | pub fn get_supported_image_formats( 247 | &self, 248 | flags: cl_mem_flags, 249 | image_type: cl_mem_object_type, 250 | ) -> Result> { 251 | Ok(cl3::memory::get_supported_image_formats( 252 | self.context, 253 | flags, 254 | image_type, 255 | )?) 256 | } 257 | 258 | #[cfg(any(feature = "cl_arm_import_memory", feature = "dynamic"))] 259 | pub unsafe fn import_memory_arm( 260 | &self, 261 | flags: cl_mem_flags, 262 | properties: *const ext::cl_import_properties_arm, 263 | memory: *mut c_void, 264 | size: size_t, 265 | ) -> Result { 266 | unsafe { 267 | Ok(ext::import_memory_arm( 268 | self.context, 269 | flags, 270 | properties, 271 | memory, 272 | size, 273 | )?) 
274 | } 275 | } 276 | 277 | #[allow(clippy::missing_const_for_fn)] 278 | pub fn devices(&self) -> &[cl_device_id] { 279 | &self.devices 280 | } 281 | 282 | pub fn default_device(&self) -> cl_device_id { 283 | self.devices[0] 284 | } 285 | 286 | pub const fn num_devices(&self) -> cl_uint { 287 | self.devices.len() as cl_uint 288 | } 289 | 290 | #[cfg(any(feature = "CL_VERSION_3_0", feature = "dynamic"))] 291 | #[inline] 292 | pub fn set_destructor_callback( 293 | &self, 294 | pfn_notify: Option, 295 | user_data: *mut c_void, 296 | ) -> Result<()> { 297 | context::set_context_destructor_callback(self.context, pfn_notify, user_data) 298 | .map_err(Into::into) 299 | } 300 | 301 | pub fn reference_count(&self) -> Result { 302 | Ok(context::get_context_info(self.context, context::CL_CONTEXT_REFERENCE_COUNT)?.into()) 303 | } 304 | 305 | pub fn properties(&self) -> Result> { 306 | Ok(context::get_context_info(self.context, context::CL_CONTEXT_PROPERTIES)?.into()) 307 | } 308 | 309 | /// Get data about an OpenCL context. 310 | /// Calls clGetContextInfo to get the desired data about the context. 311 | pub fn get_data(&self, param_name: cl_context_info) -> Result> { 312 | Ok(context::get_context_data(self.context, param_name)?) 313 | } 314 | 315 | #[cfg(any(feature = "cl_khr_terminate_context", feature = "dynamic"))] 316 | pub unsafe fn terminate(&self) -> Result<()> { 317 | unsafe { Ok(ext::terminate_context_khr(self.context)?) } 318 | } 319 | 320 | /// Create a cl_event linked to an OpenGL sync object. 321 | /// Requires the cl_khr_gl_event extension 322 | /// 323 | /// * `sync` - the sync object in the GL share group associated with context. 324 | /// 325 | /// returns a Result containing the new OpenCL event 326 | /// or the error code from the OpenCL C API function. 
327 | #[cfg(any(feature = "cl_khr_gl_sharing", feature = "dynamic"))] 328 | pub fn create_event_from_gl_sync_khr(&self, sync: gl::cl_GLsync) -> Result { 329 | Ok(gl::create_event_from_gl_sync_khr(self.context, sync)?) 330 | } 331 | 332 | /// Create an event object linked to an EGL fence sync object. 333 | /// Requires the cl_khr_egl_event extension 334 | /// 335 | /// * `sync` - the handle to an EGLSync object. 336 | /// * `display` - the handle to an EGLDisplay. 337 | /// 338 | /// returns a Result containing the new OpenCL event 339 | /// or the error code from the OpenCL C API function. 340 | #[cfg(any(feature = "cl_khr_egl_event", feature = "dynamic"))] 341 | pub unsafe fn create_event_from_egl_sync_khr( 342 | &self, 343 | sync: egl::CLeglSyncKHR, 344 | display: egl::CLeglDisplayKHR, 345 | ) -> Result { 346 | unsafe { 347 | Ok(egl::create_event_from_egl_sync_khr( 348 | self.context, 349 | sync, 350 | display, 351 | )?) 352 | } 353 | } 354 | 355 | #[cfg(any(feature = "cl_khr_semaphore", feature = "dynamic"))] 356 | pub fn create_semaphore_with_properties_khr( 357 | &self, 358 | sema_props: *const ext::cl_semaphore_properties_khr, 359 | ) -> Result { 360 | Ok(ext::create_semaphore_with_properties_khr( 361 | self.context, 362 | sema_props, 363 | )?) 364 | } 365 | 366 | #[cfg(any( 367 | feature = "cl_khr_dx9_media_sharing", 368 | feature = "cl_intel_dx9_media_sharing", 369 | feature = "dynamic" 370 | ))] 371 | pub fn get_supported_dx9_media_surface_formats_intel( 372 | &self, 373 | flags: cl_mem_flags, 374 | image_type: cl_mem_object_type, 375 | plane: cl_uint, 376 | ) -> Result> { 377 | Ok(unsafe { 378 | dx9_media_sharing::get_supported_dx9_media_surface_formats_intel( 379 | self.context, 380 | flags, 381 | image_type, 382 | plane, 383 | ) 384 | }?) 
385 | } 386 | 387 | #[cfg(any(feature = "cl_khr_d3d10_sharing", feature = "dynamic"))] 388 | pub fn get_supported_d3d10_texture_formats_intel( 389 | &self, 390 | flags: cl_mem_flags, 391 | image_type: cl_mem_object_type, 392 | ) -> Result> { 393 | Ok(unsafe { 394 | d3d10::get_supported_d3d10_texture_formats_intel(self.context, flags, image_type) 395 | }?) 396 | } 397 | 398 | #[cfg(any(feature = "cl_khr_d3d11_sharing", feature = "dynamic"))] 399 | pub fn get_supported_d3d11_texture_formats_intel( 400 | &self, 401 | flags: cl_mem_flags, 402 | image_type: cl_mem_object_type, 403 | plane: cl_uint, 404 | ) -> Result> { 405 | Ok(unsafe { 406 | d3d11::get_supported_d3d11_texture_formats_intel(self.context, flags, image_type, plane) 407 | }?) 408 | } 409 | } 410 | 411 | #[cfg(test)] 412 | mod tests { 413 | use super::*; 414 | use crate::device::Device; 415 | use crate::platform::get_platforms; 416 | use cl3::device::CL_DEVICE_TYPE_GPU; 417 | use cl3::info_type::InfoType; 418 | use cl3::memory::{CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE}; 419 | 420 | #[test] 421 | fn test_context() { 422 | let platforms = get_platforms().unwrap(); 423 | assert!(0 < platforms.len()); 424 | 425 | // Get the first platform 426 | let platform = &platforms[0]; 427 | 428 | let devices = platform.get_devices(CL_DEVICE_TYPE_GPU).unwrap(); 429 | assert!(0 < devices.len()); 430 | 431 | // Get the first device 432 | let device = Device::new(devices[0]); 433 | let context = Context::from_device(&device).unwrap(); 434 | 435 | println!( 436 | "CL_DEVICE_SVM_CAPABILITIES: {:X}", 437 | context.get_svm_mem_capability() 438 | ); 439 | 440 | println!( 441 | "clGetSupportedImageFormats:\norder: data_type {}", 442 | InfoType::VecImageFormat( 443 | context 444 | .get_supported_image_formats(CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D) 445 | .unwrap() 446 | ) 447 | ); 448 | 449 | println!( 450 | "CL_CONTEXT_REFERENCE_COUNT: {}", 451 | context.reference_count().unwrap() 452 | ); 453 | 454 | 
println!("CL_CONTEXT_PROPERTIES: {:?}", context.properties().unwrap()); 455 | } 456 | 457 | #[test] 458 | fn test_context_from_device_type() { 459 | let properties = Vec::::default(); 460 | let context = 461 | Context::from_device_type(CL_DEVICE_TYPE_GPU, &properties, None, ptr::null_mut()); 462 | 463 | match context { 464 | Ok(value) => { 465 | println!("Context num devices: {}", value.num_devices()) 466 | } 467 | Err(e) => println!("OpenCL error, Context::from_device_type: {}", e), 468 | } 469 | } 470 | } 471 | --------------------------------------------------------------------------------