├── docs ├── LICENSE_THIRD_PARTY ├── .gitignore ├── _templates │ └── autoapi │ │ ├── python │ │ ├── attribute.rst │ │ ├── exception.rst │ │ ├── package.rst │ │ ├── property.rst │ │ ├── function.rst │ │ ├── method.rst │ │ └── data.rst │ │ ├── index.rst │ │ └── macros.rst ├── asset │ └── images │ │ ├── DPEP.png │ │ ├── dpep-all.png │ │ ├── DPEP-large.png │ │ ├── numba-dpex-logo.png │ │ ├── kernel_prog_model.png │ │ └── kernel-queue-device.png ├── source │ ├── user_guide │ │ ├── kernel_programming │ │ │ ├── call-kernel-async.rst │ │ │ ├── operators.rst │ │ │ ├── math-functions.rst │ │ │ ├── operators.csv │ │ │ └── supported-python-features.rst │ │ ├── index.rst │ │ └── debugging │ │ │ ├── features.rst │ │ │ ├── limitations.rst │ │ │ ├── backtrace.rst │ │ │ ├── altering.rst │ │ │ ├── debugging_environment.rst │ │ │ └── data.rst │ ├── config_options.rst │ ├── release-notes.rst │ ├── license.rst │ ├── index.rst │ ├── useful_links.rst │ └── sycl_spec_links.py ├── _static │ └── css │ │ └── custom.css ├── backups │ └── user_guides │ │ └── kernel_programming_guide │ │ ├── index.rst │ │ ├── atomic-operations.rst │ │ ├── device-functions.rst │ │ ├── synchronization.rst │ │ ├── reduction.rst │ │ └── memory_allocation_address_space.rst ├── Makefile └── make.bat ├── numba_dpex ├── .gitignore ├── core │ ├── parfors │ │ ├── __init__.py │ │ └── kernel_templates │ │ │ ├── __init__.py │ │ │ └── kernel_template_iface.py │ ├── targets │ │ └── __init__.py │ ├── utils │ │ └── __init__.py │ ├── datamodel │ │ └── __init__.py │ ├── overloads │ │ └── __init__.py │ ├── pipelines │ │ └── __init__.py │ ├── types │ │ ├── kernel_api │ │ │ ├── __init__.py │ │ │ ├── ranges.py │ │ │ └── literal_intenum.py │ │ ├── kernel_dispatcher_type.py │ │ ├── numba_types_short_names.py │ │ └── __init__.py │ ├── typing │ │ └── __init__.py │ ├── __init__.py │ ├── boxing │ │ └── __init__.py │ ├── passes │ │ └── __init__.py │ ├── runtime │ │ ├── __init__.py │ │ ├── _queuestruct.h │ │ ├── _eventstruct.c │ │ ├── 
experimental │ │ │ ├── tools │ │ │ │ ├── dpctl.cpp │ │ │ │ ├── dpctl.hpp │ │ │ │ └── boost_hash.hpp │ │ │ └── nrt_reserve_meminfo.h │ │ ├── _usmarraystruct.h │ │ ├── _eventstruct.h │ │ ├── _nrt_helper.h │ │ ├── _nrt_python_helper.h │ │ ├── _dbg_printer.h │ │ └── _meminfo_helper.h │ ├── lowering.py │ └── debuginfo.py ├── tests │ ├── kernel_api │ │ ├── __init__.py │ │ ├── test_local_accessor.py │ │ ├── test_range_kernel_launch.py │ │ └── test_ndrange_kernel_launch.py │ ├── codegen │ │ ├── __init__.py │ │ └── test_intenum_literal_codegen.py │ ├── core │ │ ├── types │ │ │ ├── IntEnumLiteral │ │ │ │ ├── __init__.py │ │ │ │ ├── test_type_creation.py │ │ │ │ ├── test_compilation.py │ │ │ │ └── test_type_registration.py │ │ │ ├── range_types │ │ │ │ ├── __init__.py │ │ │ │ └── test_constructor_overloads.py │ │ │ ├── __init__.py │ │ │ ├── USMNdArray │ │ │ │ ├── __init__.py │ │ │ │ ├── test_usm_ndarray_type.py │ │ │ │ └── test_array_creation_errors.py │ │ │ ├── DpctlSyclEvent │ │ │ │ ├── __init__.py │ │ │ │ ├── test_overloads.py │ │ │ │ ├── test_models.py │ │ │ │ └── test_box.py │ │ │ ├── DpctlSyclQueue │ │ │ │ ├── __init__.py │ │ │ │ └── test_box.py │ │ │ └── DpnpNdArray │ │ │ │ ├── __init__.py │ │ │ │ ├── test_bugs.py │ │ │ │ ├── test_dpnp_ndarray_type.py │ │ │ │ └── test_boxing_unboxing.py │ │ ├── passes │ │ │ └── __init__.py │ │ ├── runtime │ │ │ ├── __init__.py │ │ │ └── test_llvm_registration.py │ │ ├── __init__.py │ │ ├── test_dpjit_target.py │ │ └── test_itanium_mangler_extension.py │ ├── dpjit_tests │ │ ├── parfors │ │ │ ├── prange │ │ │ │ ├── __init__.py │ │ │ │ └── test_pairwise_distance.py │ │ │ └── __init__.py │ │ ├── dpnp │ │ │ ├── __init__.py │ │ │ └── test_target_specific_overload.py │ │ ├── __init__.py │ │ ├── test_slicing.py │ │ └── test_dpex_target_overload_isolation.py │ ├── misc │ │ ├── __init__.py │ │ ├── test_parse_sem_version.py │ │ └── test_warnings.py │ ├── debugging │ │ ├── __init__.py │ │ ├── conftest.py │ │ └── test_backtraces.py │ ├── 
kernel_tests │ │ ├── __init__.py │ │ ├── test_compiler_warnings.py │ │ ├── test_func.py │ │ ├── test_invalid_kernel_args.py │ │ ├── test_supported_array_types_as_kernel_args.py │ │ ├── test_ndrange_exceptions.py │ │ ├── test_atomic_fence.py │ │ ├── test_kernel_has_return_value_error.py │ │ ├── test_math_functions.py │ │ ├── test_barriers.py │ │ ├── test_inline_threshold_config.py │ │ ├── test_usm_ndarray_args.py │ │ ├── test_target_specific_overload.py │ │ └── test_complex_array_kernel_arg.py │ ├── __init__.py │ ├── conftest.py │ ├── test_examples.py │ └── test_dpex_use_alongside_numba.py ├── numba_patches │ └── __init__.py ├── kernel_api_impl │ ├── spirv │ │ ├── math │ │ │ └── __init__.py │ │ ├── overloads │ │ │ ├── __init__.py │ │ │ └── _registry.py │ │ └── __init__.py │ └── __init__.py ├── dpnp_iface │ └── __init__.py ├── dpctl_iface │ ├── __init__.py │ ├── dpctlimpl.py │ ├── wrappers.py │ └── _helpers.py ├── examples │ ├── debug │ │ ├── commands │ │ │ ├── docs │ │ │ │ ├── break_conditional │ │ │ │ ├── break_func │ │ │ │ ├── break_line_number │ │ │ │ ├── break_file_func │ │ │ │ ├── break_nested_func │ │ │ │ ├── simple_sum │ │ │ │ ├── step_sum │ │ │ │ ├── next │ │ │ │ ├── local_variables_1 │ │ │ │ ├── backtrace_kernel │ │ │ │ ├── backtrace │ │ │ │ ├── stepi │ │ │ │ ├── step_dpex_func │ │ │ │ ├── info_func │ │ │ │ ├── stepping │ │ │ │ └── sheduler_locking │ │ │ ├── break_conditional │ │ │ ├── break_func │ │ │ ├── break_line_number │ │ │ ├── break_file_func │ │ │ ├── break_nested_func │ │ │ ├── simple_sum │ │ │ ├── next │ │ │ ├── step_sum │ │ │ ├── local_variables_1 │ │ │ ├── backtrace_kernel │ │ │ ├── backtrace │ │ │ ├── step_dpex_func │ │ │ ├── stepi │ │ │ ├── info_func │ │ │ ├── stepping │ │ │ └── sheduler_locking │ │ ├── njit_basic.py │ │ ├── simple_sum.py │ │ ├── sum_local_vars.py │ │ ├── sum_local_vars_revive.py │ │ ├── simple_dpex_func.py │ │ ├── sum.py │ │ └── dpex_func.py │ ├── _helper.py │ ├── dpjit │ │ ├── vector_sum2D.py │ │ └── vector_sum.py │ └── 
kernel │ │ ├── vector_sum.py │ │ ├── atomic_op.py │ │ ├── vector_sum2D.py │ │ ├── device_func.py │ │ └── scan.py ├── register_kernel_api_overloads.py └── kernel_api │ ├── flag_enum.py │ ├── atomic_fence.py │ ├── __init__.py │ ├── memory_enums.py │ ├── private_array.py │ └── barrier.py ├── .github ├── CODEOWNERS ├── dependabot.yml ├── pull_request_template.md ├── release.yml └── workflows │ ├── cpp_style_checks.yml │ ├── license.yml │ ├── pre-commit.yml │ ├── black.yml │ ├── coverage.yml │ └── coverity.yml ├── .dockerignore ├── .gitattributes ├── .git-blame-ignore-revs ├── scripts ├── build_conda_package.sh ├── diag_env.sh ├── run_debug_examples.sh ├── update_copyrights.py ├── run_examples.sh ├── config_cpu_device.ps1 └── set_examples_to_doc.py ├── conda-recipe ├── run_test.sh ├── run_test.bat ├── build.sh └── bld.bat ├── environment ├── pre-commit.yml ├── coverage.yml └── docs.yml ├── SECURITY.md ├── .gitignore ├── .clang-format ├── .flake8 ├── environment.yml └── CMakeLists.txt /docs/LICENSE_THIRD_PARTY: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /numba_dpex/.gitignore: -------------------------------------------------------------------------------- 1 | *.bc 2 | *.ll 3 | *.spir 4 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | conda-recipe/* @oleksandr-pavlyk 2 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | apidoc 2 | sources/_build 3 | source/autoapi 4 | -------------------------------------------------------------------------------- /docs/_templates/autoapi/python/attribute.rst: -------------------------------------------------------------------------------- 1 | {% 
extends "python/data.rst" %} 2 | -------------------------------------------------------------------------------- /docs/_templates/autoapi/python/exception.rst: -------------------------------------------------------------------------------- 1 | {% extends "python/class.rst" %} 2 | -------------------------------------------------------------------------------- /docs/_templates/autoapi/python/package.rst: -------------------------------------------------------------------------------- 1 | {% extends "python/module.rst" %} 2 | -------------------------------------------------------------------------------- /docs/asset/images/DPEP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelPython/numba-dpex/HEAD/docs/asset/images/DPEP.png -------------------------------------------------------------------------------- /docs/asset/images/dpep-all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelPython/numba-dpex/HEAD/docs/asset/images/dpep-all.png -------------------------------------------------------------------------------- /docs/asset/images/DPEP-large.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelPython/numba-dpex/HEAD/docs/asset/images/DPEP-large.png -------------------------------------------------------------------------------- /docs/asset/images/numba-dpex-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelPython/numba-dpex/HEAD/docs/asset/images/numba-dpex-logo.png -------------------------------------------------------------------------------- /docs/asset/images/kernel_prog_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelPython/numba-dpex/HEAD/docs/asset/images/kernel_prog_model.png 
-------------------------------------------------------------------------------- /docs/asset/images/kernel-queue-device.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelPython/numba-dpex/HEAD/docs/asset/images/kernel-queue-device.png -------------------------------------------------------------------------------- /numba_dpex/core/parfors/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | -------------------------------------------------------------------------------- /numba_dpex/core/targets/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | -------------------------------------------------------------------------------- /numba_dpex/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | -------------------------------------------------------------------------------- /numba_dpex/tests/kernel_api/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Ignore everything 2 | * 3 | 4 | # Allow files and directories 5 | !/scripts/builder_entrypoint.sh 6 | !/scripts/github_load.py 7 | -------------------------------------------------------------------------------- /numba_dpex/core/datamodel/__init__.py: 
-------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2022 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | -------------------------------------------------------------------------------- /numba_dpex/core/overloads/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | -------------------------------------------------------------------------------- /numba_dpex/core/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | -------------------------------------------------------------------------------- /numba_dpex/core/types/kernel_api/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | -------------------------------------------------------------------------------- /numba_dpex/numba_patches/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | -------------------------------------------------------------------------------- /numba_dpex/tests/codegen/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | -------------------------------------------------------------------------------- /numba_dpex/kernel_api_impl/spirv/math/__init__.py: -------------------------------------------------------------------------------- 1 | # 
SPDX-FileCopyrightText: 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | numba_dpex/_version.py export-subst 2 | * text=auto eol=lf 3 | *.{cmd,[cC][mM][dD]} text eol=crlf 4 | *.{bat,[bB][aA][tT]} text eol=crlf 5 | -------------------------------------------------------------------------------- /docs/source/user_guide/kernel_programming/call-kernel-async.rst: -------------------------------------------------------------------------------- 1 | .. _launching-an-async-kernel: 2 | 3 | Async kernel execution 4 | ====================== 5 | -------------------------------------------------------------------------------- /numba_dpex/tests/core/types/IntEnumLiteral/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | -------------------------------------------------------------------------------- /numba_dpex/tests/dpjit_tests/parfors/prange/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | -------------------------------------------------------------------------------- /numba_dpex/kernel_api_impl/spirv/overloads/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | -------------------------------------------------------------------------------- /numba_dpex/tests/core/types/range_types/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel 
Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | -------------------------------------------------------------------------------- /numba_dpex/tests/misc/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from . import * 6 | -------------------------------------------------------------------------------- /docs/source/config_options.rst: -------------------------------------------------------------------------------- 1 | .. _configopts: 2 | 3 | Configuration Options 4 | ##################### 5 | 6 | .. include:: ./autoapi/numba_dpex/core/config/index.rst 7 | -------------------------------------------------------------------------------- /numba_dpex/core/typing/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2022 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from . import typeof 6 | -------------------------------------------------------------------------------- /numba_dpex/dpnp_iface/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from . 
import arrayobj 6 | -------------------------------------------------------------------------------- /numba_dpex/tests/core/passes/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from . import * 6 | -------------------------------------------------------------------------------- /numba_dpex/tests/core/types/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from . import * 6 | -------------------------------------------------------------------------------- /numba_dpex/tests/debugging/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from . import * 6 | -------------------------------------------------------------------------------- /numba_dpex/tests/core/runtime/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from . import * 6 | -------------------------------------------------------------------------------- /numba_dpex/tests/dpjit_tests/dpnp/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from . 
import * 6 | -------------------------------------------------------------------------------- /numba_dpex/tests/kernel_tests/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from . import * 6 | -------------------------------------------------------------------------------- /numba_dpex/tests/core/types/USMNdArray/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from . import * 6 | -------------------------------------------------------------------------------- /numba_dpex/tests/dpjit_tests/parfors/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from . import * 6 | -------------------------------------------------------------------------------- /numba_dpex/tests/core/types/DpctlSyclEvent/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from . import * 6 | -------------------------------------------------------------------------------- /numba_dpex/tests/core/types/DpctlSyclQueue/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from . 
import * 6 | -------------------------------------------------------------------------------- /numba_dpex/tests/core/types/DpnpNdArray/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from . import * 6 | -------------------------------------------------------------------------------- /numba_dpex/tests/core/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from .._helper import * 6 | from . import * 7 | -------------------------------------------------------------------------------- /docs/source/release-notes.rst: -------------------------------------------------------------------------------- 1 | .. _release-notes: 2 | .. include:: ./ext_links.txt 3 | 4 | Release Notes 5 | ============= 6 | 7 | .. include:: ../../CHANGELOG.md 8 | :parser: myst_parser.sphinx_ 9 | -------------------------------------------------------------------------------- /numba_dpex/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import numba_dpex 6 | from numba_dpex.core import config 7 | -------------------------------------------------------------------------------- /numba_dpex/tests/dpjit_tests/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from .._helper import * 6 | from . 
import * 7 | -------------------------------------------------------------------------------- /numba_dpex/core/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from .datamodel import * 6 | from .types import * 7 | from .typing import * 8 | -------------------------------------------------------------------------------- /numba_dpex/kernel_api_impl/spirv/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | """A SPIR-V backend to compile the numba_dpex.kernel_api functions to SPIR-V. 6 | """ 7 | -------------------------------------------------------------------------------- /docs/source/user_guide/index.rst: -------------------------------------------------------------------------------- 1 | .. _index: 2 | .. include:: ./../ext_links.txt 3 | 4 | Tutorials 5 | ========= 6 | 7 | .. 
toctree:: 8 | :maxdepth: 2 9 | 10 | kernel_programming/index 11 | debugging/index 12 | config 13 | -------------------------------------------------------------------------------- /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | # $ git config blame.ignoreRevsFile .git-blame-ignore-revs 2 | 3 | # Migrate code style to Black 4 | 8bd62e61bb70fe0483bd494040e4103fd050252a 5 | 6 | # Change black to use 80 chars 7 | 2b9c67489cc8a5c6f13b28ec752b29a20c8c9a5f 8 | -------------------------------------------------------------------------------- /scripts/build_conda_package.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PYTHON_VERSION=$1 4 | 5 | VERSIONS="--python $PYTHON_VERSION" 6 | TEST="--no-test" 7 | 8 | # shellcheck disable=SC2086 9 | conda build \ 10 | $TEST \ 11 | $VERSIONS \ 12 | $CHANNELS \ 13 | conda-recipe 14 | -------------------------------------------------------------------------------- /docs/source/user_guide/kernel_programming/operators.rst: -------------------------------------------------------------------------------- 1 | List of supported Python operators that can be used in a ``kernel`` or 2 | ``device_func`` decorated function. 3 | 4 | .. csv-table:: Current support matrix of Python operators 5 | :file: ./operators.csv 6 | :header-rows: 1 7 | -------------------------------------------------------------------------------- /numba_dpex/kernel_api_impl/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | """The module stores the numba_dpex backends implementing the target-specific 6 | code generation for the kernel_api Python functions. 
7 | """ 8 | -------------------------------------------------------------------------------- /numba_dpex/tests/core/types/DpnpNdArray/test_bugs.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from dpnp import ndarray as dpnp_ndarray 6 | 7 | 8 | def test_dpnp_ndarray_flags(): 9 | assert hasattr(dpnp_ndarray([1]), "flags") 10 | -------------------------------------------------------------------------------- /numba_dpex/dpctl_iface/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | """ 6 | The ``dpctl_iface`` module implements Numba's interface to the libsyclinterface 7 | library that provides C bindings to DPC++'s SYCL runtime API. 8 | """ 9 | 10 | from . import arrayobj 11 | -------------------------------------------------------------------------------- /numba_dpex/tests/debugging/conftest.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 4 | # 5 | # SPDX-License-Identifier: Apache-2.0 6 | 7 | import pytest 8 | 9 | from .gdb import gdb 10 | 11 | 12 | @pytest.fixture 13 | def app(): 14 | g = gdb() 15 | 16 | yield g 17 | 18 | g.teardown_gdb() 19 | -------------------------------------------------------------------------------- /numba_dpex/kernel_api_impl/spirv/overloads/_registry.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | """ 6 | Implements the SPIR-V overloads for the kernel_api.PrivateArray class. 
7 | """ 8 | 9 | from numba.core.imputils import Registry 10 | 11 | registry = Registry() 12 | lower = registry.lower 13 | -------------------------------------------------------------------------------- /docs/source/license.rst: -------------------------------------------------------------------------------- 1 | .. _license: 2 | .. include:: ./ext_links.txt 3 | 4 | License 5 | ======= 6 | 7 | Numba-dpex is licensed under the Apache License 2.0, which can be found in `LICENSE 8 | <https://github.com/IntelPython/numba-dpex/blob/HEAD/LICENSE>`_. All usage and 9 | contributions to the project are subject to the terms and conditions of this 10 | license. 11 | -------------------------------------------------------------------------------- /numba_dpex/core/boxing/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | """Contains the ``box`` and ``unbox`` functions for numba_dpex types that are 6 | passable as arguments to a kernel or dpjit decorated function. 7 | """ 8 | 9 | from .ranges import * 10 | from .usm_ndarray import * 11 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | - [ ] Have you provided a meaningful PR description? 2 | - [ ] Have you added a test, reproducer or referred to an issue with a reproducer? 3 | - [ ] Have you tested your changes locally for CPU and GPU devices? 4 | - [ ] Have you made sure that new changes do not introduce compiler warnings? 5 | - [ ] If this PR is a work in progress, are you filing the PR as a draft?
6 | -------------------------------------------------------------------------------- /numba_dpex/core/passes/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from .parfor_legalize_cfd_pass import ParforLegalizeCFDPass 6 | from .passes import DumpParforDiagnostics, NoPythonBackend 7 | 8 | __all__ = [ 9 | "DumpParforDiagnostics", 10 | "ParforLegalizeCFDPass", 11 | "NoPythonBackend", 12 | ] 13 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/docs/break_conditional: -------------------------------------------------------------------------------- 1 | $ NUMBA_OPT=0 gdb-oneapi -q python 2 | (gdb) set breakpoint pending on 3 | (gdb) break simple_sum.py:23 if i == 1 4 | (gdb) run simple_sum.py 5 | ... 6 | Thread 2.2 hit Breakpoint 1, with SIMD lane 1, __main__::data_parallel_sum () at simple_sum.py:23 7 | 23 c[i] = a[i] + b[i] 8 | (gdb) print i 9 | $1 = 1 10 | (gdb) continue 11 | ... 12 | Done... 
13 | -------------------------------------------------------------------------------- /docs/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | span.summarylabel { 2 | background-color: var(--color-foreground-secondary); 3 | color: var(--color-background-secondary); 4 | font-size: 70%; 5 | padding-left: 2px; 6 | padding-right: 2px; 7 | border-radius: 3px; 8 | vertical-align: 15%; 9 | padding-bottom: 2px; 10 | filter: opacity(40%); 11 | } 12 | 13 | table.summarytable { 14 | width: 100%; 15 | } 16 | -------------------------------------------------------------------------------- /conda-recipe/run_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euxo pipefail 4 | unset ONEAPI_DEVICE_SELECTOR 5 | 6 | for selector in $(python -c "import dpctl; print(\" \".join([dev.backend.name+\":\"+dev.device_type.name for dev in dpctl.get_devices() if dev.device_type.name in [\"cpu\",\"gpu\"]]))") 7 | do 8 | ONEAPI_DEVICE_SELECTOR=$selector \ 9 | pytest -q -ra --disable-warnings --pyargs numba_dpex -vv 10 | done 11 | 12 | exit 0 13 | -------------------------------------------------------------------------------- /numba_dpex/tests/conftest.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 4 | # 5 | # SPDX-License-Identifier: Apache-2.0 6 | 7 | import pytest 8 | 9 | offload_devices = [ 10 | "opencl:gpu:0", 11 | "level_zero:gpu:0", 12 | "opencl:cpu:0", 13 | ] 14 | 15 | 16 | @pytest.fixture(params=offload_devices, scope="module") 17 | def offload_device(request): 18 | return request.param 19 | -------------------------------------------------------------------------------- /numba_dpex/tests/test_examples.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | 3 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 4 | # 5 | # SPDX-License-Identifier: Apache-2.0 6 | 7 | import os 8 | 9 | import numba_dpex 10 | 11 | 12 | def test_examples_available(): 13 | package_path = os.path.dirname(numba_dpex.__file__) 14 | examples_path = os.path.join(package_path, "examples") 15 | 16 | assert os.path.isdir(examples_path) 17 | -------------------------------------------------------------------------------- /docs/_templates/autoapi/python/property.rst: -------------------------------------------------------------------------------- 1 | {%- if obj.display %} 2 | .. py:property:: {{ obj.short_name }} 3 | {% if obj.annotation %} 4 | :type: {{ obj.annotation }} 5 | {% endif %} 6 | {% if obj.properties %} 7 | {% for property in obj.properties %} 8 | :{{ property }}: 9 | {% endfor %} 10 | {% endif %} 11 | 12 | {% if obj.docstring %} 13 | {{ obj.docstring|indent(3) }} 14 | {% endif %} 15 | {% endif %} 16 | -------------------------------------------------------------------------------- /numba_dpex/core/runtime/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2021 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import llvmlite.binding as ll 6 | 7 | from ._dpexrt_python import c_helpers 8 | 9 | # Register the helper function in _dpexrt_python so that we can insert 10 | # calls to them via llvmlite. 
11 | for ( 12 | py_name, 13 | c_address, 14 | ) in c_helpers.items(): 15 | ll.add_symbol(py_name, c_address) 16 | -------------------------------------------------------------------------------- /conda-recipe/run_test.bat: -------------------------------------------------------------------------------- 1 | set "ONEAPI_DEVICE_SELECTOR=" 2 | 3 | for /F "USEBACKQ tokens=* delims=" %%F in ( 4 | `python -c "import dpctl; print(\"\n\".join([dev.backend.name+\":\"+dev.device_type.name for dev in dpctl.get_devices() if dev.device_type.name in [\"cpu\",\"gpu\"]]))"` 5 | ) do ( 6 | set "ONEAPI_DEVICE_SELECTOR=%%F" 7 | 8 | pytest -q -ra --disable-warnings --pyargs numba_dpex -vv 9 | IF %ERRORLEVEL% NEQ 0 exit /B 1 10 | ) 11 | 12 | exit /B 0 13 | -------------------------------------------------------------------------------- /environment/pre-commit.yml: -------------------------------------------------------------------------------- 1 | name: dev 2 | channels: 3 | - dppy/label/dev 4 | - numba 5 | - conda-forge 6 | - nodefaults 7 | dependencies: 8 | - libffi 9 | - numba==0.59* 10 | - dpctl>=0.16* 11 | - dpnp>=0.14* 12 | - dpcpp-llvm-spirv 13 | - intel-opencl-rt 14 | - coverage 15 | - pytest 16 | - pytest-cov 17 | - pytest-xdist 18 | - pexpect 19 | - scikit-build>=0.15* 20 | - cmake>=3.26* 21 | - pre-commit 22 | - pylint 23 | - versioneer 24 | -------------------------------------------------------------------------------- /numba_dpex/tests/core/types/DpctlSyclEvent/test_overloads.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import dpctl 6 | 7 | from numba_dpex import dpjit 8 | 9 | 10 | @dpjit 11 | def wait_call(a): 12 | a.wait() 13 | return None 14 | 15 | 16 | def test_wait_DpctlSyclEvent(): 17 | """Test the dpctl.SyclEvent.wait() call overload.""" 18 | 19 | e = dpctl.SyclEvent() 20 | wait_call(e) 21 | 
-------------------------------------------------------------------------------- /numba_dpex/examples/debug/njit_basic.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from numba_dpex import dpjit 6 | 7 | 8 | @dpjit(debug=True) 9 | def foo(arg): 10 | l1 = arg + 6 11 | l2 = arg * 5.43 12 | l3 = (arg, l1, l2, "bar") 13 | print(arg, l1, l2, l3) 14 | 15 | 16 | def main(): 17 | foo(987) 18 | 19 | 20 | if __name__ == "__main__": 21 | main() 22 | print("Done ...") 23 | -------------------------------------------------------------------------------- /numba_dpex/core/types/kernel_dispatcher_type.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | """Experimental types that will eventually move to numba_dpex.core.types 6 | """ 7 | 8 | from numba.core import types 9 | 10 | 11 | class KernelDispatcherType(types.Dispatcher): 12 | """The type of KernelDispatcher dispatchers""" 13 | 14 | def cast_python_value(self, args): 15 | raise NotImplementedError 16 | -------------------------------------------------------------------------------- /docs/backups/user_guides/kernel_programming_guide/index.rst: -------------------------------------------------------------------------------- 1 | Programming SYCL Kernels Using :func:`~numba_dpex.decorators.kernel` 2 | ==================================================================== 3 | 4 | .. 
toctree:: 5 | :maxdepth: 2 6 | 7 | writing_kernels 8 | memory-management 9 | synchronization 10 | device-functions 11 | atomic-operations 12 | selecting_device 13 | memory_allocation_address_space 14 | reduction 15 | ufunc 16 | supported-python-features 17 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Report a Vulnerability 4 | 5 | Please report security issues or vulnerabilities to the [Intel® Security Center]. 6 | 7 | For more information on how Intel® works to resolve security issues, see 8 | [Vulnerability Handling Guidelines]. 9 | 10 | [Intel® Security Center]:https://www.intel.com/content/www/us/en/security-center/default.html 11 | 12 | [Vulnerability Handling Guidelines]:https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html 13 | -------------------------------------------------------------------------------- /environment/coverage.yml: -------------------------------------------------------------------------------- 1 | name: dev 2 | channels: 3 | - dppy/label/dev 4 | - numba 5 | - conda-forge 6 | - nodefaults 7 | dependencies: 8 | - libffi 9 | - gcc_linux-64 10 | - dpcpp_linux-64>=2024.2 11 | - sysroot_linux-64=2.28 12 | - numba==0.59* 13 | - dpctl 14 | - dpnp 15 | - dpcpp-llvm-spirv 16 | - intel-opencl-rt 17 | - coverage 18 | - pytest 19 | - pytest-cov 20 | - pytest-xdist 21 | - pexpect 22 | - scikit-build>=0.15* 23 | - cmake>=3.26* 24 | - ninja 25 | - versioneer 26 | -------------------------------------------------------------------------------- /docs/source/user_guide/debugging/features.rst: -------------------------------------------------------------------------------- 1 | .. 
include:: ./../../ext_links.txt 2 | 3 | Supported Features 4 | ================== 5 | 6 | Numba-dpex and Intel® Distribution for GDB* provide at least the 7 | following debugging features: 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | 12 | breakpoints 13 | stepping 14 | frame_info 15 | backtrace 16 | data 17 | symbols 18 | altering 19 | 20 | Other topics: 21 | 22 | .. toctree:: 23 | :maxdepth: 2 24 | 25 | local_variables 26 | numba-0.55 27 | -------------------------------------------------------------------------------- /numba_dpex/examples/_helper.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 4 | # 5 | # SPDX-License-Identifier: Apache-2.0 6 | 7 | import dpctl 8 | 9 | 10 | def has_gpu(backend="opencl"): 11 | return bool(dpctl.get_num_devices(backend=backend, device_type="gpu")) 12 | 13 | 14 | def has_cpu(backend="opencl"): 15 | return bool(dpctl.get_num_devices(backend=backend, device_type="cpu")) 16 | 17 | 18 | def has_sycl_platforms(): 19 | return bool(len(dpctl.get_platforms())) 20 | -------------------------------------------------------------------------------- /numba_dpex/tests/kernel_tests/test_compiler_warnings.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import pytest 6 | 7 | from numba_dpex import kernel 8 | from numba_dpex.kernel_api import Item 9 | 10 | 11 | def _kernel(item: Item, a, b, c): 12 | i = item.get_id(0) 13 | c[i] = a[i] + b[i] 14 | 15 | 16 | def test_compilation_mode_option_user_definition(): 17 | with pytest.warns(UserWarning): 18 | kernel(_compilation_mode="kernel")(_kernel) 19 | -------------------------------------------------------------------------------- /numba_dpex/tests/misc/test_parse_sem_version.py:
-------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from numba_dpex import parse_sem_version 6 | 7 | 8 | class TestParseSemVersion: 9 | def test_parse_sem_version(self): 10 | assert parse_sem_version("0.56.4") == (0, 56, 4) 11 | assert parse_sem_version("0.57.0") == (0, 57, 0) 12 | assert parse_sem_version("0.57.0rc1") == (0, 57, 0) 13 | assert parse_sem_version("0.58.1dev0") == (0, 58, 1) 14 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/docs/break_func: -------------------------------------------------------------------------------- 1 | $ NUMBA_OPT=0 gdb-oneapi -q python 2 | (gdb) set breakpoint pending on 3 | (gdb) break data_parallel_sum 4 | (gdb) run simple_sum.py 5 | ... 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at simple_sum.py:20 7 | 20 @dpex.kernel(debug=True) 8 | (gdb) continue 9 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::data_parallel_sum () at simple_sum.py:20 10 | 20 @dpex.kernel(debug=True) 11 | (gdb) continue 12 | ... 13 | Done... 
14 | -------------------------------------------------------------------------------- /numba_dpex/dpctl_iface/dpctlimpl.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from numba.core.imputils import Registry 6 | 7 | registry = Registry("dpctlimpl") 8 | 9 | lower_builtin = registry.lower 10 | lower_getattr = registry.lower_getattr 11 | lower_getattr_generic = registry.lower_getattr_generic 12 | lower_setattr = registry.lower_setattr 13 | lower_setattr_generic = registry.lower_setattr_generic 14 | lower_cast = registry.lower_cast 15 | lower_constant = registry.lower_constant 16 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/docs/break_line_number: -------------------------------------------------------------------------------- 1 | $ NUMBA_OPT=0 gdb-oneapi -q python 2 | (gdb) set breakpoint pending on 3 | (gdb) break simple_sum.py:20 4 | (gdb) run simple_sum.py 5 | ... 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at simple_sum.py:20 7 | 20 @dpex.kernel(debug=True) 8 | (gdb) continue 9 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::data_parallel_sum () at simple_sum.py:20 10 | 20 @dpex.kernel(debug=True) 11 | (gdb) continue 12 | ... 13 | Done... 14 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/break_conditional: -------------------------------------------------------------------------------- 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/break_conditional python 2 | set trace-commands on 3 | set pagination off 4 | set breakpoint pending on 5 | break simple_sum.py:23 if i == 1 6 | run simple_sum.py 7 | # Expected: 8 | # ... 
9 | # Thread 2.2 hit Breakpoint 1, with SIMD lane 1, __main__::data_parallel_sum () at simple_sum.py:23 10 | # 23 c[i] = a[i] + b[i] 11 | print i 12 | # Expected: 13 | # $1 = 1 14 | continue 15 | # Expected: 16 | # ... 17 | # Done... 18 | echo Done\n 19 | quit 20 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/docs/break_file_func: -------------------------------------------------------------------------------- 1 | $ NUMBA_OPT=0 gdb-oneapi -q python 2 | (gdb) set breakpoint pending on 3 | (gdb) break simple_sum.py:data_parallel_sum 4 | (gdb) run simple_sum.py 5 | ... 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at simple_sum.py:20 7 | 20 @dpex.kernel(debug=True) 8 | (gdb) continue 9 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::data_parallel_sum () at simple_sum.py:20 10 | 20 @dpex.kernel(debug=True) 11 | (gdb) continue 12 | ... 13 | Done... 14 | -------------------------------------------------------------------------------- /.github/release.yml: -------------------------------------------------------------------------------- 1 | changelog: 2 | exclude: 3 | labels: 4 | - ignore-for-release-notes 5 | categories: 6 | - title: Added 7 | labels: 8 | - enhancement 9 | - feature 10 | - title: Changed 11 | labels: 12 | - feature-change 13 | - title: Fixed 14 | labels: 15 | - bug 16 | - title: Removed 17 | labels: 18 | - removed-feature 19 | - title: Deprecated 20 | labels: 21 | - deprecated-feature 22 | - title: Other Changes 23 | labels: 24 | - "*" 25 | -------------------------------------------------------------------------------- /docs/_templates/autoapi/index.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | 4 | This page contains auto-generated API reference documentation [#f1]_. 5 | 6 | .. 
toctree:: 7 | :maxdepth: 1 8 | 9 | numba_dpex/kernel_api/index 10 | numba_dpex/core/decorators/index 11 | numba_dpex/core/kernel_launcher/index 12 | 13 | {% for page in pages %} 14 | {% if page.top_level_object and page.display %} 15 | {{ page.include_path }} 16 | {% endif %} 17 | {% endfor %} 18 | 19 | .. [#f1] Created with `sphinx-autoapi <https://github.com/readthedocs/sphinx-autoapi>`_ 20 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/docs/break_nested_func: -------------------------------------------------------------------------------- 1 | $ NUMBA_OPT=0 gdb-oneapi -q python 2 | (gdb) set breakpoint pending on 3 | (gdb) break simple_dpex_func.py:func_sum 4 | (gdb) run simple_dpex_func.py 5 | ... 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::func_sum () at simple_dpex_func.py:22 7 | 22 result = a_in_func + b_in_func 8 | (gdb) continue 9 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::func_sum () at simple_dpex_func.py:22 10 | 22 result = a_in_func + b_in_func 11 | (gdb) continue 12 | ... 13 | Done... 14 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/docs/simple_sum: -------------------------------------------------------------------------------- 1 | $ NUMBA_OPT=0 gdb-oneapi -q python 2 | (gdb) set breakpoint pending on 3 | (gdb) break simple_sum.py:22 4 | (gdb) run simple_sum.py 5 | ... 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at simple_sum.py:22 7 | 22 i = dpex.get_global_id(0) 8 | (gdb) next 9 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::data_parallel_sum () at simple_sum.py:22 10 | 22 i = dpex.get_global_id(0) 11 | (gdb) next 12 | 23 c[i] = a[i] + b[i] 13 | (gdb) continue 14 | ... 15 | Done...
16 | -------------------------------------------------------------------------------- /numba_dpex/examples/dpjit/vector_sum2D.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import dpnp as np 6 | 7 | from numba_dpex import dpjit 8 | 9 | 10 | @dpjit 11 | def f1(a, b): 12 | c = a + b 13 | return c 14 | 15 | 16 | N = 1000 17 | print("N", N) 18 | 19 | a = np.ones((N, N), dtype=np.float32) 20 | b = np.ones((N, N), dtype=np.float32) 21 | 22 | print("a:", a) 23 | print("b:", b) 24 | 25 | 26 | def main(): 27 | c = f1(a, b) 28 | print("c:", c) 29 | 30 | 31 | if __name__ == "__main__": 32 | main() 33 | -------------------------------------------------------------------------------- /docs/_templates/autoapi/python/function.rst: -------------------------------------------------------------------------------- 1 | {% if obj.display %} 2 | .. py:function:: {{ obj.short_name }}({{ obj.args }}){% if obj.return_annotation is not none %} -> {{ obj.return_annotation }}{% endif %} 3 | 4 | {% for (args, return_annotation) in obj.overloads %} 5 | {{ obj.short_name }}({{ args }}){% if return_annotation is not none %} -> {{ return_annotation }}{% endif %} 6 | 7 | {% endfor %} 8 | {% for property in obj.properties %} 9 | :{{ property }}: 10 | {% endfor %} 11 | 12 | {% if obj.docstring %} 13 | {{ obj.docstring|indent(3) }} 14 | {% endif %} 15 | {% endif %} 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.o 3 | *.so 4 | *.dylib 5 | *.pyd 6 | *.pdb 7 | *.egg-info 8 | *.sw[po] 9 | *.out 10 | *.ll 11 | .nfs* 12 | tags 13 | MANIFEST 14 | 15 | .tmp/ 16 | build/ 17 | docs/_build/ 18 | docs/gh-pages/ 19 | dist/ 20 | .idea/ 21 | .vscode/ 22 | .mypy_cache/ 23 | .ipynb_checkpoints/ 24 | __pycache__/ 25 | 
_skbuild 26 | 27 | docs/source/developer/autogen* 28 | 29 | # Ignore versioneer generated files 30 | numba_dpex/_version.py 31 | 32 | # Ignore generated cpp files 33 | numba_dpex/dpnp_iface/*.cpp 34 | numba_dpex/dpnp_iface/*.h 35 | 36 | # Ignore coverage results 37 | .coverage 38 | coverage.xml 39 | htmlcov/ 40 | -------------------------------------------------------------------------------- /numba_dpex/core/parfors/kernel_templates/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | """Provides string templates for numba_dpex.kernel decorated functions. 6 | 7 | During lowering of a parfor node using the SPIRVKernelTarget, the node is 8 | first converted into a kernel function. The module provides a set of templates 9 | to generate the basic stub of a kernel function. The string template is 10 | compiled down to Numba IR using the Numba compiler front end and then the 11 | necessary body of the kernel function is inserted directly as Numba IR. 12 | """ 13 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/docs/step_sum: -------------------------------------------------------------------------------- 1 | $ NUMBA_OPT=0 gdb-oneapi -q python 2 | (gdb) set breakpoint pending on 3 | (gdb) break simple_sum.py:22 4 | (gdb) run simple_sum.py 5 | ... 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at simple_sum.py:22 7 | 22 i = dpex.get_global_id(0) 8 | (gdb) step 9 | [Switching to Thread 1.1073742080 lane 0] 10 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::data_parallel_sum () at simple_sum.py:22 11 | 22 i = dpex.get_global_id(0) 12 | (gdb) step 13 | 23 c[i] = a[i] + b[i] 14 | (gdb) continue 15 | ... 16 | Done... 
17 | -------------------------------------------------------------------------------- /numba_dpex/tests/kernel_tests/test_func.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import dpnp 6 | import numpy 7 | 8 | import numba_dpex as dpex 9 | 10 | 11 | @dpex.device_func 12 | def g(a): 13 | return a + dpnp.float32(1) 14 | 15 | 16 | @dpex.kernel 17 | def f(item, a, b): 18 | i = item.get_id(0) 19 | b[i] = g(a[i]) 20 | 21 | 22 | def test_func_call_from_kernel(): 23 | a = dpnp.ones(1024) 24 | b = dpnp.ones(1024) 25 | 26 | dpex.call_kernel(f, dpex.Range(1024), a, b) 27 | nb = dpnp.asnumpy(b) 28 | assert numpy.all(nb == 2) 29 | -------------------------------------------------------------------------------- /numba_dpex/register_kernel_api_overloads.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | """Imports and registers kernel_api_impl target-specific overloads. 6 | """ 7 | 8 | 9 | def init_kernel_api_spirv_overloads(): 10 | """ 11 | Imports the kernel_api.spirv overloads to make them available in numba-dpex. 12 | """ 13 | from .kernel_api_impl.spirv.overloads import ( 14 | _atomic_fence_overloads, 15 | _atomic_ref_overloads, 16 | _group_barrier_overloads, 17 | _index_space_id_overloads, 18 | _private_array_overloads, 19 | ) 20 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/docs/next: -------------------------------------------------------------------------------- 1 | $ NUMBA_OPT=0 gdb-oneapi -q python 2 | (gdb) set breakpoint pending on 3 | (gdb) break simple_dpex_func.py:29 4 | (gdb) run simple_dpex_func.py 5 | ... 
6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::kernel_sum () at simple_dpex_func.py:29 7 | 29 c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i]) 8 | (gdb) next 9 | [Switching to Thread 1.1073742080 lane 0] 10 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::kernel_sum () at simple_dpex_func.py:29 11 | 29 c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i]) 12 | (gdb) next 13 | ... 14 | Done... 15 | -------------------------------------------------------------------------------- /numba_dpex/core/runtime/_queuestruct.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | //===----------------------------------------------------------------------===// 6 | /// 7 | /// \file 8 | /// Defines the numba-dpex native representation for a dpctl.SyclQueue 9 | /// 10 | //===----------------------------------------------------------------------===// 11 | 12 | #pragma once 13 | 14 | #include "numba/core/runtime/nrt_external.h" 15 | #include 16 | 17 | typedef struct 18 | { 19 | NRT_MemInfo *meminfo; 20 | PyObject *parent; 21 | void *queue_ref; 22 | } queuestruct_t; 23 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/docs/local_variables_1: -------------------------------------------------------------------------------- 1 | $ NUMBA_OPT=1 gdb-oneapi -q python 2 | (gdb) set breakpoint pending on 3 | (gdb) break sum_local_vars.py:22 4 | (gdb) run sum_local_vars.py 5 | ... 
6 | Thread 2.1 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at sum_local_vars.py:22 7 | 22 i = dpex.get_global_id(0) 8 | (gdb) info locals 9 | __ocl_dbg_gid0 = 8 10 | __ocl_dbg_gid1 = 0 11 | __ocl_dbg_gid2 = 0 12 | __ocl_dbg_lid0 = 8 13 | __ocl_dbg_lid1 = 0 14 | __ocl_dbg_lid2 = 0 15 | __ocl_dbg_grid0 = 0 16 | __ocl_dbg_grid1 = 0 17 | __ocl_dbg_grid2 = 0 18 | i = 0 19 | l1 = 0 20 | l2 = 0 21 | (gdb) continue 22 | ... 23 | Done... 24 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/simple_sum.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import dpnp as np 6 | 7 | import numba_dpex as ndpx 8 | 9 | 10 | @ndpx.kernel(debug=True) 11 | def data_parallel_sum(item, a, b, c): 12 | i = item.get_id(0) 13 | c[i] = a[i] + b[i] # Condition breakpoint location 14 | 15 | 16 | global_size = 10 17 | N = global_size 18 | 19 | a = np.array(np.random.random(N), dtype=np.float32) 20 | b = np.array(np.random.random(N), dtype=np.float32) 21 | c = np.ones_like(a) 22 | 23 | ndpx.call_kernel(data_parallel_sum, ndpx.Range(global_size), a, b, c) 24 | 25 | print("Done...") 26 | -------------------------------------------------------------------------------- /docs/_templates/autoapi/python/method.rst: -------------------------------------------------------------------------------- 1 | {%- if obj.display %} 2 | .. 
py:method:: {{ obj.short_name }}({{ obj.args }}){% if obj.return_annotation is not none %} -> {{ obj.return_annotation }}{% endif %} 3 | 4 | {% for (args, return_annotation) in obj.overloads %} 5 | {{ obj.short_name }}({{ args }}){% if return_annotation is not none %} -> {{ return_annotation }}{% endif %} 6 | 7 | {% endfor %} 8 | {% if obj.properties %} 9 | {% for property in obj.properties %} 10 | :{{ property }}: 11 | {% endfor %} 12 | 13 | {% else %} 14 | 15 | {% endif %} 16 | {% if obj.docstring %} 17 | {{ obj.docstring|indent(3) }} 18 | {% endif %} 19 | {% endif %} 20 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/break_func: -------------------------------------------------------------------------------- 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/break_func python 2 | set trace-commands on 3 | set pagination off 4 | set breakpoint pending on 5 | break data_parallel_sum 6 | run simple_sum.py 7 | # Expected: 8 | # ... 9 | # Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at simple_sum.py:20 10 | # 20 @dpex.kernel(debug=True) 11 | continue 12 | # Expected: 13 | # Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::data_parallel_sum () at simple_sum.py:20 14 | # 20 @dpex.kernel(debug=True) 15 | continue 16 | # Expected: 17 | # ... 18 | # Done... 19 | echo Done\n 20 | quit 21 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/break_line_number: -------------------------------------------------------------------------------- 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/break_line_number python 2 | set trace-commands on 3 | set pagination off 4 | set breakpoint pending on 5 | break simple_sum.py:20 6 | run simple_sum.py 7 | # Expected: 8 | # ... 
9 | # Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at simple_sum.py:20 10 | # 20 @dpex.kernel(debug=True) 11 | continue 12 | # Expected: 13 | # Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::data_parallel_sum () at simple_sum.py:20 14 | # 20 @dpex.kernel(debug=True) 15 | continue 16 | # Expected: 17 | # ... 18 | # Done... 19 | echo Done\n 20 | quit 21 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/docs/backtrace_kernel: -------------------------------------------------------------------------------- 1 | $ NUMBA_OPT=0 gdb-oneapi -q python 2 | (gdb) set breakpoint pending on 3 | (gdb) break simple_dpex_func.py:28 4 | (gdb) run simple_dpex_func.py 5 | ... 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::kernel_sum () at simple_dpex_func.py:28 7 | 28 i = dpex.get_global_id(0) 8 | (gdb) backtrace 9 | #0 __main__::kernel_sum () at simple_dpex_func.py:28 10 | (gdb) continue 11 | ... 12 | [Switching to Thread 1.1073742080 lane 0] 13 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::kernel_sum () at simple_dpex_func.py:28 14 | 28 i = dpex.get_global_id(0) 15 | (gdb) continue 16 | ... 17 | Done... 
18 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/sum_local_vars.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import dpnp as np 6 | 7 | import numba_dpex as ndpx 8 | 9 | 10 | @ndpx.kernel(debug=True) 11 | def data_parallel_sum(item, a, b, c): 12 | i = item.get_id(0) 13 | l1 = a[i] + 2.5 14 | l2 = b[i] * 0.3 15 | c[i] = l1 + l2 16 | 17 | 18 | global_size = 10 19 | N = global_size 20 | 21 | a = np.array(np.random.random(N), dtype=np.float32) 22 | b = np.array(np.random.random(N), dtype=np.float32) 23 | c = np.ones_like(a) 24 | 25 | ndpx.call_kernel(data_parallel_sum, ndpx.Range(global_size), a, b, c) 26 | 27 | print("Done...") 28 | -------------------------------------------------------------------------------- /numba_dpex/tests/core/types/DpctlSyclQueue/test_box.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | """ 6 | Tests for boxing for dpctl.SyclQueue 7 | """ 8 | 9 | import dpnp 10 | import pytest 11 | from dpctl import SyclQueue 12 | 13 | from numba_dpex import dpjit 14 | 15 | 16 | def test_boxing_without_parent(): 17 | """Test boxing of a queue that does not have a parent""" 18 | 19 | @dpjit 20 | def func() -> SyclQueue: 21 | arr = dpnp.empty(10) 22 | queue = arr.sycl_queue 23 | return queue 24 | 25 | q: SyclQueue = func() 26 | 27 | assert len(q.sycl_device.filter_string) > 0 28 | -------------------------------------------------------------------------------- /numba_dpex/core/runtime/_eventstruct.c: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5
| #include "_eventstruct.h" 6 | #include "_dbg_printer.h" 7 | 8 | /*! 9 | * @brief A destructor that is called from NRT on object destruction. Deletes 10 | * dpctl event reference. 11 | * 12 | * @param data A dpctl event reference. 13 | * @return Nothing. 14 | */ 15 | void NRT_MemInfo_EventRef_Delete(void *data) 16 | { 17 | DPCTLSyclEventRef eref = data; 18 | 19 | DPCTLEvent_Delete(eref); 20 | 21 | DPEXRT_DEBUG( 22 | drt_debug_print("DPEXRT-DEBUG: deleting dpctl event reference.\n");); 23 | } 24 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/break_file_func: -------------------------------------------------------------------------------- 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/break_file_func python 2 | set trace-commands on 3 | set pagination off 4 | set breakpoint pending on 5 | break simple_sum.py:data_parallel_sum 6 | run simple_sum.py 7 | # Expected: 8 | # ... 9 | # Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at simple_sum.py:20 10 | # 20 @dpex.kernel(debug=True) 11 | continue 12 | # Expected: 13 | # Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::data_parallel_sum () at simple_sum.py:20 14 | # 20 @dpex.kernel(debug=True) 15 | continue 16 | # Expected: 17 | # ... 18 | # Done...
19 | echo Done\n 20 | quit 21 | -------------------------------------------------------------------------------- /numba_dpex/core/runtime/experimental/tools/dpctl.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | #include "dpctl.hpp" 6 | #include 7 | 8 | namespace std 9 | { 10 | 11 | size_t 12 | hash::operator()(const DPCTLSyclDeviceRef &DRef) const 13 | { 14 | using dpctl::syclinterface::unwrap; 15 | return hash()(*unwrap(DRef)); 16 | } 17 | 18 | size_t 19 | hash::operator()(const DPCTLSyclContextRef &CRef) const 20 | { 21 | using dpctl::syclinterface::unwrap; 22 | return hash()(*unwrap(CRef)); 23 | } 24 | } // namespace std 25 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/break_nested_func: -------------------------------------------------------------------------------- 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/break_nested_func python 2 | set trace-commands on 3 | set pagination off 4 | set breakpoint pending on 5 | break simple_dpex_func.py:func_sum 6 | run simple_dpex_func.py 7 | # Expected: 8 | # ... 9 | # Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::func_sum () at simple_dpex_func.py:22 10 | # 22 result = a_in_func + b_in_func 11 | continue 12 | # Expected: 13 | # Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::func_sum () at simple_dpex_func.py:22 14 | # 22 result = a_in_func + b_in_func 15 | continue 16 | # Expected: 17 | # ... 18 | # Done... 
19 | echo Done\n 20 | quit 21 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: LLVM 2 | IndentWidth: 4 3 | AccessModifierOffset: -4 4 | AlignEscapedNewlines: Right 5 | AllowAllParametersOfDeclarationOnNextLine: false 6 | BinPackParameters: false 7 | BreakBeforeBraces: Custom 8 | BraceWrapping: 9 | AfterCaseLabel: true 10 | AfterClass: true 11 | AfterControlStatement: MultiLine 12 | AfterEnum: true 13 | AfterFunction: true 14 | AfterNamespace: true 15 | AfterObjCDeclaration: false 16 | AfterStruct: true 17 | AfterUnion: true 18 | AfterExternBlock: true 19 | BeforeCatch: false 20 | BeforeElse: true 21 | IndentBraces: false 22 | SplitEmptyFunction: true 23 | SplitEmptyRecord: true 24 | SplitEmptyNamespace: true 25 | -------------------------------------------------------------------------------- /scripts/diag_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # See https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal.html 4 | 5 | check_package_installed() { 6 | apt list --installed 2>/dev/null "$1" | grep "$1" || echo "$1 not installed" 7 | } 8 | 9 | check_package_installed intel-opencl-icd 10 | check_package_installed intel-level-zero-gpu 11 | check_package_installed level-zero 12 | check_package_installed intel-media-va-driver-non-free 13 | check_package_installed libmfx1 14 | 15 | check_package_installed libigc-dev 16 | check_package_installed intel-igc-cm 17 | check_package_installed libigdfcl-dev 18 | check_package_installed libigfxcmrt-dev 19 | check_package_installed level-zero-dev 20 | -------------------------------------------------------------------------------- /.github/workflows/cpp_style_checks.yml: -------------------------------------------------------------------------------- 1 | # This is a workflow to format C/C++ sources with 
clang-format 2 | 3 | name: C++ Code Style 4 | 5 | # Controls when the action will run. Triggers the workflow on push or pull request 6 | # events but only for the master branch 7 | on: 8 | pull_request: 9 | push: 10 | branches: [master] 11 | 12 | permissions: read-all 13 | 14 | jobs: 15 | formatting-check: 16 | name: clang-format 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v4 20 | - name: Run clang-format style check for C/C++ programs. 21 | uses: jidicula/clang-format-action@v4.13.0 22 | with: 23 | clang-format-version: '14' 24 | check-path: 'numba_dpex/dpctl_iface' 25 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | # E501 line too long 3 | # E722 do not use bare 'except' 4 | # F401 imported but unused 5 | # W503 line break before binary operator 6 | ignore = E501,E722,F401,W503 7 | per-file-ignores = 8 | # redefinition of unused 'dpnp_random_impl' 9 | dpnp_randomimpl.py: F811 10 | # module level import not at top of file 11 | device_init.py: E402 12 | # 'from . import *' used; unable to detect undefined names 13 | __init__.py: F403 14 | # module level import not at top of file 15 | target.py: E402 16 | # config should contain code lines examples in comment 17 | docs/source/conf.py: E800 18 | exclude = 19 | .git, 20 | __pycache__, 21 | _version.py, 22 | lowerer.py, 23 | parfor.py 24 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/docs/backtrace: -------------------------------------------------------------------------------- 1 | $ NUMBA_OPT=0 gdb-oneapi -q python 2 | (gdb) set breakpoint pending on 3 | (gdb) break simple_dpex_func.py:22 4 | (gdb) run simple_dpex_func.py 5 | ...
6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::func_sum () at simple_dpex_func.py:22 7 | 22 result = a_in_func + b_in_func 8 | (gdb) backtrace 9 | #0 __main__::func_sum () at simple_dpex_func.py:22 10 | #1 __main__::kernel_sum () at simple_dpex_func.py:29 11 | (gdb) continue 12 | ... 13 | [Switching to Thread 1.1073742080 lane 0] 14 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::func_sum () at simple_dpex_func.py:22 15 | 22 result = a_in_func + b_in_func 16 | (gdb) continue 17 | ... 18 | Done... 19 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/simple_sum: -------------------------------------------------------------------------------- 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/simple_sum python 2 | set trace-commands on 3 | set pagination off 4 | set breakpoint pending on 5 | break simple_sum.py:22 6 | run simple_sum.py 7 | # Expected: 8 | # ... 9 | # Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at simple_sum.py:22 10 | # 22 i = dpex.get_global_id(0) 11 | next 12 | # Expected: 13 | # Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::data_parallel_sum () at simple_sum.py:22 14 | # 22 i = dpex.get_global_id(0) 15 | next 16 | # Expected: 17 | # 23 c[i] = a[i] + b[i] 18 | continue 19 | # Expected: 20 | # ... 21 | # Done... 
22 | echo Done\n 23 | quit 24 | -------------------------------------------------------------------------------- /.github/workflows/license.yml: -------------------------------------------------------------------------------- 1 | name: license 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: [main] 7 | 8 | permissions: read-all 9 | 10 | jobs: 11 | license: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | - uses: actions/setup-go@v5 17 | with: 18 | go-version: '1.18' 19 | - name: Install addlicense 20 | run: | 21 | export PATH=${PATH}:`go env GOPATH`/bin 22 | go install github.com/google/addlicense@latest 23 | - name: Check license 24 | run: | 25 | export PATH=${PATH}:`go env GOPATH`/bin 26 | addlicense -check -l apache -c "Intel Corporation" -ignore numba_dpex/_version.py numba_dpex/**/*.py numba_dpex/*.py setup.py 27 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/docs/stepi: -------------------------------------------------------------------------------- 1 | $ NUMBA_OPT=0 gdb-oneapi -q python 2 | (gdb) set breakpoint pending on 3 | (gdb) break simple_dpex_func.py:29 4 | (gdb) run simple_dpex_func.py 5 | ... 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::kernel_sum () at simple_dpex_func.py:29 7 | 29 c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i]) 8 | (gdb) stepi 9 | 0x00000000fffeb630 29 c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i]) 10 | (gdb) stepi 11 | [Switching to Thread 1.1073742080 lane 0] 12 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::kernel_sum () at simple_dpex_func.py:29 13 | 29 c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i]) 14 | (gdb) continue 15 | ... 16 | Done... 
17 | -------------------------------------------------------------------------------- /numba_dpex/core/runtime/_usmarraystruct.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | //===----------------------------------------------------------------------===// 6 | /// 7 | /// \file 8 | /// Defines the numba-dpex native representation for a dpctl.tensor.usm_ndarray 9 | /// 10 | //===----------------------------------------------------------------------===// 11 | 12 | #pragma once 13 | 14 | #include 15 | #include 16 | 17 | typedef struct 18 | { 19 | void *meminfo; 20 | PyObject *parent; 21 | npy_intp nitems; 22 | npy_intp itemsize; 23 | void *data; 24 | void *sycl_queue; 25 | 26 | npy_intp shape_and_strides[]; 27 | } usmarystruct_t; 28 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/docs/step_dpex_func: -------------------------------------------------------------------------------- 1 | $ NUMBA_OPT=0 gdb-oneapi -q python 2 | (gdb) set breakpoint pending on 3 | (gdb) break simple_dpex_func.py:29 4 | (gdb) run simple_dpex_func.py 5 | ... 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::kernel_sum () at simple_dpex_func.py:29 7 | 29 c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i]) 8 | (gdb) step 9 | [Switching to Thread 1.1073742080 lane 0] 10 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::kernel_sum () at simple_dpex_func.py:29 11 | 29 c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i]) 12 | (gdb) step 13 | __main__::func_sum () at simple_dpex_func.py:22 14 | 22 result = a_in_func + b_in_func 15 | (gdb) continue 16 | ... 17 | Done... 
18 | -------------------------------------------------------------------------------- /docs/backups/user_guides/kernel_programming_guide/atomic-operations.rst: -------------------------------------------------------------------------------- 1 | Supported Atomic Operations 2 | =========================== 3 | 4 | Numba-dpex supports some of the atomic operations supported in DPC++. 5 | Those that are presently implemented are as follows: 6 | 7 | .. automodule:: numba_dpex.ocl.stubs 8 | :members: atomic 9 | :noindex: 10 | 11 | Example 12 | ------- 13 | 14 | Example usage of atomic operations 15 | 16 | .. literalinclude:: ../../../numba_dpex/examples/atomic_op.py 17 | :pyobject: main 18 | 19 | .. note:: 20 | 21 | The ``numba_dpex.atomic.add`` function is analogous to the 22 | ``numba.cuda.atomic.add`` provided by the ``numba.cuda`` backend. 23 | 24 | Full examples 25 | ------------- 26 | 27 | - ``numba_dpex/examples/atomic_op.py`` 28 | -------------------------------------------------------------------------------- /numba_dpex/core/types/numba_types_short_names.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from numba.core.types import Boolean, Float, Integer, NoneType 6 | 7 | # Short names for numba types supported in dpex kernel 8 | 9 | none = NoneType("none") 10 | 11 | boolean = bool_ = Boolean("bool") 12 | 13 | uint32 = Integer("uint32") 14 | uint64 = Integer("uint64") 15 | int32 = Integer("int32") 16 | int64 = Integer("int64") 17 | float32 = Float("float32") 18 | float64 = Float("float64") 19 | 20 | 21 | # Aliases to NumPy type names 22 | 23 | b1 = bool_ 24 | i4 = int32 25 | i8 = int64 26 | u4 = uint32 27 | u8 = uint64 28 | f4 = float32 29 | f8 = float64 30 | 31 | float_ = float32 32 | double = float64 33 | void = none 34 | --------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/next: -------------------------------------------------------------------------------- 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/next python 2 | set trace-commands on 3 | set pagination off 4 | set breakpoint pending on 5 | break simple_dpex_func.py:29 6 | run simple_dpex_func.py 7 | # Expected: 8 | # ... 9 | # Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::kernel_sum () at simple_dpex_func.py:29 10 | # 29 c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i]) 11 | next 12 | # Expected: 13 | # [Switching to Thread 1.1073742080 lane 0] 14 | # Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::kernel_sum () at simple_dpex_func.py:29 15 | # 29 c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i]) 16 | next 17 | # Expected: 18 | # ... 19 | # Done... 20 | echo Done\n 21 | quit 22 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/step_sum: -------------------------------------------------------------------------------- 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/step_sum python 2 | set trace-commands on 3 | set pagination off 4 | set breakpoint pending on 5 | break simple_sum.py:22 6 | run simple_sum.py 7 | # Expected: 8 | # ... 9 | # Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at simple_sum.py:22 10 | # 22 i = dpex.get_global_id(0) 11 | step 12 | # Expected: 13 | # [Switching to Thread 1.1073742080 lane 0] 14 | # Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::data_parallel_sum () at simple_sum.py:22 15 | # 22 i = dpex.get_global_id(0) 16 | step 17 | # Expected: 18 | # 23 c[i] = a[i] + b[i] 19 | continue 20 | # Expected: 21 | # ... 22 | # Done... 
23 | echo Done\n 24 | quit 25 | -------------------------------------------------------------------------------- /numba_dpex/tests/dpjit_tests/dpnp/test_target_specific_overload.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | """ 6 | Tests if dpnp dpex specific overloads are not available at numba njit. 7 | """ 8 | 9 | import dpnp 10 | import pytest 11 | from numba import njit 12 | from numba.core import errors 13 | 14 | from numba_dpex import dpjit 15 | 16 | 17 | @pytest.mark.parametrize("func", [dpnp.empty, dpnp.ones, dpnp.zeros]) 18 | def test_dpnp_dpex_target(func): 19 | def dpnp_func(): 20 | func(10) 21 | 22 | dpnp_func_njit = njit(dpnp_func) 23 | dpnp_func_dpjit = dpjit(dpnp_func) 24 | 25 | dpnp_func_dpjit() 26 | with pytest.raises((errors.TypingError, errors.UnsupportedError)): 27 | dpnp_func_njit() 28 | -------------------------------------------------------------------------------- /numba_dpex/tests/kernel_tests/test_invalid_kernel_args.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import numpy 6 | import pytest 7 | 8 | import numba_dpex as dpex 9 | from numba_dpex import kernel_api as kapi 10 | 11 | N = 1024 12 | 13 | 14 | @dpex.kernel 15 | def vecadd_kernel(item: kapi.Item, a, b, c): 16 | i = item.get_id(0) 17 | c[i] = a[i] + b[i] 18 | 19 | 20 | def test_passing_numpy_arrays_as_kernel_args(): 21 | """ 22 | Negative test to verify that NumPy arrays cannot be passed to a kernel. 
23 | """ 24 | a = numpy.ones(N) 25 | b = numpy.ones(N) 26 | c = numpy.zeros(N) 27 | 28 | with pytest.raises(Exception): 29 | dpex.call_kernel(vecadd_kernel, dpex.Range(N), a, b, c) 30 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/docs/info_func: -------------------------------------------------------------------------------- 1 | $ NUMBA_OPT=0 gdb-oneapi -q python 2 | (gdb) set breakpoint pending on 3 | (gdb) break simple_sum.py:22 4 | (gdb) run simple_sum.py 5 | (gdb) info functions data_parallel_sum 6 | ... 7 | All functions matching regular expression "data_parallel_sum": 8 | File simple_sum.py: 9 | 20: void __main__::data_parallel_sum(Array, Array, Array); 10 | (gdb) continue 11 | (gdb) info functions __main__ 12 | ... 13 | All functions matching regular expression "__main__": 14 | 20: void __main__::data_parallel_sum(Array, Array, Array); 15 | (gdb) continue 16 | ... 17 | Done... 18 | -------------------------------------------------------------------------------- /numba_dpex/core/lowering.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | """Registers any custom lowering functions to default Numba lowering registry. 6 | """ 7 | from numba.core.imputils import Registry 8 | 9 | from .types import KernelDispatcherType 10 | 11 | registry = Registry() 12 | lower_constant = registry.lower_constant 13 | 14 | 15 | @lower_constant(KernelDispatcherType) 16 | def dpex_dispatcher_const(context): 17 | """Dummy lowering function for a KernelDispatcherType object. 18 | 19 | The dummy lowering function for the KernelDispatcher types is added so that 20 | a :func:`numba_dpex.core.decorators.kernel` decorated function can be passed 21 | as an argument to dpjit. 
22 | """ 23 | return context.get_dummy_value() 24 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/local_variables_1: -------------------------------------------------------------------------------- 1 | # Run: NUMBA_OPT=1 gdb-oneapi -q -command commands/local_variables_1 python 2 | set trace-commands on 3 | set pagination off 4 | set breakpoint pending on 5 | break sum_local_vars.py:22 6 | run sum_local_vars.py 7 | # Expected: 8 | # ... 9 | # Thread 2.1 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at sum_local_vars.py:22 10 | # 22 i = dpex.get_global_id(0) 11 | info locals 12 | # __ocl_dbg_gid0 = 8 13 | # __ocl_dbg_gid1 = 0 14 | # __ocl_dbg_gid2 = 0 15 | # __ocl_dbg_lid0 = 8 16 | # __ocl_dbg_lid1 = 0 17 | # __ocl_dbg_lid2 = 0 18 | # __ocl_dbg_grid0 = 0 19 | # __ocl_dbg_grid1 = 0 20 | # __ocl_dbg_grid2 = 0 21 | # i = 0 22 | # l1 = 0 23 | # l2 = 0 24 | continue 25 | # Expected: 26 | # ... 27 | # Done... 28 | echo Done\n 29 | quit 30 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = ./source 9 | BUILDDIR = _build 10 | AUTOAPIDIR = source/autoapi 11 | 12 | # Put it first so that "make" without argument is like "make help". 13 | help: 14 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 15 | 16 | .PHONY: help Makefile 17 | 18 | # Catch-all target: route all unknown targets to Sphinx using the new 19 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
20 | %: Makefile 21 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 22 | 23 | clean: 24 | rm -rf "$(BUILDDIR)" "$(AUTOAPIDIR)" 25 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/sum_local_vars_revive.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import dpnp as np 6 | 7 | import numba_dpex as ndpx 8 | 9 | 10 | @ndpx.device_func 11 | def revive(x): 12 | return x 13 | 14 | 15 | @ndpx.kernel(debug=True) 16 | def data_parallel_sum(item, a, b, c): 17 | i = item.get_id(0) 18 | l1 = a[i] + 2.5 19 | l2 = b[i] * 0.3 20 | c[i] = l1 + l2 21 | revive(a) # pass variable to dummy function 22 | 23 | 24 | global_size = 10 25 | N = global_size 26 | 27 | a = np.array(np.random.random(N), dtype=np.float32) 28 | b = np.array(np.random.random(N), dtype=np.float32) 29 | c = np.ones_like(a) 30 | 31 | ndpx.call_kernel(data_parallel_sum, ndpx.Range(global_size), a, b, c) 32 | 33 | print("Done...") 34 | -------------------------------------------------------------------------------- /numba_dpex/tests/debugging/test_backtraces.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 4 | # 5 | # SPDX-License-Identifier: Apache-2.0 6 | 7 | """Tests for Backtraces 8 | 9 | https://www.sourceware.org/gdb/onlinedocs/gdb/Backtrace.html 10 | """ 11 | 12 | from numba_dpex.tests._helper import skip_no_gdb 13 | 14 | pytestmark = skip_no_gdb 15 | 16 | 17 | def test_backtrace(app): 18 | """Simple test for backtrace. 
19 | 20 | commands/backtrace 21 | """ 22 | app.breakpoint("simple_dpex_func.py:12") 23 | app.run("simple_dpex_func.py") 24 | app.expect_hit_breakpoint("simple_dpex_func.py:12") 25 | 26 | app.backtrace() 27 | 28 | app.expect(r"#0.*func_sum.* at simple_dpex_func.py:12", with_eol=True) 29 | app.expect(r"#1.*kernel_sum", with_eol=True) 30 | -------------------------------------------------------------------------------- /environment/docs.yml: -------------------------------------------------------------------------------- 1 | name: dpex-docs-dev 2 | channels: 3 | - dppy/label/dev 4 | - numba 5 | - conda-forge 6 | - nodefaults 7 | dependencies: 8 | - libffi 9 | - gcc_linux-64 10 | - dpcpp_linux-64>=2024.2 11 | - sysroot_linux-64=2.28 12 | - numba==0.59* 13 | - scikit-build>=0.15* 14 | - cmake>=3.26* 15 | - ninja 16 | - dpctl>=0.16* 17 | - dpnp>=0.14* 18 | - dpcpp-llvm-spirv 19 | - intel-opencl-rt 20 | - versioneer 21 | - pip 22 | - pip: 23 | - sphinx 24 | - sphinx-autoapi==3.0.0 25 | - autodoc # there is no conda package 26 | - recommonmark 27 | - sphinx-rtd-theme 28 | - sphinxcontrib-apidoc 29 | - sphinxcontrib-bibtex 30 | - sphinxcontrib-googleanalytics 31 | - sphinxcontrib.programoutput 32 | - pydata-sphinx-theme 33 | - myst-parser 34 | - furo 35 | -------------------------------------------------------------------------------- /numba_dpex/examples/dpjit/vector_sum.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import dpnp as np 6 | 7 | from numba_dpex import dpjit 8 | 9 | 10 | @dpjit 11 | def f1(a, b): 12 | c = a + b 13 | return c 14 | 15 | 16 | def main(): 17 | global_size = 64 18 | local_size = 32 19 | N = global_size * local_size 20 | print("N", N) 21 | 22 | a = np.ones(N, dtype=np.float32) 23 | b = np.ones(N, dtype=np.float32) 24 | 25 | print(a) 26 | print(b) 27 | 28 | c = f1(a, b) 29 | 30 | print("RESULT c:", c) 31 | for i 
in range(N): 32 | if c[i] != 2.0: 33 | print("First index not equal to 2.0 was", i) 34 | break 35 | 36 | print("Done...") 37 | 38 | 39 | if __name__ == "__main__": 40 | main() 41 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/simple_dpex_func.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import dpnp as np 6 | 7 | import numba_dpex as ndpx 8 | 9 | 10 | @ndpx.device_func(debug=True) 11 | def func_sum(a_in_func, b_in_func): 12 | result = a_in_func + b_in_func # breakpoint location 13 | return result 14 | 15 | 16 | @ndpx.kernel(debug=True) 17 | def kernel_sum(item, a_in_kernel, b_in_kernel, c_in_kernel): 18 | i = item.get_id(0) 19 | c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i]) 20 | 21 | 22 | global_size = 10 23 | a = np.arange(global_size, dtype=np.float32) 24 | b = np.arange(global_size, dtype=np.float32) 25 | c = np.empty_like(a) 26 | 27 | ndpx.call_kernel(kernel_sum, ndpx.Range(global_size), a, b, c) 28 | 29 | print("Done...") 30 | -------------------------------------------------------------------------------- /numba_dpex/kernel_api/flag_enum.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | """ 6 | Provides a FlagEnum class to help distinguish IntEnum types that numba_dpex 7 | intends to use as Integer literal types inside the compiler type inferring 8 | infrastructure. 9 | """ 10 | from enum import IntEnum 11 | 12 | 13 | class FlagEnum(IntEnum): 14 | """Helper class to distinguish IntEnum types that numba_dpex should consider 15 | as Numba Literal types. 
16 | """ 17 | 18 | @classmethod 19 | def basetype(cls) -> int: 20 | """Returns a dummy int object that helps numba_dpex infer the type of 21 | an instance of a FlagEnum class. 22 | 23 | Returns: 24 | int: Dummy int value 25 | """ 26 | return int(0) 27 | -------------------------------------------------------------------------------- /numba_dpex/core/runtime/experimental/tools/dpctl.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2020 - 2023 Intel Corporation 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | //===----------------------------------------------------------------------===// 6 | /// 7 | /// \file 8 | /// Defines overloads to dpctl library that eventually must be ported there. 9 | /// 10 | //===----------------------------------------------------------------------===// 11 | 12 | #pragma once 13 | #include "syclinterface/dpctl_sycl_type_casters.hpp" 14 | 15 | namespace std 16 | { 17 | template <> struct hash 18 | { 19 | size_t operator()(const DPCTLSyclDeviceRef &DRef) const; 20 | }; 21 | 22 | template <> struct hash 23 | { 24 | size_t operator()(const DPCTLSyclContextRef &CRef) const; 25 | }; 26 | } // namespace std 27 | -------------------------------------------------------------------------------- /numba_dpex/tests/misc/test_warnings.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import warnings 6 | 7 | import dpnp 8 | import pytest 9 | 10 | import numba_dpex as dpex 11 | from numba_dpex.core import config 12 | 13 | 14 | @dpex.kernel 15 | def foo(item, a): 16 | a[item.get_id(0)] = 0 17 | 18 | 19 | def test_inline_threshold_negative_val_warning_(): 20 | bkp = config.INLINE_THRESHOLD 21 | config.INLINE_THRESHOLD = -1 22 | 23 | with pytest.warns(UserWarning): 24 | dpex.call_kernel(foo, dpex.Range(10), dpnp.arange(10)) 25 | 26 |
config.INLINE_THRESHOLD = bkp 27 | 28 | 29 | def test_no_warning(): 30 | with warnings.catch_warnings(): 31 | warnings.simplefilter("error") 32 | dpex.call_kernel(foo, dpex.Range(10), dpnp.arange(10)) 33 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- 1 | name: pre-commit 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: [main] 7 | 8 | permissions: read-all 9 | 10 | jobs: 11 | pre-commit: 12 | runs-on: ubuntu-20.04 13 | defaults: 14 | run: 15 | shell: bash -el {0} 16 | steps: 17 | - uses: actions/checkout@v4 18 | - uses: conda-incubator/setup-miniconda@v3 19 | with: 20 | python-version: '3.11' 21 | activate-environment: "coverage" 22 | channel-priority: "disabled" 23 | environment-file: environment/pre-commit.yml 24 | - uses: actions/cache@v4 25 | with: 26 | path: ~/.cache/pre-commit 27 | key: pre-commit-3|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') }} 28 | - run: pre-commit run --show-diff-on-failure --color=always --all-files 29 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/backtrace_kernel: -------------------------------------------------------------------------------- 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/backtrace_kernel python 2 | set trace-commands on 3 | set pagination off 4 | set breakpoint pending on 5 | break simple_dpex_func.py:28 6 | run simple_dpex_func.py 7 | # Expected: 8 | # ... 9 | # Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::kernel_sum () at simple_dpex_func.py:28 10 | # 28 i = dpex.get_global_id(0) 11 | backtrace 12 | # Expected: 13 | # #0 __main__::kernel_sum () at simple_dpex_func.py:28 14 | continue 15 | # Expected: 16 | # ...
17 | # [Switching to Thread 1.1073742080 lane 0] 18 | # Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::kernel_sum () at simple_dpex_func.py:28 19 | # 28 i = dpex.get_global_id(0) 20 | continue 21 | # Expected: 22 | # ... 23 | # Done... 24 | echo Done\n 25 | quit 26 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/docs/stepping: -------------------------------------------------------------------------------- 1 | $ NUMBA_OPT=0 gdb-oneapi -q python 2 | (gdb) set breakpoint pending on 3 | (gdb) break simple_dpex_func.py:29 4 | (gdb) run simple_dpex_func.py 5 | ... 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::kernel_sum () at simple_dpex_func.py:29 7 | 29 c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i]) 8 | (gdb) step 9 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::kernel_sum () at simple_dpex_func.py:29 10 | 29 c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i]) 11 | (gdb) step 12 | __main__::func_sum () at dpex_func.py:22 13 | 22 result = a_in_func + b_in_func 14 | (gdb) backtrace 15 | #0 __main__::func_sum () at simple_dpex_func.py:22 16 | #1 __main__::kernel_sum () at simple_dpex_func.py:29 17 | (gdb) continue 18 | ... 19 | Done... 
20 | -------------------------------------------------------------------------------- /numba_dpex/core/runtime/_eventstruct.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | //===----------------------------------------------------------------------===// 6 | /// 7 | /// \file 8 | /// Defines the numba-dpex native representation for a dpctl.SyclEvent 9 | /// 10 | //===----------------------------------------------------------------------===// 11 | 12 | #ifndef _EVENTSTRUCT_H_ 13 | #define _EVENTSTRUCT_H_ 14 | 15 | #include "_nrt_helper.h" 16 | #include "dpctl_sycl_interface.h" 17 | #include "numba/core/runtime/nrt_external.h" 18 | #include 19 | 20 | typedef struct 21 | { 22 | NRT_MemInfo *meminfo; 23 | PyObject *parent; 24 | void *event_ref; 25 | } eventstruct_t; 26 | 27 | void NRT_MemInfo_EventRef_Delete(void *data); 28 | 29 | #endif /* _EVENTSTRUCT_H_ */ 30 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: dev 2 | channels: 3 | - defaults 4 | - dppy/label/dev 5 | - numba 6 | - intel 7 | - numba/label/dev 8 | - nodefaults 9 | dependencies: 10 | - python=3.9 11 | - gxx_linux-64 12 | - dpcpp_linux-64>=2023.2,!=2024.0.1,!=2024.0.2 13 | - numba ==0.58* 14 | - dpctl >=0.14* 15 | - dpnp >=0.11* 16 | - mkl >=2021.3.0 # for dpnp 17 | - dpcpp-llvm-spirv 18 | - scikit-build >=0.15* 19 | - cmake >=3.26* 20 | - pytest 21 | - pip 22 | - pip: 23 | - coverage 24 | - pre-commit 25 | - flake8 26 | - black==20.8b1 27 | - pytest-cov 28 | - pytest-xdist 29 | - pexpect 30 | variables: 31 | CHANNELS: -c defaults -c numba -c intel -c numba/label/dev -c dppy/label/dev --override-channels 32 | CHANNELS_DEV: -c dppy/label/dev -c defaults -c numba -c intel -c numba/label/dev --override-channels 33 | 
-------------------------------------------------------------------------------- /numba_dpex/core/runtime/_nrt_helper.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | #ifndef _NRT_HELPER_H_ 6 | #define _NRT_HELPER_H_ 7 | 8 | #define NO_IMPORT_ARRAY 9 | #include "_meminfo_helper.h" 10 | 11 | void *NRT_MemInfo_external_allocator(NRT_MemInfo *mi); 12 | void *NRT_MemInfo_data(NRT_MemInfo *mi); 13 | void NRT_MemInfo_release(NRT_MemInfo *mi); 14 | void NRT_MemInfo_call_dtor(NRT_MemInfo *mi); 15 | void NRT_MemInfo_acquire(NRT_MemInfo *mi); 16 | size_t NRT_MemInfo_size(NRT_MemInfo *mi); 17 | void *NRT_MemInfo_parent(NRT_MemInfo *mi); 18 | size_t NRT_MemInfo_refcount(NRT_MemInfo *mi); 19 | void NRT_Free(void *ptr); 20 | void NRT_dealloc(NRT_MemInfo *mi); 21 | void NRT_MemInfo_destroy(NRT_MemInfo *mi); 22 | void NRT_MemInfo_pyobject_dtor(void *data); 23 | 24 | #endif /* _NRT_HELPER_H_ */ 25 | -------------------------------------------------------------------------------- /numba_dpex/examples/debug/commands/docs/sheduler_locking: -------------------------------------------------------------------------------- 1 | $ NUMBA_OPT=0 gdb-oneapi -q python 2 | (gdb) set breakpoint pending on 3 | (gdb) break simple_dpex_func.py:29 4 | (gdb) run simple_dpex_func.py 5 | ... 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::kernel_sum () at simple_dpex_func.py:29 7 | 29 c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i]) 8 | (gdb) set scheduler-locking step 9 | (gdb) step 10 | __main__::func_sum () at dpex_func.py:22 11 | 22 result = a_in_func + b_in_func 12 | (gdb) step 13 | 23 return result 14 | (gdb) continue 15 | ... 
16 | [Switching to Thread 1.1073742080 lane 0] 17 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::kernel_sum () at simple_dpex_func.py:29 18 | 29 c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i]) 19 | (gdb) continue 20 | ... 21 | Done... 22 | -------------------------------------------------------------------------------- /numba_dpex/tests/core/types/DpnpNdArray/test_dpnp_ndarray_type.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | """ 6 | Tests for numba_dpex.dpnp_ndarray.typeof 7 | """ 8 | 9 | import pytest 10 | from dpnp import ndarray as dpnp_ndarray 11 | from numba import typeof 12 | 13 | from numba_dpex.core.types.dpnp_ndarray_type import DpnpNdArray 14 | 15 | 16 | @pytest.mark.parametrize( 17 | "array_type, expected_numba_type", 18 | [ 19 | (dpnp_ndarray, DpnpNdArray), 20 | ], 21 | ) 22 | @pytest.mark.parametrize( 23 | "shape, expected_ndim", 24 | [ 25 | ([1], 1), 26 | ([1, 1], 2), 27 | ], 28 | ) 29 | def test_typeof(array_type, shape, expected_numba_type, expected_ndim): 30 | array = array_type(shape) 31 | assert isinstance(typeof(array), expected_numba_type) 32 | -------------------------------------------------------------------------------- /numba_dpex/tests/core/types/range_types/test_constructor_overloads.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import pytest 6 | 7 | from numba_dpex import NdRange, Range, dpjit 8 | 9 | ranges = [(10,), (10, 10), (10, 10, 10)] 10 | 11 | 12 | @pytest.mark.parametrize("r", ranges) 13 | def test_range_ctor(r): 14 | @dpjit 15 | def _tester(r): 16 | return Range(*r) 17 | 18 | r_expected = Range(*r) 19 | r_out = _tester(r) 20 | 21 | assert r_out == r_expected 22 | 23 | 24 | 
@pytest.mark.parametrize("r", ranges)
def test_ndrange_unbox_box(r):
    """Build an NdRange inside a dpjit function and compare it with one
    built directly in Python from the same dimensions.
    """

    @dpjit
    def _build_ndrange(dims):
        # The same Range object serves as both global and local range.
        shared = Range(*dims)
        return NdRange(shared, shared)

    host_range = Range(*r)
    expected = NdRange(host_range, host_range)

    assert _build_ndrange(r) == expected
# Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/step_dpex_func python
#
# Stops at the kernel_sum breakpoint (simple_dpex_func.py:29) and uses `step`
# to enter func_sum. The "# Expected:" comments record the output the session
# is expected to print after each command.
set trace-commands on
set pagination off
set breakpoint pending on
break simple_dpex_func.py:29
run simple_dpex_func.py
# Expected:
# ...
# Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::kernel_sum () at simple_dpex_func.py:29
# 29 c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
step
# Expected:
# [Switching to Thread 1.1073742080 lane 0]
# Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::kernel_sum () at simple_dpex_func.py:29
# 29 c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
step
# Expected:
# __main__::func_sum () at simple_dpex_func.py:22
# 22 result = a_in_func + b_in_func
continue
# Expected:
# ...
# Done...
echo Done\n
quit
def test_dpjit_target():
    """Check the basic attributes of the module-level DpexTargetContext."""
    assert dpexctx.lower_extensions == {}
    assert dpexctx.is32bit is False
    assert dpexctx.dpexrt is not None
    # Compare against the class directly; building a throwaway
    # JITCPUCodegen just to take its type() is unnecessary work.
    assert isinstance(dpexctx._internal_codegen, JITCPUCodegen)


def test_dpjit_target_refresh():
    """Check that refreshing the target context does not raise KeyError."""
    try:
        # The method has to be called: the bare attribute access
        # ``dpexctx.refresh`` never ran the refresh logic, so the test
        # could not fail.
        dpexctx.refresh()
    except KeyError:
        pytest.fail("Unexpected KeyError in dpjit_target.")
.. caution::

    The supported signature for some of the ``math`` module functions in the
    compiled mode differs from CPython. The divergence in behavior is a known
    issue. Please refer to https://github.com/IntelPython/numba-dpex/issues/759
    for updates.
def test_intenumliteral_creation():
    """Tests the creation of an IntEnumLiteral type.

    A ``FlagEnum`` subclass must be accepted, while a plain ``IntEnum``
    subclass must be rejected with ``IllegalIntEnumLiteralValueError``.
    """

    class DummyFlags(FlagEnum):
        DUMMY = 0

    try:
        IntEnumLiteral(DummyFlags)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
        # and SystemExit are not turned into a test failure.
        pytest.fail("Unexpected failure in IntEnumLiteral initialization")

    class SomeKindOfUnknownEnum(IntEnum):
        UNKNOWN_FLAG = 1

    # Only the constructor call sits inside ``raises`` so the expected error
    # cannot be attributed to the class definition above.
    with pytest.raises(IllegalIntEnumLiteralValueError):
        IntEnumLiteral(SomeKindOfUnknownEnum)
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=./source
set BUILDDIR=_build

if "%1" == "" goto help

REM Probe for the Sphinx executable; errorlevel 9009 is reported when the
REM command cannot be found.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
REM Jump past :clean. Without this, printing the help fell through into the
REM label below and started removing the build directory.
goto end

:clean
REM NOTE(review): nothing in this script jumps to this label; "make clean"
REM is served by the generic "-M %1" invocation above.
rd /s %BUILDDIR%

:end
popd
def test_compilation_of_flag_enum():
    """Tests if a FlagEnum subclass can be used inside a kernel function.

    The kernel writes the two flag values into the first two slots; every
    other element must keep its initial value of one.
    """
    a = dpnp.ones(10, dtype=dpnp.int64)
    dpex.call_kernel(update_with_flag, Range(10), a)

    assert a[0] == MockFlags.FLAG1
    assert a[1] == MockFlags.FLAG2
    # Walk to the real end of the array: the previous hard-coded upper bound
    # of 9 left the last element unchecked.
    for idx in range(2, a.size):
        assert a[idx] == 1
9 | # Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::kernel_sum () at simple_dpex_func.py:29 10 | # 29 c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i]) 11 | set scheduler-locking step 12 | step 13 | # Expected: 14 | # __main__::func_sum () at dpex_func.py:22 15 | # 22 result = a_in_func + b_in_func 16 | step 17 | # Expected: 18 | # 23 return result 19 | continue 20 | # Expected: 21 | # ... 22 | # [Switching to Thread 1.1073742080 lane 0] 23 | # Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::kernel_sum () at simple_dpex_func.py:29 24 | # 29 c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i]) 25 | continue 26 | # Expected: 27 | # ... 28 | # Done... 29 | echo Done\n 30 | quit 31 | -------------------------------------------------------------------------------- /docs/_templates/autoapi/python/data.rst: -------------------------------------------------------------------------------- 1 | {% if obj.display %} 2 | .. py:{{ obj.type }}:: {{ obj.name }} 3 | {%- if obj.annotation is not none %} 4 | 5 | :type: {%- if obj.annotation %} {{ obj.annotation }}{%- endif %} 6 | 7 | {%- endif %} 8 | 9 | {%- if obj.value is not none %} 10 | 11 | :value: {% if obj.value is string and obj.value.splitlines()|count > 1 -%} 12 | Multiline-String 13 | 14 | .. raw:: html 15 | 16 |
Show Value 17 | 18 | .. code-block:: python 19 | 20 | """{{ obj.value|indent(width=8,blank=true) }}""" 21 | 22 | .. raw:: html 23 | 24 |
def test_data_model_registration():
    """Tests that the IntEnumLiteral type is registered only with the dpex
    data model manager and not with numba's default data model manager.
    """

    class DummyFlags(FlagEnum):
        DUMMY = 0

    dummy = IntEnumLiteral(DummyFlags)

    # The default numba manager must not know the type.
    with pytest.raises(KeyError):
        default_manager.lookup(dummy)

    try:
        dpex_data_model_manager.lookup(dummy)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
        # and SystemExit are not reported as a lookup failure.
        pytest.fail(
            "IntEnumLiteral type lookup failed in experimental "
            "data model manager"
        )
def atomic_fence(
    memory_order: MemoryOrder, memory_scope: MemoryScope
):  # pylint: disable=unused-argument
    """Performs a memory fence operation across all work-items.

    The function is equivalent to the ``sycl::atomic_fence`` function and
    controls the order of memory accesses (loads and stores) by individual
    work-items.

    .. important::
        The function is a no-op during CPython execution (the Python body
        contains no statements) and is only effective in the JIT compiled
        mode of execution.

    Args:
        memory_order (MemoryOrder): The memory synchronization order.
        memory_scope (MemoryScope): The set of work-items and devices to which
            the memory ordering constraints apply.

    """
@nb.njit
def add1(a):
    """Return ``a + 1``, compiled through plain ``numba.njit``."""
    return a + 1


def add_py(a, b):
    """Return ``np.add(a, b)``; also the source of the jitted ``add_jit``."""
    return np.add(a, b)


add_jit = nb.njit(add_py)


def test_add1():
    """The decorated njit function works on a complex64 array."""
    values = np.asarray([1j], dtype=np.complex64)
    expected = np.asarray([1 + 1j], dtype=np.complex64)
    assert np.array_equal(add1(values), expected)


def test_add_py():
    """The pure-Python reference adds a scalar to a complex128 array."""
    values = np.ones((10,), dtype=np.complex128)
    expected = np.full((10,), 2.5, dtype=values.dtype)
    assert np.array_equal(add_py(values, 1.5), expected)


def test_add_jit():
    """The njit-wrapped function gives the same result as the reference."""
    values = np.ones((10,), dtype=np.complex128)
    expected = np.full((10,), 2.5, dtype=values.dtype)
    assert np.array_equal(add_jit(values, 1.5), expected)
# Debug-enabled example kernel: each work-item reads one element from each of
# the two inputs and stores their sum in the output at the same index.
# NOTE(review): the trailing marker comments and the local names l1/l2 look
# like anchors for debugger tooling -- keep them intact unless confirmed
# otherwise.
@ndpx.kernel(debug=True)
def data_parallel_sum(item, a_in_kernel, b_in_kernel, c_in_kernel):
    i = item.get_id(0)  # numba-kernel-breakpoint
    l1 = a_in_kernel[i]  # second-line
    l2 = b_in_kernel[i]  # third-line
    c_in_kernel[i] = l1 + l2  # fourth-line


def driver(a, b, c, global_size):
    """Print the three arrays, launch ``data_parallel_sum`` over a
    ``Range(global_size)`` and print the output array afterwards.
    """
    print("before : ", a)
    print("before : ", b)
    print("before : ", c)
    ndpx.call_kernel(data_parallel_sum, ndpx.Range(global_size), a, b, c)
    print("after : ", c)


def main():
    """Allocate two float32 ``arange`` inputs and an uninitialized output of
    the same size, then run the driver.
    """
    global_size = 10
    N = global_size

    a = np.arange(N, dtype=np.float32)
    b = np.arange(N, dtype=np.float32)
    c = np.empty_like(a)

    driver(a, b, c, global_size)

    print("Done...")
# Feed every gdb command script to check, one at a time and in a fixed order.
run_checks() {
  local script
  for script in \
    function_breakpoint \
    local_variables_0 \
    local_variables_1 \
    next \
    sheduler_locking \
    stepi \
    stepping \
    step_dpex_func \
    step_sum \
    simple_sum \
    backtrace \
    backtrace_kernel \
    break_func \
    break_file_func \
    break_line_number \
    break_nested_func \
    info_func
  do
    check "commands/${script}"
  done
}
Triggers the workflow on every pull request
# and on pushes to the main branch
def test_fetch_add(input_array):
    """Launch ``set_ones`` on each supported array type and dtype.

    The test name is historical; the kernel exercised here simply stores
    ``1`` into the element owned by each work-item.
    """
    dpex.call_kernel(set_ones, Range(_SIZE), input_array)

    # One work-item per element, so every element must have been written;
    # checking only index 0 would hide failures in the other work-items.
    for idx in range(_SIZE):
        assert input_array[idx] == 1
def test_boxing_without_parent():
    """Test boxing of an event that does not have a parent.

    The event is created inside the dpjit function and returned to Python,
    so the returned object has no pre-existing Python owner.
    """

    @dpjit
    def func() -> SyclEvent:
        event = SyclEvent()
        return event

    e: SyclEvent = func()
    # sys.getrefcount() counts the temporary reference held by its own
    # argument, so a value of 2 means ``e`` is the only lasting reference.
    # The count must be taken while ``e`` is the sole name bound to the
    # object, hence the exact statement order here.
    ref_cnt = sys.getrefcount(e)

    assert isinstance(e, SyclEvent)
    assert ref_cnt == 2
@pytest.mark.parametrize(
    "error, ranges",
    [
        (TypeError, ((2, 2), ("a", 1, 1))),
        (TypeError, ((3, 3, 3, 3), (2, 2, 2))),
    ],
)
def test_ndrange_config_error(error, ranges):
    """Test if an exception is raised when calling an ndrange kernel with
    unsupported arguments.

    Args:
        error: Exception type expected from the launch attempt.
        ranges: Pair ``(global_range, local_range)`` of invalid range tuples.
    """

    a = dpt.ones(1024, dtype=dpt.int32)
    b = dpt.ones(1024, dtype=dpt.int32)
    c = dpt.zeros(1024, dtype=dpt.int64)

    global_range, local_range = ranges

    with pytest.raises(error):
        # Named ``ndrange`` so the builtin ``range`` is not shadowed.
        ndrange = NdRange(global_range, local_range)
        ndpx.call_kernel(kernel_vector_sum, ndrange, a, b, c)
# Public names exported by the kernel_api package. Every entry appears exactly
# once; the previous list repeated "call_kernel" and "group_barrier".
__all__ = [
    "call_kernel",
    "group_barrier",
    "AddressSpace",
    "atomic_fence",
    "AtomicRef",
    "Group",
    "Item",
    "LocalAccessor",
    "MemoryOrder",
    "MemoryScope",
    "NdItem",
    "NdRange",
    "Range",
    "PrivateArray",
]
def test_return(sig):
    """Check that a kernel whose Python function returns a value is rejected.

    ``f`` returns its array argument, so building the kernel with ``sig`` and
    launching it is expected to raise either ``KernelHasReturnValueError``
    directly or a ``TypingError`` whose message names that error.

    Args:
        sig: The signature passed to ``dpex.kernel``; supplied by the ``sig``
            fixture (``None`` or an explicit signature with an array return
            type).
    """
    a = dpnp.arange(1024, dtype=dpnp.int32)

    with pytest.raises((TypingError, KernelHasReturnValueError)) as excinfo:
        kernel_fn = dpex.kernel(sig)(f)
        dpex.call_kernel(kernel_fn, dpex.Range(a.size), a)

    # ``excinfo.type`` is the exception *class*, so it has to be checked with
    # issubclass(). The previous isinstance() check on the class object was
    # always False, which silently skipped the message assertion below.
    if issubclass(excinfo.type, TypingError):
        assert "KernelHasReturnValueError" in excinfo.value.args[0]
datamodel so it can be boxed to 13 | Python.""" 14 | 15 | ty_event = DpctlSyclEvent() 16 | 17 | pyapi = ctx.get_python_api(builder) 18 | 19 | event_struct_proxy = cgutils.create_struct_proxy(ty_event)(ctx, builder) 20 | 21 | # Ref count after the call is equal to 1. 22 | # TODO: get dpex RT from cached property once the PR is merged 23 | # https://github.com/IntelPython/numba-dpex/pull/1027 24 | # ctx.dpexrt.eventstruct_init( # noqa: W0621 25 | dpexrt.DpexRTContext(ctx).eventstruct_init( 26 | pyapi, 27 | eref, 28 | # calling _() is by numba's design 29 | event_struct_proxy._getpointer(), # pylint: disable=W0212 30 | ) 31 | 32 | # calling _() is by numba's design 33 | event_value = event_struct_proxy._getvalue() # pylint: disable=W0212 34 | 35 | return event_value 36 | -------------------------------------------------------------------------------- /numba_dpex/tests/core/types/USMNdArray/test_usm_ndarray_type.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import dpctl 6 | import dpctl.tensor as dpt 7 | import numpy as np 8 | import pytest 9 | from numba.misc.special import typeof 10 | 11 | from numba_dpex.core.types import USMNdArray 12 | from numba_dpex.tests._helper import ( 13 | get_queue_or_skip, 14 | skip_if_dtype_not_supported, 15 | ) 16 | 17 | list_of_dtypes = [ 18 | np.int32, 19 | np.float32, 20 | np.int64, 21 | np.float64, 22 | ] 23 | 24 | 25 | @pytest.fixture(params=list_of_dtypes) 26 | def dtype(request): 27 | return request.param 28 | 29 | 30 | list_of_usm_type = [ 31 | "shared", 32 | "device", 33 | "host", 34 | ] 35 | 36 | 37 | @pytest.fixture(params=list_of_usm_type) 38 | def usm_type(request): 39 | return request.param 40 | 41 | 42 | def test_usm_ndarray_type(dtype, usm_type): 43 | q = get_queue_or_skip() 44 | skip_if_dtype_not_supported(dtype, q) 45 | 46 | a = np.array(np.random.random(10), dtype) 
47 | da = dpt.usm_ndarray(a.shape, dtype=a.dtype, buffer=usm_type) 48 | 49 | assert isinstance(typeof(da), USMNdArray) 50 | assert da.usm_type == usm_type 51 | -------------------------------------------------------------------------------- /docs/source/user_guide/debugging/backtrace.rst: -------------------------------------------------------------------------------- 1 | .. include:: ./../../ext_links.txt 2 | 3 | Backtrace 4 | ========== 5 | 6 | The ``backtrace`` command displays a summary of how your program got where it 7 | is. Consider the following example 8 | ``numba_dpex/examples/debug/simple_dpex_func.py``: 9 | 10 | .. literalinclude:: ./../../../../numba_dpex/examples/debug/simple_dpex_func.py 11 | :lines: 5- 12 | :linenos: 13 | :lineno-match: 14 | 15 | 16 | This section presents two examples of using Intel Distribution for GDB* to 17 | generate a backtrace from a numba_dpex.kernel function. The first example presents 18 | the case where the kernel function does not invoke any other function. The 19 | second example presents the case where the kernel function invokes a 20 | numba_dpex.func. 21 | 22 | Example 1: 23 | 24 | .. literalinclude:: ./../../../../numba_dpex/examples/debug/commands/docs/backtrace_kernel 25 | :language: shell-session 26 | :emphasize-lines: 8,9 27 | 28 | Example 2: 29 | 30 | ..
class KernelTemplateInterface(metaclass=abc.ABCMeta):
    """Abstract interface that kernel template classes implement.

    Besides regular inheritance, a class is recognised as a (virtual)
    subclass when it structurally provides the members checked by
    :meth:`__subclasshook__`.
    """

    @classmethod
    def __subclasshook__(cls, subclass):
        """Structural subclass check used by ``issubclass``/``isinstance``.

        Args:
            subclass: The class being tested against this interface.

        Returns:
            ``True`` when ``subclass`` exposes callable
            ``_generate_kernel_stub_as_string``, ``_generate_kernel_ir``,
            ``dump_kernel_string`` and ``dump_kernel_ir`` members and has
            ``kernel_ir`` and ``kernel_string`` attributes; otherwise
            ``NotImplemented`` so that the default ABC machinery (real
            inheritance, ``register``) decides.
        """
        # Only answer for the interface itself; classes derived from it fall
        # back to the normal inheritance-based check.
        if cls is not KernelTemplateInterface:
            return NotImplemented

        required_callables = (
            "_generate_kernel_stub_as_string",
            "_generate_kernel_ir",
            "dump_kernel_string",
            "dump_kernel_ir",
        )
        required_attributes = ("kernel_ir", "kernel_string")

        # getattr() with a default keeps a missing member from raising
        # AttributeError, and the boolean result is returned directly
        # (the previous code wrapped it in a one-argument hasattr() call,
        # which raised TypeError).
        has_callables = all(
            callable(getattr(subclass, name, None))
            for name in required_callables
        )
        has_attributes = all(
            hasattr(subclass, name) for name in required_attributes
        )

        if has_callables and has_attributes:
            return True
        return NotImplemented

    @abc.abstractmethod
    def _generate_kernel_stub_as_string(self):
        """Generates as a string a stub for a numba_dpex kernel function"""
        raise NotImplementedError

    @abc.abstractmethod
    def _generate_kernel_ir(self):
        """Abstract hook; concrete templates must override it."""
        raise NotImplementedError

    @abc.abstractmethod
    def dump_kernel_string(self):
        """Abstract hook; concrete templates must override it."""
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def py_func(self):
        """Abstract property; concrete templates must override it."""
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def kernel_string(self):
        """Abstract property; concrete templates must override it."""
        raise NotImplementedError
def test_boxing_unboxing():
    """Tests basic boxing and unboxing of a dpnp.ndarray object.

    Checks if we can pass in and return a dpnp.ndarray object to and
    from a dpjit decorated function.
    """

    @dpjit
    def func(a):
        return a

    a = dpnp.empty(10, dtype=dpnp.float32)
    try:
        b = func(a)
    except Exception as exc:
        # Keep the original failure message but chain the real cause, so the
        # underlying error is visible in the report. A bare ``except`` would
        # also have trapped KeyboardInterrupt/SystemExit.
        raise AssertionError(
            "Failure during unbox/box of dpnp.ndarray"
        ) from exc

    assert a.shape == b.shape
    assert a.device == b.device
    assert a.strides == b.strides
    assert a.dtype == b.dtype
    # To ensure we are returning the original array when boxing
    assert a is b
6 | """ 7 | 8 | import dpnp 9 | import numpy.testing as testing 10 | 11 | import numba_dpex as ndpx 12 | 13 | 14 | # Data parallel kernel implementing vector sum 15 | @ndpx.kernel 16 | def kernel_vector_sum(item, a, b, c): 17 | i = item.get_id(0) 18 | c[i] = a[i] + b[i] 19 | 20 | 21 | # Utility function for printing and testing 22 | def driver(a, b, c, global_size): 23 | ndpx.call_kernel(kernel_vector_sum, ndpx.Range(global_size), a, b, c) 24 | a_np = dpnp.asnumpy(a) # Copy dpnp array a to NumPy array a_np 25 | b_np = dpnp.asnumpy(b) # Copy dpnp array b to NumPy array b_np 26 | c_np = dpnp.asnumpy(c) # Copy dpnp array c to NumPy array c_np 27 | testing.assert_equal(c_np, a_np + b_np) 28 | 29 | 30 | # Main function 31 | def main(): 32 | N = 10 33 | global_size = N 34 | print("Vector size N", N) 35 | 36 | # Create random vectors on the default device 37 | a = dpnp.random.random(N) 38 | b = dpnp.random.random(N) 39 | c = dpnp.ones_like(a) 40 | 41 | print("Executing on device:") 42 | a.device.print_device_info() 43 | driver(a, b, c, global_size) 44 | print("Done...") 45 | 46 | 47 | if __name__ == "__main__": 48 | main() 49 | -------------------------------------------------------------------------------- /numba_dpex/core/runtime/_nrt_python_helper.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | //===----------------------------------------------------------------------===// 6 | /// 7 | /// \file 8 | /// Re-definition of NRT functions for marshalling from / to Python objects 9 | /// defined in numba/core/runtime/_nrt_python.c. 10 | /// 11 | //===----------------------------------------------------------------------===// 12 | 13 | #ifndef _NRT_PYTHON_HELPER_H_ 14 | #define _NRT_PYTHON_HELPER_H_ 15 | 16 | #define NO_IMPORT_ARRAY 17 | #include "_meminfo_helper.h" 18 | 19 | /*! 
20 | * @brief A pyTypeObject to describe a Python object to wrap Numba's MemInfo 21 | * 22 | */ 23 | extern PyTypeObject MemInfoType; 24 | 25 | void MemInfo_dealloc(MemInfoObject *self); 26 | int MemInfo_init(MemInfoObject *self, PyObject *args, PyObject *kwds); 27 | int MemInfo_getbuffer(PyObject *exporter, Py_buffer *view, int flags); 28 | PyObject *MemInfo_acquire(MemInfoObject *self); 29 | PyObject *MemInfo_release(MemInfoObject *self); 30 | PyObject *MemInfo_get_data(MemInfoObject *self, void *closure); 31 | PyObject *MemInfo_get_refcount(MemInfoObject *self, void *closure); 32 | PyObject *MemInfo_get_external_allocator(MemInfoObject *self, void *closure); 33 | PyObject *MemInfo_get_parent(MemInfoObject *self, void *closure); 34 | 35 | #endif /* _NRT_PYTHON_HELPER_H_ */ 36 | -------------------------------------------------------------------------------- /numba_dpex/core/types/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from .dpctl_types import DpctlSyclEvent, DpctlSyclQueue 6 | from .dpnp_ndarray_type import DpnpNdArray 7 | from .kernel_api.literal_intenum import IntEnumLiteral 8 | from .kernel_api.ranges import NdRangeType, RangeType 9 | from .kernel_dispatcher_type import KernelDispatcherType 10 | from .numba_types_short_names import ( 11 | b1, 12 | bool_, 13 | boolean, 14 | double, 15 | f4, 16 | f8, 17 | float32, 18 | float64, 19 | float_, 20 | i4, 21 | i8, 22 | int32, 23 | int64, 24 | none, 25 | u4, 26 | u8, 27 | uint32, 28 | uint64, 29 | void, 30 | ) 31 | from .usm_ndarray_type import USMNdArray 32 | 33 | usm_ndarray = USMNdArray 34 | 35 | __all__ = [ 36 | "DpctlSyclQueue", 37 | "DpctlSyclEvent", 38 | "DpnpNdArray", 39 | "IntEnumLiteral", 40 | "KernelDispatcherType", 41 | "NdRangeType", 42 | "RangeType", 43 | "USMNdArray", 44 | "none", 45 | "boolean", 46 | "bool_", 47 | "uint32", 48 | 
"uint64", 49 | "int32", 50 | "int64", 51 | "float32", 52 | "float64", 53 | "b1", 54 | "i4", 55 | "i8", 56 | "u4", 57 | "u8", 58 | "f4", 59 | "f8", 60 | "float_", 61 | "double", 62 | "usm_ndarray", 63 | "void", 64 | ] 65 | -------------------------------------------------------------------------------- /numba_dpex/core/runtime/experimental/tools/boost_hash.hpp: -------------------------------------------------------------------------------- 1 | // Copyright 2005-2014 Daniel James. 2 | // Distributed under the Boost Software License, Version 1.0. (See accompanying 3 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 4 | 5 | // Based on Peter Dimov's proposal 6 | // http://www.open-std.org/JTC1/SC22/WG21/docs/papers/2005/n1756.pdf 7 | // issue 6.18. 8 | // 9 | // This also contains public domain code from MurmurHash. From the 10 | // MurmurHash header: 11 | 12 | // MurmurHash3 was written by Austin Appleby, and is placed in the public 13 | // domain. The author hereby disclaims copyright to this source code. 14 | 15 | // 2023 Intel Corporation 16 | // Copied hash_combine and hash_combine_impl from boost 17 | // (https://www.boost.org/doc/libs/1_76_0/boost/container_hash/hash.hpp) and 18 | // changed hash_combine to use std::hash instead of boost::hash. 
#include <functional>

namespace boost
{
namespace hash_detail
{
/// Mixes \p value into \p seed in place using the boost hash_combine
/// formula (xor with value + 0x9e3779b9 + (seed << 6) + (seed >> 2)).
template <typename SizeT>
inline void hash_combine_impl(SizeT &seed, SizeT value)
{
    seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}
} // namespace hash_detail

/// Hashes \p v with std::hash<T> and folds the result into \p seed.
///
/// \param seed Running hash value; updated in place.
/// \param v    Value whose std::hash is combined into the seed.
template <class T> inline void hash_combine(std::size_t &seed, T const &v)
{
    std::hash<T> hasher;
    return boost::hash_detail::hash_combine_impl(seed, hasher(v));
}
} // namespace boost
25 | """ 26 | idx = item.get_id(0) 27 | acc = kapi.AtomicRef(res, 0) 28 | acc.fetch_add(a[idx]) 29 | 30 | 31 | def main(): 32 | N = 1024 33 | 34 | a = dpnp.arange(0, N) 35 | res = dpnp.zeros(1, dtype=a.dtype) 36 | 37 | print("Executing on device:") 38 | a.device.print_device_info() 39 | 40 | dpex.call_kernel(atomic_reduction, dpex.Range(N), a, res) 41 | print(f"Summation of {N} integers = {res[0]}") 42 | 43 | assert res[0] == N * (N - 1) / 2 44 | 45 | 46 | if __name__ == "__main__": 47 | main() 48 | -------------------------------------------------------------------------------- /numba_dpex/core/runtime/_dbg_printer.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | //===----------------------------------------------------------------------===// 6 | /// 7 | /// \file 8 | /// A helper macro to print debug prints. 9 | /// 10 | //===----------------------------------------------------------------------===// 11 | 12 | #pragma once 13 | 14 | /* Debugging facilities - enabled at compile-time */ 15 | /* #undef NDEBUG */ 16 | #if 0 17 | #include 18 | #define DPEXRT_DEBUG(X) \ 19 | { \ 20 | X; \ 21 | fflush(stdout); \ 22 | } 23 | #else 24 | #define DPEXRT_DEBUG(X) \ 25 | if (0) { \ 26 | X; \ 27 | } 28 | #endif 29 | 30 | /* 31 | * Debugging printf function used internally 32 | */ 33 | static inline void drt_debug_print(const char *fmt, ...) 
34 | { 35 | va_list args; 36 | 37 | va_start(args, fmt); 38 | vfprintf(stderr, fmt, args); 39 | va_end(args); 40 | } 41 | -------------------------------------------------------------------------------- /numba_dpex/core/runtime/_meminfo_helper.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | #ifndef _NRT_ARRAY_STRUCT_H_ 6 | #define _NRT_ARRAY_STRUCT_H_ 7 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include "numba/_numba_common.h" 15 | #include "numba/_pymodule.h" 16 | #include "numba/core/runtime/nrt.h" 17 | 18 | /* 19 | * The MemInfo structure. 20 | * NOTE: copy from numba/core/runtime/nrt.c 21 | */ 22 | struct MemInfo 23 | { 24 | size_t refct; 25 | NRT_dtor_function dtor; 26 | void *dtor_info; 27 | void *data; 28 | size_t size; /* only used for NRT allocated memory */ 29 | NRT_ExternalAllocator *external_allocator; 30 | }; 31 | 32 | /*! 33 | * @brief A wrapper struct to store a MemInfo pointer along with the PyObject 34 | * that is associated with the MeMinfo. 35 | * 36 | * The struct is stored in the dtor_info attribute of a MemInfo object and 37 | * used by the destructor to free the MemInfo and DecRef the Pyobject. 
def _slm_kernel(nd_item: kapi.NdItem, a, slm):
    """Write 100 into this work-item's local-memory slot and copy it to ``a``.

    Args:
        nd_item: Index-space object providing the global and local linear
            ids of the calling work-item.
        a: Output array, indexed by the global linear id.
        slm: Local accessor, indexed by the local linear id.
    """
    i = nd_item.get_global_linear_id()
    j = nd_item.get_local_linear_id()

    slm[j] = 100
    # Read back the slot that was just written (local id ``j``). Indexing
    # ``slm`` with the global id ``i`` only matches when both ids are equal,
    # i.e. it reads a different (possibly out-of-range) slot otherwise.
    a[i] = slm[j]
36 | kapi.call_kernel(_slm_kernel, kapi.NdRange((32,), (32,)), a, slm) 37 | 38 | assert numpy.all(a == 100) 39 | 40 | 41 | def test_local_accessor_usage_not_allowed_with_range_kernel(): 42 | 43 | a = numpy.empty(32) 44 | slm = kapi.LocalAccessor(32, dtype=a.dtype) 45 | 46 | with pytest.raises(TypeError): 47 | kapi.call_kernel(_slm_kernel, kapi.Range((32,)), a, slm) 48 | -------------------------------------------------------------------------------- /numba_dpex/tests/kernel_tests/test_math_functions.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import math 6 | 7 | import dpnp 8 | import numpy 9 | import pytest 10 | 11 | import numba_dpex as dpex 12 | from numba_dpex.tests._helper import get_all_dtypes 13 | 14 | list_of_unary_ops = ["fabs", "exp", "log", "sqrt", "sin", "cos", "tan"] 15 | 16 | 17 | @pytest.fixture(params=list_of_unary_ops) 18 | def unary_op(request): 19 | return request.param 20 | 21 | 22 | list_of_dtypes = get_all_dtypes( 23 | no_bool=True, no_int=True, no_float16=True, no_none=True, no_complex=True 24 | ) 25 | 26 | 27 | @pytest.fixture(params=list_of_dtypes) 28 | def input_arrays(request): 29 | # The size of input and out arrays to be used 30 | N = 2048 31 | a = dpnp.arange(N, dtype=request.param) 32 | b = dpnp.arange(N, dtype=request.param) 33 | return a, b 34 | 35 | 36 | def test_binary_ops(unary_op, input_arrays): 37 | a, b = input_arrays 38 | uop = getattr(math, unary_op) 39 | dpnp_uop = getattr(dpnp, unary_op) 40 | 41 | @dpex.kernel 42 | def f(item, a, b): 43 | i = item.get_id(0) 44 | b[i] = uop(a[i]) 45 | 46 | dpex.call_kernel(f, dpex.Range(a.size), a, b) 47 | 48 | expected = dpnp_uop(a) 49 | 50 | np_expected = dpnp.asnumpy(expected) 51 | np_actual = dpnp.asnumpy(b) 52 | 53 | assert numpy.allclose(np_expected, np_actual) 54 | 
-------------------------------------------------------------------------------- /numba_dpex/kernel_api/memory_enums.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | """A collection of FlagEnum classes that syntactically represents the SYCL 6 | memory enum classes. 7 | """ 8 | 9 | from numba_dpex.kernel_api.flag_enum import FlagEnum 10 | 11 | 12 | class MemoryOrder(FlagEnum): 13 | """ 14 | Analogue of :sycl_memory_order:`sycl::memory_order <>` enumeration. 15 | 16 | The integer values of the enums is kept consistent with the corresponding 17 | implementation in dpcpp. 18 | 19 | """ 20 | 21 | RELAXED = 0 22 | ACQUIRE = 1 23 | CONSUME_UNSUPPORTED = 2 24 | RELEASE = 3 25 | ACQ_REL = 4 26 | SEQ_CST = 5 27 | 28 | 29 | class MemoryScope(FlagEnum): 30 | """ 31 | Analogue of :sycl_memory_scope:`sycl::memory_scope <>` enumeration. 32 | 33 | The integer values of the enums is kept consistent with the corresponding 34 | implementation in dpcpp. 35 | 36 | """ 37 | 38 | WORK_ITEM = 0 39 | SUB_GROUP = 1 40 | WORK_GROUP = 2 41 | DEVICE = 3 42 | SYSTEM = 4 43 | 44 | 45 | class AddressSpace(FlagEnum): 46 | """Analogue of :sycl_addr_space:`SYCL address space classes <>`. 47 | 48 | The integer values of the enums is kept consistent with the corresponding 49 | implementation in dpcpp. 
50 | """ 51 | 52 | PRIVATE = 0 53 | GLOBAL = 1 54 | CONSTANT = 2 55 | LOCAL = 3 56 | GENERIC = 4 57 | -------------------------------------------------------------------------------- /docs/_templates/autoapi/macros.rst: -------------------------------------------------------------------------------- 1 | {% macro _render_item_name(obj, sig=False) -%} 2 | :py:obj:`{{ obj.name }} <{{ obj.id }}>` 3 | {%- if sig -%} 4 | \ ( 5 | {%- for arg in obj.obj.args -%} 6 | {%- if arg[0] %}{{ arg[0]|replace('*', '\*') }}{% endif -%}{{ arg[1] -}} 7 | {%- if not loop.last %}, {% endif -%} 8 | {%- endfor -%} 9 | ){%- endif -%} 10 | {%- endmacro %} 11 | 12 | {% macro _item(obj, sig=False, label='') %} 13 | * - {{ _render_item_name(obj, sig) }} 14 | - {% if label %}:summarylabel:`{{ label }}` {% endif %}{% if obj.summary %}{{ obj.summary }}{% else %}\-{% endif +%} 15 | {% endmacro %} 16 | 17 | {% macro auto_summary(objs, title='') -%} 18 | .. list-table:: {{ title }} 19 | :header-rows: 0 20 | :widths: auto 21 | :class: summarytable 22 | 23 | {% for obj in objs -%} 24 | {%- set sig = (obj.type in ['method', 'function'] and not 'property' in obj.properties) -%} 25 | 26 | {%- if 'property' in obj.properties -%} 27 | {%- set label = 'prop' -%} 28 | {%- elif 'classmethod' in obj.properties -%} 29 | {%- set label = 'class' -%} 30 | {%- elif 'abstractmethod' in obj.properties -%} 31 | {%- set label = 'abc' -%} 32 | {%- elif 'staticmethod' in obj.properties -%} 33 | {%- set label = 'static' -%} 34 | {%- else -%} 35 | {%- set label = '' -%} 36 | {%- endif -%} 37 | 38 | {{- _item(obj, sig=sig, label=label) -}} 39 | {%- endfor -%} 40 | 41 | {% endmacro %} 42 | -------------------------------------------------------------------------------- /conda-recipe/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euxo pipefail 4 | 5 | # Intel LLVM must cooperate with compiler and sysroot from conda 6 | export 
LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-}:${BUILD_PREFIX}/lib" 7 | 8 | echo "--gcc-toolchain=${BUILD_PREFIX} --sysroot=${BUILD_PREFIX}/${HOST}/sysroot -target ${HOST}" > icpx_for_conda.cfg 9 | ICPXCFG="$(pwd)/icpx_for_conda.cfg" 10 | ICXCFG="$(pwd)/icpx_for_conda.cfg" 11 | 12 | read -r GLIBC_MAJOR GLIBC_MINOR <<<"$(conda list '^sysroot_linux-64$' \ 13 | | tail -n 1 | awk '{print $2}' | grep -oP '\d+' | head -n 2 | tr '\n' ' ')" 14 | 15 | export ICXCFG 16 | export ICPXCFG 17 | 18 | export CC=icx 19 | export CXX=icpx 20 | 21 | export CMAKE_GENERATOR=Ninja 22 | # Make CMake verbose 23 | export VERBOSE=1 24 | 25 | # new llvm-spirv location 26 | # starting from dpcpp_impl_linux-64=2022.0.0=intel_3610 27 | export PATH=$CONDA_PREFIX/bin-llvm:$PATH 28 | 29 | # -wnx flags mean: --wheel --no-isolation --skip-dependency-check 30 | ${PYTHON} -m build -w -n -x 31 | ${PYTHON} -m wheel tags --remove --build "$GIT_DESCRIBE_NUMBER" \ 32 | --platform-tag "manylinux_${GLIBC_MAJOR}_${GLIBC_MINOR}_x86_64" \ 33 | dist/numba_dpex*.whl 34 | ${PYTHON} -m pip install dist/numba_dpex*.whl \ 35 | --no-build-isolation \ 36 | --no-deps \ 37 | --only-binary :all: \ 38 | --no-index \ 39 | --prefix "${PREFIX}" \ 40 | -vv 41 | 42 | # Copy wheel package 43 | if [[ -v WHEELS_OUTPUT_FOLDER ]]; then 44 | cp dist/numba_dpex*.whl "${WHEELS_OUTPUT_FOLDER[@]}" 45 | fi 46 | -------------------------------------------------------------------------------- /numba_dpex/tests/kernel_api/test_range_kernel_launch.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import numpy 6 | 7 | from numba_dpex import kernel_api as kapi 8 | 9 | 10 | def test_range_kernel_call1D(): 11 | def vecadd(item: kapi.Item, a, b, c): 12 | idx = item.get_id(0) 13 | c[idx] = a[idx] + b[idx] 14 | 15 | a = numpy.ones(100) 16 | b = numpy.ones(100) 17 | c = numpy.empty(100) 18 | 19 | 
class PrivateArray:
    """A small array meant to live in the private memory of one work-item.

    Use it to allocate small per-work-item scratch arrays for fast access
    inside a kernel. It is similar in intent to the
    :sycl_private_memory:`sycl::private_memory <>` class but is not a direct
    analogue. In pure Python the storage is a NumPy ndarray.
    """

    def __init__(self, shape, dtype, fill_zeros=False) -> None:
        """Allocate backing storage of the requested shape and dtype.

        When ``fill_zeros`` is true the storage is zero-initialised,
        otherwise it is left uninitialised.
        """

        allocate = np.zeros if fill_zeros else np.empty
        self._data = allocate(shape=shape, dtype=dtype)

    def __getitem__(self, idx_obj):
        """Return the element(s) of the backing ndarray selected by
        ``idx_obj``.
        """

        storage = self._data
        return storage[idx_obj]

    def __setitem__(self, idx_obj, val):
        """Store ``val`` at the position(s) of the backing ndarray selected
        by ``idx_obj``.
        """

        storage = self._data
        storage[idx_obj] = val
@dpex.kernel 48 | def _kernel(nd_item: NdItem, a): 49 | _increment_value(nd_item, a) 50 | 51 | N = 16 52 | a = dpnp.ones(N, dtype=dpnp.int32) 53 | 54 | dpex.call_kernel(_kernel, dpex.NdRange((N,), (N,)), a) 55 | 56 | assert a[0] == N * 2 57 | -------------------------------------------------------------------------------- /docs/source/user_guide/kernel_programming/supported-python-features.rst: -------------------------------------------------------------------------------- 1 | 2 | A kapi function when run in the purely interpreted mode by the CPython 3 | interpreter is a regular Python function, and as such in theory any Python 4 | feature can be used in the body of the function. In practice, to be 5 | JIT compilable and executable on a device only a subset of Python language 6 | features are supported in a kapi function. The restriction stems from both 7 | limitations in the Numba compiler tooling and also from the device-specific 8 | calling convention and other restrictions applied by a device's ABI. 9 | 10 | This section provides a partial support matrix for Python features with respect 11 | to their usage in a kapi function. 12 | 13 | 14 | Built-in types 15 | -------------- 16 | 17 | **Supported Types** 18 | 19 | - ``int`` 20 | - ``float`` 21 | 22 | **Unsupported Types** 23 | 24 | - ``complex`` 25 | - ``bool`` 26 | - ``None`` 27 | - ``tuple`` 28 | 29 | Built-in functions 30 | ------------------ 31 | 32 | The following built-in functions are supported: 33 | 34 | - ``abs()`` 35 | - ``float`` 36 | - ``int`` 37 | - ``len()`` 38 | - ``range()`` 39 | - ``round()`` 40 | 41 | Unsupported Constructs 42 | ---------------------- 43 | 44 | The following Python constructs are **not supported**: 45 | 46 | - Exception handling (``try .. except``, ``try .. 
finally``) 47 | - Context management (the ``with`` statement) 48 | - Comprehensions (either list, dict, set or generator comprehensions) 49 | - Generator (any ``yield`` statements) 50 | - The ``raise`` statement 51 | - The ``assert`` statement 52 | -------------------------------------------------------------------------------- /numba_dpex/tests/kernel_api/test_ndrange_kernel_launch.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import numpy 6 | 7 | from numba_dpex import kernel_api as kapi 8 | 9 | 10 | def test_range_kernel_call1D(): 11 | def vecadd(item: kapi.NdItem, a, b, c): 12 | idx = item.get_global_id(0) 13 | c[idx] = a[idx] + b[idx] 14 | 15 | a = numpy.ones(100) 16 | b = numpy.ones(100) 17 | c = numpy.empty(100) 18 | 19 | kapi.call_kernel(vecadd, kapi.NdRange((100,), (20,)), a, b, c) 20 | 21 | assert numpy.allclose(c, a + b) 22 | 23 | 24 | def test_range_kernel_call2D(): 25 | def vecadd(item: kapi.NdItem, a, b, c): 26 | idx = item.get_global_id(0) 27 | jdx = item.get_global_id(1) 28 | c[idx, jdx] = a[idx, jdx] + b[idx, jdx] 29 | 30 | a = numpy.ones((10, 10)) 31 | b = numpy.ones((10, 10)) 32 | c = numpy.empty((10, 10)) 33 | 34 | kapi.call_kernel(vecadd, kapi.NdRange((10, 10), (2, 2)), a, b, c) 35 | 36 | assert numpy.allclose(c, a + b) 37 | 38 | 39 | def test_range_kernel_call3D(): 40 | def vecadd(item: kapi.Item, a, b, c): 41 | idx = item.get_global_id(0) 42 | jdx = item.get_global_id(1) 43 | kdx = item.get_global_id(2) 44 | c[idx, jdx, kdx] = a[idx, jdx, kdx] + b[idx, jdx, kdx] 45 | 46 | a = numpy.ones((8, 8, 8)) 47 | b = numpy.ones((8, 8, 8)) 48 | c = numpy.empty((8, 8, 8)) 49 | 50 | kapi.call_kernel(vecadd, kapi.NdRange((8, 8, 8), (2, 2, 2)), a, b, c) 51 | 52 | assert numpy.allclose(c, a + b) 53 | -------------------------------------------------------------------------------- 
/numba_dpex/tests/dpjit_tests/test_slicing.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | """ 6 | Tests for slicing dpnp.ndarray 7 | """ 8 | 9 | import dpnp 10 | import numpy 11 | 12 | from numba_dpex import dpjit 13 | 14 | 15 | def test_1d_slicing(): 16 | """Tests if dpjit properly computes strides and returns them to Python.""" 17 | 18 | def _tester(a): 19 | return a[1:5] 20 | 21 | a = dpnp.arange(10) 22 | b = dpnp.asnumpy(dpjit(_tester)(a)) 23 | 24 | na = numpy.arange(10) 25 | nb = _tester(na) 26 | 27 | assert (b == nb).all() 28 | 29 | 30 | def test_1d_slicing2(): 31 | """Tests if dpjit properly computes strides and returns them to Python.""" 32 | 33 | def _tester(a): 34 | b = a[1:4] 35 | a[6:9] = b 36 | 37 | a = dpnp.arange(10) 38 | b = dpnp.asnumpy(dpjit(_tester)(a)) 39 | 40 | na = numpy.arange(10) 41 | nb = _tester(na) 42 | 43 | assert (b == nb).all() 44 | 45 | 46 | def test_multidim_slicing(): 47 | """Tests if dpjit properly slices strides and returns them to Python.""" 48 | 49 | def _tester(a, b): 50 | b[:, :, 0] = a 51 | 52 | a = dpnp.arange(64, dtype=numpy.int64) 53 | a = a.reshape(4, 16) 54 | b = dpnp.empty((4, 16, 4), dtype=numpy.int64) 55 | dpjit(_tester)(a, b) 56 | 57 | na = numpy.arange(64, dtype=numpy.int64) 58 | na = na.reshape(4, 16) 59 | nb = numpy.empty((4, 16, 4), dtype=numpy.int64) 60 | _tester(na, nb) 61 | 62 | assert (nb[:, :, 0] == dpnp.asnumpy(b)[:, :, 0]).all() 63 | -------------------------------------------------------------------------------- /numba_dpex/tests/core/test_itanium_mangler_extension.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import pytest 6 | from numba import float32, float64, int32, int64, uint32, uint64 7 | from 
numba.core import types 8 | 9 | import numba_dpex.core.utils.itanium_mangler as itanium_mangler 10 | from numba_dpex.kernel_api import AddressSpace as address_space 11 | 12 | list_of_dtypes = [ 13 | (int32, "i"), 14 | (int64, "x"), 15 | (uint32, "j"), 16 | (uint64, "y"), 17 | (float32, "f"), 18 | (float64, "d"), 19 | ] 20 | 21 | 22 | @pytest.fixture(params=list_of_dtypes) 23 | def dtypes(request): 24 | return request.param 25 | 26 | 27 | list_of_addrspaces = [ 28 | (address_space.PRIVATE.value, "3AS0"), 29 | (address_space.GLOBAL.value, "3AS1"), 30 | (address_space.LOCAL.value, "3AS3"), 31 | (address_space.GENERIC.value, "3AS4"), 32 | ] 33 | 34 | 35 | @pytest.fixture(params=list_of_addrspaces) 36 | def addrspaces(request): 37 | return request.param 38 | 39 | 40 | def test_mangling_arg_type(dtypes): 41 | dtype, expected_str = dtypes 42 | got = itanium_mangler.mangle_type(types.CPointer(dtype)) 43 | expected = "P" + expected_str 44 | assert got == expected 45 | 46 | 47 | def test_mangling_arg_type_2(dtypes, addrspaces): 48 | dtype, expected_dtype_str = dtypes 49 | addrspace, expected_addrspace_str = addrspaces 50 | got = itanium_mangler.mangle_type( 51 | types.CPointer(dtype, addrspace=addrspace) 52 | ) 53 | expected = "PU" + expected_addrspace_str + expected_dtype_str 54 | assert got == expected 55 | -------------------------------------------------------------------------------- /docs/source/user_guide/debugging/altering.rst: -------------------------------------------------------------------------------- 1 | .. include:: ./../../ext_links.txt 2 | 3 | Altering Execution 4 | ================== 5 | 6 | See `GDB* documentation `_. 7 | 8 | .. _assignment-to-variables: 9 | 10 | Assignment to Variables 11 | ----------------------- 12 | 13 | To alter the value of a variable, evaluate an assignment expression. 14 | This also works for function arguments. 15 | 16 | .. note:: 17 | 18 | Altering arguments has a limitation. 
For it to work correctly 19 | arguments should not be modified in code. 20 | See `Numba issue `_. 21 | 22 | Example 23 | ``````` 24 | 25 | Source code :file:`numba_dpex/examples/debug/side-by-side-2.py`: 26 | 27 | .. literalinclude:: ./../../../../numba_dpex/examples/debug/side-by-side-2.py 28 | :pyobject: common_loop_body 29 | :linenos: 30 | :lineno-match: 31 | :emphasize-lines: 6 32 | 33 | Debug session: 34 | 35 | .. code-block:: shell-session 36 | :emphasize-lines: 11- 37 | 38 | $ gdb-oneapi -q python 39 | ... 40 | (gdb) set environment NUMBA_OPT 0 41 | (gdb) set environment NUMBA_EXTEND_VARIABLE_LIFETIMES 1 42 | (gdb) break side-by-side-2.py:29 if param_a == 5 43 | ... 44 | (gdb) run numba_dpex/examples/debug/side-by-side-2.py --api=numba-dpex-kernel 45 | ... 46 | Thread 2.1 hit Breakpoint 1, with SIMD lane 5, __main__::common_loop_body (i=5, a=..., b=...) at side-by-side-2.py:29 47 | 29 result = param_c + param_d 48 | (gdb) print param_c 49 | $1 = 15 50 | (gdb) print param_c=200 51 | $2 = 200 52 | (gdb) print param_c 53 | $3 = 200 54 | -------------------------------------------------------------------------------- /docs/backups/user_guides/kernel_programming_guide/synchronization.rst: -------------------------------------------------------------------------------- 1 | Synchronization Functions 2 | ========================= 3 | 4 | Numba-dpex only supports some of the SYCL synchronization operations. For 5 | synchronization of all threads in the same thread block, numba-dpex provides 6 | a helper function called ``numba_dpex.barrier()``. This function implements the 7 | same pattern as barriers in traditional multi-threaded programming: invoking the 8 | function forces a thread to wait until all threads in the block reach the 9 | barrier, at which point it returns control to all its callers. 
10 | 11 | ``numba_dpex.barrier()`` supports two memory fence options: 12 | 13 | - ``numba_dpex.GLOBAL_MEM_FENCE``: The barrier function will queue a memory 14 | fence to ensure correct ordering of memory operations to global memory. Using 15 | the option can be useful when work-items, for example, write to buffer or 16 | image objects and then want to read the updated data. Passing no arguments to 17 | ``numba_dpex.barrier()`` is equivalent to setting the global memory fence 18 | option. For example, 19 | 20 | .. literalinclude:: ../../../numba_dpex/examples/barrier.py 21 | :pyobject: no_arg_barrier_support 22 | 23 | - ``numba_dpex.LOCAL_MEM_FENCE``: The barrier function will either flush 24 | any variables stored in local memory or queue a memory fence to ensure 25 | correct ordering of memory operations to local memory. For example, 26 | 27 | .. literalinclude:: ../../../numba_dpex/examples/barrier.py 28 | :pyobject: local_memory 29 | 30 | 31 | .. note:: 32 | 33 | The ``numba_dpex.barrier()`` function is semantically equivalent to 34 | ``numba.cuda.syncthreads``. 35 | -------------------------------------------------------------------------------- /scripts/run_examples.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | check() { 6 | echo "Run $1 ..." 
7 | python "$1" | grep "$SYCL_DEVICE_FILTER" 8 | # python $1 | grep Done 9 | } 10 | 11 | run_checks() { 12 | check numba_dpex/examples/atomic_op.py 13 | check numba_dpex/examples/barrier.py 14 | check numba_dpex/examples/blacksholes_kernel.py 15 | check numba_dpex/examples/blacksholes_njit.py 16 | check numba_dpex/examples/dpex_func.py 17 | check numba_dpex/examples/dpex_with_context.py 18 | check numba_dpex/examples/matmul.py 19 | check numba_dpex/examples/pairwise_distance.py 20 | check numba_dpex/examples/rand.py 21 | check numba_dpex/examples/sum2D.py 22 | check numba_dpex/examples/sum_ndarray.py 23 | check numba_dpex/examples/sum.py 24 | check numba_dpex/examples/sum_reduction_ocl.py 25 | check numba_dpex/examples/sum_reduction.py 26 | check numba_dpex/examples/sum_reduction_recursive_ocl.py 27 | # check numba_dpex/examples/usm_ndarray.py # See https://github.com/IntelPython/numba-dpex/issues/436 28 | 29 | check numba_dpex/examples/auto_offload_examples/sum-1d.py 30 | check numba_dpex/examples/auto_offload_examples/sum-2d.py 31 | check numba_dpex/examples/auto_offload_examples/sum-3d.py 32 | check numba_dpex/examples/auto_offload_examples/sum-4d.py 33 | check numba_dpex/examples/auto_offload_examples/sum-5d.py 34 | 35 | check numba_dpex/examples/debug/dpex_func.py 36 | check numba_dpex/examples/debug/sum.py 37 | } 38 | 39 | run_with_device() { 40 | echo "Run with SYCL_DEVICE_FILTER=$1 ..." 
41 | SYCL_DEVICE_FILTER=$1 run_checks 42 | } 43 | 44 | run_with_device level_zero:gpu:0 45 | run_with_device opencl:gpu:0 46 | run_with_device opencl:cpu:0 47 | 48 | echo Done 49 | -------------------------------------------------------------------------------- /numba_dpex/dpctl_iface/_helpers.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from numba.core import types 6 | 7 | from numba_dpex.core.types.kernel_api.local_accessor import LocalAccessorType 8 | 9 | 10 | def numba_type_to_dpctl_typenum(context, ty): 11 | """ 12 | This function looks up the dpctl defined enum values from 13 | ``DPCTLKernelArgType``. 14 | """ 15 | 16 | from dpctl._sycl_queue import kernel_arg_type as kargty 17 | 18 | if ty == types.boolean: 19 | return context.get_constant(types.int32, kargty.dpctl_uint8.value) 20 | elif ty == types.int32 or isinstance(ty, types.scalars.IntegerLiteral): 21 | return context.get_constant(types.int32, kargty.dpctl_int32.value) 22 | elif ty == types.uint32: 23 | return context.get_constant(types.int32, kargty.dpctl_uint32.value) 24 | elif ty == types.int64: 25 | return context.get_constant(types.int32, kargty.dpctl_int64.value) 26 | elif ty == types.uint64: 27 | return context.get_constant(types.int32, kargty.dpctl_uint64.value) 28 | elif ty == types.float32: 29 | return context.get_constant(types.int32, kargty.dpctl_float32.value) 30 | elif ty == types.float64: 31 | return context.get_constant(types.int32, kargty.dpctl_float64.value) 32 | elif ty == types.voidptr or isinstance(ty, types.CPointer): 33 | return context.get_constant(types.int32, kargty.dpctl_void_ptr.value) 34 | elif isinstance(ty, LocalAccessorType): 35 | return context.get_constant( 36 | types.int32, kargty.dpctl_local_accessor.value 37 | ) 38 | else: 39 | raise NotImplementedError 40 | 
-------------------------------------------------------------------------------- /numba_dpex/tests/core/types/USMNdArray/test_array_creation_errors.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import dpctl 6 | import pytest 7 | 8 | from numba_dpex.core.types import USMNdArray, dpctl_types, float32 9 | 10 | 11 | def test_usmndarray_negative_tests(): 12 | default_device = dpctl.SyclDevice().filter_string 13 | 14 | usmarr1 = USMNdArray(1, device=None, queue=None, dtype=float32) 15 | assert usmarr1.dtype.name == "float32" 16 | assert usmarr1.ndim == 1 17 | assert usmarr1.layout == "C" 18 | assert usmarr1.addrspace == 1 19 | assert usmarr1.usm_type == "device" 20 | 21 | assert usmarr1.queue.sycl_device == default_device 22 | 23 | usmarr2 = USMNdArray(1, device=default_device, queue=None, dtype=float32) 24 | assert usmarr2.dtype.name == "float32" 25 | assert usmarr2.ndim == 1 26 | assert usmarr2.layout == "C" 27 | assert usmarr2.addrspace == 1 28 | assert usmarr2.usm_type == "device" 29 | assert usmarr2.queue.sycl_device == default_device 30 | 31 | queue = dpctl_types.DpctlSyclQueue(dpctl.SyclQueue()) 32 | 33 | usmarr3 = USMNdArray(1, device=None, queue=queue, dtype=float32) 34 | assert usmarr3.dtype.name == "float32" 35 | assert usmarr3.ndim == 1 36 | assert usmarr3.layout == "C" 37 | assert usmarr3.addrspace == 1 38 | assert usmarr3.usm_type == "device" 39 | 40 | with pytest.raises(TypeError): 41 | USMNdArray(1, device=default_device, queue=queue, dtype=float32) 42 | 43 | with pytest.raises(TypeError): 44 | USMNdArray(1, queue=0, dtype=float32) 45 | 46 | with pytest.raises(TypeError): 47 | USMNdArray(1, device=0, dtype=float32) 48 | -------------------------------------------------------------------------------- /numba_dpex/examples/kernel/vector_sum2D.py: 
-------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 4 | # 5 | # SPDX-License-Identifier: Apache-2.0 6 | 7 | """The example demonstrates a 2-D vector addition kernel. 8 | """ 9 | 10 | import dpctl 11 | import dpctl.tensor as dpt 12 | import numpy as np 13 | 14 | import numba_dpex as ndpx 15 | 16 | 17 | @ndpx.kernel 18 | def data_parallel_sum(item, a, b, c): 19 | """ 20 | A two-dimensional vector addition example using the ``kernel`` decorator. 21 | """ 22 | i = item.get_id(0) 23 | j = item.get_id(1) 24 | c[i, j] = a[i, j] + b[i, j] 25 | 26 | 27 | def driver(a, b, c, global_size): 28 | ndpx.call_kernel(data_parallel_sum, global_size, a, b, c) 29 | 30 | 31 | def main(): 32 | # Array dimensions 33 | X = 8 34 | Y = 8 35 | global_size = ndpx.Range(X, Y) 36 | 37 | a = np.arange(X * Y, dtype=np.float32).reshape(X, Y) 38 | b = np.arange(X * Y, dtype=np.float32).reshape(X, Y) 39 | c = np.empty_like(a).reshape(X, Y) 40 | 41 | c = a + b 42 | 43 | device = dpctl.select_default_device() 44 | a_dpt = dpt.arange(X * Y, dtype=dpt.float32, device=device) 45 | a_dpt = dpt.reshape(a_dpt, (X, Y)) 46 | b_dpt = dpt.arange(X * Y, dtype=dpt.float32, device=device) 47 | b_dpt = dpt.reshape(b_dpt, (X, Y)) 48 | c_dpt = dpt.empty_like(a_dpt) 49 | c_dpt = dpt.reshape(c_dpt, (X, Y)) 50 | 51 | print("Executing on device:") 52 | device.print_device_info() 53 | 54 | print("Running kernel ...") 55 | driver(a_dpt, b_dpt, c_dpt, global_size) 56 | c_out = dpt.asnumpy(c_dpt) 57 | assert np.allclose(c, c_out) 58 | 59 | print("Done...") 60 | 61 | 62 | if __name__ == "__main__": 63 | main() 64 | -------------------------------------------------------------------------------- /numba_dpex/core/types/kernel_api/ranges.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation 2 | # 3 | # 
SPDX-License-Identifier: Apache-2.0 4 | 5 | from contextlib import ExitStack 6 | 7 | from numba.core import cgutils, errors, types 8 | 9 | 10 | class RangeType(types.Type): 11 | """Numba-dpex type corresponding to 12 | :class:`numba_dpex.kernel_api.ranges.Range` 13 | """ 14 | 15 | def __init__(self, ndim: int): 16 | self._ndim = ndim 17 | if ndim < 1 or ndim > 3: 18 | raise errors.TypingError( 19 | "RangeType can only have 1,2, or 3 dimensions" 20 | ) 21 | super(RangeType, self).__init__(name="Range<" + str(ndim) + ">") 22 | 23 | @property 24 | def ndim(self): 25 | return self._ndim 26 | 27 | @property 28 | def key(self): 29 | return self._ndim 30 | 31 | @property 32 | def mangling_args(self): 33 | args = [self.ndim] 34 | return self.__class__.__name__, args 35 | 36 | 37 | class NdRangeType(types.Type): 38 | """Numba-dpex type corresponding to 39 | :class:`numba_dpex.kernel_api.ranges.NdRange` 40 | """ 41 | 42 | def __init__(self, ndim: int): 43 | self._ndim = ndim 44 | if ndim < 1 or ndim > 3: 45 | raise errors.TypingError( 46 | "RangeType can only have 1,2, or 3 dimensions" 47 | ) 48 | super(NdRangeType, self).__init__(name="NdRange<" + str(ndim) + ">") 49 | 50 | @property 51 | def ndim(self): 52 | return self._ndim 53 | 54 | @property 55 | def key(self): 56 | return self._ndim 57 | 58 | @property 59 | def mangling_args(self): 60 | args = [self.ndim] 61 | return self.__class__.__name__, args 62 | -------------------------------------------------------------------------------- /docs/backups/user_guides/kernel_programming_guide/reduction.rst: -------------------------------------------------------------------------------- 1 | Reduction on SYCL-supported Devices 2 | =================================== 3 | 4 | Numba-dpex does not yet provide any specific decorator to implement 5 | reduction kernels. However, a kernel reduction can be written explicitly. This 6 | section provides two approaches for writing a reduction kernel as a 7 | ``numba_dpex.kernel`` function. 
8 | 9 | 10 | Example 1 11 | --------- 12 | 13 | This example demonstrates a summation reduction on a one-dimensional array. 14 | 15 | Full example can be found at ``numba_dpex/examples/sum_reduction.py``. 16 | 17 | In this example, to reduce the array we invoke the kernel multiple times. 18 | 19 | .. literalinclude:: ../../../numba_dpex/examples/sum_reduction.py 20 | :pyobject: sum_reduction_kernel 21 | 22 | .. literalinclude:: ../../../numba_dpex/examples/sum_reduction.py 23 | :pyobject: sum_reduce 24 | 25 | Example 2 26 | --------- 27 | 28 | Full example can be found at 29 | ``numba_dpex/examples/sum_reduction_recursive_ocl.py``. 30 | 31 | .. literalinclude:: ../../../numba_dpex/examples/sum_reduction_recursive_ocl.py 32 | :pyobject: sum_reduction_kernel 33 | 34 | .. literalinclude:: ../../../numba_dpex/examples/sum_reduction_recursive_ocl.py 35 | :pyobject: sum_recursive_reduction 36 | 37 | .. literalinclude:: ../../../numba_dpex/examples/sum_reduction_recursive_ocl.py 38 | :pyobject: sum_reduce 39 | 40 | .. note:: 41 | 42 | Numba-dpex does not yet provide any analogue to the ``numba.cuda.reduce`` 43 | decorator for writing reduction kernels. Such a decorator will be added in 44 | future releases. 45 | 46 | Full examples 47 | ------------- 48 | 49 | - ``numba_dpex/examples/sum_reduction_recursive_ocl.py`` 50 | - ``numba_dpex/examples/sum_reduction_ocl.py`` 51 | - ``numba_dpex/examples/sum_reduction.py`` 52 | -------------------------------------------------------------------------------- /numba_dpex/examples/kernel/device_func.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | """Demonstrates the usage of the :func:`numba_dpex.device_func` decorator. 6 | 7 | Refer to the API documentation and the Kernel programming guide for further 8 | details. 
9 | """ 10 | 11 | import dpnp 12 | 13 | import numba_dpex as dpex 14 | from numba_dpex import kernel_api as kapi 15 | 16 | 17 | @dpex.device_func 18 | def increment_by_1(a): 19 | """A device callable function that can be invoked from a kernel or 20 | another device function. 21 | """ 22 | return a + 1 23 | 24 | 25 | @dpex.device_func 26 | def increment_and_sum_up(nd_item: kapi.NdItem, a): 27 | """Demonstrates the usage of group_barrier and NdItem usage in a 28 | device_func. 29 | """ 30 | i = nd_item.get_global_id(0) 31 | 32 | a[i] += 1 33 | kapi.group_barrier(nd_item.get_group(), kapi.MemoryScope.DEVICE) 34 | 35 | if i == 0: 36 | for idx in range(1, a.size): 37 | a[0] += a[idx] 38 | 39 | 40 | @dpex.kernel 41 | def kernel1(item: kapi.Item, a, b): 42 | """Demonstrates calling a device function from a kernel.""" 43 | i = item.get_id(0) 44 | b[i] = increment_by_1(a[i]) 45 | 46 | 47 | @dpex.kernel 48 | def kernel2(nd_item: kapi.NdItem, a): 49 | """The kernel delegates everything to a device_func and calls it.""" 50 | increment_and_sum_up(nd_item, a) 51 | 52 | 53 | if __name__ == "__main__": 54 | # Array size 55 | N = 100 56 | a = dpnp.ones(N, dtype=dpnp.int32) 57 | b = dpnp.zeros(N, dtype=dpnp.int32) 58 | 59 | dpex.call_kernel(kernel1, dpex.Range(N), a, b) 60 | # b should be [2, 2, ...., 2] 61 | print(b) 62 | 63 | dpex.call_kernel(kernel2, dpex.NdRange((N,), (N,)), b) 64 | # b[0] should be 300 65 | print(b[0]) 66 | -------------------------------------------------------------------------------- /docs/backups/user_guides/kernel_programming_guide/memory_allocation_address_space.rst: -------------------------------------------------------------------------------- 1 | Supported Address Space Qualifiers 2 | ================================== 3 | 4 | The address space qualifier may be used to specify the region of memory that is 5 | used to allocate the object. 6 | 7 | Numba-dpex supports three disjoint named address spaces: 8 | 9 | 1. 
Global Address Space 10 | Global Address Space refers to memory objects allocated from the global 11 | memory pool and will be shared among all work-items. Arguments passed to any 12 | kernel are allocated in the global address space. In the below example, 13 | arguments `a`, `b` and `c` will be allocated in the global address space: 14 | 15 | .. literalinclude:: ../../../numba_dpex/examples/sum.py 16 | 17 | 18 | 2. Local Address Space 19 | Local Address Space refers to memory objects that need to be allocated in 20 | the local memory pool and are shared by all work-items of a work-group. 21 | Numba-dpex does not support passing arguments that are allocated in the 22 | local address space to `@numba_dpex.kernel`. Users are allowed to allocate 23 | static arrays in the local address space inside the `@numba_dpex.kernel`. In 24 | the example below `numba_dpex.local.array(shape, dtype)` is the API used to 25 | allocate a static array in the local address space: 26 | 27 | .. literalinclude:: ../../../numba_dpex/examples/barrier.py 28 | :lines: 54-87 29 | 30 | 3. Private Address Space 31 | Private Address Space refers to memory objects that are local to each 32 | work-item and are not shared with any other work-item. In the example below 33 | `numba_dpex.private.array(shape, dtype)` is the API used to allocate a 34 | static array in the private address space: 35 | 36 | .. 
literalinclude:: ../../../numba_dpex/examples/kernel_private_memory.py 37 | -------------------------------------------------------------------------------- /.github/workflows/coverage.yml: -------------------------------------------------------------------------------- 1 | name: Coverage 2 | on: 3 | push: 4 | branches: 5 | - main 6 | pull_request: 7 | 8 | permissions: read-all 9 | 10 | jobs: 11 | main: 12 | name: Generate coverage and push to Coveralls.io 13 | runs-on: ubuntu-latest 14 | permissions: 15 | pull-requests: write 16 | defaults: 17 | run: 18 | shell: bash -l {0} 19 | steps: 20 | - name: Cancel Previous Runs 21 | uses: styfle/cancel-workflow-action@0.12.1 22 | with: 23 | access_token: ${{ github.token }} 24 | 25 | - name: Checkout repo 26 | uses: actions/checkout@v4 27 | with: 28 | fetch-depth: 0 29 | 30 | - uses: conda-incubator/setup-miniconda@v3 31 | with: 32 | python-version: '3.10' 33 | miniforge-variant: Mambaforge 34 | miniforge-version: latest 35 | activate-environment: "coverage" 36 | channel-priority: "disabled" 37 | environment-file: environment/coverage.yml 38 | 39 | - name: Build numba-dpex 40 | run: | 41 | export PATH=$CONDA/bin-llvm:$PATH 42 | CC=icx CXX=icpx python setup.py develop 43 | 44 | - name: Test installation 45 | run: | 46 | conda list 47 | python -c "import numba_dpex; print(numba_dpex.__file__)" 48 | 49 | - name: Dump coverage test environment 50 | run: | 51 | conda env export > /tmp/env-cov.yml 52 | cat /tmp/env-cov.yml 53 | 54 | # Ignoring test due to opencl driver optimization bug 55 | - name: Run tests with coverage 56 | run: | 57 | pytest -q --cov=./ --cov-report xml --pyargs numba_dpex \ 58 | -k 'not test_1d_strided_dpnp_array_in_kernel[2]' 59 | 60 | - name: Coveralls 61 | uses: coverallsapp/github-action@v2 62 | -------------------------------------------------------------------------------- /numba_dpex/tests/dpjit_tests/parfors/prange/test_pairwise_distance.py: 
-------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import dpctl 6 | import dpnp 7 | import numba as nb 8 | import pytest 9 | 10 | from numba_dpex import dpjit 11 | 12 | 13 | def test_pairwise_distance(): 14 | @dpjit 15 | def pairwise_distance(X1, X2, D): 16 | """Naïve pairwise distance impl - take an array representing M points in N 17 | dimensions, and return the M x M matrix of Euclidean distances 18 | 19 | Args: 20 | X1 : Set of points 21 | X2 : Set of points 22 | D : Outputted distance matrix 23 | """ 24 | # Size of inputs 25 | X1_rows = X1.shape[0] 26 | X2_rows = X2.shape[0] 27 | X1_cols = X1.shape[1] 28 | 29 | # TODO: get rid of it once prange supports dtype 30 | # https://github.com/IntelPython/numba-dpex/issues/1063 31 | float0 = X1.dtype.type(0.0) 32 | 33 | # Outermost parallel loop over the matrix X1 34 | for i in nb.prange(X1_rows): 35 | # Loop over the matrix X2 36 | for j in range(X2_rows): 37 | d = float0 38 | # Compute Euclidean distance 39 | for k in range(X1_cols): 40 | tmp = X1[i, k] - X2[j, k] 41 | d += tmp * tmp 42 | # Write computed distance to distance matrix 43 | D[i, j] = dpnp.sqrt(d) 44 | 45 | q = dpctl.SyclQueue() 46 | X1 = dpnp.ones((100, 2), sycl_queue=q) 47 | X2 = dpnp.ones((100, 2), sycl_queue=q) 48 | D = dpnp.empty((100, 100), sycl_queue=q) 49 | 50 | try: 51 | pairwise_distance(X1, X2, D) 52 | except: 53 | pytest.fail("Failed to compile prange loop for pairwise distance calc") 54 | -------------------------------------------------------------------------------- /docs/source/user_guide/debugging/debugging_environment.rst: -------------------------------------------------------------------------------- 1 | .. include:: ./../../ext_links.txt 2 | 3 | Configure debugging environment 4 | ================================= 5 | 6 | 1) Activate the debugger and compiler: 7 | 8 | .. 
code-block:: bash 9 | 10 | export ONEAPI_ROOT=/path/to/oneapi 11 | source $ONEAPI_ROOT/debugger/latest/env/vars.sh 12 | source $ONEAPI_ROOT/compiler/latest/env/vars.sh 13 | 14 | 2) Create and activate a conda environment with numba-dpex installed: 15 | 16 | .. code-block:: bash 17 | 18 | conda create -n numba-dpex-dev numba-dpex 19 | conda activate numba-dpex-dev 20 | 21 | 3) Activate NEO drivers (optional). 22 | 23 | If you want to use the local NEO driver, activate the variables for it. See 24 | the :ref:`NEO-driver`. 25 | 26 | 4) Check the debugging environment. 27 | 28 | You can verify that the environment works with the following example: 29 | 30 | .. literalinclude:: ./../../../../numba_dpex/examples/debug/simple_sum.py 31 | :lines: 5- 32 | :linenos: 33 | :lineno-match: 34 | 35 | Launch the Intel® Distribution for GDB* and set a breakpoint in the kernel: 36 | 37 | .. code-block:: shell-session 38 | 39 | $ gdb-oneapi -q --args python simple_sum.py 40 | (gdb) break simple_sum.py:22 41 | No source file named simple_sum.py. 42 | Make breakpoint pending on future shared library load? (y or [n]) y 43 | Breakpoint 1 (simple_sum.py:22) pending. 44 | (gdb) run 45 | 46 | In the output you can see that the breakpoint was hit successfully: 47 | 48 | .. code-block:: shell-session 49 | 50 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at simple_sum.py:22 51 | 22 i = dpex.get_global_id(0) 52 | (gdb) continue 53 | Done... 54 | ... 
55 | -------------------------------------------------------------------------------- /numba_dpex/tests/kernel_tests/test_inline_threshold_config.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from numba.core import compiler 6 | 7 | import numba_dpex as dpex 8 | from numba_dpex.kernel_api import Item 9 | 10 | 11 | def kernel_func(item: Item, a, b, c): 12 | i = item.get_id(0) 13 | c[i] = a[i] + b[i] 14 | 15 | 16 | def test_inline_threshold_set_using_config(): 17 | oldConfig = dpex.config.INLINE_THRESHOLD 18 | dpex.config.INLINE_THRESHOLD = None 19 | 20 | disp = dpex.kernel(kernel_func) 21 | flags = compiler.Flags() 22 | disp.targetdescr.options.parse_as_flags(flags, disp.targetoptions) 23 | 24 | assert flags.inline_threshold == 0 25 | 26 | dpex.config.INLINE_THRESHOLD = 2 27 | 28 | flags = compiler.Flags() 29 | disp.targetdescr.options.parse_as_flags(flags, disp.targetoptions) 30 | 31 | assert flags.inline_threshold == 2 32 | 33 | dpex.config.INLINE_THRESHOLD = oldConfig 34 | 35 | 36 | def test_inline_threshold_set_using_decorator_option(): 37 | """ 38 | Test setting the inline_threshold value using the kernel decorator flag 39 | """ 40 | 41 | disp = dpex.kernel(inline_threshold=2)(kernel_func) 42 | flags = compiler.Flags() 43 | disp.targetdescr.options.parse_as_flags(flags, disp.targetoptions) 44 | 45 | assert flags.inline_threshold == 2 46 | 47 | 48 | def test_inline_threshold_set_using_decorator_supersedes_config_option(): 49 | oldConfig = dpex.config.INLINE_THRESHOLD 50 | dpex.config.INLINE_THRESHOLD = None 51 | 52 | disp = dpex.kernel(inline_threshold=3)(kernel_func) 53 | flags = compiler.Flags() 54 | disp.targetdescr.options.parse_as_flags(flags, disp.targetoptions) 55 | 56 | print(flags.inline_threshold) 57 | assert flags.inline_threshold == 3 58 | 59 | dpex.config.INLINE_THRESHOLD = oldConfig 60 | 
# File: numba_dpex/tests/core/runtime/test_llvm_registration.py
# SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

import llvmlite.binding as llb

from numba_dpex.core import runtime


def test_llvm_symbol_registered():
    """Verify that every function exported by the ``_dpexrt_python`` module
    can be looked up through llvmlite.

    For each name, the address llvmlite resolves for the symbol must equal
    the attribute of the same name on ``runtime._dpexrt_python``.
    """
    exported_symbols = (
        "DPEXRT_sycl_usm_ndarray_from_python",
        "DPEXRT_sycl_usm_ndarray_to_python_acqref",
        "NRT_ExternalAllocator_new_for_usm",
        "DPEXRT_sycl_queue_from_python",
        "DPEXRT_sycl_queue_to_python",
        "DPEXRTQueue_CreateFromFilterString",
        "DpexrtQueue_SubmitRange",
        "DPEXRT_MemInfo_alloc",
        "DPEXRT_MemInfo_fill",
    )

    for symbol in exported_symbols:
        assert llb.address_of_symbol(symbol) == getattr(
            runtime._dpexrt_python, symbol
        )
/numba_dpex/tests/codegen/test_intenum_literal_codegen.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import re 6 | 7 | import dpctl 8 | from numba.core import types 9 | 10 | import numba_dpex as dpex 11 | from numba_dpex import DpctlSyclQueue, DpnpNdArray, int64 12 | from numba_dpex.kernel_api.flag_enum import FlagEnum 13 | 14 | 15 | def test_compilation_as_literal_constant(): 16 | """Tests if FlagEnum objects are treated as scalar constants inside 17 | numba-dpex generated code. 18 | 19 | The test case compiles the kernel `pass_flags_to_func` that includes a 20 | call to the device_func `bitwise_or_flags`. The `bitwise_or_flags` function 21 | is passed two FlagEnum arguments. The test case evaluates the generated 22 | LLVM IR for `pass_flags_to_func` to see if the call to `bitwise_or_flags` 23 | has the scalar arguments `i64 1` and `i64 2`. 
24 | """ 25 | 26 | class PseudoFlags(FlagEnum): 27 | FLAG1 = 1 28 | FLAG2 = 2 29 | 30 | @dpex.device_func 31 | def bitwise_or_flags(flag1, flag2): 32 | return flag1 | flag2 33 | 34 | def pass_flags_to_func(a): 35 | f1 = PseudoFlags.FLAG1 36 | f2 = PseudoFlags.FLAG2 37 | a[0] = bitwise_or_flags(f1, f2) 38 | 39 | queue_ty = DpctlSyclQueue(dpctl.SyclQueue()) 40 | i64arr_ty = DpnpNdArray(ndim=1, dtype=int64, layout="C", queue=queue_ty) 41 | kernel_sig = types.void(i64arr_ty) 42 | 43 | disp = dpex.kernel(inline_threshold=0)(pass_flags_to_func) 44 | disp.compile(kernel_sig) 45 | kcres = disp.overloads[kernel_sig.args] 46 | llvm_ir_mod = kcres.library._final_module.__str__() 47 | 48 | pattern = re.compile( 49 | r"call spir_func i32 @\_Z.*bitwise\_or" 50 | r"\_flags.*\(i64\*\s(\w+)?\s*%.*, i64 1, i64 2\)" 51 | ) 52 | 53 | assert re.search(pattern, llvm_ir_mod) is not None 54 | -------------------------------------------------------------------------------- /numba_dpex/kernel_api/barrier.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | """Python functions that simulate SYCL's group_barrier function. 6 | """ 7 | 8 | from .index_space_ids import Group 9 | from .memory_enums import MemoryScope 10 | 11 | 12 | def group_barrier( 13 | group: Group, fence_scope: MemoryScope = MemoryScope.WORK_GROUP 14 | ): 15 | """Performs a barrier operation across all work-items in a work-group. 16 | 17 | The function is equivalent to the ``sycl::group_barrier`` function. It 18 | synchronizes work within a group of work-items. All the work-items 19 | of the group must execute the barrier call before any work-item 20 | continues execution beyond the barrier. 21 | 22 | The ``group_barrier`` performs a memory fence operation ensuring that memory 23 | accesses issued before the barrier are not re-ordered with those issued 24 | after the barrier. 
All work-items in group G execute a release fence prior 25 | to synchronizing at the barrier, all work-items in group G execute an 26 | acquire fence afterwards, and there is an implicit synchronization of these 27 | fences as if provided by an explicit atomic operation on an atomic object. 28 | 29 | .. important:: 30 | The function is not implemented yet for pure CPython execution and is 31 | only supported in JIT compiled mode of execution. 32 | 33 | Args: 34 | group (Group): Indicates the work-group inside which the barrier is to 35 | be executed. 36 | fence_scope (MemoryScope) (optional): scope of any memory 37 | consistency operations that are performed by the barrier. 38 | Raises: 39 | NotImplementedError: When the function is called directly from Python. 40 | """ 41 | 42 | # TODO: A pure Python simulation of a group_barrier will be added later. 43 | raise NotImplementedError 44 | -------------------------------------------------------------------------------- /conda-recipe/bld.bat: -------------------------------------------------------------------------------- 1 | @REM new llvm-spirv location 2 | @REM starting from dpcpp_impl_win-64=2022.0.0=intel_3638 location is env\Library\bin-llvm 3 | @REM used BUILD_PREFIX as compiler installed in build section of meta.yml 4 | set "PATH=%BUILD_PREFIX%\Library\bin-llvm;%PATH%" 5 | 6 | REM A workaround for activate-dpcpp.bat issue to be addressed in 2021.4 7 | set "LIB=%BUILD_PREFIX%\Library\lib;%BUILD_PREFIX%\compiler\lib;%LIB%" 8 | SET "INCLUDE=%BUILD_PREFIX%\include;%INCLUDE%" 9 | 10 | REM Since the 60.0.0 release, setuptools includes a local, vendored copy 11 | REM of distutils (from late copies of CPython) that is enabled by default. 12 | REM It breaks build for Windows, so use distutils from "stdlib" as before. 13 | REM @TODO: remove the setting, once transition to build backend on Windows 14 | REM to cmake is complete. 
15 | REM UPD: Seems to work fine with setuptools 69, so we need to set minimal 16 | REM requirements before removing it. 17 | SET "SETUPTOOLS_USE_DISTUTILS=stdlib" 18 | 19 | set "CC=icx" 20 | set "CXX=icx" 21 | 22 | set "CMAKE_GENERATOR=Ninja" 23 | :: Make CMake verbose 24 | set "VERBOSE=1" 25 | 26 | %PYTHON% -m build -w -n -x 27 | if %ERRORLEVEL% neq 0 exit 1 28 | 29 | :: `pip install dist\numpy*.whl` does not work on windows, 30 | :: so use a loop; there's only one wheel in dist/ anyway 31 | for /f %%f in ('dir /b /S .\dist') do ( 32 | %PYTHON% -m wheel tags --remove --build %GIT_DESCRIBE_NUMBER% %%f 33 | if %ERRORLEVEL% neq 0 exit 1 34 | ) 35 | 36 | :: wheel file was renamed 37 | for /f %%f in ('dir /b /S .\dist') do ( 38 | %PYTHON% -m pip install %%f ^ 39 | --no-build-isolation ^ 40 | --no-deps ^ 41 | --only-binary :all: ^ 42 | --no-index ^ 43 | --prefix %PREFIX% ^ 44 | -vv 45 | if %ERRORLEVEL% neq 0 exit 1 46 | ) 47 | 48 | :: Copy wheel package 49 | if NOT "%WHEELS_OUTPUT_FOLDER%"=="" ( 50 | copy dist\numba_dpex*.whl %WHEELS_OUTPUT_FOLDER% 51 | if errorlevel 1 exit 1 52 | ) 53 | -------------------------------------------------------------------------------- /docs/source/user_guide/debugging/data.rst: -------------------------------------------------------------------------------- 1 | .. include:: ./../../ext_links.txt 2 | 3 | Examining Data 4 | ============== 5 | 6 | See `GDB* documentation `_. 7 | 8 | .. _print: 9 | 10 | ``print expr`` 11 | -------------- 12 | 13 | To print the value of a variable, run the ``print `` command. 14 | 15 | .. literalinclude:: ./../../../../numba_dpex/examples/debug/commands/docs/local_variables_0 16 | :language: shell-session 17 | :lines: 67-72 18 | :emphasize-lines: 1-6 19 | 20 | .. note:: 21 | 22 | Displaying complex data types requires Numba 0.55 or higher. 23 | 24 | Example - Complex Data Types 25 | ```````````````````````````` 26 | 27 | Source code :file:`numba_dpex/examples/debug/side-by-side-2.py`: 28 | 29 | .. 
literalinclude:: ./../../../../numba_dpex/examples/debug/side-by-side-2.py 30 | :pyobject: common_loop_body 31 | :linenos: 32 | :lineno-match: 33 | :emphasize-lines: 6 34 | 35 | Debug session: 36 | 37 | .. code-block:: shell-session 38 | :emphasize-lines: 9- 39 | 40 | $ gdb-oneapi -q python 41 | ... 42 | (gdb) set environment NUMBA_OPT 0 43 | (gdb) set environment NUMBA_EXTEND_VARIABLE_LIFETIMES 1 44 | (gdb) break side-by-side-2.py:29 if param_a == 5 45 | ... 46 | (gdb) run numba_dpex/examples/debug/side-by-side-2.py --api=numba-dpex-kernel 47 | ... 48 | Thread 2.1 hit Breakpoint 1, with SIMD lane 5, __main__::common_loop_body (i=5, a=..., b=...) at side-by-side-2.py:29 49 | 29 result = param_c + param_d 50 | (gdb) print a 51 | $1 = {meminfo = 0x0, parent = 0x0, nitems = 10, itemsize = 4, 52 | data = 0x555558461000, shape = {10}, strides = {4}} 53 | (gdb) x/10f a.data 54 | 0x555558461000: 0 1 2 3 55 | 0x555558461010: 4 5 6 7 56 | 0x555558461020: 8 9 57 | (gdb) print a.data[5] 58 | $2 = 5 59 | 60 | This example prints array and its element. 
61 | -------------------------------------------------------------------------------- /scripts/config_cpu_device.ps1: -------------------------------------------------------------------------------- 1 | # Original code: https://github.com/IntelPython/dpctl/blob/0e595728eb9dfc943774b654035e9b339bde8dce/.github/workflows/conda-package.yml#L220-L250 2 | echo "OCL_ICD_FILENAMES=C:\Miniconda\Library\lib\intelocl64.dll" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append 3 | try {$list = Get-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors | Select-Object -ExpandProperty Property } catch {$list=@()} 4 | if ($list.count -eq 0) { 5 | if (-not (Test-Path -Path HKLM:\SOFTWARE\Khronos)) { 6 | New-Item -Path HKLM:\SOFTWARE\Khronos 7 | } 8 | if (-not (Test-Path -Path HKLM:\SOFTWARE\Khronos\OpenCL)) { 9 | New-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL 10 | } 11 | if (-not (Test-Path -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors)) { 12 | New-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors 13 | } 14 | New-ItemProperty -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors -Name C:\Miniconda\Library\lib\intelocl64.dll -Value 0 15 | try {$list = Get-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors | Select-Object -ExpandProperty Property } catch {$list=@()} 16 | Write-Output $(Get-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors) 17 | # Now copy OpenCL.dll into system folder 18 | $system_ocl_icd_loader="C:\Windows\System32\OpenCL.dll" 19 | $python_ocl_icd_loader="C:\Miniconda\Library\bin\OpenCL.dll" 20 | Copy-Item -Path $python_ocl_icd_loader -Destination $system_ocl_icd_loader 21 | if (Test-Path -Path $system_ocl_icd_loader) { 22 | Write-Output "$system_ocl_icd_loader has been copied" 23 | $acl = Get-Acl $system_ocl_icd_loader 24 | Write-Output $acl 25 | } else { 26 | Write-Output "OCL-ICD-Loader was not copied" 27 | } 28 | # Variable assisting OpenCL CPU driver to find TBB DLLs which are not located where it expects them by default 29 | echo "TBB_DLL_PATH=C:\Miniconda\Library\bin" | 
Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append 30 | } 31 | -------------------------------------------------------------------------------- /numba_dpex/tests/kernel_tests/test_usm_ndarray_args.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import dpctl.tensor as dpt 6 | import dpnp 7 | import numpy 8 | import pytest 9 | 10 | import numba_dpex as dpex 11 | from numba_dpex.tests._helper import get_all_dtypes 12 | 13 | 14 | @dpex.kernel 15 | def sum_2d(item, a, b, c): 16 | """ 17 | Vector addition using the ``kernel`` decorator. 18 | """ 19 | i = item.get_id(0) 20 | j = item.get_id(1) 21 | c[i, j] = a[i, j] + b[i, j] 22 | 23 | 24 | @dpex.kernel 25 | def sum_2d_slice(item, a, b, c): 26 | """ 27 | Vector addition using the ``kernel`` decorator. 28 | """ 29 | i = item.get_id(0) 30 | j = item.get_id(1) 31 | ai, bi, ci = a[i], b[i], c[i] 32 | ci[j] = ai[j] + bi[j] 33 | 34 | 35 | @pytest.mark.parametrize( 36 | "usm_type", 37 | [ 38 | "shared", 39 | "device", 40 | "host", 41 | ], 42 | ) 43 | @pytest.mark.parametrize( 44 | "dtype", 45 | get_all_dtypes( 46 | no_bool=True, no_float16=True, no_none=True, no_complex=True 47 | ), 48 | ) 49 | @pytest.mark.parametrize( 50 | "kernel", 51 | [ 52 | sum_2d, 53 | sum_2d_slice, 54 | ], 55 | ) 56 | @pytest.mark.parametrize( 57 | "np", 58 | [ 59 | dpt, 60 | dpnp, 61 | ], 62 | ) 63 | def test_consuming_usm_ndarray( 64 | kernel, 65 | dtype, 66 | usm_type, 67 | np, 68 | ): 69 | N = 1000 70 | global_size = N * N 71 | 72 | a = np.arange(global_size, dtype=dtype, usm_type=usm_type) 73 | a = np.reshape(a, (N, N)) 74 | 75 | b = np.arange(global_size, dtype=dtype, usm_type=usm_type) 76 | b = np.reshape(b, (N, N)) 77 | 78 | c = np.empty_like(a) 79 | 80 | dpex.call_kernel(kernel, dpex.Range(N, N), a, b, c) 81 | 82 | na, nb, nc = np.asnumpy(a), np.asnumpy(b), np.asnumpy(c) 83 | 84 | assert 
numpy.array_equal(nc, na + nb) 85 | -------------------------------------------------------------------------------- /numba_dpex/tests/kernel_tests/test_target_specific_overload.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import dpnp 6 | from numba.core.extending import overload 7 | 8 | import numba_dpex as dpex 9 | from numba_dpex.core.descriptor import dpex_kernel_target 10 | from numba_dpex.kernel_api import Item, Range 11 | from numba_dpex.kernel_api_impl.spirv.target import SPIRV_TARGET_NAME 12 | 13 | 14 | def scalar_add(a, b): 15 | return a + b 16 | 17 | 18 | @overload(scalar_add, target=SPIRV_TARGET_NAME) 19 | def _ol_scalar_add(a, b): 20 | def ol_scalar_add_impl(a, b): 21 | return a + b 22 | 23 | return ol_scalar_add_impl 24 | 25 | 26 | @dpex.kernel 27 | def kernel_calling_overload(item: Item, a, b, c): 28 | i = item.get_id(0) 29 | c[i] = scalar_add(a[i], b[i]) 30 | 31 | 32 | a = dpnp.ones(10, dtype=dpnp.int64) 33 | b = dpnp.ones(10, dtype=dpnp.int64) 34 | c = dpnp.zeros(10, dtype=dpnp.int64) 35 | 36 | dpex.call_kernel(kernel_calling_overload, Range(10), a, b, c) 37 | 38 | 39 | def test_end_to_end_overload_execution(): 40 | """Tests that an overload function can be called from an experimental.kernel 41 | decorated function and works end to end. 42 | """ 43 | for i in range(c.shape[0]): 44 | assert c[i] == scalar_add(a[i], b[i]) 45 | 46 | 47 | def test_overload_registration(): 48 | """Tests that the overload _ol_scalar_add is registered only in the 49 | "dpex_kernel_exp" target and not in the "dpex_kernel" target. 
50 | """ 51 | 52 | def check_for_overload_registration(targetctx, key): 53 | found_key = False 54 | for fn_key in targetctx._defns.keys(): 55 | if isinstance(fn_key, str) and fn_key.startswith(key): 56 | found_key = True 57 | break 58 | return found_key 59 | 60 | assert check_for_overload_registration( 61 | dpex_kernel_target.target_context, "_ol_scalar_add" 62 | ) 63 | -------------------------------------------------------------------------------- /numba_dpex/core/runtime/experimental/nrt_reserve_meminfo.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | //===----------------------------------------------------------------------===// 6 | /// 7 | /// \file 8 | /// Defines dpctl style function(s) that interact with nrt meminfo and sycl. 9 | /// 10 | //===----------------------------------------------------------------------===// 11 | 12 | #ifndef _EXPERIMENTAL_H_ 13 | #define _EXPERIMENTAL_H_ 14 | 15 | #include "dpctl_capi.h" 16 | #include "numba/core/runtime/nrt_external.h" 17 | 18 | #ifdef __cplusplus 19 | extern "C" 20 | { 21 | #endif 22 | 23 | /*! 24 | * @brief Acquires meminfos and schedules a host task to release them. 25 | * 26 | * @param nrt NRT public API functions, 27 | * @param QRef Queue reference, 28 | * @param meminfo_array Array of meminfo pointers to perform actions on, 29 | * @param meminfo_array_size Length of meminfo_array, 30 | * @param depERefs Array of dependent events for the host task, 31 | * @param nDepERefs Length of depERefs, 32 | * @param status Variable to write status to. Same style as 33 | * dpctl, 34 | * @return {return} Event reference to the host task. 
35 | */ 36 | DPCTLSyclEventRef 37 | DPEXRT_nrt_acquire_meminfo_and_schedule_release(NRT_api_functions *nrt, 38 | DPCTLSyclQueueRef QRef, 39 | NRT_MemInfo **meminfo_array, 40 | size_t meminfo_array_size, 41 | DPCTLSyclEventRef *depERefs, 42 | size_t nDepERefs, 43 | int *status); 44 | #ifdef __cplusplus 45 | } 46 | #endif 47 | 48 | #endif /* _EXPERIMENTAL_H_ */ 49 | -------------------------------------------------------------------------------- /numba_dpex/tests/kernel_tests/test_complex_array_kernel_arg.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import dpnp 6 | import numpy 7 | import pytest 8 | 9 | import numba_dpex as dpex 10 | from numba_dpex.tests._helper import get_all_dtypes 11 | 12 | N = 1024 13 | 14 | 15 | @dpex.kernel 16 | def kernel_scalar(item, a, b, c): 17 | i = item.get_id(0) 18 | b[i] = a[i] * c 19 | 20 | 21 | @dpex.kernel 22 | def kernel_array(item, a, b, c): 23 | i = item.get_id(0) 24 | b[i] = a[i] * c[i] 25 | 26 | 27 | list_of_dtypes = get_all_dtypes( 28 | no_bool=True, no_int=True, no_float=True, no_none=True 29 | ) 30 | 31 | list_of_usm_types = ["shared", "device", "host"] 32 | 33 | 34 | @pytest.fixture(params=list_of_dtypes) 35 | def input_arrays(request): 36 | a = dpnp.ones(N, dtype=request.param) 37 | c = dpnp.zeros(N, dtype=request.param) 38 | b = dpnp.empty_like(a) 39 | return a, b, c 40 | 41 | 42 | def test_numeric_kernel_arg_complex_scalar(input_arrays): 43 | """Tests passing complex type scalar and dpnp arrays to a kernel function. 44 | 45 | Args: 46 | input_arrays (dpnp.ndarray): Array arguments to be passed to a kernel. 
47 | """ 48 | a, b, _ = input_arrays 49 | s = a.dtype.type(2 + 1j) 50 | 51 | dpex.call_kernel(kernel_scalar, dpex.Range(N), a, b, s) 52 | 53 | nb = dpnp.asnumpy(b) 54 | nexpected = numpy.full_like(nb, fill_value=2 + 1j) 55 | 56 | assert numpy.allclose(nb, nexpected) 57 | 58 | 59 | def test_numeric_kernel_arg_complex_array(input_arrays): 60 | """Tests passing complex type dpnp arrays to a kernel function. 61 | 62 | Args: 63 | input_arrays (dpnp.ndarray): Array arguments to be passed to a kernel. 64 | """ 65 | 66 | a, b, c = input_arrays 67 | 68 | dpex.call_kernel(kernel_array, dpex.Range(N), a, b, c) 69 | 70 | nb = dpnp.asnumpy(b) 71 | nexpected = numpy.full_like(nb, fill_value=0 + 0j) 72 | 73 | assert numpy.allclose(nb, nexpected) 74 | -------------------------------------------------------------------------------- /.github/workflows/coverity.yml: -------------------------------------------------------------------------------- 1 | name: Coverity 2 | on: 3 | push: 4 | branches: 5 | - main 6 | pull_request: 7 | branches: 8 | - main 9 | workflow_dispatch: 10 | 11 | permissions: read-all 12 | 13 | jobs: 14 | Coverity: 15 | 16 | runs-on: ubuntu-latest 17 | 18 | env: 19 | CHECKERS: --concurrency --security --rule --enable-constraint-fpp --enable-fnptr --enable-virtual --webapp-security --enable-audit-checkers --enable-default 20 | 21 | steps: 22 | - uses: actions/checkout@v4 23 | - uses: actions/setup-java@v4 24 | with: 25 | distribution: zulu 26 | java-version: 11 27 | 28 | - name: URL encode project name 29 | run: echo "COV_PROJECT=${{ github.repository }}" | sed -e 's:/:%2F:g' -e 's/ /%20/g' >> $GITHUB_ENV 30 | 31 | - name: Coverity Download 32 | run: | 33 | mkdir -p /tmp/cov-analysis 34 | wget https://scan.coverity.com/download/linux64 --post-data "token=${{secrets.COV_TOKEN}}&project=${{env.COV_PROJECT}}" -O cov-analysis.tgz 35 | tar -xzf cov-analysis.tgz --strip 1 -C /tmp/cov-analysis 36 | rm cov-analysis.tgz 37 | 38 | - name: Coverity Full Scan 39 | if: ${{ 
github.event_name != 'pull_request' }} 40 | run: | 41 | export PATH=$PATH:/tmp/cov-analysis/bin 42 | set -x 43 | cov-build --dir cov-int --fs-capture-search $GITHUB_WORKSPACE --no-command 44 | # Not available in package, maybe will be once approved? 45 | # cov-analyze --dir cov-int --ticker-mode none --strip-path $GITHUB_WORKSPACE $CHECKERS 46 | 47 | tar czvf numba-dpex.tgz cov-int 48 | rm -rf cov-int 49 | 50 | curl --form token=${{ secrets.COV_TOKEN }} \ 51 | --form email=${{ secrets.COV_EMAIL }} \ 52 | --form file=@numba-dpex.tgz \ 53 | --form version="${{ github.sha }}" \ 54 | --form description="Coverity Scan ${{ github.repository }} / ${{ github.ref }}" \ 55 | https://scan.coverity.com/builds?project=${{env.COV_PROJECT}} 56 | -------------------------------------------------------------------------------- /numba_dpex/core/debuginfo.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | """Implements a custom debug metadata generator class for numba-dpex kernels. 6 | """ 7 | 8 | from numba.core import debuginfo 9 | 10 | 11 | class DIBuilder(debuginfo.DIBuilder): 12 | """Overrides Numba's default DIBuilder with numba-dpex-specific customizations.""" 13 | 14 | # pylint: disable=too-many-arguments 15 | def mark_subprogram(self, function, qualname, argnames, argtypes, line): 16 | """Sets DW_AT_name and DW_AT_linkagename tags for a kernel decorated function. 17 | 18 | Numba generates a unique name for every function it compiles, but in 19 | upstream Numba the unique name is not used as the "qualified" name of 20 | the function. The behavior leads to a bug discovered in Numba-dpex when 21 | a compiled function uses closure variables. 22 | Refer (https://github.com/IntelPython/numba-dpex/issues/898). 23 | To resolve the issue numba-dpex uses the unique_name as the qualified 24 | name. 
Refer to 25 | :class:`numba_dpex.core.passes.passes.QualNameDisambiguationLowering`. 26 | However, doing so breaks setting GDB breakpoints based on function 27 | name as the function name is no longer what is in the source, but what 28 | is the unique name generated by Numba. To fix it, numba-dpex uses a 29 | modified DISubprogram metadata generator. The name (DW_AT_name) tag is 30 | set to the base function name, discarding the unique qualifier and 31 | linkagename is set to an empty string. 32 | """ 33 | name = qualname[0 : qualname.find("$")] # noqa: E203 34 | argmap = dict(zip(argnames, argtypes)) 35 | 36 | di_subp = self._add_subprogram( 37 | name=name, 38 | linkagename="", 39 | line=line, 40 | function=function, 41 | argmap=argmap, 42 | ) 43 | function.set_metadata("dbg", di_subp) 44 | -------------------------------------------------------------------------------- /numba_dpex/core/types/kernel_api/literal_intenum.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | """Definition of a new Literal type in numba-dpex that allows treating IntEnum 6 | members as integer literals inside a JIT compiled function. 7 | """ 8 | from enum import IntEnum 9 | 10 | from numba.core.pythonapi import box 11 | from numba.core.typeconv import Conversion 12 | from numba.core.types import Integer, Literal 13 | from numba.core.typing.typeof import typeof 14 | 15 | from numba_dpex.core.exceptions import IllegalIntEnumLiteralValueError 16 | from numba_dpex.kernel_api.flag_enum import FlagEnum 17 | 18 | 19 | class IntEnumLiteral(Literal, Integer): 20 | """A Literal type for IntEnum objects. The type contains the original Python 21 | value of the IntEnum class in it. 
22 | """ 23 | 24 | # pylint: disable=W0231 25 | def __init__(self, value): 26 | self._literal_init(value) 27 | self.name = f"Literal[IntEnum]({value})" 28 | if issubclass(value, FlagEnum): 29 | basetype = typeof(value.basetype()) 30 | Integer.__init__( 31 | self, 32 | name=self.name, 33 | bitwidth=basetype.bitwidth, 34 | signed=basetype.signed, 35 | ) 36 | else: 37 | raise IllegalIntEnumLiteralValueError 38 | 39 | def can_convert_to(self, typingctx, other) -> bool: 40 | conv = typingctx.can_convert(self.literal_type, other) 41 | if conv is not None: 42 | return max(conv, Conversion.promote) 43 | return False 44 | 45 | 46 | Literal.ctor_map[IntEnum] = IntEnumLiteral 47 | 48 | 49 | @box(IntEnumLiteral) 50 | def box_literal_integer(typ, val, ctx): 51 | """Defines how a Numba representation for an IntEnumLiteral object should 52 | be converted to a PyObject* object and returned back to Python. 53 | """ 54 | val = ctx.context.cast(ctx.builder, val, typ, typ.literal_type) 55 | return ctx.box(typ.literal_type, val) 56 | -------------------------------------------------------------------------------- /numba_dpex/tests/dpjit_tests/test_dpex_target_overload_isolation.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | """ 6 | Tests if dpex target overloads are not available at numba.njit and only 7 | available at numba_dpex.dpjit. 
8 | """ 9 | 10 | import pytest 11 | from numba import njit, types 12 | from numba.core import errors 13 | from numba.extending import intrinsic, overload 14 | 15 | from numba_dpex import dpjit 16 | from numba_dpex.core.targets.dpjit_target import DPEX_TARGET_NAME 17 | 18 | 19 | def foo(): 20 | return 1 21 | 22 | 23 | @overload(foo, target=DPEX_TARGET_NAME) 24 | def ol_foo(): 25 | return lambda: 1 26 | 27 | 28 | @intrinsic(target=DPEX_TARGET_NAME) 29 | def intrinsic_foo( 30 | ty_context, 31 | ): 32 | """A numba "intrinsic" function to inject dpctl.SyclEvent constructor code. 33 | 34 | Args: 35 | ty_context (numba.core.typing.context.Context): The typing context 36 | for the codegen. 37 | 38 | Returns: 39 | tuple(numba.core.typing.templates.Signature, function): A tuple of 40 | numba function signature type and a function object. 41 | """ 42 | 43 | sig = types.int32(types.void) 44 | 45 | def codegen(context, builder, sig, args: list): 46 | return context.get_constant(types.int32, 1) 47 | 48 | return sig, codegen 49 | 50 | 51 | def bar(): 52 | return foo() 53 | 54 | 55 | def intrinsic_bar(): 56 | res = intrinsic_foo() 57 | return res 58 | 59 | 60 | def test_dpex_overload_from_njit(): 61 | bar_njit = njit(bar) 62 | 63 | with pytest.raises(errors.TypingError): 64 | bar_njit() 65 | 66 | 67 | def test_dpex_overload_from_dpjit(): 68 | bar_dpjit = dpjit(bar) 69 | bar_dpjit() 70 | 71 | 72 | def test_dpex_intrinsic_from_njit(): 73 | bar_njit = njit(intrinsic_bar) 74 | 75 | with pytest.raises(errors.TypingError): 76 | bar_njit() 77 | 78 | 79 | def test_dpex_intrinsic_from_dpjit(): 80 | bar_dpjit = dpjit(intrinsic_bar) 81 | bar_dpjit() 82 | -------------------------------------------------------------------------------- /scripts/set_examples_to_doc.py: -------------------------------------------------------------------------------- 1 | """ This script is needed to convert gdb scripts from commands to documentation 2 | """ 3 | 4 | import os 5 | 6 | 7 | def 
convert_commands_to_docs(): 8 | commands_dir = os.getcwd() + "/numba_dpex/examples/debug/commands" 9 | examples = os.listdir(commands_dir) 10 | os.chdir(commands_dir + "/docs") 11 | for file in examples: 12 | if file != "docs": 13 | with open(commands_dir + "/" + file, "r") as open_file: 14 | read_lines = open_file.readlines() 15 | if os.path.exists(file): 16 | os.remove(file) 17 | with open(file, "a") as write_file: 18 | for line in read_lines: 19 | if ( 20 | line.startswith("# Expected") 21 | or line.startswith("echo Done") 22 | or line.startswith("quit") 23 | or line.startswith("set trace-commands") 24 | or line.startswith("set pagination") 25 | ): 26 | continue 27 | if line.startswith("# Run: "): 28 | line = line.replace("# Run:", "$") 29 | words = line.split() 30 | for i in range(len(words)): 31 | if words[i] == "-command" or words[ 32 | i 33 | ].startswith("commands"): 34 | words[i] = "" 35 | line = " ".join(words) 36 | line = " ".join(line.split()) + "\n" 37 | elif line.startswith("# "): 38 | line = line.replace("# ", "") 39 | else: 40 | line = "(gdb) " + line 41 | 42 | write_file.write(line) 43 | 44 | 45 | if __name__ == "__main__": 46 | convert_commands_to_docs() 47 | -------------------------------------------------------------------------------- /docs/source/useful_links.rst: -------------------------------------------------------------------------------- 1 | .. _useful_links: 2 | .. include:: ./ext_links.txt 3 | 4 | Useful links 5 | ============ 6 | 7 | .. 
list-table:: **Companion documentation** 8 | :widths: 70 200 9 | :header-rows: 1 10 | 11 | * - Document 12 | - Description 13 | * - `Data Parallel Extension for Numpy*`_ 14 | - Documentation for programming NumPy-like codes on data parallel devices 15 | * - `Data Parallel Extension for Numba*`_ 16 | - Documentation for programming Numba codes on data parallel devices the same way as you program Numba on CPU 17 | * - `Data Parallel Control`_ 18 | - Documentation on how to manage data and devices, how to interchange data between different tensor implementations, 19 | and how to write data parallel extensions 20 | * - `Intel VTune Profiler`_ 21 | - Performance profiler supporting analysis of bottlenecks from function level down to low-level instructions. 22 | Supports Python and Numba 23 | * - `Intel Advisor`_ 24 | - Analyzes native and Python codes and provides advice for better composition of heterogeneous algorithms 25 | * - `Python* Array API Standard`_ 26 | - Standard for writing portable Numpy-like codes targeting different hardware vendors and frameworks 27 | operating with tensor data 28 | * - `SYCL*`_ 29 | - Standard for writing C++-like codes for heterogeneous computing 30 | * - `DPC++`_ 31 | - Free e-book on how to program data parallel devices using Data Parallel C++ 32 | * - `OpenCl*`_ 33 | - OpenCl* Standard for heterogeneous programming 34 | * - `IEEE 754-2019 Standard for Floating-Point Arithmetic`_ 35 | - Standard for floating-point arithmetic, essential for writing robust numerical codes 36 | * - `Numpy*`_ 37 | - Documentation for Numpy - foundational CPU library for array programming. Used in conjunction with 38 | `Data Parallel Extension for Numpy*`_. 39 | * - `Numba*`_ 40 | - Documentation for Numba - Just-In-Time compiler for Numpy-like codes. Used in conjunction with 41 | `Data Parallel Extension for Numba*`_. 
# File: numba_dpex/examples/kernel/scan.py
# SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

"""An implementation of the Hillis-Steele algorithm to compute prefix sums.

The algorithm is implemented to work with a single work group of N work items,
where N is the number of elements.
"""

import dpnp as np

import numba_dpex as ndpx
from numba_dpex import kernel_api as kapi

# 1D array size
N = 64


@ndpx.kernel
def kernel_hillis_steele_scan(nditem: kapi.NdItem, a, slm_b, slm_c):
    """Compute the inclusive prefix sum of ``a`` in place.

    Args:
        nditem (kapi.NdItem): Index-space object for the calling work-item.
        a: Array holding the input; overwritten with the prefix sums.
        slm_b: Local-memory buffer holding the running partial sums.
        slm_c: Local-memory scratch buffer for the next step's sums.
    """
    # Get local and global id and workgroup size
    gid = nditem.get_global_id(0)
    lid = nditem.get_local_id(0)
    ls = nditem.get_local_range(0)
    gr = nditem.get_group()

    # Initialize locals
    slm_c[lid] = slm_b[lid] = a[gid]

    kapi.group_barrier(gr)

    # Calculate prefix sum
    d = 1
    while d < ls:
        # Every item at distance >= d from the start adds the partial sum
        # located d positions to its left. The comparison must include
        # lid == d, otherwise element 0 is never accumulated.
        if lid >= d:
            slm_c[lid] = slm_b[lid] + slm_b[lid - d]
        else:
            slm_c[lid] = slm_b[lid]

        kapi.group_barrier(gr)

        # Swap c and b
        e = slm_c[lid]
        slm_c[lid] = slm_b[lid]
        slm_b[lid] = e

        # The next step reads slm_b[lid - d], which another work-item has
        # just written in its swap; wait until every swap is finished.
        kapi.group_barrier(gr)

        # Double the stride
        d *= 2

    kapi.group_barrier(gr, kapi.MemoryScope.DEVICE)

    a[gid] = slm_b[lid]


def main():
    """Run the scan kernel on ``arange(N)`` and print the result."""
    arr = np.arange(N)
    print("Original array:", arr)

    print("Using device ...")
    print(arr.device)

    # Create temporals in local memory
    slm_b = kapi.LocalAccessor(N, dtype=arr.dtype)
    slm_c = kapi.LocalAccessor(N, dtype=arr.dtype)

    ndpx.call_kernel(
        kernel_hillis_steele_scan, ndpx.NdRange((N,), (N,)), arr, slm_b, slm_c
    )

    # the output should be [0, 1, 3, 6, ...]
    arr_np = np.asnumpy(arr)
    print(arr_np)

    print("Done...")


if __name__ == "__main__":
    main()


# File: docs/source/sycl_spec_links.py
# SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

"""Links to the SYCL 2020 specification that are used in docstring.

The module provides a dictionary in the format needed by the sphinx.ext.extlinks
extension.
"""

sycl_ext_links = {
    "sycl_item": (
        "https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#subsec:item.class%s",
        None,
    ),
    "sycl_group": (
        "https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#group-class%s",
        None,
    ),
    "sycl_nditem": (
        "https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#subsec:nditem.class%s",
        None,
    ),
    "sycl_ndrange": (
        "https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#subsubsec:nd-range-class%s",
        None,
    ),
    "sycl_range": (
        "https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#range-class%s",
        None,
    ),
    "sycl_atomic_ref": (
        "https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:atomic-references%s",
        None,
    ),
    "sycl_local_accessor": (
        "https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:accessor.local%s",
        None,
    ),
    "sycl_private_memory": (
        "https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#_parallel_for_hierarchical_invoke%s",
        None,
    ),
    "sycl_memory_scope": (
        "https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:memory-scope%s",
        None,
    ),
    "sycl_memory_order": (
        "https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:memory-ordering%s",
        None,
    ),
    "sycl_addr_space": (
        "https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#_address_space_classes%s",
        None,
    ),
}