├── docs
    ├── LICENSE_THIRD_PARTY
    ├── .gitignore
    ├── _templates
    │   └── autoapi
    │   │   ├── python
    │   │       ├── attribute.rst
    │   │       ├── exception.rst
    │   │       ├── package.rst
    │   │       ├── property.rst
    │   │       ├── function.rst
    │   │       ├── method.rst
    │   │       └── data.rst
    │   │   ├── index.rst
    │   │   └── macros.rst
    ├── asset
    │   └── images
    │   │   ├── DPEP.png
    │   │   ├── dpep-all.png
    │   │   ├── DPEP-large.png
    │   │   ├── numba-dpex-logo.png
    │   │   ├── kernel_prog_model.png
    │   │   └── kernel-queue-device.png
    ├── source
    │   ├── user_guide
    │   │   ├── kernel_programming
    │   │   │   ├── call-kernel-async.rst
    │   │   │   ├── operators.rst
    │   │   │   ├── math-functions.rst
    │   │   │   ├── operators.csv
    │   │   │   └── supported-python-features.rst
    │   │   ├── index.rst
    │   │   └── debugging
    │   │   │   ├── features.rst
    │   │   │   ├── limitations.rst
    │   │   │   ├── backtrace.rst
    │   │   │   ├── altering.rst
    │   │   │   ├── debugging_environment.rst
    │   │   │   └── data.rst
    │   ├── config_options.rst
    │   ├── release-notes.rst
    │   ├── license.rst
    │   ├── index.rst
    │   ├── useful_links.rst
    │   └── sycl_spec_links.py
    ├── _static
    │   └── css
    │   │   └── custom.css
    ├── backups
    │   └── user_guides
    │   │   └── kernel_programming_guide
    │   │       ├── index.rst
    │   │       ├── atomic-operations.rst
    │   │       ├── device-functions.rst
    │   │       ├── synchronization.rst
    │   │       ├── reduction.rst
    │   │       └── memory_allocation_address_space.rst
    ├── Makefile
    └── make.bat
├── numba_dpex
    ├── .gitignore
    ├── core
    │   ├── parfors
    │   │   ├── __init__.py
    │   │   └── kernel_templates
    │   │   │   ├── __init__.py
    │   │   │   └── kernel_template_iface.py
    │   ├── targets
    │   │   └── __init__.py
    │   ├── utils
    │   │   └── __init__.py
    │   ├── datamodel
    │   │   └── __init__.py
    │   ├── overloads
    │   │   └── __init__.py
    │   ├── pipelines
    │   │   └── __init__.py
    │   ├── types
    │   │   ├── kernel_api
    │   │   │   ├── __init__.py
    │   │   │   ├── ranges.py
    │   │   │   └── literal_intenum.py
    │   │   ├── kernel_dispatcher_type.py
    │   │   ├── numba_types_short_names.py
    │   │   └── __init__.py
    │   ├── typing
    │   │   └── __init__.py
    │   ├── __init__.py
    │   ├── boxing
    │   │   └── __init__.py
    │   ├── passes
    │   │   └── __init__.py
    │   ├── runtime
    │   │   ├── __init__.py
    │   │   ├── _queuestruct.h
    │   │   ├── _eventstruct.c
    │   │   ├── experimental
    │   │   │   ├── tools
    │   │   │   │   ├── dpctl.cpp
    │   │   │   │   ├── dpctl.hpp
    │   │   │   │   └── boost_hash.hpp
    │   │   │   └── nrt_reserve_meminfo.h
    │   │   ├── _usmarraystruct.h
    │   │   ├── _eventstruct.h
    │   │   ├── _nrt_helper.h
    │   │   ├── _nrt_python_helper.h
    │   │   ├── _dbg_printer.h
    │   │   └── _meminfo_helper.h
    │   ├── lowering.py
    │   └── debuginfo.py
    ├── tests
    │   ├── kernel_api
    │   │   ├── __init__.py
    │   │   ├── test_local_accessor.py
    │   │   ├── test_range_kernel_launch.py
    │   │   └── test_ndrange_kernel_launch.py
    │   ├── codegen
    │   │   ├── __init__.py
    │   │   └── test_intenum_literal_codegen.py
    │   ├── core
    │   │   ├── types
    │   │   │   ├── IntEnumLiteral
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── test_type_creation.py
    │   │   │   │   ├── test_compilation.py
    │   │   │   │   └── test_type_registration.py
    │   │   │   ├── range_types
    │   │   │   │   ├── __init__.py
    │   │   │   │   └── test_constructor_overloads.py
    │   │   │   ├── __init__.py
    │   │   │   ├── USMNdArray
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── test_usm_ndarray_type.py
    │   │   │   │   └── test_array_creation_errors.py
    │   │   │   ├── DpctlSyclEvent
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── test_overloads.py
    │   │   │   │   ├── test_models.py
    │   │   │   │   └── test_box.py
    │   │   │   ├── DpctlSyclQueue
    │   │   │   │   ├── __init__.py
    │   │   │   │   └── test_box.py
    │   │   │   └── DpnpNdArray
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── test_bugs.py
    │   │   │   │   ├── test_dpnp_ndarray_type.py
    │   │   │   │   └── test_boxing_unboxing.py
    │   │   ├── passes
    │   │   │   └── __init__.py
    │   │   ├── runtime
    │   │   │   ├── __init__.py
    │   │   │   └── test_llvm_registration.py
    │   │   ├── __init__.py
    │   │   ├── test_dpjit_target.py
    │   │   └── test_itanium_mangler_extension.py
    │   ├── dpjit_tests
    │   │   ├── parfors
    │   │   │   ├── prange
    │   │   │   │   ├── __init__.py
    │   │   │   │   └── test_pairwise_distance.py
    │   │   │   └── __init__.py
    │   │   ├── dpnp
    │   │   │   ├── __init__.py
    │   │   │   └── test_target_specific_overload.py
    │   │   ├── __init__.py
    │   │   ├── test_slicing.py
    │   │   └── test_dpex_target_overload_isolation.py
    │   ├── misc
    │   │   ├── __init__.py
    │   │   ├── test_parse_sem_version.py
    │   │   └── test_warnings.py
    │   ├── debugging
    │   │   ├── __init__.py
    │   │   ├── conftest.py
    │   │   └── test_backtraces.py
    │   ├── kernel_tests
    │   │   ├── __init__.py
    │   │   ├── test_compiler_warnings.py
    │   │   ├── test_func.py
    │   │   ├── test_invalid_kernel_args.py
    │   │   ├── test_supported_array_types_as_kernel_args.py
    │   │   ├── test_ndrange_exceptions.py
    │   │   ├── test_atomic_fence.py
    │   │   ├── test_kernel_has_return_value_error.py
    │   │   ├── test_math_functions.py
    │   │   ├── test_barriers.py
    │   │   ├── test_inline_threshold_config.py
    │   │   ├── test_usm_ndarray_args.py
    │   │   ├── test_target_specific_overload.py
    │   │   └── test_complex_array_kernel_arg.py
    │   ├── __init__.py
    │   ├── conftest.py
    │   ├── test_examples.py
    │   └── test_dpex_use_alongside_numba.py
    ├── numba_patches
    │   └── __init__.py
    ├── kernel_api_impl
    │   ├── spirv
    │   │   ├── math
    │   │   │   └── __init__.py
    │   │   ├── overloads
    │   │   │   ├── __init__.py
    │   │   │   └── _registry.py
    │   │   └── __init__.py
    │   └── __init__.py
    ├── dpnp_iface
    │   └── __init__.py
    ├── dpctl_iface
    │   ├── __init__.py
    │   ├── dpctlimpl.py
    │   ├── wrappers.py
    │   └── _helpers.py
    ├── examples
    │   ├── debug
    │   │   ├── commands
    │   │   │   ├── docs
    │   │   │   │   ├── break_conditional
    │   │   │   │   ├── break_func
    │   │   │   │   ├── break_line_number
    │   │   │   │   ├── break_file_func
    │   │   │   │   ├── break_nested_func
    │   │   │   │   ├── simple_sum
    │   │   │   │   ├── step_sum
    │   │   │   │   ├── next
    │   │   │   │   ├── local_variables_1
    │   │   │   │   ├── backtrace_kernel
    │   │   │   │   ├── backtrace
    │   │   │   │   ├── stepi
    │   │   │   │   ├── step_dpex_func
    │   │   │   │   ├── info_func
    │   │   │   │   ├── stepping
    │   │   │   │   └── sheduler_locking
    │   │   │   ├── break_conditional
    │   │   │   ├── break_func
    │   │   │   ├── break_line_number
    │   │   │   ├── break_file_func
    │   │   │   ├── break_nested_func
    │   │   │   ├── simple_sum
    │   │   │   ├── next
    │   │   │   ├── step_sum
    │   │   │   ├── local_variables_1
    │   │   │   ├── backtrace_kernel
    │   │   │   ├── backtrace
    │   │   │   ├── step_dpex_func
    │   │   │   ├── stepi
    │   │   │   ├── info_func
    │   │   │   ├── stepping
    │   │   │   └── sheduler_locking
    │   │   ├── njit_basic.py
    │   │   ├── simple_sum.py
    │   │   ├── sum_local_vars.py
    │   │   ├── sum_local_vars_revive.py
    │   │   ├── simple_dpex_func.py
    │   │   ├── sum.py
    │   │   └── dpex_func.py
    │   ├── _helper.py
    │   ├── dpjit
    │   │   ├── vector_sum2D.py
    │   │   └── vector_sum.py
    │   └── kernel
    │   │   ├── vector_sum.py
    │   │   ├── atomic_op.py
    │   │   ├── vector_sum2D.py
    │   │   ├── device_func.py
    │   │   └── scan.py
    ├── register_kernel_api_overloads.py
    └── kernel_api
    │   ├── flag_enum.py
    │   ├── atomic_fence.py
    │   ├── __init__.py
    │   ├── memory_enums.py
    │   ├── private_array.py
    │   └── barrier.py
├── .github
    ├── CODEOWNERS
    ├── dependabot.yml
    ├── pull_request_template.md
    ├── release.yml
    └── workflows
    │   ├── cpp_style_checks.yml
    │   ├── license.yml
    │   ├── pre-commit.yml
    │   ├── black.yml
    │   ├── coverage.yml
    │   └── coverity.yml
├── .dockerignore
├── .gitattributes
├── .git-blame-ignore-revs
├── scripts
    ├── build_conda_package.sh
    ├── diag_env.sh
    ├── run_debug_examples.sh
    ├── update_copyrights.py
    ├── run_examples.sh
    ├── config_cpu_device.ps1
    └── set_examples_to_doc.py
├── conda-recipe
    ├── run_test.sh
    ├── run_test.bat
    ├── build.sh
    └── bld.bat
├── environment
    ├── pre-commit.yml
    ├── coverage.yml
    └── docs.yml
├── SECURITY.md
├── .gitignore
├── .clang-format
├── .flake8
├── environment.yml
└── CMakeLists.txt


/docs/LICENSE_THIRD_PARTY:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/numba_dpex/.gitignore:
--------------------------------------------------------------------------------
1 | *.bc
2 | *.ll
3 | *.spir
4 | 


--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | conda-recipe/*  @oleksandr-pavlyk
2 | 


--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | apidoc
2 | sources/_build
3 | source/autoapi
4 | 


--------------------------------------------------------------------------------
/docs/_templates/autoapi/python/attribute.rst:
--------------------------------------------------------------------------------
1 | {% extends "python/data.rst" %}
2 | 


--------------------------------------------------------------------------------
/docs/_templates/autoapi/python/exception.rst:
--------------------------------------------------------------------------------
1 | {% extends "python/class.rst" %}
2 | 


--------------------------------------------------------------------------------
/docs/_templates/autoapi/python/package.rst:
--------------------------------------------------------------------------------
1 | {% extends "python/module.rst" %}
2 | 


--------------------------------------------------------------------------------
/docs/asset/images/DPEP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelPython/numba-dpex/HEAD/docs/asset/images/DPEP.png


--------------------------------------------------------------------------------
/docs/asset/images/dpep-all.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelPython/numba-dpex/HEAD/docs/asset/images/dpep-all.png


--------------------------------------------------------------------------------
/docs/asset/images/DPEP-large.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelPython/numba-dpex/HEAD/docs/asset/images/DPEP-large.png


--------------------------------------------------------------------------------
/docs/asset/images/numba-dpex-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelPython/numba-dpex/HEAD/docs/asset/images/numba-dpex-logo.png


--------------------------------------------------------------------------------
/docs/asset/images/kernel_prog_model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelPython/numba-dpex/HEAD/docs/asset/images/kernel_prog_model.png


--------------------------------------------------------------------------------
/docs/asset/images/kernel-queue-device.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelPython/numba-dpex/HEAD/docs/asset/images/kernel-queue-device.png


--------------------------------------------------------------------------------
/numba_dpex/core/parfors/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 


--------------------------------------------------------------------------------
/numba_dpex/core/targets/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 


--------------------------------------------------------------------------------
/numba_dpex/core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/kernel_api/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | # Ignore everything
2 | *
3 | 
4 | # Allow files and directories
5 | !/scripts/builder_entrypoint.sh
6 | !/scripts/github_load.py
7 | 


--------------------------------------------------------------------------------
/numba_dpex/core/datamodel/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2022 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 


--------------------------------------------------------------------------------
/numba_dpex/core/overloads/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 


--------------------------------------------------------------------------------
/numba_dpex/core/pipelines/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 


--------------------------------------------------------------------------------
/numba_dpex/core/types/kernel_api/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 


--------------------------------------------------------------------------------
/numba_dpex/numba_patches/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/codegen/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 


--------------------------------------------------------------------------------
/numba_dpex/kernel_api_impl/spirv/math/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | numba_dpex/_version.py export-subst
2 | * text=auto eol=lf
3 | *.{cmd,[cC][mM][dD]} text eol=crlf
4 | *.{bat,[bB][aA][tT]} text eol=crlf
5 | 


--------------------------------------------------------------------------------
/docs/source/user_guide/kernel_programming/call-kernel-async.rst:
--------------------------------------------------------------------------------
1 | .. _launching-an-async-kernel:
2 | 
3 | Async kernel execution
4 | ======================
5 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/types/IntEnumLiteral/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/dpjit_tests/parfors/prange/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 


--------------------------------------------------------------------------------
/numba_dpex/kernel_api_impl/spirv/overloads/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/types/range_types/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 |   - package-ecosystem: "github-actions"
4 |     directory: "/"
5 |     schedule:
6 |       interval: "weekly"
7 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/misc/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | from . import *
6 | 


--------------------------------------------------------------------------------
/docs/source/config_options.rst:
--------------------------------------------------------------------------------
1 | .. _configopts:
2 | 
3 | Configuration Options
4 | #####################
5 | 
6 | .. include:: ./autoapi/numba_dpex/core/config/index.rst
7 | 


--------------------------------------------------------------------------------
/numba_dpex/core/typing/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2022 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | from . import typeof
6 | 


--------------------------------------------------------------------------------
/numba_dpex/dpnp_iface/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | from . import arrayobj
6 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/passes/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | from . import *
6 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/types/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | from . import *
6 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/debugging/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | from . import *
6 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/runtime/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | from . import *
6 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/dpjit_tests/dpnp/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | from . import *
6 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/kernel_tests/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | from . import *
6 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/types/USMNdArray/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | from . import *
6 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/dpjit_tests/parfors/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | from . import *
6 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/types/DpctlSyclEvent/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | from . import *
6 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/types/DpctlSyclQueue/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | from . import *
6 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/types/DpnpNdArray/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | from . import *
6 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | from .._helper import *
6 | from . import *
7 | 


--------------------------------------------------------------------------------
/docs/source/release-notes.rst:
--------------------------------------------------------------------------------
1 | .. _release-notes:
2 | .. include:: ./ext_links.txt
3 | 
4 | Release Notes
5 | =============
6 | 
7 | .. include:: ../../CHANGELOG.md
8 |    :parser: myst_parser.sphinx_
9 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | import numba_dpex
6 | from numba_dpex.core import config
7 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/dpjit_tests/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | from .._helper import *
6 | from . import *
7 | 


--------------------------------------------------------------------------------
/numba_dpex/core/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | from .datamodel import *
6 | from .types import *
7 | from .typing import *
8 | 


--------------------------------------------------------------------------------
/numba_dpex/kernel_api_impl/spirv/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | """A SPIR-V backend to compile the numba_dpex.kernel_api functions to SPIR-V.
6 | """
7 | 


--------------------------------------------------------------------------------
/docs/source/user_guide/index.rst:
--------------------------------------------------------------------------------
 1 | .. _index:
 2 | .. include:: ./../ext_links.txt
 3 | 
 4 | Tutorials
 5 | =========
 6 | 
 7 | .. toctree::
 8 |     :maxdepth: 2
 9 | 
10 |     kernel_programming/index
11 |     debugging/index
12 |     config
13 | 


--------------------------------------------------------------------------------
/.git-blame-ignore-revs:
--------------------------------------------------------------------------------
1 | # $ git config blame.ignoreRevsFile .git-blame-ignore-revs
2 | 
3 | # Migrate code style to Black
4 | 8bd62e61bb70fe0483bd494040e4103fd050252a
5 | 
6 | # Change black to use 80 chars
7 | 2b9c67489cc8a5c6f13b28ec752b29a20c8c9a5f
8 | 


--------------------------------------------------------------------------------
/scripts/build_conda_package.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | PYTHON_VERSION=$1
 4 | 
 5 | VERSIONS="--python $PYTHON_VERSION"
 6 | TEST="--no-test"
 7 | 
 8 | # shellcheck disable=SC2086
 9 | conda build \
10 |   $TEST \
11 |   $VERSIONS \
12 |   $CHANNELS \
13 |   conda-recipe
14 | 


--------------------------------------------------------------------------------
/docs/source/user_guide/kernel_programming/operators.rst:
--------------------------------------------------------------------------------
1 | The following Python operators are supported inside a ``kernel`` or
2 | ``device_func`` decorated function.
3 | 
4 | .. csv-table:: Current support matrix of Python operators
5 |    :file: ./operators.csv
6 |    :header-rows: 1
7 | 


--------------------------------------------------------------------------------
/numba_dpex/kernel_api_impl/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | """The module stores the numba_dpex backends implementing the target-specific
6 | code generation for the kernel_api Python functions.
7 | """
8 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/types/DpnpNdArray/test_bugs.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | from dpnp import ndarray as dpnp_ndarray
 6 | 
 7 | 
 8 | def test_dpnp_ndarray_flags():
 9 |     assert hasattr(dpnp_ndarray([1]), "flags")
10 | 


--------------------------------------------------------------------------------
/numba_dpex/dpctl_iface/__init__.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """
 6 | The ``dpctl_iface`` module implements Numba's interface to the libsyclinterface
 7 | library that provides C bindings to DPC++'s SYCL runtime API.
 8 | """
 9 | 
10 | from . import arrayobj
11 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/debugging/conftest.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | 
 3 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 4 | #
 5 | # SPDX-License-Identifier: Apache-2.0
 6 | 
 7 | import pytest
 8 | 
 9 | from .gdb import gdb
10 | 
11 | 
12 | @pytest.fixture
13 | def app():
14 |     g = gdb()
15 | 
16 |     yield g
17 | 
18 |     g.teardown_gdb()
19 | 


--------------------------------------------------------------------------------
/numba_dpex/kernel_api_impl/spirv/overloads/_registry.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """
 6 | Defines the lowering registry for the kernel_api SPIR-V overloads.
 7 | """
 8 | 
 9 | from numba.core.imputils import Registry
10 | 
11 | registry = Registry()
12 | lower = registry.lower
13 | 


--------------------------------------------------------------------------------
/docs/source/license.rst:
--------------------------------------------------------------------------------
 1 | .. _license:
 2 | .. include:: ./ext_links.txt
 3 | 
 4 | License
 5 | =======
 6 | 
 7 | Numba-dpex is licensed under the Apache License 2.0, which can be found in
 8 | `LICENSE <https://github.com/IntelPython/numba-dpex/blob/main/LICENSE>`_. All
 9 | use of and contributions to the project are subject to the terms and
10 | conditions of this license.
11 | 


--------------------------------------------------------------------------------
/numba_dpex/core/boxing/__init__.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """Contains the ``box`` and ``unbox`` functions for numba_dpex types that are
 6 | passable as arguments to a kernel or dpjit decorated function.
 7 | """
 8 | 
 9 | from .ranges import *
10 | from .usm_ndarray import *
11 | 


--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | - [ ] Have you provided a meaningful PR description?
2 | - [ ] Have you added a test, reproducer or referred to an issue with a reproducer?
3 | - [ ] Have you tested your changes locally for CPU and GPU devices?
4 | - [ ] Have you made sure that new changes do not introduce compiler warnings?
5 | - [ ] If this PR is a work in progress, are you filing the PR as a draft?
6 | 


--------------------------------------------------------------------------------
/numba_dpex/core/passes/__init__.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | from .parfor_legalize_cfd_pass import ParforLegalizeCFDPass
 6 | from .passes import DumpParforDiagnostics, NoPythonBackend
 7 | 
 8 | __all__ = [
 9 |     "DumpParforDiagnostics",
10 |     "ParforLegalizeCFDPass",
11 |     "NoPythonBackend",
12 | ]
13 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/docs/break_conditional:
--------------------------------------------------------------------------------
 1 | $ NUMBA_OPT=0 gdb-oneapi -q python
 2 | (gdb) set breakpoint pending on
 3 | (gdb) break simple_sum.py:23 if i == 1
 4 | (gdb) run simple_sum.py
 5 | ...
 6 | Thread 2.2 hit Breakpoint 1, with SIMD lane 1, __main__::data_parallel_sum () at simple_sum.py:23
 7 | 23          c[i] = a[i] + b[i]
 8 | (gdb) print i
 9 | $1 = 1
10 | (gdb) continue
11 | ...
12 | Done...
13 | 


--------------------------------------------------------------------------------
/docs/_static/css/custom.css:
--------------------------------------------------------------------------------
 1 | span.summarylabel { /* label colors swap the theme's secondary foreground and background */
 2 |     background-color: var(--color-foreground-secondary);
 3 |     color: var(--color-background-secondary);
 4 |     font-size: 70%;
 5 |     padding-left: 2px;
 6 |     padding-right: 2px;
 7 |     border-radius: 3px;
 8 |     vertical-align: 15%;
 9 |     padding-bottom: 2px;
10 |     filter: opacity(40%); /* the whole label is rendered at 40% opacity */
11 | }
12 | 
13 | table.summarytable { /* summary tables take the full width of their container */
14 |     width: 100%;
15 | }
16 | 


--------------------------------------------------------------------------------
/conda-recipe/run_test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -euxo pipefail  # exit on errors, unset variables and failed pipes; trace commands
 4 | unset ONEAPI_DEVICE_SELECTOR
 5 | # Run the test suite once per "<backend>:<device_type>" selector that dpctl reports for cpu and gpu devices.
 6 | for selector in $(python -c "import dpctl; print(\" \".join([dev.backend.name+\":\"+dev.device_type.name for dev in dpctl.get_devices() if dev.device_type.name in [\"cpu\",\"gpu\"]]))")
 7 | do
 8 |     ONEAPI_DEVICE_SELECTOR=$selector \
 9 |     pytest -q -ra --disable-warnings --pyargs numba_dpex -vv
10 | done
11 | 
12 | exit 0
13 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | 
 3 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 4 | #
 5 | # SPDX-License-Identifier: Apache-2.0
 6 | 
 7 | import pytest
 8 | 
 9 | offload_devices = [  # strings used to parametrize the ``offload_device`` fixture
10 |     "opencl:gpu:0",
11 |     "level_zero:gpu:0",
12 |     "opencl:cpu:0",
13 | ]
14 | 
15 | 
16 | @pytest.fixture(params=offload_devices, scope="module")
17 | def offload_device(request):
18 |     return request.param  # the ``offload_devices`` entry for this parametrized run
19 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/test_examples.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | 
 3 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 4 | #
 5 | # SPDX-License-Identifier: Apache-2.0
 6 | 
 7 | import os
 8 | 
 9 | import numba_dpex
10 | 
11 | 
12 | def test_examples_available():
13 |     package_path = os.path.dirname(numba_dpex.__file__)
14 |     examples_path = os.path.join(package_path, "examples")
15 | 
16 |     assert os.path.isdir(examples_path)
17 | 


--------------------------------------------------------------------------------
/docs/_templates/autoapi/python/property.rst:
--------------------------------------------------------------------------------
 1 | {%- if obj.display %}
 2 | .. py:property:: {{ obj.short_name }}
 3 |    {% if obj.annotation %}
 4 |    :type: {{ obj.annotation }}
 5 |    {% endif %}
 6 |    {% if obj.properties %}
 7 |    {% for property in obj.properties %}
 8 |    :{{ property }}:
 9 |    {% endfor %}
10 |    {% endif %}
11 | 
12 |    {% if obj.docstring %}
13 |    {{ obj.docstring|indent(3) }}
14 |    {% endif %}
15 | {% endif %}
16 | 


--------------------------------------------------------------------------------
/numba_dpex/core/runtime/__init__.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2021 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import llvmlite.binding as ll
 6 | 
 7 | from ._dpexrt_python import c_helpers
 8 | 
 9 | # Register the helper function in _dpexrt_python so that we can insert
10 | # calls to them via llvmlite.
11 | for (
12 |     py_name,
13 |     c_address,
14 | ) in c_helpers.items():
15 |     ll.add_symbol(py_name, c_address)
16 | 


--------------------------------------------------------------------------------
/conda-recipe/run_test.bat:
--------------------------------------------------------------------------------
 1 | set "ONEAPI_DEVICE_SELECTOR="
 2 | 
 3 | for /F "USEBACKQ tokens=* delims=" %%F in (
 4 | `python -c "import dpctl; print(\"\n\".join([dev.backend.name+\":\"+dev.device_type.name for dev in dpctl.get_devices() if dev.device_type.name in [\"cpu\",\"gpu\"]]))"`
 5 | ) do (
 6 |     set "ONEAPI_DEVICE_SELECTOR=%%F"
 7 |     REM Check the exit code with "if errorlevel 1": it is evaluated at run time, while a percent-expanded ERRORLEVEL is substituted once when the whole loop body is parsed.
 8 |     pytest -q -ra --disable-warnings --pyargs numba_dpex -vv
 9 |     if errorlevel 1 exit /B 1
10 | )
11 | 
12 | exit /B 0
13 | 


--------------------------------------------------------------------------------
/environment/pre-commit.yml:
--------------------------------------------------------------------------------
 1 | name: dev
 2 | channels:
 3 |   - dppy/label/dev
 4 |   - numba
 5 |   - conda-forge
 6 |   - nodefaults
 7 | dependencies:
 8 |   - libffi
 9 |   - numba==0.59*
10 |   - dpctl>=0.16*
11 |   - dpnp>=0.14*
12 |   - dpcpp-llvm-spirv
13 |   - intel-opencl-rt
14 |   - coverage
15 |   - pytest
16 |   - pytest-cov
17 |   - pytest-xdist
18 |   - pexpect
19 |   - scikit-build>=0.15*
20 |   - cmake>=3.26*
21 |   - pre-commit
22 |   - pylint
23 |   - versioneer
24 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/types/DpctlSyclEvent/test_overloads.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import dpctl
 6 | 
 7 | from numba_dpex import dpjit
 8 | 
 9 | 
10 | @dpjit
11 | def wait_call(a):
12 |     a.wait()  # the call under test: ``wait()`` inside dpjit-compiled code
13 |     return None
14 | 
15 | 
16 | def test_wait_DpctlSyclEvent():
17 |     """Test the dpctl.SyclEvent.wait() call overload."""
18 | 
19 |     e = dpctl.SyclEvent()
20 |     wait_call(e)
21 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/njit_basic.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | from numba_dpex import dpjit
 6 | 
 7 | 
 8 | @dpjit(debug=True)
 9 | def foo(arg):  # builds three locals from ``arg`` and prints them all
10 |     l1 = arg + 6
11 |     l2 = arg * 5.43
12 |     l3 = (arg, l1, l2, "bar")
13 |     print(arg, l1, l2, l3)
14 | 
15 | 
16 | def main():
17 |     foo(987)  # single call to the debug-compiled ``foo``
18 | 
19 | 
20 | if __name__ == "__main__":
21 |     main()
22 |     print("Done ...")  # printed only after ``main`` has returned
23 | 


--------------------------------------------------------------------------------
/numba_dpex/core/types/kernel_dispatcher_type.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """Defines the Numba type used for ``KernelDispatcher`` dispatchers.
 6 | """
 7 | 
 8 | from numba.core import types
 9 | 
10 | 
11 | class KernelDispatcherType(types.Dispatcher):
12 |     """Numba type for ``KernelDispatcher`` dispatchers."""
13 | 
14 |     def cast_python_value(self, args):
15 |         raise NotImplementedError  # casting Python values is not implemented for this type
16 | 


--------------------------------------------------------------------------------
/docs/backups/user_guides/kernel_programming_guide/index.rst:
--------------------------------------------------------------------------------
 1 | Programming SYCL Kernels Using :func:`~numba_dpex.decorators.kernel`
 2 | ====================================================================
 3 | 
 4 | .. toctree::
 5 |    :maxdepth: 2
 6 | 
 7 |    writing_kernels
 8 |    memory-management
 9 |    synchronization
10 |    device-functions
11 |    atomic-operations
12 |    selecting_device
13 |    memory_allocation_address_space
14 |    reduction
15 |    ufunc
16 |    supported-python-features
17 | 


--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
 1 | # Security Policy
 2 | 
 3 | ## Report a Vulnerability
 4 | 
 5 | Please report security issues or vulnerabilities to the [Intel® Security Center].
 6 | 
 7 | For more information on how Intel® works to resolve security issues, see
 8 | [Vulnerability Handling Guidelines].
 9 | 
10 | [Intel® Security Center]:https://www.intel.com/content/www/us/en/security-center/default.html
11 | 
12 | [Vulnerability Handling Guidelines]:https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html
13 | 


--------------------------------------------------------------------------------
/environment/coverage.yml:
--------------------------------------------------------------------------------
 1 | name: dev
 2 | channels:
 3 |   - dppy/label/dev
 4 |   - numba
 5 |   - conda-forge
 6 |   - nodefaults
 7 | dependencies:
 8 |   - libffi
 9 |   - gcc_linux-64
10 |   - dpcpp_linux-64>=2024.2
11 |   - sysroot_linux-64=2.28
12 |   - numba==0.59*
13 |   - dpctl
14 |   - dpnp
15 |   - dpcpp-llvm-spirv
16 |   - intel-opencl-rt
17 |   - coverage
18 |   - pytest
19 |   - pytest-cov
20 |   - pytest-xdist
21 |   - pexpect
22 |   - scikit-build>=0.15*
23 |   - cmake>=3.26*
24 |   - ninja
25 |   - versioneer
26 | 


--------------------------------------------------------------------------------
/docs/source/user_guide/debugging/features.rst:
--------------------------------------------------------------------------------
 1 | .. include:: ./../../ext_links.txt
 2 | 
 3 | Supported Features
 4 | ==================
 5 | 
 6 | Numba-dpex and Intel® Distribution for GDB* provide at least the
 7 | following debugging features:
 8 | 
 9 | .. toctree::
10 |     :maxdepth: 2
11 | 
12 |     breakpoints
13 |     stepping
14 |     frame_info
15 |     backtrace
16 |     data
17 |     symbols
18 |     altering
19 | 
20 | Other topics:
21 | 
22 | .. toctree::
23 |     :maxdepth: 2
24 | 
25 |     local_variables
26 |     numba-0.55
27 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/_helper.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | 
 3 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 4 | #
 5 | # SPDX-License-Identifier: Apache-2.0
 6 | 
 7 | import dpctl
 8 | 
 9 | 
10 | def has_gpu(backend="opencl"):
11 |     return bool(dpctl.get_num_devices(backend=backend, device_type="gpu"))
12 | 
13 | 
14 | def has_cpu(backend="opencl"):
15 |     return bool(dpctl.get_num_devices(backend=backend, device_type="cpu"))
16 | 
17 | 
18 | def has_sycl_platforms():
19 |     return bool(len(dpctl.get_platforms()))
20 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/kernel_tests/test_compiler_warnings.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import pytest
 6 | 
 7 | from numba_dpex import kernel
 8 | from numba_dpex.kernel_api import Item
 9 | 
10 | 
11 | def _kernel(item: Item, a, b, c):
12 |     i = item.get_id(0)
13 |     c[i] = a[i] + b[i]
14 | 
15 | 
16 | def test_compilation_mode_option_user_definition():
17 |     with pytest.warns(UserWarning):  # passing ``_compilation_mode`` explicitly must warn
18 |         kernel(_compilation_mode="kernel")(_kernel)
19 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/misc/test_parse_sem_version.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | from numba_dpex import parse_sem_version
 6 | 
 7 | 
 8 | class TestParseSemVersion:
 9 |     def test_parse_sem_version(self):
10 |         assert parse_sem_version("0.56.4") == (0, 56, 4)
11 |         assert parse_sem_version("0.57.0") == (0, 57, 0)
12 |         assert parse_sem_version("0.57.0rc1") == (0, 57, 0)
13 |         assert parse_sem_version("0.58.1dev0") == (0, 58, 1)
14 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/docs/break_func:
--------------------------------------------------------------------------------
 1 | $ NUMBA_OPT=0 gdb-oneapi -q python
 2 | (gdb) set breakpoint pending on
 3 | (gdb) break data_parallel_sum
 4 | (gdb) run simple_sum.py
 5 | ...
 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at simple_sum.py:20
 7 | 20          @dpex.kernel(debug=True)
 8 | (gdb) continue
 9 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::data_parallel_sum () at simple_sum.py:20
10 | 20          @dpex.kernel(debug=True)
11 | (gdb) continue
12 | ...
13 | Done...
14 | 


--------------------------------------------------------------------------------
/numba_dpex/dpctl_iface/dpctlimpl.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | from numba.core.imputils import Registry
 6 | 
 7 | registry = Registry("dpctlimpl")
 8 | # Module-level aliases for the registration methods of ``registry``.
 9 | lower_builtin = registry.lower
10 | lower_getattr = registry.lower_getattr
11 | lower_getattr_generic = registry.lower_getattr_generic
12 | lower_setattr = registry.lower_setattr
13 | lower_setattr_generic = registry.lower_setattr_generic
14 | lower_cast = registry.lower_cast
15 | lower_constant = registry.lower_constant
16 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/docs/break_line_number:
--------------------------------------------------------------------------------
 1 | $ NUMBA_OPT=0 gdb-oneapi -q python
 2 | (gdb) set breakpoint pending on
 3 | (gdb) break simple_sum.py:20
 4 | (gdb) run simple_sum.py
 5 | ...
 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at simple_sum.py:20
 7 | 20          @dpex.kernel(debug=True)
 8 | (gdb) continue
 9 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::data_parallel_sum () at simple_sum.py:20
10 | 20          @dpex.kernel(debug=True)
11 | (gdb) continue
12 | ...
13 | Done...
14 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/break_conditional:
--------------------------------------------------------------------------------
 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/break_conditional python
 2 | set trace-commands on
 3 | set pagination off
 4 | set breakpoint pending on
 5 | break simple_sum.py:23 if i == 1
 6 | run simple_sum.py
 7 | # Expected:
 8 | # ...
 9 | # Thread 2.2 hit Breakpoint 1, with SIMD lane 1, __main__::data_parallel_sum () at simple_sum.py:23
10 | # 23          c[i] = a[i] + b[i]
11 | print i
12 | # Expected:
13 | # $1 = 1
14 | continue
15 | # Expected:
16 | # ...
17 | # Done...
18 | echo Done\n
19 | quit
20 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/docs/break_file_func:
--------------------------------------------------------------------------------
 1 | $ NUMBA_OPT=0 gdb-oneapi -q python
 2 | (gdb) set breakpoint pending on
 3 | (gdb) break simple_sum.py:data_parallel_sum
 4 | (gdb) run simple_sum.py
 5 | ...
 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at simple_sum.py:20
 7 | 20          @dpex.kernel(debug=True)
 8 | (gdb) continue
 9 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::data_parallel_sum () at simple_sum.py:20
10 | 20          @dpex.kernel(debug=True)
11 | (gdb) continue
12 | ...
13 | Done...
14 | 


--------------------------------------------------------------------------------
/.github/release.yml:
--------------------------------------------------------------------------------
 1 | changelog:
 2 |   exclude:
 3 |     labels:
 4 |       - ignore-for-release-notes
 5 |   categories:
 6 |     - title: Added
 7 |       labels:
 8 |         - enhancement
 9 |         - feature
10 |     - title: Changed
11 |       labels:
12 |         - feature-change
13 |     - title: Fixed
14 |       labels:
15 |         - bug
16 |     - title: Removed
17 |       labels:
18 |         - removed-feature
19 |     - title: Deprecated
20 |       labels:
21 |         - deprecated-feature
22 |     - title: Other Changes
23 |       labels:
24 |         - "*"
25 | 


--------------------------------------------------------------------------------
/docs/_templates/autoapi/index.rst:
--------------------------------------------------------------------------------
 1 | API Reference
 2 | =============
 3 | 
 4 | This page contains auto-generated API reference documentation [#f1]_.
 5 | 
 6 | .. toctree::
 7 |    :maxdepth: 1
 8 | 
 9 |    numba_dpex/kernel_api/index
10 |    numba_dpex/core/decorators/index
11 |    numba_dpex/core/kernel_launcher/index
12 | 
13 |    {% for page in pages %}
14 |    {% if page.top_level_object and page.display %}
15 |    {{ page.include_path }}
16 |    {% endif %}
17 |    {% endfor %}
18 | 
19 | .. [#f1] Created with `sphinx-autoapi <https://github.com/readthedocs/sphinx-autoapi>`_
20 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/docs/break_nested_func:
--------------------------------------------------------------------------------
 1 | $ NUMBA_OPT=0 gdb-oneapi -q python
 2 | (gdb) set breakpoint pending on
 3 | (gdb) break simple_dpex_func.py:func_sum
 4 | (gdb) run simple_dpex_func.py
 5 | ...
 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::func_sum () at simple_dpex_func.py:22
 7 | 22          result = a_in_func + b_in_func
 8 | (gdb) continue
 9 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::func_sum () at simple_dpex_func.py:22
10 | 22          result = a_in_func + b_in_func
11 | (gdb) continue
12 | ...
13 | Done...
14 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/docs/simple_sum:
--------------------------------------------------------------------------------
 1 | $ NUMBA_OPT=0 gdb-oneapi -q python
 2 | (gdb) set breakpoint pending on
 3 | (gdb) break simple_sum.py:22
 4 | (gdb) run simple_sum.py
 5 | ...
 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at simple_sum.py:22
 7 | 22           i = dpex.get_global_id(0)
 8 | (gdb) next
 9 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::data_parallel_sum () at simple_sum.py:22
10 | 22           i = dpex.get_global_id(0)
11 | (gdb) next
12 | 23           c[i] = a[i] + b[i]
13 | (gdb) continue
14 | ...
15 | Done...
16 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/dpjit/vector_sum2D.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import dpnp as np
 6 | 
 7 | from numba_dpex import dpjit
 8 | 
 9 | 
10 | @dpjit
11 | def f1(a, b):
12 |     c = a + b
13 |     return c
14 | 
15 | 
16 | N = 1000
17 | print("N", N)
18 | # Module-level inputs: two N x N float32 arrays of ones (``np`` is dpnp here).
19 | a = np.ones((N, N), dtype=np.float32)
20 | b = np.ones((N, N), dtype=np.float32)
21 | 
22 | print("a:", a)
23 | print("b:", b)
24 | 
25 | 
26 | def main():
27 |     c = f1(a, b)
28 |     print("c:", c)
29 | 
30 | 
31 | if __name__ == "__main__":
32 |     main()
33 | 


--------------------------------------------------------------------------------
/docs/_templates/autoapi/python/function.rst:
--------------------------------------------------------------------------------
 1 | {% if obj.display %}
 2 | .. py:function:: {{ obj.short_name }}({{ obj.args }}){% if obj.return_annotation is not none %} -> {{ obj.return_annotation }}{% endif %}
 3 | 
 4 | {% for (args, return_annotation) in obj.overloads %}
 5 |                  {{ obj.short_name }}({{ args }}){% if return_annotation is not none %} -> {{ return_annotation }}{% endif %}
 6 | 
 7 | {% endfor %}
 8 |    {% for property in obj.properties %}
 9 |    :{{ property }}:
10 |    {% endfor %}
11 | 
12 |    {% if obj.docstring %}
13 |    {{ obj.docstring|indent(3) }}
14 |    {% endif %}
15 | {% endif %}
16 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.pyc
 2 | *.o
 3 | *.so
 4 | *.dylib
 5 | *.pyd
 6 | *.pdb
 7 | *.egg-info
 8 | *.sw[po]
 9 | *.out
10 | *.ll
11 | .nfs*
12 | tags
13 | MANIFEST
14 | 
15 | .tmp/
16 | build/
17 | docs/_build/
18 | docs/gh-pages/
19 | dist/
20 | .idea/
21 | .vscode/
22 | .mypy_cache/
23 | .ipynb_checkpoints/
24 | __pycache__/
25 | _skbuild
26 | 
27 | docs/source/developer/autogen*
28 | 
29 | # Ignore versioneer generated files
30 | numba_dpex/_version.py
31 | 
32 | # Ignore generated cpp files
33 | numba_dpex/dpnp_iface/*.cpp
34 | numba_dpex/dpnp_iface/*.h
35 | 
36 | # Ignore coverage results
37 | .coverage
38 | coverage.xml
39 | htmlcov/
40 | 


--------------------------------------------------------------------------------
/numba_dpex/core/parfors/kernel_templates/__init__.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """Provides string templates for numba_dpex.kernel decorated functions.
 6 | 
 7 | During lowering of a parfor node using the SPIRVKernelTarget, the node is
 8 | first converted into a kernel function. The module provides a set of templates
 9 | to generate the basic stub of a kernel function. The string template is
10 | compiled down to Numba IR using the Numba compiler front end and then the
11 | necessary body of the kernel function is inserted directly as Numba IR.
12 | """
13 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/docs/step_sum:
--------------------------------------------------------------------------------
 1 | $ NUMBA_OPT=0 gdb-oneapi -q python
 2 | (gdb) set breakpoint pending on
 3 | (gdb) break simple_sum.py:22
 4 | (gdb) run simple_sum.py
 5 | ...
 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at simple_sum.py:22
 7 | 22           i = dpex.get_global_id(0)
 8 | (gdb) step
 9 | [Switching to Thread 1.1073742080 lane 0]
10 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::data_parallel_sum () at simple_sum.py:22
11 | 22          i = dpex.get_global_id(0)
12 | (gdb) step
13 | 23          c[i] = a[i] + b[i]
14 | (gdb) continue
15 | ...
16 | Done...
17 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/kernel_tests/test_func.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import dpnp
 6 | import numpy
 7 | 
 8 | import numba_dpex as dpex
 9 | 
10 | 
11 | @dpex.device_func
12 | def g(a):
13 |     return a + dpnp.float32(1)  # the increment is a float32 constant
14 | 
15 | 
16 | @dpex.kernel
17 | def f(item, a, b):
18 |     i = item.get_id(0)
19 |     b[i] = g(a[i])
20 | 
21 | 
22 | def test_func_call_from_kernel():
23 |     a = dpnp.ones(1024)
24 |     b = dpnp.ones(1024)
25 | 
26 |     dpex.call_kernel(f, dpex.Range(1024), a, b)
27 |     nb = dpnp.asnumpy(b)
28 |     assert numpy.all(nb == 2)
29 | 


--------------------------------------------------------------------------------
/numba_dpex/register_kernel_api_overloads.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """Imports and registers kernel_api_impl target-specific overloads.
 6 | """
 7 | 
 8 | 
 9 | def init_kernel_api_spirv_overloads():
10 |     """Import the SPIR-V overload modules of ``kernel_api_impl`` for their
11 |     import-time effects; the imported names themselves are not used here.
12 |     """
13 |     from .kernel_api_impl.spirv.overloads import (
14 |         _atomic_fence_overloads,
15 |         _atomic_ref_overloads,
16 |         _group_barrier_overloads,
17 |         _index_space_id_overloads,
18 |         _private_array_overloads,
19 |     )
20 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/docs/next:
--------------------------------------------------------------------------------
 1 | $ NUMBA_OPT=0 gdb-oneapi -q python
 2 | (gdb) set breakpoint pending on
 3 | (gdb) break simple_dpex_func.py:29
 4 | (gdb) run simple_dpex_func.py
 5 | ...
 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::kernel_sum () at simple_dpex_func.py:29
 7 | 29          c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
 8 | (gdb) next
 9 | [Switching to Thread 1.1073742080 lane 0]
10 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::kernel_sum () at simple_dpex_func.py:29
11 | 29          c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
12 | (gdb) next
13 | ...
14 | Done...
15 | 


--------------------------------------------------------------------------------
/numba_dpex/core/runtime/_queuestruct.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | //===----------------------------------------------------------------------===//
 6 | ///
 7 | /// \file
 8 | /// Defines the numba-dpex native representation for a dpctl.SyclQueue
 9 | ///
10 | //===----------------------------------------------------------------------===//
11 | 
12 | #pragma once
13 | 
14 | #include "numba/core/runtime/nrt_external.h"
15 | #include <Python.h>
16 | 
17 | typedef struct
18 | {
19 |     NRT_MemInfo *meminfo; // NRT meminfo pointer carried with the queue
20 |     PyObject *parent;     // Python-side object; presumably the dpctl.SyclQueue - confirm
21 |     void *queue_ref;      // opaque queue handle; presumably dpctl's queue reference - confirm
22 | } queuestruct_t;
23 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/docs/local_variables_1:
--------------------------------------------------------------------------------
 1 | $ NUMBA_OPT=1 gdb-oneapi -q python
 2 | (gdb) set breakpoint pending on
 3 | (gdb) break sum_local_vars.py:22
 4 | (gdb) run sum_local_vars.py
 5 | ...
 6 | Thread 2.1 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at sum_local_vars.py:22
 7 | 22           i = dpex.get_global_id(0)
 8 | (gdb) info locals
 9 | __ocl_dbg_gid0 = 8
10 | __ocl_dbg_gid1 = 0
11 | __ocl_dbg_gid2 = 0
12 | __ocl_dbg_lid0 = 8
13 | __ocl_dbg_lid1 = 0
14 | __ocl_dbg_lid2 = 0
15 | __ocl_dbg_grid0 = 0
16 | __ocl_dbg_grid1 = 0
17 | __ocl_dbg_grid2 = 0
18 | i = 0
19 | l1 = 0
20 | l2 = 0
21 | (gdb) continue
22 | ...
23 | Done...
24 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/simple_sum.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import dpnp as np
 6 | 
 7 | import numba_dpex as ndpx
 8 | 
 9 | 
10 | @ndpx.kernel(debug=True)
11 | def data_parallel_sum(item, a, b, c):  # per work item: c[i] = a[i] + b[i]
12 |     i = item.get_id(0)
13 |     c[i] = a[i] + b[i]  # Condition breakpoint location
14 | 
15 | 
16 | global_size = 10
17 | N = global_size
18 | # Inputs are length-N float32 arrays of random values; ``c`` starts as ones.
19 | a = np.array(np.random.random(N), dtype=np.float32)
20 | b = np.array(np.random.random(N), dtype=np.float32)
21 | c = np.ones_like(a)
22 | # Invoke the kernel over ``ndpx.Range(global_size)``.
23 | ndpx.call_kernel(data_parallel_sum, ndpx.Range(global_size), a, b, c)
24 | 
25 | print("Done...")
26 | 


--------------------------------------------------------------------------------
/docs/_templates/autoapi/python/method.rst:
--------------------------------------------------------------------------------
 1 | {%- if obj.display %}
 2 | .. py:method:: {{ obj.short_name }}({{ obj.args }}){% if obj.return_annotation is not none %} -> {{ obj.return_annotation }}{% endif %}
 3 | 
 4 | {% for (args, return_annotation) in obj.overloads %}
 5 |                {{ obj.short_name }}({{ args }}){% if return_annotation is not none %} -> {{ return_annotation }}{% endif %}
 6 | 
 7 | {% endfor %}
 8 |    {% if obj.properties %}
 9 |    {% for property in obj.properties %}
10 |    :{{ property }}:
11 |    {% endfor %}
12 | 
13 |    {% else %}
14 | 
15 |    {% endif %}
16 |    {% if obj.docstring %}
17 |    {{ obj.docstring|indent(3) }}
18 |    {% endif %}
19 | {% endif %}
20 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/break_func:
--------------------------------------------------------------------------------
 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/break_func python
 2 | set trace-commands on
 3 | set pagination off
 4 | set breakpoint pending on
 5 | break data_parallel_sum
 6 | run simple_sum.py
 7 | # Expected:
 8 | # ...
 9 | # Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at simple_sum.py:20
10 | # 20          @dpex.kernel(debug=True)
11 | continue
12 | # Expected:
13 | # Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::data_parallel_sum () at simple_sum.py:20
14 | # 20          @dpex.kernel(debug=True)
15 | continue
16 | # Expected:
17 | # ...
18 | # Done...
19 | echo Done\n
20 | quit
21 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/break_line_number:
--------------------------------------------------------------------------------
 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/break_line_number python
 2 | set trace-commands on
 3 | set pagination off
 4 | set breakpoint pending on
 5 | break simple_sum.py:20
 6 | run simple_sum.py
 7 | # Expected:
 8 | # ...
 9 | # Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at simple_sum.py:20
10 | # 20          @dpex.kernel(debug=True)
11 | continue
12 | # Expected:
13 | # Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::data_parallel_sum () at simple_sum.py:20
14 | # 20          @dpex.kernel(debug=True)
15 | continue
16 | # Expected:
17 | # ...
18 | # Done...
19 | echo Done\n
20 | quit
21 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/docs/backtrace_kernel:
--------------------------------------------------------------------------------
 1 | $ NUMBA_OPT=0 gdb-oneapi -q python
 2 | (gdb) set breakpoint pending on
 3 | (gdb) break simple_dpex_func.py:28
 4 | (gdb) run simple_dpex_func.py
 5 | ...
 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::kernel_sum () at simple_dpex_func.py:28
 7 | 28          i = dpex.get_global_id(0)
 8 | (gdb) backtrace
 9 | #0  __main__::kernel_sum () at simple_dpex_func.py:28
10 | (gdb) continue
11 | ...
12 | [Switching to Thread 1.1073742080 lane 0]
13 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::kernel_sum () at simple_dpex_func.py:28
14 | 28          i = dpex.get_global_id(0)
15 | (gdb) continue
16 | ...
17 | Done...
18 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/sum_local_vars.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import dpnp as np
 6 | 
 7 | import numba_dpex as ndpx
 8 | 
 9 | 
10 | @ndpx.kernel(debug=True)
11 | def data_parallel_sum(item, a, b, c):
12 |     i = item.get_id(0)
13 |     l1 = a[i] + 2.5
14 |     l2 = b[i] * 0.3
15 |     c[i] = l1 + l2
16 | 
17 | 
18 | global_size = 10
19 | N = global_size
20 | 
21 | a = np.array(np.random.random(N), dtype=np.float32)
22 | b = np.array(np.random.random(N), dtype=np.float32)
23 | c = np.ones_like(a)
24 | 
25 | ndpx.call_kernel(data_parallel_sum, ndpx.Range(global_size), a, b, c)
26 | 
27 | print("Done...")
28 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/types/DpctlSyclQueue/test_box.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """
 6 | Tests for boxing for dpctl.SyclQueue
 7 | """
 8 | 
 9 | import dpnp
10 | import pytest
11 | from dpctl import SyclQueue
12 | 
13 | from numba_dpex import dpjit
14 | 
15 | 
def test_boxing_without_parent():
    """Test boxing of a queue that does not have a parent.

    The queue is read from an array allocated inside the ``dpjit`` function
    and is returned to the Python caller.
    """

    @dpjit
    def func() -> SyclQueue:
        arr = dpnp.empty(10)
        queue = arr.sycl_queue
        return queue

    q: SyclQueue = func()

    # Exercise the returned queue object: its device must report a non-empty
    # filter string.
    assert len(q.sycl_device.filter_string) > 0
28 | 


--------------------------------------------------------------------------------
/numba_dpex/core/runtime/_eventstruct.c:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | #include "_eventstruct.h"
 6 | #include "_dbg_printer.h"
 7 | 
/*!
 * @brief A destructor that is called from NRT on object destruction. Deletes
 * the dpctl event reference.
 *
 * @param    data           A dpctl event reference (DPCTLSyclEventRef) passed
 *                          as an untyped pointer.
 * @return   Nothing.
 */
void NRT_MemInfo_EventRef_Delete(void *data)
{
    DPCTLSyclEventRef eref = data;

    DPCTLEvent_Delete(eref);

    // The debug message is printed after the event reference was deleted.
    DPEXRT_DEBUG(
        drt_debug_print("DPEXRT-DEBUG: deleting dpctl event reference.\n"););
}
24 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/break_file_func:
--------------------------------------------------------------------------------
 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/break_file_func python
 2 | set trace-commands on
 3 | set pagination off
 4 | set breakpoint pending on
 5 | break simple_sum.py:data_parallel_sum
 6 | run simple_sum.py
 7 | # Expected:
 8 | # ...
 9 | # Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at simple_sum.py:20
10 | # 20          @dpex.kernel(debug=True)
11 | continue
12 | # Expected:
13 | # Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::data_parallel_sum () at simple_sum.py:20
14 | # 20          @dpex.kernel(debug=True)
15 | continue
16 | # Expected:
17 | # ...
18 | # Done...
19 | echo Done\n
20 | quit
21 | 


--------------------------------------------------------------------------------
/numba_dpex/core/runtime/experimental/tools/dpctl.cpp:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | #include "dpctl.hpp"
 6 | #include <CL/sycl.hpp>
 7 | 
namespace std
{

/// Hashes a device reference by unwrapping it to the underlying sycl::device
/// and delegating to std::hash<sycl::device>.
/// NOTE(review): the unwrapped pointer is dereferenced without a null check —
/// confirm callers never pass a null reference.
size_t
hash<DPCTLSyclDeviceRef>::operator()(const DPCTLSyclDeviceRef &DRef) const
{
    using dpctl::syclinterface::unwrap;
    return hash<sycl::device>()(*unwrap<sycl::device>(DRef));
}

/// Hashes a context reference by unwrapping it to the underlying
/// sycl::context and delegating to std::hash<sycl::context>.
/// NOTE(review): same unchecked dereference as the device overload above.
size_t
hash<DPCTLSyclContextRef>::operator()(const DPCTLSyclContextRef &CRef) const
{
    using dpctl::syclinterface::unwrap;
    return hash<sycl::context>()(*unwrap<sycl::context>(CRef));
}
} // namespace std
25 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/break_nested_func:
--------------------------------------------------------------------------------
 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/break_nested_func python
 2 | set trace-commands on
 3 | set pagination off
 4 | set breakpoint pending on
 5 | break simple_dpex_func.py:func_sum
 6 | run simple_dpex_func.py
 7 | # Expected:
 8 | # ...
 9 | # Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::func_sum () at simple_dpex_func.py:22
10 | # 22          result = a_in_func + b_in_func
11 | continue
12 | # Expected:
13 | # Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::func_sum () at simple_dpex_func.py:22
14 | # 22          result = a_in_func + b_in_func
15 | continue
16 | # Expected:
17 | # ...
18 | # Done...
19 | echo Done\n
20 | quit
21 | 


--------------------------------------------------------------------------------
/.clang-format:
--------------------------------------------------------------------------------
 1 | BasedOnStyle: LLVM
 2 | IndentWidth: 4
 3 | AccessModifierOffset: -4
 4 | AlignEscapedNewlines: Right
 5 | AllowAllParametersOfDeclarationOnNextLine: false
 6 | BinPackParameters: false
 7 | BreakBeforeBraces: Custom
 8 | BraceWrapping:
 9 |   AfterCaseLabel:  true
10 |   AfterClass:      true
11 |   AfterControlStatement: MultiLine
12 |   AfterEnum:       true
13 |   AfterFunction:   true
14 |   AfterNamespace:  true
15 |   AfterObjCDeclaration: false
16 |   AfterStruct:     true
17 |   AfterUnion:      true
18 |   AfterExternBlock: true
19 |   BeforeCatch:     false
20 |   BeforeElse:      true
21 |   IndentBraces:    false
22 |   SplitEmptyFunction: true
23 |   SplitEmptyRecord: true
24 |   SplitEmptyNamespace: true
25 | 


--------------------------------------------------------------------------------
/scripts/diag_env.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # See https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal.html
 4 | 
 5 | check_package_installed() {
 6 |   apt list --installed 2>/dev/null "$1" | grep "$1" || echo "$1 not installed"
 7 | }
 8 | 
 9 | check_package_installed    intel-opencl-icd
10 | check_package_installed    intel-level-zero-gpu
11 | check_package_installed    level-zero
12 | check_package_installed    intel-media-va-driver-non-free
13 | check_package_installed    libmfx1
14 | 
15 | check_package_installed    libigc-dev
16 | check_package_installed    intel-igc-cm
17 | check_package_installed    libigdfcl-dev
18 | check_package_installed    libigfxcmrt-dev
19 | check_package_installed    level-zero-dev
20 | 


--------------------------------------------------------------------------------
/.github/workflows/cpp_style_checks.yml:
--------------------------------------------------------------------------------
 1 | # This is a workflow to format C/C++ sources with clang-format
 2 | 
 3 | name: C++ Code Style
 4 | 
 5 | # Controls when the action will run. Triggers the workflow on every pull
 6 | # request and on pushes to the master branch only
 7 | on:
 8 |   pull_request:
 9 |   push:
10 |     branches: [master]
11 | 
12 | permissions: read-all
13 | 
14 | jobs:
15 |   formatting-check:
16 |     name: clang-format
17 |     runs-on: ubuntu-latest
18 |     steps:
19 |     - uses: actions/checkout@v4
20 |     - name: Run clang-format style check for C/C++ programs.
21 |       uses: jidicula/clang-format-action@v4.13.0
22 |       with:
23 |         clang-format-version: '14'
24 |         check-path: 'numba_dpex/dpctl_iface'
25 | 


--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
 1 | [flake8]
 2 | # E501 line too long
 3 | # E722 do not use bare 'except'
 4 | # F401 imported but unused
 5 | # W503 line break before binary operator
 6 | ignore = E501,E722,F401,W503
 7 | per-file-ignores =
 8 |     # redefinition of unused 'dpnp_random_impl'
 9 |     dpnp_randomimpl.py: F811
10 |     # module level import not at top of file
11 |     device_init.py: E402
12 |     # 'from . import *' used; unable to detect undefined names
13 |     __init__.py: F403
14 |     # module level import not at top of file
15 |     target.py: E402
16 |     # config should contain code line examples in comments
17 |     docs/source/conf.py: E800
18 | exclude =
19 |     .git,
20 |     __pycache__,
21 |     _version.py,
22 |     lowerer.py,
23 |     parfor.py
24 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/docs/backtrace:
--------------------------------------------------------------------------------
 1 | $ NUMBA_OPT=0 gdb-oneapi -q python
 2 | (gdb) set breakpoint pending on
 3 | (gdb) break simple_dpex_func.py:22
 4 | (gdb) run simple_dpex_func.py
 5 | ...
 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::func_sum () at simple_dpex_func.py:22
 7 | 22          result = a_in_func + b_in_func
 8 | (gdb) backtrace
 9 | #0  __main__::func_sum () at simple_dpex_func.py:22
10 | #1  __main__::kernel_sum () at simple_dpex_func.py:29
11 | (gdb) continue
12 | ...
13 | [Switching to Thread 1.1073742080 lane 0]
14 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::func_sum () at simple_dpex_func.py:22
15 | 22          result = a_in_func + b_in_func
16 | (gdb) continue
17 | ...
18 | Done...
19 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/simple_sum:
--------------------------------------------------------------------------------
 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/simple_sum python
 2 | set trace-commands on
 3 | set pagination off
 4 | set breakpoint pending on
 5 | break simple_sum.py:22
 6 | run simple_sum.py
 7 | # Expected:
 8 | # ...
 9 | # Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at simple_sum.py:22
10 | # 22           i = dpex.get_global_id(0)
11 | next
12 | # Expected:
13 | # Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::data_parallel_sum () at simple_sum.py:22
14 | # 22           i = dpex.get_global_id(0)
15 | next
16 | # Expected:
17 | # 23           c[i] = a[i] + b[i]
18 | continue
19 | # Expected:
20 | # ...
21 | # Done...
22 | echo Done\n
23 | quit
24 | 


--------------------------------------------------------------------------------
/.github/workflows/license.yml:
--------------------------------------------------------------------------------
 1 | name: license
 2 | 
 3 | on:
 4 |   pull_request:
 5 |   push:
 6 |     branches: [main]
 7 | 
 8 | permissions: read-all
 9 | 
10 | jobs:
11 |   license:
12 |     runs-on: ubuntu-latest
13 | 
14 |     steps:
15 |     - uses: actions/checkout@v4
16 |     - uses: actions/setup-go@v5
17 |       with:
18 |         go-version: '1.18'
19 |     - name: Install addlicense
20 |       run: |
21 |         export PATH=${PATH}:`go env GOPATH`/bin
22 |         go install github.com/google/addlicense@latest
23 |     - name: Check license
24 |       run: |
25 |         export PATH=${PATH}:`go env GOPATH`/bin
26 |         addlicense -check -l apache -c "Intel Corporation" -ignore numba_dpex/_version.py numba_dpex/**/*.py numba_dpex/*.py setup.py
27 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/docs/stepi:
--------------------------------------------------------------------------------
 1 | $ NUMBA_OPT=0 gdb-oneapi -q python
 2 | (gdb) set breakpoint pending on
 3 | (gdb) break simple_dpex_func.py:29
 4 | (gdb) run simple_dpex_func.py
 5 | ...
 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::kernel_sum () at simple_dpex_func.py:29
 7 | 29          c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
 8 | (gdb) stepi
 9 | 0x00000000fffeb630      29          c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
10 | (gdb) stepi
11 | [Switching to Thread 1.1073742080 lane 0]
12 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::kernel_sum () at simple_dpex_func.py:29
13 | 29          c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
14 | (gdb) continue
15 | ...
16 | Done...
17 | 


--------------------------------------------------------------------------------
/numba_dpex/core/runtime/_usmarraystruct.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | //===----------------------------------------------------------------------===//
 6 | ///
 7 | /// \file
 8 | /// Defines the numba-dpex native representation for a dpctl.tensor.usm_ndarray
 9 | ///
10 | //===----------------------------------------------------------------------===//
11 | 
12 | #pragma once
13 | 
14 | #include <Python.h>
15 | #include <numpy/npy_common.h>
16 | 
// Native struct used by numba-dpex to represent a dpctl.tensor.usm_ndarray.
// NOTE(review): the per-field notes are inferred from the field names only —
// confirm against the code that fills in this struct.
typedef struct
{
    void *meminfo;      // presumably the NRT meminfo for the data — confirm
    PyObject *parent;   // presumably the originating Python object — confirm
    npy_intp nitems;
    npy_intp itemsize;
    void *data;
    void *sycl_queue;

    // Flexible array member: it has no fixed size and must stay the last
    // field of the struct.
    npy_intp shape_and_strides[];
} usmarystruct_t;
28 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/docs/step_dpex_func:
--------------------------------------------------------------------------------
 1 | $ NUMBA_OPT=0 gdb-oneapi -q python
 2 | (gdb) set breakpoint pending on
 3 | (gdb) break simple_dpex_func.py:29
 4 | (gdb) run simple_dpex_func.py
 5 | ...
 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::kernel_sum () at simple_dpex_func.py:29
 7 | 29          c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
 8 | (gdb) step
 9 | [Switching to Thread 1.1073742080 lane 0]
10 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::kernel_sum () at simple_dpex_func.py:29
11 | 29          c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
12 | (gdb) step
13 | __main__::func_sum () at simple_dpex_func.py:22
14 | 22          result = a_in_func + b_in_func
15 | (gdb) continue
16 | ...
17 | Done...
18 | 


--------------------------------------------------------------------------------
/docs/backups/user_guides/kernel_programming_guide/atomic-operations.rst:
--------------------------------------------------------------------------------
 1 | Supported Atomic Operations
 2 | ===========================
 3 | 
 4 | Numba-dpex supports some of the atomic operations supported in DPC++.
 5 | Those that are presently implemented are as follows:
 6 | 
 7 | .. automodule:: numba_dpex.ocl.stubs
 8 |    :members: atomic
 9 |    :noindex:
10 | 
11 | Example
12 | -------
13 | 
14 | Example usage of atomic operations
15 | 
16 | .. literalinclude:: ../../../numba_dpex/examples/atomic_op.py
17 |    :pyobject: main
18 | 
19 | .. note::
20 | 
21 |     The ``numba_dpex.atomic.add`` function is analogous to the
22 |     ``numba.cuda.atomic.add`` provided by the ``numba.cuda`` backend.
23 | 
24 | Full examples
25 | -------------
26 | 
27 | - ``numba_dpex/examples/atomic_op.py``
28 | 


--------------------------------------------------------------------------------
/numba_dpex/core/types/numba_types_short_names.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | from numba.core.types import Boolean, Float, Integer, NoneType
 6 | 
 7 | # Short names for numba types supported in dpex kernel
 8 | 
 9 | none = NoneType("none")
10 | 
11 | boolean = bool_ = Boolean("bool")
12 | 
13 | uint32 = Integer("uint32")
14 | uint64 = Integer("uint64")
15 | int32 = Integer("int32")
16 | int64 = Integer("int64")
17 | float32 = Float("float32")
18 | float64 = Float("float64")
19 | 
20 | 
21 | # Aliases to NumPy type names
22 | 
23 | b1 = bool_
24 | i4 = int32
25 | i8 = int64
26 | u4 = uint32
27 | u8 = uint64
28 | f4 = float32
29 | f8 = float64
30 | 
31 | float_ = float32
32 | double = float64
33 | void = none
34 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/next:
--------------------------------------------------------------------------------
 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/next python
 2 | set trace-commands on
 3 | set pagination off
 4 | set breakpoint pending on
 5 | break simple_dpex_func.py:29
 6 | run simple_dpex_func.py
 7 | # Expected:
 8 | # ...
 9 | # Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::kernel_sum () at simple_dpex_func.py:29
10 | # 29          c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
11 | next
12 | # Expected:
13 | # [Switching to Thread 1.1073742080 lane 0]
14 | # Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::kernel_sum () at simple_dpex_func.py:29
15 | # 29          c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
16 | next
17 | # Expected:
18 | # ...
19 | # Done...
20 | echo Done\n
21 | quit
22 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/step_sum:
--------------------------------------------------------------------------------
 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/step_sum python
 2 | set trace-commands on
 3 | set pagination off
 4 | set breakpoint pending on
 5 | break simple_sum.py:22
 6 | run simple_sum.py
 7 | # Expected:
 8 | # ...
 9 | # Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at simple_sum.py:22
10 | # 22           i = dpex.get_global_id(0)
11 | step
12 | # Expected:
13 | # [Switching to Thread 1.1073742080 lane 0]
14 | # Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::data_parallel_sum () at simple_sum.py:22
15 | # 22          i = dpex.get_global_id(0)
16 | step
17 | # Expected:
18 | # 23          c[i] = a[i] + b[i]
19 | continue
20 | # Expected:
21 | # ...
22 | # Done...
23 | echo Done\n
24 | quit
25 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/dpjit_tests/dpnp/test_target_specific_overload.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """
 6 | Tests if dpnp dpex specific overloads are not available at numba njit.
 7 | """
 8 | 
 9 | import dpnp
10 | import pytest
11 | from numba import njit
12 | from numba.core import errors
13 | 
14 | from numba_dpex import dpjit
15 | 
16 | 
@pytest.mark.parametrize("func", [dpnp.empty, dpnp.ones, dpnp.zeros])
def test_dpnp_dpex_target(func):
    """Check that a dpnp allocator compiles with ``dpjit`` but not ``njit``.

    The same Python function is wrapped by both decorators: the ``dpjit``
    variant has to run, the ``njit`` variant has to fail with a typing or
    unsupported-feature error.
    """

    def dpnp_func():
        func(10)

    compiled_with_njit = njit(dpnp_func)
    compiled_with_dpjit = dpjit(dpnp_func)

    # The dpex target knows the overload, so this call must succeed.
    compiled_with_dpjit()

    # Plain numba must not see the dpex-specific overload.
    expected_errors = (errors.TypingError, errors.UnsupportedError)
    with pytest.raises(expected_errors):
        compiled_with_njit()
28 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/kernel_tests/test_invalid_kernel_args.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import numpy
 6 | import pytest
 7 | 
 8 | import numba_dpex as dpex
 9 | from numba_dpex import kernel_api as kapi
10 | 
11 | N = 1024
12 | 
13 | 
@dpex.kernel
def vecadd_kernel(item: kapi.Item, a, b, c):
    """Store ``a + b`` into ``c`` at the index of the current work item."""
    idx = item.get_id(0)
    total = a[idx] + b[idx]
    c[idx] = total
18 | 
19 | 
def test_passing_numpy_arrays_as_kernel_args():
    """Negative test: launching a kernel with NumPy arrays must raise."""
    lhs, rhs = numpy.ones(N), numpy.ones(N)
    out = numpy.zeros(N)
    kernel_args = (lhs, rhs, out)

    with pytest.raises(Exception):
        dpex.call_kernel(vecadd_kernel, dpex.Range(N), *kernel_args)
30 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/docs/info_func:
--------------------------------------------------------------------------------
 1 | $ NUMBA_OPT=0 gdb-oneapi -q python
 2 | (gdb) set breakpoint pending on
 3 | (gdb) break simple_sum.py:22
 4 | (gdb) run simple_sum.py
 5 | (gdb) info functions data_parallel_sum
 6 | ...
 7 | All functions matching regular expression "data_parallel_sum":
 8 | File simple_sum.py:
 9 | 20:         void __main__::data_parallel_sum(Array<float, 1, C, mutable, aligned>, Array<float, 1, C, mutable, aligned>, Array<float, 1, C, mutable, aligned>);
10 | (gdb) continue
11 | (gdb) info functions __main__
12 | ...
13 | All functions matching regular expression "__main__":
14 | 20:         void __main__::data_parallel_sum(Array<float, 1, C, mutable, aligned>, Array<float, 1, C, mutable, aligned>, Array<float, 1, C, mutable, aligned>);
15 | (gdb) continue
16 | ...
17 | Done...
18 | 


--------------------------------------------------------------------------------
/numba_dpex/core/lowering.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """Registers any custom lowering functions to default Numba lowering registry.
 6 | """
 7 | from numba.core.imputils import Registry
 8 | 
 9 | from .types import KernelDispatcherType
10 | 
11 | registry = Registry()
12 | lower_constant = registry.lower_constant
13 | 
14 | 
@lower_constant(KernelDispatcherType)
def dpex_dispatcher_const(context, builder=None, ty=None, pyval=None):
    """Dummy lowering function for a KernelDispatcherType object.

    The dummy lowering function for the KernelDispatcher types is added so that
    a :func:`numba_dpex.core.decorators.kernel` decorated function can be passed
    as an argument to dpjit.

    Numba invokes constant-lowering implementations with four positional
    arguments ``(context, builder, ty, pyval)``. The last three are accepted
    (with defaults, so existing one-argument calls keep working) and ignored.

    Args:
        context: The lowering context; only ``get_dummy_value`` is used.
        builder: Unused.
        ty: Unused.
        pyval: Unused.

    Returns:
        The dummy value provided by ``context.get_dummy_value()``.
    """
    return context.get_dummy_value()
24 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/local_variables_1:
--------------------------------------------------------------------------------
 1 | # Run: NUMBA_OPT=1 gdb-oneapi -q -command commands/local_variables_1 python
 2 | set trace-commands on
 3 | set pagination off
 4 | set breakpoint pending on
 5 | break sum_local_vars.py:22
 6 | run sum_local_vars.py
 7 | # Expected:
 8 | # ...
 9 | # Thread 2.1 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at sum_local_vars.py:22
10 | # 22           i = dpex.get_global_id(0)
11 | info locals
12 | # __ocl_dbg_gid0 = 8
13 | # __ocl_dbg_gid1 = 0
14 | # __ocl_dbg_gid2 = 0
15 | # __ocl_dbg_lid0 = 8
16 | # __ocl_dbg_lid1 = 0
17 | # __ocl_dbg_lid2 = 0
18 | # __ocl_dbg_grid0 = 0
19 | # __ocl_dbg_grid1 = 0
20 | # __ocl_dbg_grid2 = 0
21 | # i = 0
22 | # l1 = 0
23 | # l2 = 0
24 | continue
25 | # Expected:
26 | # ...
27 | # Done...
28 | echo Done\n
29 | quit
30 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = ./source
 9 | BUILDDIR      = _build
10 | AUTOAPIDIR    = source/autoapi
11 | 
12 | # Put it first so that "make" without argument is like "make help".
13 | help:
14 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
15 | 
# "clean" is declared phony so that it still runs when a file or directory
# named "clean" exists next to this Makefile.
.PHONY: help clean Makefile
17 | 
18 | # Catch-all target: route all unknown targets to Sphinx using the new
19 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
20 | %: Makefile
21 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
22 | 
23 | clean:
24 | 	rm -rf "$(BUILDDIR)" "$(AUTOAPIDIR)"
25 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/sum_local_vars_revive.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import dpnp as np
 6 | 
 7 | import numba_dpex as ndpx
 8 | 
 9 | 
10 | @ndpx.device_func
11 | def revive(x):
12 |     return x
13 | 
14 | 
15 | @ndpx.kernel(debug=True)
16 | def data_parallel_sum(item, a, b, c):
17 |     i = item.get_id(0)
18 |     l1 = a[i] + 2.5
19 |     l2 = b[i] * 0.3
20 |     c[i] = l1 + l2
21 |     revive(a)  # pass variable to dummy function
22 | 
23 | 
24 | global_size = 10
25 | N = global_size
26 | 
27 | a = np.array(np.random.random(N), dtype=np.float32)
28 | b = np.array(np.random.random(N), dtype=np.float32)
29 | c = np.ones_like(a)
30 | 
31 | ndpx.call_kernel(data_parallel_sum, ndpx.Range(global_size), a, b, c)
32 | 
33 | print("Done...")
34 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/debugging/test_backtraces.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | 
 3 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 4 | #
 5 | # SPDX-License-Identifier: Apache-2.0
 6 | 
 7 | """Tests for Backtraces
 8 | 
 9 | https://www.sourceware.org/gdb/onlinedocs/gdb/Backtrace.html
10 | """
11 | 
12 | from numba_dpex.tests._helper import skip_no_gdb
13 | 
14 | pytestmark = skip_no_gdb
15 | 
16 | 
def test_backtrace(app):
    """Check that ``backtrace`` reports the device function and its caller.

    commands/backtrace
    """
    location = "simple_dpex_func.py:12"

    # Stop inside the device function.
    app.breakpoint(location)
    app.run("simple_dpex_func.py")
    app.expect_hit_breakpoint(location)

    app.backtrace()

    # Frame #0 is expected to be the device function, frame #1 the kernel.
    frame_patterns = (
        r"#0.*func_sum.* at simple_dpex_func.py:12",
        r"#1.*kernel_sum",
    )
    for pattern in frame_patterns:
        app.expect(pattern, with_eol=True)
30 | 


--------------------------------------------------------------------------------
/environment/docs.yml:
--------------------------------------------------------------------------------
 1 | name: dpex-docs-dev
 2 | channels:
 3 |   - dppy/label/dev
 4 |   - numba
 5 |   - conda-forge
 6 |   - nodefaults
 7 | dependencies:
 8 |   - libffi
 9 |   - gcc_linux-64
10 |   - dpcpp_linux-64>=2024.2
11 |   - sysroot_linux-64=2.28
12 |   - numba==0.59*
13 |   - scikit-build>=0.15*
14 |   - cmake>=3.26*
15 |   - ninja
16 |   - dpctl>=0.16*
17 |   - dpnp>=0.14*
18 |   - dpcpp-llvm-spirv
19 |   - intel-opencl-rt
20 |   - versioneer
21 |   - pip
22 |   - pip:
23 |     - sphinx
24 |     - sphinx-autoapi==3.0.0
25 |     - autodoc # there is no conda package
26 |     - recommonmark
27 |     - sphinx-rtd-theme
28 |     - sphinxcontrib-apidoc
29 |     - sphinxcontrib-bibtex
30 |     - sphinxcontrib-googleanalytics
31 |     - sphinxcontrib.programoutput
32 |     - pydata-sphinx-theme
33 |     - myst-parser
34 |     - furo
35 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/dpjit/vector_sum.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import dpnp as np
 6 | 
 7 | from numba_dpex import dpjit
 8 | 
 9 | 
@dpjit
def f1(a, b):
    """Return the sum ``a + b`` computed inside a ``dpjit`` function."""
    return a + b
14 | 
15 | 
def main():
    """Add two arrays of ones with ``f1`` and print the outcome.

    Every element of the result is compared with 2.0 and the first index that
    differs, if any, is reported.
    """
    global_size, local_size = 64, 32
    N = global_size * local_size
    print("N", N)

    a, b = (np.ones(N, dtype=np.float32) for _ in range(2))

    for operand in (a, b):
        print(operand)

    c = f1(a, b)
    print("RESULT c:", c)

    # Locate the first element that differs from the expected value.
    first_bad = next((i for i in range(N) if c[i] != 2.0), None)
    if first_bad is not None:
        print("First index not equal to 2.0 was", first_bad)

    print("Done...")
37 | 
38 | 
39 | if __name__ == "__main__":
40 |     main()
41 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/simple_dpex_func.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import dpnp as np
 6 | 
 7 | import numba_dpex as ndpx
 8 | 
 9 | 
@ndpx.device_func(debug=True)
def func_sum(a_in_func, b_in_func):
    result = a_in_func + b_in_func  # breakpoint location: tests/debugging/test_backtraces.py breaks on line 12 of this file, keep it there
    return result
14 | 
15 | 
16 | @ndpx.kernel(debug=True)
17 | def kernel_sum(item, a_in_kernel, b_in_kernel, c_in_kernel):
18 |     i = item.get_id(0)
19 |     c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
20 | 
21 | 
22 | global_size = 10
23 | a = np.arange(global_size, dtype=np.float32)
24 | b = np.arange(global_size, dtype=np.float32)
25 | c = np.empty_like(a)
26 | 
27 | ndpx.call_kernel(kernel_sum, ndpx.Range(global_size), a, b, c)
28 | 
29 | print("Done...")
30 | 


--------------------------------------------------------------------------------
/numba_dpex/kernel_api/flag_enum.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """
 6 | Provides a FlagEnum class to help distinguish IntEnum types that numba_dpex
 7 | intends to use as Integer literal types inside the compiler type inferring
 8 | infrastructure.
 9 | """
10 | from enum import IntEnum
11 | 
12 | 
class FlagEnum(IntEnum):
    """Base class marking IntEnum types that numba_dpex should treat as Numba
    Literal types.
    """

    @classmethod
    def basetype(cls) -> int:
        """Return a dummy int object that helps numba_dpex infer the type of
        an instance of a FlagEnum class.

        Returns:
            int: Dummy int value, always ``0``.
        """
        return 0
27 | 


--------------------------------------------------------------------------------
/numba_dpex/core/runtime/experimental/tools/dpctl.hpp:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2020 - 2023 Intel Corporation
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | //===----------------------------------------------------------------------===//
 6 | ///
 7 | /// \file
 8 | /// Defines overloads to dpctl library that eventually must be ported there.
 9 | ///
10 | //===----------------------------------------------------------------------===//
11 | 
12 | #pragma once
13 | #include "syclinterface/dpctl_sycl_type_casters.hpp"
14 | 
15 | namespace std
16 | {
17 | template <> struct hash<DPCTLSyclDeviceRef>
18 | {
19 |     size_t operator()(const DPCTLSyclDeviceRef &DRef) const;
20 | };
21 | 
22 | template <> struct hash<DPCTLSyclContextRef>
23 | {
24 |     size_t operator()(const DPCTLSyclContextRef &CRef) const;
25 | };
26 | } // namespace std
27 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/misc/test_warnings.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import warnings
 6 | 
 7 | import dpnp
 8 | import pytest
 9 | 
10 | import numba_dpex as dpex
11 | from numba_dpex.core import config
12 | 
13 | 
14 | @dpex.kernel
15 | def foo(item, a):
16 |     a[item.get_id(0)] = 0
17 | 
18 | 
19 | def test_inline_threshold_negative_val_warning_():
20 |     bkp = config.INLINE_THRESHOLD
21 |     config.INLINE_THRESHOLD = -1
22 | 
23 |     with pytest.warns(UserWarning):
24 |         dpex.call_kernel(foo, dpex.Range(10), dpnp.arange(10))
25 | 
26 |     config.INLINE_THRESHOLD = bkp
27 | 
28 | 
29 | def test_no_warning():
30 |     with warnings.catch_warnings():
31 |         warnings.simplefilter("error")
32 |         dpex.call_kernel(foo, dpex.Range(10), dpnp.arange(10))
33 | 


--------------------------------------------------------------------------------
/.github/workflows/pre-commit.yml:
--------------------------------------------------------------------------------
 1 | name: pre-commit
 2 | 
 3 | on:
 4 |   pull_request:
 5 |   push:
 6 |     branches: [main]
 7 | 
 8 | permissions: read-all
 9 | 
10 | jobs:
11 |   pre-commit:
12 |     runs-on: ubuntu-20.04
13 |     defaults:
14 |       run:
15 |         shell: bash -el {0}
16 |     steps:
17 |     - uses: actions/checkout@v4
18 |     - uses: conda-incubator/setup-miniconda@v3
19 |       with:
20 |         python-version: '3.11'
21 |         activate-environment: "coverage"
22 |         channel-priority: "disabled"
23 |         environment-file: environment/pre-commit.yml
24 |     - uses: actions/cache@v4
25 |       with:
26 |         path: ~/.cache/pre-commit
27 |         key: pre-commit-3|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') }}
28 |     - run: pre-commit run --show-diff-on-failure --color=always --all-files
29 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/backtrace_kernel:
--------------------------------------------------------------------------------
 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/backtrace_kernel python
 2 | set trace-commands on
 3 | set pagination off
 4 | set breakpoint pending on
 5 | break simple_dpex_func.py:28
 6 | run simple_dpex_func.py
 7 | # Expected:
 8 | # ...
 9 | # Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::kernel_sum () at simple_dpex_func.py:28
10 | # 28          i = dpex.get_global_id(0)
11 | backtrace
12 | # Expected:
13 | # #0  __main__::kernel_sum () at simple_dpex_func.py:28
14 | continue
15 | # Expected:
16 | # ...
17 | # [Switching to Thread 1.1073742080 lane 0]
18 | # Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::kernel_sum () at simple_dpex_func.py:28
19 | # 28          i = dpex.get_global_id(0)
20 | continue
21 | # Expected:
22 | # ...
23 | # Done...
24 | echo Done\n
25 | quit
26 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/docs/stepping:
--------------------------------------------------------------------------------
 1 | $ NUMBA_OPT=0 gdb-oneapi -q python
 2 | (gdb) set breakpoint pending on
 3 | (gdb) break simple_dpex_func.py:29
 4 | (gdb) run simple_dpex_func.py
 5 | ...
 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::kernel_sum () at simple_dpex_func.py:29
 7 | 29          c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
 8 | (gdb) step
 9 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::kernel_sum () at simple_dpex_func.py:29
10 | 29          c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
11 | (gdb) step
12 | __main__::func_sum () at simple_dpex_func.py:22
13 | 22          result = a_in_func + b_in_func
14 | (gdb) backtrace
15 | #0  __main__::func_sum () at simple_dpex_func.py:22
16 | #1  __main__::kernel_sum () at simple_dpex_func.py:29
17 | (gdb) continue
18 | ...
19 | Done...
20 | 


--------------------------------------------------------------------------------
/numba_dpex/core/runtime/_eventstruct.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | //===----------------------------------------------------------------------===//
 6 | ///
 7 | /// \file
 8 | /// Defines the numba-dpex native representation for a dpctl.SyclEvent
 9 | ///
10 | //===----------------------------------------------------------------------===//
11 | 
12 | #ifndef _EVENTSTRUCT_H_
13 | #define _EVENTSTRUCT_H_
14 | 
15 | #include "_nrt_helper.h"
16 | #include "dpctl_sycl_interface.h"
17 | #include "numba/core/runtime/nrt_external.h"
18 | #include <Python.h>
19 | 
20 | typedef struct
21 | {
22 |     NRT_MemInfo *meminfo;
23 |     PyObject *parent;
24 |     void *event_ref;
25 | } eventstruct_t;
26 | 
27 | void NRT_MemInfo_EventRef_Delete(void *data);
28 | 
29 | #endif /* _EVENTSTRUCT_H_ */
30 | 


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
 1 | name: dev
 2 | channels:
 3 |   - defaults
 4 |   - dppy/label/dev
 5 |   - numba
 6 |   - intel
 7 |   - numba/label/dev
 8 |   - nodefaults
 9 | dependencies:
10 |   - python=3.9
11 |   - gxx_linux-64
12 |   - dpcpp_linux-64>=2023.2,!=2024.0.1,!=2024.0.2
13 |   - numba ==0.58*
14 |   - dpctl >=0.14*
15 |   - dpnp >=0.11*
16 |   - mkl >=2021.3.0 # for dpnp
17 |   - dpcpp-llvm-spirv
18 |   - scikit-build >=0.15*
19 |   - cmake >=3.26*
20 |   - pytest
21 |   - pip
22 |   - pip:
23 |       - coverage
24 |       - pre-commit
25 |       - flake8
26 |       - black==20.8b1
27 |       - pytest-cov
28 |       - pytest-xdist
29 |       - pexpect
30 | variables:
31 |   CHANNELS: -c defaults -c numba -c intel -c numba/label/dev -c dppy/label/dev --override-channels
32 |   CHANNELS_DEV: -c dppy/label/dev -c defaults -c numba -c intel -c numba/label/dev --override-channels
33 | 


--------------------------------------------------------------------------------
/numba_dpex/core/runtime/_nrt_helper.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | #ifndef _NRT_HELPER_H_
 6 | #define _NRT_HELPER_H_
 7 | 
 8 | #define NO_IMPORT_ARRAY
 9 | #include "_meminfo_helper.h"
10 | 
11 | void *NRT_MemInfo_external_allocator(NRT_MemInfo *mi);
12 | void *NRT_MemInfo_data(NRT_MemInfo *mi);
13 | void NRT_MemInfo_release(NRT_MemInfo *mi);
14 | void NRT_MemInfo_call_dtor(NRT_MemInfo *mi);
15 | void NRT_MemInfo_acquire(NRT_MemInfo *mi);
16 | size_t NRT_MemInfo_size(NRT_MemInfo *mi);
17 | void *NRT_MemInfo_parent(NRT_MemInfo *mi);
18 | size_t NRT_MemInfo_refcount(NRT_MemInfo *mi);
19 | void NRT_Free(void *ptr);
20 | void NRT_dealloc(NRT_MemInfo *mi);
21 | void NRT_MemInfo_destroy(NRT_MemInfo *mi);
22 | void NRT_MemInfo_pyobject_dtor(void *data);
23 | 
24 | #endif /* _NRT_HELPER_H_ */
25 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/docs/sheduler_locking:
--------------------------------------------------------------------------------
 1 | $ NUMBA_OPT=0 gdb-oneapi -q python
 2 | (gdb) set breakpoint pending on
 3 | (gdb) break simple_dpex_func.py:29
 4 | (gdb) run simple_dpex_func.py
 5 | ...
 6 | Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::kernel_sum () at simple_dpex_func.py:29
 7 | 29          c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
 8 | (gdb) set scheduler-locking step
 9 | (gdb) step
10 | __main__::func_sum () at simple_dpex_func.py:22
11 | 22          result = a_in_func + b_in_func
12 | (gdb) step
13 | 23          return result
14 | (gdb) continue
15 | ...
16 | [Switching to Thread 1.1073742080 lane 0]
17 | Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::kernel_sum () at simple_dpex_func.py:29
18 | 29          c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
19 | (gdb) continue
20 | ...
21 | Done...
22 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/types/DpnpNdArray/test_dpnp_ndarray_type.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """
 6 | Tests for numba_dpex.dpnp_ndarray.typeof
 7 | """
 8 | 
 9 | import pytest
10 | from dpnp import ndarray as dpnp_ndarray
11 | from numba import typeof
12 | 
13 | from numba_dpex.core.types.dpnp_ndarray_type import DpnpNdArray
14 | 
15 | 
16 | @pytest.mark.parametrize(
17 |     "array_type, expected_numba_type",
18 |     [
19 |         (dpnp_ndarray, DpnpNdArray),
20 |     ],
21 | )
22 | @pytest.mark.parametrize(
23 |     "shape, expected_ndim",
24 |     [
25 |         ([1], 1),
26 |         ([1, 1], 2),
27 |     ],
28 | )
29 | def test_typeof(array_type, shape, expected_numba_type, expected_ndim):
30 |     array = array_type(shape)
31 |     assert isinstance(typeof(array), expected_numba_type)
32 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/types/range_types/test_constructor_overloads.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import pytest
 6 | 
 7 | from numba_dpex import NdRange, Range, dpjit
 8 | 
 9 | ranges = [(10,), (10, 10), (10, 10, 10)]
10 | 
11 | 
12 | @pytest.mark.parametrize("r", ranges)
13 | def test_range_ctor(r):
14 |     @dpjit
15 |     def _tester(r):
16 |         return Range(*r)
17 | 
18 |     r_expected = Range(*r)
19 |     r_out = _tester(r)
20 | 
21 |     assert r_out == r_expected
22 | 
23 | 
24 | @pytest.mark.parametrize("r", ranges)
25 | def test_ndrange_unbox_box(r):
26 |     @dpjit
27 |     def _tester(r):
28 |         gr = lr = Range(*r)
29 |         return NdRange(gr, lr)
30 | 
31 |     gr = lr = Range(*r)
32 |     r_expected = NdRange(gr, lr)
33 |     r_out = _tester(r)
34 | 
35 |     assert r_out == r_expected
36 | 


--------------------------------------------------------------------------------
/docs/source/user_guide/debugging/limitations.rst:
--------------------------------------------------------------------------------
 1 | .. include:: ./../../ext_links.txt
 2 | 
 3 | Limitations
 4 | ===========
 5 | 
 6 | The following functionality is **limited** or **not supported**.
 7 | 
 8 | Altering arguments modified in code
 9 | -----------------------------------
10 | 
11 | Altering arguments has a limitation. For it to work correctly, the
12 | arguments should not be modified in code.
13 | See `Numba issue <https://github.com/numba/numba/pull/7196>`_.
14 | 
15 | See :ref:`assignment-to-variables`.
16 | 
17 | Using Numba's direct ``gdb`` bindings in ``nopython`` mode
18 | ----------------------------------------------------------
19 | 
20 | Using Numba's direct ``gdb`` bindings in ``nopython`` mode is not supported in
21 | numba-dpex.
22 | 
23 | See `Numba documentation <https://numba.pydata.org/numba-doc/latest/user/troubleshoot.html#using-numba-s-direct-gdb-bindings-in-nopython-mode>`_.
24 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/backtrace:
--------------------------------------------------------------------------------
 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/backtrace python
 2 | set trace-commands on
 3 | set pagination off
 4 | set breakpoint pending on
 5 | break simple_dpex_func.py:22
 6 | run simple_dpex_func.py
 7 | # Expected:
 8 | # ...
 9 | # Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::func_sum () at simple_dpex_func.py:22
10 | # 22          result = a_in_func + b_in_func
11 | backtrace
12 | # Expected:
13 | # #0  __main__::func_sum () at simple_dpex_func.py:22
14 | # #1  __main__::kernel_sum () at simple_dpex_func.py:29
15 | continue
16 | # Expected:
17 | # ...
18 | # [Switching to Thread 1.1073742080 lane 0]
19 | # Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::func_sum () at simple_dpex_func.py:22
20 | # 22          result = a_in_func + b_in_func
21 | continue
22 | # Expected:
23 | # ...
24 | # Done...
25 | echo Done\n
26 | quit
27 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/step_dpex_func:
--------------------------------------------------------------------------------
 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/step_dpex_func python
 2 | set trace-commands on
 3 | set pagination off
 4 | set breakpoint pending on
 5 | break simple_dpex_func.py:29
 6 | run simple_dpex_func.py
 7 | # Expected:
 8 | # ...
 9 | # Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::kernel_sum () at simple_dpex_func.py:29
10 | # 29          c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
11 | step
12 | # Expected:
13 | # [Switching to Thread 1.1073742080 lane 0]
14 | # Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::kernel_sum () at simple_dpex_func.py:29
15 | # 29          c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
16 | step
17 | # __main__::func_sum () at simple_dpex_func.py:22
18 | # 22          result = a_in_func + b_in_func
19 | continue
20 | # Expected:
21 | # ...
22 | # Done...
23 | echo Done\n
24 | quit
25 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/stepi:
--------------------------------------------------------------------------------
 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/stepi python
 2 | set trace-commands on
 3 | set pagination off
 4 | set breakpoint pending on
 5 | break simple_dpex_func.py:29
 6 | run simple_dpex_func.py
 7 | # Expected:
 8 | # ...
 9 | # Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::kernel_sum () at simple_dpex_func.py:29
10 | # 29          c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
11 | stepi
12 | # Expected:
13 | # 0x00000000fffeb630      29          c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
14 | stepi
15 | # Expected:
16 | # [Switching to Thread 1.1073742080 lane 0]
17 | # Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::kernel_sum () at simple_dpex_func.py:29
18 | # 29          c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
19 | continue
20 | # Expected:
21 | # ...
22 | # Done...
23 | echo Done\n
24 | quit
25 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/test_dpjit_target.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """Tests for class DpexTargetContext."""
 6 | 
 7 | 
 8 | import pytest
 9 | from numba.core import typing
10 | from numba.core.codegen import JITCPUCodegen
11 | 
12 | from numba_dpex.core.targets.dpjit_target import DpexTargetContext
13 | 
14 | ctx = typing.Context()
15 | dpexctx = DpexTargetContext(ctx)
16 | 
17 | 
18 | def test_dpjit_target():
19 |     assert dpexctx.lower_extensions == {}
20 |     assert dpexctx.is32bit is False
21 |     assert dpexctx.dpexrt is not None
22 |     assert (
23 |         isinstance(dpexctx._internal_codegen, type(JITCPUCodegen("numba.exec")))
24 |         == 1
25 |     )
26 | 
27 | 
28 | def test_dpjit_target_refresh():
29 |     try:
30 |         dpexctx.refresh
31 |     except KeyError:
32 |         pytest.fail("Unexpected KeyError in dpjit_target.")
33 | 


--------------------------------------------------------------------------------
/docs/source/user_guide/kernel_programming/math-functions.rst:
--------------------------------------------------------------------------------
 1 | .. include:: ./../../ext_links.txt
 2 | 
 3 | 
 4 | Scalar mathematical functions from the Python `math`_ module and the `dpnp`_
 5 | library can be used inside a kernel function. During compilation the
 6 | mathematical functions get compiled into device-specific intrinsic instructions.
 7 | 
 8 | 
 9 | .. csv-table:: Current support matrix of ``math`` module functions
10 |    :file: ./math-functions.csv
11 |    :widths: 30, 70
12 |    :header-rows: 1
13 | 
14 | .. caution::
15 | 
16 |    The supported signature for some of the ``math`` module functions in the
17 |    compiled mode differs from CPython. The divergence in behavior is a known
18 |    issue. Please refer to https://github.com/IntelPython/numba-dpex/issues/759 for
19 |    updates.
20 | 
21 | .. csv-table:: Current support matrix of ``dpnp`` functions
22 |    :file: ./dpnp-ufuncs.csv
23 |    :widths: auto
24 |    :header-rows: 1
25 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/info_func:
--------------------------------------------------------------------------------
 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/info_func python
 2 | set trace-commands on
 3 | set pagination off
 4 | set breakpoint pending on
 5 | break simple_sum.py:22
 6 | run simple_sum.py
 7 | info functions data_parallel_sum
 8 | # Expected:
 9 | # ...
10 | # All functions matching regular expression "data_parallel_sum":
11 | # File simple_sum.py:
12 | # 20:         void __main__::data_parallel_sum(Array<float, 1, C, mutable, aligned>, Array<float, 1, C, mutable, aligned>, Array<float, 1, C, mutable, aligned>);
13 | continue
14 | info functions __main__
15 | # Expected:
16 | # ...
17 | # All functions matching regular expression "__main__":
18 | # 20:         void __main__::data_parallel_sum(Array<float, 1, C, mutable, aligned>, Array<float, 1, C, mutable, aligned>, Array<float, 1, C, mutable, aligned>);
19 | continue
20 | # Expected:
21 | # ...
22 | # Done...
23 | echo Done\n
24 | quit
25 | 


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | .. _index:
 2 | .. include:: ./ext_links.txt
 3 | 
 4 | Data Parallel Extension for Numba*
 5 | ==================================
 6 | 
 7 | Numba-dpex is an open-source kernel-programming API and JIT compiler for
 8 | portable accelerator programming directly in Python. The API and the compiler are
 9 | modeled after the C++ SYCL* language and brings a similar programming model and
10 | language design to Python. The page lists the relevant documentation to learn to
11 | program data-parallel kernels using numba-dpex.
12 | 
13 | .. module:: numba_dpex
14 | 
15 | .. toctree::
16 |    :maxdepth: 1
17 | 
18 |    overview
19 |    getting_started
20 |    programming_model
21 |    user_guide/index
22 |    autoapi/index
23 |    config_options
24 |    supported_sycl_features
25 |    experimental/index
26 |    useful_links
27 | 
28 | .. toctree::
29 |    :maxdepth: 1
30 |    :caption: Miscellaneous Notes
31 | 
32 |    license
33 |    release-notes
34 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/types/IntEnumLiteral/test_type_creation.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | from enum import IntEnum
 6 | 
 7 | import pytest
 8 | 
 9 | from numba_dpex.core.exceptions import IllegalIntEnumLiteralValueError
10 | from numba_dpex.core.types import IntEnumLiteral
11 | from numba_dpex.kernel_api.flag_enum import FlagEnum
12 | 
13 | 
14 | def test_intenumliteral_creation():
15 |     """Tests the creation of an IntEnumLiteral type."""
16 | 
17 |     class DummyFlags(FlagEnum):
18 |         DUMMY = 0
19 | 
20 |     try:
21 |         IntEnumLiteral(DummyFlags)
22 |     except:
23 |         pytest.fail("Unexpected failure in IntEnumLiteral initialization")
24 | 
25 |     with pytest.raises(IllegalIntEnumLiteralValueError):
26 | 
27 |         class SomeKindOfUnknownEnum(IntEnum):
28 |             UNKNOWN_FLAG = 1
29 | 
30 |         IntEnumLiteral(SomeKindOfUnknownEnum)
31 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/types/DpctlSyclEvent/test_models.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | from numba.core.datamodel import models
 6 | 
 7 | from numba_dpex.core.datamodel.models import (
 8 |     SyclEventModel,
 9 |     dpjit_data_model_manager,
10 | )
11 | from numba_dpex.core.types.dpctl_types import DpctlSyclEvent
12 | 
13 | 
14 | def test_model_for_DpctlSyclEvent():
15 |     """Test the data model for DpctlSyclEvent that is registered with the
16 |     ``dpjit_data_model_manager``.
17 |     """
18 |     sycl_event = DpctlSyclEvent()
19 |     default_model = dpjit_data_model_manager.lookup(sycl_event)
20 |     assert isinstance(default_model, SyclEventModel)
21 | 
22 | 
23 | def test_sycl_event_Model():
24 |     """Test for SyclEventModel.
25 | 
26 |     It is expected to be a subclass of models.StructModel.
27 |     """
28 | 
29 |     assert issubclass(SyclEventModel, models.StructModel)
30 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=./source
11 | set BUILDDIR=_build
12 | 
13 | if "%1" == "" goto help
14 | 
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | 	echo.
18 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | 	echo.installed, then set the SPHINXBUILD environment variable to point
20 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | 	echo.may add the Sphinx directory to PATH.
22 | 	echo.
23 | 	echo.If you don't have Sphinx installed, grab it from
24 | 	echo.http://sphinx-doc.org/
25 | 	exit /b 1
26 | )
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | REM Skip the :clean section below; without this jump, showing the help
34 | REM would fall through and run "rd /s" on the build directory.
35 | goto end
36 | 
37 | :clean
38 | rd /s %BUILDDIR%
39 | 
40 | :end
41 | popd
42 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/stepping:
--------------------------------------------------------------------------------
 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/stepping python
 2 | set trace-commands on
 3 | set pagination off
 4 | set breakpoint pending on
 5 | break simple_dpex_func.py:29
 6 | run simple_dpex_func.py
 7 | # Expected:
 8 | # ...
 9 | # Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::kernel_sum () at simple_dpex_func.py:29
10 | # 29          c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
11 | step
12 | # Expected:
13 | # Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::kernel_sum () at simple_dpex_func.py:29
14 | # 29          c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
15 | step
16 | # Expected:
17 | # __main__::func_sum () at simple_dpex_func.py:22
18 | # 22          result = a_in_func + b_in_func
19 | backtrace
20 | # Expected:
21 | # #0  __main__::func_sum () at simple_dpex_func.py:22
22 | # #1  __main__::kernel_sum () at simple_dpex_func.py:29
23 | continue
24 | # Expected:
25 | # ...
26 | # Done...
27 | echo Done\n
28 | quit
29 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/types/IntEnumLiteral/test_compilation.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import dpnp
 6 | 
 7 | import numba_dpex as dpex
 8 | from numba_dpex import Range
 9 | from numba_dpex.kernel_api.flag_enum import FlagEnum
10 | 
11 | 
12 | class MockFlags(FlagEnum):
13 |     FLAG1 = 100
14 |     FLAG2 = 200
15 | 
16 | 
17 | @dpex.kernel(
18 |     release_gil=False,
19 |     no_compile=True,
20 |     no_cpython_wrapper=True,
21 |     no_cfunc_wrapper=True,
22 | )
23 | def update_with_flag(a):
24 |     a[0] = MockFlags.FLAG1
25 |     a[1] = MockFlags.FLAG2
26 | 
27 | 
28 | def test_compilation_of_flag_enum():
29 |     """Tests if a FlagEnum subclass can be used inside a kernel function."""
30 |     a = dpnp.ones(10, dtype=dpnp.int64)
31 |     dpex.call_kernel(update_with_flag, Range(10), a)
32 | 
33 |     assert a[0] == MockFlags.FLAG1
34 |     assert a[1] == MockFlags.FLAG2
35 |     for idx in range(2, 9):
36 |         assert a[idx] == 1
37 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/commands/sheduler_locking:
--------------------------------------------------------------------------------
 1 | # Run: NUMBA_OPT=0 gdb-oneapi -q -command commands/sheduler_locking python
 2 | set trace-commands on
 3 | set pagination off
 4 | set breakpoint pending on
 5 | break simple_dpex_func.py:29
 6 | run simple_dpex_func.py
 7 | # Expected:
 8 | # ...
 9 | # Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::kernel_sum () at simple_dpex_func.py:29
10 | # 29          c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
11 | set scheduler-locking step
12 | step
13 | # Expected:
14 | # __main__::func_sum () at simple_dpex_func.py:22
15 | # 22          result = a_in_func + b_in_func
16 | step
17 | # Expected:
18 | # 23          return result
19 | continue
20 | # Expected:
21 | # ...
22 | # [Switching to Thread 1.1073742080 lane 0]
23 | # Thread 2.3 hit Breakpoint 1, with SIMD lanes [0-1], __main__::kernel_sum () at simple_dpex_func.py:29
24 | # 29          c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
25 | continue
26 | # Expected:
27 | # ...
28 | # Done...
29 | echo Done\n
30 | quit
31 | 


--------------------------------------------------------------------------------
/docs/_templates/autoapi/python/data.rst:
--------------------------------------------------------------------------------
 1 | {% if obj.display %}
 2 | .. py:{{ obj.type }}:: {{ obj.name }}
 3 |    {%- if obj.annotation is not none %}
 4 | 
 5 |    :type: {%- if obj.annotation %} {{ obj.annotation }}{%- endif %}
 6 | 
 7 |    {%- endif %}
 8 | 
 9 |    {%- if obj.value is not none %}
10 | 
11 |    :value: {% if obj.value is string and obj.value.splitlines()|count > 1 -%}
12 |                 Multiline-String
13 | 
14 |     .. raw:: html
15 | 
16 |         <details><summary>Show Value</summary>
17 | 
18 |     .. code-block:: python
19 | 
20 |         """{{ obj.value|indent(width=8,blank=true) }}"""
21 | 
22 |     .. raw:: html
23 | 
24 |         </details>
25 | 
26 |             {%- else -%}
27 |               {%- if obj.value is string -%}
28 |                 {{ "%r" % obj.value|string|truncate(100) }}
29 |               {%- else -%}
30 |                 {{ obj.value|string|truncate(100) }}
31 |               {%- endif -%}
32 |             {%- endif %}
33 |    {%- endif %}
34 | 
35 | 
36 |    {{ obj.docstring|indent(3) }}
37 | {% endif %}
38 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/types/IntEnumLiteral/test_type_registration.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import pytest
 6 | from numba.core.datamodel import default_manager
 7 | 
 8 | from numba_dpex.core.datamodel.models import dpex_data_model_manager
 9 | from numba_dpex.core.types import IntEnumLiteral
10 | from numba_dpex.kernel_api.flag_enum import FlagEnum
11 | 
12 | 
13 | def test_data_model_registration():
14 |     """Tests that the IntEnumLiteral type is only registered with the
15 |     DpexExpKernelTargetContext target.
16 |     """
17 | 
18 |     class DummyFlags(FlagEnum):
19 |         DUMMY = 0
20 | 
21 |     dummy = IntEnumLiteral(DummyFlags)
22 | 
23 |     with pytest.raises(KeyError):
24 |         default_manager.lookup(dummy)
25 | 
26 |     try:
27 |         dpex_data_model_manager.lookup(dummy)
28 |     except:
29 |         pytest.fail(
30 |             "IntEnumLiteral type lookup failed in experimental "
31 |             "data model manager"
32 |         )
33 | 


--------------------------------------------------------------------------------
/numba_dpex/kernel_api/atomic_fence.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """Python functions that simulate SYCL's atomic_fence primitives.
 6 | """
 7 | from .memory_enums import MemoryOrder, MemoryScope
 8 | 
 9 | 
10 | def atomic_fence(
11 |     memory_order: MemoryOrder, memory_scope: MemoryScope
12 | ):  # pylint: disable=unused-argument
13 |     """Performs a memory fence operation across all work-items.
14 | 
15 |     The function is equivalent to the ``sycl::atomic_fence`` function and
16 |     controls the order of memory accesses (loads and stores) by individual
17 |     work-items.
18 | 
19 |     .. important::
20 |         The function is a no-op during CPython execution and only available in
21 |         JIT compiled mode of execution.
22 | 
23 |     Args:
24 |         memory_order (MemoryOrder): The memory synchronization order.
25 |         memory_scope (MemoryScope): The set of work-items and devices to which
26 |             the memory ordering constraints apply.
27 | 
28 |     """
29 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/test_dpex_use_alongside_numba.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """
 6 | This module contains tests to ensure that numba.njit works with numpy after
 7 | importing numba_dpex. Aka lazy testing if we break numba's default behavior.
 8 | """
 9 | 
10 | import numba as nb
11 | import numpy as np
12 | 
13 | import numba_dpex
14 | 
15 | 
@nb.njit
def add1(a):
    """Return ``a + 1``; decorated with ``nb.njit`` at import time."""
    return a + 1
19 | 
20 | 
def add_py(a, b):
    """Return ``np.add(a, b)``.

    Kept as an undecorated function so the same body can be called directly
    and through ``nb.njit`` (see ``add_jit``).
    """
    return np.add(a, b)
23 | 
24 | 
25 | add_jit = nb.njit(add_py)
26 | 
27 | 
def test_add1():
    """``add1`` adds one to a complex64 NumPy array."""
    values = np.asarray([1j], dtype=np.complex64)
    expected = np.asarray([1 + 1j], dtype=np.complex64)

    result = add1(values)

    assert np.array_equal(result, expected)
31 | 
32 | 
def test_add_py():
    """The undecorated ``add_py`` adds a scalar to a complex128 array."""
    ones = np.ones((10,), dtype=np.complex128)
    expected = np.full((10,), 2.5, dtype=ones.dtype)

    result = add_py(ones, 1.5)

    assert np.array_equal(result, expected)
36 | 
37 | 
def test_add_jit():
    """The ``nb.njit``-wrapped ``add_jit`` adds a scalar to a complex128 array."""
    ones = np.ones((10,), dtype=np.complex128)
    expected = np.full((10,), 2.5, dtype=ones.dtype)

    result = add_jit(ones, 1.5)

    assert np.array_equal(result, expected)
41 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | #[=======================================================================[.rst:
 2 | numba_dpex
 3 | -----------
 4 | 
 5 | A cmake file to compile the ``_dpexrt_python`` Python C extension for
 6 | ``numba_dpex``. You can build this component locally in-place by invoking these
 7 | commands:
 8 | 
 9 | .. code-block:: cmake
10 |     ~$ cmake .
11 |     ~$ cmake --build . --verbose
12 | 
13 | Once compiled, the _dpexrt_python library will be in ``numba_dpex/core/runtime``
14 | folder.
15 | 
16 | This ``CMakeLists.txt`` file will be used by ``setup.py``.
17 | #]=======================================================================]
18 | 
# Require at least CMake 3.21 and enable policies up to 3.27 (the
# ``min...max`` form sets the policy maximum).
cmake_minimum_required(VERSION 3.21...3.27 FATAL_ERROR)

# NUMBA_DPEX_VERSION is not defined in this file; it has to be provided by
# whoever invokes CMake (presumably setup.py via -DNUMBA_DPEX_VERSION -- TODO
# confirm).
project(numba-dpex
    VERSION ${NUMBA_DPEX_VERSION}
    DESCRIPTION "An extension for Numba to add data-parallel offload capability"
)

# Help conda build find path from both host and build env.
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)

# The actual extension target is defined in the runtime sub-directory.
add_subdirectory(numba_dpex/core/runtime)
31 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/sum.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import dpnp as np
 6 | 
 7 | import numba_dpex as ndpx
 8 | 
 9 | 
10 | @ndpx.kernel(debug=True)
11 | def data_parallel_sum(item, a_in_kernel, b_in_kernel, c_in_kernel):
12 |     i = item.get_id(0)  # numba-kernel-breakpoint
13 |     l1 = a_in_kernel[i]  # second-line
14 |     l2 = b_in_kernel[i]  # third-line
15 |     c_in_kernel[i] = l1 + l2  # fourth-line
16 | 
17 | 
def driver(a, b, c, global_size):
    """Print the three arrays, launch ``data_parallel_sum`` and print ``c``.

    The kernel is submitted over ``ndpx.Range(global_size)`` with ``a``, ``b``
    and ``c`` as its array arguments.
    """
    for operand in (a, b, c):
        print("before : ", operand)

    launch_range = ndpx.Range(global_size)
    ndpx.call_kernel(data_parallel_sum, launch_range, a, b, c)

    print("after : ", c)
24 | 
25 | 
def main():
    """Create two float32 ``arange`` inputs of length 10 plus an output array,
    pass them to ``driver`` and print ``Done...``."""
    N = global_size = 10

    a = np.arange(N, dtype=np.float32)
    b = np.arange(N, dtype=np.float32)
    c = np.empty_like(a)

    driver(a, b, c, global_size)

    print("Done...")
37 | 
38 | 
39 | if __name__ == "__main__":
40 |     main()
41 | 


--------------------------------------------------------------------------------
/scripts/run_debug_examples.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
# Run one gdb command file against the debug examples.
#   $1: path of the gdb command file, relative to numba_dpex/examples/debug.
# The subshell keeps the `cd` local to this call. The pipeline's status is the
# status of `grep Done`, so with `set -e` the script stops when the gdb output
# does not contain "Done".
check() {
  echo "Run $1 ..."
  (cd numba_dpex/examples/debug && NUMBA_OPT=0 gdb-oneapi -q -command "$1" python) | grep Done
}
 9 | 
# Run `check` for every gdb command file of the debug examples, in order.
# NOTE(review): "sheduler_locking" looks like a misspelling; it must match the
# file name under examples/debug/commands, so confirm before renaming.
run_checks() {
  check commands/function_breakpoint
  check commands/local_variables_0
  check commands/local_variables_1
  check commands/next
  check commands/sheduler_locking
  check commands/stepi
  check commands/stepping
  check commands/step_dpex_func
  check commands/step_sum
  check commands/simple_sum
  check commands/backtrace
  check commands/backtrace_kernel
  check commands/break_func
  check commands/break_file_func
  check commands/break_line_number
  check commands/break_nested_func
  check commands/info_func
}
29 | 
# Run all checks with SYCL_DEVICE_FILTER set to the given filter string.
#   $1: device filter, e.g. "opencl:gpu:0".
# The assignment prefix sets the variable only for the `run_checks` call.
run_with_device() {
  echo "Run with SYCL_DEVICE_FILTER=$1 ..."
  SYCL_DEVICE_FILTER=$1 run_checks
}
34 | 
35 | # run_with_device level_zero:gpu:0
36 | run_with_device opencl:gpu:0
37 | # run_with_device opencl:cpu:0
38 | 
39 | echo Done
40 | 


--------------------------------------------------------------------------------
/.github/workflows/black.yml:
--------------------------------------------------------------------------------
 1 | # This is a workflow to format Python code with black formatter
 2 | 
 3 | name: black
 4 | 
 5 | # Controls when the action will run. Triggers the workflow on every pull
 6 | # request and on push events for the main branch
 7 | on:
 8 |   pull_request:
 9 |   push:
10 |     branches: [main]
11 | 
12 | permissions: read-all
13 | 
14 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel
15 | jobs:
16 |   # This workflow contains a single job called "black"
17 |   black:
18 |     # The type of runner that the job will run on
19 |     runs-on: ubuntu-20.04
20 | 
21 |     # Steps represent a sequence of tasks that will be executed as part of the job
22 |     steps:
23 |       # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
24 |       - uses: actions/checkout@v4
25 |       # Set up a Python environment for use in actions
26 |       - uses: actions/setup-python@v5
27 | 
28 |       # Run black code formatter
29 |       - uses: psf/black@stable
30 |         with:
31 |           args: ". --check"
32 |           version: "24.1.1"
33 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/kernel_tests/test_supported_array_types_as_kernel_args.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2022 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """Tests different input array type support for the kernel."""
 6 | 
 7 | import dpctl.tensor as dpt
 8 | import dpnp
 9 | import pytest
10 | 
11 | import numba_dpex as dpex
12 | from numba_dpex.kernel_api import Item, Range
13 | from numba_dpex.tests._helper import get_all_dtypes
14 | 
15 | list_of_dtypes = get_all_dtypes(
16 |     no_bool=True, no_float16=True, no_none=True, no_complex=True
17 | )
18 | 
19 | zeros_func = (dpt.zeros, dpnp.zeros)
20 | 
21 | _SIZE = 10
22 | 
23 | 
@pytest.fixture(
    params=[
        (alloc, dtype) for alloc in zeros_func for dtype in list_of_dtypes
    ]
)
def input_array(request):
    """Yield a zero-filled array of ``_SIZE`` elements for every combination
    of allocator in ``zeros_func`` and dtype in ``list_of_dtypes``."""
    alloc, dtype = request.param
    return alloc(_SIZE, dtype=dtype)
28 | 
29 | 
@dpex.kernel
def set_ones(item: Item, a):
    """Kernel: each work-item writes 1 to ``a`` at its own index along
    dimension 0."""
    i = item.get_id(0)
    a[i] = 1
34 | 
35 | 
def test_fetch_add(input_array):
    """Launch ``set_ones`` on each supported array type and check the result.

    Despite its name the test performs no atomic operation; it only verifies
    that the kernel accepts the array and that the first element becomes 1.
    """
    launch_range = Range(_SIZE)
    dpex.call_kernel(set_ones, launch_range, input_array)

    first = input_array[0]
    assert first == 1
40 | 


--------------------------------------------------------------------------------
/docs/source/user_guide/kernel_programming/operators.csv:
--------------------------------------------------------------------------------
 1 | Name, Operator, Note
 2 | Addition, ``+``,
 3 | Multiplication, ``*``,
 4 | Subtraction, ``-``,
 5 | Division, ``/``,
 6 | Floor Division, ``//``,
 7 | Modulo, ``%``,
 8 | Exponent, ``**``,
 9 | In-place Addition, ``+=``,
10 | In-place Subtraction, ``-=``,
11 | In-place Division, ``/=``,
12 | In-place Floor Division, ``//=``,
13 | In-place Modulo, ``%=``,
14 | In-place Exponent, ``**=``, Only supported on OpenCL CPU devices
15 | Bitwise And, ``&``,
16 | Bitwise Left Shift, ``<<``,
17 | Bitwise Right Shift, ``>>``,
18 | Bitwise Or, ``|``,
19 | Bitwise Exclusive Or, ``^``,
20 | In-place Bitwise And, ``&=``,
21 | In-place Bitwise Left Shift, ``<<=``,
22 | In-place Bitwise Right Shift, ``>>=``,
23 | In-place Bitwise Or, ``|=``,
24 | In-place Bitwise Exclusive Or, ``^=``,
25 | Negation, ``-``,
26 | Complement, ``~``,
27 | Pos, ``+``,
28 | Less Than, ``<``,
29 | Less Than Equal, ``<=``,
30 | Greater Than, ``>``,
31 | Greater Than Equal, ``>=``,
32 | Equal To, ``==``,
33 | Not Equal To, ``!=``,
34 | Matmul, ``@``, **Not supported**
35 | In-place Matmul, ``@=``, **Not supported**
36 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/types/DpctlSyclEvent/test_box.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """
 6 | Tests for boxing and allocating for dpctl.SyclEvent
 7 | """
 8 | 
 9 | import sys
10 | 
11 | from dpctl import SyclEvent
12 | 
13 | from numba_dpex import dpjit
14 | 
15 | 
def test_dpjit_constructor():
    """Test deleting an event that does not have a parent."""

    @dpjit
    def func() -> SyclEvent:
        SyclEvent()
        return None

    # We just want to make sure execution did not crash. There is currently
    # no way to check if the event was destroyed, except a manual run with
    # debug logs on.
    func()
28 | 
29 | 
def test_boxing_without_parent():
    """Test boxing of an event that does not have a parent."""

    @dpjit
    def func() -> SyclEvent:
        event = SyclEvent()
        return event

    e: SyclEvent = func()
    # sys.getrefcount() counts its own argument as one reference, so a value
    # of 2 means the local name ``e`` holds the only other reference.
    ref_cnt = sys.getrefcount(e)

    assert isinstance(e, SyclEvent)
    assert ref_cnt == 2
43 | 


--------------------------------------------------------------------------------
/docs/backups/user_guides/kernel_programming_guide/device-functions.rst:
--------------------------------------------------------------------------------
 1 | .. _device-functions:
 2 | 
 3 | Writing Device Functions
 4 | ========================
 5 | 
 6 | The user-level API of SYCL does not have a notion of device-only functions,
 7 | *i.e.* functions that can be only invoked from a kernel and not from a host
 8 | function. However, numba-dpex provides a special decorator
 9 | ``numba_dpex.func`` specifically to implement device functions.
10 | 
11 | .. literalinclude:: ../../../numba_dpex/examples/dpex_func.py
12 |    :pyobject: a_device_function
13 | 
14 | To use a device function from another device function:
15 | 
16 | .. literalinclude:: ../../../numba_dpex/examples/dpex_func.py
17 |    :pyobject: another_device_function
18 | 
19 | To use a device function from a kernel function ``numba_dpex.kernel``:
20 | 
21 | .. literalinclude:: ../../../numba_dpex/examples/dpex_func.py
22 |    :pyobject: a_kernel_function
23 | 
24 | Unlike a kernel function, a device function can return a value like normal
25 | functions.
26 | 
27 | .. todo::
28 | 
29 |    Specific capabilities and limitations for device functions need to be added.
30 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/debug/dpex_func.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import dpnp as np
 6 | 
 7 | import numba_dpex as ndpx
 8 | 
 9 | 
10 | @ndpx.device_func(debug=True)
11 | def func_sum(a_in_func, b_in_func):
12 |     result = a_in_func + b_in_func
13 |     return result
14 | 
15 | 
16 | @ndpx.kernel(debug=True)
17 | def kernel_sum(item, a_in_kernel, b_in_kernel, c_in_kernel):
18 |     i = item.get_id(0)
19 |     c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i])
20 | 
21 | 
def driver(a, b, c, global_size):
    """Print the operands, launch ``kernel_sum`` over ``global_size``
    work-items and print the resulting ``c``."""
    for label, operand in (("a = ", a), ("b = ", b), ("c = ", c)):
        print(label, operand)

    launch_range = ndpx.Range(global_size)
    ndpx.call_kernel(kernel_sum, launch_range, a, b, c)

    print("a + b = ", c)
28 | 
29 | 
def main():
    """Print the problem size, allocate float32 inputs of length 10 and an
    output array, run ``driver`` and print ``Done...``."""
    N = global_size = 10
    print("N", N)

    a = np.arange(N, dtype=np.float32)
    b = np.arange(N, dtype=np.float32)
    c = np.empty_like(a)

    driver(a, b, c, global_size)

    print("Done...")
42 | 
43 | 
44 | if __name__ == "__main__":
45 |     main()
46 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/kernel_tests/test_ndrange_exceptions.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import dpctl.tensor as dpt
 6 | import pytest
 7 | 
 8 | import numba_dpex as ndpx
 9 | from numba_dpex.kernel_api import NdRange
10 | 
11 | 
12 | # Data parallel kernel implementing vector sum
@ndpx.kernel
def kernel_vector_sum(a, b, c):
    """Element-wise ``c[i] = a[i] + b[i]``; used here only as a launch target.

    NOTE(review): the body uses ``ndpx.get_global_id`` and takes no index-space
    argument. The test below expects the launch to fail with ``TypeError``, so
    this body is presumably never compiled -- confirm.
    """
    i = ndpx.get_global_id(0)
    c[i] = a[i] + b[i]
17 | 
18 | 
@pytest.mark.parametrize(
    "error, ranges",
    [
        (TypeError, ((2, 2), ("a", 1, 1))),
        (TypeError, ((3, 3, 3, 3), (2, 2, 2))),
    ],
)
def test_ndrange_config_error(error, ranges):
    """Test that an exception is raised when an ndrange kernel is launched
    with unsupported range arguments.
    """
    global_range, local_range = ranges

    a = dpt.ones(1024, dtype=dpt.int32)
    b = dpt.ones(1024, dtype=dpt.int32)
    c = dpt.zeros(1024, dtype=dpt.int64)

    # Both the NdRange construction and the launch sit inside the context
    # manager, so the expected error may come from either call.
    with pytest.raises(error):
        nd_range = NdRange(global_range, local_range)
        ndpx.call_kernel(kernel_vector_sum, nd_range, a, b, c)
38 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/kernel_tests/test_atomic_fence.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import dpnp
 6 | 
 7 | import numba_dpex as dpex
 8 | from numba_dpex.kernel_api import (
 9 |     AtomicRef,
10 |     Item,
11 |     MemoryOrder,
12 |     MemoryScope,
13 |     atomic_fence,
14 | )
15 | 
16 | 
def test_atomic_fence():
    """A test for atomic_fence function.

    Two work-items communicate through the flag ``b[0]``: work-item 1 updates
    ``a[1]`` and then raises the flag, work-item 0 waits for the flag and then
    accumulates ``a[1:]`` into ``a[0]``.
    """

    @dpex.kernel
    def _kernel(item: Item, a, b):
        i = item.get_id(0)

        # Atomic view of the flag element b[0].
        bref = AtomicRef(b, index=0)

        if i == 1:
            # Producer: write the data, fence with release order, then
            # publish the flag.
            a[i] += 1
            atomic_fence(MemoryOrder.RELEASE, MemoryScope.DEVICE)
            bref.store(1)
        elif i == 0:
            # Consumer: spin until the flag is set, fence with acquire order,
            # then read the data written by the producer.
            while not bref.load():
                continue
            atomic_fence(MemoryOrder.ACQUIRE, MemoryScope.DEVICE)
            for idx in range(1, a.size):
                a[0] += a[idx]

    N = 2
    a = dpnp.ones(N, dtype=dpnp.int64)
    b = dpnp.zeros(1, dtype=dpnp.int64)

    dpex.call_kernel(_kernel, dpex.Range(N), a, b)

    # a starts as [1, 1]; the producer makes a[1] == 2, so a[0] == 1 + 2.
    assert a[0] == N + 1
44 | 


--------------------------------------------------------------------------------
/numba_dpex/kernel_api/__init__.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """
 6 | The kernel_api module provides a set of Python classes and functions that are
 7 | analogous to the C++ SYCL API. The kernel_api module is meant to allow
 8 | prototyping SYCL-like kernels in pure Python before compiling them using
 9 | numba_dpex.
10 | """
11 | 
12 | from .atomic_fence import atomic_fence
13 | from .atomic_ref import AtomicRef
14 | from .barrier import group_barrier
15 | from .index_space_ids import Group, Item, NdItem
16 | from .launcher import call_kernel
17 | from .local_accessor import LocalAccessor
18 | from .memory_enums import AddressSpace, MemoryOrder, MemoryScope
19 | from .private_array import PrivateArray
20 | from .ranges import NdRange, Range
21 | 
# Public names re-exported by the package. Each name is listed exactly once;
# the previous list repeated "group_barrier" and "call_kernel".
__all__ = [
    "call_kernel",
    "group_barrier",
    "AddressSpace",
    "atomic_fence",
    "AtomicRef",
    "Group",
    "Item",
    "LocalAccessor",
    "MemoryOrder",
    "MemoryScope",
    "NdItem",
    "NdRange",
    "Range",
    "PrivateArray",
]
40 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/kernel_tests/test_kernel_has_return_value_error.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import dpnp
 6 | import pytest
 7 | from numba.core.errors import TypingError
 8 | 
 9 | import numba_dpex as dpex
10 | from numba_dpex import int32, usm_ndarray
11 | from numba_dpex.core.exceptions import KernelHasReturnValueError
12 | from numba_dpex.core.types.kernel_api.index_space_ids import ItemType
13 | 
14 | i32arrty = usm_ndarray(ndim=1, dtype=int32, layout="C")
15 | item_ty = ItemType(ndim=1)
16 | 
17 | 
def f(item, a):
    """Return ``a`` unchanged; used below as a kernel body that (invalidly)
    returns a value."""
    return a
20 | 
21 | 
22 | list_of_sig = [
23 |     None,
24 |     (i32arrty(item_ty, i32arrty)),
25 | ]
26 | 
27 | 
@pytest.fixture(params=list_of_sig)
def sig(request):
    """Parametrized fixture yielding each entry of ``list_of_sig``: ``None``
    and an explicit signature whose return type is an array type."""
    return request.param
31 | 
32 | 
def test_return(sig):
    """A kernel whose body returns a value must be rejected.

    Building or launching the kernel from ``f`` has to raise either
    ``KernelHasReturnValueError`` directly or a numba ``TypingError`` whose
    message mentions ``KernelHasReturnValueError``.
    """
    a = dpnp.arange(1024, dtype=dpnp.int32)

    with pytest.raises((TypingError, KernelHasReturnValueError)) as excinfo:
        kernel_fn = dpex.kernel(sig)(f)
        dpex.call_kernel(kernel_fn, dpex.Range(a.size), a)

    # ``excinfo.type`` is the exception class, so ``isinstance`` on it was
    # always False and the message check never ran. Inspect the raised
    # instance instead.
    if isinstance(excinfo.value, TypingError):
        assert "KernelHasReturnValueError" in excinfo.value.args[0]
42 | 


--------------------------------------------------------------------------------
/scripts/update_copyrights.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import subprocess
 3 | 
 4 | 
 5 | def update_copyrights(root_dir, year):
 6 |     for folder, _, files in os.walk(root_dir):
 7 |         for filename in files:
 8 |             if filename[0] != "." and os.path.splitext(filename)[1] in [
 9 |                 ".py",
10 |                 ".h",
11 |                 ".c",
12 |                 ".cpp",
13 |             ]:
14 |                 filePath = os.path.abspath(os.path.join(folder, filename))
15 |                 args = [
16 |                     "annotate",
17 |                     "--copyright=Intel Corporation",
18 |                     "--license=Apache-2.0",
19 |                     "--year",
20 |                     str(year),
21 |                     "--merge-copyrights",
22 |                     filePath,
23 |                 ]
24 |                 subprocess.check_call(
25 |                     ["reuse", *args],
26 |                     shell=False,
27 |                 )
28 | 
29 | 
# Directory containing this script and its parent (the repository root).
path = os.path.dirname(os.path.realpath(__file__))
source_path = os.path.dirname(path)

if __name__ == "__main__":
    print("Provide new copyright year:")
    year = input().strip()
    # Refuse empty or non-numeric input instead of handing it to ``reuse``.
    if not year.isdigit():
        raise SystemExit(f"Invalid copyright year: {year!r}")
    # Build the package path portably instead of concatenating with "/".
    update_copyrights(os.path.join(source_path, "numba_dpex"), year)
37 | 


--------------------------------------------------------------------------------
/numba_dpex/dpctl_iface/wrappers.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | from numba.core import cgutils
 6 | 
 7 | from numba_dpex.core.runtime import context as dpexrt
 8 | from numba_dpex.core.types import DpctlSyclEvent
 9 | 
10 | 
def wrap_event_reference(ctx, builder, eref):
    """Wrap a dpctl event reference into the ``DpctlSyclEvent`` data model so
    that it can be boxed to Python.

    Args:
        ctx: Context used to obtain the Python API and the struct proxy.
        builder: Builder passed through to the Python API and struct proxy.
        eref: The dpctl event reference to wrap.

    Returns:
        The value of the initialized event struct.
    """
    event_type = DpctlSyclEvent()
    pyapi = ctx.get_python_api(builder)
    proxy = cgutils.create_struct_proxy(event_type)(ctx, builder)

    # Ref count after the call is equal to 1.
    # TODO: get dpex RT from cached property once the PR is merged
    # https://github.com/IntelPython/numba-dpex/pull/1027
    # ctx.dpexrt.eventstruct_init( # noqa: W0621
    runtime = dpexrt.DpexRTContext(ctx)
    # calling _<method>() is by numba's design
    struct_ptr = proxy._getpointer()  # pylint: disable=W0212
    runtime.eventstruct_init(pyapi, eref, struct_ptr)

    # calling _<method>() is by numba's design
    return proxy._getvalue()  # pylint: disable=W0212
36 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/types/USMNdArray/test_usm_ndarray_type.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import dpctl
 6 | import dpctl.tensor as dpt
 7 | import numpy as np
 8 | import pytest
 9 | from numba.misc.special import typeof
10 | 
11 | from numba_dpex.core.types import USMNdArray
12 | from numba_dpex.tests._helper import (
13 |     get_queue_or_skip,
14 |     skip_if_dtype_not_supported,
15 | )
16 | 
17 | list_of_dtypes = [
18 |     np.int32,
19 |     np.float32,
20 |     np.int64,
21 |     np.float64,
22 | ]
23 | 
24 | 
@pytest.fixture(params=list_of_dtypes)
def dtype(request):
    """Parametrized fixture yielding each NumPy dtype in ``list_of_dtypes``."""
    return request.param
28 | 
29 | 
30 | list_of_usm_type = [
31 |     "shared",
32 |     "device",
33 |     "host",
34 | ]
35 | 
36 | 
@pytest.fixture(params=list_of_usm_type)
def usm_type(request):
    """Parametrized fixture yielding each USM type string in
    ``list_of_usm_type``."""
    return request.param
40 | 
41 | 
def test_usm_ndarray_type(dtype, usm_type):
    """``typeof`` maps a ``dpctl.tensor.usm_ndarray`` to ``USMNdArray`` for
    every tested dtype and USM allocation type."""
    queue = get_queue_or_skip()
    skip_if_dtype_not_supported(dtype, queue)

    # The host array only supplies the shape and dtype of the USM array.
    host = np.array(np.random.random(10), dtype)
    device_array = dpt.usm_ndarray(
        host.shape, dtype=host.dtype, buffer=usm_type
    )

    numba_type = typeof(device_array)
    assert isinstance(numba_type, USMNdArray)
    assert device_array.usm_type == usm_type
51 | 


--------------------------------------------------------------------------------
/docs/source/user_guide/debugging/backtrace.rst:
--------------------------------------------------------------------------------
 1 | .. include:: ./../../ext_links.txt
 2 | 
 3 | Backtrace
 4 | ==========
 5 | 
 6 | The ``backtrace`` command displays a summary of how your program got where it
 7 | is. Consider the following example
 8 | ``numba_dpex/examples/debug/simple_dpex_func.py``:
 9 | 
10 | .. literalinclude:: ./../../../../numba_dpex/examples/debug/simple_dpex_func.py
11 |     :lines: 5-
12 |     :linenos:
13 |     :lineno-match:
14 | 
15 | 
16 | This section presents two examples of using Intel Distribution for GDB* to
17 | generate a backtrace from a numba_dpex.kernel function. The first example
18 | presents the case where the kernel function does not invoke any other
19 | function. The second example presents the case where the kernel function
20 | invokes a numba_dpex.func.
21 | 
22 | Example 1:
23 | 
24 | .. literalinclude:: ./../../../../numba_dpex/examples/debug/commands/docs/backtrace_kernel
25 |     :language: shell-session
26 |     :emphasize-lines: 8,9
27 | 
28 | Example 2:
29 | 
30 | .. literalinclude:: ./../../../../numba_dpex/examples/debug/commands/docs/backtrace
31 |     :language: shell-session
32 |     :emphasize-lines: 8-10
33 | 
34 | See also:
35 | 
36 |     - `Backtraces in GDB*
37 |       <https://sourceware.org/gdb/onlinedocs/gdb/Backtrace.html#Backtrace>`_
38 | 


--------------------------------------------------------------------------------
/numba_dpex/core/parfors/kernel_templates/kernel_template_iface.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import abc
 6 | 
 7 | 
 8 | class KernelTemplateInterface(metaclass=abc.ABCMeta):
 9 |     @classmethod
10 |     def __subclasshook__(cls, subclass):
11 |         return hasattr(
12 |             callable(subclass._generate_kernel_stub_as_string)
13 |             and callable(subclass._generate_kernel_ir)
14 |             and callable(subclass.dump_kernel_string)
15 |             and callable(subclass.dump_kernel_ir)
16 |             and hasattr(subclass, "kernel_ir")
17 |             and hasattr(subclass, "kernel_string")
18 |         )
19 | 
20 |     @abc.abstractmethod
21 |     def _generate_kernel_stub_as_string(self):
22 |         """Generates as a string a stub for a numba_dpex kernel function"""
23 |         raise NotImplementedError
24 | 
25 |     @abc.abstractmethod
26 |     def _generate_kernel_ir(self):
27 |         raise NotImplementedError
28 | 
29 |     @abc.abstractmethod
30 |     def dump_kernel_string(self):
31 |         raise NotImplementedError
32 | 
33 |     @property
34 |     @abc.abstractmethod
35 |     def py_func(self):
36 |         raise NotImplementedError
37 | 
38 |     @property
39 |     @abc.abstractmethod
40 |     def kernel_string(self):
41 |         raise NotImplementedError
42 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/types/DpnpNdArray/test_boxing_unboxing.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """
 6 | Tests for boxing for dpnp.ndarray
 7 | """
 8 | 
 9 | import dpnp
10 | 
11 | from numba_dpex import dpjit
12 | 
13 | 
def test_boxing_unboxing():
    """Tests basic boxing and unboxing of a dpnp.ndarray object.

    Checks if we can pass in and return a dpnp.ndarray object to and
    from a dpjit decorated function.
    """

    @dpjit
    def func(a):
        return a

    a = dpnp.empty(10, dtype=dpnp.float32)
    try:
        b = func(a)
    except Exception as exc:
        # Catch only ordinary errors (not KeyboardInterrupt/SystemExit) and
        # keep the original exception as the cause of the failure.
        raise AssertionError(
            "Failure during unbox/box of dpnp.ndarray"
        ) from exc

    assert a.shape == b.shape
    assert a.device == b.device
    assert a.strides == b.strides
    assert a.dtype == b.dtype
    # To ensure we are returning the original array when boxing
    assert a is b
37 | 
38 | 
def test_stride_calc_at_unboxing():
    """Tests if strides were correctly computed during unboxing."""

    @dpjit
    def _strides_of(arr):
        return arr.strides

    array = dpnp.empty((4, 16, 4), dtype=dpnp.float32)
    observed = list(_strides_of(array))

    # Numba computes strides as bytes
    expected = [256, 16, 4]
    assert observed == expected
50 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/kernel/vector_sum.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """The example demonstrates a 1D vector addition kernel.
 6 | """
 7 | 
 8 | import dpnp
 9 | import numpy.testing as testing
10 | 
11 | import numba_dpex as ndpx
12 | 
13 | 
# Data parallel kernel implementing vector sum
@ndpx.kernel
def kernel_vector_sum(item, a, b, c):
    """Each work-item adds one pair of elements: ``c[i] = a[i] + b[i]``,
    where ``i`` is the work-item's index along dimension 0."""
    i = item.get_id(0)
    c[i] = a[i] + b[i]
19 | 
20 | 
# Utility function that launches the kernel and verifies its result
def driver(a, b, c, global_size):
    """Launch ``kernel_vector_sum`` over ``global_size`` work-items and check
    the output against a NumPy reference computed on the host."""
    launch_range = ndpx.Range(global_size)
    ndpx.call_kernel(kernel_vector_sum, launch_range, a, b, c)

    # Copy the dpnp arrays to NumPy arrays before comparing.
    a_np, b_np, c_np = (dpnp.asnumpy(arr) for arr in (a, b, c))
    expected = a_np + b_np
    testing.assert_equal(c_np, expected)
28 | 
29 | 
# Main function
def main():
    """Create random inputs of length 10, print the device information and
    run ``driver`` on them."""
    N = global_size = 10
    print("Vector size N", N)

    # Create random vectors on the default device
    a = dpnp.random.random(N)
    b = dpnp.random.random(N)
    c = dpnp.ones_like(a)

    print("Executing on device:")
    a.device.print_device_info()

    driver(a, b, c, global_size)

    print("Done...")
45 | 
46 | 
47 | if __name__ == "__main__":
48 |     main()
49 | 


--------------------------------------------------------------------------------
/numba_dpex/core/runtime/_nrt_python_helper.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | //===----------------------------------------------------------------------===//
 6 | ///
 7 | /// \file
 8 | /// Re-definition of NRT functions for marshalling from / to Python objects
 9 | /// defined in numba/core/runtime/_nrt_python.c.
10 | ///
11 | //===----------------------------------------------------------------------===//
12 | 
13 | #ifndef _NRT_PYTHON_HELPER_H_
14 | #define _NRT_PYTHON_HELPER_H_
15 | 
16 | #define NO_IMPORT_ARRAY
17 | #include "_meminfo_helper.h"
18 | 
19 | /*!
20 |  * @brief A PyTypeObject describing the Python object that wraps Numba's MemInfo
21 |  *
22 |  */
23 | extern PyTypeObject MemInfoType;
24 | 
25 | void MemInfo_dealloc(MemInfoObject *self);
26 | int MemInfo_init(MemInfoObject *self, PyObject *args, PyObject *kwds);
27 | int MemInfo_getbuffer(PyObject *exporter, Py_buffer *view, int flags);
28 | PyObject *MemInfo_acquire(MemInfoObject *self);
29 | PyObject *MemInfo_release(MemInfoObject *self);
30 | PyObject *MemInfo_get_data(MemInfoObject *self, void *closure);
31 | PyObject *MemInfo_get_refcount(MemInfoObject *self, void *closure);
32 | PyObject *MemInfo_get_external_allocator(MemInfoObject *self, void *closure);
33 | PyObject *MemInfo_get_parent(MemInfoObject *self, void *closure);
34 | 
35 | #endif /* _NRT_PYTHON_HELPER_H_ */
36 | 


--------------------------------------------------------------------------------
/numba_dpex/core/types/__init__.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | from .dpctl_types import DpctlSyclEvent, DpctlSyclQueue
 6 | from .dpnp_ndarray_type import DpnpNdArray
 7 | from .kernel_api.literal_intenum import IntEnumLiteral
 8 | from .kernel_api.ranges import NdRangeType, RangeType
 9 | from .kernel_dispatcher_type import KernelDispatcherType
10 | from .numba_types_short_names import (
11 |     b1,
12 |     bool_,
13 |     boolean,
14 |     double,
15 |     f4,
16 |     f8,
17 |     float32,
18 |     float64,
19 |     float_,
20 |     i4,
21 |     i8,
22 |     int32,
23 |     int64,
24 |     none,
25 |     u4,
26 |     u8,
27 |     uint32,
28 |     uint64,
29 |     void,
30 | )
31 | from .usm_ndarray_type import USMNdArray
32 | 
33 | usm_ndarray = USMNdArray
34 | 
35 | __all__ = [
36 |     "DpctlSyclQueue",
37 |     "DpctlSyclEvent",
38 |     "DpnpNdArray",
39 |     "IntEnumLiteral",
40 |     "KernelDispatcherType",
41 |     "NdRangeType",
42 |     "RangeType",
43 |     "USMNdArray",
44 |     "none",
45 |     "boolean",
46 |     "bool_",
47 |     "uint32",
48 |     "uint64",
49 |     "int32",
50 |     "int64",
51 |     "float32",
52 |     "float64",
53 |     "b1",
54 |     "i4",
55 |     "i8",
56 |     "u4",
57 |     "u8",
58 |     "f4",
59 |     "f8",
60 |     "float_",
61 |     "double",
62 |     "usm_ndarray",
63 |     "void",
64 | ]
65 | 


--------------------------------------------------------------------------------
/numba_dpex/core/runtime/experimental/tools/boost_hash.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2005-2014 Daniel James.
 2 | // Distributed under the Boost Software License, Version 1.0. (See accompanying
 3 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 4 | 
 5 | //  Based on Peter Dimov's proposal
 6 | //  http://www.open-std.org/JTC1/SC22/WG21/docs/papers/2005/n1756.pdf
 7 | //  issue 6.18.
 8 | //
 9 | //  This also contains public domain code from MurmurHash. From the
10 | //  MurmurHash header:
11 | 
12 | // MurmurHash3 was written by Austin Appleby, and is placed in the public
13 | // domain. The author hereby disclaims copyright to this source code.
14 | 
15 | // 2023 Intel Corporation
16 | // Copied hash_combine and hash_combine_impl from boost
17 | // (https://www.boost.org/doc/libs/1_76_0/boost/container_hash/hash.hpp) and
18 | // changed hash_combine to use std::hash<T> instead of boost::hash<T>.
19 | 
20 | #include <functional>
21 | 
22 | namespace boost
23 | {
24 | namespace hash_detail
25 | {
26 | template <typename SizeT>
27 | inline void hash_combine_impl(SizeT &seed, SizeT value)
28 | {
29 |     seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2);
30 | }
31 | } // namespace hash_detail
32 | 
33 | template <class T> inline void hash_combine(std::size_t &seed, T const &v)
34 | {
35 |     std::hash<T> hasher;
36 |     return boost::hash_detail::hash_combine_impl(seed, hasher(v));
37 | }
38 | } // namespace boost
39 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/kernel/atomic_op.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """The example demonstrates the use of :class:`numba_dpex.kernel_api.AtomicRef`.
 6 | 
 7 | The kernel shows the implementation of a reduction operation in numba-dpex
 8 | where every work-item is updating a global accumulator atomically.
 9 | """
10 | import dpnp
11 | 
12 | import numba_dpex as dpex
13 | from numba_dpex import kernel_api as kapi
14 | 
15 | 
16 | @dpex.kernel
17 | def atomic_reduction(item: kapi.Item, a, res):
18 |     """Array reduction using :func:`AtomicRef.fetch_add`.
19 | 
20 |     Args:
21 |         item (kapi.Item): Index space id for each work item.
22 |         a (dpnp.ndarray): A 1-d array to be reduced.
23 |         res (dpnp.ndarray): A single element array into which the result is
24 |             accumulated.
25 |     """
26 |     idx = item.get_id(0)
27 |     acc = kapi.AtomicRef(res, 0)
28 |     acc.fetch_add(a[idx])
29 | 
30 | 
31 | def main():
32 |     N = 1024
33 |     # Input holds 0..N-1; res is a one-element accumulator of the same dtype.
34 |     a = dpnp.arange(0, N)
35 |     res = dpnp.zeros(1, dtype=a.dtype)
36 | 
37 |     print("Executing on device:")
38 |     a.device.print_device_info()
39 |     # Launch over a 1-d range of N, matching the length of a.
40 |     dpex.call_kernel(atomic_reduction, dpex.Range(N), a, res)
41 |     print(f"Summation of {N} integers = {res[0]}")
42 |     # Closed form of the sum 0 + 1 + ... + (N - 1).
43 |     assert res[0] == N * (N - 1) / 2
44 | 
45 | 
46 | if __name__ == "__main__":
47 |     main()
48 | 


--------------------------------------------------------------------------------
/numba_dpex/core/runtime/_dbg_printer.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | //===----------------------------------------------------------------------===//
 6 | ///
 7 | /// \file
 8 | /// A helper macro to print debug prints.
 9 | ///
10 | //===----------------------------------------------------------------------===//
11 | 
12 | #pragma once
13 | 
14 | /* Debugging facilities - enabled at compile-time by changing "#if 0" below */
15 | /* #undef NDEBUG */
16 | #if 0
17 | #include <stdio.h>
18 | #define DPEXRT_DEBUG(X)                                                        \
19 |     {                                                                          \
20 |         X;                                                                     \
21 |         fflush(stdout);                                                        \
22 |     }
23 | #else
24 | #define DPEXRT_DEBUG(X)                                                        \
25 |     if (0) {                                                                   \
26 |         X;                                                                     \
27 |     }
28 | #endif
29 | 
30 | /* Debugging printf-style helper used internally: forwards fmt and its variadic
31 |  * arguments to vfprintf() on stderr. NOTE(review): this header never includes
32 |  * <stdarg.h>, and <stdio.h> only under "#if 0" - includers must supply both. */
33 | static inline void drt_debug_print(const char *fmt, ...)
34 | {
35 |     va_list args;
36 | 
37 |     va_start(args, fmt);
38 |     vfprintf(stderr, fmt, args);
39 |     va_end(args);
40 | }
41 | 


--------------------------------------------------------------------------------
/numba_dpex/core/runtime/_meminfo_helper.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | #ifndef _NRT_ARRAY_STRUCT_H_
 6 | #define _NRT_ARRAY_STRUCT_H_
 7 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
 8 | #include <Python.h>
 9 | 
10 | #include <numpy/arrayobject.h>
11 | #include <numpy/arrayscalars.h>
12 | #include <numpy/ndarrayobject.h>
13 | 
14 | #include "numba/_numba_common.h"
15 | #include "numba/_pymodule.h"
16 | #include "numba/core/runtime/nrt.h"
17 | 
18 | /*
19 |  * The MemInfo structure.
20 |  * NOTE: copied from numba/core/runtime/nrt.c (presumably must stay in sync).
21 |  */
22 | struct MemInfo
23 | {
24 |     size_t refct;
25 |     NRT_dtor_function dtor;
26 |     void *dtor_info; /* e.g. a MemInfoDtorInfo, read by the destructor */
27 |     void *data;
28 |     size_t size; /* only used for NRT allocated memory */
29 |     NRT_ExternalAllocator *external_allocator;
30 | };
31 | 
32 | /*!
33 |  * @brief A wrapper struct to store a MemInfo pointer along with the PyObject
34 |  * that is associated with the MemInfo.
35 |  *
36 |  * The struct is stored in the dtor_info attribute of a MemInfo object and
37 |  * used by the destructor to free the MemInfo and DecRef the PyObject.
38 |  *
39 |  */
40 | typedef struct
41 | {
42 |     PyObject *owner; /* Python object that the destructor DecRefs */
43 |     NRT_MemInfo *mi; /* MemInfo that the destructor frees */
44 | } MemInfoDtorInfo;
45 | 
46 | typedef struct
47 | {
48 |     PyObject_HEAD NRT_MemInfo *meminfo; /* PyObject_HEAD supplies its own ';' */
49 | } MemInfoObject;
50 | 
51 | #endif /* _NRT_ARRAY_STRUCT_H_ */
52 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/kernel_api/test_local_accessor.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import numpy
 6 | import pytest
 7 | 
 8 | from numba_dpex import kernel_api as kapi
 9 | 
10 | 
11 | def _slm_kernel(nd_item: kapi.NdItem, a, slm):
12 |     i = nd_item.get_global_linear_id()
13 |     j = nd_item.get_local_linear_id()
14 | 
15 |     slm[j] = 100
16 |     a[i] = slm[i]
17 | 
18 | 
19 | def test_local_accessor_data_inaccessible_outside_kernel():
20 |     la = kapi.LocalAccessor((100,), dtype=numpy.float32)
21 |     # Reading an element outside a kernel is expected to raise.
22 |     with pytest.raises(NotImplementedError):
23 |         print(la[0])
24 |     # Writing an element outside a kernel is expected to raise as well.
25 |     with pytest.raises(NotImplementedError):
26 |         la[0] = 10
27 | 
28 | 
29 | def test_local_accessor_use_inside_kernel():
30 |     """Checks that a LocalAccessor can be written and read inside a kernel."""
31 |     a = numpy.empty(32)
32 |     slm = kapi.LocalAccessor(32, dtype=a.dtype)
33 | 
34 |     # Launches one work group with 32 work items. Each work item initializes
35 |     # its position in the SLM to 100 and then writes it to the global array `a`.
36 |     kapi.call_kernel(_slm_kernel, kapi.NdRange((32,), (32,)), a, slm)
37 | 
38 |     assert numpy.all(a == 100)
39 | 
40 | 
41 | def test_local_accessor_usage_not_allowed_with_range_kernel():
42 |     """Passing a LocalAccessor to a Range launch must raise TypeError."""
43 |     a = numpy.empty(32)
44 |     slm = kapi.LocalAccessor(32, dtype=a.dtype)
45 | 
46 |     with pytest.raises(TypeError):
47 |         kapi.call_kernel(_slm_kernel, kapi.Range((32,)), a, slm)
48 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/kernel_tests/test_math_functions.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import math
 6 | 
 7 | import dpnp
 8 | import numpy
 9 | import pytest
10 | 
11 | import numba_dpex as dpex
12 | from numba_dpex.tests._helper import get_all_dtypes
13 | 
14 | list_of_unary_ops = ["fabs", "exp", "log", "sqrt", "sin", "cos", "tan"]
15 | 
16 | 
17 | @pytest.fixture(params=list_of_unary_ops)
18 | def unary_op(request):
19 |     return request.param
20 | 
21 | 
22 | list_of_dtypes = get_all_dtypes(
23 |     no_bool=True, no_int=True, no_float16=True, no_none=True, no_complex=True
24 | )
25 | 
26 | 
27 | @pytest.fixture(params=list_of_dtypes)
28 | def input_arrays(request):
29 |     # The size of input and out arrays to be used
30 |     N = 2048
31 |     a = dpnp.arange(N, dtype=request.param)
32 |     b = dpnp.arange(N, dtype=request.param)
33 |     return a, b
34 | 
35 | 
36 | def test_binary_ops(unary_op, input_arrays):
37 |     a, b = input_arrays
38 |     uop = getattr(math, unary_op)
39 |     dpnp_uop = getattr(dpnp, unary_op)
40 |     # NOTE: despite the test name, only unary math functions are exercised.
41 |     @dpex.kernel
42 |     def f(item, a, b):
43 |         i = item.get_id(0)
44 |         b[i] = uop(a[i])
45 | 
46 |     dpex.call_kernel(f, dpex.Range(a.size), a, b)
47 |     # The dpnp function of the same name provides the reference result.
48 |     expected = dpnp_uop(a)
49 | 
50 |     np_expected = dpnp.asnumpy(expected)
51 |     np_actual = dpnp.asnumpy(b)
52 | 
53 |     assert numpy.allclose(np_expected, np_actual)
54 | 


--------------------------------------------------------------------------------
/numba_dpex/kernel_api/memory_enums.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """A collection of FlagEnum classes that syntactically represents the SYCL
 6 | memory enum classes.
 7 | """
 8 | 
 9 | from numba_dpex.kernel_api.flag_enum import FlagEnum
10 | 
11 | 
12 | class MemoryOrder(FlagEnum):
13 |     """
14 |     Analogue of :sycl_memory_order:`sycl::memory_order <>` enumeration.
15 | 
16 |     The integer values of the enums are kept consistent with the corresponding
17 |     implementation in dpcpp.
18 | 
19 |     """
20 | 
21 |     RELAXED = 0
22 |     ACQUIRE = 1
23 |     CONSUME_UNSUPPORTED = 2
24 |     RELEASE = 3
25 |     ACQ_REL = 4
26 |     SEQ_CST = 5
27 | 
28 | 
29 | class MemoryScope(FlagEnum):
30 |     """
31 |     Analogue of :sycl_memory_scope:`sycl::memory_scope <>` enumeration.
32 | 
33 |     The integer values of the enums are kept consistent with the corresponding
34 |     implementation in dpcpp.
35 | 
36 |     """
37 | 
38 |     WORK_ITEM = 0
39 |     SUB_GROUP = 1
40 |     WORK_GROUP = 2
41 |     DEVICE = 3
42 |     SYSTEM = 4
43 | 
44 | 
45 | class AddressSpace(FlagEnum):
46 |     """Analogue of :sycl_addr_space:`SYCL address space classes <>`.
47 | 
48 |     The integer values of the enums are kept consistent with the corresponding
49 |     implementation in dpcpp.
50 |     """
51 | 
52 |     PRIVATE = 0
53 |     GLOBAL = 1
54 |     CONSTANT = 2
55 |     LOCAL = 3
56 |     GENERIC = 4
57 | 


--------------------------------------------------------------------------------
/docs/_templates/autoapi/macros.rst:
--------------------------------------------------------------------------------
 1 | {% macro _render_item_name(obj, sig=False) -%}
 2 | :py:obj:`{{ obj.name }} <{{ obj.id }}>`
 3 |      {%- if sig -%}
 4 |        \ (
 5 |        {%- for arg in obj.obj.args -%}
 6 |           {%- if arg[0] %}{{ arg[0]|replace('*', '\*') }}{% endif -%}{{  arg[1] -}}
 7 |           {%- if not loop.last  %}, {% endif -%}
 8 |        {%- endfor -%}
 9 |        ){%- endif -%}
10 | {%- endmacro %}
11 | 
12 | {% macro _item(obj, sig=False, label='') %}
13 |    * - {{ _render_item_name(obj, sig) }}
14 |      - {% if label %}:summarylabel:`{{ label }}` {% endif %}{% if obj.summary %}{{ obj.summary }}{% else %}\-{% endif +%}
15 | {% endmacro %}
16 | 
17 | {% macro auto_summary(objs, title='') -%}
18 | .. list-table:: {{ title }}
19 |    :header-rows: 0
20 |    :widths: auto
21 |    :class: summarytable
22 | 
23 |   {% for obj in objs -%}
24 |     {%- set sig = (obj.type in ['method', 'function'] and not 'property' in obj.properties) -%}
25 | 
26 |     {%- if 'property' in obj.properties -%}
27 |       {%- set label = 'prop' -%}
28 |     {%- elif 'classmethod' in obj.properties -%}
29 |       {%- set label = 'class' -%}
30 |     {%- elif 'abstractmethod' in obj.properties -%}
31 |       {%- set label = 'abc' -%}
32 |     {%- elif 'staticmethod' in obj.properties -%}
33 |       {%- set label = 'static' -%}
34 |     {%- else -%}
35 |       {%- set label = '' -%}
36 |     {%- endif -%}
37 | 
38 |     {{- _item(obj, sig=sig, label=label) -}}
39 |   {%- endfor -%}
40 | 
41 | {% endmacro %}
42 | 


--------------------------------------------------------------------------------
/conda-recipe/build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -euxo pipefail
 4 | 
 5 | # Intel LLVM must cooperate with compiler and sysroot from conda
 6 | export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-}:${BUILD_PREFIX}/lib"
 7 | 
 8 | echo "--gcc-toolchain=${BUILD_PREFIX} --sysroot=${BUILD_PREFIX}/${HOST}/sysroot -target ${HOST}" > icpx_for_conda.cfg
 9 | ICPXCFG="$(pwd)/icpx_for_conda.cfg"
10 | ICXCFG="$(pwd)/icpx_for_conda.cfg"
11 | 
12 | read -r GLIBC_MAJOR GLIBC_MINOR <<<"$(conda list '^sysroot_linux-64$' \
13 |     | tail -n 1 | awk '{print $2}' | grep -oP '\d+' | head -n 2 | tr '\n' ' ')"
14 | 
15 | export ICXCFG
16 | export ICPXCFG
17 | 
18 | export CC=icx
19 | export CXX=icpx
20 | 
21 | export CMAKE_GENERATOR=Ninja
22 | # Make CMake verbose
23 | export VERBOSE=1
24 | 
25 | # new llvm-spirv location
26 | # starting from dpcpp_impl_linux-64=2022.0.0=intel_3610
27 | export PATH=$CONDA_PREFIX/bin-llvm:$PATH
28 | 
29 | # -wnx flags mean: --wheel --no-isolation --skip-dependency-check
30 | ${PYTHON} -m build -w -n -x
31 | ${PYTHON} -m wheel tags --remove --build "$GIT_DESCRIBE_NUMBER" \
32 |     --platform-tag "manylinux_${GLIBC_MAJOR}_${GLIBC_MINOR}_x86_64" \
33 |     dist/numba_dpex*.whl
34 | ${PYTHON} -m pip install dist/numba_dpex*.whl \
35 |     --no-build-isolation \
36 |     --no-deps \
37 |     --only-binary :all: \
38 |     --no-index \
39 |     --prefix "${PREFIX}" \
40 |     -vv
41 | 
42 | # Copy wheel package
43 | if [[ -v WHEELS_OUTPUT_FOLDER ]]; then
44 |     cp dist/numba_dpex*.whl "${WHEELS_OUTPUT_FOLDER[@]}"
45 | fi
46 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/kernel_api/test_range_kernel_launch.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import numpy
 6 | 
 7 | from numba_dpex import kernel_api as kapi
 8 | 
 9 | 
10 | def test_range_kernel_call1D():
11 |     def vecadd(item: kapi.Item, a, b, c):
12 |         idx = item.get_id(0)
13 |         c[idx] = a[idx] + b[idx]
14 | 
15 |     a = numpy.ones(100)
16 |     b = numpy.ones(100)
17 |     c = numpy.empty(100)
18 | 
19 |     kapi.call_kernel(vecadd, kapi.Range(100), a, b, c)
20 | 
21 |     assert numpy.allclose(c, a + b)
22 | 
23 | 
24 | def test_range_kernel_call2D():
25 |     def vecadd(item: kapi.Item, a, b, c):
26 |         idx = item.get_id(0)
27 |         jdx = item.get_id(1)
28 |         c[idx, jdx] = a[idx, jdx] + b[idx, jdx]
29 | 
30 |     a = numpy.ones((10, 10))
31 |     b = numpy.ones((10, 10))
32 |     c = numpy.empty((10, 10))
33 | 
34 |     kapi.call_kernel(vecadd, kapi.Range(10, 10), a, b, c)
35 | 
36 |     assert numpy.allclose(c, a + b)
37 | 
38 | 
39 | def test_range_kernel_call3D():
40 |     def vecadd(item: kapi.Item, a, b, c):
41 |         idx = item.get_id(0)
42 |         jdx = item.get_id(1)
43 |         kdx = item.get_id(2)
44 |         c[idx, jdx, kdx] = a[idx, jdx, kdx] + b[idx, jdx, kdx]
45 | 
46 |     a = numpy.ones((5, 5, 5))
47 |     b = numpy.ones((5, 5, 5))
48 |     c = numpy.empty((5, 5, 5))
49 | 
50 |     kapi.call_kernel(vecadd, kapi.Range(5, 5, 5), a, b, c)
51 | 
52 |     assert numpy.allclose(c, a + b)
53 | 


--------------------------------------------------------------------------------
/numba_dpex/kernel_api/private_array.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """Implements a simple array intended to be used inside kernel work item.
 6 | Implementation is intended to be used in pure Python code when prototyping a
 7 | kernel function.
 8 | """
 9 | 
10 | import numpy as np
11 | 
12 | 
13 | class PrivateArray:
14 |     """An array that gets allocated on the private memory of a work-item.
15 | 
16 |     The class should be used to allocate small arrays on the private
17 |     per-work-item memory for fast accesses inside a kernel. It is similar in
18 |     intent to the :sycl_private_memory:`sycl::private_memory <>` class but is
19 |     not a direct analogue.
20 |     """
21 | 
22 |     def __init__(self, shape, dtype, fill_zeros=False) -> None:
23 |         """Creates a new PrivateArray instance of the given shape and dtype,
24 |         zero-initialised only when fill_zeros is true (np.empty otherwise)."""
25 |         if fill_zeros:
26 |             self._data = np.zeros(shape=shape, dtype=dtype)
27 |         else:
28 |             self._data = np.empty(shape=shape, dtype=dtype)
29 | 
30 |     def __getitem__(self, idx_obj):
31 |         """Returns the value stored at the position represented by idx_obj in
32 |         the self._data ndarray, using plain NumPy indexing.
33 |         """
34 | 
35 |         return self._data[idx_obj]
36 | 
37 |     def __setitem__(self, idx_obj, val):
38 |         """Assigns a new value to the position represented by idx_obj in
39 |         the self._data ndarray, using plain NumPy item assignment.
40 |         """
41 | 
42 |         self._data[idx_obj] = val
43 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/kernel_tests/test_barriers.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import dpnp
 6 | 
 7 | import numba_dpex as dpex
 8 | from numba_dpex.kernel_api import MemoryScope, NdItem, group_barrier
 9 | 
10 | 
11 | def test_group_barrier():
12 |     """A test for group_barrier function."""
13 | 
14 |     @dpex.kernel
15 |     def _kernel(nd_item: NdItem, a):
16 |         i = nd_item.get_global_id(0)
17 | 
18 |         a[i] += 1
19 |         group_barrier(nd_item.get_group(), MemoryScope.DEVICE)
20 |         # After the barrier, work-item 0 sums all other elements into a[0].
21 |         if i == 0:
22 |             for idx in range(1, a.size):
23 |                 a[0] += a[idx]
24 | 
25 |     N = 16
26 |     a = dpnp.ones(N, dtype=dpnp.int32)
27 | 
28 |     dpex.call_kernel(_kernel, dpex.NdRange((N,), (N,)), a)
29 |     # Every element goes from 1 to 2, so the accumulated total is 2 * N.
30 |     assert a[0] == N * 2
31 | 
32 | 
33 | def test_group_barrier_device_func():
34 |     """A test for group_barrier function called from a device function."""
35 | 
36 |     @dpex.device_func
37 |     def _increment_value(nd_item: NdItem, a):
38 |         i = nd_item.get_global_id(0)
39 | 
40 |         a[i] += 1
41 |         group_barrier(nd_item.get_group(), MemoryScope.DEVICE)
42 |         # After the barrier, work-item 0 sums all other elements into a[0].
43 |         if i == 0:
44 |             for idx in range(1, a.size):
45 |                 a[0] += a[idx]
46 | 
47 |     @dpex.kernel
48 |     def _kernel(nd_item: NdItem, a):
49 |         _increment_value(nd_item, a)
50 | 
51 |     N = 16
52 |     a = dpnp.ones(N, dtype=dpnp.int32)
53 | 
54 |     dpex.call_kernel(_kernel, dpex.NdRange((N,), (N,)), a)
55 |     # Every element goes from 1 to 2, so the accumulated total is 2 * N.
56 |     assert a[0] == N * 2
57 | 


--------------------------------------------------------------------------------
/docs/source/user_guide/kernel_programming/supported-python-features.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | A kapi function, when run in the purely interpreted mode by the CPython
 3 | interpreter, is a regular Python function, and as such, in theory, any Python
 4 | feature can be used in the body of the function. In practice, to be
 5 | JIT compilable and executable on a device, only a subset of Python language
 6 | features is supported in a kapi function. The restriction stems both from
 7 | limitations in the Numba compiler tooling and from the device-specific
 8 | calling convention and other restrictions applied by a device's ABI.
 9 | 
10 | This section provides a partial support matrix for Python features with respect
11 | to their usage in a kapi function.
12 | 
13 | 
14 | Built-in types
15 | --------------
16 | 
17 | **Supported Types**
18 | 
19 | - ``int``
20 | - ``float``
21 | 
22 | **Unsupported Types**
23 | 
24 | - ``complex``
25 | - ``bool``
26 | - ``None``
27 | - ``tuple``
28 | 
29 | Built-in functions
30 | ------------------
31 | 
32 | The following built-in functions are supported:
33 | 
34 | - ``abs()``
35 | - ``float()``
36 | - ``int()``
37 | - ``len()``
38 | - ``range()``
39 | - ``round()``
40 | 
41 | Unsupported Constructs
42 | ----------------------
43 | 
44 | The following Python constructs are **not supported**:
45 | 
46 | - Exception handling (``try .. except``, ``try .. finally``)
47 | - Context management (the ``with`` statement)
48 | - Comprehensions (either list, dict, set or generator comprehensions)
49 | - Generator (any ``yield`` statements)
50 | - The ``raise`` statement
51 | - The ``assert`` statement
52 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/kernel_api/test_ndrange_kernel_launch.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import numpy
 6 | 
 7 | from numba_dpex import kernel_api as kapi
 8 | 
 9 | 
10 | def test_range_kernel_call1D():
11 |     def vecadd(item: kapi.NdItem, a, b, c):
12 |         idx = item.get_global_id(0)
13 |         c[idx] = a[idx] + b[idx]
14 | 
15 |     a = numpy.ones(100)
16 |     b = numpy.ones(100)
17 |     c = numpy.empty(100)
18 |     # NOTE: despite "range" in the test name, this is an NdRange launch.
19 |     kapi.call_kernel(vecadd, kapi.NdRange((100,), (20,)), a, b, c)
20 | 
21 |     assert numpy.allclose(c, a + b)
22 | 
23 | 
24 | def test_range_kernel_call2D():
25 |     def vecadd(item: kapi.NdItem, a, b, c):
26 |         idx = item.get_global_id(0)
27 |         jdx = item.get_global_id(1)
28 |         c[idx, jdx] = a[idx, jdx] + b[idx, jdx]
29 | 
30 |     a = numpy.ones((10, 10))
31 |     b = numpy.ones((10, 10))
32 |     c = numpy.empty((10, 10))
33 |     # NOTE: despite "range" in the test name, this is an NdRange launch.
34 |     kapi.call_kernel(vecadd, kapi.NdRange((10, 10), (2, 2)), a, b, c)
35 | 
36 |     assert numpy.allclose(c, a + b)
37 | 
38 | 
39 | def test_range_kernel_call3D():
40 |     def vecadd(item: kapi.Item, a, b, c):
41 |         idx = item.get_global_id(0)
42 |         jdx = item.get_global_id(1)
43 |         kdx = item.get_global_id(2)
44 |         c[idx, jdx, kdx] = a[idx, jdx, kdx] + b[idx, jdx, kdx]
45 | 
46 |     a = numpy.ones((8, 8, 8))
47 |     b = numpy.ones((8, 8, 8))
48 |     c = numpy.empty((8, 8, 8))
49 | 
50 |     kapi.call_kernel(vecadd, kapi.NdRange((8, 8, 8), (2, 2, 2)), a, b, c)
51 | 
52 |     assert numpy.allclose(c, a + b)
53 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/dpjit_tests/test_slicing.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """
 6 | Tests for slicing dpnp.ndarray
 7 | """
 8 | 
 9 | import dpnp
10 | import numpy
11 | 
12 | from numba_dpex import dpjit
13 | 
14 | 
15 | def test_1d_slicing():
16 |     """Tests if dpjit properly computes strides and returns them to Python."""
17 | 
18 |     def _tester(a):
19 |         return a[1:5]
20 | 
21 |     a = dpnp.arange(10)
22 |     b = dpnp.asnumpy(dpjit(_tester)(a))
23 |     # The same function run on a NumPy array gives the reference result.
24 |     na = numpy.arange(10)
25 |     nb = _tester(na)
26 | 
27 |     assert (b == nb).all()
28 | 
29 | 
30 | def test_1d_slicing2():
31 |     """Tests if dpjit properly computes strides and returns them to Python."""
32 | 
33 |     def _tester(a):
34 |         b = a[1:4]
35 |         a[6:9] = b
36 | 
37 |     a = dpnp.arange(10)
38 |     b = dpnp.asnumpy(dpjit(_tester)(a))
39 | 
40 |     na = numpy.arange(10)
41 |     nb = _tester(na)
42 | 
43 |     assert (b == nb).all()
44 | 
45 | 
46 | def test_multidim_slicing():
47 |     """Tests if dpjit properly assigns a 2-d array to a slice of a 3-d array."""
48 | 
49 |     def _tester(a, b):
50 |         b[:, :, 0] = a
51 | 
52 |     a = dpnp.arange(64, dtype=numpy.int64)
53 |     a = a.reshape(4, 16)
54 |     b = dpnp.empty((4, 16, 4), dtype=numpy.int64)
55 |     dpjit(_tester)(a, b)
56 |     # Reference: the same function applied to equivalent NumPy arrays.
57 |     na = numpy.arange(64, dtype=numpy.int64)
58 |     na = na.reshape(4, 16)
59 |     nb = numpy.empty((4, 16, 4), dtype=numpy.int64)
60 |     _tester(na, nb)
61 |     # Compare only the assigned plane; empty() left the rest uninitialised.
62 |     assert (nb[:, :, 0] == dpnp.asnumpy(b)[:, :, 0]).all()
63 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/test_itanium_mangler_extension.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import pytest
 6 | from numba import float32, float64, int32, int64, uint32, uint64
 7 | from numba.core import types
 8 | 
 9 | import numba_dpex.core.utils.itanium_mangler as itanium_mangler
10 | from numba_dpex.kernel_api import AddressSpace as address_space
11 | 
12 | list_of_dtypes = [
13 |     (int32, "i"),
14 |     (int64, "x"),
15 |     (uint32, "j"),
16 |     (uint64, "y"),
17 |     (float32, "f"),
18 |     (float64, "d"),
19 | ]
20 | 
21 | 
22 | @pytest.fixture(params=list_of_dtypes)
23 | def dtypes(request):
24 |     return request.param
25 | 
26 | 
27 | list_of_addrspaces = [
28 |     (address_space.PRIVATE.value, "3AS0"),
29 |     (address_space.GLOBAL.value, "3AS1"),
30 |     (address_space.LOCAL.value, "3AS3"),
31 |     (address_space.GENERIC.value, "3AS4"),
32 | ]
33 | 
34 | 
35 | @pytest.fixture(params=list_of_addrspaces)
36 | def addrspaces(request):
37 |     return request.param
38 | 
39 | 
40 | def test_mangling_arg_type(dtypes):
41 |     dtype, expected_str = dtypes
42 |     got = itanium_mangler.mangle_type(types.CPointer(dtype))
43 |     expected = "P" + expected_str
44 |     assert got == expected
45 | 
46 | 
47 | def test_mangling_arg_type_2(dtypes, addrspaces):
48 |     dtype, expected_dtype_str = dtypes
49 |     addrspace, expected_addrspace_str = addrspaces
50 |     got = itanium_mangler.mangle_type(
51 |         types.CPointer(dtype, addrspace=addrspace)
52 |     )
53 |     expected = "PU" + expected_addrspace_str + expected_dtype_str
54 |     assert got == expected
55 | 


--------------------------------------------------------------------------------
/docs/source/user_guide/debugging/altering.rst:
--------------------------------------------------------------------------------
 1 | .. include:: ./../../ext_links.txt
 2 | 
 3 | Altering Execution
 4 | ==================
 5 | 
 6 | See `GDB* documentation <https://sourceware.org/gdb/onlinedocs/gdb/Altering.html>`_.
 7 | 
 8 | .. _assignment-to-variables:
 9 | 
10 | Assignment to Variables
11 | -----------------------
12 | 
13 | To alter the value of a variable, evaluate an assignment expression.
14 | This also works for function arguments.
15 | 
16 | .. note::
17 | 
18 |    Altering arguments has a limitation: for it to work correctly, the
19 |    arguments must not be modified in the code.
20 |    See `Numba issue <https://github.com/numba/numba/pull/7196>`_.
21 | 
22 | Example
23 | ```````
24 | 
25 | Source code :file:`numba_dpex/examples/debug/side-by-side-2.py`:
26 | 
27 | .. literalinclude:: ./../../../../numba_dpex/examples/debug/side-by-side-2.py
28 |    :pyobject: common_loop_body
29 |    :linenos:
30 |    :lineno-match:
31 |    :emphasize-lines: 6
32 | 
33 | Debug session:
34 | 
35 | .. code-block:: shell-session
36 |    :emphasize-lines: 11-
37 | 
38 |    $ gdb-oneapi -q python
39 |    ...
40 |    (gdb) set environment NUMBA_OPT 0
41 |    (gdb) set environment NUMBA_EXTEND_VARIABLE_LIFETIMES 1
42 |    (gdb) break side-by-side-2.py:29 if param_a == 5
43 |    ...
44 |    (gdb) run numba_dpex/examples/debug/side-by-side-2.py --api=numba-dpex-kernel
45 |    ...
46 |    Thread 2.1 hit Breakpoint 1, with SIMD lane 5, __main__::common_loop_body (i=5, a=..., b=...) at side-by-side-2.py:29
47 |    29          result = param_c + param_d
48 |    (gdb) print param_c
49 |    $1 = 15
50 |    (gdb) print param_c=200
51 |    $2 = 200
52 |    (gdb) print param_c
53 |    $3 = 200
54 | 


--------------------------------------------------------------------------------
/docs/backups/user_guides/kernel_programming_guide/synchronization.rst:
--------------------------------------------------------------------------------
 1 | Synchronization Functions
 2 | =========================
 3 | 
 4 | Numba-dpex only supports some of the SYCL synchronization operations. For
 5 | synchronization of all threads in the same thread block, numba-dpex provides
 6 | a helper function called ``numba_dpex.barrier()``. This function implements the
 7 | same pattern as barriers in traditional multi-threaded programming: invoking the
 8 | function forces a thread to wait until all threads in the block reach the
 9 | barrier, at which point it returns control to all its callers.
10 | 
11 | ``numba_dpex.barrier()`` supports two memory fence options:
12 | 
13 | - ``numba_dpex.GLOBAL_MEM_FENCE``: The barrier function will queue a memory
14 |   fence to ensure correct ordering of memory operations to global memory. Using
15 |   the option can be useful when work-items, for example, write to buffer or
16 |   image objects and then want to read the updated data. Passing no arguments to
17 |   ``numba_dpex.barrier()`` is equivalent to setting the global memory fence
18 |   option. For example,
19 | 
20 |   .. literalinclude:: ../../../numba_dpex/examples/barrier.py
21 |    :pyobject: no_arg_barrier_support
22 | 
23 | - ``numba_dpex.LOCAL_MEM_FENCE``: The barrier function will either flush
24 |   any variables stored in local memory or queue a memory fence to ensure
25 |   correct ordering of memory operations to local memory. For example,
26 | 
27 | .. literalinclude:: ../../../numba_dpex/examples/barrier.py
28 |    :pyobject: local_memory
29 | 
30 | 
31 | .. note::
32 | 
33 |     The ``numba_dpex.barrier()`` function is semantically equivalent to
34 |     ``numba.cuda.syncthreads``.
35 | 


--------------------------------------------------------------------------------
/scripts/run_examples.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | check() {
 6 |   echo "Run $1 ..."
 7 |   python "$1" | grep "$SYCL_DEVICE_FILTER"
 8 |   # grep's status above is the result: the output must mention the filter.
 9 | }
10 | 
# Run `check` on every example script, in a fixed order.
run_checks() {
  local example
  local -a examples=(
    numba_dpex/examples/atomic_op.py
    numba_dpex/examples/barrier.py
    numba_dpex/examples/blacksholes_kernel.py
    numba_dpex/examples/blacksholes_njit.py
    numba_dpex/examples/dpex_func.py
    numba_dpex/examples/dpex_with_context.py
    numba_dpex/examples/matmul.py
    numba_dpex/examples/pairwise_distance.py
    numba_dpex/examples/rand.py
    numba_dpex/examples/sum2D.py
    numba_dpex/examples/sum_ndarray.py
    numba_dpex/examples/sum.py
    numba_dpex/examples/sum_reduction_ocl.py
    numba_dpex/examples/sum_reduction.py
    numba_dpex/examples/sum_reduction_recursive_ocl.py
    # numba_dpex/examples/usm_ndarray.py is disabled.
    # See https://github.com/IntelPython/numba-dpex/issues/436

    numba_dpex/examples/auto_offload_examples/sum-1d.py
    numba_dpex/examples/auto_offload_examples/sum-2d.py
    numba_dpex/examples/auto_offload_examples/sum-3d.py
    numba_dpex/examples/auto_offload_examples/sum-4d.py
    numba_dpex/examples/auto_offload_examples/sum-5d.py

    numba_dpex/examples/debug/dpex_func.py
    numba_dpex/examples/debug/sum.py
  )

  for example in "${examples[@]}"; do
    check "$example"
  done
}
38 | 
# Run the whole example suite with SYCL_DEVICE_FILTER set to the given value.
#
# Arguments:
#   $1 - device filter string, e.g. opencl:gpu:0
run_with_device() {
  local device_filter=$1
  echo "Run with SYCL_DEVICE_FILTER=${device_filter} ..."
  SYCL_DEVICE_FILTER=${device_filter} run_checks
}
43 | 
44 | run_with_device level_zero:gpu:0
45 | run_with_device opencl:gpu:0
46 | run_with_device opencl:cpu:0
47 | 
48 | echo Done
49 | 


--------------------------------------------------------------------------------
/numba_dpex/dpctl_iface/_helpers.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | from numba.core import types
 6 | 
 7 | from numba_dpex.core.types.kernel_api.local_accessor import LocalAccessorType
 8 | 
 9 | 
def numba_type_to_dpctl_typenum(context, ty):
    """Return the dpctl kernel-argument type number for a Numba type.

    The value is looked up in the dpctl defined enum ``kernel_arg_type``
    (``DPCTLKernelArgType``) and wrapped into an ``int32`` constant by calling
    ``context.get_constant``.

    Args:
        context: Object providing ``get_constant(type, value)``.
        ty: The Numba type to translate.

    Returns:
        The result of ``context.get_constant(types.int32, <enum value>)``.

    Raises:
        NotImplementedError: If ``ty`` has no corresponding entry.
    """

    from dpctl._sycl_queue import kernel_arg_type as kargty

    # Types recognised by equality, paired with the enum member name. Names
    # (not members) are stored so that only the selected member is accessed.
    exact_matches = (
        (types.boolean, "dpctl_uint8"),
        (types.int32, "dpctl_int32"),
        (types.uint32, "dpctl_uint32"),
        (types.int64, "dpctl_int64"),
        (types.uint64, "dpctl_uint64"),
        (types.float32, "dpctl_float32"),
        (types.float64, "dpctl_float64"),
        (types.voidptr, "dpctl_void_ptr"),
    )
    # Types recognised by class membership.
    class_matches = (
        (types.scalars.IntegerLiteral, "dpctl_int32"),
        (types.CPointer, "dpctl_void_ptr"),
        (LocalAccessorType, "dpctl_local_accessor"),
    )

    member_name = next(
        (name for candidate, name in exact_matches if ty == candidate), None
    )
    if member_name is None:
        member_name = next(
            (name for cls, name in class_matches if isinstance(ty, cls)), None
        )
    if member_name is None:
        # Name the offending type so that the failure can be diagnosed.
        raise NotImplementedError(
            f"No dpctl kernel argument type is defined for Numba type {ty}"
        )

    return context.get_constant(types.int32, getattr(kargty, member_name).value)
40 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/types/USMNdArray/test_array_creation_errors.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import dpctl
 6 | import pytest
 7 | 
 8 | from numba_dpex.core.types import USMNdArray, dpctl_types, float32
 9 | 
10 | 
def test_usmndarray_negative_tests():
    """Check USMNdArray construction from device/queue arguments.

    Valid combinations must yield the default attributes; passing both a
    device and a queue, or a non-supported value for either, must raise
    ``TypeError``.
    """
    default_device = dpctl.SyclDevice().filter_string

    def assert_default_attributes(usm_ty):
        # Attributes expected for every successfully created type below.
        assert usm_ty.dtype.name == "float32"
        assert usm_ty.ndim == 1
        assert usm_ty.layout == "C"
        assert usm_ty.addrspace == 1
        assert usm_ty.usm_type == "device"

    # Neither device nor queue given.
    implicit_ty = USMNdArray(1, device=None, queue=None, dtype=float32)
    assert_default_attributes(implicit_ty)
    assert implicit_ty.queue.sycl_device == default_device

    # Only a device filter string given.
    from_device_ty = USMNdArray(
        1, device=default_device, queue=None, dtype=float32
    )
    assert_default_attributes(from_device_ty)
    assert from_device_ty.queue.sycl_device == default_device

    # Only a queue given.
    queue = dpctl_types.DpctlSyclQueue(dpctl.SyclQueue())
    from_queue_ty = USMNdArray(1, device=None, queue=queue, dtype=float32)
    assert_default_attributes(from_queue_ty)

    rejected_kwargs = (
        {"device": default_device, "queue": queue},
        {"queue": 0},
        {"device": 0},
    )
    for kwargs in rejected_kwargs:
        with pytest.raises(TypeError):
            USMNdArray(1, dtype=float32, **kwargs)
48 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/kernel/vector_sum2D.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | 
 3 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 4 | #
 5 | # SPDX-License-Identifier: Apache-2.0
 6 | 
 7 | """The example demonstrates a 2-D vector addition kernel.
 8 | """
 9 | 
10 | import dpctl
11 | import dpctl.tensor as dpt
12 | import numpy as np
13 | 
14 | import numba_dpex as ndpx
15 | 
16 | 
@ndpx.kernel
def data_parallel_sum(item, a, b, c):
    """
    A two-dimensional vector addition example using the ``kernel`` decorator.

    Adds the elements of ``a`` and ``b`` located at the position given by
    ``item`` and stores the sum at the same position of ``c``.
    """
    # Indices reported by ``item`` for dimensions 0 and 1.
    i = item.get_id(0)
    j = item.get_id(1)
    c[i, j] = a[i, j] + b[i, j]
25 | 
26 | 
def driver(a, b, c, global_size):
    """Launch ``data_parallel_sum`` over ``global_size`` with ``a``, ``b``
    and ``c`` as the kernel arguments.
    """
    ndpx.call_kernel(data_parallel_sum, global_size, a, b, c)
29 | 
30 | 
def main():
    """Run the 2-D addition kernel and compare it with a NumPy reference.

    Two ``X`` by ``Y`` float32 arrays are created on the default device,
    added by the kernel, and the result is checked against the same
    computation done with NumPy on the host.
    """
    # Array dimensions
    X = 8
    Y = 8
    global_size = ndpx.Range(X, Y)

    # Host-side reference result. The placeholder array that used to be
    # allocated for it was overwritten immediately and has been dropped.
    a = np.arange(X * Y, dtype=np.float32).reshape(X, Y)
    b = np.arange(X * Y, dtype=np.float32).reshape(X, Y)
    expected = a + b

    device = dpctl.select_default_device()
    a_dpt = dpt.arange(X * Y, dtype=dpt.float32, device=device)
    a_dpt = dpt.reshape(a_dpt, (X, Y))
    b_dpt = dpt.arange(X * Y, dtype=dpt.float32, device=device)
    b_dpt = dpt.reshape(b_dpt, (X, Y))
    # empty_like already yields an (X, Y) array, so no reshape is needed.
    c_dpt = dpt.empty_like(a_dpt)

    print("Executing on device:")
    device.print_device_info()

    print("Running kernel ...")
    driver(a_dpt, b_dpt, c_dpt, global_size)
    c_out = dpt.asnumpy(c_dpt)
    assert np.allclose(expected, c_out)

    print("Done...")
60 | 
61 | 
# Run the example only when the file is executed as a script.
if __name__ == "__main__":
    main()
64 | 


--------------------------------------------------------------------------------
/numba_dpex/core/types/kernel_api/ranges.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | from contextlib import ExitStack
 6 | 
 7 | from numba.core import cgutils, errors, types
 8 | 
 9 | 
class RangeType(types.Type):
    """Numba-dpex type corresponding to
    :class:`numba_dpex.kernel_api.ranges.Range`
    """

    def __init__(self, ndim: int):
        """Create the type for a range with ``ndim`` dimensions.

        Raises:
            errors.TypingError: If ``ndim`` is smaller than 1 or larger
                than 3.
        """
        if ndim > 3 or ndim < 1:
            raise errors.TypingError(
                "RangeType can only have 1,2, or 3 dimensions"
            )
        self._ndim = ndim
        super().__init__(name=f"Range<{ndim}>")

    @property
    def ndim(self):
        """Number of dimensions of the range."""
        return self._ndim

    @property
    def key(self):
        """Key identifying the type: the number of dimensions."""
        return self._ndim

    @property
    def mangling_args(self):
        """Class name and argument list used for name mangling."""
        return type(self).__name__, [self.ndim]
35 | 
36 | 
class NdRangeType(types.Type):
    """Numba-dpex type corresponding to
    :class:`numba_dpex.kernel_api.ranges.NdRange`
    """

    def __init__(self, ndim: int):
        """Create the type for an nd-range with ``ndim`` dimensions.

        Raises:
            errors.TypingError: If ``ndim`` is smaller than 1 or larger
                than 3.
        """
        self._ndim = ndim
        if ndim < 1 or ndim > 3:
            # The message used to name RangeType; report the class that
            # actually rejected the value.
            raise errors.TypingError(
                "NdRangeType can only have 1, 2, or 3 dimensions"
            )
        super(NdRangeType, self).__init__(name="NdRange<" + str(ndim) + ">")

    @property
    def ndim(self):
        """Number of dimensions of the nd-range."""
        return self._ndim

    @property
    def key(self):
        """Key identifying the type: the number of dimensions."""
        return self._ndim

    @property
    def mangling_args(self):
        """Class name and argument list used for name mangling."""
        args = [self.ndim]
        return self.__class__.__name__, args
62 | 


--------------------------------------------------------------------------------
/docs/backups/user_guides/kernel_programming_guide/reduction.rst:
--------------------------------------------------------------------------------
 1 | Reduction on SYCL-supported Devices
 2 | ===================================
 3 | 
 4 | Numba-dpex does not yet provide any specific decorator to implement
 5 | reduction kernels. However, a kernel reduction can be written explicitly. This
 6 | section provides two approaches for writing a reduction kernel as a
 7 | ``numba_dpex.kernel`` function.
 8 | 
 9 | 
10 | Example 1
11 | ---------
12 | 
13 | This example demonstrates a summation reduction on a one-dimensional array.
14 | 
15 | Full example can be found at ``numba_dpex/examples/sum_reduction.py``.
16 | 
17 | In this example, to reduce the array we invoke the kernel multiple times.
18 | 
19 | .. literalinclude:: ../../../numba_dpex/examples/sum_reduction.py
20 |    :pyobject: sum_reduction_kernel
21 | 
22 | .. literalinclude:: ../../../numba_dpex/examples/sum_reduction.py
23 |    :pyobject: sum_reduce
24 | 
25 | Example 2
26 | ---------
27 | 
28 | Full example can be found at
29 | ``numba_dpex/examples/sum_reduction_recursive_ocl.py``.
30 | 
31 | .. literalinclude:: ../../../numba_dpex/examples/sum_reduction_recursive_ocl.py
32 |    :pyobject: sum_reduction_kernel
33 | 
34 | .. literalinclude:: ../../../numba_dpex/examples/sum_reduction_recursive_ocl.py
35 |    :pyobject: sum_recursive_reduction
36 | 
37 | .. literalinclude:: ../../../numba_dpex/examples/sum_reduction_recursive_ocl.py
38 |    :pyobject: sum_reduce
39 | 
40 | .. note::
41 | 
42 |     Numba-dpex does not yet provide any analogue to the ``numba.cuda.reduce``
43 |     decorator for writing reduction kernels. Such a decorator will be added in
44 |     future releases.
45 | 
46 | Full examples
47 | -------------
48 | 
49 | - ``numba_dpex/examples/sum_reduction_recursive_ocl.py``
50 | - ``numba_dpex/examples/sum_reduction_ocl.py``
51 | - ``numba_dpex/examples/sum_reduction.py``
52 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/kernel/device_func.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """Demonstrates the usage of the :func:`numba_dpex.device_func` decorator.
 6 | 
 7 | Refer to the API documentation and the Kernel programming guide for further
 8 | details.
 9 | """
10 | 
11 | import dpnp
12 | 
13 | import numba_dpex as dpex
14 | from numba_dpex import kernel_api as kapi
15 | 
16 | 
@dpex.device_func
def increment_by_1(a):
    """A device callable function that can be invoked from a kernel or
    another device function.

    Args:
        a: Value to increment.

    Returns:
        ``a + 1``.
    """
    return a + 1
23 | 
24 | 
@dpex.device_func
def increment_and_sum_up(nd_item: kapi.NdItem, a):
    """Demonstrates the usage of group_barrier and NdItem usage in a
    device_func.

    Every caller increments the element of ``a`` at its global id; after
    the barrier, the caller whose global id is 0 adds all other elements of
    ``a`` into ``a[0]``.
    """
    i = nd_item.get_global_id(0)

    a[i] += 1
    # Wait for the work-items of the group before reading their updates.
    kapi.group_barrier(nd_item.get_group(), kapi.MemoryScope.DEVICE)

    # Only the caller with global id 0 accumulates the total into a[0].
    if i == 0:
        for idx in range(1, a.size):
            a[0] += a[idx]
38 | 
39 | 
@dpex.kernel
def kernel1(item: kapi.Item, a, b):
    """Demonstrates calling a device function from a kernel.

    Stores ``increment_by_1(a[i])`` in ``b[i]``, where ``i`` is the id
    reported by ``item`` for dimension 0.
    """
    i = item.get_id(0)
    b[i] = increment_by_1(a[i])
45 | 
46 | 
@dpex.kernel
def kernel2(nd_item: kapi.NdItem, a):
    """The kernel delegates everything to a device_func and calls it.

    All work is done by ``increment_and_sum_up``, which receives ``nd_item``
    and ``a`` unchanged.
    """
    increment_and_sum_up(nd_item, a)
51 | 
52 | 
if __name__ == "__main__":
    # Array size
    N = 100
    # ``a`` starts as all ones and ``b`` as all zeros.
    a = dpnp.ones(N, dtype=dpnp.int32)
    b = dpnp.zeros(N, dtype=dpnp.int32)

    # kernel1 writes a[i] + 1 into b[i].
    dpex.call_kernel(kernel1, dpex.Range(N), a, b)
    # b should be [2, 2, ...., 2]
    print(b)

    # kernel2 increments every element of b and sums them into b[0]:
    # N elements of value 3 give 300.
    dpex.call_kernel(kernel2, dpex.NdRange((N,), (N,)), b)
    # b[0] should be 300
    print(b[0])
66 | 


--------------------------------------------------------------------------------
/docs/backups/user_guides/kernel_programming_guide/memory_allocation_address_space.rst:
--------------------------------------------------------------------------------
 1 | Supported Address Space Qualifiers
 2 | ==================================
 3 | 
 4 | The address space qualifier may be used to specify the region of memory that is
 5 | used to allocate the object.
 6 | 
 7 | Numba-dpex supports three disjoint named address spaces:
 8 | 
 9 | 1. Global Address Space
10 |     Global Address Space refers to memory objects allocated from the global
11 |     memory pool and will be shared among all work-items. Arguments passed to any
12 |     kernel are allocated in the global address space. In the below example,
13 |     arguments `a`, `b` and `c` will be allocated in the global address space:
14 | 
15 |     .. literalinclude:: ../../../numba_dpex/examples/sum.py
16 | 
17 | 
18 | 2. Local Address Space
19 |     Local Address Space refers to memory objects that need to be allocated in
20 |     local memory pool and are shared by all work-items of a work-group.
21 |     Numba-dpex does not support passing arguments that are allocated in the
22 |     local address space to `@numba_dpex.kernel`. Users are allowed to allocate
23 |     static arrays in the local address space inside the `@numba_dpex.kernel`. In
24 |     the example below `numba_dpex.local.array(shape, dtype)` is the API used to
25 |     allocate a static array in the local address space:
26 | 
27 |     .. literalinclude:: ../../../numba_dpex/examples/barrier.py
28 |       :lines: 54-87
29 | 
30 | 3. Private Address Space
31 |     Private Address Space refers to memory objects that are local to each
32 |     work-item and are not shared with any other work-item. In the example below
33 |     `numba_dpex.private.array(shape, dtype)` is the API used to allocate a
34 |     static array in the private address space:
35 | 
36 |     .. literalinclude:: ../../../numba_dpex/examples/kernel_private_memory.py
37 | 


--------------------------------------------------------------------------------
/.github/workflows/coverage.yml:
--------------------------------------------------------------------------------
# Builds numba-dpex, runs the test suite with coverage enabled and hands the
# result to the Coveralls action.
name: Coverage
# Runs on pushes to main and on every pull request.
on:
  push:
    branches:
      - main
  pull_request:

permissions: read-all

jobs:
  main:
    name: Generate coverage and push to Coveralls.io
    runs-on: ubuntu-latest
    permissions:
      pull-requests: write
    defaults:
      run:
        # Login shell (-l); presumably needed so that the conda environment
        # activated below is picked up by each step - TODO confirm.
        shell: bash -l {0}
    steps:
      - name: Cancel Previous Runs
        uses: styfle/cancel-workflow-action@0.12.1
        with:
          access_token: ${{ github.token }}

      - name: Checkout repo
        uses: actions/checkout@v4
        with:
          # 0 requests the complete history instead of a shallow clone.
          fetch-depth: 0

      # Creates and activates the "coverage" conda environment described by
      # environment/coverage.yml.
      - uses: conda-incubator/setup-miniconda@v3
        with:
          python-version: '3.10'
          miniforge-variant: Mambaforge
          miniforge-version: latest
          activate-environment: "coverage"
          channel-priority: "disabled"
          environment-file: environment/coverage.yml

      # In-place (develop) build using the icx/icpx compilers.
      - name: Build numba-dpex
        run: |
          export PATH=$CONDA/bin-llvm:$PATH
          CC=icx CXX=icpx python setup.py develop

      - name: Test installation
        run: |
          conda list
          python -c "import numba_dpex; print(numba_dpex.__file__)"

      # Prints the resolved environment into the job log.
      - name: Dump coverage test environment
        run: |
          conda env export > /tmp/env-cov.yml
          cat /tmp/env-cov.yml

      # Ignoring test due to opencl driver optimization bug
      - name: Run tests with coverage
        run: |
          pytest  -q --cov=./ --cov-report xml --pyargs numba_dpex \
            -k 'not test_1d_strided_dpnp_array_in_kernel[2]'

      - name: Coveralls
        uses: coverallsapp/github-action@v2
62 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/dpjit_tests/parfors/prange/test_pairwise_distance.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import dpctl
 6 | import dpnp
 7 | import numba as nb
 8 | import pytest
 9 | 
10 | from numba_dpex import dpjit
11 | 
12 | 
def test_pairwise_distance():
    """Check that a prange based pairwise distance function can be compiled
    and executed by ``dpjit`` without raising.
    """

    @dpjit
    def pairwise_distance(X1, X2, D):
        """Naïve pairwise distance impl - take an array representing M points in N
        dimensions, and return the M x M matrix of Euclidean distances

        Args:
            X1 : Set of points
            X2 : Set of points
            D  : Outputted distance matrix
        """
        # Size of inputs
        X1_rows = X1.shape[0]
        X2_rows = X2.shape[0]
        X1_cols = X1.shape[1]

        # TODO: get rid of it once prange supports dtype
        # https://github.com/IntelPython/numba-dpex/issues/1063
        float0 = X1.dtype.type(0.0)

        # Outermost parallel loop over the matrix X1
        for i in nb.prange(X1_rows):
            # Loop over the matrix X2
            for j in range(X2_rows):
                d = float0
                # Compute Euclidean distance
                for k in range(X1_cols):
                    tmp = X1[i, k] - X2[j, k]
                    d += tmp * tmp
                # Write computed distance to distance matrix
                D[i, j] = dpnp.sqrt(d)

    q = dpctl.SyclQueue()
    X1 = dpnp.ones((100, 2), sycl_queue=q)
    X2 = dpnp.ones((100, 2), sycl_queue=q)
    D = dpnp.empty((100, 100), sycl_queue=q)

    try:
        pairwise_distance(X1, X2, D)
    except Exception:
        # A bare ``except`` would also trap KeyboardInterrupt and SystemExit
        # and report them as a compilation failure.
        pytest.fail("Failed to compile prange loop for pairwise distance calc")
54 | 


--------------------------------------------------------------------------------
/docs/source/user_guide/debugging/debugging_environment.rst:
--------------------------------------------------------------------------------
 1 | .. include:: ./../../ext_links.txt
 2 | 
 3 | Configure debugging environment
 4 | =================================
 5 | 
 6 | 1) Activate the debugger and compiler:
 7 | 
 8 |     .. code-block:: bash
 9 | 
10 |         export ONEAPI_ROOT=/path/to/oneapi
11 |         source $ONEAPI_ROOT/debugger/latest/env/vars.sh
12 |         source $ONEAPI_ROOT/compiler/latest/env/vars.sh
13 | 
14 | 2) Create and activate conda environment with the installed numba-dpex:
15 | 
16 |     .. code-block:: bash
17 | 
18 |         conda create -n numba-dpex-dev numba-dpex
19 |         conda activate numba-dpex-dev
20 | 
21 | 3) Activate NEO drivers (optional).
22 | 
23 |     If you want to use the local NEO driver, activate the variables for it. See
24 |     the :ref:`NEO-driver`.
25 | 
26 | 4) Check debugging environment.
27 | 
28 |     You can check the correctness of the work with the following example:
29 | 
30 |     .. literalinclude:: ./../../../../numba_dpex/examples/debug/simple_sum.py
31 |         :lines: 5-
32 |         :linenos:
33 |         :lineno-match:
34 | 
35 |     Launch the Intel® Distribution for GDB* and set a breakpoint in the kernel:
36 | 
37 |     .. code-block:: shell-session
38 | 
39 |         $ gdb-oneapi -q --args python simple_sum.py
40 |         (gdb) break simple_sum.py:22
41 |         No source file named simple_sum.py.
42 |         Make breakpoint pending on future shared library load? (y or [n]) y
43 |         Breakpoint 1 (simple_sum.py:22) pending.
44 |         (gdb) run
45 | 
46 |     In the output you can see that the breakpoint was hit successfully:
47 | 
48 |     .. code-block:: shell-session
49 | 
50 |         Thread 2.2 hit Breakpoint 1, with SIMD lanes [0-7], __main__::data_parallel_sum () at simple_sum.py:22
51 |         22           i = dpex.get_global_id(0)
52 |         (gdb) continue
53 |         Done...
54 |         ...
55 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/kernel_tests/test_inline_threshold_config.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | from numba.core import compiler
 6 | 
 7 | import numba_dpex as dpex
 8 | from numba_dpex.kernel_api import Item
 9 | 
10 | 
def kernel_func(item: Item, a, b, c):
    """Element-wise addition used as the function to compile in the tests
    below: ``c[i] = a[i] + b[i]`` for the id reported by ``item``.
    """
    i = item.get_id(0)
    c[i] = a[i] + b[i]
14 | 
15 | 
def test_inline_threshold_set_using_config():
    """
    Test that the inline_threshold flag follows ``config.INLINE_THRESHOLD``:
    ``None`` results in 0 and an explicit value is used as is.
    """
    saved_threshold = dpex.config.INLINE_THRESHOLD
    try:
        dpex.config.INLINE_THRESHOLD = None

        disp = dpex.kernel(kernel_func)
        flags = compiler.Flags()
        disp.targetdescr.options.parse_as_flags(flags, disp.targetoptions)

        assert flags.inline_threshold == 0

        dpex.config.INLINE_THRESHOLD = 2

        flags = compiler.Flags()
        disp.targetdescr.options.parse_as_flags(flags, disp.targetoptions)

        assert flags.inline_threshold == 2
    finally:
        # Restore the global setting even when an assertion fails, so that
        # the modified value cannot leak into other tests.
        dpex.config.INLINE_THRESHOLD = saved_threshold
34 | 
35 | 
def test_inline_threshold_set_using_decorator_option():
    """
    Check that ``inline_threshold`` passed to the kernel decorator ends up in
    the parsed compiler flags.
    """

    decorate = dpex.kernel(inline_threshold=2)
    dispatcher = decorate(kernel_func)

    parsed_flags = compiler.Flags()
    target_options = dispatcher.targetdescr.options
    target_options.parse_as_flags(parsed_flags, dispatcher.targetoptions)

    assert parsed_flags.inline_threshold == 2
46 | 
47 | 
def test_inline_threshold_set_using_decorator_supersedes_config_option():
    """
    Test that an inline_threshold passed to the kernel decorator is used
    while ``config.INLINE_THRESHOLD`` is set to ``None``.
    """
    saved_threshold = dpex.config.INLINE_THRESHOLD
    try:
        dpex.config.INLINE_THRESHOLD = None

        disp = dpex.kernel(inline_threshold=3)(kernel_func)
        flags = compiler.Flags()
        disp.targetdescr.options.parse_as_flags(flags, disp.targetoptions)

        assert flags.inline_threshold == 3
    finally:
        # Restore the global setting even when the assertion fails, so that
        # the modified value cannot leak into other tests.
        dpex.config.INLINE_THRESHOLD = saved_threshold
60 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/core/runtime/test_llvm_registration.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import llvmlite.binding as llb
 6 | 
 7 | from numba_dpex.core import runtime
 8 | 
 9 | 
def test_llvm_symbol_registered():
    """Verify that llvmlite resolves each listed ``_dpexrt_python`` function
    to the address exposed by the module attribute of the same name.
    """
    symbol_names = (
        "DPEXRT_sycl_usm_ndarray_from_python",
        "DPEXRT_sycl_usm_ndarray_to_python_acqref",
        "NRT_ExternalAllocator_new_for_usm",
        "DPEXRT_sycl_queue_from_python",
        "DPEXRT_sycl_queue_to_python",
        "DPEXRTQueue_CreateFromFilterString",
        "DpexrtQueue_SubmitRange",
        "DPEXRT_MemInfo_alloc",
        "DPEXRT_MemInfo_fill",
    )

    for symbol in symbol_names:
        expected_address = getattr(runtime._dpexrt_python, symbol)
        assert llb.address_of_symbol(symbol) == expected_address
58 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/codegen/test_intenum_literal_codegen.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import re
 6 | 
 7 | import dpctl
 8 | from numba.core import types
 9 | 
10 | import numba_dpex as dpex
11 | from numba_dpex import DpctlSyclQueue, DpnpNdArray, int64
12 | from numba_dpex.kernel_api.flag_enum import FlagEnum
13 | 
14 | 
def test_compilation_as_literal_constant():
    """Tests if FlagEnum objects are treated as scalar constants inside
    numba-dpex generated code.

    The kernel `pass_flags_to_func` calls the device_func `bitwise_or_flags`
    with two FlagEnum members. After compiling the kernel, the generated
    LLVM IR is searched for a call to `bitwise_or_flags` whose trailing
    arguments are the scalars `i64 1` and `i64 2`.
    """

    class PseudoFlags(FlagEnum):
        FLAG1 = 1
        FLAG2 = 2

    @dpex.device_func
    def bitwise_or_flags(flag1, flag2):
        return flag1 | flag2

    def pass_flags_to_func(a):
        f1 = PseudoFlags.FLAG1
        f2 = PseudoFlags.FLAG2
        a[0] = bitwise_or_flags(f1, f2)

    # Signature: a kernel returning void that takes a 1-D int64 dpnp array.
    queue_ty = DpctlSyclQueue(dpctl.SyclQueue())
    i64arr_ty = DpnpNdArray(ndim=1, dtype=int64, layout="C", queue=queue_ty)
    kernel_sig = types.void(i64arr_ty)

    dispatcher = dpex.kernel(inline_threshold=0)(pass_flags_to_func)
    dispatcher.compile(kernel_sig)
    compile_result = dispatcher.overloads[kernel_sig.args]
    llvm_ir = str(compile_result.library._final_module)

    expected_call = re.compile(
        r"call spir_func i32 @\_Z.*bitwise\_or"
        r"\_flags.*\(i64\*\s(\w+)?\s*%.*, i64 1, i64 2\)"
    )

    assert expected_call.search(llvm_ir) is not None
54 | 


--------------------------------------------------------------------------------
/numba_dpex/kernel_api/barrier.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2023 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """Python functions that simulate SYCL's group_barrier function.
 6 | """
 7 | 
 8 | from .index_space_ids import Group
 9 | from .memory_enums import MemoryScope
10 | 
11 | 
def group_barrier(
    group: Group, fence_scope: MemoryScope = MemoryScope.WORK_GROUP
):
    """Performs a barrier operation across all work-items in a work-group.

    The function is equivalent to the ``sycl::group_barrier`` function. It
    synchronizes work within a group of work-items. All the work-items
    of the group must execute the barrier call before any work-item
    continues execution beyond the barrier.

    The ``group_barrier`` performs a memory fence operation ensuring that memory
    accesses issued before the barrier are not re-ordered with those issued
    after the barrier. All work-items in group G execute a release fence prior
    to synchronizing at the barrier, all work-items in group G execute an
    acquire fence afterwards, and there is an implicit synchronization of these
    fences as if provided by an explicit atomic operation on an atomic object.

    .. important::
        The function is not implemented yet for pure CPython execution and is
        only supported in JIT compiled mode of execution.

    Args:
        group (Group): Indicates the work-group inside which the barrier is to
            be executed.
        fence_scope (MemoryScope, optional): Scope of any memory
            consistency operations that are performed by the barrier.
            Defaults to ``MemoryScope.WORK_GROUP``.

    Raises:
        NotImplementedError: When the function is called directly from Python.
    """

    # TODO: A pure Python simulation of a group_barrier will be added later.
    # Until then every direct call from Python fails unconditionally.
    raise NotImplementedError
44 | 


--------------------------------------------------------------------------------
/conda-recipe/bld.bat:
--------------------------------------------------------------------------------
 1 | @REM new llvm-spirv location
 2 | @REM starting from dpcpp_impl_win-64=2022.0.0=intel_3638 location is env\Library\bin-llvm
 3 | @REM used BUILD_PREFIX as compiler installed in build section of meta.yml
 4 | set "PATH=%BUILD_PREFIX%\Library\bin-llvm;%PATH%"
 5 | 
 6 | REM A workaround for activate-dpcpp.bat issue to be addressed in 2021.4
 7 | set "LIB=%BUILD_PREFIX%\Library\lib;%BUILD_PREFIX%\compiler\lib;%LIB%"
 8 | SET "INCLUDE=%BUILD_PREFIX%\include;%INCLUDE%"
 9 | 
10 | REM Since the 60.0.0 release, setuptools includes a local, vendored copy
11 | REM of distutils (from late copies of CPython) that is enabled by default.
12 | REM It breaks build for Windows, so use distutils from "stdlib" as before.
13 | REM @TODO: remove the setting, once transition to build backend on Windows
14 | REM to cmake is complete.
15 | REM UPD: Seems to work fine with setuptools 69, so we need to set minimal
16 | REM requirements before removing it.
17 | SET "SETUPTOOLS_USE_DISTUTILS=stdlib"
18 | 
19 | set "CC=icx"
20 | set "CXX=icx"
21 | 
22 | set "CMAKE_GENERATOR=Ninja"
23 | :: Make CMake verbose
24 | set "VERBOSE=1"
25 | 
26 | %PYTHON% -m build -w -n -x
27 | if %ERRORLEVEL% neq 0 exit 1
28 | 
29 | :: `pip install dist\numpy*.whl` does not work on windows,
30 | :: so use a loop; there's only one wheel in dist/ anyway
:: Re-tag every wheel found in dist\ with the build number.
:: NOTE: %ERRORLEVEL% inside a parenthesized block is expanded once, when the
:: whole block is parsed, so it cannot reflect a failure of a command that
:: runs inside the block. `if errorlevel 1` is evaluated at run time instead.
for /f %%f in ('dir /b /S .\dist') do (
    %PYTHON% -m wheel tags --remove --build %GIT_DESCRIBE_NUMBER% %%f
    if errorlevel 1 exit 1
)
35 | 
36 | :: wheel file was renamed
:: Install every wheel found in dist\ into %PREFIX%.
:: NOTE: `if errorlevel 1` is used instead of `if %ERRORLEVEL% neq 0` because
:: %ERRORLEVEL% inside a parenthesized block is expanded when the block is
:: parsed, i.e. before pip runs, so a pip failure would go unnoticed.
for /f %%f in ('dir /b /S .\dist') do (
    %PYTHON% -m pip install %%f ^
      --no-build-isolation ^
      --no-deps ^
      --only-binary :all: ^
      --no-index ^
      --prefix %PREFIX% ^
      -vv
    if errorlevel 1 exit 1
)
47 | 
48 | :: Copy wheel package
49 | if NOT "%WHEELS_OUTPUT_FOLDER%"=="" (
50 |     copy dist\numba_dpex*.whl %WHEELS_OUTPUT_FOLDER%
51 |     if errorlevel 1 exit 1
52 | )
53 | 


--------------------------------------------------------------------------------
/docs/source/user_guide/debugging/data.rst:
--------------------------------------------------------------------------------
 1 | .. include:: ./../../ext_links.txt
 2 | 
 3 | Examining Data
 4 | ==============
 5 | 
 6 | See `GDB* documentation <https://www.sourceware.org/gdb/onlinedocs/gdb/Data.html>`_.
 7 | 
 8 | .. _print:
 9 | 
10 | ``print expr``
11 | --------------
12 | 
13 | To print the value of a variable, run the ``print <variable>`` command.
14 | 
15 | .. literalinclude:: ./../../../../numba_dpex/examples/debug/commands/docs/local_variables_0
16 |     :language: shell-session
17 |     :lines: 67-72
18 |     :emphasize-lines: 1-6
19 | 
20 | .. note::
21 | 
22 |     Displaying complex data types requires Numba 0.55 or higher.
23 | 
24 | Example - Complex Data Types
25 | ````````````````````````````
26 | 
27 | Source code :file:`numba_dpex/examples/debug/side-by-side-2.py`:
28 | 
29 | .. literalinclude:: ./../../../../numba_dpex/examples/debug/side-by-side-2.py
30 |    :pyobject: common_loop_body
31 |    :linenos:
32 |    :lineno-match:
33 |    :emphasize-lines: 6
34 | 
35 | Debug session:
36 | 
37 | .. code-block:: shell-session
38 |    :emphasize-lines: 9-
39 | 
40 |    $ gdb-oneapi -q python
41 |    ...
42 |    (gdb) set environment NUMBA_OPT 0
43 |    (gdb) set environment NUMBA_EXTEND_VARIABLE_LIFETIMES 1
44 |    (gdb) break side-by-side-2.py:29 if param_a == 5
45 |    ...
46 |    (gdb) run numba_dpex/examples/debug/side-by-side-2.py --api=numba-dpex-kernel
47 |    ...
48 |    Thread 2.1 hit Breakpoint 1, with SIMD lane 5, __main__::common_loop_body (i=5, a=..., b=...) at side-by-side-2.py:29
49 |    29          result = param_c + param_d
50 |    (gdb) print a
51 |    $1 = {meminfo = 0x0, parent = 0x0, nitems = 10, itemsize = 4,
52 |      data = 0x555558461000, shape = {10}, strides = {4}}
53 |    (gdb) x/10f a.data
54 |    0x555558461000: 0       1       2       3
55 |    0x555558461010: 4       5       6       7
56 |    0x555558461020: 8       9
57 |    (gdb) print a.data[5]
58 |    $2 = 5
59 | 
60 | This example prints an array and one of its elements.
61 | 


--------------------------------------------------------------------------------
/scripts/config_cpu_device.ps1:
--------------------------------------------------------------------------------
 1 | # Original code: https://github.com/IntelPython/dpctl/blob/0e595728eb9dfc943774b654035e9b339bde8dce/.github/workflows/conda-package.yml#L220-L250
# Append OCL_ICD_FILENAMES to the file named by $env:GITHUB_ENV.
echo "OCL_ICD_FILENAMES=C:\Miniconda\Library\lib\intelocl64.dll" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
# Read the value names stored under the OpenCL Vendors registry key; use an
# empty list if the lookup throws.
try {$list = Get-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors | Select-Object -ExpandProperty Property } catch {$list=@()}
# Only configure the registry and the loader when no vendor entry was found.
if ($list.count -eq 0) {
    # Create each level of the registry path that does not exist yet.
    if (-not (Test-Path -Path HKLM:\SOFTWARE\Khronos)) {
        New-Item -Path HKLM:\SOFTWARE\Khronos
    }
    if (-not (Test-Path -Path HKLM:\SOFTWARE\Khronos\OpenCL)) {
        New-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL
    }
    if (-not (Test-Path -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors)) {
        New-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors
    }
    # Add a registry value whose name is the path to intelocl64.dll and whose data is 0.
    New-ItemProperty -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors -Name C:\Miniconda\Library\lib\intelocl64.dll -Value 0
    # Re-read the value names and print the key for the CI log.
    # NOTE(review): $list is not read again after this assignment.
    try {$list = Get-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors | Select-Object -ExpandProperty Property } catch {$list=@()}
    Write-Output $(Get-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors)
    # Now copy OpenCL.dll into system folder
    $system_ocl_icd_loader="C:\Windows\System32\OpenCL.dll"
    $python_ocl_icd_loader="C:\Miniconda\Library\bin\OpenCL.dll"
    Copy-Item -Path $python_ocl_icd_loader -Destination $system_ocl_icd_loader
    # Report whether the destination file exists and, if so, print its ACL.
    if (Test-Path -Path $system_ocl_icd_loader) {
        Write-Output "$system_ocl_icd_loader has been copied"
        $acl = Get-Acl $system_ocl_icd_loader
        Write-Output $acl
    } else {
        Write-Output "OCL-ICD-Loader was not copied"
    }
    # Variable assisting OpenCL CPU driver to find TBB DLLs which are not located where it expects them by default
    echo "TBB_DLL_PATH=C:\Miniconda\Library\bin" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
}
31 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/kernel_tests/test_usm_ndarray_args.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import dpctl.tensor as dpt
 6 | import dpnp
 7 | import numpy
 8 | import pytest
 9 | 
10 | import numba_dpex as dpex
11 | from numba_dpex.tests._helper import get_all_dtypes
12 | 
13 | 
@dpex.kernel
def sum_2d(item, a, b, c):
    """
    Element-wise addition of two 2D arrays using the ``kernel`` decorator.

    Each work item reads its index along dimensions 0 and 1 from ``item``
    and stores ``a[i, j] + b[i, j]`` into ``c[i, j]``.
    """
    i = item.get_id(0)
    j = item.get_id(1)
    c[i, j] = a[i, j] + b[i, j]
22 | 
23 | 
@dpex.kernel
def sum_2d_slice(item, a, b, c):
    """
    Element-wise addition of two 2D arrays using the ``kernel`` decorator.

    Unlike a direct ``[i, j]`` access, each work item first indexes the
    arrays with ``i`` only and then indexes the resulting rows with ``j``.
    """
    i = item.get_id(0)
    j = item.get_id(1)
    # Take the i-th row of every array before touching individual elements.
    ai, bi, ci = a[i], b[i], c[i]
    ci[j] = ai[j] + bi[j]
33 | 
34 | 
@pytest.mark.parametrize("usm_type", ["shared", "device", "host"])
@pytest.mark.parametrize(
    "dtype",
    get_all_dtypes(
        no_bool=True, no_float16=True, no_none=True, no_complex=True
    ),
)
@pytest.mark.parametrize("kernel", [sum_2d, sum_2d_slice])
@pytest.mark.parametrize("np", [dpt, dpnp])
def test_consuming_usm_ndarray(kernel, dtype, usm_type, np):
    """Checks that a 2D addition kernel produces ``a + b`` for USM arrays.

    Args:
        kernel: The kernel under test (``sum_2d`` or ``sum_2d_slice``).
        dtype: Element type of the input arrays.
        usm_type: USM allocation type passed to ``arange``.
        np: Array module used to build the inputs (``dpt`` or ``dpnp``).
    """
    N = 1000
    shape = (N, N)
    global_size = N * N

    a = np.reshape(
        np.arange(global_size, dtype=dtype, usm_type=usm_type), shape
    )
    b = np.reshape(
        np.arange(global_size, dtype=dtype, usm_type=usm_type), shape
    )
    c = np.empty_like(a)

    dpex.call_kernel(kernel, dpex.Range(N, N), a, b, c)

    # Compare on the host: copy all three arrays back before checking.
    host_a = np.asnumpy(a)
    host_b = np.asnumpy(b)
    host_c = np.asnumpy(c)

    assert numpy.array_equal(host_c, host_a + host_b)
85 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/kernel_tests/test_target_specific_overload.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import dpnp
 6 | from numba.core.extending import overload
 7 | 
 8 | import numba_dpex as dpex
 9 | from numba_dpex.core.descriptor import dpex_kernel_target
10 | from numba_dpex.kernel_api import Item, Range
11 | from numba_dpex.kernel_api_impl.spirv.target import SPIRV_TARGET_NAME
12 | 
13 | 
def scalar_add(a, b):
    """Returns ``a + b``.

    This is the plain Python function for which an overload is registered
    with ``@overload(scalar_add, target=SPIRV_TARGET_NAME)`` in this module.
    """
    return a + b
16 | 
17 | 
@overload(scalar_add, target=SPIRV_TARGET_NAME)
def _ol_scalar_add(a, b):
    """Overload of ``scalar_add`` registered for ``SPIRV_TARGET_NAME``.

    Returns:
        function: The implementation that adds its two arguments.
    """

    def ol_scalar_add_impl(a, b):
        return a + b

    return ol_scalar_add_impl
24 | 
25 | 
@dpex.kernel
def kernel_calling_overload(item: Item, a, b, c):
    """Kernel that stores ``scalar_add(a[i], b[i])`` into ``c[i]``."""
    i = item.get_id(0)
    c[i] = scalar_add(a[i], b[i])
30 | 
31 | 
# Module-level inputs and output shared by the tests below.
a = dpnp.ones(10, dtype=dpnp.int64)
b = dpnp.ones(10, dtype=dpnp.int64)
c = dpnp.zeros(10, dtype=dpnp.int64)

# NOTE(review): the kernel is launched when this module is imported, so a
# failure here surfaces at import/collection time rather than inside a test.
dpex.call_kernel(kernel_calling_overload, Range(10), a, b, c)
37 | 
38 | 
def test_end_to_end_overload_execution():
    """Tests that the overloaded ``scalar_add`` works end to end in a kernel.

    Every element of the module-level output ``c`` is compared against
    ``scalar_add`` evaluated on the matching elements of ``a`` and ``b``.
    """
    n_items = c.shape[0]
    for idx in range(n_items):
        expected = scalar_add(a[idx], b[idx])
        assert c[idx] == expected
45 | 
46 | 
def test_overload_registration():
    """Tests that the overload ``_ol_scalar_add`` is registered in the target
    context of ``dpex_kernel_target``.

    NOTE(review): only the presence of the key is asserted; absence from
    other targets is not checked here.
    """

    def check_for_overload_registration(targetctx, key):
        # A definition matches when its key is a string starting with ``key``.
        return any(
            isinstance(candidate, str) and candidate.startswith(key)
            for candidate in targetctx._defns.keys()
        )

    registered = check_for_overload_registration(
        dpex_kernel_target.target_context, "_ol_scalar_add"
    )
    assert registered
63 | 


--------------------------------------------------------------------------------
/numba_dpex/core/runtime/experimental/nrt_reserve_meminfo.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | //===----------------------------------------------------------------------===//
 6 | ///
 7 | /// \file
 8 | /// Defines dpctl style function(s) that interact with nrt meminfo and sycl.
 9 | ///
10 | //===----------------------------------------------------------------------===//
11 | 
12 | #ifndef _EXPERIMENTAL_H_
13 | #define _EXPERIMENTAL_H_
14 | 
15 | #include "dpctl_capi.h"
16 | #include "numba/core/runtime/nrt_external.h"
17 | 
18 | #ifdef __cplusplus
19 | extern "C"
20 | {
21 | #endif
22 | 
    /*!
     * @brief Acquires meminfos and schedules a host task to release them.
     *
     * @param    nrt            NRT public API functions,
     * @param    QRef           Queue reference,
     * @param    meminfo_array  Array of meminfo pointers to perform actions on,
     * @param    meminfo_array_size Length of meminfo_array,
     * @param    depERefs       Array of dependent events for the host task,
     * @param    nDepERefs      Length of depERefs,
     * @param    status         Variable to write status to. Same style as
     *                          dpctl,
     * @return   Event reference to the host task.
     */
    DPCTLSyclEventRef
    DPEXRT_nrt_acquire_meminfo_and_schedule_release(NRT_api_functions *nrt,
                                                    DPCTLSyclQueueRef QRef,
                                                    NRT_MemInfo **meminfo_array,
                                                    size_t meminfo_array_size,
                                                    DPCTLSyclEventRef *depERefs,
                                                    size_t nDepERefs,
                                                    int *status);
44 | #ifdef __cplusplus
45 | }
46 | #endif
47 | 
48 | #endif /* _EXPERIMENTAL_H_ */
49 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/kernel_tests/test_complex_array_kernel_arg.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import dpnp
 6 | import numpy
 7 | import pytest
 8 | 
 9 | import numba_dpex as dpex
10 | from numba_dpex.tests._helper import get_all_dtypes
11 | 
12 | N = 1024
13 | 
14 | 
@dpex.kernel
def kernel_scalar(item, a, b, c):
    """Kernel that stores ``a[i] * c`` into ``b[i]``; ``c`` is not indexed."""
    i = item.get_id(0)
    b[i] = a[i] * c
19 | 
20 | 
@dpex.kernel
def kernel_array(item, a, b, c):
    """Kernel that stores the element-wise product ``a[i] * c[i]`` into ``b[i]``."""
    i = item.get_id(0)
    b[i] = a[i] * c[i]
25 | 
26 | 
27 | list_of_dtypes = get_all_dtypes(
28 |     no_bool=True, no_int=True, no_float=True, no_none=True
29 | )
30 | 
31 | list_of_usm_types = ["shared", "device", "host"]
32 | 
33 | 
@pytest.fixture(params=list_of_dtypes)
def input_arrays(request):
    """Builds the kernel arguments for one dtype from ``list_of_dtypes``.

    Returns:
        tuple: ``(a, b, c)`` where ``a`` is all ones, ``c`` is all zeros and
        ``b`` is an uninitialized array shaped like ``a``.
    """
    dtype = request.param
    ones = dpnp.ones(N, dtype=dtype)
    zeros = dpnp.zeros(N, dtype=dtype)
    output = dpnp.empty_like(ones)
    return ones, output, zeros
40 | 
41 | 
def test_numeric_kernel_arg_complex_scalar(input_arrays):
    """Tests passing a complex scalar and dpnp arrays to a kernel function.

    Args:
        input_arrays (dpnp.ndarray): Array arguments to be passed to a kernel.
    """
    a, b, _ = input_arrays
    # Build the scalar with the same element type as the input array.
    scalar = a.dtype.type(2 + 1j)

    dpex.call_kernel(kernel_scalar, dpex.Range(N), a, b, scalar)

    result = dpnp.asnumpy(b)
    # ``a`` holds ones, so every output element must equal the scalar.
    expected = numpy.full_like(result, fill_value=2 + 1j)
    assert numpy.allclose(result, expected)
57 | 
58 | 
def test_numeric_kernel_arg_complex_array(input_arrays):
    """Tests passing complex-typed dpnp arrays to a kernel function.

    Args:
        input_arrays (dpnp.ndarray): Array arguments to be passed to a kernel.
    """
    a, b, c = input_arrays

    dpex.call_kernel(kernel_array, dpex.Range(N), a, b, c)

    result = dpnp.asnumpy(b)
    # ``c`` holds zeros, so the element-wise product must be zero everywhere.
    expected = numpy.full_like(result, fill_value=0 + 0j)
    assert numpy.allclose(result, expected)
74 | 


--------------------------------------------------------------------------------
/.github/workflows/coverity.yml:
--------------------------------------------------------------------------------
 1 | name: Coverity
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - main
 6 |   pull_request:
 7 |     branches:
 8 |       - main
 9 |   workflow_dispatch:
10 | 
11 | permissions: read-all
12 | 
13 | jobs:
14 |   Coverity:
15 | 
16 |     runs-on: ubuntu-latest
17 | 
18 |     env:
19 |       CHECKERS: --concurrency --security --rule --enable-constraint-fpp --enable-fnptr --enable-virtual --webapp-security --enable-audit-checkers --enable-default
20 | 
21 |     steps:
22 |     - uses: actions/checkout@v4
23 |     - uses: actions/setup-java@v4
24 |       with:
25 |         distribution: zulu
26 |         java-version: 11
27 | 
28 |     - name: URL encode project name
29 |       run: echo "COV_PROJECT=${{ github.repository }}" | sed -e 's:/:%2F:g' -e 's/ /%20/g' >> $GITHUB_ENV
30 | 
31 |     - name: Coverity Download
32 |       run: |
33 |         mkdir -p /tmp/cov-analysis
34 |         wget https://scan.coverity.com/download/linux64 --post-data "token=${{secrets.COV_TOKEN}}&project=${{env.COV_PROJECT}}" -O cov-analysis.tgz
35 |         tar -xzf cov-analysis.tgz --strip 1 -C /tmp/cov-analysis
36 |         rm cov-analysis.tgz
37 | 
38 |     - name: Coverity Full Scan
39 |       if: ${{ github.event_name != 'pull_request' }}
40 |       run: |
41 |         export PATH=$PATH:/tmp/cov-analysis/bin
42 |         set -x
43 |         cov-build --dir cov-int --fs-capture-search $GITHUB_WORKSPACE --no-command
44 |         # Not available in package, maybe will be once approved?
45 |         # cov-analyze --dir cov-int --ticker-mode none --strip-path $GITHUB_WORKSPACE $CHECKERS
46 | 
47 |         tar czvf numba-dpex.tgz cov-int
48 |         rm -rf cov-int
49 | 
50 |         curl --form token=${{ secrets.COV_TOKEN }} \
51 |             --form email=${{ secrets.COV_EMAIL }} \
52 |             --form file=@numba-dpex.tgz \
53 |             --form version="${{ github.sha }}" \
54 |             --form description="Coverity Scan ${{ github.repository }} / ${{ github.ref }}" \
55 |             https://scan.coverity.com/builds?project=${{env.COV_PROJECT}}
56 | 


--------------------------------------------------------------------------------
/numba_dpex/core/debuginfo.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """Implements a custom debug metadata generator class for numba-dpex kernels.
 6 | """
 7 | 
 8 | from numba.core import debuginfo
 9 | 
10 | 
class DIBuilder(debuginfo.DIBuilder):
    """Overrides Numba's default DIBuilder with numba-dpex-specific customizations."""

    # pylint: disable=too-many-arguments
    def mark_subprogram(self, function, qualname, argnames, argtypes, line):
        """Sets DW_AT_name and DW_AT_linkagename tags for a kernel decorated function.

        Numba generates a unique name for every function it compiles, but in
        upstream Numba the unique name is not used as the "qualified" name of
        the function. The behavior leads to a bug discovered in Numba-dpex when
        a compiled function uses closure variables.
        Refer (https://github.com/IntelPython/numba-dpex/issues/898).
        To resolve the issue numba-dpex uses the unique_name as the qualified
        name. Refer to
        :class:`numba_dpex.core.passes.passes.QualNameDisambiguationLowering`.
        However, doing so breaks setting GDB breakpoints based on function
        name as the function name is no longer what is in the source, but what
        is the unique name generated by Numba. To fix it, numba-dpex uses a
        modified DISubprogram metadata generator. The name (DW_AT_name) tag is
        set to the base function name, discarding the unique qualifier and
        linkagename is set to an empty string.

        Args:
            function: The function object that receives the "dbg" metadata.
            qualname (str): Qualified name; everything from the first "$"
                onwards is discarded.
            argnames: Argument names, paired positionally with ``argtypes``.
            argtypes: Argument types.
            line: Source line passed through to the subprogram metadata.
        """
        # ``str.partition`` returns the whole string when "$" is absent. The
        # previous ``qualname[0 : qualname.find("$")]`` sliced with -1 in that
        # case and silently dropped the last character of the name.
        name = qualname.partition("$")[0]
        argmap = dict(zip(argnames, argtypes))

        di_subp = self._add_subprogram(
            name=name,
            linkagename="",
            line=line,
            function=function,
            argmap=argmap,
        )
        function.set_metadata("dbg", di_subp)
44 | 


--------------------------------------------------------------------------------
/numba_dpex/core/types/kernel_api/literal_intenum.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """Definition of a new Literal type in numba-dpex that allows treating IntEnum
 6 | members as integer literals inside a JIT compiled function.
 7 | """
 8 | from enum import IntEnum
 9 | 
10 | from numba.core.pythonapi import box
11 | from numba.core.typeconv import Conversion
12 | from numba.core.types import Integer, Literal
13 | from numba.core.typing.typeof import typeof
14 | 
15 | from numba_dpex.core.exceptions import IllegalIntEnumLiteralValueError
16 | from numba_dpex.kernel_api.flag_enum import FlagEnum
17 | 
18 | 
class IntEnumLiteral(Literal, Integer):
    """Literal type wrapping an IntEnum class.

    The wrapped Python value is kept on the type through ``_literal_init``.
    """

    #  pylint: disable=W0231
    def __init__(self, value):
        """Initializes the literal from ``value``.

        Args:
            value: A class that must be a subclass of ``FlagEnum``.

        Raises:
            IllegalIntEnumLiteralValueError: If ``value`` is not a subclass of
                ``FlagEnum``.
        """
        self._literal_init(value)
        self.name = f"Literal[IntEnum]({value})"

        if not issubclass(value, FlagEnum):
            raise IllegalIntEnumLiteralValueError

        # Bit width and signedness come from the Numba type of the enum's
        # ``basetype()`` value.
        underlying = typeof(value.basetype())
        Integer.__init__(
            self,
            name=self.name,
            bitwidth=underlying.bitwidth,
            signed=underlying.signed,
        )

    def can_convert_to(self, typingctx, other) -> bool:
        """Returns the conversion kind to ``other``, at least ``promote``.

        Returns ``False`` when ``literal_type`` cannot be converted to
        ``other``.
        """
        conversion = typingctx.can_convert(self.literal_type, other)
        if conversion is None:
            return False
        return max(conversion, Conversion.promote)
44 | 
45 | 
46 | Literal.ctor_map[IntEnum] = IntEnumLiteral
47 | 
48 | 
@box(IntEnumLiteral)
def box_literal_integer(typ, val, ctx):
    """Boxes an IntEnumLiteral value so it can be returned to Python.

    The value is first cast from ``typ`` to ``typ.literal_type`` and then
    boxed as that type.
    """
    casted = ctx.context.cast(ctx.builder, val, typ, typ.literal_type)
    boxed = ctx.box(typ.literal_type, casted)
    return boxed
56 | 


--------------------------------------------------------------------------------
/numba_dpex/tests/dpjit_tests/test_dpex_target_overload_isolation.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """
 6 | Tests if dpex target overloads are not available at numba.njit and only
 7 | available at numba_dpex.dpjit.
 8 | """
 9 | 
10 | import pytest
11 | from numba import njit, types
12 | from numba.core import errors
13 | from numba.extending import intrinsic, overload
14 | 
15 | from numba_dpex import dpjit
16 | from numba_dpex.core.targets.dpjit_target import DPEX_TARGET_NAME
17 | 
18 | 
def foo():
    """Plain Python function returning 1; overloaded below for the dpex target."""
    return 1
21 | 
22 | 
@overload(foo, target=DPEX_TARGET_NAME)
def ol_foo():
    """Overload of ``foo`` registered for ``DPEX_TARGET_NAME``."""

    def impl():
        return 1

    return impl
26 | 
27 | 
@intrinsic(target=DPEX_TARGET_NAME)
def intrinsic_foo(
    ty_context,
):
    """A numba "intrinsic" function, registered for ``DPEX_TARGET_NAME``, whose
    codegen returns the ``int32`` constant 1.

    Args:
        ty_context (numba.core.typing.context.Context): The typing context
            for the codegen.

    Returns:
        tuple(numba.core.typing.templates.Signature, function): A tuple of
            numba function signature type and a function object.
    """

    sig = types.int32(types.void)

    def codegen(context, builder, sig, args: list):
        return context.get_constant(types.int32, 1)

    return sig, codegen
49 | 
50 | 
def bar():
    """Calls ``foo``; compiled with ``njit`` and ``dpjit`` in the tests below."""
    return foo()
53 | 
54 | 
def intrinsic_bar():
    """Calls ``intrinsic_foo`` and returns its result."""
    return intrinsic_foo()
58 | 
59 | 
def test_dpex_overload_from_njit():
    """Calling the dpex-target overload from ``njit`` must raise TypingError."""
    compiled = njit(bar)

    with pytest.raises(errors.TypingError):
        compiled()
65 | 
66 | 
def test_dpex_overload_from_dpjit():
    """Calling the dpex-target overload from ``dpjit`` must not raise."""
    compiled = dpjit(bar)
    compiled()
70 | 
71 | 
def test_dpex_intrinsic_from_njit():
    """Calling the dpex-target intrinsic from ``njit`` must raise TypingError."""
    compiled = njit(intrinsic_bar)

    with pytest.raises(errors.TypingError):
        compiled()
77 | 
78 | 
def test_dpex_intrinsic_from_dpjit():
    """Calling the dpex-target intrinsic from ``dpjit`` must not raise."""
    compiled = dpjit(intrinsic_bar)
    compiled()
82 | 


--------------------------------------------------------------------------------
/scripts/set_examples_to_doc.py:
--------------------------------------------------------------------------------
 1 | """ This script is needed to convert gdb scripts from commands to documentation
 2 | """
 3 | 
 4 | import os
 5 | 
 6 | 
 7 | def convert_commands_to_docs():
 8 |     commands_dir = os.getcwd() + "/numba_dpex/examples/debug/commands"
 9 |     examples = os.listdir(commands_dir)
10 |     os.chdir(commands_dir + "/docs")
11 |     for file in examples:
12 |         if file != "docs":
13 |             with open(commands_dir + "/" + file, "r") as open_file:
14 |                 read_lines = open_file.readlines()
15 |                 if os.path.exists(file):
16 |                     os.remove(file)
17 |                 with open(file, "a") as write_file:
18 |                     for line in read_lines:
19 |                         if (
20 |                             line.startswith("# Expected")
21 |                             or line.startswith("echo Done")
22 |                             or line.startswith("quit")
23 |                             or line.startswith("set trace-commands")
24 |                             or line.startswith("set pagination")
25 |                         ):
26 |                             continue
27 |                         if line.startswith("# Run: "):
28 |                             line = line.replace("# Run:", "$")
29 |                             words = line.split()
30 |                             for i in range(len(words)):
31 |                                 if words[i] == "-command" or words[
32 |                                     i
33 |                                 ].startswith("commands"):
34 |                                     words[i] = ""
35 |                             line = " ".join(words)
36 |                             line = " ".join(line.split()) + "\n"
37 |                         elif line.startswith("# "):
38 |                             line = line.replace("# ", "")
39 |                         else:
40 |                             line = "(gdb) " + line
41 | 
42 |                         write_file.write(line)
43 | 
44 | 
45 | if __name__ == "__main__":
46 |     convert_commands_to_docs()
47 | 


--------------------------------------------------------------------------------
/docs/source/useful_links.rst:
--------------------------------------------------------------------------------
 1 | .. _useful_links:
 2 | .. include:: ./ext_links.txt
 3 | 
 4 | Useful links
 5 | ============
 6 | 
 7 | .. list-table:: **Companion documentation**
 8 |    :widths: 70 200
 9 |    :header-rows: 1
10 | 
11 |    * - Document
12 |      - Description
13 |    * - `Data Parallel Extension for Numpy*`_
14 |      - Documentation for programming NumPy-like codes on data parallel devices
15 |    * - `Data Parallel Extension for Numba*`_
16 |      - Documentation for programming Numba codes on data parallel devices the same way as you program Numba on CPU
17 |    * - `Data Parallel Control`_
18 |      - Documentation on how to manage data and devices, how to interchange data between different tensor implementations,
19 |        and how to write data parallel extensions
20 |    * - `Intel VTune Profiler`_
21 |      - Performance profiler supporting analysis of bottlenecks from function level down to low level instructions.
22 |        Supports Python and Numba
23 |    * - `Intel Advisor`_
24 |      - Analyzes native and Python codes and provides advice for better composition of heterogeneous algorithms
25 |    * - `Python* Array API Standard`_
26 |      - Standard for writing portable Numpy-like codes targeting different hardware vendors and frameworks
27 |        operating with tensor data
28 |    * - `SYCL*`_
29 |      - Standard for writing C++-like codes for heterogeneous computing
30 |    * - `DPC++`_
31 |      - Free e-book on how to program data parallel devices using Data Parallel C++
32 |    * - `OpenCl*`_
33 |      - OpenCl* Standard for heterogeneous programming
34 |    * - `IEEE 754-2019 Standard for Floating-Point Arithmetic`_
35 |      - Standard for floating-point arithmetic, essential for writing robust numerical codes
36 |    * - `Numpy*`_
37 |      - Documentation for Numpy - foundational CPU library for array programming. Used in conjunction with
38 |        `Data Parallel Extension for Numpy*`_.
39 |    * - `Numba*`_
40 |      - Documentation for Numba - Just-In-Time compiler for Numpy-like codes. Used in conjunction with
41 |        `Data Parallel Extension for Numba*`_.
42 | 


--------------------------------------------------------------------------------
/numba_dpex/examples/kernel/scan.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """An implementation of the Hillis-Steele algorithm to compute prefix sums.
 6 | 
 7 | The algorithm is implemented to work with a single work group of N work items,
 8 | where N is the number of elements.
 9 | """
10 | 
11 | import dpnp as np
12 | 
13 | import numba_dpex as ndpx
14 | from numba_dpex import kernel_api as kapi
15 | 
16 | # 1D array size
17 | N = 64
18 | 
19 | 
@ndpx.kernel
def kernel_hillis_steele_scan(nditem: kapi.NdItem, a, slm_b, slm_c):
    """Computes an in-place inclusive prefix sum of ``a`` (Hillis-Steele).

    Args:
        nditem: The work item; its global id indexes ``a`` and its local id
            indexes the two scratch buffers.
        a: Input array, overwritten with the prefix sums.
        slm_b: Scratch buffer holding the partial sums of the current step.
        slm_c: Scratch buffer receiving the partial sums of the next step.
    """
    # Get local and global id and workgroup size
    gid = nditem.get_global_id(0)
    lid = nditem.get_local_id(0)
    ls = nditem.get_local_range(0)
    gr = nditem.get_group()

    # Initialize locals
    slm_c[lid] = slm_b[lid] = a[gid]

    kapi.group_barrier(gr)

    # Calculate prefix sum
    d = 1
    while d < ls:
        # Every element at distance >= d from the start adds its neighbour d
        # places to the left. The comparison must be inclusive: with
        # ``lid > d`` the element at ``lid == d`` never adds ``slm_b[0]``,
        # which is only harmless when ``a[0]`` is zero.
        if lid >= d:
            slm_c[lid] = slm_b[lid] + slm_b[lid - d]
        else:
            slm_c[lid] = slm_b[lid]

        kapi.group_barrier(gr)

        # Swap c and b
        e = slm_c[lid]
        slm_c[lid] = slm_b[lid]
        slm_b[lid] = e

        # The next step reads ``slm_b[lid - d]``, which another work item
        # wrote during the swap above, so wait until every swap has finished.
        kapi.group_barrier(gr)

        # Double the stride
        d *= 2

    kapi.group_barrier(gr, kapi.MemoryScope.DEVICE)

    a[gid] = slm_b[lid]
54 | 
55 | 
def main():
    """Runs the scan kernel on ``arange(N)`` and prints input and output."""
    arr = np.arange(N)
    print("Original array:", arr)

    print("Using device ...")
    print(arr.device)

    # Create temporals in local memory
    element_type = arr.dtype
    slm_b = kapi.LocalAccessor(N, dtype=element_type)
    slm_c = kapi.LocalAccessor(N, dtype=element_type)

    # Global and local range are both (N,).
    launch_range = ndpx.NdRange((N,), (N,))
    ndpx.call_kernel(kernel_hillis_steele_scan, launch_range, arr, slm_b, slm_c)

    # the output should be [0, 1, 3, 6, ...]
    print(np.asnumpy(arr))

    print("Done...")
77 | 
78 | if __name__ == "__main__":
79 |     main()
80 | 


--------------------------------------------------------------------------------
/docs/source/sycl_spec_links.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation
 2 | #
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | """Links to the SYCL 2020 specification that are used in docstring.
 6 | 
 7 | The module provides a dictionary in the format needed by the sphinx.ext.extlinks
 8 | extension.
 9 | """
10 | 
# Location of the SYCL 2020 specification; every link below is an anchor in it.
_SYCL_2020_SPEC_URL = (
    "https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html"
)

# Maps each extlinks role name to the anchor of the matching spec section.
_SYCL_SPEC_ANCHORS = {
    "sycl_item": "subsec:item.class",
    "sycl_group": "group-class",
    "sycl_nditem": "subsec:nditem.class",
    "sycl_ndrange": "subsubsec:nd-range-class",
    "sycl_range": "range-class",
    "sycl_atomic_ref": "sec:atomic-references",
    "sycl_local_accessor": "sec:accessor.local",
    "sycl_private_memory": "_parallel_for_hierarchical_invoke",
    "sycl_memory_scope": "sec:memory-scope",
    "sycl_memory_order": "sec:memory-ordering",
    "sycl_addr_space": "_address_space_classes",
}

# Each entry is a ``(url_template, caption)`` tuple; the template ends with
# ``%s`` and the caption is ``None``.
sycl_ext_links = {
    role: (f"{_SYCL_2020_SPEC_URL}#{anchor}%s", None)
    for role, anchor in _SYCL_SPEC_ANCHORS.items()
}
57 | 


--------------------------------------------------------------------------------