├── RAPIDS_BRANCH ├── VERSION ├── python ├── rmm │ ├── rmm │ │ ├── py.typed │ │ ├── _cuda │ │ │ ├── __init__.pxd │ │ │ ├── stream.pxd │ │ │ └── CMakeLists.txt │ │ ├── VERSION │ │ ├── allocators │ │ │ ├── __init__.py │ │ │ ├── cupy.py │ │ │ └── torch.py │ │ ├── pylibrmm │ │ │ ├── tests │ │ │ │ ├── __init__.py │ │ │ │ └── test_device_buffer.pyx │ │ │ ├── helper.pyi │ │ │ ├── helper.pxd │ │ │ ├── utils.pyi │ │ │ ├── utils.pxd │ │ │ ├── cuda_stream.pyi │ │ │ ├── memory_resource │ │ │ │ ├── experimental.pyi │ │ │ │ ├── experimental.pxd │ │ │ │ ├── __init__.pxd │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── experimental.pyx │ │ │ │ ├── __init__.py │ │ │ │ └── _memory_resource.pxd │ │ │ ├── cuda_stream_pool.pxd │ │ │ ├── __init__.py │ │ │ ├── cuda_stream.pxd │ │ │ ├── cuda_stream_pool.pyi │ │ │ ├── stream.pyi │ │ │ ├── logger.pyi │ │ │ ├── stream.pxd │ │ │ ├── CMakeLists.txt │ │ │ ├── utils.pyx │ │ │ ├── cuda_stream.pyx │ │ │ ├── helper.pyx │ │ │ ├── cuda_stream_pool.pyx │ │ │ └── device_buffer.pxd │ │ ├── librmm │ │ │ ├── __init__.py │ │ │ ├── _logger.pyx │ │ │ ├── cuda_stream_pool.pxd │ │ │ ├── cuda_stream_view.pxd │ │ │ ├── cuda_stream.pxd │ │ │ ├── per_device_resource.pxd │ │ │ ├── _logger.pxd │ │ │ ├── device_uvector.pxd │ │ │ ├── CMakeLists.txt │ │ │ ├── device_buffer.pxd │ │ │ └── _torch_allocator.cpp │ │ ├── mr │ │ │ ├── experimental.py │ │ │ └── __init__.py │ │ ├── tests │ │ │ ├── test_version.py │ │ │ ├── conftest.py │ │ │ ├── test_arena_memory_resource.py │ │ │ ├── test_limiting_resource_adaptor.py │ │ │ ├── test_cython.py │ │ │ ├── test_prefetch_resource_adaptor.py │ │ │ ├── test_fixed_size_memory_resource.py │ │ │ ├── test_rmm_pytorch.py │ │ │ ├── test_failure_callback_resource_adaptor.py │ │ │ ├── test_binning_memory_resource.py │ │ │ ├── test_tracking_resource_adaptor.py │ │ │ ├── test_pool_memory_resource.py │ │ │ ├── test_cuda_async_managed_memory_resource.py │ │ │ ├── test_cuda_async_view_memory_resource.py │ │ │ ├── test_cupy_integration.py │ │ │ ├── 
test_allocations.py │ │ │ └── test_helpers.py │ │ ├── _version.py │ │ └── __init__.py │ ├── LICENSE │ ├── README.md │ ├── .coveragerc │ └── CMakeLists.txt └── librmm │ ├── LICENSE │ ├── README.md │ ├── librmm │ ├── VERSION │ ├── __init__.py │ └── _version.py │ └── CMakeLists.txt ├── img └── rapids_logo.png ├── docs ├── cpp │ ├── errors.md │ ├── utilities.md │ ├── cuda_streams.md │ ├── data_containers.md │ ├── thrust_integrations.md │ ├── cuda_device_management.md │ ├── memory_resources │ │ ├── index.md │ │ ├── memory_resources.md │ │ └── memory_resource_adaptors.md │ ├── rmm_namespace.md │ └── index.md ├── _static │ └── RAPIDS-logo-purple.png ├── python │ ├── rmm.md │ ├── mr.md │ ├── statistics.md │ ├── index.md │ ├── librmm.md │ ├── allocators.md │ └── pylibrmm.md ├── index.md └── Makefile ├── .shellcheckrc ├── cpp ├── scripts │ ├── load-pretty-printers.in │ └── doxygen.sh ├── gcovr.cfg ├── tests │ ├── libcudacxx_flag_test │ │ ├── README.md │ │ ├── libcudacxx_flag_test.cpp │ │ └── libcudacxx_flag_test.sh │ ├── mr │ │ ├── mr_ref_fixed_size_tests.cpp │ │ ├── mr_ref_pinned_tests.cpp │ │ ├── mr_ref_system_tests.cpp │ │ ├── mr_ref_pinned_pool_tests.cpp │ │ ├── test_utils.hpp │ │ ├── binning_mr_tests.cpp │ │ ├── mr_ref_cuda_tests.cpp │ │ ├── mr_ref_pool_tests.cpp │ │ ├── mr_ref_arena_tests.cpp │ │ ├── mr_ref_binning_tests.cpp │ │ ├── mr_ref_managed_tests.cpp │ │ ├── mr_ref_cuda_async_tests.cpp │ │ ├── mr_ref_test_allocation.hpp │ │ ├── mr_ref_test_basic.hpp │ │ ├── cuda_async_view_mr_tests.cpp │ │ └── thrust_allocator_tests.cu │ ├── mock_resource.hpp │ ├── byte_literals.hpp │ └── device_check_resource_adaptor.hpp ├── examples │ ├── README.md │ ├── versions.cmake │ ├── set_cuda_architecture.cmake │ ├── basic │ │ ├── README.md │ │ ├── CMakeLists.txt │ │ └── src │ │ │ └── basic.cpp │ ├── fetch_dependencies.cmake │ └── build.sh ├── include │ ├── rmm │ │ ├── detail │ │ │ ├── export.hpp │ │ │ ├── thrust_namespace.h │ │ │ ├── exec_check_disable.hpp │ │ │ ├── format.hpp │ │ │ 
├── nvtx │ │ │ │ └── ranges.hpp │ │ │ ├── cuda_memory_resource.hpp │ │ │ └── logging_assert.hpp │ │ ├── device_vector.hpp │ │ ├── mr │ │ │ ├── is_resource_adaptor.hpp │ │ │ └── detail │ │ │ │ └── fixed_size_free_list.hpp │ │ ├── logger.hpp │ │ └── prefetch.hpp │ └── doxygen_groups.h ├── cmake │ ├── thirdparty │ │ ├── get_cccl.cmake │ │ └── get_nvtx.cmake │ └── config.json ├── src │ ├── exec_policy.cpp │ ├── error.cpp │ ├── logger.cpp │ ├── cuda_stream_pool.cpp │ ├── prefetch.cpp │ ├── aligned.cpp │ ├── cuda_stream.cpp │ ├── cuda_device.cpp │ └── cuda_stream_view.cpp ├── doxygen │ └── main_page.md ├── benchmarks │ ├── cuda_stream_pool │ │ └── cuda_stream_pool_bench.cpp │ └── synchronization │ │ └── synchronization.cpp └── .clangd ├── .github ├── copy-pr-bot.yaml ├── ISSUE_TEMPLATE │ ├── submit-question.md │ ├── feature_request.md │ ├── documentation-request.md │ └── bug_report.md ├── ops-bot.yaml ├── workflows │ ├── labeler.yml │ ├── trigger-breaking-change-alert.yaml │ ├── new-issues-to-triage-projects.yml │ └── test.yaml ├── PULL_REQUEST_TEMPLATE.md ├── release.yml ├── labeler.yml └── CODEOWNERS ├── conda ├── recipes │ ├── rmm │ │ └── conda_build_config.yaml │ └── librmm │ │ └── conda_build_config.yaml └── environments │ ├── all_cuda-129_arch-x86_64.yaml │ ├── all_cuda-130_arch-x86_64.yaml │ ├── all_cuda-129_arch-aarch64.yaml │ └── all_cuda-130_arch-aarch64.yaml ├── ci ├── run_pytests.sh ├── validate_wheel.sh ├── run_ctests.sh ├── check_style.sh ├── build_wheel_cpp.sh ├── test_wheel.sh ├── build_python.sh ├── build_cpp.sh ├── build_docs.sh ├── check_symbols.sh ├── test_python.sh ├── test_cpp.sh └── build_wheel_python.sh ├── .devcontainer ├── Dockerfile ├── README.md ├── cuda13.0-pip │ └── devcontainer.json ├── cuda12.9-pip │ └── devcontainer.json ├── cuda13.0-conda │ └── devcontainer.json └── cuda12.9-conda │ └── devcontainer.json ├── print_env.sh └── cmake └── rapids_config.cmake /RAPIDS_BRANCH: 
-------------------------------------------------------------------------------- 1 | main 2 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 26.02.00 2 | -------------------------------------------------------------------------------- /python/rmm/rmm/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/rmm/LICENSE: -------------------------------------------------------------------------------- 1 | ../../LICENSE -------------------------------------------------------------------------------- /python/rmm/rmm/_cuda/__init__.pxd: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/librmm/LICENSE: -------------------------------------------------------------------------------- 1 | ../../LICENSE -------------------------------------------------------------------------------- /python/librmm/README.md: -------------------------------------------------------------------------------- 1 | ../../README.md -------------------------------------------------------------------------------- /python/rmm/README.md: -------------------------------------------------------------------------------- 1 | ../../README.md -------------------------------------------------------------------------------- /python/rmm/rmm/VERSION: -------------------------------------------------------------------------------- 1 | ../../../VERSION -------------------------------------------------------------------------------- /python/rmm/rmm/allocators/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/python/rmm/rmm/pylibrmm/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/librmm/librmm/VERSION: -------------------------------------------------------------------------------- 1 | ../../../VERSION -------------------------------------------------------------------------------- /img/rapids_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rapidsai/rmm/HEAD/img/rapids_logo.png -------------------------------------------------------------------------------- /docs/cpp/errors.md: -------------------------------------------------------------------------------- 1 | # Errors 2 | 3 | ```{doxygengroup} errors 4 | :members: 5 | :content-only: 6 | ``` 7 | -------------------------------------------------------------------------------- /.shellcheckrc: -------------------------------------------------------------------------------- 1 | # Disable file checks (otherwise every use of `gha-tools` will get flagged) 2 | disable=SC1091 3 | -------------------------------------------------------------------------------- /docs/_static/RAPIDS-logo-purple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rapidsai/rmm/HEAD/docs/_static/RAPIDS-logo-purple.png -------------------------------------------------------------------------------- /docs/cpp/utilities.md: -------------------------------------------------------------------------------- 1 | # Utilities 2 | 3 | ```{doxygengroup} utilities 4 | :members: 5 | :content-only: 6 | ``` 7 | -------------------------------------------------------------------------------- /docs/cpp/cuda_streams.md: -------------------------------------------------------------------------------- 1 | # CUDA Streams 2 | 3 | ```{doxygengroup} cuda_streams 4 | :members: 5 | 
:content-only: 6 | ``` 7 | -------------------------------------------------------------------------------- /docs/cpp/data_containers.md: -------------------------------------------------------------------------------- 1 | # Data Containers 2 | 3 | ```{doxygengroup} data_containers 4 | :members: 5 | :content-only: 6 | ``` 7 | -------------------------------------------------------------------------------- /docs/cpp/thrust_integrations.md: -------------------------------------------------------------------------------- 1 | # Thrust Integrations 2 | 3 | ```{doxygengroup} thrust_integrations 4 | :members: 5 | :content-only: 6 | ``` 7 | -------------------------------------------------------------------------------- /python/rmm/rmm/librmm/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2019-2020, NVIDIA CORPORATION. 2 | # SPDX-License-Identifier: Apache-2.0 3 | -------------------------------------------------------------------------------- /python/rmm/rmm/librmm/_logger.pyx: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | -------------------------------------------------------------------------------- /docs/cpp/cuda_device_management.md: -------------------------------------------------------------------------------- 1 | # CUDA Device Management 2 | 3 | ```{doxygengroup} cuda_device_management 4 | :members: 5 | :content-only: 6 | ``` 7 | -------------------------------------------------------------------------------- /docs/cpp/memory_resources/index.md: -------------------------------------------------------------------------------- 1 | # Memory Resources 2 | 3 | ```{toctree} 4 | :maxdepth: 1 5 | 6 | memory_resources 7 | memory_resource_adaptors 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/python/rmm.md: -------------------------------------------------------------------------------- 1 | # rmm (top-level) 2 | 3 | ```{eval-rst} 4 | .. automodule:: rmm 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | ``` 9 | -------------------------------------------------------------------------------- /cpp/scripts/load-pretty-printers.in: -------------------------------------------------------------------------------- 1 | source @CCCL_SOURCE_DIR@/thrust/scripts/gdb-pretty-printers.py 2 | source @PROJECT_SOURCE_DIR@/scripts/gdb-pretty-printers.py 3 | -------------------------------------------------------------------------------- /cpp/gcovr.cfg: -------------------------------------------------------------------------------- 1 | exclude=build/.* 2 | exclude=tests/.* 3 | exclude=benchmarks/.* 4 | html=yes 5 | html-details=yes 6 | sort-percentage=yes 7 | exclude-throw-branches=yes 8 | -------------------------------------------------------------------------------- /docs/cpp/memory_resources/memory_resources.md: -------------------------------------------------------------------------------- 1 | # Memory Resources 2 | 3 | ```{doxygengroup} memory_resources 4 | :members: 5 | :undoc-members: 6 
| :content-only: 7 | ``` 8 | -------------------------------------------------------------------------------- /docs/cpp/rmm_namespace.md: -------------------------------------------------------------------------------- 1 | # Namespaces 2 | 3 | ```{doxygennamespace} rmm 4 | :desc-only: 5 | ``` 6 | 7 | ```{doxygennamespace} rmm::mr 8 | :desc-only: 9 | ``` 10 | -------------------------------------------------------------------------------- /python/rmm/.coveragerc: -------------------------------------------------------------------------------- 1 | # Configuration file for Python coverage tests 2 | [run] 3 | include = rmm/* 4 | omit = rmm/tests/* 5 | disable_warnings=include-ignored,no-data-collected 6 | -------------------------------------------------------------------------------- /.github/copy-pr-bot.yaml: -------------------------------------------------------------------------------- 1 | # Configuration file for `copy-pr-bot` GitHub App 2 | # https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/ 3 | 4 | enabled: true 5 | auto_sync_draft: false 6 | -------------------------------------------------------------------------------- /docs/cpp/memory_resources/memory_resource_adaptors.md: -------------------------------------------------------------------------------- 1 | # Memory Resource Adaptors 2 | 3 | ```{doxygengroup} memory_resource_adaptors 4 | :members: 5 | :undoc-members: 6 | :content-only: 7 | ``` 8 | -------------------------------------------------------------------------------- /python/rmm/rmm/_cuda/stream.pxd: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from rmm.pylibrmm.stream cimport Stream 5 | -------------------------------------------------------------------------------- /python/rmm/rmm/pylibrmm/helper.pyi: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | def parse_bytes(s: int | str) -> int: ... 5 | -------------------------------------------------------------------------------- /python/rmm/rmm/pylibrmm/helper.pxd: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | 5 | cdef object parse_bytes(object s) except * 6 | -------------------------------------------------------------------------------- /docs/python/mr.md: -------------------------------------------------------------------------------- 1 | # rmm.mr (Memory Resources) 2 | 3 | ```{eval-rst} 4 | .. automodule:: rmm.mr 5 | :members: 6 | :inherited-members: 7 | :undoc-members: 8 | :show-inheritance: 9 | ``` 10 | -------------------------------------------------------------------------------- /cpp/tests/libcudacxx_flag_test/README.md: -------------------------------------------------------------------------------- 1 | This directory contains test files to verify that RMM reports a compile error when the `LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE` preprocessor macro is not defined. 2 | -------------------------------------------------------------------------------- /python/rmm/rmm/pylibrmm/utils.pyi: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from rmm.pylibrmm.stream import Stream 5 | 6 | def as_stream(stream: Stream) -> Stream: ... 
7 | -------------------------------------------------------------------------------- /docs/python/statistics.md: -------------------------------------------------------------------------------- 1 | # rmm.statistics 2 | 3 | ```{eval-rst} 4 | .. automodule:: rmm.statistics 5 | :members: 6 | :inherited-members: 7 | :undoc-members: 8 | :show-inheritance: 9 | :special-members: __call__ 10 | ``` 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/submit-question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Submit question 3 | about: Ask a general question about RMM 4 | title: "[QST]" 5 | labels: "? - Needs Triage, question" 6 | assignees: '' 7 | 8 | --- 9 | 10 | **What is your question?** 11 | -------------------------------------------------------------------------------- /docs/python/index.md: -------------------------------------------------------------------------------- 1 | # Python API 2 | 3 | This page documents the Python API for RMM, imported as the module `rmm`. 4 | 5 | ```{toctree} 6 | :maxdepth: 2 7 | 8 | rmm 9 | mr 10 | allocators 11 | statistics 12 | pylibrmm 13 | librmm 14 | ``` 15 | -------------------------------------------------------------------------------- /python/rmm/rmm/pylibrmm/utils.pxd: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from rmm.pylibrmm.stream cimport Stream 5 | 6 | 7 | cdef Stream as_stream(Stream stream) except * 8 | -------------------------------------------------------------------------------- /cpp/examples/README.md: -------------------------------------------------------------------------------- 1 | # RMM C++ Examples 2 | 3 | This folder contains examples to demonstrate librmm use cases. Running `build.sh` builds all examples. 
4 | 5 | Current examples: 6 | 7 | - Basic: demonstrates memory resource construction and allocating a `device_uvector` on a stream. 8 | -------------------------------------------------------------------------------- /conda/recipes/rmm/conda_build_config.yaml: -------------------------------------------------------------------------------- 1 | c_compiler_version: 2 | - 14 3 | 4 | cxx_compiler_version: 5 | - 14 6 | 7 | cuda_compiler: 8 | - cuda-nvcc 9 | 10 | c_stdlib: 11 | - sysroot 12 | 13 | c_stdlib_version: 14 | - "2.28" 15 | 16 | cmake_version: 17 | - ">=3.30.4" 18 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # RMM: RAPIDS Memory Manager 2 | 3 | RMM (RAPIDS Memory Manager) is a library for allocating and managing GPU memory in C++ and Python. 4 | 5 | ```{toctree} 6 | :maxdepth: 2 7 | :caption: Contents 8 | 9 | user_guide/guide 10 | cpp/index 11 | python/index 12 | ``` 13 | -------------------------------------------------------------------------------- /conda/recipes/librmm/conda_build_config.yaml: -------------------------------------------------------------------------------- 1 | c_compiler_version: 2 | - 14 3 | 4 | cxx_compiler_version: 5 | - 14 6 | 7 | cuda_compiler: 8 | - cuda-nvcc 9 | 10 | c_stdlib: 11 | - sysroot 12 | 13 | c_stdlib_version: 14 | - "2.28" 15 | 16 | cmake_version: 17 | - ">=3.30.4" 18 | -------------------------------------------------------------------------------- /docs/python/librmm.md: -------------------------------------------------------------------------------- 1 | # rmm.librmm 2 | 3 | This module contains internal C++ library bindings for RMM. Most users should not need to use this module directly. 4 | 5 | ```{eval-rst} 6 | .. 
automodule:: rmm.librmm 7 | :members: 8 | :undoc-members: 9 | :show-inheritance: 10 | ``` 11 | -------------------------------------------------------------------------------- /python/librmm/librmm/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from librmm._version import __git_commit__, __version__ 5 | from librmm.load import load_library 6 | 7 | __all__ = ["__git_commit__", "__version__", "load_library"] 8 | -------------------------------------------------------------------------------- /.github/ops-bot.yaml: -------------------------------------------------------------------------------- 1 | # This file controls which features from the `ops-bot` repository below are enabled. 2 | # - https://github.com/rapidsai/ops-bot 3 | 4 | auto_merger: true 5 | branch_checker: true 6 | label_checker: true 7 | release_drafter: true 8 | recently_updated: true 9 | forward_merger: true 10 | merge_barriers: true 11 | -------------------------------------------------------------------------------- /ci/run_pytests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | set -euo pipefail 6 | 7 | # Support invoking run_pytests.sh outside the script directory 8 | cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/rmm/ 9 | 10 | pytest --cache-clear -v "$@" . 11 | -------------------------------------------------------------------------------- /python/rmm/rmm/pylibrmm/cuda_stream.pyi: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from enum import IntEnum 5 | 6 | class CudaStreamFlags(IntEnum): 7 | SYNC_DEFAULT = ... 8 | NON_BLOCKING = ... 9 | 10 | class CudaStream: 11 | def __init__(self) -> None: ... 12 | -------------------------------------------------------------------------------- /cpp/examples/versions.cmake: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # cmake-format: off 3 | # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 4 | # SPDX-License-Identifier: Apache-2.0 5 | # cmake-format: on 6 | # ============================================================================= 7 | 8 | set(RMM_TAG main) 9 | -------------------------------------------------------------------------------- /python/rmm/rmm/pylibrmm/memory_resource/experimental.pyi: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from rmm.pylibrmm.memory_resource._memory_resource import DeviceMemoryResource 5 | 6 | class CudaAsyncManagedMemoryResource(DeviceMemoryResource): 7 | def __init__(self) -> None: ... 8 | def pool_handle(self) -> int: ... 9 | -------------------------------------------------------------------------------- /docs/cpp/index.md: -------------------------------------------------------------------------------- 1 | # C++ API 2 | 3 | This page documents the C++ API for RMM, also called `librmm`. 
4 | 5 | ```{toctree} 6 | :maxdepth: 2 7 | 8 | memory_resources/index 9 | data_containers 10 | thrust_integrations 11 | cuda_device_management 12 | cuda_streams 13 | errors 14 | utilities 15 | rmm_namespace 16 | ``` 17 | 18 | ## Indices and tables 19 | 20 | - {ref}`genindex` 21 | - {ref}`search` 22 | -------------------------------------------------------------------------------- /python/librmm/librmm/_version.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import importlib.resources 5 | 6 | __version__ = ( 7 | importlib.resources.files(__package__) 8 | .joinpath("VERSION") 9 | .read_text() 10 | .strip() 11 | ) 12 | __git_commit__ = "" 13 | 14 | __all__ = ["__git_commit__", "__version__"] 15 | -------------------------------------------------------------------------------- /python/rmm/rmm/pylibrmm/memory_resource/experimental.pxd: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | # import from the private _memory_resource to avoid a circular import 5 | from rmm.pylibrmm.memory_resource._memory_resource cimport DeviceMemoryResource 6 | 7 | 8 | cdef class CudaAsyncManagedMemoryResource(DeviceMemoryResource): 9 | pass 10 | -------------------------------------------------------------------------------- /python/rmm/rmm/mr/experimental.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | """Experimental memory resource features that may change or be removed in future releases.""" 5 | 6 | from rmm.pylibrmm.memory_resource.experimental import ( 7 | CudaAsyncManagedMemoryResource, 8 | ) 9 | 10 | __all__ = [ 11 | "CudaAsyncManagedMemoryResource", 12 | ] 13 | -------------------------------------------------------------------------------- /python/rmm/rmm/pylibrmm/cuda_stream_pool.pxd: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | cimport cython 5 | from libc.stddef cimport size_t 6 | from libcpp.memory cimport unique_ptr 7 | 8 | from rmm.librmm.cuda_stream_pool cimport cuda_stream_pool 9 | 10 | 11 | @cython.final 12 | cdef class CudaStreamPool: 13 | cdef unique_ptr[cuda_stream_pool] c_obj 14 | -------------------------------------------------------------------------------- /python/rmm/rmm/pylibrmm/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from rmm.pylibrmm import memory_resource 5 | 6 | from .cuda_stream_pool import CudaStreamPool 7 | from .cuda_stream import CudaStreamFlags 8 | from .device_buffer import DeviceBuffer 9 | 10 | __all__ = [ 11 | "CudaStreamPool", 12 | "CudaStreamFlags", 13 | "DeviceBuffer", 14 | "memory_resource", 15 | ] 16 | -------------------------------------------------------------------------------- /docs/python/allocators.md: -------------------------------------------------------------------------------- 1 | # rmm.allocators 2 | 3 | ```{eval-rst} 4 | .. automodule:: rmm.allocators.cupy 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | ``` 9 | 10 | ```{eval-rst} 11 | .. 
automodule:: rmm.allocators.numba 12 | :members: 13 | :inherited-members: 14 | :undoc-members: 15 | :show-inheritance: 16 | ``` 17 | 18 | ```{eval-rst} 19 | .. automodule:: rmm.allocators.torch 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | ``` 24 | -------------------------------------------------------------------------------- /python/rmm/rmm/tests/test_version.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import rmm 5 | 6 | 7 | def test_version_constants_are_populated() -> None: 8 | # __git_commit__ will only be non-empty in a built distribution 9 | assert isinstance(rmm.__git_commit__, str) 10 | 11 | # __version__ should always be non-empty 12 | assert isinstance(rmm.__version__, str) 13 | assert len(rmm.__version__) > 0 14 | -------------------------------------------------------------------------------- /ci/validate_wheel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | set -euo pipefail 6 | 7 | wheel_dir_relative_path=$1 8 | 9 | rapids-logger "validate packages with 'pydistcheck'" 10 | 11 | pydistcheck \ 12 | --inspect \ 13 | "$(echo "${wheel_dir_relative_path}"/*.whl)" 14 | 15 | rapids-logger "validate packages with 'twine'" 16 | 17 | twine check \ 18 | --strict \ 19 | "$(echo "${wheel_dir_relative_path}"/*.whl)" 20 | -------------------------------------------------------------------------------- /python/rmm/rmm/pylibrmm/cuda_stream.pxd: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | cimport cython 5 | from cuda.bindings.cyruntime cimport cudaStream_t 6 | from libcpp cimport bool 7 | from libcpp.memory cimport unique_ptr 8 | 9 | from rmm.librmm.cuda_stream cimport cuda_stream 10 | 11 | 12 | @cython.final 13 | cdef class CudaStream: 14 | cdef unique_ptr[cuda_stream] c_obj 15 | cdef cudaStream_t value(self) except * nogil 16 | cdef bool is_valid(self) except * nogil 17 | -------------------------------------------------------------------------------- /python/rmm/rmm/pylibrmm/cuda_stream_pool.pyi: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from typing import Optional 5 | 6 | from rmm.pylibrmm.cuda_stream import CudaStreamFlags 7 | from rmm.pylibrmm.stream import Stream 8 | 9 | class CudaStreamPool: 10 | def __init__( 11 | self, 12 | pool_size: int = ..., 13 | flags: CudaStreamFlags = ..., 14 | ) -> None: ... 15 | def get_stream(self, stream_id: Optional[int] = ...) -> Stream: ... 16 | def get_pool_size(self) -> int: ... 17 | -------------------------------------------------------------------------------- /cpp/include/rmm/detail/export.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #pragma once 7 | 8 | // Macros used for defining symbol visibility, only GLIBC is supported 9 | #if (defined(__GNUC__) && !defined(__MINGW32__) && !defined(__MINGW64__)) 10 | #define RMM_EXPORT __attribute__((visibility("default"))) 11 | #define RMM_HIDDEN __attribute__((visibility("hidden"))) 12 | #define RMM_NAMESPACE RMM_EXPORT rmm 13 | #else 14 | #define RMM_EXPORT 15 | #define RMM_HIDDEN 16 | #define RMM_NAMESPACE rmm 17 | #endif 18 | -------------------------------------------------------------------------------- /python/rmm/rmm/_version.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import importlib.resources 5 | 6 | __version__ = ( 7 | importlib.resources.files(__package__) 8 | .joinpath("VERSION") 9 | .read_text() 10 | .strip() 11 | ) 12 | try: 13 | __git_commit__ = ( 14 | importlib.resources.files(__package__) 15 | .joinpath("GIT_COMMIT") 16 | .read_text() 17 | .strip() 18 | ) 19 | except FileNotFoundError: 20 | __git_commit__ = "" 21 | 22 | __all__ = ["__git_commit__", "__version__"] 23 | -------------------------------------------------------------------------------- /python/rmm/rmm/pylibrmm/stream.pyi: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from typing import Any, Optional 5 | 6 | class Stream: 7 | def __init__(self, obj: Optional[Any] = None) -> None: ... 8 | def __cuda_stream__(self) -> tuple[int, int]: ... 9 | def synchronize(self) -> None: ... 10 | def is_default(self) -> bool: ... 11 | def __eq__(self, other: Any) -> bool: ... 12 | def __hash__(self) -> int: ... 
# =============================================================================
# cmake-format: off
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0
# cmake-format: on
# =============================================================================

# Shared helper for the standalone examples: pins dependency versions and
# loads the rapids-cmake modules (CPM, CUDA, export, find) that example
# CMakeLists.txt files need before calling project().
include(${CMAKE_CURRENT_LIST_DIR}/versions.cmake)
include(${CMAKE_CURRENT_LIST_DIR}/../../cmake/rapids_config.cmake)

include(rapids-cmake)
include(rapids-cpm)
include(rapids-cuda)
include(rapids-export)
include(rapids-find)
# Use CPM to find or clone CCCL
# Wrapped in a function so the rapids-cmake helper's local variables do not
# leak into the caller's scope; CCCL is attached to the rmm-exports
# build/install export sets so downstream consumers inherit it.
function(find_and_configure_cccl)

  include(${rapids-cmake-dir}/cpm/cccl.cmake)
  rapids_cpm_cccl(BUILD_EXPORT_SET rmm-exports INSTALL_EXPORT_SET rmm-exports)

endfunction()

find_and_configure_cccl()
namespace rmm::test {
namespace {

// Note: Fixed_Size MR cannot handle dynamic allocation sizes, so only basic tests are included.
// Instantiates the shared mr_ref_test suite for the "Fixed_Size" resource;
// the trailing lambda names each generated test after its parameter string.
INSTANTIATE_TEST_SUITE_P(FixedSizeResourceTests,
                         mr_ref_test,
                         ::testing::Values("Fixed_Size"),
                         [](auto const& info) { return info.param; });

}  // namespace
}  // namespace rmm::test
-B build/ 15 | # Build 16 | cmake --build build/ --parallel $PARALLEL_LEVEL 17 | # Execute 18 | build/basic_example 19 | ``` 20 | -------------------------------------------------------------------------------- /python/rmm/rmm/librmm/cuda_stream_pool.pxd: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from rmm.librmm.cuda_stream cimport cuda_stream_flags 5 | from rmm.librmm.cuda_stream_view cimport cuda_stream_view 6 | 7 | 8 | cdef extern from "rmm/cuda_stream_pool.hpp" namespace "rmm" nogil: 9 | cdef cppclass cuda_stream_pool: 10 | cuda_stream_pool(size_t pool_size) 11 | cuda_stream_pool(size_t pool_size, cuda_stream_flags flags) 12 | cuda_stream_view get_stream() 13 | cuda_stream_view get_stream(size_t stream_id) except + 14 | size_t get_pool_size() 15 | -------------------------------------------------------------------------------- /.github/workflows/labeler.yml: -------------------------------------------------------------------------------- 1 | 2 | name: "Pull Request Labeler" 3 | on: 4 | - pull_request_target 5 | 6 | permissions: 7 | contents: read 8 | 9 | jobs: 10 | triage: 11 | permissions: 12 | contents: read # for actions/labeler to determine modified files 13 | pull-requests: write # for actions/labeler to add labels to PRs 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Harden the runner (Audit all outbound calls) 17 | uses: step-security/harden-runner@ec9f2d5744a09debf3a187a3f4f675c53b671911 # v2.13.0 18 | with: 19 | egress-policy: audit 20 | 21 | - uses: actions/labeler@v5 22 | with: 23 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 24 | -------------------------------------------------------------------------------- /cpp/src/exec_policy.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA 
namespace rmm {

// Synchronizing Thrust execution policy: temporary storage for Thrust
// algorithms is allocated through `mr` (via rmm::mr::thrust_allocator) and
// execution is ordered on `stream`.
exec_policy::exec_policy(cuda_stream_view stream, device_async_resource_ref mr)
  : thrust_exec_policy_t(
      thrust::cuda::par(mr::thrust_allocator(stream, mr)).on(stream.value()))
{
}

// Same as above but built on thrust::cuda::par_nosync, which skips the
// synchronizations Thrust would otherwise insert; the caller is responsible
// for ordering on `stream`.
exec_policy_nosync::exec_policy_nosync(cuda_stream_view stream, device_async_resource_ref mr)
  : thrust_exec_policy_nosync_t(
      thrust::cuda::par_nosync(mr::thrust_allocator(stream, mr)).on(stream.value()))
{
}

}  // namespace rmm
namespace rmm {

// Prefix the standard bad_alloc text with the caller-supplied detail so
// what() reads "<std::bad_alloc::what()>: <msg>".
bad_alloc::bad_alloc(const char* msg) : _what{std::string{std::bad_alloc::what()} + ": " + msg} {}

bad_alloc::bad_alloc(std::string const& msg) : bad_alloc{msg.c_str()} {}

// The composed message is stored in _what so the returned pointer remains
// valid for the lifetime of the exception object.
const char* bad_alloc::what() const noexcept { return _what.c_str(); }

// out_of_memory reuses the bad_alloc message machinery, tagging the message
// with an "out_of_memory: " prefix.
out_of_memory::out_of_memory(const char* msg) : bad_alloc{std::string{"out_of_memory: "} + msg} {}

out_of_memory::out_of_memory(std::string const& msg) : out_of_memory{msg.c_str()} {}

}  // namespace rmm
# https://github.com/actions/labeler#common-examples
# Generated from https://github.com/rapidsai/cudf/blob/main/.github/CODEOWNERS
# Labels culled from https://github.com/rapidsai/rmm/labels

Python:
  - changed-files:
      - any-glob-to-any-file:
          - 'python/**'
CMake:
  - changed-files:
      - any-glob-to-any-file:
          - '**/CMakeLists.txt'
          - '**/cmake/**'
conda:
  - changed-files:
      - any-glob-to-any-file:
          - 'conda/**'
cpp:
  # C++ sources in this repo live under cpp/ (cpp/include, cpp/tests,
  # cpp/doxygen); the previous top-level globs copied from cudf never matched.
  - changed-files:
      - any-glob-to-any-file:
          - 'cpp/**'
ci:
  - changed-files:
      - any-glob-to-any-file:
          - 'ci/**'
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | namespace RMM_NAMESPACE { 15 | /** 16 | * @addtogroup thrust_integrations 17 | * @{ 18 | * @file 19 | */ 20 | /** 21 | * @brief Alias for a thrust::device_vector that uses RMM for memory allocation. 22 | * 23 | */ 24 | template 25 | using device_vector = thrust::device_vector>; 26 | 27 | /** @} */ // end of group 28 | } // namespace RMM_NAMESPACE 29 | -------------------------------------------------------------------------------- /cpp/doxygen/main_page.md: -------------------------------------------------------------------------------- 1 | # librmm 2 | 3 | Achieving optimal performance in GPU-centric workflows frequently requires customizing how host and 4 | device memory are allocated. For example, using "pinned" host memory for asynchronous 5 | host <-> device memory transfers, or using a device memory pool sub-allocator to reduce the cost of 6 | dynamic device memory allocation. 7 | 8 | The goal of the RAPIDS Memory Manager (RMM) is to provide: 9 | - A common interface that allows customizing device and host memory allocation 10 | - A collection of implementations of the interface 11 | - A collection of data structures that use the interface for memory allocation 12 | 13 | \htmlonly For more information on APIs provided by rmm, see the modules page\endhtmlonly. 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for RMM 4 | title: "[FEA]" 5 | labels: "? - Needs Triage, feature request" 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I wish I could use RMM to do [...] 
# SPDX-FileCopyrightText: Copyright (c) 2020-2024, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0

from cuda.bindings.cyruntime cimport cudaStream_t
from libcpp cimport bool


cdef extern from "rmm/cuda_stream_view.hpp" namespace "rmm" nogil:
    # Non-owning view of a CUDA stream (rmm::cuda_stream_view); declarations
    # must match the C++ header exactly.
    cdef cppclass cuda_stream_view:
        cuda_stream_view()
        cuda_stream_view(cudaStream_t)
        cudaStream_t value()
        bool is_default()
        bool is_per_thread_default()
        # "except +" converts a C++ exception from synchronization failure
        # into a Python exception.
        void synchronize() except +

    cdef bool operator==(cuda_stream_view const, cuda_stream_view const)

    # Views of the three well-known CUDA streams exposed by rmm.
    const cuda_stream_view cuda_stream_default
    const cuda_stream_view cuda_stream_legacy
    const cuda_stream_view cuda_stream_per_thread
# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0

from cuda.bindings.cyruntime cimport cudaStream_t
from libc.stdint cimport uintptr_t
from libcpp cimport bool

from rmm.librmm.cuda_stream_view cimport cuda_stream_view


cdef class Stream:
    # Raw CUDA stream handle wrapped by this object.
    cdef cudaStream_t _cuda_stream
    # Python object held to keep the underlying stream alive (presumably the
    # originating stream owner; may be None — confirm in stream.pyx).
    cdef object _owner

    @staticmethod
    cdef Stream _from_cudaStream_t(cudaStream_t s, object owner=*)

    # Non-owning C++ view for passing to librmm APIs; safe without the GIL.
    cdef cuda_stream_view view(self) noexcept nogil
    cdef void c_synchronize(self) except * nogil
    cdef bool c_is_default(self) noexcept nogil
    # Internal initializers used by __init__ paths.
    cdef void _init_with_new_cuda_stream(self) except *
    cdef void _init_from_stream(self, Stream stream) except *
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #include "mr_ref_test_allocation.hpp" 7 | #include "mr_ref_test_basic.hpp" 8 | 9 | namespace rmm::test { 10 | namespace { 11 | 12 | INSTANTIATE_TEST_SUITE_P(SystemResourceTests, 13 | mr_ref_test, 14 | ::testing::Values("System"), 15 | [](auto const& info) { return info.param; }); 16 | 17 | INSTANTIATE_TEST_SUITE_P(SystemResourceAllocationTests, 18 | mr_ref_allocation_test, 19 | ::testing::Values("System"), 20 | [](auto const& info) { return info.param; }); 21 | 22 | } // namespace 23 | } // namespace rmm::test 24 | -------------------------------------------------------------------------------- /cpp/tests/libcudacxx_flag_test/libcudacxx_flag_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | /** 7 | * @file libcudacxx_flag_test.cpp 8 | * @brief Test that verifies the error message when `LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE` 9 | * is not defined. 10 | * 11 | * This test is expected to fail to compile with a clear error message. 12 | * To run this test, you need to compile it separately without defining 13 | * `LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE`. 14 | * 15 | * Example: 16 | * `g++ -std=c++17 -I../../include libcudacxx_flag_test.cpp` 17 | */ 18 | 19 | // Include a header that requires LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE 20 | #include 21 | 22 | int main() { return 0; } 23 | -------------------------------------------------------------------------------- /cpp/tests/mr/mr_ref_pinned_pool_tests.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #include "mr_ref_test_allocation.hpp" 7 | #include "mr_ref_test_basic.hpp" 8 | 9 | namespace rmm::test { 10 | namespace { 11 | 12 | INSTANTIATE_TEST_SUITE_P(PinnedPoolResourceTests, 13 | mr_ref_test, 14 | ::testing::Values("PinnedPool"), 15 | [](auto const& info) { return info.param; }); 16 | 17 | INSTANTIATE_TEST_SUITE_P(PinnedPoolResourceAllocationTests, 18 | mr_ref_allocation_test, 19 | ::testing::Values("PinnedPool"), 20 | [](auto const& info) { return info.param; }); 21 | 22 | } // namespace 23 | } // namespace rmm::test 24 | -------------------------------------------------------------------------------- /cpp/examples/basic/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # cmake-format: off 2 | # SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. 3 | # SPDX-License-Identifier: Apache-2.0 4 | # cmake-format: on 5 | 6 | cmake_minimum_required(VERSION 3.30.4) 7 | 8 | include(../set_cuda_architecture.cmake) 9 | 10 | # initialize CUDA architectures 11 | rapids_cuda_init_architectures(basic_example) 12 | 13 | project( 14 | basic_example 15 | VERSION 0.0.1 16 | LANGUAGES CXX CUDA) 17 | 18 | include(../fetch_dependencies.cmake) 19 | 20 | include(rapids-cmake) 21 | rapids_cmake_build_type("Release") 22 | 23 | # Configure your project here 24 | add_executable(basic_example src/basic.cpp) 25 | target_link_libraries(basic_example PRIVATE rmm::rmm) 26 | target_compile_features(basic_example PRIVATE cxx_std_17) 27 | 28 | install(TARGETS basic_example DESTINATION bin/examples/librmm) 29 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. 
# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0

# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
# -n nitpicky, -v verbose, -W treat warnings as errors, --keep-going so all
# warnings are reported before failing.
SPHINXOPTS    = -n -v -W --keep-going
SPHINXBUILD  ?= sphinx-build
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0

from collections.abc import Generator

import pytest

import rmm
import rmm.mr
import rmm.statistics


@pytest.fixture(scope="function", autouse=True)
def rmm_auto_reinitialize() -> Generator[None, None, None]:
    """Reset RMM's memory resource after every test.

    autouse=True applies this to every test in the suite, so a test that
    installs a custom memory resource cannot leak it into later tests.
    """
    # Run the test
    yield

    # Automatically reinitialize the current memory resource after running
    # each test
    rmm.reinitialize()


@pytest.fixture
def stats_mr() -> Generator[rmm.mr.DeviceMemoryResource, None, None]:
    """Fixture that makes a StatisticsResourceAdaptor available to the test"""
    # The statistics() context installs the adaptor as the current device
    # resource for the duration of the test and removes it afterwards.
    with rmm.statistics.statistics():
        yield rmm.mr.get_current_device_resource()
# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0

from cuda.bindings.cyruntime cimport cudaStream_t
from libc.stdint cimport uint32_t
from libcpp cimport bool

from rmm.librmm.cuda_stream_view cimport cuda_stream_view


cdef extern from "rmm/cuda_stream.hpp" namespace "rmm" nogil:

    # Stream-creation flags mirroring rmm::cuda_stream::flags; cpdef makes
    # the enum usable from both Cython and Python.
    cpdef enum class cuda_stream_flags "rmm::cuda_stream::flags" (uint32_t):
        sync_default "rmm::cuda_stream::flags::sync_default"
        non_blocking "rmm::cuda_stream::flags::non_blocking"

    # Owning wrapper around a CUDA stream (rmm::cuda_stream); "except +"
    # converts any C++ exception into a Python exception.
    cdef cppclass cuda_stream:
        cuda_stream() except +
        bool is_valid() except +
        cudaStream_t value() except +
        cuda_stream_view view() except +
        void synchronize() except +
        # No-throw variant for teardown paths where raising is not possible.
        void synchronize_no_throw()
4 | # SPDX-License-Identifier: Apache-2.0 5 | # cmake-format: on 6 | # ============================================================================= 7 | 8 | cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR) 9 | 10 | include(../../cmake/rapids_config.cmake) 11 | 12 | project( 13 | rmm-python 14 | VERSION "${RAPIDS_VERSION}" 15 | LANGUAGES CXX) 16 | 17 | find_package(rmm "${RAPIDS_VERSION}" REQUIRED) 18 | 19 | include(rapids-cython-core) 20 | rapids_cython_init() 21 | 22 | # pass through logging level to spdlog 23 | add_compile_definitions("RMM_LOG_ACTIVE_LEVEL=RAPIDS_LOGGER_LOG_LEVEL_${RMM_LOGGING_LEVEL}") 24 | 25 | add_subdirectory(rmm/_cuda) 26 | add_subdirectory(rmm/librmm) 27 | add_subdirectory(rmm/pylibrmm) 28 | -------------------------------------------------------------------------------- /python/rmm/rmm/pylibrmm/memory_resource/__init__.pxd: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. 
#cpp code owners
# NOTE: C++ code in this repository lives under cpp/ (see cpp/include,
# cpp/tests); the previous top-level paths no longer matched anything.
/cpp/include/ @rapidsai/rmm-cpp-codeowners
/cpp/tests/ @rapidsai/rmm-cpp-codeowners
/cpp/benchmarks/ @rapidsai/rmm-cpp-codeowners

#python code owners
python/ @rapidsai/rmm-python-codeowners

#cmake code owners
**/CMakeLists.txt @rapidsai/rmm-cmake-codeowners
**/cmake/ @rapidsai/rmm-cmake-codeowners

#CI code owners
/.github/ @rapidsai/ci-codeowners
/ci/ @rapidsai/ci-codeowners
/.pre-commit-config.yaml @rapidsai/ci-codeowners
/.shellcheckrc @rapidsai/ci-codeowners

#packaging code owners
/.devcontainer/ @rapidsai/packaging-codeowners
/conda/ @rapidsai/packaging-codeowners
/dependencies.yaml @rapidsai/packaging-codeowners
/build.sh @rapidsai/packaging-codeowners
pyproject.toml @rapidsai/packaging-codeowners
SPDX-FileCopyrightText: Copyright (c) 2021, NVIDIA CORPORATION. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #pragma once 7 | /** 8 | * @brief Macro for suppressing __host__ / __device__ function markup 9 | * checks that the NVCC compiler does. 10 | * 11 | * At times it is useful to place rmm host only types inside containers 12 | * that work on both host and device. Doing so will generate warnings 13 | * of using a host only type inside a host / device type. 14 | * 15 | * This macro can be used to silence said warnings 16 | * 17 | */ 18 | 19 | // #pragma nv_exec_check_disable is only recognized by NVCC so verify 20 | // that we have both the NVCC compiler and we are compiling a CUDA 21 | // source 22 | #if defined(__CUDACC__) && defined(__NVCC__) 23 | #define RMM_EXEC_CHECK_DISABLE _Pragma("nv_exec_check_disable") 24 | #else 25 | #define RMM_EXEC_CHECK_DISABLE 26 | #endif 27 | -------------------------------------------------------------------------------- /cpp/src/logger.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 
namespace rmm {

// Build the default logging sink: if RMM_DEBUG_LOG_FILE is set, log to that
// file (second argument true — presumably truncate; confirm against the
// rapids_logger sink API), otherwise use the default sink type.
rapids_logger::sink_ptr default_sink()
{
  auto* filename = std::getenv("RMM_DEBUG_LOG_FILE");
  if (filename != nullptr) {
    return std::make_shared(filename, true);
  }
  return std::make_shared();
}

// Default log-line pattern; the specifiers follow the spdlog-style pattern
// syntax used by rapids_logger (thread id, timestamp, level, message).
std::string default_pattern() { return "[%6t][%H:%M:%S:%f][%-6l] %v"; }

// Process-wide "RMM" logger, lazily constructed on first use. The static
// local guarantees thread-safe one-time initialization (C++11 rules).
rapids_logger::logger& default_logger()
{
  static rapids_logger::logger logger_ = [] {
    rapids_logger::logger logger_{"RMM", {default_sink()}};
    logger_.set_pattern(default_pattern());
    return logger_;
  }();
  return logger_;
}

}  // namespace rmm
4 | # SPDX-License-Identifier: Apache-2.0 5 | # cmake-format: on 6 | # ============================================================================= 7 | 8 | set(linked_libraries rmm::rmm) 9 | 10 | rapids_cython_create_modules(SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" 11 | CXX) 12 | 13 | # mark all symbols in these Cython targets "hidden" by default, so they won't collide with symbols 14 | # loaded from other DSOs 15 | foreach(_cython_target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) 16 | set_target_properties(${_cython_target} PROPERTIES C_VISIBILITY_PRESET hidden 17 | CXX_VISIBILITY_PRESET hidden) 18 | endforeach() 19 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Documentation request 3 | about: Report incorrect or needed documentation 4 | title: "[DOC]" 5 | labels: "? - Needs Triage, doc" 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Report incorrect documentation 11 | 12 | **Location of incorrect documentation** 13 | Provide links and line numbers if applicable. 14 | 15 | **Describe the problems or issues found in the documentation** 16 | A clear and concise description of what you found to be incorrect. 17 | 18 | **Steps taken to verify documentation is incorrect** 19 | List any steps you have taken. 20 | 21 | **Suggested fix for documentation** 22 | Detail proposed changes to fix the documentation if you have any. 23 | 24 | --- 25 | 26 | ## Report needed documentation 27 | 28 | **Report needed documentation** 29 | A clear and concise description of what documentation is needed and why. 30 | 31 | **Steps taken to search for needed documentation** 32 | List any steps you have taken. 
33 | -------------------------------------------------------------------------------- /python/rmm/rmm/librmm/per_device_resource.pxd: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION. 2 | # SPDX-License-Identifier: Apache-2.0 3 | from rmm.librmm.memory_resource cimport device_memory_resource 4 | 5 | 6 | cdef extern from "rmm/mr/per_device_resource.hpp" namespace "rmm" nogil: 7 | cdef cppclass cuda_device_id: 8 | ctypedef int value_type 9 | 10 | cuda_device_id(value_type id) 11 | 12 | value_type value() 13 | 14 | cdef extern from "rmm/mr/per_device_resource.hpp" \ 15 | namespace "rmm::mr" nogil: 16 | cdef device_memory_resource* set_current_device_resource( 17 | device_memory_resource* new_mr 18 | ) 19 | cdef device_memory_resource* get_current_device_resource() 20 | cdef device_memory_resource* set_per_device_resource( 21 | cuda_device_id id, device_memory_resource* new_mr 22 | ) 23 | cdef device_memory_resource* get_per_device_resource ( 24 | cuda_device_id id 25 | ) 26 | -------------------------------------------------------------------------------- /python/rmm/rmm/pylibrmm/tests/test_device_buffer.pyx: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2020-2024, NVIDIA CORPORATION. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import numpy as np 5 | 6 | from libcpp.memory cimport make_unique 7 | 8 | from rmm.librmm.cuda_stream_view cimport cuda_stream_default 9 | from rmm.librmm.device_buffer cimport device_buffer 10 | from rmm.pylibrmm.device_buffer cimport DeviceBuffer 11 | 12 | 13 | def test_release(): 14 | expect = DeviceBuffer.to_device(b'abc') 15 | cdef DeviceBuffer buf = DeviceBuffer.to_device(b'abc') 16 | 17 | got = DeviceBuffer.c_from_unique_ptr( 18 | make_unique[device_buffer](buf.c_release(), 19 | cuda_stream_default.value()) 20 | ) 21 | np.testing.assert_equal(expect.copy_to_host(), got.copy_to_host()) 22 | 23 | 24 | def test_size_after_release(): 25 | cdef DeviceBuffer buf = DeviceBuffer.to_device(b'abc') 26 | buf.c_release() 27 | assert buf.size == 0 28 | -------------------------------------------------------------------------------- /cpp/examples/fetch_dependencies.cmake: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # cmake-format: off 3 | # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 
4 | # SPDX-License-Identifier: Apache-2.0 5 | # cmake-format: on 6 | # ============================================================================= 7 | 8 | include(${CMAKE_CURRENT_LIST_DIR}/versions.cmake) 9 | 10 | set(CPM_DOWNLOAD_VERSION v0.40.5) 11 | file( 12 | DOWNLOAD 13 | https://github.com/cpm-cmake/CPM.cmake/releases/download/${CPM_DOWNLOAD_VERSION}/get_cpm.cmake 14 | ${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake) 15 | include(${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake) 16 | 17 | # find or build it via CPM 18 | CPMFindPackage( 19 | NAME rmm 20 | FIND_PACKAGE_ARGUMENTS "PATHS ${rmm_ROOT} ${rmm_ROOT}/latest" GIT_REPOSITORY 21 | https://github.com/rapidsai/rmm 22 | GIT_TAG ${RMM_TAG} 23 | GIT_SHALLOW 24 | TRUE 25 | SOURCE_SUBDIR 26 | cpp) 27 | -------------------------------------------------------------------------------- /python/rmm/rmm/librmm/_logger.pxd: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from libcpp cimport bool 5 | from libcpp.string cimport string 6 | 7 | 8 | cdef extern from "rapids_logger/logger.hpp" namespace "rapids_logger" nogil: 9 | cpdef enum class level_enum: 10 | trace 11 | debug 12 | info 13 | warn 14 | error 15 | critical 16 | off 17 | n_levels 18 | 19 | cdef cppclass logger: 20 | logger(string name, string filename) except + 21 | void set_level(level_enum log_level) except + 22 | level_enum level() except + 23 | void flush() except + 24 | void flush_on(level_enum level) except + 25 | level_enum flush_level() except + 26 | bool should_log(level_enum msg_level) except + 27 | 28 | 29 | cdef extern from "rmm/logger.hpp" namespace "rmm" nogil: 30 | cdef logger& default_logger() except + 31 | -------------------------------------------------------------------------------- /ci/run_ctests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | set -euo pipefail 6 | 7 | # Support customizing the ctests' install location 8 | # First, try the installed location (CI/conda environments) 9 | installed_test_location="${INSTALL_PREFIX:-${CONDA_PREFIX:-/usr}}/bin/gtests/librmm/" 10 | # Fall back to the build directory (devcontainer environments) 11 | devcontainers_test_location="$(dirname "$(realpath "${BASH_SOURCE[0]}")")/../cpp/build/latest" 12 | 13 | if [[ -d "${installed_test_location}" ]]; then 14 | cd "${installed_test_location}" 15 | elif [[ -d "${devcontainers_test_location}" ]]; then 16 | cd "${devcontainers_test_location}" 17 | else 18 | echo "Error: Test location not found. 
Searched:" >&2 19 | echo " - ${installed_test_location}" >&2 20 | echo " - ${devcontainers_test_location}" >&2 21 | exit 1 22 | fi 23 | 24 | ctest --no-tests=error --output-on-failure "$@" 25 | -------------------------------------------------------------------------------- /python/librmm/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # cmake-format: off 3 | # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 4 | # SPDX-License-Identifier: Apache-2.0 5 | # cmake-format: on 6 | # ============================================================================= 7 | 8 | cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR) 9 | 10 | include(../../cmake/rapids_config.cmake) 11 | 12 | project( 13 | librmm-python 14 | VERSION "${RAPIDS_VERSION}" 15 | LANGUAGES CXX) 16 | 17 | # Check if rmm is already available. If so, it's the user's responsibility to ensure that the CMake 18 | # package is also available at build time of the Python rmm package. 19 | find_package(rmm "${RAPIDS_VERSION}") 20 | 21 | if(rmm_FOUND) 22 | return() 23 | endif() 24 | 25 | unset(rmm_FOUND) 26 | 27 | set(BUILD_TESTS OFF) 28 | set(BUILD_BENCHMARKS OFF) 29 | set(CUDA_STATIC_RUNTIME ON) 30 | 31 | add_subdirectory(../../cpp rmm-cpp) 32 | -------------------------------------------------------------------------------- /cpp/include/rmm/mr/is_resource_adaptor.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | namespace RMM_NAMESPACE { 13 | namespace mr { 14 | 15 | /** 16 | * @addtogroup memory_resources 17 | * @{ 18 | * @file 19 | */ 20 | 21 | /** 22 | * @brief Concept to check whether a resource is a resource adaptor by checking for 23 | * `get_upstream_resource`. 24 | */ 25 | template 26 | inline constexpr bool is_resource_adaptor = false; 27 | 28 | template 29 | inline constexpr bool is_resource_adaptor< 30 | Resource, 31 | cuda::std::void_t().get_upstream_resource())>> = 32 | rmm::detail::polyfill::resource; 33 | 34 | /** @} */ // end of group 35 | } // namespace mr 36 | } // namespace RMM_NAMESPACE 37 | -------------------------------------------------------------------------------- /cpp/tests/mock_resource.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | #pragma once 6 | 7 | #include 8 | 9 | #include 10 | 11 | namespace rmm::test { 12 | 13 | class mock_resource : public rmm::mr::device_memory_resource { 14 | public: 15 | MOCK_METHOD(void*, do_allocate, (std::size_t, cuda_stream_view), (override)); 16 | MOCK_METHOD(void, do_deallocate, (void*, std::size_t, cuda_stream_view), (noexcept, override)); 17 | bool operator==(mock_resource const&) const noexcept { return true; } 18 | bool operator!=(mock_resource const&) const { return false; } 19 | friend void get_property(mock_resource const&, cuda::mr::device_accessible) noexcept {} 20 | using size_pair = std::pair; 21 | }; 22 | 23 | // static property checks 24 | static_assert( 25 | rmm::detail::polyfill::async_resource_with); 26 | 27 | } // namespace rmm::test 28 | -------------------------------------------------------------------------------- /cpp/include/rmm/logger.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | namespace RMM_NAMESPACE { 14 | 15 | /** 16 | * @brief Returns the default sink for the global logger. 17 | * 18 | * If the environment variable `RMM_DEBUG_LOG_FILE` is defined, the default sink is a sink to that 19 | * file. Otherwise, the default is to dump to stderr. 20 | * 21 | * @return sink_ptr The sink to use 22 | */ 23 | rapids_logger::sink_ptr default_sink(); 24 | 25 | /** 26 | * @brief Returns the default log pattern for the global logger. 27 | * 28 | * @return std::string The default log pattern. 29 | */ 30 | std::string default_pattern(); 31 | 32 | /** 33 | * @brief Get the default logger. 
34 | * 35 | * @return logger& The default logger 36 | */ 37 | rapids_logger::logger& default_logger(); 38 | 39 | } // namespace RMM_NAMESPACE 40 | -------------------------------------------------------------------------------- /cpp/examples/basic/src/basic.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | int main(int argc, char** argv) 12 | { 13 | // Construct a CUDA async memory resource using RAPIDS Memory Manager (RMM). 14 | // This uses a memory pool managed by the CUDA driver, using half of the 15 | // available GPU memory. 16 | rmm::mr::cuda_async_memory_resource mr{rmm::percent_of_free_device_memory(50)}; 17 | 18 | // Create a CUDA stream for asynchronous allocations 19 | auto stream = rmm::cuda_stream{}; 20 | 21 | // Create a device_uvector with this stream and memory resource 22 | auto const size{12345}; 23 | rmm::device_uvector vec(size, stream, mr); 24 | std::cout << "vec size: " << vec.size() << std::endl; 25 | 26 | // Synchronize the stream 27 | stream.synchronize(); 28 | 29 | return 0; 30 | } 31 | -------------------------------------------------------------------------------- /cpp/tests/byte_literals.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2021, NVIDIA CORPORATION. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #include 7 | 8 | namespace rmm::test { 9 | 10 | constexpr auto kilo{long{1} << 10}; 11 | constexpr auto mega{long{1} << 20}; 12 | constexpr auto giga{long{1} << 30}; 13 | constexpr auto tera{long{1} << 40}; 14 | constexpr auto peta{long{1} << 50}; 15 | 16 | // user-defined Byte literals 17 | constexpr unsigned long long operator""_B(unsigned long long val) { return val; } 18 | constexpr unsigned long long operator""_KiB(unsigned long long const val) { return kilo * val; } 19 | constexpr unsigned long long operator""_MiB(unsigned long long const val) { return mega * val; } 20 | constexpr unsigned long long operator""_GiB(unsigned long long const val) { return giga * val; } 21 | constexpr unsigned long long operator""_TiB(unsigned long long const val) { return tera * val; } 22 | constexpr unsigned long long operator""_PiB(unsigned long long const val) { return peta * val; } 23 | 24 | } // namespace rmm::test 25 | -------------------------------------------------------------------------------- /.github/workflows/trigger-breaking-change-alert.yaml: -------------------------------------------------------------------------------- 1 | name: Trigger Breaking Change Notifications 2 | 3 | on: 4 | pull_request_target: 5 | types: 6 | - closed 7 | - reopened 8 | - labeled 9 | - unlabeled 10 | 11 | jobs: 12 | trigger-notifier: 13 | if: contains(github.event.pull_request.labels.*.name, 'breaking') 14 | secrets: inherit 15 | uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@main 16 | with: 17 | sender_login: ${{ github.event.sender.login }} 18 | sender_avatar: ${{ github.event.sender.avatar_url }} 19 | repo: ${{ github.repository }} 20 | pr_number: ${{ github.event.pull_request.number }} 21 | pr_title: "${{ github.event.pull_request.title }}" 22 | pr_body: "${{ github.event.pull_request.body || '_Empty PR description_' }}" 23 | pr_base_ref: ${{ github.event.pull_request.base.ref }} 24 | pr_author: 
${{ github.event.pull_request.user.login }} 25 | event_action: ${{ github.event.action }} 26 | pr_merged: ${{ github.event.pull_request.merged }} 27 | -------------------------------------------------------------------------------- /python/rmm/rmm/pylibrmm/memory_resource/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # cmake-format: off 3 | # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 4 | # SPDX-License-Identifier: Apache-2.0 5 | # cmake-format: on 6 | # ============================================================================= 7 | 8 | set(cython_sources _memory_resource.pyx experimental.pyx) 9 | set(linked_libraries rmm::rmm) 10 | 11 | rapids_cython_create_modules( 12 | CXX 13 | SOURCE_FILES "${cython_sources}" 14 | LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX pylibrmm_memory_resource_ ASSOCIATED_TARGETS 15 | rmm) 16 | 17 | # mark all symbols in these Cython targets "hidden" by default, so they won't collide with symbols 18 | # loaded from other DSOs 19 | foreach(_cython_target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) 20 | set_target_properties(${_cython_target} PROPERTIES C_VISIBILITY_PRESET hidden 21 | CXX_VISIBILITY_PRESET hidden) 22 | endforeach() 23 | -------------------------------------------------------------------------------- /python/rmm/rmm/tests/test_arena_memory_resource.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0

"""Tests for ArenaMemoryResource."""

import pytest
from test_helpers import _allocs, _dtypes, _nelems, array_tester

import rmm


@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
@pytest.mark.parametrize("alloc", _allocs)
@pytest.mark.parametrize(
    "upstream_mr",
    [
        # Factories (not instances) so each parametrized case constructs a
        # fresh upstream resource at test time.
        lambda: rmm.mr.CudaMemoryResource(),
        lambda: rmm.mr.ManagedMemoryResource(),
        lambda: rmm.mr.PoolMemoryResource(
            rmm.mr.CudaMemoryResource(), 1 << 20
        ),
    ],
)
def test_arena_memory_resource(dtype, nelem, alloc, upstream_mr):
    """Allocate through an ArenaMemoryResource layered on several upstreams."""
    upstream = upstream_mr()
    mr = rmm.mr.ArenaMemoryResource(upstream)

    # Install the arena as the current device resource, verify it took effect,
    # then exercise allocations of the parametrized dtype/size/allocator.
    rmm.mr.set_current_device_resource(mr)
    assert rmm.mr.get_current_device_resource_type() is type(mr)
    array_tester(dtype, nelem, alloc)
-------------------------------- /cpp/tests/mr/binning_mr_tests.cpp: --------------------------------
/*
 * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION.
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | // explicit instantiation for test coverage purposes 13 | template class rmm::mr::binning_memory_resource; 14 | 15 | namespace rmm::test { 16 | 17 | using cuda_mr = rmm::mr::cuda_memory_resource; 18 | using binning_mr = rmm::mr::binning_memory_resource; 19 | 20 | TEST(BinningTest, ThrowOnNullUpstream) 21 | { 22 | auto construct_nullptr = []() { binning_mr mr{nullptr}; }; 23 | EXPECT_THROW(construct_nullptr(), rmm::logic_error); 24 | } 25 | 26 | TEST(BinningTest, ExplicitBinMR) 27 | { 28 | cuda_mr cuda{}; 29 | binning_mr mr{&cuda}; 30 | mr.add_bin(1024, &cuda); 31 | auto* ptr = mr.allocate_sync(512); 32 | EXPECT_NE(ptr, nullptr); 33 | mr.deallocate_sync(ptr, 512); 34 | } 35 | 36 | } // namespace rmm::test 37 | -------------------------------------------------------------------------------- /cpp/tests/mr/mr_ref_cuda_tests.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #include "mr_ref_test_allocation.hpp" 7 | #include "mr_ref_test_basic.hpp" 8 | #include "mr_ref_test_mt.hpp" 9 | 10 | namespace rmm::test { 11 | namespace { 12 | 13 | INSTANTIATE_TEST_SUITE_P(CudaResourceTests, 14 | mr_ref_test, 15 | ::testing::Values("CUDA"), 16 | [](auto const& info) { return info.param; }); 17 | 18 | INSTANTIATE_TEST_SUITE_P(CudaResourceAllocationTests, 19 | mr_ref_allocation_test, 20 | ::testing::Values("CUDA"), 21 | [](auto const& info) { return info.param; }); 22 | 23 | INSTANTIATE_TEST_SUITE_P(CudaMultiThreadResourceTests, 24 | mr_ref_test_mt, 25 | ::testing::Values("CUDA"), 26 | [](auto const& info) { return info.param; }); 27 | 28 | } // namespace 29 | } // namespace rmm::test 30 | -------------------------------------------------------------------------------- /cpp/tests/mr/mr_ref_pool_tests.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #include "mr_ref_test_allocation.hpp" 7 | #include "mr_ref_test_basic.hpp" 8 | #include "mr_ref_test_mt.hpp" 9 | 10 | namespace rmm::test { 11 | namespace { 12 | 13 | INSTANTIATE_TEST_SUITE_P(PoolResourceTests, 14 | mr_ref_test, 15 | ::testing::Values("Pool"), 16 | [](auto const& info) { return info.param; }); 17 | 18 | INSTANTIATE_TEST_SUITE_P(PoolResourceAllocationTests, 19 | mr_ref_allocation_test, 20 | ::testing::Values("Pool"), 21 | [](auto const& info) { return info.param; }); 22 | 23 | INSTANTIATE_TEST_SUITE_P(PoolMultiThreadResourceTests, 24 | mr_ref_test_mt, 25 | ::testing::Values("Pool"), 26 | [](auto const& info) { return info.param; }); 27 | 28 | } // namespace 29 | } // namespace rmm::test 30 | -------------------------------------------------------------------------------- /cpp/include/rmm/detail/format.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #pragma once 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace RMM_NAMESPACE { 16 | namespace detail { 17 | 18 | // Stringify a size in bytes to a human-readable value 19 | inline std::string format_bytes(std::size_t value) 20 | { 21 | static std::array units{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"}; 22 | 23 | int index = 0; 24 | auto size = static_cast(value); 25 | while (size > 1024) { 26 | size /= 1024; 27 | index++; 28 | } 29 | 30 | return std::to_string(size) + ' ' + units.at(index); 31 | } 32 | 33 | // Stringify a stream ID 34 | inline std::string format_stream(rmm::cuda_stream_view stream) 35 | { 36 | std::stringstream sstr{}; 37 | sstr << std::hex << stream.value(); 38 | return sstr.str(); 39 | } 40 | 41 | } // namespace detail 42 | } // namespace RMM_NAMESPACE 43 | -------------------------------------------------------------------------------- /cpp/tests/mr/mr_ref_arena_tests.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #include "mr_ref_test_allocation.hpp" 7 | #include "mr_ref_test_basic.hpp" 8 | #include "mr_ref_test_mt.hpp" 9 | 10 | namespace rmm::test { 11 | namespace { 12 | 13 | INSTANTIATE_TEST_SUITE_P(ArenaResourceTests, 14 | mr_ref_test, 15 | ::testing::Values("Arena"), 16 | [](auto const& info) { return info.param; }); 17 | 18 | INSTANTIATE_TEST_SUITE_P(ArenaResourceAllocationTests, 19 | mr_ref_allocation_test, 20 | ::testing::Values("Arena"), 21 | [](auto const& info) { return info.param; }); 22 | 23 | INSTANTIATE_TEST_SUITE_P(ArenaMultiThreadResourceTests, 24 | mr_ref_test_mt, 25 | ::testing::Values("Arena"), 26 | [](auto const& info) { return info.param; }); 27 | 28 | } // namespace 29 | } // namespace rmm::test 30 | -------------------------------------------------------------------------------- /python/rmm/rmm/tests/test_limiting_resource_adaptor.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | """Tests for LimitingResourceAdaptor.""" 5 | 6 | import pytest 7 | 8 | import rmm 9 | 10 | 11 | @pytest.mark.parametrize( 12 | "mr", 13 | [ 14 | rmm.mr.CudaMemoryResource, 15 | pytest.param(rmm.mr.CudaAsyncMemoryResource), 16 | ], 17 | ) 18 | def test_limiting_resource_adaptor(mr): 19 | cuda_mr = mr() 20 | 21 | allocation_limit = 1 << 20 22 | num_buffers = 2 23 | buffer_size = allocation_limit // num_buffers 24 | 25 | mr = rmm.mr.LimitingResourceAdaptor( 26 | cuda_mr, allocation_limit=allocation_limit 27 | ) 28 | assert mr.get_allocation_limit() == allocation_limit 29 | 30 | rmm.mr.set_current_device_resource(mr) 31 | 32 | buffers = [rmm.DeviceBuffer(size=buffer_size) for _ in range(num_buffers)] 33 | 34 | assert mr.get_allocated_bytes() == sum(b.size for b in buffers) 35 | 36 | with pytest.raises(MemoryError): 37 | rmm.DeviceBuffer(size=1) 38 | -------------------------------------------------------------------------------- /cpp/tests/mr/mr_ref_binning_tests.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #include "mr_ref_test_allocation.hpp" 7 | #include "mr_ref_test_basic.hpp" 8 | #include "mr_ref_test_mt.hpp" 9 | 10 | namespace rmm::test { 11 | namespace { 12 | 13 | INSTANTIATE_TEST_SUITE_P(BinningResourceTests, 14 | mr_ref_test, 15 | ::testing::Values("Binning"), 16 | [](auto const& info) { return info.param; }); 17 | 18 | INSTANTIATE_TEST_SUITE_P(BinningResourceAllocationTests, 19 | mr_ref_allocation_test, 20 | ::testing::Values("Binning"), 21 | [](auto const& info) { return info.param; }); 22 | 23 | INSTANTIATE_TEST_SUITE_P(BinningMultiThreadResourceTests, 24 | mr_ref_test_mt, 25 | ::testing::Values("Binning"), 26 | [](auto const& info) { return info.param; }); 27 | 28 | } // namespace 29 | } // namespace rmm::test 30 | -------------------------------------------------------------------------------- /cpp/tests/mr/mr_ref_managed_tests.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #include "mr_ref_test_allocation.hpp" 7 | #include "mr_ref_test_basic.hpp" 8 | #include "mr_ref_test_mt.hpp" 9 | 10 | namespace rmm::test { 11 | namespace { 12 | 13 | INSTANTIATE_TEST_SUITE_P(ManagedResourceTests, 14 | mr_ref_test, 15 | ::testing::Values("Managed"), 16 | [](auto const& info) { return info.param; }); 17 | 18 | INSTANTIATE_TEST_SUITE_P(ManagedResourceAllocationTests, 19 | mr_ref_allocation_test, 20 | ::testing::Values("Managed"), 21 | [](auto const& info) { return info.param; }); 22 | 23 | INSTANTIATE_TEST_SUITE_P(ManagedMultiThreadResourceTests, 24 | mr_ref_test_mt, 25 | ::testing::Values("Managed"), 26 | [](auto const& info) { return info.param; }); 27 | 28 | } // namespace 29 | } // namespace rmm::test 30 | -------------------------------------------------------------------------------- /docs/python/pylibrmm.md: -------------------------------------------------------------------------------- 1 | # rmm.pylibrmm 2 | 3 | This module contains the low-level Cython bindings for RMM. Some components from this module are re-exported through the top-level `rmm` module for convenience, while others are available only through this module. 4 | 5 | ## Overview 6 | 7 | `rmm.pylibrmm` provides the Cython layer that wraps RMM's C++ functionality: 8 | 9 | - `DeviceBuffer` - GPU memory buffer (available as `rmm.DeviceBuffer`) 10 | - `memory_resource` - Memory resource implementations (available as `rmm.mr`) 11 | - Logging utilities (available through `rmm`) 12 | - CUDA stream wrappers (documented below) 13 | 14 | ## CUDA Stream Classes 15 | 16 | The stream classes are available only through `rmm.pylibrmm` and provide low-level CUDA stream management. 17 | 18 | ### rmm.pylibrmm.stream 19 | 20 | ```{eval-rst} 21 | .. automodule:: rmm.pylibrmm.stream 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | ``` 26 | 27 | ### rmm.pylibrmm.cuda_stream 28 | 29 | ```{eval-rst} 30 | .. 
automodule:: rmm.pylibrmm.cuda_stream 31 | :members: 32 | :undoc-members: 33 | :show-inheritance: 34 | ``` 35 | -------------------------------------------------------------------------------- /cpp/tests/mr/mr_ref_cuda_async_tests.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #include "mr_ref_test_allocation.hpp" 7 | #include "mr_ref_test_basic.hpp" 8 | #include "mr_ref_test_mt.hpp" 9 | 10 | namespace rmm::test { 11 | namespace { 12 | 13 | INSTANTIATE_TEST_SUITE_P(CudaAsyncResourceTests, 14 | mr_ref_test, 15 | ::testing::Values("CUDA_Async"), 16 | [](auto const& info) { return info.param; }); 17 | 18 | INSTANTIATE_TEST_SUITE_P(CudaAsyncResourceAllocationTests, 19 | mr_ref_allocation_test, 20 | ::testing::Values("CUDA_Async"), 21 | [](auto const& info) { return info.param; }); 22 | 23 | INSTANTIATE_TEST_SUITE_P(CudaAsyncMultiThreadResourceTests, 24 | mr_ref_test_mt, 25 | ::testing::Values("CUDA_Async"), 26 | [](auto const& info) { return info.param; }); 27 | 28 | } // namespace 29 | } // namespace rmm::test 30 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a bug report to help us improve RMM 4 | title: "[BUG]" 5 | labels: "? - Needs Triage, bug" 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **Steps/Code to reproduce bug** 14 | Follow this guide http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports to craft a minimal bug report. This helps us reproduce the issue you're having and resolve the issue more quickly. 
15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 18 | 19 | **Environment details (please complete the following information):** 20 | - Environment location: [Bare-metal, Docker, Cloud(specify cloud provider)] 21 | - Method of RMM install: [conda, Docker, or from source] 22 | - If method of install is [Docker], provide `docker pull` & `docker run` commands used 23 | - Please run and attach the output of the `rmm/print_env.sh` script to gather relevant environment details 24 | 25 | 26 | **Additional context** 27 | Add any other context about the problem here. 28 | -------------------------------------------------------------------------------- /python/rmm/rmm/allocators/cupy.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. 2 | # SPDX-License-Identifier: Apache-2.0 3 | from typing import Any 4 | 5 | from rmm import pylibrmm 6 | from rmm.pylibrmm.stream import Stream 7 | 8 | try: 9 | import cupy 10 | except ImportError: 11 | cupy = None 12 | 13 | 14 | def rmm_cupy_allocator(nbytes: int) -> Any: 15 | """ 16 | A CuPy allocator that makes use of RMM. 
17 | 18 | Examples 19 | -------- 20 | >>> from rmm.allocators.cupy import rmm_cupy_allocator 21 | >>> import cupy 22 | >>> cupy.cuda.set_allocator(rmm_cupy_allocator) 23 | """ 24 | if cupy is None: 25 | raise ModuleNotFoundError("No module named 'cupy'") 26 | 27 | stream = Stream(obj=cupy.cuda.get_current_stream()) 28 | buf = pylibrmm.device_buffer.DeviceBuffer(size=nbytes, stream=stream) 29 | dev_id = -1 if buf.ptr else cupy.cuda.device.get_device_id() 30 | mem = cupy.cuda.UnownedMemory( 31 | ptr=buf.ptr, size=buf.size, owner=buf, device_id=dev_id 32 | ) 33 | ptr = cupy.cuda.memory.MemoryPointer(mem, 0) 34 | 35 | return ptr 36 | -------------------------------------------------------------------------------- /python/rmm/rmm/allocators/torch.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | rmm_torch_allocator = None 5 | 6 | try: 7 | from torch.cuda.memory import CUDAPluggableAllocator 8 | except ImportError: 9 | pass 10 | else: 11 | from torch.cuda import is_available 12 | 13 | if is_available(): 14 | import pathlib 15 | 16 | # To support editable installs, we cannot search for the compiled torch 17 | # allocator .so relative to the current file because the current file 18 | # is pure Python and will therefore be in the source directory. 19 | # Instead, we search relative to an arbitrary file in the compiled 20 | # package. We use the librmm._logger module because it is small. 
21 | from rmm.librmm import _logger 22 | 23 | sofile = pathlib.Path(_logger.__file__).parent / "_torch_allocator.so" 24 | rmm_torch_allocator = CUDAPluggableAllocator( 25 | str(sofile.absolute()), 26 | alloc_fn_name="allocate", 27 | free_fn_name="deallocate", 28 | ) 29 | del pathlib, sofile 30 | del is_available 31 | -------------------------------------------------------------------------------- /conda/environments/all_cuda-129_arch-x86_64.yaml: -------------------------------------------------------------------------------- 1 | # This file is generated by `rapids-dependency-file-generator`. 2 | # To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 3 | channels: 4 | - rapidsai-nightly 5 | - rapidsai 6 | - conda-forge 7 | dependencies: 8 | - breathe>=4.35.0 9 | - c-compiler 10 | - clang-tools==20.1.4 11 | - clang==20.1.4 12 | - cmake>=3.30.4 13 | - cuda-nvcc 14 | - cuda-python>=12.9.2,<13.0 15 | - cuda-version=12.9 16 | - cxx-compiler 17 | - cython>=3.0.0,<3.2.0 18 | - doxygen=1.9.1 19 | - gcc_linux-64=14.* 20 | - gcovr>=5.0 21 | - graphviz 22 | - identify>=2.5.20 23 | - ipython 24 | - make 25 | - myst-parser 26 | - nbsphinx 27 | - ninja 28 | - numba-cuda>=0.22.1,<0.23.0 29 | - numba>=0.60.0,<0.62.0 30 | - numpy>=1.23,<3.0 31 | - numpydoc 32 | - packaging 33 | - pre-commit 34 | - pydata-sphinx-theme>=0.15.4 35 | - pytest-cov 36 | - pytest<9.0.0 37 | - python>=3.10,<3.14 38 | - rapids-build-backend>=0.4.0,<0.5.0 39 | - rapids-logger==0.2.*,>=0.0.0a0 40 | - scikit-build-core >=0.10.0 41 | - sphinx 42 | - sphinx-copybutton 43 | - sphinx-markdown-tables 44 | - sphinxcontrib-jquery 45 | - sysroot_linux-64==2.28 46 | name: all_cuda-129_arch-x86_64 47 | -------------------------------------------------------------------------------- /conda/environments/all_cuda-130_arch-x86_64.yaml: -------------------------------------------------------------------------------- 1 | # This file is generated by `rapids-dependency-file-generator`. 
2 | # To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 3 | channels: 4 | - rapidsai-nightly 5 | - rapidsai 6 | - conda-forge 7 | dependencies: 8 | - breathe>=4.35.0 9 | - c-compiler 10 | - clang-tools==20.1.4 11 | - clang==20.1.4 12 | - cmake>=3.30.4 13 | - cuda-nvcc 14 | - cuda-python>=13.0.1,<14.0 15 | - cuda-version=13.0 16 | - cxx-compiler 17 | - cython>=3.0.0,<3.2.0 18 | - doxygen=1.9.1 19 | - gcc_linux-64=14.* 20 | - gcovr>=5.0 21 | - graphviz 22 | - identify>=2.5.20 23 | - ipython 24 | - make 25 | - myst-parser 26 | - nbsphinx 27 | - ninja 28 | - numba-cuda>=0.22.1,<0.23.0 29 | - numba>=0.60.0,<0.62.0 30 | - numpy>=1.23,<3.0 31 | - numpydoc 32 | - packaging 33 | - pre-commit 34 | - pydata-sphinx-theme>=0.15.4 35 | - pytest-cov 36 | - pytest<9.0.0 37 | - python>=3.10,<3.14 38 | - rapids-build-backend>=0.4.0,<0.5.0 39 | - rapids-logger==0.2.*,>=0.0.0a0 40 | - scikit-build-core >=0.10.0 41 | - sphinx 42 | - sphinx-copybutton 43 | - sphinx-markdown-tables 44 | - sphinxcontrib-jquery 45 | - sysroot_linux-64==2.28 46 | name: all_cuda-130_arch-x86_64 47 | -------------------------------------------------------------------------------- /cpp/src/cuda_stream_pool.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. 
namespace rmm {

// Eagerly construct a pool of `pool_size` CUDA streams, each created with
// `flags`. Creating all streams up front lets `get_stream()` be noexcept.
cuda_stream_pool::cuda_stream_pool(std::size_t pool_size, cuda_stream::flags flags)
{
  RMM_EXPECTS(pool_size > 0, "Stream pool size must be greater than zero");
  streams_.reserve(pool_size);
  std::generate_n(
    std::back_inserter(streams_), pool_size, [flags]() { return cuda_stream(flags); });
}

// Hand out streams in round-robin order. The relaxed atomic fetch_add makes
// concurrent calls safe; no ordering beyond the counter itself is needed.
rmm::cuda_stream_view cuda_stream_pool::get_stream() const noexcept
{
  return streams_[(next_stream.fetch_add(1, std::memory_order_relaxed)) % streams_.size()].view();
}

// Map an arbitrary `stream_id` onto a pooled stream; the modulo guarantees
// every id is valid, so equal ids always yield the same stream.
rmm::cuda_stream_view cuda_stream_pool::get_stream(std::size_t stream_id) const
{
  return streams_[stream_id % streams_.size()].view();
}

// Number of streams owned by the pool (fixed at construction).
std::size_t cuda_stream_pool::get_pool_size() const noexcept { return streams_.size(); }

}  // namespace rmm
# SPDX-License-Identifier: Apache-2.0

"""Utility functions for rmm"""

from rmm.pylibrmm.stream cimport Stream


cdef Stream as_stream(Stream stream) except *:
    """
    Convert a stream argument to a Stream instance.

    This function converts the provided stream argument to a valid Stream
    instance. For now, all it does is check for None and raise a TypeError if
    the argument is None. In the future it will be extended to accept other
    types of stream representations i.e. anything supporting the
    __cuda_stream__ protocol.

    Parameters
    ----------
    stream : Stream
        The stream to convert

    Returns
    -------
    Stream
        The converted Stream instance

    Raises
    ------
    TypeError
        If stream is None

    """
    # NOTE(review): the typed `Stream stream` parameter presumably already
    # rejects non-Stream objects at the Cython call boundary, so None is the
    # only value that needs an explicit check here — confirm against callers.
    if stream is None:
        raise TypeError(
            "stream argument cannot be None. "
            "Please provide a valid Stream instance."
        )
    return stream
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0

set -euo pipefail

rapids-logger "Create checks conda environment"

# Make `conda activate` available in this non-interactive shell.
. /opt/conda/etc/profile.d/conda.sh

rapids-logger "Configuring conda strict channel priority"
conda config --set channel_priority strict

# Generate the env spec for the "checks" file-key from dependencies.yaml.
rapids-dependency-file-generator \
  --output conda \
  --file-key checks \
  --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml

rapids-mamba-retry env create --yes -f env.yaml -n checks
conda activate checks

# Fetch the rapids-cmake cmake-format config matching this repo's branch so
# the cmake-format pre-commit hook uses the right rules.
RAPIDS_BRANCH="$(cat "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../RAPIDS_BRANCH)"
FORMAT_FILE_URL="https://raw.githubusercontent.com/rapidsai/rapids-cmake/${RAPIDS_BRANCH}/cmake-format-rapids-cmake.json"
RAPIDS_CMAKE_FORMAT_FILE=/tmp/rapids_cmake_ci/cmake-format-rapids-cmake.json
export RAPIDS_CMAKE_FORMAT_FILE
mkdir -p "$(dirname ${RAPIDS_CMAKE_FORMAT_FILE})"
wget -O ${RAPIDS_CMAKE_FORMAT_FILE} "${FORMAT_FILE_URL}"

# Run pre-commit checks
pre-commit run --all-files --show-diff-on-failure
-w "${dist_dir}" -v --no-deps --disable-pip-version-check 27 | 28 | rapids-telemetry-record sccache-stats.txt sccache --show-adv-stats 29 | sccache --stop-server >/dev/null 2>&1 || true 30 | 31 | python -m auditwheel repair \ 32 | --exclude librapids_logger.so \ 33 | -w "${RAPIDS_WHEEL_BLD_OUTPUT_DIR}" \ 34 | "${dist_dir}"/* 35 | 36 | ../../ci/validate_wheel.sh "${RAPIDS_WHEEL_BLD_OUTPUT_DIR}" 37 | -------------------------------------------------------------------------------- /cpp/benchmarks/cuda_stream_pool/cuda_stream_pool_bench.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2020-2021, NVIDIA CORPORATION. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | #include 12 | 13 | #include 14 | 15 | static void BM_StreamPoolGetStream(benchmark::State& state) 16 | { 17 | rmm::cuda_stream_pool stream_pool{}; 18 | 19 | for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) 20 | auto stream = stream_pool.get_stream(); 21 | cudaStreamQuery(stream.value()); 22 | } 23 | 24 | state.SetItemsProcessed(static_cast(state.iterations())); 25 | } 26 | BENCHMARK(BM_StreamPoolGetStream)->Unit(benchmark::kMicrosecond); 27 | 28 | static void BM_CudaStreamClass(benchmark::State& state) 29 | { 30 | for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) 31 | auto stream = rmm::cuda_stream{}; 32 | cudaStreamQuery(stream.view().value()); 33 | } 34 | 35 | state.SetItemsProcessed(static_cast(state.iterations())); 36 | } 37 | BENCHMARK(BM_CudaStreamClass)->Unit(benchmark::kMicrosecond); 38 | 39 | BENCHMARK_MAIN(); 40 | -------------------------------------------------------------------------------- /python/rmm/rmm/tests/test_cython.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. 
# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0

"""Expose Cython-compiled test functions as plain Python tests.

pytest only collects plain Python functions, so each ``test_*`` callable
found in the compiled test modules is re-wrapped and re-exported from this
module under the same name.
"""

import functools
import importlib
import sys
from collections.abc import Callable
from typing import Any


def py_func(func: Callable[..., Any]) -> Callable[..., Any]:
    """
    Wraps func in a plain Python function.
    """

    @functools.wraps(func)
    def wrapped(*args: Any, **kwargs: Any) -> Any:
        return func(*args, **kwargs)

    return wrapped


cython_test_modules = ["rmm.pylibrmm.tests.test_device_buffer"]


for mod_name in cython_test_modules:
    try:
        # For each callable in the compiled module whose name starts with
        # `test_`, wrap it in a plain Python function and publish it as an
        # attribute of this module so pytest can collect it.
        mod = importlib.import_module(mod_name)
        this_module = sys.modules[__name__]
        for attr_name in dir(mod):
            candidate = getattr(mod, attr_name)
            if attr_name.startswith("test_") and callable(candidate):
                setattr(this_module, attr_name, py_func(candidate))
    except ImportError:
        # Compiled test extension not built/installed; expose nothing.
        pass
"internal_var_pattern": "_[A-z][0-9A-z_]+", 36 | "local_var_pattern": "[A-z][A-z0-9_]+", 37 | "private_var_pattern": "_[0-9A-z_]+", 38 | "public_var_pattern": "[A-z][0-9A-z_]+", 39 | "argument_var_pattern": "[A-z][A-z0-9_]+", 40 | "keyword_pattern": "[A-z][0-9A-z_]+" 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /cpp/src/prefetch.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | namespace rmm { 12 | 13 | void prefetch(void const* ptr, 14 | std::size_t size, 15 | rmm::cuda_device_id device, 16 | rmm::cuda_stream_view stream) 17 | { 18 | if (!rmm::detail::concurrent_managed_access::is_supported()) { return; } 19 | 20 | #if defined(CUDART_VERSION) && CUDART_VERSION >= 13000 21 | cudaMemLocation location{ 22 | (device.value() == cudaCpuDeviceId) ? cudaMemLocationTypeHost : cudaMemLocationTypeDevice, 23 | device.value()}; 24 | constexpr int flags = 0; 25 | cudaError_t result = cudaMemPrefetchAsync(ptr, size, location, flags, stream.value()); 26 | #else 27 | cudaError_t result = cudaMemPrefetchAsync(ptr, size, device.value(), stream.value()); 28 | #endif 29 | // cudaErrorInvalidValue is returned when non-managed memory is passed to 30 | // cudaMemPrefetchAsync. We treat this as a no-op. 31 | if (result != cudaErrorInvalidValue && result != cudaSuccess) { RMM_CUDA_TRY(result); } 32 | } 33 | 34 | } // namespace rmm 35 | -------------------------------------------------------------------------------- /ci/test_wheel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0

set -eou pipefail

source rapids-init-pip

# Locate the librmm (cpp) and rmm (python) wheels built earlier in CI.
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
LIBRMM_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="librmm_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp)
RMM_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="rmm_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github python)

# generate constraints (possibly pinning to oldest support versions of dependencies)
rapids-generate-pip-constraints test_python ./constraints.txt

# notes:
#
#   * echo to expand wildcard before adding `[test]` requires for pip
#   * need to provide --constraint="${PIP_CONSTRAINT}" because that environment variable is
#     ignored if any other --constraint are passed via the CLI
#
rapids-pip-retry install \
  -v \
  --constraint ./constraints.txt \
  --constraint "${PIP_CONSTRAINT}" \
  "$(echo "${LIBRMM_WHEELHOUSE}"/librmm_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" \
  "$(echo "${RMM_WHEELHOUSE}"/rmm_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test]"

# Hard 15-minute cap so a hung GPU test fails the job instead of stalling it.
timeout 15m python -m pytest ./python/rmm/rmm/tests
# SPDX-License-Identifier: Apache-2.0

"""Tests for PrefetchResourceAdaptor."""

import numpy as np
import pytest
from cuda.bindings import runtime
from test_helpers import (
    _CONCURRENT_MANAGED_ACCESS_SUPPORTED,
    assert_prefetched,
    )

import rmm


@pytest.mark.parametrize("managed", [True, False])
def test_prefetch_resource_adaptor(managed):
    """Allocations via PrefetchResourceAdaptor are prefetched to the device.

    Runs over both managed and plain CUDA upstream resources; the prefetch
    assertion only applies to managed memory on devices with concurrent
    managed access support, so it is guarded accordingly.
    """
    if managed:
        upstream_mr = rmm.mr.ManagedMemoryResource()
    else:
        upstream_mr = rmm.mr.CudaMemoryResource()
    mr = rmm.mr.PrefetchResourceAdaptor(upstream_mr)
    rmm.mr.set_current_device_resource(mr)

    # This allocation should be prefetched
    db = rmm.DeviceBuffer.to_device(np.zeros(256, dtype="u1"))

    err, device_id = runtime.cudaGetDevice()
    assert err == runtime.cudaError_t.cudaSuccess

    if managed and _CONCURRENT_MANAGED_ACCESS_SUPPORTED:
        assert_prefetched(db, device_id)
    db.prefetch()  # just test that it doesn't throw
    if managed and _CONCURRENT_MANAGED_ACCESS_SUPPORTED:
        assert_prefetched(db, device_id)
# SPDX-License-Identifier: Apache-2.0

from rmm.librmm.cuda_stream_view cimport cuda_stream_view
from rmm.librmm.device_buffer cimport device_buffer
from rmm.librmm.memory_resource cimport device_memory_resource


# Declarations for rmm::device_uvector<T>, a typed, uninitialized device
# vector. The extern block must name the header that actually defines the
# class: rmm/device_uvector.hpp (previously this said rmm/device_buffer.hpp,
# which only compiles while that header transitively pulls in the right one).
cdef extern from "rmm/device_uvector.hpp" namespace "rmm" nogil:
    cdef cppclass device_uvector[T]:
        # Construction allocates `size` elements on `stream`; may throw.
        device_uvector(size_t size, cuda_stream_view stream) except +
        T* element_ptr(size_t index)
        void set_element(size_t element_index, const T& v, cuda_stream_view s)
        void set_element_async(
            size_t element_index,
            const T& v,
            cuda_stream_view s
        ) except +
        # Element reads synchronize on the given stream and may throw.
        T front_element(cuda_stream_view s) except +
        T back_element(cuda_stream_view s) except +
        void reserve(size_t new_capacity, cuda_stream_view stream) except +
        void resize(size_t new_size, cuda_stream_view stream) except +
        void shrink_to_fit(cuda_stream_view stream) except +
        # Releases ownership of the underlying storage as a device_buffer.
        device_buffer release()
        size_t capacity()
        T* data()
        size_t size()
        device_memory_resource* memory_resource()
namespace rmm {

// True iff `value` is a nonzero power of two (exactly one bit set).
bool is_pow2(std::size_t value) noexcept
{
  return (value != 0U) && ((value & (value - 1)) == 0U);
}

// Supported alignments are exactly the powers of two.
bool is_supported_alignment(std::size_t alignment) noexcept { return is_pow2(alignment); }

// Round `value` up to the nearest multiple of `alignment` (a power of two).
std::size_t align_up(std::size_t value, std::size_t alignment) noexcept
{
  assert(is_supported_alignment(alignment));
  auto const mask = alignment - 1;
  return (value + mask) & ~mask;
}

// Round `value` down to the nearest multiple of `alignment` (a power of two).
std::size_t align_down(std::size_t value, std::size_t alignment) noexcept
{
  assert(is_supported_alignment(alignment));
  auto const mask = alignment - 1;
  return value & ~mask;
}

// True iff `value` is a multiple of `alignment` (a power of two).
bool is_aligned(std::size_t value, std::size_t alignment) noexcept
{
  assert(is_supported_alignment(alignment));
  return (value & (alignment - 1)) == 0U;
}

// True iff `ptr`'s address is aligned to `alignment` bytes.
bool is_pointer_aligned(void* ptr, std::size_t alignment) noexcept
{
  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
  return is_aligned(reinterpret_cast<std::uintptr_t>(ptr), alignment);
}

}  // namespace rmm
# SPDX-License-Identifier: Apache-2.0

"""Tests for FixedSizeMemoryResource."""

import pytest
from test_helpers import (
    _SYSTEM_MEMORY_SUPPORTED,
    _allocs,
    _dtypes,
    _nelems,
    array_tester,
    )

import rmm


@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
@pytest.mark.parametrize("alloc", _allocs)
@pytest.mark.parametrize(
    "upstream",
    [
        lambda: rmm.mr.CudaMemoryResource(),
        lambda: rmm.mr.ManagedMemoryResource(),
    ]
    + (
        [
            lambda: rmm.mr.SystemMemoryResource(),
            lambda: rmm.mr.SamHeadroomMemoryResource(headroom=1 << 20),
        ]
        if _SYSTEM_MEMORY_SUPPORTED
        else []
    ),
)
def test_fixed_size_memory_resource(dtype, nelem, alloc, upstream):
    """Allocations succeed through FixedSizeMemoryResource over each upstream.

    Upstreams are passed as factories (lambdas) so each parametrized case
    constructs a fresh resource; system-memory upstreams are included only
    when the platform supports them.
    """
    mr = rmm.mr.FixedSizeMemoryResource(
        upstream(), block_size=1 << 20, blocks_to_preallocate=128
    )
    rmm.mr.set_current_device_resource(mr)
    assert rmm.mr.get_current_device_resource_type() is type(mr)
    array_tester(dtype, nelem, alloc)
29 | * } 30 | * ``` 31 | */ 32 | using scoped_range = ::nvtx3::scoped_range_in; 33 | 34 | } // namespace rmm 35 | 36 | /** 37 | * @brief Convenience macro for generating an NVTX range in the `librmm` domain 38 | * from the lifetime of a function. 39 | * 40 | * Uses the name of the immediately enclosing function returned by `__func__` to 41 | * name the range. 42 | * 43 | * Example: 44 | * ```cpp 45 | * void some_function(){ 46 | * RMM_FUNC_RANGE(); 47 | * ... 48 | * } 49 | * ``` 50 | */ 51 | #define RMM_FUNC_RANGE() NVTX3_FUNC_RANGE_IN(rmm::librmm_domain) 52 | #else 53 | #define RMM_FUNC_RANGE() 54 | #endif 55 | -------------------------------------------------------------------------------- /python/rmm/rmm/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2018-2025, NVIDIA CORPORATION. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import warnings 5 | 6 | # This path is only taken for wheels where librmm is a separate Python package. 
try:
    import librmm
except ModuleNotFoundError:
    # Conda/system installs ship librmm outside of Python; nothing to load.
    pass
else:
    librmm.load_library()
    # Keep the package namespace clean of the loader module.
    del librmm

from rmm import mr
from rmm._version import __git_commit__, __version__
from rmm.mr import disable_logging, enable_logging, get_log_filenames
from rmm.pylibrmm.device_buffer import DeviceBuffer
from rmm.pylibrmm.logger import (
    flush_logger,
    get_flush_level,
    get_logging_level,
    level_enum,
    set_flush_level,
    set_logging_level,
    should_log,
)
from rmm.rmm import (
    RMMError,
    is_initialized,
    register_reinitialize_hook,
    reinitialize,
    unregister_reinitialize_hook,
)

# Public API re-exported at the package root.
__all__ = [
    "DeviceBuffer",
    "disable_logging",
    "RMMError",
    "enable_logging",
    "flush_logger",
    "get_flush_level",
    "get_log_filenames",
    "get_logging_level",
    "is_initialized",
    "level_enum",
    "mr",
    "register_reinitialize_hook",
    "reinitialize",
    "set_flush_level",
    "set_logging_level",
    "should_log",
    "unregister_reinitialize_hook",
]
# SPDX-License-Identifier: Apache-2.0

set -euo pipefail

source rapids-configure-sccache
source rapids-date-string

export CMAKE_GENERATOR=Ninja

rapids-print-env

rapids-generate-version > ./VERSION

rapids-logger "Begin py build"

# Channel containing the librmm conda packages built earlier in this pipeline.
CPP_CHANNEL=$(rapids-download-conda-from-github cpp)

sccache --stop-server 2>/dev/null || true

RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION)
export RAPIDS_PACKAGE_VERSION

# Creates and exports $RATTLER_CHANNELS
source rapids-rattler-channel-string

# Creates artifacts directory for telemetry
source rapids-telemetry-setup

# --no-build-id allows for caching with `sccache`
# more info is available at
# https://rattler.build/latest/tips_and_tricks/#using-sccache-or-ccache-with-rattler-build
rapids-telemetry-record build.log rattler-build build \
  --recipe conda/recipes/rmm \
  --experimental \
  --no-build-id \
  --output-dir "$RAPIDS_CONDA_BLD_OUTPUT_DIR" \
  -c "${CPP_CHANNEL}" \
  "${RATTLER_CHANNELS[@]}"

rapids-telemetry-record sccache-stats.txt sccache --show-adv-stats
sccache --stop-server >/dev/null 2>&1 || true

# See https://github.com/prefix-dev/rattler-build/issues/1424
rm -rf "$RAPIDS_CONDA_BLD_OUTPUT_DIR"/build_cache
# SPDX-License-Identifier: Apache-2.0

cimport cython
from enum import IntEnum
from cuda.bindings.cyruntime cimport cudaStream_t
from libcpp cimport bool

from rmm.librmm.cuda_stream cimport cuda_stream, cuda_stream_flags


class CudaStreamFlags(IntEnum):
    """
    Enumeration of CUDA stream creation flags.

    Attributes
    ----------
    SYNC_DEFAULT : int
        Created stream synchronizes with the default stream.
    NON_BLOCKING : int
        Created stream does not synchronize with the default stream.
    """
    # Values mirror the C++ rmm::cuda_stream_flags enumerators.
    SYNC_DEFAULT = (cuda_stream_flags.sync_default)
    NON_BLOCKING = (cuda_stream_flags.non_blocking)


@cython.final
cdef class CudaStream:
    """
    Wrapper around a CUDA stream with RAII semantics.
    When a CudaStream instance is GC'd, the underlying
    CUDA stream is destroyed.
    """
    def __cinit__(self):
        # Construct the owned rmm::cuda_stream without holding the GIL,
        # since stream creation may block in the CUDA driver.
        with nogil:
            self.c_obj.reset(new cuda_stream())

    def __dealloc__(self):
        # Resetting the unique_ptr destroys the stream (RAII teardown).
        with nogil:
            self.c_obj.reset()

    cdef cudaStream_t value(self) except * nogil:
        # Raw cudaStream_t handle; valid only while this object is alive.
        return self.c_obj.get()[0].value()

    cdef bool is_valid(self) except * nogil:
        # Delegates to rmm::cuda_stream::is_valid().
        return self.c_obj.get()[0].is_valid()
# SPDX-License-Identifier: Apache-2.0

set -euo pipefail

source rapids-configure-sccache
source rapids-date-string

export CMAKE_GENERATOR=Ninja

rapids-print-env

rapids-logger "Begin cpp build"

sccache --stop-server 2>/dev/null || true

RAPIDS_PACKAGE_VERSION=$(rapids-generate-version)
export RAPIDS_PACKAGE_VERSION

# Creates and exports $RATTLER_CHANNELS
source rapids-rattler-channel-string

# Creates artifacts directory for telemetry
source rapids-telemetry-setup

# --no-build-id allows for caching with `sccache`
# more info is available at
# https://rattler.build/latest/tips_and_tricks/#using-sccache-or-ccache-with-rattler-build
rapids-telemetry-record build.log rattler-build build \
  --recipe conda/recipes/librmm \
  --experimental \
  --no-build-id \
  --output-dir "$RAPIDS_CONDA_BLD_OUTPUT_DIR" \
  "${RATTLER_CHANNELS[@]}"

rapids-telemetry-record sccache-stats.txt sccache --show-adv-stats
sccache --stop-server >/dev/null 2>&1 || true

# remove build_cache directory
rm -rf "$RAPIDS_CONDA_BLD_OUTPUT_DIR"/build_cache

# Run the libcudacxx flag test at build time, since compilers are available
rapids-logger "Run libcudacxx_flag_test"
./cpp/tests/libcudacxx_flag_test/libcudacxx_flag_test.sh
# SPDX-License-Identifier: Apache-2.0

import gc

import pytest

from rmm.allocators.torch import rmm_torch_allocator

torch = pytest.importorskip("torch")


@pytest.fixture(scope="session")
def torch_allocator():
    """Install rmm_torch_allocator as torch's CUDA allocator (session-wide).

    Skips when CUDA or the pluggable-allocator API is unavailable; the
    change cannot be undone within the process, hence session scope.
    """
    if not torch.cuda.is_available():
        pytest.skip("pytorch built without CUDA support")
    try:
        from torch.cuda.memory import change_current_allocator
    except ImportError:
        pytest.skip("pytorch pluggable allocator not available")
    change_current_allocator(rmm_torch_allocator)


def test_rmm_torch_allocator(torch_allocator, stats_mr):
    """Torch tensor alloc/free should flow through the RMM statistics MR."""
    assert stats_mr.allocation_counts.current_bytes == 0
    x = torch.tensor([1, 2]).cuda()
    assert stats_mr.allocation_counts.current_bytes > 0
    del x
    # gc.collect() forces torch to actually release the tensor's memory.
    gc.collect()
    assert stats_mr.allocation_counts.current_bytes == 0


def test_rmm_torch_allocator_using_stream(torch_allocator, stats_mr):
    """Same as above, but allocating on a non-default torch CUDA stream."""
    assert stats_mr.allocation_counts.current_bytes == 0
    s = torch.cuda.Stream()
    with torch.cuda.stream(s):
        x = torch.tensor([1, 2]).cuda()
    torch.cuda.current_stream().wait_stream(s)
    assert stats_mr.allocation_counts.current_bytes > 0
    del x
    gc.collect()
    assert stats_mr.allocation_counts.current_bytes == 0
10 | #endif // LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE 11 | 12 | #include 13 | 14 | #if CCCL_MAJOR_VERSION < 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION < 1) 15 | #error "RMM requires CCCL version 3.1 or newer." 16 | #endif 17 | 18 | #include 19 | 20 | #include 21 | 22 | namespace RMM_NAMESPACE { 23 | namespace detail { 24 | namespace polyfill { 25 | 26 | template 27 | inline constexpr bool resource = cuda::mr::synchronous_resource; 28 | template 29 | inline constexpr bool resource_with = cuda::mr::synchronous_resource_with; 30 | template 31 | inline constexpr bool async_resource = cuda::mr::resource; 32 | template 33 | inline constexpr bool async_resource_with = cuda::mr::resource_with; 34 | 35 | } // namespace polyfill 36 | } // namespace detail 37 | } // namespace RMM_NAMESPACE 38 | -------------------------------------------------------------------------------- /cpp/tests/libcudacxx_flag_test/libcudacxx_flag_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0

set -euo pipefail

# Run from the directory containing this script so relative paths resolve.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "${SCRIPT_DIR}"

# Absolute path to the RMM include directory.
RMM_INCLUDE_DIR="${SCRIPT_DIR}/../../include"

echo "Testing compilation failure when LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE is not defined"
echo "Using RMM include directory: ${RMM_INCLUDE_DIR}"

# Capture compiler diagnostics in a temp file that is cleaned up on exit.
ERROR_FILE="$(mktemp)"
trap 'rm -f "${ERROR_FILE}"' EXIT

# Compiling without LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE must fail...
if g++ -std=c++17 -I"${RMM_INCLUDE_DIR}" libcudacxx_flag_test.cpp -o libcudacxx_flag_test 2> "${ERROR_FILE}"; then
    echo "Test failed: Compilation succeeded when it should have failed" >&2
    exit 1
fi

# ...and it must fail with the #error guard's message, not some other error.
if ! grep -q "RMM requires LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE to be defined" "${ERROR_FILE}"; then
    echo "Test failed: Compilation failed but with an unexpected error message:" >&2
    cat "${ERROR_FILE}"
    exit 1
fi

# Don't show the error message, to avoid confusing it with a real error in the CI logs.
echo "Test passed: Compilation failed with the expected error message"
exit 0
# SPDX-License-Identifier: Apache-2.0
# cmake-format: on
# =============================================================================

set(cython_sources _logger.pyx)
set(linked_libraries rmm::rmm)

# Compile each Cython source above into an extension module linked against librmm.
rapids_cython_create_modules(SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}"
                             CXX)

# Default all symbols in the generated modules to hidden visibility so they
# cannot collide with symbols loaded from other DSOs.
foreach(_target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
  set_target_properties(${_target} PROPERTIES C_VISIBILITY_PRESET hidden CXX_VISIBILITY_PRESET
                                              hidden)
endforeach()

# The pytorch pluggable-allocator shim is a plain shared library rather than a
# Python extension; force its output name to be exactly _torch_allocator.so.
add_library(_torch_allocator SHARED _torch_allocator.cpp)
set_target_properties(_torch_allocator PROPERTIES PREFIX "" SUFFIX ".so")
target_link_libraries(_torch_allocator PRIVATE rmm::rmm)

# Install next to this directory's sources in the wheel/package layout.
cmake_path(RELATIVE_PATH CMAKE_CURRENT_SOURCE_DIR BASE_DIRECTORY "${PROJECT_SOURCE_DIR}"
           OUTPUT_VARIABLE _torch_allocator_location)
install(TARGETS _torch_allocator DESTINATION "${_torch_allocator_location}")
# SPDX-License-Identifier: Apache-2.0

"""Experimental memory resource features."""

from libc.stdint cimport uintptr_t

from rmm.librmm.memory_resource cimport cuda_async_managed_memory_resource
# import from the private _memory_resource to avoid a circular import
from rmm.pylibrmm.memory_resource._memory_resource cimport DeviceMemoryResource


cdef class CudaAsyncManagedMemoryResource(DeviceMemoryResource):
    """
    Memory resource that uses ``cudaMallocFromPoolAsync``/``cudaFreeAsync`` for
    allocation/deallocation with a managed memory pool.

    This resource uses the default managed memory pool for the current device.
    Managed memory can be accessed from both the host and device.

    Requires CUDA 13.0 or higher and support for concurrent managed access
    (not supported on WSL2).
    """
    def __cinit__(self):
        self.c_obj.reset(
            new cuda_async_managed_memory_resource()
        )

    def pool_handle(self):
        """
        Returns the underlying CUDA memory pool handle.

        Returns
        -------
        int
            Handle to the underlying CUDA memory pool
        """
        # Downcast from the type-erased base pointer held by
        # DeviceMemoryResource; safe because __cinit__ always installs a
        # cuda_async_managed_memory_resource.
        cdef cuda_async_managed_memory_resource* c_mr = \
            <cuda_async_managed_memory_resource*>(self.c_obj.get())
        # Expose the opaque pool handle to Python as an integer.
        return <uintptr_t>(c_mr.pool_handle())
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0

set -euo pipefail

rapids-logger "Downloading artifacts from previous jobs"
CPP_CHANNEL=$(rapids-download-conda-from-github cpp)
PYTHON_CHANNEL=$(rapids-download-conda-from-github python)

rapids-logger "Create test conda environment"

. /opt/conda/etc/profile.d/conda.sh

rapids-logger "Configuring conda strict channel priority"
conda config --set channel_priority strict

RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
export RAPIDS_VERSION_MAJOR_MINOR

# Generate a docs environment spec that also resolves against the freshly
# built C++ and Python conda channels downloaded above.
rapids-dependency-file-generator \
    --output conda \
    --file-key docs \
    --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" \
    --prepend-channel "${CPP_CHANNEL}" \
    --prepend-channel "${PYTHON_CHANNEL}" \
    | tee env.yaml

rapids-mamba-retry env create --yes -f env.yaml -n docs
conda activate docs

rapids-print-env

# Staging directory consumed by rapids-upload-docs.
RAPIDS_DOCS_DIR="$(mktemp -d)"
export RAPIDS_DOCS_DIR

rapids-logger "Build CPP docs"
pushd cpp/doxygen
doxygen Doxyfile
mkdir -p "${RAPIDS_DOCS_DIR}/librmm/html"
mv html/* "${RAPIDS_DOCS_DIR}/librmm/html"
popd

rapids-logger "Build Python docs"
pushd docs
make dirhtml
mkdir -p "${RAPIDS_DOCS_DIR}/rmm/html"
mv _build/dirhtml/* "${RAPIDS_DOCS_DIR}/rmm/html"
popd

RAPIDS_VERSION_NUMBER="${RAPIDS_VERSION_MAJOR_MINOR}" rapids-upload-docs
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0

set -eEuo pipefail

echo "checking for symbol visibility issues"

WHEEL_FILE=${1}

# Print an actionable error for exported symbols that should have been given
# hidden visibility, then abort the check.
raise-symbols-found-error() {
    local pattern="${1}"

    err_msg="ERROR: Found some exported symbols matching the pattern '${pattern}'.

These should be marked with 'hidden' visibility.
See https://cmake.org/cmake/help/latest/prop_tgt/LANG_VISIBILITY_PRESET.html and https://gcc.gnu.org/wiki/Visibility for details.
"

    echo ""
    echo "${err_msg}"
    exit 1
}

WHEEL_EXPORT_DIR="$(mktemp -d)"

unzip \
    -d "${WHEEL_EXPORT_DIR}" \
    "${WHEEL_FILE}"

# Every shared object shipped inside the wheel.
dso_files=$(
    find \
        "${WHEEL_EXPORT_DIR}" \
        -type f \
        \( -name '*.so' -o -name '*.so.*' \)
)

for dso_file in ${dso_files}; do
    echo ""
    echo "checking exported symbols in '${dso_file}'"
    symbol_file="./syms.txt"
    # Demangle so any pattern matching can work on readable C++ names.
    readelf --symbols --wide "${dso_file}" \
        | c++filt \
        > "${symbol_file}"

    echo "symbol counts by type"
    echo " * GLOBAL: $(grep --count -E ' GLOBAL ' < ${symbol_file})"
    echo " * WEAK: $(grep --count -E ' WEAK ' < ${symbol_file})"
    echo " * LOCAL: $(grep --count -E ' LOCAL ' < ${symbol_file})"

    # NOTE(review): raise-symbols-found-error is defined above but never
    # invoked — this loop only prints counts and then unconditionally reports
    # success. Confirm whether grep-based pattern checks (which would call
    # raise-symbols-found-error) were dropped from this script.
    echo "No symbol visibility issues found"
done

echo ""
echo "No symbol visibility issues found in any DSOs"
SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE=true 38 | ENV SCCACHE_IDLE_TIMEOUT=0 39 | 40 | ### 41 | # sccache-dist configuration 42 | ### 43 | # Enable sccache-dist by default 44 | ENV DEVCONTAINER_UTILS_ENABLE_SCCACHE_DIST=1 45 | # Compile locally if max retries exceeded 46 | ENV SCCACHE_DIST_FALLBACK_TO_LOCAL_COMPILE=true 47 | # Retry transient errors 4 times (for a total of 5 attempts) 48 | ENV SCCACHE_DIST_MAX_RETRIES=4 49 | # 1hr 59min (to accommodate debug builds) 50 | ENV SCCACHE_DIST_REQUEST_TIMEOUT=7140 51 | ENV SCCACHE_DIST_URL="https://${TARGETARCH}.linux.sccache.rapids.nvidia.com" 52 | 53 | # Build as much in parallel as possible 54 | ENV INFER_NUM_DEVICE_ARCHITECTURES=1 55 | ENV MAX_DEVICE_OBJ_TO_COMPILE_IN_PARALLEL=20 56 | -------------------------------------------------------------------------------- /print_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 3 | # SPDX-License-Identifier: Apache-2.0 4 | # Reports relevant environment information useful for diagnosing and 5 | # debugging RMM issues. 
# Usage:
#   "./print_env.sh"           - prints to stdout
#   "./print_env.sh > env.txt" - prints to file "env.txt"

echo "**git***"
git log --decorate -n 1
echo

echo "***OS Information***"
cat /etc/*-release
uname -a
echo

echo "***GPU Information***"
nvidia-smi
echo

echo "***CPU***"
lscpu
echo

echo "***CMake***"
which cmake && cmake --version
echo

echo "***g++***"
which g++ && g++ --version
echo

echo "***nvcc***"
which nvcc && nvcc --version
echo

echo "***Python***"
which python && python --version
echo

echo "***Environment Variables***"

# Left-align the variable name in a 32-character column.
print_var() {
    printf '%-32s: %s\n' "$1" "$2"
}

print_var PATH "$PATH"
print_var LD_LIBRARY_PATH "$LD_LIBRARY_PATH"
print_var NUMBAPRO_NVVM "$NUMBAPRO_NVVM"
print_var NUMBAPRO_LIBDEVICE "$NUMBAPRO_LIBDEVICE"
print_var CONDA_PREFIX "$CONDA_PREFIX"
print_var PYTHON_PATH "$PYTHON_PATH"

echo

# Prefer conda's package listing; fall back to pip when only pip is present.
if type "conda" > /dev/null; then
    echo '***conda packages***'
    which conda && conda list
    echo
elif type "pip" > /dev/null; then
    echo "***pip packages***"
    which pip && pip list
    echo
fi
18 | CompileFlags: 19 | Add: 20 | - "-x" 21 | - "cuda" 22 | # No error on unknown CUDA versions 23 | - "-Wno-unknown-cuda-version" 24 | # Allow variadic CUDA functions 25 | - "-Xclang=-fcuda-allow-variadic-functions" 26 | Diagnostics: 27 | Suppress: 28 | - "variadic_device_fn" 29 | - "attributes_not_allowed" 30 | 31 | --- 32 | 33 | # Tweak the clangd parse settings for all files 34 | CompileFlags: 35 | Add: 36 | # report all errors 37 | - "-ferror-limit=0" 38 | - "-fmacro-backtrace-limit=0" 39 | - "-ftemplate-backtrace-limit=0" 40 | # Skip the CUDA version check 41 | - "--no-cuda-version-check" 42 | Remove: 43 | # remove gcc's -fcoroutines 44 | - -fcoroutines 45 | # remove nvc++ flags unknown to clang 46 | - "-gpu=*" 47 | - "-stdpar*" 48 | # remove nvcc flags unknown to clang 49 | - "-arch*" 50 | - "-gencode*" 51 | - "--generate-code*" 52 | - "-ccbin*" 53 | - "-t=*" 54 | - "--threads*" 55 | - "-Xptxas*" 56 | - "-Xcudafe*" 57 | - "-Xfatbin*" 58 | - "-Xcompiler*" 59 | - "--diag-suppress*" 60 | - "--diag_suppress*" 61 | - "--compiler-options*" 62 | - "--expt-extended-lambda" 63 | - "--expt-relaxed-constexpr" 64 | - "-forward-unknown-to-host-compiler" 65 | - "-Werror=cross-execution-space-call" 66 | -------------------------------------------------------------------------------- /python/rmm/rmm/tests/test_failure_callback_resource_adaptor.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0

"""Tests for FailureCallbackResourceAdaptor."""

import pytest

import rmm


def _make_failing_mr():
    """Return a CallbackMemoryResource whose allocate always raises MemoryError."""
    def allocate_func(size, stream):
        raise MemoryError("Intentional allocation failure")

    def deallocate_func(ptr, size, stream):
        pass

    return rmm.mr.CallbackMemoryResource(allocate_func, deallocate_func)


def test_failure_callback_resource_adaptor():
    """The failure callback may request exactly one retry before giving up."""
    retried = [False]

    def callback(nbytes: int) -> bool:
        # Allow a single retry, then let the MemoryError propagate.
        if retried[0]:
            return False
        retried[0] = True
        return True

    mr = rmm.mr.FailureCallbackResourceAdaptor(_make_failing_mr(), callback)
    rmm.mr.set_current_device_resource(mr)

    with pytest.raises(MemoryError):
        rmm.DeviceBuffer(size=256)
    assert retried[0]


def test_failure_callback_resource_adaptor_error():
    """Exceptions raised inside the callback propagate to the allocating call."""
    def callback(nbytes: int) -> bool:
        raise RuntimeError("MyError")

    mr = rmm.mr.FailureCallbackResourceAdaptor(_make_failing_mr(), callback)
    rmm.mr.set_current_device_resource(mr)

    with pytest.raises(RuntimeError, match="MyError"):
        rmm.DeviceBuffer(size=256)
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0
##############################
# RMM doxygen warnings check #
##############################

# skip if doxygen is not installed
if ! [ -x "$(command -v doxygen)" ]; then
    echo -e "warning: doxygen is not installed"
    exit 0
fi

# Convert a dotted version string into a zero-padded integer so versions can
# be compared numerically.
function version { echo "$@" | awk -F. '{ printf("%d%03d%03d%03d\n", $1,$2,$3,$4); }'; }

# Only doxygen 1.9.1 is supported; other versions emit different warnings.
DOXYGEN_VERSION=$(doxygen --version)
if [ ! "$(version "$DOXYGEN_VERSION")" -eq "$(version "1.9.1")" ] ; then
    echo -e "warning: Unsupported doxygen version $DOXYGEN_VERSION"
    echo -e "Expecting doxygen version 1.9.1"
    exit 0
fi

# Exported for Doxyfile substitution.
RAPIDS_VERSION="$(sed -E -e 's/^([0-9]{2})\.([0-9]{2})\.([0-9]{2}).*$/\1.\2.\3/' VERSION)"
RAPIDS_VERSION_MAJOR_MINOR="$(sed -E -e 's/^([0-9]{2})\.([0-9]{2})\.([0-9]{2}).*$/\1.\2/' VERSION)"
export RAPIDS_VERSION
export RAPIDS_VERSION_MAJOR_MINOR

# Run doxygen headless and strip the known-benign "missing tag file" errors.
TAG_ERROR1="error: Tag file '.*.tag' does not exist or is not a file. Skipping it..."
TAG_ERROR2="error: cannot open tag file .*.tag for writing"
DOXYGEN_STDERR=$(cd cpp/doxygen && { cat Doxyfile ; echo QUIET = YES; echo GENERATE_HTML = NO; } | doxygen - 2>&1 | sed "/\($TAG_ERROR1\|$TAG_ERROR2\)/d")
RETVAL=$?

# NOTE: without pipefail, $? above is the exit status of the trailing `sed`,
# not of doxygen itself — which is why any remaining stderr output also
# forces a failure below.
if [ "$RETVAL" != "0" ] || [ -n "$DOXYGEN_STDERR" ]; then
    echo -e "$DOXYGEN_STDERR"
    RETVAL=1 #because return value is not generated by doxygen 1.8.20
fi

exit $RETVAL
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #pragma once 7 | 8 | #include "mr_ref_test.hpp" 9 | 10 | namespace rmm::test { 11 | 12 | // Parameterized test definitions for mr_ref_allocation_test 13 | 14 | TEST_P(mr_ref_allocation_test, AllocateDefault) { test_various_allocations(this->ref); } 15 | 16 | TEST_P(mr_ref_allocation_test, AllocateDefaultStream) 17 | { 18 | test_various_async_allocations(this->ref, cuda_stream_view{}); 19 | } 20 | 21 | TEST_P(mr_ref_allocation_test, AllocateOnStream) 22 | { 23 | test_various_async_allocations(this->ref, this->stream); 24 | } 25 | 26 | TEST_P(mr_ref_allocation_test, RandomAllocations) { test_random_allocations(this->ref); } 27 | 28 | TEST_P(mr_ref_allocation_test, RandomAllocationsDefaultStream) 29 | { 30 | test_random_async_allocations( 31 | this->ref, default_num_allocations, default_max_size, cuda_stream_view{}); 32 | } 33 | 34 | TEST_P(mr_ref_allocation_test, RandomAllocationsStream) 35 | { 36 | test_random_async_allocations(this->ref, default_num_allocations, default_max_size, this->stream); 37 | } 38 | 39 | TEST_P(mr_ref_allocation_test, MixedRandomAllocationFree) 40 | { 41 | test_mixed_random_allocation_free(this->ref, default_max_size); 42 | } 43 | 44 | TEST_P(mr_ref_allocation_test, MixedRandomAllocationFreeDefaultStream) 45 | { 46 | test_mixed_random_async_allocation_free(this->ref, default_max_size, cuda_stream_view{}); 47 | } 48 | 49 | TEST_P(mr_ref_allocation_test, MixedRandomAllocationFreeStream) 50 | { 51 | test_mixed_random_async_allocation_free(this->ref, default_max_size, this->stream); 52 | } 53 | 54 | } // namespace rmm::test 55 | -------------------------------------------------------------------------------- /python/rmm/rmm/tests/test_binning_memory_resource.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0

"""Tests for BinningMemoryResource."""

import pytest
from test_helpers import (
    _SYSTEM_MEMORY_SUPPORTED,
    _allocs,
    _dtypes,
    _nelems,
    array_tester,
)

import rmm

# Factories (not instances) so each parametrized case builds a fresh upstream.
_UPSTREAM_FACTORIES = [
    lambda: rmm.mr.CudaMemoryResource(),
    lambda: rmm.mr.ManagedMemoryResource(),
    lambda: rmm.mr.PoolMemoryResource(
        rmm.mr.CudaMemoryResource(), 1 << 20
    ),
] + (
    [
        lambda: rmm.mr.SystemMemoryResource(),
        lambda: rmm.mr.SamHeadroomMemoryResource(headroom=1 << 20),
    ]
    if _SYSTEM_MEMORY_SUPPORTED
    else []
)


@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
@pytest.mark.parametrize("alloc", _allocs)
@pytest.mark.parametrize("upstream_mr", _UPSTREAM_FACTORIES)
def test_binning_memory_resource(dtype, nelem, alloc, upstream_mr):
    """Allocation works through a binning MR with auto-generated and explicit bins."""
    upstream = upstream_mr()

    # Auto-generated fixed-size bins covering 2^18..2^22 bytes:
    # 256KiB, 512KiB, 1MiB, 2MiB, 4MiB.
    mr = rmm.mr.BinningMemoryResource(upstream, 18, 22)

    # Also register two explicit bins backed by different resources.
    fixed_mr = rmm.mr.FixedSizeMemoryResource(upstream, 1 << 10)
    cuda_mr = rmm.mr.CudaMemoryResource()
    mr.add_bin(1 << 10, fixed_mr)  # 1KiB bin
    mr.add_bin(1 << 23, cuda_mr)  # 8MiB bin

    rmm.mr.set_current_device_resource(mr)
    assert rmm.mr.get_current_device_resource_type() is type(mr)
    array_tester(dtype, nelem, alloc)
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | #include 13 | 14 | namespace rmm { 15 | 16 | cuda_stream::cuda_stream(cuda_stream::flags flags) 17 | : stream_{[flags]() { 18 | auto* stream = new cudaStream_t; // NOLINT(cppcoreguidelines-owning-memory) 19 | // TODO: use std::to_underlying once C++23 is allowed. 20 | RMM_CUDA_TRY(cudaStreamCreateWithFlags( 21 | stream, static_cast>(flags))); 22 | return stream; 23 | }(), 24 | [](cudaStream_t* stream) { 25 | RMM_ASSERT_CUDA_SUCCESS(cudaStreamDestroy(*stream)); 26 | delete stream; // NOLINT(cppcoreguidelines-owning-memory) 27 | }} 28 | { 29 | } 30 | 31 | bool cuda_stream::is_valid() const { return stream_ != nullptr; } 32 | 33 | cudaStream_t cuda_stream::value() const 34 | { 35 | RMM_LOGGING_ASSERT(is_valid()); 36 | return *stream_; 37 | } 38 | 39 | cuda_stream::operator cudaStream_t() const noexcept { return value(); } 40 | 41 | cuda_stream_view cuda_stream::view() const { return cuda_stream_view{value()}; } 42 | 43 | cuda_stream::operator cuda_stream_view() const { return view(); } 44 | 45 | void cuda_stream::synchronize() const { RMM_CUDA_TRY(cudaStreamSynchronize(value())); } 46 | 47 | void cuda_stream::synchronize_no_throw() const noexcept 48 | { 49 | RMM_ASSERT_CUDA_SUCCESS(cudaStreamSynchronize(value())); 50 | } 51 | 52 | } // namespace rmm 53 | -------------------------------------------------------------------------------- /cpp/tests/mr/mr_ref_test_basic.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #pragma once 7 | 8 | #include "mr_ref_test.hpp" 9 | 10 | namespace rmm::test { 11 | 12 | // Parameterized test definitions for mr_ref_test (basic tests) 13 | 14 | TEST_P(mr_ref_test, SetCurrentDeviceResourceRef) 15 | { 16 | rmm::mr::cuda_memory_resource cuda_mr{}; 17 | auto cuda_ref = rmm::device_async_resource_ref{cuda_mr}; 18 | 19 | rmm::mr::set_current_device_resource_ref(cuda_ref); 20 | auto old = rmm::mr::set_current_device_resource_ref(this->ref); 21 | 22 | // old mr should equal a cuda mr 23 | EXPECT_EQ(old, cuda_ref); 24 | 25 | // current dev resource should equal this resource 26 | EXPECT_EQ(this->ref, rmm::mr::get_current_device_resource_ref()); 27 | 28 | test_get_current_device_resource_ref(); 29 | 30 | // Resetting should reset to initial cuda resource 31 | rmm::mr::reset_current_device_resource_ref(); 32 | EXPECT_EQ(rmm::device_async_resource_ref{rmm::mr::detail::initial_resource()}, 33 | rmm::mr::get_current_device_resource_ref()); 34 | } 35 | 36 | TEST_P(mr_ref_test, SelfEquality) { EXPECT_TRUE(this->ref == this->ref); } 37 | 38 | // Simple reproducer for https://github.com/rapidsai/rmm/issues/861 39 | TEST_P(mr_ref_test, AllocationsAreDifferent) { concurrent_allocations_are_different(this->ref); } 40 | 41 | TEST_P(mr_ref_test, AsyncAllocationsAreDifferentDefaultStream) 42 | { 43 | concurrent_async_allocations_are_different(this->ref, cuda_stream_view{}); 44 | } 45 | 46 | TEST_P(mr_ref_test, AsyncAllocationsAreDifferent) 47 | { 48 | concurrent_async_allocations_are_different(this->ref, this->stream); 49 | } 50 | 51 | } // namespace rmm::test 52 | -------------------------------------------------------------------------------- /cmake/rapids_config.cmake: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # cmake-format: off 3 | # SPDX-FileCopyrightText: Copyright (c) 2018-2025, 
NVIDIA CORPORATION. 4 | # SPDX-License-Identifier: Apache-2.0 5 | # cmake-format: on 6 | # ============================================================================= 7 | file(READ "${CMAKE_CURRENT_LIST_DIR}/../VERSION" _rapids_version) 8 | if(_rapids_version MATCHES [[^([0-9][0-9])\.([0-9][0-9])\.([0-9][0-9])]]) 9 | set(RAPIDS_VERSION_MAJOR "${CMAKE_MATCH_1}") 10 | set(RAPIDS_VERSION_MINOR "${CMAKE_MATCH_2}") 11 | set(RAPIDS_VERSION_PATCH "${CMAKE_MATCH_3}") 12 | set(RAPIDS_VERSION_MAJOR_MINOR "${RAPIDS_VERSION_MAJOR}.${RAPIDS_VERSION_MINOR}") 13 | set(RAPIDS_VERSION "${RAPIDS_VERSION_MAJOR}.${RAPIDS_VERSION_MINOR}.${RAPIDS_VERSION_PATCH}") 14 | else() 15 | string(REPLACE "\n" "\n " _rapids_version_formatted " ${_rapids_version}") 16 | message( 17 | FATAL_ERROR 18 | "Could not determine RAPIDS version. Contents of VERSION file:\n${_rapids_version_formatted}") 19 | endif() 20 | 21 | # Use STRINGS to trim whitespace/newlines 22 | file(STRINGS "${CMAKE_CURRENT_LIST_DIR}/../RAPIDS_BRANCH" _rapids_branch) 23 | if(NOT _rapids_branch) 24 | message( 25 | FATAL_ERROR 26 | "Could not determine branch name to use for checking out rapids-cmake. The file \"${CMAKE_CURRENT_LIST_DIR}/../RAPIDS_BRANCH\" is missing." 27 | ) 28 | endif() 29 | 30 | if(NOT rapids-cmake-version) 31 | set(rapids-cmake-version "${RAPIDS_VERSION_MAJOR_MINOR}") 32 | endif() 33 | if(NOT rapids-cmake-branch) 34 | set(rapids-cmake-branch "${_rapids_branch}") 35 | endif() 36 | include("${CMAKE_CURRENT_LIST_DIR}/RAPIDS.cmake") 37 | 38 | # Don't use sccache-dist for CMake's compiler tests 39 | set(ENV{SCCACHE_NO_DIST_COMPILE} "1") 40 | -------------------------------------------------------------------------------- /python/rmm/rmm/pylibrmm/helper.pyx: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0

"""Helper functions for rmm"""

import re


# Multipliers for SI (kb..pb) and binary (kib..pib) suffixes; a bare number
# or a plain "b" suffix is interpreted as bytes.
cdef dict BYTE_SIZES = {
    'b': 1,
    '': 1,
    'kb': 1000,
    'mb': 1000**2,
    'gb': 1000**3,
    'tb': 1000**4,
    'pb': 1000**5,
    'kib': 1024,
    'mib': 1024**2,
    'gib': 1024**3,
    'tib': 1024**4,
    'pib': 1024**5,
}


# A (possibly fractional) number followed by an optional, case-insensitive
# byte suffix, e.g. "128MiB" or "1.5 gb".
pattern = re.compile(r"^([0-9]+(?:\.[0-9]*)?)[\t ]*((?i:(?:[kmgtp]i?)?b))?$")

cdef object parse_bytes(object s):
    """Parse a string or integer into a number of bytes.

    Parameters
    ----------
    s : int | str
        Size in bytes. If an integer is provided, it is returned as-is.
        A string is parsed as a floating point number with an (optional,
        case-insensitive) byte-specifier, both SI prefixes (kb, mb, ..., pb)
        and binary prefixes (kib, mib, ..., pib) are supported.

    Returns
    -------
    Requested size in bytes as an integer.

    Raises
    ------
    ValueError
        If it is not possible to parse the input as a byte specification.
    """
    cdef str suffix
    cdef double n
    # Must be a Python object, not a C int: the "tb"/"pb" (and "tib"/"pib")
    # multipliers exceed 2**31-1 and would raise OverflowError when assigned
    # to a 32-bit C int.
    cdef object multiplier

    if isinstance(s, int):
        return s

    match = pattern.match(s)

    if match is None:
        raise ValueError(f"Could not parse {s} as a byte specification")

    n = float(match.group(1))

    suffix = match.group(2)
    if suffix is None:
        suffix = ""

    multiplier = BYTE_SIZES[suffix.lower()]

    return int(n*multiplier)
# SPDX-License-Identifier: Apache-2.0

"""Tests for TrackingResourceAdaptor."""

import gc

import rmm


def test_tracking_resource_adaptor():
    """Stacked tracking adaptors attribute allocations to each level."""
    cuda_mr = rmm.mr.CudaMemoryResource()

    mr = rmm.mr.TrackingResourceAdaptor(cuda_mr, capture_stacks=True)

    rmm.mr.set_current_device_resource(mr)

    buffers = [rmm.DeviceBuffer(size=1000) for _ in range(10)]

    # Drop every other buffer (indices 9, 7, 5, 3, 1), leaving five alive.
    for idx in range(9, 0, -2):
        del buffers[idx]

    assert mr.get_allocated_bytes() == 5000

    # Push a second tracking adaptor on top of the first.
    mr2 = rmm.mr.TrackingResourceAdaptor(mr, capture_stacks=True)
    rmm.mr.set_current_device_resource(mr2)

    buffers.extend(rmm.DeviceBuffer(size=1000) for _ in range(2))

    # The inner adaptor sees only the new allocations; the outer sees all.
    assert mr2.get_allocated_bytes() == 2000
    assert mr.get_allocated_bytes() == 7000

    # Ensure we get back a non-empty report while allocations are live.
    assert len(mr.get_outstanding_allocations_str()) > 0

    del buffers
    gc.collect()

    assert mr2.get_allocated_bytes() == 0
    assert mr.get_allocated_bytes() == 0

    # With nothing outstanding, both reports should be empty.
    assert len(mr2.get_outstanding_allocations_str()) == 0
    assert len(mr.get_outstanding_allocations_str()) == 0


def test_mr_allocate_deallocate():
    """Direct allocate/deallocate calls are reflected in the tracked byte count."""
    mr = rmm.mr.TrackingResourceAdaptor(rmm.mr.get_current_device_resource())
    size = 1 << 23  # 8 MiB
    ptr = mr.allocate(size)
    assert mr.get_allocated_bytes() == size
    mr.deallocate(ptr, size)
    assert mr.get_allocated_bytes() == 0
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #pragma once 7 | 8 | // Only include if needed in RMM_LOGGING_ASSERT below. The 9 | // logger can be extremely expensive to compile, so we want to avoid including 10 | // it. 11 | #if !defined(NDEBUG) 12 | #include 13 | #include 14 | 15 | #include 16 | #endif 17 | 18 | /** 19 | * @brief Assertion that logs a CRITICAL log message on failure. 20 | */ 21 | #ifdef NDEBUG 22 | #define RMM_LOGGING_ASSERT(_expr) (void)0 23 | #elif RMM_LOG_ACTIVE_LEVEL < RMM_LOG_LEVEL_OFF 24 | #define RMM_LOGGING_ASSERT(_expr) \ 25 | do { \ 26 | bool const success = (_expr); \ 27 | if (!success) { \ 28 | RMM_LOG_CRITICAL( \ 29 | "[" __FILE__ ":" RMM_STRINGIFY(__LINE__) "] Assertion " RMM_STRINGIFY(_expr) " failed."); \ 30 | rmm::default_logger().flush(); \ 31 | /* NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay) */ \ 32 | assert(success); \ 33 | } \ 34 | } while (0) 35 | #else 36 | #define RMM_LOGGING_ASSERT(_expr) assert((_expr)); 37 | #endif 38 | -------------------------------------------------------------------------------- /cpp/examples/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 
3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | # librmm examples build script 6 | 7 | set -euo pipefail 8 | 9 | # Parallelism control 10 | PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} 11 | # Installation disabled by default 12 | INSTALL_EXAMPLES=false 13 | 14 | # Check for -i or --install flags to enable installation 15 | ARGS=$(getopt -o i --long install -- "$@") 16 | eval set -- "$ARGS" 17 | # shellcheck disable=SC2078 18 | while [ : ]; do 19 | case "$1" in 20 | -i | --install) 21 | INSTALL_EXAMPLES=true 22 | shift 23 | ;; 24 | --) shift; 25 | break 26 | ;; 27 | esac 28 | done 29 | 30 | # Root of examples 31 | EXAMPLES_DIR=$(dirname "$(realpath "$0")") 32 | 33 | # Set up default librmm build directory and install prefix if conda build 34 | if [ "${CONDA_BUILD:-"0"}" == "1" ]; then 35 | LIB_BUILD_DIR="${LIB_BUILD_DIR:-${SRC_DIR/cpp/build}}" 36 | INSTALL_PREFIX="${INSTALL_PREFIX:-${PREFIX}}" 37 | fi 38 | 39 | # librmm build directory 40 | LIB_BUILD_DIR=${LIB_BUILD_DIR:-$(readlink -f "${EXAMPLES_DIR}/../build")} 41 | 42 | ################################################################################ 43 | # Add individual librmm examples build scripts down below 44 | 45 | build_example() { 46 | example_dir=${1} 47 | example_dir="${EXAMPLES_DIR}/${example_dir}" 48 | build_dir="${example_dir}/build" 49 | 50 | # Configure 51 | cmake -S "${example_dir}" -B "${build_dir}" -Drmm_ROOT="${LIB_BUILD_DIR}" 52 | # Build 53 | cmake --build "${build_dir}" -j"${PARALLEL_LEVEL}" 54 | # Install if needed 55 | if [ "$INSTALL_EXAMPLES" = true ]; then 56 | cmake --install "${build_dir}" --prefix "${INSTALL_PREFIX:-${example_dir}/install}" 57 | fi 58 | } 59 | 60 | build_example basic 61 | -------------------------------------------------------------------------------- /python/rmm/rmm/librmm/device_buffer.pxd: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from rmm.librmm.cuda_stream_view cimport cuda_stream_view 5 | from rmm.librmm.memory_resource cimport device_memory_resource 6 | 7 | 8 | cdef extern from "rmm/mr/per_device_resource.hpp" namespace "rmm" nogil: 9 | cdef cppclass cuda_device_id: 10 | ctypedef int value_type 11 | cuda_device_id() 12 | cuda_device_id(value_type id) 13 | value_type value() 14 | 15 | cdef cuda_device_id get_current_cuda_device() 16 | 17 | cdef extern from "rmm/prefetch.hpp" namespace "rmm" nogil: 18 | cdef void prefetch(const void* ptr, 19 | size_t bytes, 20 | cuda_device_id device, 21 | cuda_stream_view stream) except + 22 | 23 | cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: 24 | cdef cppclass device_buffer: 25 | device_buffer() 26 | device_buffer( 27 | size_t size, 28 | cuda_stream_view stream, 29 | device_memory_resource * 30 | ) except + 31 | device_buffer( 32 | const void* source_data, 33 | size_t size, 34 | cuda_stream_view stream, 35 | device_memory_resource * 36 | ) except + 37 | device_buffer( 38 | const device_buffer buf, 39 | cuda_stream_view stream, 40 | device_memory_resource * 41 | ) except + 42 | void reserve(size_t new_capacity, cuda_stream_view stream) except + 43 | void resize(size_t new_size, cuda_stream_view stream) except + 44 | void shrink_to_fit(cuda_stream_view stream) except + 45 | void* data() 46 | size_t size() 47 | size_t capacity() 48 | -------------------------------------------------------------------------------- /cpp/src/cuda_device.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | namespace rmm { 16 | 17 | cuda_device_id get_current_cuda_device() 18 | { 19 | cuda_device_id::value_type dev_id{-1}; 20 | RMM_ASSERT_CUDA_SUCCESS(cudaGetDevice(&dev_id)); 21 | return cuda_device_id{dev_id}; 22 | } 23 | 24 | int get_num_cuda_devices() 25 | { 26 | cuda_device_id::value_type num_dev{-1}; 27 | RMM_ASSERT_CUDA_SUCCESS(cudaGetDeviceCount(&num_dev)); 28 | return num_dev; 29 | } 30 | 31 | std::pair available_device_memory() 32 | { 33 | std::size_t free{}; 34 | std::size_t total{}; 35 | RMM_CUDA_TRY(cudaMemGetInfo(&free, &total)); 36 | return {free, total}; 37 | } 38 | 39 | std::size_t percent_of_free_device_memory(int percent) 40 | { 41 | [[maybe_unused]] auto const [free, total] = rmm::available_device_memory(); 42 | auto fraction = static_cast(percent) / 100.0; 43 | return rmm::align_down(static_cast(static_cast(free) * fraction), 44 | rmm::CUDA_ALLOCATION_ALIGNMENT); 45 | } 46 | 47 | cuda_set_device_raii::cuda_set_device_raii(cuda_device_id dev_id) 48 | : old_device_{get_current_cuda_device()}, 49 | needs_reset_{dev_id.value() >= 0 && old_device_ != dev_id} 50 | { 51 | if (needs_reset_) { RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(dev_id.value())); } 52 | } 53 | 54 | cuda_set_device_raii::~cuda_set_device_raii() noexcept 55 | { 56 | if (needs_reset_) { RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(old_device_.value())); } 57 | } 58 | 59 | } // namespace rmm 60 | -------------------------------------------------------------------------------- /cpp/include/doxygen_groups.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | /** 7 | * @file 8 | * @brief Doxygen group definitions 9 | */ 10 | 11 | // This header is only processed by doxygen and does 12 | // not need to be included in any source file. 13 | // Below are the main groups that doxygen uses to build 14 | // the Modules page in the specified order. 15 | // 16 | // To add a new API to an existing group, just use the 17 | // @ingroup tag to the API's doxygen comment. 18 | // Add a new group by first specifying in the hierarchy below. 19 | 20 | /** 21 | * @namespace rmm 22 | * @brief RAPIDS Memory Manager - The top-level namespace for all RMM functionality 23 | * 24 | * The rmm namespace provides a comprehensive set of memory management 25 | * utilities for CUDA applications, including memory resources, CUDA stream 26 | * management, device-side data containers, and memory allocation utilities. 27 | */ 28 | 29 | /** 30 | * @namespace rmm::mr 31 | * @brief Memory Resource classes and adaptors 32 | * 33 | * The rmm::mr namespace contains all base memory resource classes that 34 | * implement various CUDA memory allocation strategies, adaptors for 35 | * suballocation such as pool and arena adaptors, and adaptors that add 36 | * functionality such as logging, alignment, and statistics tracking to 37 | * existing memory resources. 
38 | */ 39 | 40 | /** 41 | * @defgroup memory_resources Memory Resources 42 | * @defgroup memory_resource_adaptors Memory Resource Adaptors 43 | * @defgroup cuda_device_management CUDA Device Management 44 | * @defgroup cuda_streams CUDA Streams 45 | * @defgroup data_containers Data Containers 46 | * @defgroup errors Errors 47 | * @defgroup thrust_integrations Thrust Integrations 48 | * @defgroup utilities Utilities 49 | */ 50 | -------------------------------------------------------------------------------- /.devcontainer/README.md: -------------------------------------------------------------------------------- 1 | # RMM Development Containers 2 | 3 | This directory contains [devcontainer configurations](https://containers.dev/implementors/json_reference/) for using VSCode to [develop in a container](https://code.visualstudio.com/docs/devcontainers/containers) via the `Remote Containers` [extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) or [GitHub Codespaces](https://github.com/codespaces). 4 | 5 | This container is a turnkey development environment for building and testing the RMM C++ and Python libraries. 
6 | 7 | ## Table of Contents 8 | 9 | * [Prerequisites](#prerequisites) 10 | * [Host bind mounts](#host-bind-mounts) 11 | * [Launch a Dev Container](#launch-a-dev-container) 12 | 13 | ## Prerequisites 14 | 15 | * [VSCode](https://code.visualstudio.com/download) 16 | * [VSCode Remote Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) 17 | 18 | ## Host bind mounts 19 | 20 | By default, the following directories are bind-mounted into the devcontainer: 21 | 22 | * `${repo}:/home/coder/rmm` 23 | * `${repo}/../.aws:/home/coder/.aws` 24 | * `${repo}/../.local:/home/coder/.local` 25 | * `${repo}/../.cache:/home/coder/.cache` 26 | * `${repo}/../.conda:/home/coder/.conda` 27 | * `${repo}/../.config:/home/coder/.config` 28 | 29 | This ensures caches, configurations, dependencies, and your commits are persisted on the host across container runs. 30 | 31 | ## Launch a Dev Container 32 | 33 | To launch a devcontainer from VSCode, open the RMM repo and select the "Reopen in Container" button in the bottom right:
34 | 35 | Alternatively, open the VSCode command palette (typically `cmd/ctrl + shift + P`) and run the "Rebuild and Reopen in Container" command. 36 | -------------------------------------------------------------------------------- /ci/test_python.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | set -euo pipefail 6 | 7 | # Support invoking test_python.sh outside the script directory 8 | cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ 9 | 10 | rapids-logger "Create test conda environment" 11 | 12 | . /opt/conda/etc/profile.d/conda.sh 13 | 14 | rapids-logger "Configuring conda strict channel priority" 15 | conda config --set channel_priority strict 16 | 17 | rapids-logger "Downloading artifacts from previous jobs" 18 | CPP_CHANNEL=$(rapids-download-conda-from-github cpp) 19 | PYTHON_CHANNEL=$(rapids-download-conda-from-github python) 20 | 21 | rapids-dependency-file-generator \ 22 | --output conda \ 23 | --file-key test_python \ 24 | --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \ 25 | --prepend-channel "${CPP_CHANNEL}" \ 26 | --prepend-channel "${PYTHON_CHANNEL}" \ 27 | | tee env.yaml 28 | 29 | rapids-mamba-retry env create --yes -f env.yaml -n test 30 | 31 | # Temporarily allow unbound variables for conda activation. 
32 | set +u 33 | conda activate test 34 | set -u 35 | 36 | rapids-print-env 37 | 38 | RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} 39 | RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"} 40 | mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}" 41 | 42 | rapids-logger "Check GPU usage" 43 | nvidia-smi 44 | 45 | rapids-logger "pytest rmm" 46 | 47 | timeout 10m ./ci/run_pytests.sh \ 48 | --junitxml="${RAPIDS_TESTS_DIR}/junit-rmm.xml" \ 49 | --cov-config=.coveragerc \ 50 | --cov=rmm \ 51 | --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/rmm-coverage.xml" \ 52 | --cov-report term \ 53 | && EXITCODE=$? || EXITCODE=$?; 54 | 55 | rapids-logger "Test script exiting with value: $EXITCODE" 56 | exit "${EXITCODE}" 57 | -------------------------------------------------------------------------------- /cpp/include/rmm/prefetch.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | namespace RMM_NAMESPACE { 17 | 18 | /** 19 | * @addtogroup utilities 20 | * @{ 21 | * @file 22 | */ 23 | 24 | /** 25 | * @brief Prefetch memory to the specified device on the specified stream. 26 | * 27 | * This function is a no-op if the pointer is not to CUDA managed memory or if 28 | * concurrent managed access is not supported. 29 | * 30 | * @throw rmm::cuda_error if the prefetch fails. 
31 | * 32 | * @param ptr The pointer to the memory to prefetch 33 | * @param size The number of bytes to prefetch 34 | * @param device The device to prefetch to 35 | * @param stream The stream to use for the prefetch 36 | */ 37 | void prefetch(void const* ptr, 38 | std::size_t size, 39 | rmm::cuda_device_id device, 40 | rmm::cuda_stream_view stream); 41 | 42 | /** 43 | * @brief Prefetch a span of memory to the specified device on the specified stream. 44 | * 45 | * This function is a no-op if the buffer is not backed by CUDA managed memory. 46 | * 47 | * @throw rmm::cuda_error if the prefetch fails. 48 | * 49 | * @param data The span to prefetch 50 | * @param device The device to prefetch to 51 | * @param stream The stream to use for the prefetch 52 | */ 53 | template 54 | void prefetch(cuda::std::span data, 55 | rmm::cuda_device_id device, 56 | rmm::cuda_stream_view stream) 57 | { 58 | prefetch(data.data(), data.size_bytes(), device, stream); 59 | } 60 | 61 | /** @} */ // end of group 62 | 63 | } // namespace RMM_NAMESPACE 64 | -------------------------------------------------------------------------------- /.github/workflows/new-issues-to-triage-projects.yml: -------------------------------------------------------------------------------- 1 | name: Auto Assign New Issues to Triage Project 2 | 3 | on: 4 | issues: 5 | types: [opened] 6 | 7 | env: 8 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 9 | 10 | jobs: 11 | assign_one_project: 12 | runs-on: ubuntu-latest 13 | name: Assign to New Issues to Triage Project 14 | steps: 15 | - name: Harden the runner (Audit all outbound calls) 16 | uses: step-security/harden-runner@ec9f2d5744a09debf3a187a3f4f675c53b671911 # v2.13.0 17 | with: 18 | egress-policy: audit 19 | 20 | - name: Process bug issues 21 | uses: docker://takanabe/github-actions-automate-projects:v0.0.1 22 | if: contains(github.event.issue.labels.*.name, 'bug') && contains(github.event.issue.labels.*.name, '? 
- Needs Triage') 23 | env: 24 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 25 | GITHUB_PROJECT_URL: https://github.com/rapidsai/rmm/projects/12 26 | GITHUB_PROJECT_COLUMN_NAME: 'Needs prioritizing' 27 | - name: Process feature issues 28 | uses: docker://takanabe/github-actions-automate-projects:v0.0.1 29 | if: contains(github.event.issue.labels.*.name, 'feature request') && contains(github.event.issue.labels.*.name, '? - Needs Triage') 30 | env: 31 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 32 | GITHUB_PROJECT_URL: https://github.com/rapidsai/rmm/projects/13 33 | GITHUB_PROJECT_COLUMN_NAME: 'Needs prioritizing' 34 | - name: Process other issues 35 | uses: docker://takanabe/github-actions-automate-projects:v0.0.1 36 | if: contains(github.event.issue.labels.*.name, '? - Needs Triage') && (!contains(github.event.issue.labels.*.name, 'bug') && !contains(github.event.issue.labels.*.name, 'feature request')) 37 | env: 38 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 39 | GITHUB_PROJECT_URL: https://github.com/rapidsai/rmm/projects/14 40 | GITHUB_PROJECT_COLUMN_NAME: 'Needs prioritizing' 41 | -------------------------------------------------------------------------------- /.devcontainer/cuda13.0-pip/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "build": { 3 | "context": "${localWorkspaceFolder}/.devcontainer", 4 | "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", 5 | "args": { 6 | "CUDA": "13.0", 7 | "PYTHON_PACKAGE_MANAGER": "pip", 8 | "BASE": "rapidsai/devcontainers:26.02-cpp-cuda13.0" 9 | } 10 | }, 11 | "runArgs": [ 12 | "--rm", 13 | "--name", 14 | "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-26.02-cuda13.0-pip" 15 | ], 16 | "hostRequirements": {"gpu": "optional"}, 17 | "features": { 18 | "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:26.2": {} 19 | }, 20 | "overrideFeatureInstallOrder": [ 21 | "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" 22 | ], 
23 | "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda13.0-venvs}"], 24 | "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"], 25 | "workspaceFolder": "/home/coder", 26 | "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/rmm,type=bind,consistency=consistent", 27 | "mounts": [ 28 | "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", 29 | "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", 30 | "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", 31 | "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda13.0-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" 32 | ], 33 | "customizations": { 34 | "vscode": { 35 | "extensions": [ 36 | "ms-python.flake8", 37 | "nvidia.nsight-vscode-edition" 38 | ] 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /cpp/src/cuda_stream_view.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | namespace rmm { 16 | 17 | cuda_stream_view::cuda_stream_view(cudaStream_t stream) noexcept : stream_{stream} {} 18 | 19 | cuda_stream_view::cuda_stream_view(cuda::stream_ref stream) noexcept : stream_{stream.get()} {} 20 | 21 | cudaStream_t cuda_stream_view::value() const noexcept { return stream_; } 22 | 23 | cuda_stream_view::operator cudaStream_t() const noexcept { return value(); } 24 | 25 | cuda_stream_view::operator cuda::stream_ref() const noexcept { return value(); } 26 | 27 | bool cuda_stream_view::is_per_thread_default() const noexcept 28 | { 29 | #ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM 30 | return *this == cuda_stream_per_thread || value() == nullptr; 31 | #else 32 | return *this == cuda_stream_per_thread; 33 | #endif 34 | } 35 | 36 | bool cuda_stream_view::is_default() const noexcept 37 | { 38 | #ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM 39 | return *this == cuda_stream_legacy; 40 | #else 41 | return *this == cuda_stream_legacy || value() == nullptr; 42 | #endif 43 | } 44 | 45 | void cuda_stream_view::synchronize() const { RMM_CUDA_TRY(cudaStreamSynchronize(stream_)); } 46 | 47 | void cuda_stream_view::synchronize_no_throw() const noexcept 48 | { 49 | RMM_ASSERT_CUDA_SUCCESS(cudaStreamSynchronize(stream_)); 50 | } 51 | 52 | bool operator==(cuda_stream_view lhs, cuda_stream_view rhs) { return lhs.value() == rhs.value(); } 53 | 54 | bool operator!=(cuda_stream_view lhs, cuda_stream_view rhs) { return not(lhs == rhs); } 55 | 56 | std::ostream& operator<<(std::ostream& os, cuda_stream_view stream) 57 | { 58 | os << stream.value(); 59 | return os; 60 | } 61 | 62 | } // namespace rmm 63 | -------------------------------------------------------------------------------- /ci/test_cpp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 
SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | set -euo pipefail 6 | 7 | # Support invoking test_cpp.sh outside the script directory 8 | cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ 9 | 10 | . /opt/conda/etc/profile.d/conda.sh 11 | 12 | rapids-logger "Configuring conda strict channel priority" 13 | conda config --set channel_priority strict 14 | 15 | CPP_CHANNEL=$(rapids-download-conda-from-github cpp) 16 | RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}/ 17 | mkdir -p "${RAPIDS_TESTS_DIR}" 18 | 19 | rapids-logger "Generate C++ testing dependencies" 20 | rapids-dependency-file-generator \ 21 | --output conda \ 22 | --file-key test_cpp \ 23 | --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" \ 24 | --prepend-channel "${CPP_CHANNEL}" \ 25 | | tee env.yaml 26 | 27 | rapids-mamba-retry env create --yes -f env.yaml -n test 28 | 29 | # Temporarily allow unbound variables for conda activation. 30 | set +u 31 | conda activate test 32 | set -u 33 | 34 | rapids-print-env 35 | 36 | rapids-logger "Check GPU usage" 37 | nvidia-smi 38 | 39 | # Run librmm gtests from librmm-tests package 40 | rapids-logger "Run gtests" 41 | 42 | export GTEST_OUTPUT=xml:${RAPIDS_TESTS_DIR}/ 43 | timeout 15m ./ci/run_ctests.sh -j20 && EXITCODE=$? || EXITCODE=$?; 44 | 45 | # Run all examples from librmm-example package 46 | for example in "${CONDA_PREFIX}"/bin/examples/librmm/*; do 47 | if [ -x "$example" ]; then 48 | rapids-logger "Running example: $(basename "$example")" 49 | timeout 15m "$example" && EXAMPLE_EXITCODE=$? 
|| EXAMPLE_EXITCODE=$?; 50 | if [ "$EXAMPLE_EXITCODE" -ne 0 ]; then 51 | rapids-logger "Example $(basename "$example") failed with exit code: $EXAMPLE_EXITCODE" 52 | EXITCODE=$EXAMPLE_EXITCODE 53 | break 54 | fi 55 | fi 56 | done 57 | 58 | rapids-logger "Test script exiting with value: $EXITCODE" 59 | exit "${EXITCODE}" 60 | -------------------------------------------------------------------------------- /cpp/benchmarks/synchronization/synchronization.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2019-2021, NVIDIA CORPORATION. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | #include "synchronization.hpp" 7 | 8 | #include 9 | 10 | #ifdef NDEBUG 11 | #define RMM_CUDA_ASSERT_OK(expr) expr 12 | #else 13 | #define RMM_CUDA_ASSERT_OK(expr) \ 14 | do { \ 15 | cudaError_t const status = (expr); \ 16 | assert(cudaSuccess == status); \ 17 | } while (0); 18 | #endif 19 | 20 | cuda_event_timer::cuda_event_timer(benchmark::State& state, 21 | bool flush_l2_cache, 22 | rmm::cuda_stream_view stream) 23 | : stream(stream), p_state(&state) 24 | { 25 | // flush all of L2$ 26 | if (flush_l2_cache) { 27 | int current_device = 0; 28 | RMM_CUDA_TRY(cudaGetDevice(¤t_device)); 29 | 30 | int l2_cache_bytes = 0; 31 | RMM_CUDA_TRY(cudaDeviceGetAttribute(&l2_cache_bytes, cudaDevAttrL2CacheSize, current_device)); 32 | 33 | if (l2_cache_bytes > 0) { 34 | const int memset_value = 0; 35 | rmm::device_buffer l2_cache_buffer(l2_cache_bytes, stream); 36 | RMM_CUDA_TRY( 37 | cudaMemsetAsync(l2_cache_buffer.data(), memset_value, l2_cache_bytes, stream.value())); 38 | } 39 | } 40 | 41 | RMM_CUDA_TRY(cudaEventCreate(&start)); 42 | RMM_CUDA_TRY(cudaEventCreate(&stop)); 43 | RMM_CUDA_TRY(cudaEventRecord(start, stream.value())); 44 | } 45 | 46 | cuda_event_timer::~cuda_event_timer() 47 | { 48 | RMM_CUDA_ASSERT_OK(cudaEventRecord(stop, stream.value())); 49 | RMM_CUDA_ASSERT_OK(cudaEventSynchronize(stop)); 50 | 
51 | float milliseconds = 0.0F; 52 | RMM_CUDA_ASSERT_OK(cudaEventElapsedTime(&milliseconds, start, stop)); 53 | const auto to_milliseconds{1.0F / 1000}; 54 | p_state->SetIterationTime(milliseconds * to_milliseconds); 55 | RMM_CUDA_ASSERT_OK(cudaEventDestroy(start)); 56 | RMM_CUDA_ASSERT_OK(cudaEventDestroy(stop)); 57 | } 58 | -------------------------------------------------------------------------------- /.devcontainer/cuda12.9-pip/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "build": { 3 | "context": "${localWorkspaceFolder}/.devcontainer", 4 | "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", 5 | "args": { 6 | "CUDA": "12.9", 7 | "PYTHON_PACKAGE_MANAGER": "pip", 8 | "BASE": "rapidsai/devcontainers:26.02-cpp-cuda12.9" 9 | } 10 | }, 11 | "runArgs": [ 12 | "--rm", 13 | "--name", 14 | "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-26.02-cuda12.9-pip", 15 | "--ulimit", 16 | "nofile=500000" 17 | ], 18 | "hostRequirements": {"gpu": "optional"}, 19 | "features": { 20 | "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:26.2": {} 21 | }, 22 | "overrideFeatureInstallOrder": [ 23 | "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" 24 | ], 25 | "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.9-venvs}"], 26 | "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . 
rapids-post-attach-command; fi"], 27 | "workspaceFolder": "/home/coder", 28 | "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/rmm,type=bind,consistency=consistent", 29 | "mounts": [ 30 | "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", 31 | "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", 32 | "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", 33 | "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.9-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" 34 | ], 35 | "customizations": { 36 | "vscode": { 37 | "extensions": [ 38 | "ms-python.flake8", 39 | "nvidia.nsight-vscode-edition" 40 | ] 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: test 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | branch: 7 | description: | 8 | branch: git branch the workflow run targets. 9 | Required even when 'sha' is provided because it is also used for organizing artifacts. 10 | required: true 11 | type: string 12 | date: 13 | description: "date: Date (YYYY-MM-DD) this run is for. 
Used to organize artifacts produced by nightly builds" 14 | required: true 15 | type: string 16 | sha: 17 | description: "sha: full git commit SHA to check out" 18 | required: true 19 | type: string 20 | build_type: 21 | description: "build_type: one of [branch, nightly, pull-request]" 22 | type: string 23 | default: nightly 24 | 25 | jobs: 26 | cpp-tests: 27 | secrets: inherit 28 | uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@main 29 | with: 30 | build_type: ${{ inputs.build_type }} 31 | branch: ${{ inputs.branch }} 32 | date: ${{ inputs.date }} 33 | script: ci/test_cpp.sh 34 | sha: ${{ inputs.sha }} 35 | sccache-dist-token-secret-name: GIST_REPO_READ_ORG_GITHUB_TOKEN 36 | python-tests: 37 | secrets: inherit 38 | uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@main 39 | with: 40 | build_type: ${{ inputs.build_type }} 41 | branch: ${{ inputs.branch }} 42 | date: ${{ inputs.date }} 43 | script: ci/test_python.sh 44 | sha: ${{ inputs.sha }} 45 | sccache-dist-token-secret-name: GIST_REPO_READ_ORG_GITHUB_TOKEN 46 | wheel-tests: 47 | secrets: inherit 48 | uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main 49 | with: 50 | build_type: ${{ inputs.build_type }} 51 | branch: ${{ inputs.branch }} 52 | date: ${{ inputs.date }} 53 | sha: ${{ inputs.sha }} 54 | script: ci/test_wheel.sh 55 | sccache-dist-token-secret-name: GIST_REPO_READ_ORG_GITHUB_TOKEN 56 | -------------------------------------------------------------------------------- /python/rmm/rmm/tests/test_pool_memory_resource.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | """Tests for PoolMemoryResource.""" 5 | 6 | import pytest 7 | from numba import cuda 8 | from test_helpers import _allocs, _dtypes, _nelems, array_tester 9 | 10 | import rmm 11 | from rmm.pylibrmm.stream import Stream 12 | 13 | 14 | @pytest.mark.parametrize("dtype", _dtypes) 15 | @pytest.mark.parametrize("nelem", _nelems) 16 | @pytest.mark.parametrize("alloc", _allocs) 17 | def test_pool_memory_resource(dtype, nelem, alloc): 18 | mr = rmm.mr.PoolMemoryResource( 19 | rmm.mr.CudaMemoryResource(), 20 | initial_pool_size="4MiB", 21 | maximum_pool_size="8MiB", 22 | ) 23 | rmm.mr.set_current_device_resource(mr) 24 | assert rmm.mr.get_current_device_resource_type() is type(mr) 25 | array_tester(dtype, nelem, alloc) 26 | 27 | 28 | def test_reinitialize_max_pool_size(): 29 | rmm.reinitialize( 30 | pool_allocator=True, initial_pool_size=0, maximum_pool_size="8MiB" 31 | ) 32 | rmm.DeviceBuffer().resize((1 << 23) - 1) 33 | 34 | 35 | def test_reinitialize_max_pool_size_exceeded(): 36 | rmm.reinitialize( 37 | pool_allocator=True, initial_pool_size=0, maximum_pool_size=1 << 23 38 | ) 39 | with pytest.raises(MemoryError): 40 | rmm.DeviceBuffer().resize(1 << 24) 41 | 42 | 43 | @pytest.mark.parametrize("stream", [cuda.default_stream(), cuda.stream()]) 44 | def test_rmm_pool_numba_stream(stream): 45 | rmm.reinitialize(pool_allocator=True) 46 | 47 | stream = Stream(stream) 48 | a = rmm.DeviceBuffer(size=3, stream=stream) 49 | 50 | assert a.size == 3 51 | assert a.ptr != 0 52 | 53 | 54 | def test_mr_upstream_lifetime(): 55 | # Simple test to ensure upstream MRs are deallocated before downstream MR 56 | cuda_mr = rmm.mr.CudaMemoryResource() 57 | 58 | pool_mr = rmm.mr.PoolMemoryResource(cuda_mr) 59 | 60 | # Delete cuda_mr first. 
Should be kept alive by pool_mr 61 | del cuda_mr 62 | del pool_mr 63 | -------------------------------------------------------------------------------- /.devcontainer/cuda13.0-conda/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "build": { 3 | "context": "${localWorkspaceFolder}/.devcontainer", 4 | "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", 5 | "args": { 6 | "CUDA": "13.0", 7 | "PYTHON_PACKAGE_MANAGER": "conda", 8 | "BASE": "rapidsai/devcontainers:26.02-cpp-mambaforge" 9 | } 10 | }, 11 | "runArgs": [ 12 | "--rm", 13 | "--name", 14 | "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-26.02-cuda13.0-conda" 15 | ], 16 | "hostRequirements": {"gpu": "optional"}, 17 | "features": { 18 | "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:26.2": {} 19 | }, 20 | "overrideFeatureInstallOrder": [ 21 | "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" 22 | ], 23 | "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda13.0-envs}"], 24 | "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . 
rapids-post-attach-command; fi"], 25 | "workspaceFolder": "/home/coder", 26 | "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/rmm,type=bind,consistency=consistent", 27 | "mounts": [ 28 | "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", 29 | "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", 30 | "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", 31 | "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent", 32 | "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda13.0-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" 33 | ], 34 | "customizations": { 35 | "vscode": { 36 | "extensions": [ 37 | "ms-python.flake8", 38 | "nvidia.nsight-vscode-edition" 39 | ] 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /python/rmm/rmm/tests/test_cuda_async_managed_memory_resource.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0

"""Tests for CudaAsyncManagedMemoryResource."""

import numpy as np
import pytest
from test_helpers import (
    _ASYNC_MANAGED_MEMORY_SUPPORTED,
    _allocs,
    _dtypes,
    _nelems,
    array_tester,
)

import rmm
from rmm.pylibrmm.stream import Stream

# Shared skip condition for every test in this module.
_requires_async_managed = pytest.mark.skipif(
    not _ASYNC_MANAGED_MEMORY_SUPPORTED,
    reason="CudaAsyncManagedMemoryResource requires CUDA 13.0+",
)


@_requires_async_managed
@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
@pytest.mark.parametrize("alloc", _allocs)
def test_cuda_async_managed_memory_resource(dtype, nelem, alloc):
    """The async-managed MR can serve as the current device resource."""
    resource = rmm.mr.experimental.CudaAsyncManagedMemoryResource()
    rmm.mr.set_current_device_resource(resource)
    # The resource just installed must be the one RMM reports back.
    assert rmm.mr.get_current_device_resource_type() is type(resource)
    array_tester(dtype, nelem, alloc)


@_requires_async_managed
@pytest.mark.parametrize("nelems", _nelems)
def test_cuda_async_managed_memory_resource_stream(nelems):
    """Round-trip a host buffer through a DeviceBuffer on a fresh stream."""
    resource = rmm.mr.experimental.CudaAsyncManagedMemoryResource()
    rmm.mr.set_current_device_resource(resource)
    copy_stream = Stream()
    host_data = np.full(nelems, 5, dtype="u1")
    device_buf = rmm.DeviceBuffer.to_device(host_data, stream=copy_stream)
    round_tripped = np.asarray(device_buf.copy_to_host())
    np.testing.assert_equal(host_data, round_tripped)


@_requires_async_managed
def test_cuda_async_managed_memory_resource_pool_handle():
    """pool_handle() exposes a non-zero integer cudaMemPool_t handle."""
    resource = rmm.mr.experimental.CudaAsyncManagedMemoryResource()
    handle = resource.pool_handle()
    assert isinstance(handle, int)
    assert handle != 0
/ci/build_wheel_python.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | set -euo pipefail 6 | 7 | package_dir="python/rmm" 8 | 9 | source rapids-configure-sccache 10 | source rapids-date-string 11 | source rapids-init-pip 12 | 13 | export SCCACHE_S3_PREPROCESSOR_CACHE_KEY_PREFIX="rmm-${RAPIDS_CONDA_ARCH}-cuda${RAPIDS_CUDA_VERSION%%.*}-wheel-preprocessor-cache" 14 | export SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE=true 15 | 16 | rapids-generate-version > ./VERSION 17 | 18 | pushd "${package_dir}" 19 | 20 | RAPIDS_PY_CUDA_SUFFIX=$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}") 21 | LIBRMM_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="librmm_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) 22 | 23 | # ensure 'rmm' wheel builds always use the 'librmm' just built in the same CI run 24 | # 25 | # Using env variable PIP_CONSTRAINT (initialized by 'rapids-init-pip') is necessary to ensure the constraints 26 | # are used when creating the isolated build environment. 27 | echo "librmm-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo "${LIBRMM_WHEELHOUSE}"/librmm_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" >> "${PIP_CONSTRAINT}" 28 | 29 | sccache --stop-server 2>/dev/null || true 30 | 31 | # Creates artifacts directory for telemetry 32 | source rapids-telemetry-setup 33 | 34 | rapids-telemetry-record build.log rapids-pip-retry wheel \ 35 | -v \ 36 | -w dist \ 37 | --no-deps \ 38 | --disable-pip-version-check \ 39 | . 
40 | 41 | rapids-telemetry-record sccache-stats.txt sccache --show-adv-stats 42 | sccache --stop-server >/dev/null 2>&1 || true 43 | 44 | EXCLUDE_ARGS=( 45 | --exclude "librapids_logger.so" 46 | --exclude "librmm.so" 47 | ) 48 | python -m auditwheel repair \ 49 | "${EXCLUDE_ARGS[@]}" \ 50 | -w "${RAPIDS_WHEEL_BLD_OUTPUT_DIR}" \ 51 | dist/* 52 | 53 | ../../ci/validate_wheel.sh "${RAPIDS_WHEEL_BLD_OUTPUT_DIR}" 54 | 55 | absolute_wheel_dir=$(realpath "${RAPIDS_WHEEL_BLD_OUTPUT_DIR}") 56 | # switch back to the root of the repo and check symbol visibility 57 | popd 58 | ci/check_symbols.sh "$(echo "${absolute_wheel_dir}"/rmm_*.whl)" 59 | -------------------------------------------------------------------------------- /.devcontainer/cuda12.9-conda/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "build": { 3 | "context": "${localWorkspaceFolder}/.devcontainer", 4 | "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", 5 | "args": { 6 | "CUDA": "12.9", 7 | "PYTHON_PACKAGE_MANAGER": "conda", 8 | "BASE": "rapidsai/devcontainers:26.02-cpp-mambaforge" 9 | } 10 | }, 11 | "runArgs": [ 12 | "--rm", 13 | "--name", 14 | "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-26.02-cuda12.9-conda", 15 | "--ulimit", 16 | "nofile=500000" 17 | ], 18 | "hostRequirements": {"gpu": "optional"}, 19 | "features": { 20 | "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:26.2": {} 21 | }, 22 | "overrideFeatureInstallOrder": [ 23 | "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" 24 | ], 25 | "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.9-envs}"], 26 | "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . 
# SPDX-License-Identifier: Apache-2.0

"""Tests for CudaAsyncViewMemoryResource."""

import pytest
from cuda.bindings import runtime
from test_helpers import _allocs, _dtypes, _nelems, array_tester

import rmm


@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
@pytest.mark.parametrize("alloc", _allocs)
def test_cuda_async_view_memory_resource_default_pool(dtype, nelem, alloc):
    """A view MR over the device's default mempool serves allocations."""
    # Get the default memory pool handle
    current_device = rmm._cuda.gpu.getDevice()
    err, pool = runtime.cudaDeviceGetDefaultMemPool(current_device)
    assert err == runtime.cudaError_t.cudaSuccess

    mr = rmm.mr.CudaAsyncViewMemoryResource(pool)
    rmm.mr.set_current_device_resource(mr)
    assert rmm.mr.get_current_device_resource_type() is type(mr)
    array_tester(dtype, nelem, alloc)


@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
@pytest.mark.parametrize("alloc", _allocs)
def test_cuda_async_view_memory_resource_custom_pool(dtype, nelem, alloc):
    """A view MR over a user-created pool works, and does not own the pool."""
    # Create a memory pool handle
    props = runtime.cudaMemPoolProps()
    props.allocType = runtime.cudaMemAllocationType.cudaMemAllocationTypePinned
    props.location.id = rmm._cuda.gpu.getDevice()
    props.location.type = runtime.cudaMemLocationType.cudaMemLocationTypeDevice
    err, pool = runtime.cudaMemPoolCreate(props)
    assert err == runtime.cudaError_t.cudaSuccess

    mr = rmm.mr.CudaAsyncViewMemoryResource(pool)
    rmm.mr.set_current_device_resource(mr)
    assert rmm.mr.get_current_device_resource_type() is type(mr)
    array_tester(dtype, nelem, alloc)

    # After the pool is destroyed, new allocations should raise
    # (the MR is a non-owning view, so destroying the pool out from under
    # it is legal but leaves it unable to allocate).
    (err,) = runtime.cudaMemPoolDestroy(pool)
    assert err == runtime.cudaError_t.cudaSuccess
    with pytest.raises(MemoryError):
        array_tester(dtype, nelem, alloc)
/**
 * @brief Test-only adaptor that forwards (de)allocation to an upstream
 * resource while asserting, via GTest EXPECT_TRUE, that the CUDA device
 * that was current at construction time is still current at every call.
 */
class device_check_resource_adaptor final : public rmm::mr::device_memory_resource {
 public:
  device_check_resource_adaptor(rmm::device_async_resource_ref upstream)
    : device_id{rmm::get_current_cuda_device()}, upstream_(upstream)
  {
  }

  /**
   * @briefreturn{rmm::device_async_resource_ref to the upstream resource}
   */
  [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() const noexcept
  {
    return upstream_;
  }

 private:
  // True when the device captured at construction is still the current device.
  [[nodiscard]] bool check_device_id() const { return device_id == rmm::get_current_cuda_device(); }

  void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override
  {
    bool const is_correct_device = check_device_id();
    EXPECT_TRUE(is_correct_device);
    // Forward only when the device check passed; failure is reported through
    // the EXPECT above and nullptr is returned instead of allocating.
    if (is_correct_device) { return get_upstream_resource().allocate(stream, bytes); }
    return nullptr;
  }

  void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) noexcept override
  {
    bool const is_correct_device = check_device_id();
    EXPECT_TRUE(is_correct_device);
    // On a device mismatch the deallocation is intentionally skipped (leaked)
    // rather than freed on the wrong device.
    if (is_correct_device) { get_upstream_resource().deallocate(stream, ptr, bytes); }
  }

  [[nodiscard]] bool do_is_equal(
    rmm::mr::device_memory_resource const& other) const noexcept override
  {
    if (this == &other) { return true; }
    // NOTE(review): the dynamic_cast template argument appears to have been
    // lost in formatting (expected something like
    // `device_check_resource_adaptor const*`) — confirm against upstream.
    auto const* cast = dynamic_cast(&other);
    if (cast == nullptr) { return false; }
    return get_upstream_resource() == cast->get_upstream_resource();
  }

  rmm::cuda_device_id device_id;          // device current at construction
  rmm::device_async_resource_ref upstream_;  // resource all calls forward to
};
@cython.final
cdef class CudaStreamPool:
    """
    A pool of CUDA streams for efficient stream management.

    Provides thread-safe access to a collection of CUDA stream objects.
    Successive calls may return views of identical streams.
    """

    def __cinit__(self, size_t pool_size = 16,
                  cuda_stream_flags flags = cuda_stream_flags.sync_default):
        # Construct the underlying C++ pool without the GIL; stream creation
        # may block on the CUDA driver.
        with nogil:
            self.c_obj.reset(new cuda_stream_pool(pool_size, flags))

    def __dealloc__(self):
        # Destroy the pool (and its streams) without holding the GIL.
        with nogil:
            self.c_obj.reset()

    def get_stream(self, stream_id: Optional[int] = None) -> Stream:
        """
        Get a Stream from the pool (optionally by ID).

        Parameters
        ----------
        stream_id : Optional[int], optional
            The ID of the stream to get. If None, the next stream from the pool is
            returned.

        Returns
        -------
        Stream
            A non-owning Stream object from the pool
        """
        cdef size_t c_stream_id
        if stream_id is None:
            # Let the pool pick the next stream (round-robin semantics are
            # provided by the C++ cuda_stream_pool).
            return Stream._from_cudaStream_t(
                deref(self.c_obj).get_stream().value(), owner=self)
        else:
            # NOTE(review): a cast target (likely <size_t>) appears to have
            # been lost in formatting on the next line — confirm upstream.
            c_stream_id = (stream_id)
            return Stream._from_cudaStream_t(
                deref(self.c_obj).get_stream(c_stream_id).value(), owner=self)

    def get_pool_size(self) -> int:
        """
        Get the pool size.

        Returns
        -------
        int
            The number of streams in the pool
        """
        return deref(self.c_obj).get_pool_size()
# SPDX-License-Identifier: Apache-2.0

"""Tests for CuPy integration with RMM."""

import pytest

import rmm
from rmm.allocators.cupy import rmm_cupy_allocator


def test_rmm_cupy_allocator():
    """RMM-backed CuPy allocations are owned by rmm.DeviceBuffer."""
    cupy = pytest.importorskip("cupy")

    # Non-empty allocation: real pointer, RMM-owned.
    m = rmm_cupy_allocator(42)
    assert m.mem.size == 42
    assert m.mem.ptr != 0
    assert isinstance(m.mem._owner, rmm.DeviceBuffer)

    # Zero-byte allocation: null pointer, still RMM-owned.
    m = rmm_cupy_allocator(0)
    assert m.mem.size == 0
    assert m.mem.ptr == 0
    assert isinstance(m.mem._owner, rmm.DeviceBuffer)

    # Once installed as CuPy's allocator, plain arrays are RMM-backed too.
    cupy.cuda.set_allocator(rmm_cupy_allocator)
    a = cupy.arange(10)
    assert isinstance(a.data.mem._owner, rmm.DeviceBuffer)


@pytest.mark.parametrize("stream", ["null", "async"])
def test_rmm_pool_cupy_allocator_with_stream(stream):
    """Pool-backed allocations work on both the null and a new stream."""
    cupy = pytest.importorskip("cupy")

    rmm.reinitialize(pool_allocator=True)
    cupy.cuda.set_allocator(rmm_cupy_allocator)

    if stream == "null":
        stream = cupy.cuda.stream.Stream.null
    else:
        stream = cupy.cuda.stream.Stream()

    with stream:
        m = rmm_cupy_allocator(42)
        assert m.mem.size == 42
        assert m.mem.ptr != 0
        assert isinstance(m.mem._owner, rmm.DeviceBuffer)

        m = rmm_cupy_allocator(0)
        assert m.mem.size == 0
        assert m.mem.ptr == 0
        assert isinstance(m.mem._owner, rmm.DeviceBuffer)

        a = cupy.arange(10)
        assert isinstance(a.data.mem._owner, rmm.DeviceBuffer)

    # Deleting all allocations known by the RMM pool is required
    # before rmm.reinitialize(), otherwise it may segfault.
    del a

    rmm.reinitialize()


def test_rmm_pool_cupy_allocator_stream_lifetime():
    """Dropping the stream before its allocations must be safe."""
    cupy = pytest.importorskip("cupy")

    rmm.reinitialize(pool_allocator=True)
    cupy.cuda.set_allocator(rmm_cupy_allocator)

    stream = cupy.cuda.stream.Stream()

    # Allocate on the stream, then delete the stream object FIRST: the
    # allocation's later deallocation must not depend on the Python stream
    # object still being alive. The del order here is the point of the test.
    stream.use()
    x = cupy.arange(10)
    del stream

    del x
namespace rmm::test {
namespace {

using cuda_async_view_mr = rmm::mr::cuda_async_view_memory_resource;

// static property checks
// NOTE(review): the template arguments of these static_asserts appear to
// have been lost in formatting — confirm against upstream.
static_assert(
  rmm::detail::polyfill::resource_with);
static_assert(
  rmm::detail::polyfill::async_resource_with);

// Wrapping the device's default mempool must allow allocate/deallocate.
TEST(PoolTest, UsePool)
{
  cudaMemPool_t memPool{};
  RMM_CUDA_TRY(cudaDeviceGetDefaultMemPool(&memPool, rmm::get_current_cuda_device().value()));

  const auto pool_init_size{100};
  cuda_async_view_mr mr{memPool};
  void* ptr = mr.allocate(pool_init_size);
  mr.deallocate_sync(ptr, pool_init_size);
  RMM_CUDA_TRY(cudaDeviceSynchronize());
}

// The view resource must NOT own the pool: destroying the pool after the MR
// goes out of scope must still succeed.
TEST(PoolTest, NotTakingOwnershipOfPool)
{
  cudaMemPoolProps poolProps = {};
  poolProps.allocType     = cudaMemAllocationTypePinned;
  poolProps.location.id   = rmm::get_current_cuda_device().value();
  poolProps.location.type = cudaMemLocationTypeDevice;

  cudaMemPool_t memPool{};

  RMM_CUDA_TRY(cudaMemPoolCreate(&memPool, &poolProps));

  {
    const auto pool_init_size{100};
    cuda_async_view_mr mr{memPool};
    void* ptr = mr.allocate(pool_init_size);
    mr.deallocate_sync(ptr, pool_init_size);
    RMM_CUDA_TRY(cudaDeviceSynchronize());
  }

  // If the MR had (wrongly) destroyed the pool, this destroy would fail.
  auto destroy_valid_pool = [&]() {
    auto result = cudaMemPoolDestroy(memPool);
    RMM_EXPECTS(result == cudaSuccess, "Pool wrapper did destroy pool");
  };

  EXPECT_NO_THROW(destroy_valid_pool());
}

// Constructing a view over a null pool handle is a logic error.
TEST(PoolTest, ThrowIfNullptrPool)
{
  auto construct_mr = []() {
    cudaMemPool_t memPool{nullptr};
    cuda_async_view_mr mr{memPool};
  };

  EXPECT_THROW(construct_mr(), rmm::logic_error);
}

}  // namespace
}  // namespace rmm::test
// explicit instantiation for test coverage purposes
// NOTE(review): template arguments appear to have been lost in formatting
// throughout this file (e.g. thrust_allocator<...>, device_vector<...>) —
// confirm against upstream before relying on this text.
template class rmm::mr::thrust_allocator;

namespace rmm::test {
namespace {

// Parameterized over the upstream MR kinds listed in INSTANTIATE below;
// mr_ref_test supplies `this->ref` for each.
struct allocator_test : public mr_ref_test {};

// Disable until we support resource_ref with set_current_device_resource
TEST_P(allocator_test, first)
{
  rmm::mr::set_current_device_resource_ref(this->ref);
  auto const num_ints{100};
  // Each element is 1, so the reduction equals the element count.
  rmm::device_vector ints(num_ints, 1);
  EXPECT_EQ(num_ints, thrust::reduce(ints.begin(), ints.end()));
}

// Default-constructed allocator uses the current device resource and stream.
TEST_P(allocator_test, defaults)
{
  rmm::mr::set_current_device_resource_ref(this->ref);
  rmm::mr::thrust_allocator allocator(rmm::cuda_stream_default);
  EXPECT_EQ(allocator.stream(), rmm::cuda_stream_default);
  EXPECT_EQ(allocator.get_upstream_resource(),
            rmm::device_async_resource_ref{rmm::mr::get_current_device_resource_ref()});
}

// Destruction must be safe even when a different device is current.
TEST_P(allocator_test, multi_device)
{
  if (rmm::get_num_cuda_devices() < 2) { GTEST_SKIP() << "Needs at least two devices"; }
  cuda_set_device_raii with_device{rmm::get_current_cuda_device()};
  rmm::cuda_stream stream{};
  // make allocator on device-0
  rmm::mr::thrust_allocator allocator(stream.view(), this->ref);
  auto const size{100};
  EXPECT_NO_THROW([&]() {
    auto vec = rmm::device_vector(size, allocator);
    // Destruct with device-1 active
    RMM_CUDA_TRY(cudaSetDevice(1));
  }());
}

INSTANTIATE_TEST_SUITE_P(
  ThrustAllocatorTests,
  allocator_test,
  ::testing::Values("CUDA", "CUDA_Async", "Managed", "Pool", "Arena", "Binning"),
  [](auto const& info) { return info.param; });

}  // namespace
}  // namespace rmm::test
namespace RMM_NAMESPACE {
namespace mr::detail {

/**
 * @brief Free list specialized for fixed-size blocks.
 *
 * Because every block has the same size, no ordering or coalescing is needed:
 * insert is a plain push_back and get_block pops the front (the size argument
 * is ignored).
 */
struct fixed_size_free_list : free_list {
  fixed_size_free_list()           = default;
  ~fixed_size_free_list() override = default;

  fixed_size_free_list(fixed_size_free_list const&)            = delete;
  fixed_size_free_list& operator=(fixed_size_free_list const&) = delete;
  fixed_size_free_list(fixed_size_free_list&&)                 = delete;
  fixed_size_free_list& operator=(fixed_size_free_list&&)      = delete;

  /**
   * @brief Construct a new free_list from range defined by input iterators
   *
   * @tparam InputIt Input iterator
   * @param first The start of the range to insert into the free_list
   * @param last The end of the range to insert into the free_list
   */
  // NOTE(review): the template parameter list (likely <typename InputIt>)
  // appears to have been lost in formatting — confirm against upstream.
  template
  fixed_size_free_list(InputIt first, InputIt last)
  {
    std::for_each(first, last, [this](block_type const& block) { insert(block); });
  }

  /**
   * @brief Inserts a block into the `free_list` in the correct order, coalescing it with the
   * preceding and following blocks if either is contiguous.
   *
   * @param block The block to insert.
   */
  void insert(block_type const& block) { push_back(block); }

  /**
   * @brief Inserts blocks from another free list into this free_list.
   *
   * @param other The free_list to insert into this free_list.
   */
  void insert(free_list&& other) { splice(cend(), std::move(other)); }

  /**
   * @brief Returns the first block in the free list.
   *
   * @param size The size in bytes of the desired block (unused).
   * @return A block large enough to store `size` bytes.
   */
  block_type get_block(std::size_t size)
  {
    // All blocks are the same size, so any block satisfies the request;
    // an empty list yields a default (invalid) block.
    if (is_empty()) { return block_type{}; }
    block_type block = *begin();
    pop_front();
    return block;
  }
};

}  // namespace mr::detail
}  // namespace RMM_NAMESPACE
"LoggingResourceAdaptor", 51 | "ManagedMemoryResource", 52 | "PinnedHostMemoryResource", 53 | "PoolMemoryResource", 54 | "PrefetchResourceAdaptor", 55 | "SamHeadroomMemoryResource", 56 | "StatisticsResourceAdaptor", 57 | "SystemMemoryResource", 58 | "TrackingResourceAdaptor", 59 | "UpstreamResourceAdaptor", 60 | "available_device_memory", 61 | "disable_logging", 62 | "enable_logging", 63 | "experimental", 64 | "get_current_device_resource", 65 | "get_current_device_resource_type", 66 | "get_log_filenames", 67 | "get_per_device_resource", 68 | "get_per_device_resource_type", 69 | "is_initialized", 70 | "set_current_device_resource", 71 | "set_per_device_resource", 72 | ] 73 | -------------------------------------------------------------------------------- /python/rmm/rmm/tests/test_helpers.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | """Shared test utilities and constants for RMM tests.""" 5 | 6 | import numpy as np 7 | from cuda.bindings import runtime 8 | from numba import cuda 9 | 10 | import rmm 11 | from rmm.allocators.numba import RMMNumbaManager 12 | 13 | cuda.set_memory_manager(RMMNumbaManager) 14 | 15 | # Device capability checks 16 | _SYSTEM_MEMORY_SUPPORTED = rmm._cuda.gpu.getDeviceAttribute( 17 | runtime.cudaDeviceAttr.cudaDevAttrPageableMemoryAccess, 18 | rmm._cuda.gpu.getDevice(), 19 | ) 20 | 21 | _IS_INTEGRATED_MEMORY_SYSTEM = rmm._cuda.gpu.getDeviceAttribute( 22 | runtime.cudaDeviceAttr.cudaDevAttrIntegrated, rmm._cuda.gpu.getDevice() 23 | ) 24 | 25 | _CONCURRENT_MANAGED_ACCESS_SUPPORTED = rmm._cuda.gpu.getDeviceAttribute( 26 | runtime.cudaDeviceAttr.cudaDevAttrConcurrentManagedAccess, 27 | rmm._cuda.gpu.getDevice(), 28 | ) 29 | 30 | _ASYNC_MANAGED_MEMORY_SUPPORTED = ( 31 | _CONCURRENT_MANAGED_ACCESS_SUPPORTED 32 | and rmm._cuda.gpu.runtimeGetVersion() >= 13000 33 | ) 34 | 35 | 
_MEMORY_POOL_HANDLE_TYPES_SUPPORTED = rmm._cuda.gpu.getDeviceAttribute( 36 | runtime.cudaDeviceAttr.cudaDevAttrMemoryPoolSupportedHandleTypes, 37 | rmm._cuda.gpu.getDevice(), 38 | ) 39 | 40 | 41 | def array_tester(dtype, nelem, alloc): 42 | """Test helper for array allocation and copy operations.""" 43 | # data 44 | h_in = np.full(nelem, 3.2, dtype) 45 | h_result = np.empty(nelem, dtype) 46 | 47 | d_in = alloc.to_device(h_in) 48 | d_result = alloc.device_array_like(d_in) 49 | 50 | d_result.copy_to_device(d_in) 51 | h_result = d_result.copy_to_host() 52 | 53 | np.testing.assert_array_equal(h_result, h_in) 54 | 55 | 56 | def assert_prefetched(buffer, device_id): 57 | """Check if a buffer has been prefetched to a specific device.""" 58 | err, dev = runtime.cudaMemRangeGetAttribute( 59 | 4, 60 | runtime.cudaMemRangeAttribute.cudaMemRangeAttributeLastPrefetchLocation, 61 | buffer.ptr, 62 | buffer.size, 63 | ) 64 | assert err == runtime.cudaError_t.cudaSuccess 65 | assert dev == device_id 66 | 67 | 68 | # Test parameter sets 69 | _dtypes = [ 70 | np.int8, 71 | np.int16, 72 | np.int32, 73 | np.int64, 74 | np.float32, 75 | np.float64, 76 | np.bool_, 77 | ] 78 | _nelems = [1, 2, 7, 8, 9, 32, 128] 79 | _allocs = [cuda] 80 | -------------------------------------------------------------------------------- /python/rmm/rmm/pylibrmm/device_buffer.pxd: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from libc.stdint cimport uintptr_t 5 | from libcpp.memory cimport unique_ptr 6 | 7 | from rmm.librmm.device_buffer cimport device_buffer 8 | from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource 9 | from rmm.pylibrmm.stream cimport Stream 10 | 11 | 12 | cdef class DeviceBuffer: 13 | cdef unique_ptr[device_buffer] c_obj 14 | 15 | # Holds a reference to the DeviceMemoryResource used for allocation. 
    # Ensures the MR does not get destroyed before this DeviceBuffer. `mr` is
    # needed for deallocation
    cdef DeviceMemoryResource mr

    # Holds a reference to the stream used by the underlying `device_buffer`.
    # Ensures the stream does not get destroyed before this DeviceBuffer
    cdef Stream stream

    # Wrap an existing C++ device_buffer, taking ownership of it.
    @staticmethod
    cdef DeviceBuffer c_from_unique_ptr(
        unique_ptr[device_buffer] ptr,
        Stream stream=*,
        DeviceMemoryResource mr=*,
    )

    # Allocate a new DeviceBuffer and copy the host bytes `b` into it.
    @staticmethod
    cdef DeviceBuffer c_to_device(const unsigned char[::1] b,
                                  Stream stream=*) except *
    cpdef copy_to_host(self, ary=*, Stream stream=*)
    cpdef copy_from_host(self, ary, Stream stream=*)
    cpdef copy_from_device(self, cuda_ary, Stream stream=*)
    cpdef bytes tobytes(self, Stream stream=*)

    cdef size_t c_size(self) except *
    cpdef void reserve(self, size_t new_capacity, Stream stream=*) except *
    cpdef void resize(self, size_t new_size, Stream stream=*) except *
    cpdef size_t capacity(self) except *
    cdef void* c_data(self) except *

    # Release ownership of the underlying C++ device_buffer to the caller.
    cdef device_buffer c_release(self) except *

# Module-level helpers for copying between host memory and raw device
# pointers without constructing intermediate Python objects.
cpdef DeviceBuffer to_device(const unsigned char[::1] b,
                             Stream stream=*)
cpdef void copy_ptr_to_host(uintptr_t db,
                            unsigned char[::1] hb,
                            Stream stream=*) except *

cpdef void copy_host_to_ptr(const unsigned char[::1] hb,
                            uintptr_t db,
                            Stream stream=*) except *

cpdef void copy_device_to_ptr(uintptr_t d_src,
                              uintptr_t d_dst,
                              size_t count,
                              Stream stream=*) except *
--------------------------------------------------------------------------------
/python/rmm/rmm/pylibrmm/memory_resource/_memory_resource.pxd:
--------------------------------------------------------------------------------
# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0

from libcpp.memory cimport shared_ptr

from rmm.librmm.memory_resource cimport device_memory_resource


# Base class: owns a shared_ptr to the underlying C++
# device_memory_resource; get_mr() exposes the raw pointer for nogil use.
cdef class DeviceMemoryResource:
    cdef shared_ptr[device_memory_resource] c_obj
    cdef device_memory_resource* get_mr(self) noexcept nogil

# Base for adaptors that wrap another MR. The `upstream_mr` Python
# reference keeps the wrapped resource alive at least as long as the
# adaptor itself.
cdef class UpstreamResourceAdaptor(DeviceMemoryResource):
    cdef readonly DeviceMemoryResource upstream_mr

    cpdef DeviceMemoryResource get_upstream(self)

cdef class ArenaMemoryResource(UpstreamResourceAdaptor):
    pass

cdef class CudaMemoryResource(DeviceMemoryResource):
    pass

cdef class ManagedMemoryResource(DeviceMemoryResource):
    pass

cdef class SystemMemoryResource(DeviceMemoryResource):
    pass

cdef class PinnedHostMemoryResource(DeviceMemoryResource):
    pass

cdef class SamHeadroomMemoryResource(DeviceMemoryResource):
    pass

cdef class CudaAsyncMemoryResource(DeviceMemoryResource):
    pass

cdef class CudaAsyncViewMemoryResource(DeviceMemoryResource):
    pass

cdef class PoolMemoryResource(UpstreamResourceAdaptor):
    pass

cdef class FixedSizeMemoryResource(UpstreamResourceAdaptor):
    pass

cdef class BinningMemoryResource(UpstreamResourceAdaptor):

    # Python references to the per-bin resources (keeps them alive).
    cdef readonly list _bin_mrs

    cpdef add_bin(
        self,
        size_t allocation_size,
        DeviceMemoryResource bin_resource=*)

cdef class CallbackMemoryResource(DeviceMemoryResource):
    # User-supplied Python callables invoked for allocate/deallocate.
    cdef object _allocate_func
    cdef object _deallocate_func

cdef class LimitingResourceAdaptor(UpstreamResourceAdaptor):
    pass

cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor):
    cdef object _log_file_name
    cpdef get_file_name(self)
    cpdef flush(self)

cdef class StatisticsResourceAdaptor(UpstreamResourceAdaptor):
    pass

cdef class TrackingResourceAdaptor(UpstreamResourceAdaptor):
    pass

cdef class FailureCallbackResourceAdaptor(UpstreamResourceAdaptor):
    # Python callable invoked when an allocation fails.
    cdef object _callback

cdef class PrefetchResourceAdaptor(UpstreamResourceAdaptor):
    pass

cpdef DeviceMemoryResource get_current_device_resource()
--------------------------------------------------------------------------------