├── .flake8 ├── .flake8.cython ├── .github ├── CODEOWNERS ├── copy-pr-bot.yaml ├── ops-bot.yaml └── workflows │ ├── build.yaml │ ├── pr.yaml │ ├── test.yaml │ └── trigger-breaking-change-alert.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── LICENSE ├── MANIFEST.in ├── README.md ├── VERSION ├── ci ├── build_python.sh ├── build_wheel.sh ├── check_style.sh ├── release │ └── update-version.sh ├── run_benchmark_pytests.sh ├── run_pytests.sh ├── test_python.sh ├── test_wheel.sh └── validate_wheel.sh ├── conda ├── environments │ └── builddocs.yml └── recipes │ └── ucx-py │ ├── conda_build_config.yaml │ └── recipe.yaml ├── debug-tests ├── README.md ├── client.py ├── debug_utils.py ├── multi-node-workers.sh ├── scheduler.sh ├── server.py ├── test_endpoint_error_callback.py ├── test_send_recv_many_workers.py └── utils.py ├── dependencies.yaml ├── docker ├── Dockerfile ├── README.md ├── UCXPy-MOFED.dockerfile ├── UCXPy-rdma-core.dockerfile ├── bench-all.sh ├── build-ucx-py.sh ├── build-ucx.sh ├── run.sh └── ucx-py-cuda11.5.yml ├── docs ├── Makefile └── source │ ├── _static │ └── Architecture.png │ ├── api.rst │ ├── conf.py │ ├── configuration.rst │ ├── deployment.rst │ ├── glossary.rst │ ├── index.rst │ ├── install.rst │ ├── os-limits.rst │ ├── quickstart.rst │ ├── send-recv.rst │ ├── transport-monitoring.rst │ └── ucx-debug.rst ├── examples ├── cudf-example.py └── cupy-example.py ├── pyproject.toml ├── setup.py ├── tests ├── conftest.py ├── test_benchmark_cluster.py ├── test_config.py ├── test_custom_send_recv.py ├── test_disconnect.py ├── test_endpoint.py ├── test_from_worker_address.py ├── test_from_worker_address_error.py ├── test_info.py ├── test_multiple_nodes.py ├── test_probe.py ├── test_reset.py ├── test_rma.py ├── test_send_recv.py ├── test_send_recv_am.py ├── test_send_recv_two_workers.py ├── test_shutdown.py ├── test_tags.py ├── test_ucx_getters.py ├── test_version.py └── utils.py └── ucp ├── VERSION ├── __init__.py ├── _libs ├── 
__init__.pxd ├── __init__.py ├── arr.pxd ├── arr.pyi ├── arr.pyx ├── exceptions.py ├── packed_remote_key.pyx ├── src │ ├── c_util.c │ └── c_util.h ├── tests │ ├── test_address_object.py │ ├── test_arr.py │ ├── test_cancel.py │ ├── test_config.py │ ├── test_endpoint.py │ ├── test_listener.py │ ├── test_mem.py │ ├── test_peer_send_recv.py │ ├── test_probe.py │ ├── test_rma.py │ ├── test_server_client.py │ └── test_server_client_am.py ├── transfer_am.pyx ├── transfer_common.pyx ├── transfer_stream.pyx ├── transfer_tag.pyx ├── typedefs.pyx ├── ucx_address.pyx ├── ucx_api.pyi ├── ucx_api.pyx ├── ucx_api_dep.pxd ├── ucx_context.pyx ├── ucx_endpoint.pyx ├── ucx_listener.pyx ├── ucx_memory_handle.pyx ├── ucx_object.pyx ├── ucx_request.pyx ├── ucx_rkey.pyx ├── ucx_rma.pyx ├── ucx_worker.pyx ├── ucx_worker_cb.pyx ├── ucxio.pyx ├── utils.py ├── utils.pyx └── utils_test.py ├── _version.py ├── benchmarks ├── README.md ├── __init__.py ├── asyncssh.py ├── backends │ ├── __init__.py │ ├── base.py │ ├── tornado.py │ ├── ucp_async.py │ └── ucp_core.py ├── cudf_merge.py ├── send_recv.py └── utils.py ├── comm.py ├── continuous_ucx_progress.py ├── core.py ├── exceptions.py └── utils.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E203,E211,E225,E226,E227,E901,E999,W503,W504 3 | # E203: whitespace before ':' (black format differs for slices) 4 | # E211: whitespace before '(' (used in multi-line imports) 5 | # E225: Missing whitespace around operators (breaks cython casting syntax like ) 6 | # E226: Missing whitespace around arithmetic operators (breaks cython pointer syntax like int*) 7 | # E227: Missing whitespace around bitwise or shift operator (Can also break casting syntax) 8 | # E999: invalid syntax (works for Python, not Cython) 9 | # W503: line break before binary operator (breaks lines that start with a pointer) 10 | # W504: line break after binary operator (breaks lines that end with a pointer) 11 | 12 | 
exclude = 13 | .eggs, 14 | *.egg, 15 | build, 16 | __init__.py, 17 | 18 | max-line-length = 88 19 | 20 | # Ignore black/flake8-pyi conflicts 21 | per-file-ignores = 22 | *.pyi:E301 E302 E704 23 | -------------------------------------------------------------------------------- /.flake8.cython: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2018-2021, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # 16 | 17 | [flake8] 18 | filename = *.pyx, *.pxd 19 | exclude = *.egg, build, docs, .git 20 | ignore = E999, E225, E226, E227, W503, W504, E211 21 | 22 | max-line-length = 88 23 | 24 | # Rules ignored: 25 | # E999: invalid syntax (works for Python, not Cython) 26 | # E211: whitespace before '(' (used in multi-line imports) 27 | # E225: Missing whitespace around operators (breaks cython casting syntax like ) 28 | # E226: Missing whitespace around arithmetic operators (breaks cython pointer syntax like int*) 29 | # E227: Missing whitespace around bitwise or shift operator (Can also break casting syntax) 30 | # W503: line break before binary operator (breaks lines that start with a pointer) 31 | # W504: line break after binary operator (breaks lines that end with a pointer) 32 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | #python code owners 2 | ucp/ @rapidsai/ucxpy-python-codeowners 3 | tests/ @rapidsai/ucxpy-python-codeowners 4 | examples/ @rapidsai/ucxpy-python-codeowners 5 | benchmarks/ @rapidsai/ucxpy-python-codeowners 6 | 7 | #CI code owners 8 | /.github/ @rapidsai/ci-codeowners 9 | /ci/ @rapidsai/ci-codeowners 10 | /.pre-commit-config.yaml @rapidsai/ci-codeowners 11 | 12 | #packaging code owners 13 | /.devcontainer/ @rapidsai/packaging-codeowners 14 | /conda/ @rapidsai/packaging-codeowners 15 | /dependencies.yaml @rapidsai/packaging-codeowners 16 | /build.sh @rapidsai/packaging-codeowners 17 | pyproject.toml @rapidsai/packaging-codeowners 18 | -------------------------------------------------------------------------------- /.github/copy-pr-bot.yaml: -------------------------------------------------------------------------------- 1 | # Configuration file for `copy-pr-bot` GitHub App 2 | # https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/ 3 | 4 | enabled: true 5 | auto_sync_draft: false 6 | 
-------------------------------------------------------------------------------- /.github/ops-bot.yaml: -------------------------------------------------------------------------------- 1 | # This file controls which features from the `ops-bot` repository below are enabled. 2 | # - https://github.com/rapidsai/ops-bot 3 | 4 | auto_merger: true 5 | branch_checker: false 6 | label_checker: true 7 | release_drafter: false 8 | recently_updated: true 9 | forward_merger: true 10 | -------------------------------------------------------------------------------- /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: 4 | push: 5 | branches: 6 | - "branch-*" 7 | tags: 8 | - v[0-9].[0-9][0-9].[0-9][0-9] 9 | workflow_dispatch: 10 | inputs: 11 | branch: 12 | required: true 13 | type: string 14 | date: 15 | required: true 16 | type: string 17 | sha: 18 | required: true 19 | type: string 20 | build_type: 21 | type: string 22 | default: nightly 23 | 24 | concurrency: 25 | group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }} 26 | cancel-in-progress: true 27 | 28 | jobs: 29 | conda-python-build: 30 | secrets: inherit 31 | uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.08 32 | with: 33 | build_type: ${{ inputs.build_type || 'branch' }} 34 | branch: ${{ inputs.branch }} 35 | date: ${{ inputs.date }} 36 | script: ci/build_python.sh 37 | sha: ${{ inputs.sha }} 38 | upload-conda: 39 | needs: [conda-python-build] 40 | secrets: inherit 41 | uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.08 42 | with: 43 | build_type: ${{ inputs.build_type || 'branch' }} 44 | branch: ${{ inputs.branch }} 45 | date: ${{ inputs.date }} 46 | sha: ${{ inputs.sha }} 47 | wheel-build: 48 | secrets: inherit 49 | uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.08 50 | with: 51 | build_type: ${{ 
inputs.build_type || 'branch' }} 52 | branch: ${{ inputs.branch }} 53 | sha: ${{ inputs.sha }} 54 | date: ${{ inputs.date }} 55 | script: ci/build_wheel.sh 56 | package-name: ucx_py 57 | package-type: python 58 | wheel-publish: 59 | needs: wheel-build 60 | secrets: inherit 61 | uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.08 62 | with: 63 | build_type: ${{ inputs.build_type || 'branch' }} 64 | branch: ${{ inputs.branch }} 65 | sha: ${{ inputs.sha }} 66 | date: ${{ inputs.date }} 67 | package-name: ucx_py 68 | package-type: python 69 | -------------------------------------------------------------------------------- /.github/workflows/pr.yaml: -------------------------------------------------------------------------------- 1 | name: pr 2 | 3 | on: 4 | push: 5 | branches: 6 | - "pull-request/[0-9]+" 7 | 8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.ref }} 10 | cancel-in-progress: true 11 | 12 | jobs: 13 | pr-builder: 14 | needs: 15 | - checks 16 | - conda-python-build 17 | - conda-python-tests 18 | - wheel-build 19 | - wheel-tests 20 | - telemetry-setup 21 | secrets: inherit 22 | uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.08 23 | with: 24 | needs: ${{ toJSON(needs) }} 25 | telemetry-setup: 26 | runs-on: ubuntu-latest 27 | continue-on-error: true 28 | env: 29 | OTEL_SERVICE_NAME: "pr-ucx-py" 30 | steps: 31 | - name: Telemetry setup 32 | # This gate is here and not at the job level because we need the job to not be skipped, 33 | # since other jobs depend on it. 
34 | if: ${{ vars.TELEMETRY_ENABLED == 'true' }} 35 | uses: rapidsai/shared-actions/telemetry-dispatch-stash-base-env-vars@main 36 | checks: 37 | secrets: inherit 38 | needs: telemetry-setup 39 | uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.08 40 | with: 41 | ignored_pr_jobs: telemetry-summarize 42 | conda-python-build: 43 | needs: checks 44 | secrets: inherit 45 | uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.08 46 | with: 47 | build_type: pull-request 48 | script: ci/build_python.sh 49 | conda-python-tests: 50 | needs: conda-python-build 51 | secrets: inherit 52 | uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.08 53 | with: 54 | build_type: pull-request 55 | container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" 56 | script: ci/test_python.sh 57 | wheel-build: 58 | needs: checks 59 | secrets: inherit 60 | uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.08 61 | with: 62 | build_type: pull-request 63 | script: ci/build_wheel.sh 64 | package-name: ucx_py 65 | package-type: python 66 | wheel-tests: 67 | needs: wheel-build 68 | secrets: inherit 69 | uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.08 70 | with: 71 | build_type: pull-request 72 | script: ci/test_wheel.sh 73 | container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" 74 | telemetry-summarize: 75 | # This job must use a self-hosted runner to record telemetry traces. 
76 | runs-on: linux-amd64-cpu4 77 | needs: pr-builder 78 | if: ${{ vars.TELEMETRY_ENABLED == 'true' && !cancelled() }} 79 | continue-on-error: true 80 | steps: 81 | - name: Telemetry summarize 82 | uses: rapidsai/shared-actions/telemetry-dispatch-summarize@main 83 | -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: test 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | branch: 7 | required: true 8 | type: string 9 | date: 10 | required: true 11 | type: string 12 | sha: 13 | required: true 14 | type: string 15 | build_type: 16 | type: string 17 | default: nightly 18 | 19 | jobs: 20 | conda-python-tests: 21 | secrets: inherit 22 | uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.08 23 | with: 24 | build_type: ${{ inputs.build_type }} 25 | branch: ${{ inputs.branch }} 26 | date: ${{ inputs.date }} 27 | script: ci/test_python.sh 28 | sha: ${{ inputs.sha }} 29 | container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" 30 | wheel-tests: 31 | secrets: inherit 32 | uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.08 33 | with: 34 | build_type: ${{ inputs.build_type }} 35 | branch: ${{ inputs.branch }} 36 | date: ${{ inputs.date }} 37 | sha: ${{ inputs.sha }} 38 | script: ci/test_wheel.sh 39 | container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" 40 | -------------------------------------------------------------------------------- /.github/workflows/trigger-breaking-change-alert.yaml: -------------------------------------------------------------------------------- 1 | name: Trigger Breaking Change Notifications 2 | 3 | on: 4 | pull_request_target: 5 | types: 6 | - closed 7 | - reopened 8 | - labeled 9 | - unlabeled 10 | 11 | jobs: 12 | trigger-notifier: 13 | if: 
contains(github.event.pull_request.labels.*.name, 'breaking') 14 | secrets: inherit 15 | uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.08 16 | with: 17 | sender_login: ${{ github.event.sender.login }} 18 | sender_avatar: ${{ github.event.sender.avatar_url }} 19 | repo: ${{ github.repository }} 20 | pr_number: ${{ github.event.pull_request.number }} 21 | pr_title: "${{ github.event.pull_request.title }}" 22 | pr_body: "${{ github.event.pull_request.body || '_Empty PR description_' }}" 23 | pr_base_ref: ${{ github.event.pull_request.base.ref }} 24 | pr_author: ${{ github.event.pull_request.user.login }} 25 | event_action: ${{ github.event.action }} 26 | pr_merged: ${{ github.event.pull_request.merged }} 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.*~ 2 | build 3 | *.so 4 | ucp/_libs/*.a 5 | ucp/_libs/*.o 6 | ucp/_libs/*.c 7 | _build 8 | 9 | dask-worker-space 10 | __pytestcache__ 11 | __pycache__ 12 | *.egg-info/ 13 | final_dist/ 14 | dist/ 15 | .vscode 16 | 17 | *.sw[po] 18 | *.whl 19 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pycqa/isort 3 | rev: 5.12.0 4 | hooks: 5 | - id: isort 6 | args: ["--settings-path=pyproject.toml"] 7 | exclude: __init__.py$ 8 | types: [text] 9 | types_or: [python, cython, pyi] 10 | - repo: https://github.com/ambv/black 11 | rev: 22.3.0 12 | hooks: 13 | - id: black 14 | - repo: https://github.com/PyCQA/flake8 15 | rev: 7.1.1 16 | hooks: 17 | - id: flake8 18 | args: ["--config=.flake8"] 19 | types: [file] 20 | types_or: [python, cython] 21 | additional_dependencies: ["flake8-force"] 22 | - repo: https://github.com/rapidsai/pre-commit-hooks 23 | rev: v0.4.0 24 | hooks: 25 | - id: 
verify-copyright 26 | - id: verify-alpha-spec 27 | args: 28 | - --fix 29 | - --rapids-version=25.08 30 | - repo: https://github.com/rapidsai/dependency-file-generator 31 | rev: v1.17.0 32 | hooks: 33 | - id: rapids-dependency-file-generator 34 | args: ["--clean"] 35 | default_language_version: 36 | python: python3 37 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: "ubuntu-22.04" 5 | tools: 6 | python: "mambaforge-22.9" 7 | jobs: 8 | post_create_environment: 9 | # explicitly passing matrix-entry so that 'libucx' (with appropriate CUDA suffix) 10 | # is pulled in, and therefore tested in this no-CUDA environment 11 | - | 12 | pip install \ 13 | -C rapidsai.matrix-entry="cuda=12.x;cuda_suffixed=true" \ 14 | . 15 | 16 | conda: 17 | environment: conda/environments/builddocs.yml 18 | 19 | sphinx: 20 | configuration: docs/source/conf.py 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019-2021 NVIDIA CORPORATION. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions 5 | are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in the 11 | documentation and/or other materials provided with the distribution. 12 | 3. Neither the name of the copyright holder nor the names of its 13 | contributors may be used to endorse or promote products derived from 14 | this software without specific prior written permission. 
15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 22 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 24 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 25 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # Python type stubs 2 | recursive-include ucp *.pyi 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![https://ucx-py.readthedocs.io/en/latest/](https://readthedocs.org/projects/ucx-py/badge/ "ReadTheDocs")]( https://ucx-py.readthedocs.io/en/latest/ ) 2 | 3 | # Python Bindings for UCX 4 | 5 | ## Installing 6 | 7 | Users can either [install with Conda]( https://ucx-py.readthedocs.io/en/latest/install.html#conda ) or [build from source]( https://ucx-py.readthedocs.io/en/latest/install.html#source ). 8 | 9 | ## Testing 10 | 11 | To run ucx-py's tests, just use ``pytest``: 12 | 13 | ```bash 14 | pytest -v 15 | ``` 16 | 17 | ### TCP Support 18 | 19 | In order to use TCP add `tcp` to `UCX_TLS` and set `UCXPY_IFNAME` to the network interface you want to use. 
Some setup examples: 20 | 21 | ```bash 22 | # TCP using "eth0" and CUDA support 23 | export UCX_TLS=tcp,cuda_copy,cuda_ipc 24 | export UCXPY_IFNAME="eth0" 25 | 26 | # InfiniBand using "ib0" and CUDA support 27 | export UCX_TLS=rc,cuda_copy,cuda_ipc 28 | export UCXPY_IFNAME="ib0" 29 | 30 | # TCP using "eno0" and no CUDA support 31 | export UCX_TLS=tcp 32 | export UCXPY_IFNAME="eno0" 33 | ``` 34 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 0.45.0 2 | -------------------------------------------------------------------------------- /ci/build_python.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | 4 | set -euo pipefail 5 | 6 | source rapids-date-string 7 | 8 | rapids-print-env 9 | 10 | rapids-generate-version > ./VERSION 11 | RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) 12 | export RAPIDS_PACKAGE_VERSION 13 | 14 | # populates `RATTLER_CHANNELS` array and `RATTLER_ARGS` array 15 | source rapids-rattler-channel-string 16 | 17 | rapids-logger "Building ucx-py" 18 | 19 | # Need `--experimental` flag to use `load_from_file` and `git.head_rev` 20 | rattler-build build --recipe conda/recipes/ucx-py \ 21 | "${RATTLER_ARGS[@]}" \ 22 | "${RATTLER_CHANNELS[@]}" 23 | 24 | # remove build_cache directory to avoid uploading the entire source tree 25 | # tracked in https://github.com/prefix-dev/rattler-build/issues/1424 26 | rm -rf "$RAPIDS_CONDA_BLD_OUTPUT_DIR"/build_cache 27 | -------------------------------------------------------------------------------- /ci/build_wheel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2023-2025, NVIDIA CORPORATION. 
3 | 4 | set -euo pipefail 5 | 6 | source rapids-date-string 7 | source rapids-init-pip 8 | 9 | rapids-generate-version > ./VERSION 10 | 11 | rapids-pip-retry wheel \ 12 | -v \ 13 | -w dist \ 14 | --no-deps \ 15 | --disable-pip-version-check \ 16 | --config-settings rapidsai.disable-cuda=false \ 17 | . 18 | 19 | python -m auditwheel repair \ 20 | -w "${RAPIDS_WHEEL_BLD_OUTPUT_DIR}" \ 21 | --exclude "libucm.so.0" \ 22 | --exclude "libucp.so.0" \ 23 | --exclude "libucs.so.0" \ 24 | --exclude "libucs_signal.so.0" \ 25 | --exclude "libuct.so.0" \ 26 | dist/* 27 | 28 | ./ci/validate_wheel.sh "${RAPIDS_WHEEL_BLD_OUTPUT_DIR}" 29 | -------------------------------------------------------------------------------- /ci/check_style.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2023, NVIDIA CORPORATION. 3 | 4 | set -euo pipefail 5 | 6 | rapids-logger "Create checks conda environment" 7 | . /opt/conda/etc/profile.d/conda.sh 8 | 9 | rapids-dependency-file-generator \ 10 | --output conda \ 11 | --file-key checks \ 12 | --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml 13 | 14 | rapids-mamba-retry env create --yes -f env.yaml -n checks 15 | conda activate checks 16 | 17 | # Run pre-commit checks 18 | pre-commit run --hook-stage manual --all-files --show-diff-on-failure 19 | -------------------------------------------------------------------------------- /ci/release/update-version.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ######################## 3 | # ucx-py Version Updater # 4 | ######################## 5 | 6 | ## Usage 7 | # bash update-version.sh 8 | 9 | 10 | # Format is Major.Minor.Patch - no leading 'v' or trailing 'a' 11 | # Example: 0.30.00 12 | NEXT_FULL_TAG=$1 13 | 14 | # Get current version 15 | CURRENT_TAG=$(git tag | grep -xE 'v[0-9\.]+' | sort --version-sort | tail -n 1 | tr -d 'v') 16 | 
CURRENT_MAJOR=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[1]}') 17 | CURRENT_MINOR=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[2]}') 18 | CURRENT_PATCH=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[3]}') 19 | CURRENT_SHORT_TAG=${CURRENT_MAJOR}.${CURRENT_MINOR} 20 | 21 | #Get . for next version 22 | NEXT_MAJOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[1]}') 23 | NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}') 24 | NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR} 25 | 26 | # Get RAPIDS version associated w/ ucx-py version 27 | NEXT_RAPIDS_SHORT_TAG="$(curl -sL https://version.gpuci.io/ucx-py/${NEXT_SHORT_TAG})" 28 | 29 | # Need to distutils-normalize the versions for some use cases 30 | NEXT_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_SHORT_TAG}'))") 31 | NEXT_FULL_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_FULL_TAG}'))") 32 | NEXT_RAPIDS_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_RAPIDS_SHORT_TAG}'))") 33 | 34 | echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG" 35 | 36 | # Inplace sed replace; workaround for Linux and Mac 37 | function sed_runner() { 38 | sed -i.bak ''"$1"'' $2 && rm -f ${2}.bak 39 | } 40 | 41 | DEPENDENCIES=( 42 | cudf 43 | rapids-dask-dependency 44 | ) 45 | UCX_PY_DEPENDENCIES=( 46 | ucx-py 47 | ) 48 | for FILE in dependencies.yaml conda/environments/*.yml; do 49 | for DEP in "${DEPENDENCIES[@]}"; do 50 | sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_RAPIDS_SHORT_TAG_PEP440}.*,>=0.0.0a0/g" "${FILE}" 51 | done 52 | for DEP in "${UCX_PY_DEPENDENCIES[@]}"; do 53 | sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0/g" "${FILE}" 54 | done 55 | done 56 | 57 | for DEP in "${DEPENDENCIES[@]}"; do 58 | sed_runner "/\"${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ 
s/==.*\"/==${NEXT_RAPIDS_SHORT_TAG_PEP440}.*,>=0.0.0a0\"/g" pyproject.toml 59 | done 60 | 61 | for FILE in .github/workflows/*.yaml; do 62 | sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_RAPIDS_SHORT_TAG}/g" "${FILE}" 63 | done 64 | 65 | echo "${NEXT_FULL_TAG_PEP440}" > VERSION 66 | 67 | sed_runner "s/--rapids-version=[[:digit:]]\{2\}.[[:digit:]]\{2\}/--rapids-version=${NEXT_RAPIDS_SHORT_TAG}/g" .pre-commit-config.yaml 68 | -------------------------------------------------------------------------------- /ci/run_benchmark_pytests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2024, NVIDIA CORPORATION. 3 | 4 | set -euo pipefail 5 | 6 | # cd to root directory to prevent repo's `ucp` directory from being used 7 | # in subsequent commands 8 | pushd / 9 | timeout 1m python -m ucp.benchmarks.send_recv -o cupy --server-dev 0 --client-dev 0 --reuse-alloc --backend ucp-async 10 | timeout 1m python -m ucp.benchmarks.send_recv -o cupy --server-dev 0 --client-dev 0 --reuse-alloc --backend ucp-core 11 | timeout 1m python -m ucp.benchmarks.cudf_merge --chunks-per-dev 4 --chunk-size 10000 --rmm-init-pool-size 2097152 12 | popd 13 | -------------------------------------------------------------------------------- /ci/run_pytests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2024, NVIDIA CORPORATION. 3 | 4 | set -euo pipefail 5 | 6 | # Support invoking run_pytests.sh outside the script directory 7 | cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ 8 | 9 | timeout 10m pytest --cache-clear -vs "$@" tests 10 | timeout 2m pytest --cache-clear -vs "$@" ucp/_libs/tests 11 | -------------------------------------------------------------------------------- /ci/test_python.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2024-2025, NVIDIA CORPORATION. 
3 | 4 | set -euo pipefail 5 | 6 | # Support invoking test_python.sh outside the script directory 7 | cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ 8 | 9 | rapids-logger "Create test conda environment using artifacts from previous job" 10 | . /opt/conda/etc/profile.d/conda.sh 11 | 12 | UCX_PY_VERSION="$(head -1 ./VERSION)" 13 | PYTHON_CHANNEL=$(rapids-download-conda-from-github python) 14 | 15 | rapids-dependency-file-generator \ 16 | --output conda \ 17 | --file-key test_python \ 18 | --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" \ 19 | --prepend-channel "${PYTHON_CHANNEL}" \ 20 | | tee env.yaml 21 | 22 | rapids-mamba-retry env create -yq -f env.yaml -n test 23 | conda activate test 24 | 25 | rapids-print-env 26 | 27 | rapids-logger "Check GPU usage" 28 | nvidia-smi 29 | 30 | rapids-logger "Check NICs" 31 | awk 'END{print $1}' /etc/hosts 32 | cat /etc/hosts 33 | 34 | run_tests() { 35 | rapids-logger "UCX Version and Build Configuration" 36 | ucx_info -v 37 | 38 | rapids-logger "Python pytest for ucx-py" 39 | 40 | # list test directory 41 | ls tests/ 42 | 43 | # Test with TCP/Sockets 44 | rapids-logger "TEST WITH TCP ONLY" 45 | ./ci/run_pytests.sh 46 | 47 | rapids-logger "Run local benchmark" 48 | # cd to root directory to prevent repo's `ucp` directory from being used 49 | # in subsequent commands 50 | ./ci/run_benchmark_pytests.sh 51 | } 52 | 53 | rapids-logger "Run tests with conda package" 54 | run_tests 55 | 56 | 57 | # The following block is untested in GH Actions 58 | TEST_UCX_MASTER=0 59 | if [[ "${TEST_UCX_MASTER}" == 1 ]]; then 60 | rapids-logger "Build UCX master" 61 | git clone https://github.com/openucx/ucx ucx-master 62 | pushd ucx-master 63 | ./autogen.sh 64 | mkdir build 65 | pushd build 66 | ../contrib/configure-release --prefix="${CONDA_PREFIX}" --with-cuda="${CUDA_HOME}" --enable-mt 67 | make -j install 68 | 69 | rapids-logger "Build UCX-Py" 70 | popd; popd 71 | git clean -ffdx 72 | python setup.py 
build_ext --inplace 73 | rapids-pip-retry install -e . 74 | 75 | rapids-logger "Run tests with pip package against ucx master" 76 | run_tests 77 | fi 78 | -------------------------------------------------------------------------------- /ci/test_wheel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | 4 | set -eoxu pipefail 5 | 6 | source rapids-init-pip 7 | 8 | RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" 9 | PYTHON_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="ucx_py_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github python) 10 | 11 | # echo to expand wildcard before adding `[extra]` requires for pip 12 | rapids-pip-retry install $(echo "${PYTHON_WHEELHOUSE}"/ucx_py*.whl)[test] 13 | 14 | cd tests 15 | timeout 10m python -m pytest --cache-clear -vs . 16 | cd ../ucp 17 | timeout 2m python -m pytest --cache-clear -vs ./_libs/tests/ 18 | -------------------------------------------------------------------------------- /ci/validate_wheel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2024, NVIDIA CORPORATION. 
3 | 4 | set -euo pipefail 5 | 6 | wheel_dir_relative_path=$1 7 | 8 | rapids-logger "validate packages with 'pydistcheck'" 9 | 10 | pydistcheck \ 11 | --inspect \ 12 | "$(echo ${wheel_dir_relative_path}/*.whl)" 13 | 14 | rapids-logger "validate packages with 'twine'" 15 | 16 | twine check \ 17 | --strict \ 18 | "$(echo ${wheel_dir_relative_path}/*.whl)" 19 | -------------------------------------------------------------------------------- /conda/environments/builddocs.yml: -------------------------------------------------------------------------------- 1 | name: ucx_dev 2 | channels: 3 | - rapidsai 4 | - nvidia 5 | - conda-forge 6 | dependencies: 7 | # the ceiling on sphinx can be removed when https://github.com/spatialaudio/nbsphinx/issues/825 is resolved 8 | - sphinx>=8.0,<8.2.0 9 | - sphinx-markdown-tables 10 | - sphinx_rtd_theme 11 | - sphinxcontrib-websupport 12 | - nbsphinx 13 | - numpydoc 14 | - recommonmark 15 | - pandoc<=2.0.0 16 | - pip 17 | - cython 18 | -------------------------------------------------------------------------------- /conda/recipes/ucx-py/conda_build_config.yaml: -------------------------------------------------------------------------------- 1 | c_compiler_version: 2 | - 13 3 | 4 | cxx_compiler_version: 5 | - 13 6 | 7 | ucx: 8 | - "==1.15.*" 9 | -------------------------------------------------------------------------------- /conda/recipes/ucx-py/recipe.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. 2 | schema_version: 1 3 | 4 | context: 5 | version: ${{ env.get("RAPIDS_PACKAGE_VERSION") }} 6 | date_string: '${{ env.get("RAPIDS_DATE_STRING") }}' 7 | py_version: ${{ env.get("RAPIDS_PY_VERSION") }} 8 | py_buildstring: ${{ py_version | version_to_buildstring }} 9 | head_rev: '${{ git.head_rev(".")[:8] }}' 10 | 11 | package: 12 | name: ucx-py 13 | version: ${{ version }} 14 | 15 | source: 16 | path: ../../..
17 | 18 | build: 19 | string: py${{ py_buildstring }}_${{ date_string }}_${{ head_rev }} 20 | dynamic_linking: 21 | overlinking_behavior: "error" 22 | script: 23 | content: | 24 | python -m pip install --config-settings rapidsai.disable-cuda=true . -vv 25 | requirements: 26 | build: 27 | - ${{ compiler("c") }} 28 | - ${{ compiler("cxx") }} 29 | host: 30 | - cython>=3.0.0 31 | - pip 32 | - python =${{ py_version }} 33 | - rapids-build-backend>=0.3.1,<0.4.0dev0 34 | - setuptools>=64.0.0 35 | - ucx 36 | run: 37 | - numpy>=1.23,<3.0a0 38 | - pynvml>=12.0.0,<13.0.0a0 39 | - python 40 | - ucx >=1.15.0,<1.19.0 41 | ignore_run_exports: 42 | from_package: 43 | - ${{ compiler("c") }} 44 | - ${{ compiler("cxx") }} 45 | by_name: 46 | - ucx 47 | 48 | 49 | tests: 50 | - python: 51 | imports: 52 | - ucp 53 | pip_check: false 54 | 55 | 56 | about: 57 | homepage: ${{ load_from_file("pyproject.toml").project.urls.Homepage }} 58 | license: ${{ load_from_file("pyproject.toml").project.license.text }} 59 | summary: ${{ load_from_file("pyproject.toml").project.description }} 60 | -------------------------------------------------------------------------------- /debug-tests/README.md: -------------------------------------------------------------------------------- 1 | ## Debug Tests 2 | 3 | Files in this directory are useful for debugging purposes and often require being executed in two separate sessions (tmux/ssh/etc). 4 | 5 | NOTE: This was moved outside of the tests directory to prevent users running potentially unstable tests by accident. 
6 | 7 | 8 | ## Send/Recv 9 | 10 | `send.py` and `recv.py` are used to debug/confirm nvlink message passing over 1000 iterations of either CuPy or cudf objects: 11 | 12 | ### Process 1 13 | 14 | > UCXPY_IFNAME=enp1s0f0 CUDA_VISIBLE_DEVICES=0,1 UCX_MEMTYPE_CACHE=n UCX_TLS=tcp,cuda_copy,cuda_ipc /usr/local/cuda/bin/nvprof python tests/debug-testssend.py 15 | 16 | ### Process 2 17 | 18 | > UCXPY_LOG_LEVEL=DEBUG UCX_LOG_LEVEL=DEBUG UCXPY_IFNAME=enp1s0f0 CUDA_VISIBLE_DEVICES=0,1 UCX_MEMTYPE_CACHE=n UCX_TLS=tcp,cuda_copy,cuda_ipc /usr/local/cuda/bin/nvprof python tests/recv.py 19 | 20 | `nvprof` is used to verify NVLINK usage and we are looking at two things primarily: 21 | - existence of [CUDA memcpy PtoP] 22 | - balanced cudaMalloc/cudaFree 23 | 24 | ### Multi-worker Setup 25 | This setup is particularly useful for IB testing when `multi-node-workers.sh` 26 | is placed in a NFS mount and can be executed independently on each machine 27 | 28 | - bash scheduler.sh 29 | - bash multi-node-workers.sh 30 | -------------------------------------------------------------------------------- /debug-tests/client.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import time 4 | 5 | import pynvml 6 | import pytest 7 | from debug_utils import ( 8 | ITERATIONS, 9 | parse_args, 10 | set_rmm, 11 | start_process, 12 | total_nvlink_transfer, 13 | ) 14 | from utils import recv, send 15 | 16 | import ucp 17 | from ucp.utils import get_event_loop 18 | 19 | pynvml.nvmlInit() 20 | 21 | 22 | cmd = "nvidia-smi nvlink --setcontrol 0bz" # Get output in bytes 23 | # subprocess.check_call(cmd, shell=True) 24 | 25 | pynvml = pytest.importorskip("pynvml", reason="PYNVML not installed") 26 | 27 | 28 | async def get_ep(name, port): 29 | addr = ucp.get_address() 30 | ep = await ucp.create_endpoint(addr, port) 31 | return ep 32 | 33 | 34 | def client(env, port, func, verbose): 35 | # wait for server to come up 36 | # receive cudf object 
37 | # deserialize 38 | # assert deserialized msg is cdf 39 | # send receipt 40 | 41 | os.environ.update(env) 42 | before_rx, before_tx = total_nvlink_transfer() 43 | 44 | async def read(): 45 | await asyncio.sleep(1) 46 | ep = await get_ep("client", port) 47 | 48 | for i in range(ITERATIONS): 49 | bytes_used = pynvml.nvmlDeviceGetMemoryInfo( 50 | pynvml.nvmlDeviceGetHandleByIndex(0) 51 | ).used 52 | bytes_used 53 | # print("Bytes Used:", bytes_used, i) 54 | 55 | frames, msg = await recv(ep) 56 | 57 | # Send meta data 58 | await send(ep, frames) 59 | 60 | print("Shutting Down Client...") 61 | await ep.close() 62 | 63 | set_rmm() 64 | for i in range(ITERATIONS): 65 | print("ITER: ", i) 66 | t = time.time() 67 | get_event_loop().run_until_complete(read()) 68 | if verbose: 69 | print("Time take for interation %d: %ss" % (i, time.time() - t)) 70 | 71 | print("FINISHED") 72 | # num_bytes = nbytes(rx_cuda_obj) 73 | # print(f"TOTAL DATA RECEIVED: {num_bytes}") 74 | # nvlink only measures in KBs 75 | # if num_bytes > 90000: 76 | # rx, tx = total_nvlink_transfer() 77 | # msg = f"RX BEFORE SEND: {before_rx} -- RX AFTER SEND: {rx} \ 78 | # -- TOTAL DATA: {num_bytes}" 79 | # print(msg) 80 | # assert rx > before_rx 81 | 82 | # import cloudpickle 83 | # cuda_obj_generator = cloudpickle.loads(func) 84 | # pure_cuda_obj = cuda_obj_generator() 85 | 86 | # from cudf.testing import assert_eq 87 | # import cupy as cp 88 | 89 | # if isinstance(rx_cuda_obj, cp.ndarray): 90 | # cp.testing.assert_allclose(rx_cuda_obj, pure_cuda_obj) 91 | # else: 92 | # assert_eq(rx_cuda_obj, pure_cuda_obj) 93 | 94 | 95 | def main(): 96 | args = parse_args(server_address=True) 97 | 98 | start_process(args, client) 99 | 100 | 101 | if __name__ == "__main__": 102 | main() 103 | -------------------------------------------------------------------------------- /debug-tests/debug_utils.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | 
import cloudpickle 5 | import cupy 6 | from utils import get_num_gpus 7 | 8 | from dask.utils import parse_bytes 9 | 10 | import rmm 11 | from rmm.allocators.cupy import rmm_cupy_allocator 12 | 13 | ITERATIONS = 100 14 | 15 | 16 | def set_rmm(): 17 | rmm.reinitialize( 18 | pool_allocator=True, managed_memory=False, initial_pool_size=parse_bytes("6GB") 19 | ) 20 | cupy.cuda.set_allocator(rmm_cupy_allocator) 21 | 22 | 23 | def parse_args(server_address=False): 24 | parser = argparse.ArgumentParser(description="Tester client process") 25 | if server_address is True: 26 | parser.add_argument( 27 | "-s", 28 | "--server", 29 | default=None, 30 | help="Server address, ucp.get_address() if not specified", 31 | ) 32 | parser.add_argument("-p", "--port", default=13337, help="Server port", type=int) 33 | parser.add_argument( 34 | "-o", 35 | "--object_type", 36 | default="numpy", 37 | choices=["numpy", "cupy", "cudf"], 38 | help="In-memory array type.", 39 | ) 40 | parser.add_argument( 41 | "-c", 42 | "--cpu-affinity", 43 | metavar="N", 44 | default=-1, 45 | type=int, 46 | help="CPU affinity (default -1: unset).", 47 | ) 48 | parser.add_argument( 49 | "-v", 50 | "--verbose", 51 | default=False, 52 | action="store_true", 53 | help="Print timings per iteration.", 54 | ) 55 | 56 | return parser.parse_args() 57 | 58 | 59 | def get_cuda_devices(): 60 | if "CUDA_VISIBLE_DEVICES" in os.environ: 61 | return os.environ["CUDA_VISIBLE_DEVICES"].split(",") 62 | else: 63 | ngpus = get_num_gpus() 64 | return list(range(ngpus)) 65 | 66 | 67 | def total_nvlink_transfer(): 68 | import pynvml 69 | 70 | pynvml.nvmlShutdown() 71 | 72 | pynvml.nvmlInit() 73 | 74 | try: 75 | cuda_dev_id = int(os.environ["CUDA_VISIBLE_DEVICES"].split(",")[0]) 76 | except Exception as e: 77 | print(e) 78 | cuda_dev_id = 0 79 | nlinks = pynvml.NVML_NVLINK_MAX_LINKS 80 | handle = pynvml.nvmlDeviceGetHandleByIndex(cuda_dev_id) 81 | rx = 0 82 | tx = 0 83 | for i in range(nlinks): 84 | transfer = 
pynvml.nvmlDeviceGetNvLinkUtilizationCounter(handle, i, 0) 85 | rx += transfer["rx"] 86 | tx += transfer["tx"] 87 | return rx, tx 88 | 89 | 90 | def start_process(args, process_function): 91 | if args.cpu_affinity >= 0: 92 | os.sched_setaffinity(0, [args.cpu_affinity]) 93 | 94 | base_env = os.environ 95 | env = base_env.copy() 96 | 97 | port = 15339 98 | 99 | # serialize function and send to the client and server 100 | # server will use the return value of the contents, 101 | # serialize the values, then send serialized values to client. 102 | # client will compare return values of the deserialized 103 | # data sent from the server 104 | 105 | obj = get_object(args.object_type) 106 | obj_func = cloudpickle.dumps(obj) 107 | 108 | process_function(env, port, obj_func, args.verbose) 109 | 110 | 111 | def cudf_obj(): 112 | import numpy as np 113 | 114 | import cudf 115 | 116 | size = 2**26 117 | return cudf.DataFrame( 118 | {"a": np.random.random(size), "b": np.random.random(size), "c": ["a"] * size} 119 | ) 120 | 121 | 122 | def cudf_from_cupy_obj(): 123 | import cupy 124 | import numpy as np 125 | 126 | import cudf 127 | 128 | size = 9**5 129 | obj = cupy.arange(size) 130 | data = [obj for i in range(10)] 131 | data.extend([np.arange(10) for i in range(10)]) 132 | data.append(cudf.Series([1, 2, 3, 4])) 133 | data.append({"key": "value"}) 134 | data.append({"key": cudf.Series([0.45, 0.134])}) 135 | return data 136 | 137 | 138 | def cupy_obj(): 139 | import cupy as cp 140 | 141 | size = 10**9 142 | return cp.arange(size) 143 | 144 | 145 | def numpy_obj(): 146 | import numpy as np 147 | 148 | size = 2**20 149 | obj = np.arange(size) 150 | return obj 151 | 152 | 153 | def get_object(object_type): 154 | if object_type == "numpy": 155 | return numpy_obj 156 | elif object_type == "cupy": 157 | return cupy_obj 158 | elif object_type == "cudf": 159 | return cudf_obj 160 | else: 161 | raise TypeError("Object type %s unknown" % (object_type)) 162 | 
-------------------------------------------------------------------------------- /debug-tests/multi-node-workers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | #export UCX_LOG_LEVEL=DEBUG 4 | #export UCXPY_LOG_LEVEL=DEBUG 5 | export UCX_MEMTYPE_CACHE=n 6 | export UCX_TLS=tcp,cuda_copy,rc 7 | 8 | UCX_NET_DEVICES=mlx5_0:1 CUDA_VISIBLE_DEVICES=0 python recv.py 2>&1 | tee /tmp/recv-log-0.txt & 9 | UCX_NET_DEVICES=mlx5_0:1 CUDA_VISIBLE_DEVICES=1 python recv.py 2>&1 | tee /tmp/recv-log-1.txt & 10 | UCX_NET_DEVICES=mlx5_1:1 CUDA_VISIBLE_DEVICES=2 python recv.py 2>&1 | tee /tmp/recv-log-2.txt & 11 | UCX_NET_DEVICES=mlx5_1:1 CUDA_VISIBLE_DEVICES=3 python recv.py 2>&1 | tee /tmp/recv-log-3.txt & 12 | UCX_NET_DEVICES=mlx5_2:1 CUDA_VISIBLE_DEVICES=4 python recv.py 2>&1 | tee /tmp/recv-log-4.txt & 13 | UCX_NET_DEVICES=mlx5_2:1 CUDA_VISIBLE_DEVICES=5 python recv.py 2>&1 | tee /tmp/recv-log-5.txt & 14 | UCX_NET_DEVICES=mlx5_3:1 CUDA_VISIBLE_DEVICES=6 python recv.py 2>&1 | tee /tmp/recv-log-6.txt & 15 | UCX_NET_DEVICES=mlx5_3:1 CUDA_VISIBLE_DEVICES=7 python recv.py 2>&1 | tee /tmp/recv-log-7.txt & 16 | 17 | sleep 3600 18 | -------------------------------------------------------------------------------- /debug-tests/scheduler.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | #export UCX_LOG_LEVEL=TRACE 5 | # export UCXPY_LOG_LEVEL=DEBUG 6 | export UCX_MEMTYPE_CACHE=n 7 | export UCX_TLS=tcp,cuda_copy,rc,cuda_ipc 8 | 9 | UCX_NET_DEVICES=mlx5_0:1 CUDA_VISIBLE_DEVICES=0 python send.py 2>&1 | tee /tmp/send-log.txt & 10 | -------------------------------------------------------------------------------- /debug-tests/server.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | import cloudpickle 5 | import pytest 6 | from debug_utils import ITERATIONS, parse_args, set_rmm, 
start_process 7 | from utils import recv, send 8 | 9 | from distributed.comm.utils import to_frames 10 | from distributed.protocol import to_serialize 11 | 12 | import ucp 13 | from ucp.utils import get_event_loop 14 | 15 | cmd = "nvidia-smi nvlink --setcontrol 0bz" # Get output in bytes 16 | # subprocess.check_call(cmd, shell=True) 17 | 18 | pynvml = pytest.importorskip("pynvml", reason="PYNVML not installed") 19 | 20 | 21 | async def get_ep(name, port): 22 | addr = ucp.get_address() 23 | ep = await ucp.create_endpoint(addr, port) 24 | return ep 25 | 26 | 27 | def server(env, port, func, verbose): 28 | # create listener receiver 29 | # write cudf object 30 | # confirm message is sent correctly 31 | 32 | os.environ.update(env) 33 | 34 | async def f(listener_port): 35 | # coroutine shows up when the client asks 36 | # to connect 37 | set_rmm() 38 | 39 | async def write(ep): 40 | 41 | print("CREATING CUDA OBJECT IN SERVER...") 42 | cuda_obj_generator = cloudpickle.loads(func) 43 | cuda_obj = cuda_obj_generator() 44 | msg = {"data": to_serialize(cuda_obj)} 45 | frames = await to_frames(msg, serializers=("cuda", "dask", "pickle")) 46 | while True: 47 | for i in range(ITERATIONS): 48 | print("ITER: ", i) 49 | # Send meta data 50 | await send(ep, frames) 51 | 52 | frames, msg = await recv(ep) 53 | 54 | print("CONFIRM RECEIPT") 55 | await ep.close() 56 | break 57 | # lf.close() 58 | del msg 59 | del frames 60 | 61 | lf = ucp.create_listener(write, port=listener_port) 62 | try: 63 | while not lf.closed(): 64 | await asyncio.sleep(0.1) 65 | except ucp.UCXCloseError: 66 | pass 67 | 68 | loop = get_event_loop() 69 | while True: 70 | loop.run_until_complete(f(port)) 71 | 72 | 73 | def main(): 74 | args = parse_args(server_address=False) 75 | 76 | start_process(args, server) 77 | 78 | 79 | if __name__ == "__main__": 80 | main() 81 | -------------------------------------------------------------------------------- /debug-tests/utils.py: 
-------------------------------------------------------------------------------- 1 | ../tests/utils.py -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3 2 | 3 | RUN apt-get update && \ 4 | DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata && \ 5 | apt-get install -y \ 6 | automake \ 7 | dh-make \ 8 | g++ \ 9 | git \ 10 | libcap2 \ 11 | libtool \ 12 | make \ 13 | udev \ 14 | wget \ 15 | && apt-get remove -y openjdk-11-* || apt-get autoremove -y \ 16 | && apt-get clean && rm -rf /var/lib/apt/lists/* 17 | 18 | COPY run.sh /root 19 | 20 | WORKDIR /root 21 | 22 | CMD [ "/root/run.sh" ] 23 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # Docker container 2 | 3 | ## Summary 4 | 5 | Contains reference dockerfile and build script to run UCX-Py tests and benchmarks. This is a minimal setup, without support for CUDA, MOFED, or rdma-core. 6 | 7 | ## Building Docker image 8 | 9 | To begin, it's necessary to build the image, this is done as follows: 10 | 11 | ```bash 12 | cd docker 13 | docker build -t ucx-py -f Dockerfile . 14 | ``` 15 | 16 | ## Running 17 | 18 | Once building the Docker image is complete, the container can be started with the following command: 19 | 20 | ```bash 21 | docker run ucx-py 22 | ``` 23 | 24 | The container above will run UCX-Py tests and benchmarks. 25 | 26 | ## Infiniband/NVLink-enabled docker file 27 | 28 | In addition to the reference Docker image, there are two further docker 29 | files which have support for CUDA devices and 30 | InfiniBand/NVLink-enabled communications using either 31 | [rdma-core](https://github.com/linux-rdma/rdma-core) or 32 | [MOFED](https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/). 
33 | In both cases, the default base image is 34 | [nvidia/cuda:11.5.2-devel-ubuntu20.04](https://hub.docker.com/r/nvidia/cuda/tags?page=1&name=11.5.2-devel-ubuntu20.04). 35 | 36 | The rdma-core image should work as long as the host system has MOFED >= 5.0. 37 | If you use the MOFED image, then the host version (reported by `ofed_info 38 | -s`) should match that used when building the container. 39 | 40 | To use one of these images, first build it 41 | ```bash 42 | docker build -t ucx-py-mofed -f UCXPy-MOFED.dockerfile . 43 | # or 44 | docker build -t ucx-py-rdma -f UCXPy-rdma-core.dockerfile . 45 | ``` 46 | 47 | ### Controlling build-args 48 | 49 | You can control some of the behaviour of the docker file with docker `--build-arg` flags: 50 | 51 | - `UCX_VERSION_TAG`: git committish for the version of UCX to build (default `v1.13.0`); 52 | - `CONDA_HOME`: Where to install conda in the image (default `/opt/conda`); 53 | - `CONDA_ENV`: What to name the conda environment (default `ucx`); 54 | - `CONDA_ENV_SPEC`: yaml file used when initially creating the conda environment (default `ucx-py-cuda11.5.yml`); 55 | - `CUDA_VERSION`: version of cuda toolkit in the base image (default `11.5.2`), must exist in the [nvidia/cuda](https://hub.docker.com/layers/cuda/nvidia/cuda) docker hub image list; 56 | - `DISTRIBUTION_VERSION`: version of distribution in the base image (default `ubuntu20.04`), must exist in the [nvidia/cuda](https://hub.docker.com/layers/cuda/nvidia/cuda) docker hub image list. Note that rdma-core provides forward-compatibility with version 28.0 (shipped with ubuntu20.04) supporting MOFED 5.0 and later. 
Other distributions may provide a different version of rdma-core for which MOFED compatibility may vary; 57 | - `MOFED_VERSION`: (MOFED image only) version of MOFED to download (default `5.3-1.0.5.0`), must match version on host system 58 | 59 | ### Running 60 | 61 | Running the container requires a number of additional flags to expose 62 | high-performance transports from the host. `docker run --privileged` is a 63 | catch-all that will definitely provide enough permissions (`ulimit -l unlimited` 64 | is then needed in the container). Alternately, provide `--ulimit memlock=-1` and 65 | expose devices with `--device /dev/infiniband`, see [the UCX 66 | documentation](https://openucx.readthedocs.io/en/master/running.html#running-in-docker-containers) 67 | for more details. To expose the infiniband devices using IPoIB, we need to in 68 | addition map the relevant host network interfaces, a catchall is just to use `--network host`. 69 | 70 | For example, a run command that exposes all devices available in 71 | `/dev/infiniband` along with the network interfaces on the host is (assuming 72 | that the `ucx-py-rdma` image tag has been built as above): 73 | 74 | ```bash 75 | docker run --ulimit memlock=-1 --device /dev/infiniband --network host -ti ucx-py-rdma /bin/bash 76 | ``` 77 | 78 | UCX-Py is installed via 79 | [mamba](https://mamba.readthedocs.io/en/latest/index.html) in the `ucx` 80 | environment; so 81 | ```bash 82 | source /opt/conda/etc/profile.d/conda.sh 83 | source /opt/conda/etc/profile.d/mamba.sh 84 | mamba activate ucx 85 | ``` 86 | in the container will provide a Python with UCX-Py available. 
87 | -------------------------------------------------------------------------------- /docker/UCXPy-MOFED.dockerfile: -------------------------------------------------------------------------------- 1 | ARG CUDA_VERSION=11.5.2 2 | ARG DISTRIBUTION_VERSION=ubuntu20.04 3 | FROM nvidia/cuda:${CUDA_VERSION}-devel-${DISTRIBUTION_VERSION} 4 | 5 | # Make available to later build stages 6 | ARG DISTRIBUTION_VERSION 7 | # Should match host OS OFED version (as reported by ofed_info -s) 8 | ARG MOFED_VERSION=5.3-1.0.5.0 9 | # Tag to checkout from UCX repository 10 | ARG UCX_VERSION_TAG=v1.13.0 11 | # Where to install conda, and what to name the created environment 12 | ARG CONDA_HOME=/opt/conda 13 | ARG CONDA_ENV=ucx 14 | # Name of conda spec file in the current working directory that 15 | # will be used to build the conda environment. 16 | ARG CONDA_ENV_SPEC=ucx-py-cuda11.5.yml 17 | 18 | ENV CONDA_ENV="${CONDA_ENV}" 19 | ENV CONDA_HOME="${CONDA_HOME}" 20 | 21 | # Where cuda is installed 22 | ENV CUDA_HOME="/usr/local/cuda" 23 | 24 | SHELL ["/bin/bash", "-c"] 25 | 26 | RUN apt-get update -y \ 27 | && apt-get --fix-missing upgrade -y \ 28 | && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata \ 29 | && apt-get install -y \ 30 | automake \ 31 | dh-make \ 32 | git \ 33 | libcap2 \ 34 | libtool \ 35 | make \ 36 | pkg-config \ 37 | udev \ 38 | curl \ 39 | && apt-get autoremove -y \ 40 | && apt-get clean 41 | 42 | RUN curl -fsSL https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh \ 43 | -o /minimamba.sh \ 44 | && bash /minimamba.sh -b -p ${CONDA_HOME} \ 45 | && rm /minimamba.sh 46 | 47 | ENV PATH="${CONDA_HOME}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${CUDA_HOME}/bin" 48 | 49 | RUN curl -fsSL https://content.mellanox.com/ofed/MLNX_OFED-${MOFED_VERSION}/MLNX_OFED_LINUX-${MOFED_VERSION}-${DISTRIBUTION_VERSION}-x86_64.tgz | tar xz \ 50 | && (cd 
MLNX_OFED_LINUX-${MOFED_VERSION}-${DISTRIBUTION_VERSION}-x86_64 \ 51 | && yes | ./mlnxofedinstall --user-space-only --without-fw-update \ 52 | --without-neohost-backend) \ 53 | && rm -rf /var/lib/apt/lists/* \ 54 | && rm -rf /MLNX_OFED_LINUX-${MOFED_VERSION}-${DISTRIBUTION_VERSION}-x86_64 55 | 56 | WORKDIR /root 57 | COPY ${CONDA_ENV_SPEC} /root/conda-env.yml 58 | COPY build-ucx.sh /root/build-ucx.sh 59 | COPY build-ucx-py.sh /root/build-ucx-py.sh 60 | COPY bench-all.sh /root/bench-all.sh 61 | 62 | RUN mamba env create -n ${CONDA_ENV} --file /root/conda-env.yml 63 | RUN bash ./build-ucx.sh ${UCX_VERSION_TAG} ${CONDA_HOME} ${CONDA_ENV} ${CUDA_HOME} 64 | RUN bash ./build-ucx-py.sh ${CONDA_HOME} ${CONDA_ENV} 65 | CMD ["/root/bench-all.sh", "tcp,cuda_copy,cuda_ipc", "rc,cuda_copy", "all"] 66 | -------------------------------------------------------------------------------- /docker/UCXPy-rdma-core.dockerfile: -------------------------------------------------------------------------------- 1 | ARG CUDA_VERSION=11.5.2 2 | ARG DISTRIBUTION_VERSION=ubuntu20.04 3 | FROM nvidia/cuda:${CUDA_VERSION}-devel-${DISTRIBUTION_VERSION} 4 | 5 | # Tag to checkout from UCX repository 6 | ARG UCX_VERSION_TAG=v1.13.0 7 | # Where to install conda, and what to name the created environment 8 | ARG CONDA_HOME=/opt/conda 9 | ARG CONDA_ENV=ucx 10 | # Name of conda spec file in the current working directory that 11 | # will be used to build the conda environment. 
12 | ARG CONDA_ENV_SPEC=ucx-py-cuda11.5.yml 13 | 14 | ENV CONDA_ENV="${CONDA_ENV}" 15 | ENV CONDA_HOME="${CONDA_HOME}" 16 | 17 | # Where cuda is installed 18 | ENV CUDA_HOME="/usr/local/cuda" 19 | 20 | SHELL ["/bin/bash", "-c"] 21 | 22 | RUN apt-get update -y \ 23 | && apt-get --fix-missing upgrade -y \ 24 | && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata \ 25 | && apt-get install -y \ 26 | automake \ 27 | dh-make \ 28 | git \ 29 | libcap2 \ 30 | libtool \ 31 | make \ 32 | pkg-config \ 33 | udev \ 34 | curl \ 35 | librdmacm-dev \ 36 | rdma-core \ 37 | && apt-get autoremove -y \ 38 | && apt-get clean 39 | 40 | RUN curl -fsSL https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh \ 41 | -o /minimamba.sh \ 42 | && bash /minimamba.sh -b -p ${CONDA_HOME} \ 43 | && rm /minimamba.sh 44 | 45 | ENV PATH="${CONDA_HOME}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${CUDA_HOME}/bin" 46 | 47 | WORKDIR /root 48 | COPY ${CONDA_ENV_SPEC} /root/conda-env.yml 49 | COPY build-ucx.sh /root/build-ucx.sh 50 | COPY build-ucx-py.sh /root/build-ucx-py.sh 51 | 52 | RUN mamba env create -n ${CONDA_ENV} --file /root/conda-env.yml 53 | RUN bash ./build-ucx.sh ${UCX_VERSION_TAG} ${CONDA_HOME} ${CONDA_ENV} ${CUDA_HOME} 54 | RUN bash ./build-ucx-py.sh ${CONDA_HOME} ${CONDA_ENV} 55 | -------------------------------------------------------------------------------- /docker/bench-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2022, NVIDIA CORPORATION. 
3 | 
4 | set -e
5 | 
6 | function logger {
7 | echo -e "\n$@\n"
8 | }
9 | 
10 | # Requires conda installed at /opt/conda and the ucx environment setup
11 | # See UCXPy-CUDA.dockerfile
12 | source /opt/conda/etc/profile.d/conda.sh
13 | conda activate ucx
14 | 
15 | cd ucx-py/
16 | # Benchmark using command-line provided transports or else default
17 | for tls in ${@:-"tcp" "all"}; do
18 | export UCX_TLS=${tls}
19 | logger "Python pytest for ucx-py"
20 | 
21 | logger "Tests (UCX_TLS=${UCX_TLS})"
22 | pytest --cache-clear -vs ucp/_libs/tests
23 | pytest --cache-clear -vs tests/
24 | 
25 | for array_type in "numpy" "cupy" "rmm"; do
26 | logger "Benchmarks (UCX_TLS=${UCX_TLS}, array_type=${array_type})"
27 | python -m ucp.benchmarks.send_recv -l ucp-async -o ${array_type} \
28 | --server-dev 0 --client-dev 0 --reuse-alloc
29 | python -m ucp.benchmarks.send_recv -l ucp-core -o ${array_type} \
30 | --server-dev 0 --client-dev 0 --reuse-alloc
31 | done
32 | done
33 | -------------------------------------------------------------------------------- /docker/build-ucx-py.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ex 3 | 4 | CONDA_HOME=${1:-"/opt/conda"} 5 | CONDA_ENV=${2:-"ucx"} 6 | 7 | source ${CONDA_HOME}/etc/profile.d/conda.sh 8 | source ${CONDA_HOME}/etc/profile.d/mamba.sh 9 | mamba activate ${CONDA_ENV} 10 | 11 | git clone https://github.com/rapidsai/ucx-py.git 12 | pip install -v ucx-py/ 13 | -------------------------------------------------------------------------------- /docker/build-ucx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ex 3 | 4 | UCX_VERSION_TAG=${1:-"v1.13.0"} 5 | CONDA_HOME=${2:-"/opt/conda"} 6 | CONDA_ENV=${3:-"ucx"} 7 | CUDA_HOME=${4:-"/usr/local/cuda"} 8 | # Send any remaining arguments to configure 9 | CONFIGURE_ARGS=${@:5} 10 | 11 | source ${CONDA_HOME}/etc/profile.d/conda.sh 12 | source ${CONDA_HOME}/etc/profile.d/mamba.sh 13 | 
mamba activate ${CONDA_ENV} 14 | 15 | git clone https://github.com/openucx/ucx.git 16 | 17 | cd ucx 18 | git checkout ${UCX_VERSION_TAG} 19 | ./autogen.sh 20 | mkdir build-linux && cd build-linux 21 | ../contrib/configure-release --prefix=${CONDA_PREFIX} --with-sysroot --enable-cma \ 22 | --enable-mt --with-gnu-ld --with-rdmacm --with-verbs \ 23 | --with-cuda=${CUDA_HOME} \ 24 | ${CONFIGURE_ARGS} 25 | make -j install 26 | -------------------------------------------------------------------------------- /docker/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2021, NVIDIA CORPORATION. 3 | set -e 4 | 5 | function logger { 6 | echo -e "\n$@\n" 7 | } 8 | 9 | PYTHON_PREFIX=$(python -c "import distutils.sysconfig; print(distutils.sysconfig.PREFIX)") 10 | 11 | ################################################################################ 12 | # SETUP - Install python packages and check environment 13 | ################################################################################ 14 | 15 | pip install \ 16 | "pytest" "pytest-asyncio" \ 17 | "dask" "distributed" \ 18 | "cython" 19 | 20 | logger "Check versions" 21 | python --version 22 | pip list 23 | 24 | ################################################################################ 25 | # BUILD - Build UCX master, UCX-Py and run tests 26 | ################################################################################ 27 | logger "Build UCX master" 28 | cd $HOME 29 | git clone https://github.com/openucx/ucx 30 | cd ucx 31 | ./autogen.sh 32 | ./contrib/configure-devel \ 33 | --prefix=$PYTHON_PREFIX \ 34 | --enable-gtest=no \ 35 | --with-valgrind=no 36 | make -j install 37 | 38 | echo $PYTHON_PREFIX >> /etc/ld.so.conf.d/python.conf 39 | ldconfig 40 | 41 | logger "UCX Version and Build Information" 42 | ucx_info -v 43 | 44 | 45 | ################################################################################ 46 | # TEST - Run pytests for 
ucx-py 47 | ################################################################################ 48 | logger "Clone and Build UCX-Py" 49 | cd $HOME 50 | git clone https://github.com/rapidsai/ucx-py 51 | cd ucx-py 52 | python setup.py build_ext --inplace 53 | python -m pip install -e . 54 | 55 | for tls in "tcp" "all"; do 56 | export UCX_TLS=$tls 57 | 58 | logger "Python pytest for ucx-py" 59 | 60 | # Test with TCP/Sockets 61 | logger "Tests (UCX_TLS=$UCX_TLS)" 62 | pytest --cache-clear -vs ucp/_libs/tests 63 | pytest --cache-clear -vs tests/ 64 | 65 | logger "Benchmarks (UCX_TLS=$UCX_TLS)" 66 | python -m ucp.benchmarks.send_recv -l ucp-async -o numpy \ 67 | --server-dev 0 --client-dev 0 --reuse-alloc 68 | python -m ucp.benchmarks.send_recv -l ucp-core -o numpy \ 69 | --server-dev 0 --client-dev 0 --reuse-alloc 70 | done 71 | -------------------------------------------------------------------------------- /docker/ucx-py-cuda11.5.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - rapidsai 3 | - nvidia 4 | - conda-forge 5 | 6 | dependencies: 7 | - python=3.10 8 | - cudatoolkit=11.5 9 | - setuptools 10 | - cython>=3.0.0 11 | - pytest 12 | - pytest-asyncio 13 | - dask 14 | - distributed 15 | - cupy 16 | - numba>=0.59.1,<0.61.0a0 17 | - rmm 18 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. 
$(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/source/_static/Architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rapidsai/ucx-py/37b1e7e097b83218b2e767f0a05dbbc1cb502c2d/docs/source/_static/Architecture.png -------------------------------------------------------------------------------- /docs/source/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | .. currentmodule:: ucp 5 | 6 | **ucp** 7 | 8 | .. autosummary:: 9 | ucp 10 | ucp.create_listener 11 | ucp.create_endpoint 12 | ucp.get_address 13 | ucp.get_config 14 | ucp.get_ucp_worker 15 | ucp.get_ucx_version 16 | ucp.init 17 | ucp.progress 18 | ucp.reset 19 | 20 | **Endpoint** 21 | 22 | .. autosummary:: 23 | Endpoint 24 | Endpoint.abort 25 | Endpoint.close 26 | Endpoint.closed 27 | Endpoint.close_after_n_recv 28 | Endpoint.cuda_support 29 | Endpoint.get_ucp_endpoint 30 | Endpoint.get_ucp_worker 31 | Endpoint.recv 32 | Endpoint.send 33 | Endpoint.ucx_info 34 | Endpoint.uid 35 | 36 | **Listener** 37 | 38 | .. autosummary:: 39 | Listener 40 | Listener.close 41 | Listener.closed 42 | Listener.port 43 | 44 | .. currentmodule:: ucp 45 | 46 | .. autofunction:: create_listener 47 | .. autofunction:: create_endpoint 48 | .. autofunction:: get_address 49 | .. autofunction:: get_config 50 | .. autofunction:: get_ucp_worker 51 | .. autofunction:: get_ucx_version 52 | .. autofunction:: init 53 | .. autofunction:: progress 54 | .. autofunction:: reset 55 | 56 | Endpoint 57 | -------- 58 | 59 | .. currentmodule:: ucp 60 | 61 | .. autoclass:: Endpoint 62 | :members: 63 | 64 | 65 | Listener 66 | -------- 67 | 68 | .. currentmodule:: ucp 69 | 70 | .. 
autoclass:: Listener
71 | :members:
72 | -------------------------------------------------------------------------------- /docs/source/deployment.rst: -------------------------------------------------------------------------------- 1 | NVLink and Docker/Kubernetes
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 | 
4 | In order to use NVLink when running in containers using Docker and/or
5 | Kubernetes the processes must share an IPC namespace for NVLink to work
6 | correctly.
7 | 
8 | Many GPUs in one container
9 | ^^^^^^^^^^^^^^^^^^^^^^^^^^
10 | 
11 | The simplest way to ensure that processes accessing GPUs share an IPC
12 | namespace is to run the processes within the same container. This means
13 | exposing multiple GPUs to a single container.
14 | 
15 | Many containers with a shared IPC namespace
16 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
17 | 
18 | If you wish to isolate your processes into multiple containers and
19 | expose one or more GPUs to each container you need to ensure they are
20 | using a shared IPC namespace.
21 | 
22 | In a Docker configuration you can mark one container as having a
23 | shareable IPC namespace with the flag ``--ipc="shareable"``. Other
24 | containers can then share that namespace with the flag
25 | ``--ipc="container: <_name-or-ID_>"`` and passing the name or ID of the
26 | container that is sharing its namespace.
27 | 
28 | You can also share the host IPC namespace with your container with the
29 | flag ``--ipc="host"``, however this is not recommended on multi-tenant
30 | hosts.
31 | 
32 | Privileged pods in a Kubernetes cluster `can also be configured to share
33 | the host IPC`_.
34 | 
35 | For more information see the `Docker documentation`_.
36 | 
37 | .. _can also be configured to share the host IPC: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#host-namespaces
38 | .. 
_Docker documentation: https://docs.docker.com/engine/reference/run/#ipc-settings---ipc -------------------------------------------------------------------------------- /docs/source/glossary.rst: -------------------------------------------------------------------------------- 1 | Glossary 2 | -------- 3 | 4 | - ACK Acknowledge 5 | - am Active Message 6 | - AMO Atomic Memory Operation 7 | - ANL Argonne National Laboratory 8 | - AZP AZure Pipeline 9 | - bcopy Byte copy 10 | - Bistro Binary Instrumentation 11 | - BTL Byte Transfer Layer 12 | - cm Connection Manager 13 | - CMA Cross Memory Attach 14 | - CQ Completion Queue(Infiniband) 15 | - CQE Completion Queue Entry(Infiniband) 16 | - csmock static analysis tools 17 | - CUDA Compute Unified Device Architecture(NVIDIA) 18 | - DC Dynamically Connected transport(Infiniband) 19 | - ep EndPoint 20 | - FC Flow Control 21 | - fd File Descriptor 22 | - GDR GPUDirect RDMA 23 | - gtest Google Test 24 | - HPC High Performance Computing 25 | - HWTM HardWare Tag Matching 26 | - IB Infiniband 27 | - iface Interaface 28 | - IPC Inter Process Communication 29 | - JUCX Java API over UCP 30 | - KLM A new sophisticated way of creating memory regions.- (Mellanox specific) 31 | - KNEM Kernel Nemesis 32 | - LLNL Lawrence Livermore National Laboratory 33 | - madvise give advice about use of memory. 
See manual - madvise(2) 34 | - md Memory Domain 35 | - MEMH Memory Handle 36 | - MLX Mellanox Technologies 37 | - mlx5 Connect-X5 VPI 38 | - mm Memory Mapper 39 | - MPI Message Passing INterface 40 | - MPICH A MPI Implementation 41 | - MTT The MPI Testing Tool 42 | - NAK Negative Acknowledge 43 | - ODP OnDemand Paging 44 | - OFA OpenFabrics Alliance 45 | - OMPI OpenMPI 46 | - OOB Out of band 47 | - OOO Out of Order 48 | - OPA Omni-Path Architecture 49 | - Open MPI A MPI Implementation 50 | - ORNL Oak Ridge National Laboratory 51 | - PCIe PCI Express 52 | - PGAS Partitioned Global Address Space 53 | - POSIX Portable operating system interface 54 | - ppn processes per node 55 | - PR Pull Request 56 | - PROGRESS64 A C library of scalable functions for - concurrent programs, primarily focused on networking - applications.(https://github.com/ARM-software/- progress64) 57 | - QP Queue Pair(Infiniband) 58 | - RC Reliable Connection (Infiniband) 59 | - rcache Registration Cache 60 | - RDMA Remote Direct Memory Access 61 | - REQ Request 62 | - rkey Remote KEY 63 | - RMA Remote Memory Access 64 | - RNR Receiver Not Ready 65 | - RoCE RDMA over Converged Ethernet 66 | - ROCm Radeon Open Compute platform(AMD) 67 | - RTE Run Time Environment 68 | - RX Receive 69 | - Skb Socket Buffer 70 | - sm Shared Memory 71 | - SM Subnet Manager(Infiniband) 72 | - SockCM Socket Connection Manager 73 | - SRQ Shared Receive Queue 74 | - SYSV UNIX System V 75 | - tl Transport Layer 76 | - TLS Transpot LayerS 77 | - TM Tag Matching 78 | - TX Transmit 79 | - UC Unreliable Connection (Infiniband) 80 | - UCF Unified Communication Framework 81 | - UCM Unified Communication Memory 82 | - UCP Unified Communication Protocols Higher level API 83 | - UCS Unified Communication Service Common utilities. 
84 | - UCT Unified Communication Transport Lower level API 85 | - UCX Unified Communication X 86 | - UD Unreliable Datagram (Infiniband) 87 | - uGNI user level generic network interface(Cray) 88 | - UMR User mode memory registration 89 | - VPI Virtual Protocol Interconnect 90 | - WQ Work Queue(Infiniband) 91 | - WQE Work Queue Elements (pronounce WOOKIE) 92 | - WR Work Request 93 | - XPMEM cross partition memory 94 | - XRC eXtended Reliable Connection(Infiniband) 95 | - Zcopy Zero Copy -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | UCX-Py 2 | ====== 3 | 4 | UCX-Py is the Python interface for `UCX `_, a low-level high-performance networking library. UCX and UCX-Py supports several transport methods including InfiniBand and NVLink while still using traditional networking protocols like TCP. 5 | 6 | 7 | .. image:: _static/Architecture.png 8 | :alt: A simple dask dictionary 9 | :align: center 10 | 11 | 12 | .. toctree:: 13 | :maxdepth: 1 14 | :hidden: 15 | 16 | quickstart 17 | install 18 | configuration 19 | deployment 20 | ucx-debug 21 | 22 | 23 | .. toctree:: 24 | :maxdepth: 1 25 | :hidden: 26 | :caption: Help & reference 27 | 28 | os-limits 29 | transport-monitoring 30 | send-recv 31 | api 32 | glossary 33 | -------------------------------------------------------------------------------- /docs/source/os-limits.rst: -------------------------------------------------------------------------------- 1 | Operating System Limits 2 | ======================= 3 | 4 | 5 | UCX can be affected by a variety of limits, not just defined by UCX itself but also by the operating system. In this section we describe some of the limits that may be encountered by the user when running UCX-Py or just UCX alone. 
6 | 7 | File Descriptors 8 | ---------------- 9 | 10 | In sockets-based connections, multiple file descriptors may be open to establish connections between endpoints. When UCX is establishing connections between endpoints via protocols such as TCP, an error such as below may occur: 11 | 12 | :: 13 | 14 | ucp.exceptions.UCXError: User-defined limit was reached 15 | 16 | One possible cause for this is that the limit established by the OS or system administrators has been reached by the user. This limit can be checked with: 17 | 18 | :: 19 | 20 | $ ulimit -n 21 | 22 | If the user has permission to do so, the file descriptor limit can be increased by typing the new limit after the command above. For example, to set a new limit of 1 million, the following should be executed: 23 | 24 | :: 25 | 26 | $ ulimit -n 1000000 27 | 28 | Another way the number of open files limit can be increased is by editing the limits.conf file in the operating system. Please consult your system administrator for details. 29 | 30 | Please note that the number of open files required may differ according to the application, further investigation may be required to find optimal values. 31 | 32 | For systems with specialized hardware such as InfiniBand, using RDMACM may also help circumventing that issue, as it doesn't rely heavily on file descriptors. 33 | 34 | 35 | Maximum Connections 36 | ------------------- 37 | 38 | UCX respects the operating system's limit of socket listen() backlog, known in userspace as SOMAXCONN. This limit may cause new endpoints connecting to a listener to hang if too many connections happen to be initiated too quickly. 39 | 40 | To check for the current limit, the user can execute the following command: 41 | 42 | :: 43 | 44 | $ sysctl net.core.somaxconn 45 | 46 | For most Linux distros, the default limit is 128.
To increase that limit to 65535 for example, the user may run the following (requires root or sudo permissions): 47 | 48 | :: 49 | 50 | $ sudo sysctl -w net.core.somaxconn=65535 51 | -------------------------------------------------------------------------------- /docs/source/quickstart.rst: -------------------------------------------------------------------------------- 1 | Quickstart 2 | ========== 3 | 4 | 5 | Setup 6 | ----- 7 | 8 | Create a new conda environment with UCX-Py: 9 | 10 | :: 11 | 12 | conda create -n ucx -c conda-forge -c rapidsai \ 13 | cudatoolkit= ucx-py 14 | 15 | For a more detailed guide on installation options please refer to the :doc:`install` page. 16 | 17 | Send/Recv NumPy Arrays 18 | ---------------------- 19 | 20 | Process 1 - Server 21 | ~~~~~~~~~~~~~~~~~~ 22 | 23 | .. code-block:: python 24 | 25 | import asyncio 26 | import time 27 | import ucp 28 | import numpy as np 29 | 30 | n_bytes = 2**30 31 | host = ucp.get_address(ifname='eth0') # ethernet device name 32 | port = 13337 33 | 34 | async def send(ep): 35 | # recv buffer 36 | arr = np.empty(n_bytes, dtype='u1') 37 | await ep.recv(arr) 38 | assert np.count_nonzero(arr) == np.array(0, dtype=np.int64) 39 | print("Received NumPy array") 40 | 41 | # increment array and send back 42 | arr += 1 43 | print("Sending incremented NumPy array") 44 | await ep.send(arr) 45 | 46 | await ep.close() 47 | lf.close() 48 | 49 | async def main(): 50 | global lf 51 | lf = ucp.create_listener(send, port) 52 | 53 | while not lf.closed(): 54 | await asyncio.sleep(0.1) 55 | 56 | if __name__ == '__main__': 57 | asyncio.run(main()) 58 | 59 | Process 2 - Client 60 | ~~~~~~~~~~~~~~~~~~ 61 | 62 | ..
code-block:: python 63 | 64 | import asyncio 65 | import ucp 66 | import numpy as np 67 | 68 | port = 13337 69 | n_bytes = 2**30 70 | 71 | async def main(): 72 | host = ucp.get_address(ifname='eth0') # ethernet device name 73 | ep = await ucp.create_endpoint(host, port) 74 | msg = np.zeros(n_bytes, dtype='u1') # create some data to send 75 | 76 | # send message 77 | print("Send Original NumPy array") 78 | await ep.send(msg) # send the real message 79 | 80 | # recv response 81 | print("Receive Incremented NumPy arrays") 82 | resp = np.empty_like(msg) 83 | await ep.recv(resp) # receive the echo 84 | await ep.close() 85 | np.testing.assert_array_equal(msg + 1, resp) 86 | 87 | if __name__ == '__main__': 88 | asyncio.run(main()) 89 | 90 | 91 | 92 | Send/Recv CuPy Arrays 93 | --------------------- 94 | 95 | .. note:: 96 | If you are passing CuPy arrays between GPUs and want to use `NVLINK `_ ensure you have correctly set ``UCX_TLS`` with ``cuda_ipc``. See the :doc:`configuration` for more details 97 | 98 | Process 1 - Server 99 | ~~~~~~~~~~~~~~~~~~ 100 | 101 | .. code-block:: python 102 | 103 | import asyncio 104 | import time 105 | import ucp 106 | import cupy as cp 107 | 108 | n_bytes = 2**30 109 | host = ucp.get_address(ifname='eth0') # ethernet device name 110 | port = 13337 111 | 112 | async def send(ep): 113 | # recv buffer 114 | arr = cp.empty(n_bytes, dtype='u1') 115 | await ep.recv(arr) 116 | assert cp.count_nonzero(arr) == cp.array(0, dtype=cp.int64) 117 | print("Received CuPy array") 118 | 119 | # increment array and send back 120 | arr += 1 121 | print("Sending incremented CuPy array") 122 | await ep.send(arr) 123 | 124 | await ep.close() 125 | lf.close() 126 | 127 | async def main(): 128 | global lf 129 | lf = ucp.create_listener(send, port) 130 | 131 | while not lf.closed(): 132 | await asyncio.sleep(0.1) 133 | 134 | if __name__ == '__main__': 135 | asyncio.run(main()) 136 | 137 | Process 2 - Client 138 | ~~~~~~~~~~~~~~~~~~ 139 | 140 | .. 
code-block:: python 141 | 142 | import asyncio 143 | import ucp 144 | import cupy as cp 145 | import numpy as np 146 | 147 | port = 13337 148 | n_bytes = 2**30 149 | 150 | async def main(): 151 | host = ucp.get_address(ifname='eth0') # ethernet device name 152 | ep = await ucp.create_endpoint(host, port) 153 | msg = cp.zeros(n_bytes, dtype='u1') # create some data to send 154 | 155 | # send message 156 | print("Send Original CuPy array") 157 | await ep.send(msg) # send the real message 158 | 159 | # recv response 160 | print("Receive Incremented CuPy arrays") 161 | resp = cp.empty_like(msg) 162 | await ep.recv(resp) # receive the echo 163 | await ep.close() 164 | cp.testing.assert_array_equal(msg + 1, resp) 165 | 166 | if __name__ == '__main__': 167 | asyncio.run(main()) 168 | -------------------------------------------------------------------------------- /docs/source/transport-monitoring.rst: -------------------------------------------------------------------------------- 1 | Monitoring Transports 2 | ===================== 3 | 4 | Below is a list of commonly used tools and commands to monitor InfiniBand and CUDA IPC messages: 5 | 6 | 7 | Infiniband 8 | ---------- 9 | 10 | Monitor InfiniBand packet counters -- this number should dramatically increase when there's InfiniBand traffic: 11 | 12 | :: 13 | 14 | watch -n 0.1 'cat /sys/class/infiniband/mlx5_*/ports/1/counters/port_xmit_data' 15 | 16 | 17 | CUDA IPC/NVLink 18 | --------------- 19 | 20 | Monitor traffic over all GPUs 21 | 22 | :: 23 | 24 | nvidia-smi nvlink -gt d 25 | 26 | 27 | Monitor traffic over all GPUs on counter 0 28 | 29 | .. 
note:: 30 | nvidia-smi nvlink -g is now deprecated 31 | 32 | :: 33 | 34 | # set counters 35 | nvidia-smi nvlink -sc 0bz 36 | watch -d 'nvidia-smi nvlink -g 0' 37 | 38 | 39 | Stats Monitoring of GPUs 40 | :: 41 | 42 | dcgmi dmon -e 449 43 | 44 | `nvdashboard `_ 45 | -------------------------------------------------------------------------------- /examples/cudf-example.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import time 3 | 4 | from dask_cuda import LocalCUDACluster 5 | from dask_cuda.initialize import initialize 6 | from distributed import Client 7 | 8 | import cudf 9 | import dask_cudf 10 | 11 | enable_tcp_over_ucx = True 12 | enable_infiniband = False 13 | enable_nvlink = False 14 | 15 | 16 | async def run(): 17 | initialize( 18 | create_cuda_context=True, 19 | enable_tcp_over_ucx=enable_tcp_over_ucx, 20 | enable_infiniband=enable_infiniband, 21 | enable_nvlink=enable_nvlink, 22 | ) 23 | 24 | async with LocalCUDACluster( 25 | interface="enp1s0f0", 26 | protocol="ucx", 27 | enable_tcp_over_ucx=enable_tcp_over_ucx, 28 | enable_infiniband=enable_infiniband, 29 | enable_nvlink=enable_nvlink, 30 | asynchronous=True, 31 | ) as cluster: 32 | async with Client(cluster, asynchronous=True) as client: 33 | d = dask_cudf.from_cudf( 34 | cudf.DataFrame({"a": range(2**16)}), npartitions=2 35 | ) 36 | r = d.sum() 37 | 38 | for i in range(100): 39 | print("Running iteration:", i) 40 | start = time.time() 41 | await client.compute(r) 42 | print("Time for iteration", i, ":", time.time() - start) 43 | 44 | 45 | if __name__ == "__main__": 46 | asyncio.run(run()) 47 | -------------------------------------------------------------------------------- /examples/cupy-example.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import time 3 | 4 | import cupy 5 | 6 | from dask import array as da 7 | from dask_cuda import LocalCUDACluster 8 | from dask_cuda.initialize 
import initialize 9 | from distributed import Client 10 | 11 | enable_tcp_over_ucx = True 12 | enable_infiniband = False 13 | enable_nvlink = False 14 | 15 | 16 | async def run(): 17 | initialize( 18 | create_cuda_context=True, 19 | enable_tcp_over_ucx=enable_tcp_over_ucx, 20 | enable_infiniband=enable_infiniband, 21 | enable_nvlink=enable_nvlink, 22 | ) 23 | 24 | async with LocalCUDACluster( 25 | interface="enp1s0f0", 26 | protocol="ucx", 27 | enable_tcp_over_ucx=enable_tcp_over_ucx, 28 | enable_infiniband=enable_infiniband, 29 | enable_nvlink=enable_nvlink, 30 | asynchronous=True, 31 | ) as cluster: 32 | async with Client(cluster, asynchronous=True) as client: 33 | rs = da.random.RandomState(RandomState=cupy.random.RandomState) 34 | a = rs.normal(10, 1, (int(4e3), int(4e3)), chunks=(int(1e3), int(1e3))) 35 | x = a + a.T 36 | 37 | for i in range(100): 38 | print("Running iteration:", i) 39 | start = time.time() 40 | await client.compute(x) 41 | print("Time for iteration", i, ":", time.time() - start) 42 | 43 | 44 | if __name__ == "__main__": 45 | asyncio.run(run()) 46 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021-2025, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | [build-system] 16 | build-backend = "rapids_build_backend.build" 17 | requires = [ 18 | "rapids-build-backend>=0.3.1,<0.4.0dev0", 19 | "setuptools>=64.0.0", 20 | ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. 21 | 22 | [project] 23 | name = "ucx-py" 24 | dynamic = ["version"] 25 | description = "Python Bindings for the Unified Communication X library (UCX)" 26 | readme = { file = "README.md", content-type = "text/markdown" } 27 | authors = [ 28 | { name = "NVIDIA Corporation" }, 29 | ] 30 | license = { text = "BSD-3-Clause" } 31 | requires-python = ">=3.10" 32 | dependencies = [ 33 | "numpy>=1.23,<3.0a0", 34 | "pynvml>=12.0.0,<13.0.0a0", 35 | ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. 36 | classifiers = [ 37 | "Intended Audience :: Developers", 38 | "Intended Audience :: System Administrators", 39 | "License :: OSI Approved :: BSD License", 40 | "Operating System :: POSIX :: Linux", 41 | "Programming Language :: Python", 42 | "Topic :: Software Development :: Libraries :: Python Modules", 43 | "Topic :: System :: Hardware", 44 | "Topic :: System :: Systems Administration", 45 | "Programming Language :: Python :: 3", 46 | ] 47 | 48 | [project.optional-dependencies] 49 | test = [ 50 | "cloudpickle", 51 | "cudf==25.8.*,>=0.0.0a0", 52 | "cupy-cuda12x>=12.0.0", 53 | "distributed", 54 | "numba>=0.59.1,<0.62.0a0", 55 | "pytest-asyncio", 56 | "pytest-rerunfailures", 57 | "pytest==7.*", 58 | "rapids-dask-dependency==25.8.*,>=0.0.0a0", 59 | ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. 
60 | 61 | [project.urls] 62 | Homepage = "https://github.com/rapidsai/ucx-py" 63 | Documentation = "https://ucx-py.readthedocs.io/en/stable/" 64 | Source = "https://github.com/rapidsai/ucx-py" 65 | 66 | [tool.isort] 67 | line_length = 79 68 | multi_line_output = 3 69 | include_trailing_comma = true 70 | force_grid_wrap = 0 71 | combine_as_imports = true 72 | order_by_type = true 73 | known_dask = [ 74 | "dask", 75 | "distributed", 76 | "dask_cuda", 77 | ] 78 | known_rapids = [ 79 | "rmm", 80 | "cuml", 81 | "cugraph", 82 | "dask_cudf", 83 | "cudf", 84 | ] 85 | known_first_party = [ 86 | "ucp", 87 | ] 88 | default_section = "THIRDPARTY" 89 | sections = [ 90 | "FUTURE", 91 | "STDLIB", 92 | "THIRDPARTY", 93 | "DASK", 94 | "RAPIDS", 95 | "FIRSTPARTY", 96 | "LOCALFOLDER", 97 | ] 98 | skip = [ 99 | ".eggs", 100 | ".git", 101 | ".hg", 102 | ".mypy_cache", 103 | ".tox", 104 | ".venv", 105 | "build", 106 | "dist", 107 | "__init__.py", 108 | ] 109 | 110 | [tool.pytest.ini_options] 111 | xfail_strict = true 112 | addopts = "--tb=native" 113 | 114 | [tool.rapids-build-backend] 115 | build-backend = "setuptools.build_meta" 116 | commit-files = [ 117 | "ucp/COMMIT_FILE" 118 | ] 119 | # by default, do not rename the package 'ucx-py-cu${ver}' 120 | # (this is overridden in wheel publishing) 121 | disable-cuda=true 122 | dependencies-file = "dependencies.yaml" 123 | matrix-entry = "cuda_suffixed=true" 124 | requires = [ 125 | "cython>=3.0.0", 126 | ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. 
127 | 128 | [tool.setuptools] 129 | license-files = ["LICENSE"] 130 | zip-safe = false 131 | 132 | [tool.setuptools.packages.find] 133 | exclude=["*tests*"] 134 | 135 | [tool.setuptools.dynamic] 136 | version = {file = "ucp/VERSION"} 137 | 138 | [tool.pydistcheck] 139 | select = [ 140 | "distro-too-large-compressed", 141 | ] 142 | 143 | # PyPI limit is 100 MiB, fail CI before we get too close to that 144 | max_allowed_size_compressed = '75M' 145 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. 2 | # See file LICENSE for terms. 3 | 4 | # This file is a copy of what is available in a Cython demo + some additions 5 | 6 | from __future__ import absolute_import, print_function 7 | 8 | import glob 9 | import os 10 | from distutils.sysconfig import get_config_var, get_python_inc 11 | 12 | from Cython.Distutils.build_ext import new_build_ext 13 | from setuptools import setup 14 | from setuptools.extension import Extension 15 | 16 | 17 | def _find_libucx_libs_and_headers(): 18 | """ 19 | If the 'libucx' wheel is not installed, returns a tuple of empty lists. 20 | In that case, the project will be compiled against system installations 21 | of the UCX libraries. 22 | 23 | If 'libucx' is installed, returns lists of library and header paths to help 24 | the compiler and linker find its contents. In that case, the project will 25 | be compiled against those libucx-wheel-provided versions of the UCX libraries. 
26 | """ 27 | try: 28 | import libucx 29 | except ImportError: 30 | return [], [] 31 | 32 | # find 'libucx' 33 | module_dir = os.path.dirname(libucx.__file__) 34 | 35 | # find where it stores files like 'libucm.so.0' 36 | libs = glob.glob(f"{module_dir}/**/lib*.so*", recursive=True) 37 | 38 | # deduplicate those library paths 39 | lib_dirs = {os.path.dirname(f) for f in libs} 40 | if not lib_dirs: 41 | raise RuntimeError( 42 | f"Did not find shared libraries in 'libucx' install location ({module_dir})" 43 | ) 44 | 45 | # find where it stores headers 46 | headers = glob.glob(f"{module_dir}/**/include", recursive=True) 47 | 48 | # deduplicate those header paths (and ensure the list only includes directories) 49 | header_dirs = {f for f in headers if os.path.isdir(f)} 50 | if not header_dirs: 51 | raise RuntimeError( 52 | f"Did not find UCX headers 'libucx' install location ({module_dir})" 53 | ) 54 | 55 | return list(lib_dirs), list(header_dirs) 56 | 57 | 58 | include_dirs = [os.path.dirname(get_python_inc())] 59 | library_dirs = [get_config_var("LIBDIR")] 60 | libraries = ["ucp", "uct", "ucm", "ucs"] 61 | extra_compile_args = ["-std=c99", "-Werror"] 62 | 63 | # tell the compiler and linker where to find UCX libraries and their headers 64 | # provided by the 'libucx' wheel 65 | libucx_lib_dirs, libucx_header_dirs = _find_libucx_libs_and_headers() 66 | library_dirs.extend(libucx_lib_dirs) 67 | include_dirs.extend(libucx_header_dirs) 68 | 69 | 70 | ext_modules = [ 71 | Extension( 72 | "ucp._libs.ucx_api", 73 | sources=["ucp/_libs/ucx_api.pyx", "ucp/_libs/src/c_util.c"], 74 | depends=["ucp/_libs/src/c_util.h", "ucp/_libs/ucx_api_dep.pxd"], 75 | include_dirs=include_dirs, 76 | library_dirs=library_dirs, 77 | libraries=libraries, 78 | extra_compile_args=extra_compile_args, 79 | ), 80 | Extension( 81 | "ucp._libs.arr", 82 | sources=["ucp/_libs/arr.pyx"], 83 | include_dirs=include_dirs, 84 | library_dirs=library_dirs, 85 | libraries=libraries, 86 | 
extra_compile_args=extra_compile_args, 87 | ), 88 | ] 89 | 90 | setup( 91 | ext_modules=ext_modules, 92 | cmdclass={"build_ext": new_build_ext}, 93 | package_data={"ucp": ["VERSION"]}, 94 | ) 95 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | import pytest 5 | 6 | import ucp 7 | 8 | # Prevent calls such as `cudf = pytest.importorskip("cudf")` from initializing 9 | # a CUDA context. Such calls may cause tests that must initialize the CUDA 10 | # context on the appropriate device to fail. 11 | # For example, without `RAPIDS_NO_INITIALIZE=True`, `test_benchmark_cluster` 12 | # will succeed if running alone, but fails when all tests are run in batch. 13 | os.environ["RAPIDS_NO_INITIALIZE"] = "True" 14 | 15 | 16 | def pytest_addoption(parser): 17 | parser.addoption( 18 | "--runslow", action="store_true", default=False, help="run slow tests" 19 | ) 20 | 21 | 22 | def pytest_configure(config): 23 | config.addinivalue_line("markers", "slow: mark test as slow to run") 24 | 25 | 26 | def pytest_collection_modifyitems(config, items): 27 | if config.getoption("--runslow"): 28 | # --runslow given in cli: do not skip slow tests 29 | return 30 | skip_slow = pytest.mark.skip(reason="need --runslow option to run") 31 | for item in items: 32 | if "slow" in item.keywords: 33 | item.add_marker(skip_slow) 34 | 35 | 36 | def handle_exception(loop, context): 37 | msg = context.get("exception", context["message"]) 38 | print(msg) 39 | 40 | 41 | # Let's make sure that UCX gets time to cancel 42 | # progress tasks before closing the event loop. 
43 | @pytest.fixture() 44 | def event_loop(scope="session"): 45 | loop = asyncio.new_event_loop() 46 | loop.set_exception_handler(handle_exception) 47 | ucp.reset() 48 | yield loop 49 | ucp.reset() 50 | loop.run_until_complete(asyncio.sleep(0)) 51 | loop.close() 52 | -------------------------------------------------------------------------------- /tests/test_benchmark_cluster.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import tempfile 3 | from itertools import chain 4 | 5 | import numpy as np 6 | import pytest 7 | 8 | from ucp.benchmarks.utils import _run_cluster_server, _run_cluster_workers 9 | 10 | 11 | async def _worker(rank, eps, args): 12 | futures = [] 13 | # Send my rank to all others 14 | for ep in eps.values(): 15 | futures.append(ep.send(np.array([rank], dtype="u4"))) 16 | # Recv from all others 17 | result = np.empty(len(eps.values()), dtype="u4") 18 | futures += list(ep.recv(result[i : i + 1]) for i, ep in enumerate(eps.values())) 19 | 20 | # Wait for transfers to complete 21 | await asyncio.gather(*futures) 22 | 23 | # We expect to get the sum of all ranks excluding ours 24 | expect = sum(range(len(eps) + 1)) - rank 25 | assert expect == result.sum() 26 | 27 | 28 | @pytest.mark.asyncio 29 | async def test_benchmark_cluster(n_chunks=1, n_nodes=2, n_workers=2): 30 | server_file = tempfile.NamedTemporaryFile() 31 | 32 | server, server_ret = _run_cluster_server(server_file.name, n_nodes * n_workers) 33 | 34 | # Wait for server to become available 35 | with open(server_file.name, "r") as f: 36 | while len(f.read()) == 0: 37 | pass 38 | 39 | workers = list( 40 | chain.from_iterable( 41 | _run_cluster_workers(server_file.name, n_chunks, n_workers, i, _worker) 42 | for i in range(n_nodes) 43 | ) 44 | ) 45 | 46 | for worker in workers: 47 | worker.join() 48 | assert not worker.exitcode 49 | 50 | server.join() 51 | assert not server.exitcode 52 | 
-------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest.mock import patch 3 | 4 | import pytest 5 | from utils import captured_logger 6 | 7 | import ucp 8 | 9 | 10 | def test_get_config(): 11 | with patch.dict(os.environ): 12 | # Unset to test default value 13 | if os.environ.get("UCX_TLS") is not None: 14 | del os.environ["UCX_TLS"] 15 | ucp.reset() 16 | config = ucp.get_config() 17 | assert isinstance(config, dict) 18 | assert config["TLS"] == "all" 19 | 20 | 21 | @patch.dict(os.environ, {"UCX_SEG_SIZE": "2M"}) 22 | def test_set_env(): 23 | ucp.reset() 24 | config = ucp.get_config() 25 | assert config["SEG_SIZE"] == os.environ["UCX_SEG_SIZE"] 26 | 27 | 28 | @patch.dict(os.environ, {"UCX_SEG_SIZE": "2M"}) 29 | def test_init_options(): 30 | ucp.reset() 31 | options = {"SEG_SIZE": "3M"} 32 | # environment specification should be ignored 33 | ucp.init(options) 34 | config = ucp.get_config() 35 | assert config["SEG_SIZE"] == options["SEG_SIZE"] 36 | 37 | 38 | @patch.dict(os.environ, {"UCX_SEG_SIZE": "4M"}) 39 | def test_init_options_and_env(): 40 | ucp.reset() 41 | options = {"SEG_SIZE": "3M"} # Should be ignored 42 | ucp.init(options, env_takes_precedence=True) 43 | config = ucp.get_config() 44 | assert config["SEG_SIZE"] == os.environ["UCX_SEG_SIZE"] 45 | # Provided options dict was not modified. 46 | assert options == {"SEG_SIZE": "3M"} 47 | 48 | 49 | @pytest.mark.skipif( 50 | ucp.get_ucx_version() >= (1, 12, 0), 51 | reason="Beginning with UCX >= 1.12, it's only possible to validate " 52 | "UCP options but not options from other modules such as UCT. 
" 53 | "See https://github.com/openucx/ucx/issues/7519.", 54 | ) 55 | def test_init_unknown_option(): 56 | ucp.reset() 57 | options = {"UNKNOWN_OPTION": "3M"} 58 | with pytest.raises(ucp.exceptions.UCXConfigError): 59 | ucp.init(options) 60 | 61 | 62 | def test_init_invalid_option(): 63 | ucp.reset() 64 | options = {"SEG_SIZE": "invalid-size"} 65 | with pytest.raises(ucp.exceptions.UCXConfigError): 66 | ucp.init(options) 67 | 68 | 69 | @patch.dict(os.environ, {"UCX_SEG_SIZE": "2M"}) 70 | def test_logging(): 71 | """ 72 | Test default logging configuration. 73 | """ 74 | import logging 75 | 76 | root = logging.getLogger("ucx") 77 | 78 | # ucp.init will only print INFO LINES 79 | with captured_logger(root, level=logging.INFO) as foreign_log: 80 | ucp.reset() 81 | options = {"SEG_SIZE": "3M"} 82 | ucp.init(options) 83 | assert len(foreign_log.getvalue()) > 0 84 | 85 | with captured_logger(root, level=logging.ERROR) as foreign_log: 86 | ucp.reset() 87 | options = {"SEG_SIZE": "3M"} 88 | ucp.init(options) 89 | 90 | assert len(foreign_log.getvalue()) == 0 91 | -------------------------------------------------------------------------------- /tests/test_custom_send_recv.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import pickle 3 | 4 | import numpy as np 5 | import pytest 6 | 7 | import ucp 8 | 9 | cudf = pytest.importorskip("cudf") 10 | distributed = pytest.importorskip("distributed") 11 | cuda = pytest.importorskip("numba.cuda") 12 | 13 | 14 | @pytest.mark.asyncio 15 | @pytest.mark.parametrize( 16 | "g", 17 | [ 18 | lambda cudf: cudf.Series([1, 2, 3]), 19 | lambda cudf: cudf.Series([1, 2, 3], index=[4, 5, 6]), 20 | lambda cudf: cudf.Series([1, None, 3]), 21 | lambda cudf: cudf.Series(range(2**13)), 22 | lambda cudf: cudf.DataFrame({"a": np.random.random(1200000)}), 23 | lambda cudf: cudf.DataFrame({"a": range(2**20)}), 24 | lambda cudf: cudf.DataFrame({"a": range(2**26)}), 25 | lambda cudf: cudf.Series(), 26 
| lambda cudf: cudf.DataFrame(), 27 | lambda cudf: cudf.DataFrame({"a": [], "b": []}), 28 | lambda cudf: cudf.DataFrame({"a": [1.0], "b": [2.0]}), 29 | lambda cudf: cudf.DataFrame( 30 | {"a": ["a", "b", "c", "d"], "b": ["a", "b", "c", "d"]} 31 | ), 32 | lambda cudf: cudf.datasets.timeseries(), # ts index with ints, cats, floats 33 | ], 34 | ) 35 | async def test_send_recv_cudf(event_loop, g): 36 | from distributed.utils import nbytes 37 | 38 | class UCX: 39 | def __init__(self, ep): 40 | self.ep = ep 41 | 42 | async def write(self, cdf): 43 | header, _frames = cdf.serialize() 44 | frames = [pickle.dumps(header)] + _frames 45 | 46 | # Send meta data 47 | await self.ep.send(np.array([len(frames)], dtype=np.uint64)) 48 | await self.ep.send( 49 | np.array( 50 | [hasattr(f, "__cuda_array_interface__") for f in frames], 51 | dtype=bool, 52 | ) 53 | ) 54 | await self.ep.send(np.array([nbytes(f) for f in frames], dtype=np.uint64)) 55 | # Send frames 56 | for frame in frames: 57 | if nbytes(frame) > 0: 58 | await self.ep.send(frame) 59 | 60 | async def read(self): 61 | try: 62 | # Recv meta data 63 | nframes = np.empty(1, dtype=np.uint64) 64 | await self.ep.recv(nframes) 65 | is_cudas = np.empty(nframes[0], dtype=bool) 66 | await self.ep.recv(is_cudas) 67 | sizes = np.empty(nframes[0], dtype=np.uint64) 68 | await self.ep.recv(sizes) 69 | except (ucp.exceptions.UCXCanceled, ucp.exceptions.UCXCloseError) as e: 70 | msg = "SOMETHING TERRIBLE HAS HAPPENED IN THE TEST" 71 | raise e(msg) 72 | else: 73 | # Recv frames 74 | frames = [] 75 | for is_cuda, size in zip(is_cudas.tolist(), sizes.tolist()): 76 | if size > 0: 77 | if is_cuda: 78 | frame = cuda.device_array((size,), dtype=np.uint8) 79 | else: 80 | frame = np.empty(size, dtype=np.uint8) 81 | await self.ep.recv(frame) 82 | frames.append(frame) 83 | else: 84 | if is_cuda: 85 | frames.append(cuda.device_array((0,), dtype=np.uint8)) 86 | else: 87 | frames.append(b"") 88 | return frames 89 | 90 | class UCXListener: 91 | def 
async def mp_queue_get_nowait(queue):
    """Poll a multiprocessing queue without blocking the event loop.

    Repeatedly attempts a non-blocking ``get_nowait()``, yielding control
    to the loop for 10 ms between attempts, until an item is available.
    Returns the first item retrieved.
    """
    while True:
        try:
            item = queue.get_nowait()
        except Empty:
            # Nothing queued yet; let other tasks run before retrying.
            await asyncio.sleep(0.01)
        else:
            return item
def _test_shutdown_unexpected_closed_peer_client(
    client_queue, server_queue, endpoint_error_handling
):
    """Client process: connect, receive one message, then exit abruptly."""

    async def run():
        server_port = client_queue.get()
        ep = await ucp.create_endpoint(
            ucp.get_address(),
            server_port,
            endpoint_error_handling=endpoint_error_handling,
        )
        msg = np.empty(100, dtype=np.int64)
        await ep.recv(msg)

    get_event_loop().run_until_complete(run())


@pytest.mark.parametrize("endpoint_error_handling", [True, False])
def test_shutdown_unexpected_closed_peer(caplog, endpoint_error_handling):
    """
    Test clean server shutdown after unexpected peer close.

    This will cause some UCX warnings to be issued, but this is as expected.
    The main goal is to assert that the processes exit without errors
    despite a somewhat messy initial state.
    """
    if endpoint_error_handling is False and any(
        [
            t.startswith(i)
            for i in ("rc", "dc", "ud")
            for t in ucp.get_active_transports()
        ]
    ):
        # BUG FIX: the adjacent string literals previously concatenated to
        # "...or udtransport is enabled" (missing space after "ud").
        pytest.skip(
            "Endpoint error handling is required when rc, dc or ud "
            "transport is enabled"
        )

    client_queue = mp.Queue()
    server_queue = mp.Queue()
    p1 = mp.Process(
        target=_test_shutdown_unexpected_closed_peer_server,
        args=(client_queue, server_queue, endpoint_error_handling),
    )
    p1.start()
    p2 = mp.Process(
        target=_test_shutdown_unexpected_closed_peer_client,
        args=(client_queue, server_queue, endpoint_error_handling),
    )
    p2.start()
    p2.join()
    # Signal the server that the client has terminated so it can probe the
    # endpoint's liveness and shut down.
    server_queue.put("client is down")
    p1.join()

    assert not p1.exitcode
    assert not p2.exitcode
@pytest.mark.parametrize(
    "transports",
    ["posix", "tcp", "posix,tcp"],
)
def test_check_transport(transports):
    """Initialize UCX with an explicit TLS list and inspect active transports."""
    requested = transports.split(",")
    inactive = list(set(["posix", "tcp"]) - set(requested))

    ucp.reset()
    ucp.init({"TLS": transports, "NET_DEVICES": "all"})

    active = ucp.get_active_transports()
    # Every requested transport must appear (by prefix) among the active ones.
    for name in requested:
        assert any(at.startswith(name) for at in active)
    # NOTE(review): this mirrors the original assertion, which only requires
    # that *some* active transport lacks the inactive prefix -- almost always
    # trivially true.  A stricter check would be `not any(...)`; confirm the
    # intent before tightening.
    for name in inactive:
        assert any(not at.startswith(name) for at in active)
def get_somaxconn():
    """Return the kernel's listen-backlog limit from /proc (net.core.somaxconn)."""
    with open("/proc/sys/net/core/somaxconn", "r") as f:
        return int(f.readline())


async def hello(ep):
    """Exchange a small array in both directions and verify it round-trips."""
    msg2send = np.arange(10)
    msg2recv = np.empty_like(msg2send)
    send_fut = ep.send(msg2send)
    recv_fut = ep.recv(msg2recv)
    await send_fut
    await recv_fut
    np.testing.assert_array_equal(msg2send, msg2recv)
    assert isinstance(ep.ucx_info(), str)


async def server_node(ep):
    """Server side: run the hello exchange, then close the endpoint."""
    await hello(ep)
    assert isinstance(ep.ucx_info(), str)
    await ep.close()


async def client_node(port):
    """Client side: connect to *port* and run the hello exchange."""
    ep = await ucp.create_endpoint(ucp.get_address(), port)
    await hello(ep)
    assert isinstance(ep.ucx_info(), str)
class ResetAfterN:
    """Callable that invokes ``ucp.reset()`` on exactly its n-th call."""

    def __init__(self, n):
        self.n = n
        self.count = 0

    def __call__(self):
        self.count += 1
        if self.count != self.n:
            return
        ucp.reset()
def test_reset(): 21 | reset = ResetAfterN(2) 22 | 23 | def server(ep): 24 | ep.abort() 25 | reset() 26 | 27 | lt = ucp.create_listener(server) 28 | ep = await ucp.create_endpoint(ucp.get_address(), lt.port) 29 | del lt 30 | del ep 31 | reset() 32 | 33 | 34 | @pytest.mark.asyncio 35 | async def test_lt_still_in_scope_error(): 36 | reset = ResetAfterN(2) 37 | 38 | def server(ep): 39 | ep.abort() 40 | reset() 41 | 42 | lt = ucp.create_listener(server) 43 | ep = await ucp.create_endpoint(ucp.get_address(), lt.port) 44 | del ep 45 | with pytest.raises( 46 | ucp.exceptions.UCXError, 47 | match="Trying to reset UCX but not all Endpoints and/or Listeners are closed()", 48 | ): 49 | ucp.reset() 50 | 51 | lt.close() 52 | ucp.reset() 53 | 54 | 55 | @pytest.mark.asyncio 56 | async def test_ep_still_in_scope_error(): 57 | reset = ResetAfterN(2) 58 | 59 | def server(ep): 60 | ep.abort() 61 | reset() 62 | 63 | lt = ucp.create_listener(server) 64 | ep = await ucp.create_endpoint(ucp.get_address(), lt.port) 65 | del lt 66 | with pytest.raises( 67 | ucp.exceptions.UCXError, 68 | match="Trying to reset UCX but not all Endpoints and/or Listeners are closed()", 69 | ): 70 | ucp.reset() 71 | ep.abort() 72 | ucp.reset() 73 | -------------------------------------------------------------------------------- /tests/test_rma.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import ucp 4 | 5 | 6 | @pytest.mark.asyncio 7 | @pytest.mark.parametrize("blocking_progress_mode", [True, False]) 8 | async def test_fence(blocking_progress_mode): 9 | # Test needs to be async here to ensure progress tasks are cleared 10 | # and avoid warnings. 
def _bytearray_assert_equal(a, b):
    """Equality assertion usable as a validator callback."""
    assert a == b


def get_data():
    """Build the per-memory-type parameter matrix for the AM send/recv tests.

    Always returns the two host cases (bytearray and numpy); appends a CUDA
    case backed by cupy when cupy is importable.  Each entry provides an
    ``allocator`` for receive buffers, a ``generator`` for payloads, a
    ``validator`` comparing received vs expected, and a ``memory_type`` tag.
    """
    host_cases = [
        {
            "allocator": bytearray,
            "generator": lambda n: bytearray(b"m" * n),
            "validator": lambda recv, exp: _bytearray_assert_equal(recv, exp),
            "memory_type": "host",
        },
        {
            "allocator": partial(np.ones, dtype=np.uint8),
            "generator": partial(np.arange, dtype=np.int64),
            "validator": lambda recv, exp: np.testing.assert_equal(
                recv.view(np.int64), exp
            ),
            "memory_type": "host",
        },
    ]

    try:
        import cupy as cp
    except ImportError:
        # No GPU stack available; host cases only.
        return host_cases

    return host_cases + [
        {
            "allocator": partial(cp.ones, dtype=np.uint8),
            "generator": partial(cp.arange, dtype=np.int64),
            "validator": lambda recv, exp: cp.testing.assert_array_equal(
                recv.view(np.int64), exp
            ),
            "memory_type": "cuda",
        }
    ]
False]) 65 | @pytest.mark.parametrize("recv_wait", [True, False]) 66 | @pytest.mark.parametrize("data", get_data()) 67 | async def test_send_recv_am(size, blocking_progress_mode, recv_wait, data): 68 | rndv_thresh = 8192 69 | ucp.init( 70 | options={"RNDV_THRESH": str(rndv_thresh)}, 71 | blocking_progress_mode=blocking_progress_mode, 72 | ) 73 | 74 | ucp.register_am_allocator(data["allocator"], data["memory_type"]) 75 | msg = data["generator"](size) 76 | 77 | recv = [] 78 | listener = ucp.create_listener(simple_server(size, recv)) 79 | num_clients = 1 80 | clients = [ 81 | await ucp.create_endpoint(ucp.get_address(), listener.port) 82 | for i in range(num_clients) 83 | ] 84 | for c in clients: 85 | if recv_wait: 86 | # By sleeping here we ensure that the listener's 87 | # ep.am_recv call will have to wait, rather than return 88 | # immediately as receive data is already available. 89 | await asyncio.sleep(1) 90 | await c.am_send(msg) 91 | recv_msg = await c.am_recv() 92 | for c in clients: 93 | await c.close() 94 | listener.close() 95 | 96 | if data["memory_type"] == "cuda" and msg.nbytes < rndv_thresh: 97 | # Eager messages are always received on the host, if no host 98 | # allocator is registered UCX-Py defaults to `bytearray`. 
@pytest.mark.asyncio
async def test_tag_match():
    """Messages must be matched by tag, not by arrival order."""
    msg1 = bytes("msg1", "utf-8")
    msg2 = bytes("msg2", "utf-8")

    async def server_node(ep):
        f1 = ep.send(msg1, tag="msg1")
        await asyncio.sleep(1)  # Let msg1 finish
        f2 = ep.send(msg2, tag="msg2")
        await asyncio.gather(f1, f2)
        await ep.close()

    lf = ucp.create_listener(server_node)
    ep = await ucp.create_endpoint(ucp.get_address(), lf.port)
    m1 = bytearray(len(msg1))
    m2 = bytearray(len(msg2))
    f2 = asyncio.create_task(ep.recv(m2, tag="msg2"))

    # f2 must stay pending: its tag "msg2" cannot match the server's
    # in-flight "msg1" send.
    done, pending = await asyncio.wait({f2}, timeout=0.01)
    assert f2 in pending
    # "msg1" should be ready
    await ep.recv(m1, tag="msg1")
    assert m1 == msg1
    await f2
    assert m2 == msg2
def test_get_ucx_version():
    """get_ucx_version() must work without initializing a UCX context."""
    version = ucp.get_ucx_version()
    assert isinstance(version, tuple)
    assert len(version) == 3
    # Check UCX isn't initialized
    assert ucp.core._ctx is None


def test_version_constants_are_populated():
    # __git_commit__ will only be non-empty in a built distribution
    assert isinstance(ucp.__git_commit__, str)

    # __version__ should always be non-empty
    assert isinstance(ucp.__version__, str)
    assert ucp.__version__ != ""


def test_ucx_version_constant():
    assert isinstance(ucp.__ucx_version__, str)
@contextmanager
def captured_logger(logger, level=logging.INFO, propagate=None):
    """Capture output from the given Logger.

    Temporarily replaces *logger*'s handlers with a single StreamHandler
    writing to an in-memory buffer, sets its level, and optionally overrides
    ``propagate``.  All prior state is restored on exit.

    Yields the ``io.StringIO`` buffer receiving the log output.
    """
    if isinstance(logger, str):
        logger = logging.getLogger(logger)
    saved_level = logger.level
    saved_handlers = logger.handlers[:]
    if propagate is not None:
        saved_propagate = logger.propagate
        logger.propagate = propagate
    buf = io.StringIO()
    logger.handlers[:] = [logging.StreamHandler(buf)]
    logger.setLevel(level)
    try:
        yield buf
    finally:
        # Restore handlers and level even if the body raised.
        logger.handlers[:] = saved_handlers
        logger.setLevel(saved_level)
        if propagate is not None:
            logger.propagate = saved_propagate
async def am_send(ep, frames):
    """Send *frames* over *ep* via the AM API: frame count first, then each frame."""
    await ep.am_send(np.array([len(frames)], dtype=np.uint64))
    # Send frames
    for frame in frames:
        await ep.am_send(frame)


async def am_recv(ep):
    """Receive a message produced by :func:`am_send` and reassemble it.

    Returns a tuple ``(frames, msg)`` where ``msg`` is the object rebuilt by
    ``distributed.comm.utils.from_frames``.  Skips the test if distributed
    is not installed.
    """
    pytest.importorskip("distributed")

    from distributed.comm.utils import from_frames

    try:
        # Recv meta data: the first AM message carries the frame count.
        nframes = (await ep.am_recv()).view(np.uint64)
    except (ucp.exceptions.UCXCanceled, ucp.exceptions.UCXCloseError) as e:
        msg = "SOMETHING TERRIBLE HAS HAPPENED IN THE TEST"
        # BUG FIX: `e` is an exception *instance* and is not callable; the old
        # `raise e(msg)` raised TypeError instead of the intended marker error.
        # Re-raise the same exception type, chaining the original.
        raise type(e)(msg) from e

    # Recv frames
    frames = []
    for _ in range(nframes[0]):
        frames.append(await ep.am_recv())

    msg = await from_frames(frames)
    return frames, msg
3 | 4 | """UCX-Py: Python bindings for UCX """ 5 | 6 | import logging 7 | import os 8 | 9 | logger = logging.getLogger("ucx") 10 | 11 | # Notice, if we have to update environment variables we need to do it 12 | # before importing UCX, which must happen also before the Cython code 13 | # import to prevent UCS unused variable warnings. 14 | if "UCX_MEMTYPE_CACHE" not in os.environ: 15 | # See 16 | logger.debug("Setting env UCX_MEMTYPE_CACHE=n, which is required by UCX") 17 | os.environ["UCX_MEMTYPE_CACHE"] = "n" 18 | 19 | 20 | # If libucx was installed as a wheel, we must request it to load the library symbols. 21 | # Otherwise, we assume that the library was installed in a system path that ld can find. 22 | try: 23 | import libucx 24 | except ImportError: 25 | pass 26 | else: 27 | libucx.load_library() 28 | del libucx 29 | 30 | 31 | from .core import * # noqa 32 | from .core import get_ucx_version # noqa 33 | from .utils import get_ucxpy_logger # noqa 34 | from ._libs.utils import get_address # noqa 35 | from ._version import __git_commit__, __version__ 36 | 37 | try: 38 | import pynvml 39 | except ImportError: 40 | pynvml = None 41 | 42 | _ucx_version = get_ucx_version() 43 | 44 | __ucx_min_version__ = "1.15.0" 45 | __ucx_version__ = "%d.%d.%d" % _ucx_version 46 | 47 | if _ucx_version < tuple(int(i) for i in __ucx_min_version__.split(".")): 48 | raise ImportError( 49 | f"Support for UCX {__ucx_version__} has ended. Please upgrade to " 50 | f"{__ucx_min_version__} or newer. If you believe the wrong version " 51 | "is being loaded, please check the path from where UCX is loaded " 52 | "by rerunning with the environment variable `UCX_LOG_LEVEL=debug`." 
53 | ) 54 | 55 | # Setup UCX-Py logger 56 | logger = get_ucxpy_logger() 57 | 58 | if "UCX_RNDV_THRESH" not in os.environ: 59 | logger.info("Setting UCX_RNDV_THRESH=8192") 60 | os.environ["UCX_RNDV_THRESH"] = "8192" 61 | 62 | if "UCX_RNDV_FRAG_MEM_TYPE" not in os.environ: 63 | logger.info("Setting UCX_RNDV_FRAG_MEM_TYPE=cuda") 64 | os.environ["UCX_RNDV_FRAG_MEM_TYPE"] = "cuda" 65 | 66 | if ( 67 | pynvml is not None 68 | and "UCX_CUDA_COPY_MAX_REG_RATIO" not in os.environ 69 | and _ucx_version >= (1, 12, 0) 70 | ): 71 | try: 72 | pynvml.nvmlInit() 73 | device_count = pynvml.nvmlDeviceGetCount() 74 | large_bar1 = [False] * device_count 75 | 76 | def _is_mig_device(handle): 77 | try: 78 | pynvml.nvmlDeviceGetMigMode(handle)[0] 79 | except pynvml.NVMLError: 80 | return False 81 | return True 82 | 83 | for dev_idx in range(device_count): 84 | handle = pynvml.nvmlDeviceGetHandleByIndex(dev_idx) 85 | 86 | try: 87 | total_memory = pynvml.nvmlDeviceGetMemoryInfo(handle).total 88 | except pynvml.NVMLError_NotSupported: 89 | total_memory = None 90 | 91 | # Ignore MIG devices and devices with no memory resource (i.e., only 92 | # integrated CPU+GPU memory resource) and rely on UCX's default for 93 | # now. Increasing `UCX_CUDA_COPY_MAX_REG_RATIO` should be thoroughly 94 | # tested, as it's not yet clear whether it would be safe to set `1.0` 95 | # for those instances too. 96 | if _is_mig_device(handle) or total_memory is None: 97 | continue 98 | 99 | try: 100 | bar1_total = pynvml.nvmlDeviceGetBAR1MemoryInfo(handle).bar1Total 101 | except pynvml.NVMLError_NotSupported: 102 | # Bar1 access not supported on this device, set it to 103 | # zero (always lower than device memory). 
104 | bar1_total = 0 105 | 106 | total_memory = pynvml.nvmlDeviceGetMemoryInfo(handle).total 107 | if total_memory <= bar1_total: 108 | large_bar1[dev_idx] = True 109 | 110 | if all(large_bar1): 111 | logger.info("Setting UCX_CUDA_COPY_MAX_REG_RATIO=1.0") 112 | os.environ["UCX_CUDA_COPY_MAX_REG_RATIO"] = "1.0" 113 | except ( 114 | pynvml.NVMLError_LibraryNotFound, 115 | pynvml.NVMLError_DriverNotLoaded, 116 | pynvml.NVMLError_Unknown, 117 | ): 118 | pass 119 | 120 | if "UCX_MAX_RNDV_RAILS" not in os.environ and _ucx_version >= (1, 12, 0): 121 | logger.info("Setting UCX_MAX_RNDV_RAILS=1") 122 | os.environ["UCX_MAX_RNDV_RAILS"] = "1" 123 | 124 | if "UCX_PROTO_ENABLE" not in os.environ and (1, 12, 0) <= _ucx_version < (1, 18, 0): 125 | # UCX protov2 still doesn't support CUDA async/managed memory 126 | logger.info("Setting UCX_PROTO_ENABLE=n") 127 | os.environ["UCX_PROTO_ENABLE"] = "n" 128 | -------------------------------------------------------------------------------- /ucp/_libs/__init__.pxd: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | # See file LICENSE for terms. 3 | 4 | # cython: language_level=3 5 | -------------------------------------------------------------------------------- /ucp/_libs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 2 | # See file LICENSE for terms. 3 | 4 | from .utils import nvtx_annotate # noqa 5 | -------------------------------------------------------------------------------- /ucp/_libs/arr.pxd: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2024, NVIDIA CORPORATION. All rights reserved. 2 | # See file LICENSE for terms. 
3 | 4 | # cython: language_level=3 5 | 6 | 7 | from libc.stdint cimport uintptr_t 8 | 9 | 10 | cdef class Array: 11 | cdef readonly uintptr_t ptr 12 | cdef readonly bint readonly 13 | cdef readonly object obj 14 | 15 | cdef readonly Py_ssize_t itemsize 16 | 17 | cdef readonly Py_ssize_t ndim 18 | cdef Py_ssize_t[::1] shape_mv 19 | cdef Py_ssize_t[::1] strides_mv 20 | 21 | cdef readonly bint cuda 22 | 23 | cpdef bint _c_contiguous(self) 24 | cpdef bint _f_contiguous(self) 25 | cpdef bint _contiguous(self) 26 | cpdef Py_ssize_t _nbytes(self) 27 | 28 | 29 | cpdef Array asarray(obj) 30 | -------------------------------------------------------------------------------- /ucp/_libs/arr.pyi: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2024, NVIDIA CORPORATION. All rights reserved. 2 | # See file LICENSE for terms. 3 | 4 | from typing import Generic, Tuple, TypeVar 5 | 6 | T = TypeVar("T") 7 | 8 | class Array(Generic[T]): 9 | def __init__(self, obj: T): ... 10 | @property 11 | def c_contiguous(self) -> bool: ... 12 | @property 13 | def f_contiguous(self) -> bool: ... 14 | @property 15 | def contiguous(self) -> bool: ... 16 | @property 17 | def nbytes(self) -> int: ... 18 | @property 19 | def shape(self) -> Tuple[int]: ... 20 | @property 21 | def strides(self) -> Tuple[int]: ... 22 | @property 23 | def cuda(self) -> bool: ... 24 | @property 25 | def obj(self) -> T: ... 26 | 27 | def asarray(obj) -> Array: ... 28 | -------------------------------------------------------------------------------- /ucp/_libs/exceptions.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 2 | # See file LICENSE for terms. 
import contextlib
import logging

logger = logging.getLogger("ucx")


@contextlib.contextmanager
def log_errors(reraise_exception=False):
    """Log any exception escaping the body on the "ucx" logger.

    The exception is swallowed after logging unless ``reraise_exception``
    is True, in which case it propagates to the caller.
    """
    try:
        yield
    except BaseException as e:
        logger.exception(e)
        if reraise_exception:
            raise


class UCXBaseException(Exception):
    """Root of the UCX-Py exception hierarchy."""


class UCXError(UCXBaseException):
    """Generic UCX error."""


class UCXConfigError(UCXError):
    """Invalid or rejected UCX configuration."""


class UCXWarning(UserWarning):
    """Warning category for UCX-related diagnostics."""


class UCXCloseError(UCXBaseException):
    """Error raised while closing an endpoint or listener."""


class UCXCanceled(UCXBaseException):
    """Operation was canceled before completion."""


class UCXConnectionReset(UCXBaseException):
    """Remote peer reset the connection."""


class UCXMsgTruncated(UCXBaseException):
    """Received message was larger than the posted buffer."""


class UCXNotConnected(UCXBaseException):
    """Operation attempted on an endpoint that is not connected."""


class UCXUnreachable(UCXBaseException):
    """Remote address could not be reached."""
Users should not instance this class directly and 17 | should use the from_buffer() and from_mem_handle() class methods or the 18 | pack_rkey() method on the UCXMemoryHandle class 19 | """ 20 | cdef void *_key 21 | cdef Py_ssize_t _length 22 | 23 | def __cinit__(self, uintptr_t packed_key_as_int, Py_ssize_t length): 24 | key = packed_key_as_int 25 | self._key = malloc(length) 26 | self._length = length 27 | memcpy(self._key, key, length) 28 | 29 | @classmethod 30 | def from_buffer(cls, buffer): 31 | """ Wrap a received buffer in a PackedRemoteKey to turn magic buffers into a 32 | python class suitable for unpacking on an EP 33 | 34 | Parameters 35 | ---------- 36 | buffer: 37 | Python buffer to be wrapped 38 | """ 39 | buf = Array(buffer) 40 | assert buf.c_contiguous 41 | return PackedRemoteKey(buf.ptr, buf.nbytes) 42 | 43 | @classmethod 44 | def from_mem_handle(self, UCXMemoryHandle mem): 45 | """ Create a new packed remote key from a given UCXMemoryHandle class 46 | 47 | Parameters 48 | ---------- 49 | mem: UCXMemoryHandle 50 | The memory handle to be packed in an rkey for sending 51 | """ 52 | cdef void *key 53 | cdef size_t len 54 | cdef ucs_status_t status 55 | status = ucp_rkey_pack(mem._context._handle, mem._mem_handle, &key, &len) 56 | packed_key = PackedRemoteKey(key, len) 57 | ucp_rkey_buffer_release(key) 58 | assert_ucs_status(status) 59 | return packed_key 60 | 61 | def __dealloc__(self): 62 | free(self._key) 63 | 64 | @property 65 | def key(self): 66 | return int(self._key) 67 | 68 | @property 69 | def length(self): 70 | return int(self._length) 71 | 72 | def __getbuffer__(self, Py_buffer *buffer, int flags): 73 | get_ucx_object(buffer, flags, self._key, self._length, self) 74 | 75 | def __releasebuffer__(self, Py_buffer *buffer): 76 | pass 77 | 78 | def __reduce__(self): 79 | return (PackedRemoteKey.from_buffer, (bytes(self),)) 80 | 81 | def __hash__(self): 82 | return hash(bytes(self)) 83 | 
-------------------------------------------------------------------------------- /ucp/_libs/src/c_util.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. 3 | * See file LICENSE for terms. 4 | */ 5 | 6 | #include "c_util.h" 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | 14 | int c_util_set_sockaddr(ucs_sock_addr_t *sockaddr, const char *ip_address, uint16_t port) { 15 | struct sockaddr_in *addr = malloc(sizeof(struct sockaddr_in)); 16 | if(addr == NULL) { 17 | return 1; 18 | } 19 | memset(addr, 0, sizeof(struct sockaddr_in)); 20 | addr->sin_family = AF_INET; 21 | addr->sin_addr.s_addr = ip_address==NULL ? INADDR_ANY : inet_addr(ip_address); 22 | addr->sin_port = htons(port); 23 | sockaddr->addr = (const struct sockaddr *) addr; 24 | sockaddr->addrlen = sizeof(struct sockaddr_in); 25 | return 0; 26 | } 27 | 28 | 29 | void c_util_sockaddr_free(ucs_sock_addr_t *sockaddr) { 30 | free((void*) sockaddr->addr); 31 | } 32 | 33 | void c_util_sockaddr_get_ip_port_str(const struct sockaddr_storage *sock_addr, 34 | char *ip_str, char *port_str, 35 | size_t max_str_size) 36 | { 37 | struct sockaddr_in addr_in; 38 | struct sockaddr_in6 addr_in6; 39 | 40 | switch (sock_addr->ss_family) { 41 | case AF_INET: 42 | memcpy(&addr_in, sock_addr, sizeof(struct sockaddr_in)); 43 | inet_ntop(AF_INET, &addr_in.sin_addr, ip_str, max_str_size); 44 | snprintf(port_str, max_str_size, "%d", ntohs(addr_in.sin_port)); 45 | case AF_INET6: 46 | memcpy(&addr_in6, sock_addr, sizeof(struct sockaddr_in6)); 47 | inet_ntop(AF_INET6, &addr_in6.sin6_addr, ip_str, max_str_size); 48 | snprintf(port_str, max_str_size, "%d", ntohs(addr_in6.sin6_port)); 49 | default: 50 | ip_str = "Invalid address family"; 51 | port_str = "Invalid address family"; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /ucp/_libs/src/c_util.h: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. 3 | * See file LICENSE for terms. 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | int c_util_set_sockaddr(ucs_sock_addr_t *sockaddr, const char *ip_address, uint16_t port); 11 | 12 | void c_util_sockaddr_free(ucs_sock_addr_t *sockaddr); 13 | 14 | void c_util_sockaddr_get_ip_port_str( 15 | const struct sockaddr_storage *sock_addr, 16 | char *ip_str, char *port_str, 17 | size_t max_str_size 18 | ); 19 | -------------------------------------------------------------------------------- /ucp/_libs/tests/test_address_object.py: -------------------------------------------------------------------------------- 1 | import multiprocessing as mp 2 | import pickle 3 | 4 | from ucp._libs import ucx_api 5 | 6 | mp = mp.get_context("spawn") 7 | 8 | 9 | def test_pickle_ucx_address(): 10 | ctx = ucx_api.UCXContext() 11 | worker = ucx_api.UCXWorker(ctx) 12 | org_address = worker.get_address() 13 | dumped_address = pickle.dumps(org_address) 14 | org_address_hash = hash(org_address) 15 | org_address = bytes(org_address) 16 | new_address = pickle.loads(dumped_address) 17 | assert org_address_hash == hash(new_address) 18 | assert bytes(org_address) == bytes(new_address) 19 | -------------------------------------------------------------------------------- /ucp/_libs/tests/test_cancel.py: -------------------------------------------------------------------------------- 1 | import multiprocessing as mp 2 | import re 3 | 4 | import pytest 5 | 6 | from ucp._libs import ucx_api 7 | from ucp._libs.arr import Array 8 | from ucp._libs.utils import get_address 9 | from ucp.exceptions import UCXCanceled 10 | 11 | mp = mp.get_context("spawn") 12 | 13 | WireupMessage = bytearray(b"wireup") 14 | DataMessage = bytearray(b"0" * 10) 15 | 16 | 17 | def _handler(request, exception, ret): 18 | if exception is not None: 19 | ret[0] = 
exception 20 | else: 21 | ret[0] = request 22 | 23 | 24 | def _server_cancel(queue, transfer_api): 25 | """Server that establishes an endpoint to client and immediately closes 26 | it, triggering received messages to be canceled on the client. 27 | """ 28 | feature_flags = ( 29 | ucx_api.Feature.AM if transfer_api == "am" else ucx_api.Feature.TAG, 30 | ) 31 | ctx = ucx_api.UCXContext(feature_flags=feature_flags) 32 | worker = ucx_api.UCXWorker(ctx) 33 | 34 | # Keep endpoint to be used from outside the listener callback 35 | ep = [None] 36 | 37 | def _listener_handler(conn_request): 38 | ep[0] = ucx_api.UCXEndpoint.create_from_conn_request( 39 | worker, 40 | conn_request, 41 | endpoint_error_handling=True, 42 | ) 43 | 44 | listener = ucx_api.UCXListener(worker=worker, port=0, cb_func=_listener_handler) 45 | queue.put(listener.port) 46 | 47 | while ep[0] is None: 48 | worker.progress() 49 | 50 | ep[0].close() 51 | worker.progress() 52 | 53 | 54 | def _client_cancel(queue, transfer_api): 55 | """Client that connects to server and waits for messages to be received, 56 | because the server closes without sending anything, the messages will 57 | trigger cancelation. 
58 | """ 59 | feature_flags = ( 60 | ucx_api.Feature.AM if transfer_api == "am" else ucx_api.Feature.TAG, 61 | ) 62 | ctx = ucx_api.UCXContext(feature_flags=feature_flags) 63 | worker = ucx_api.UCXWorker(ctx) 64 | port = queue.get() 65 | ep = ucx_api.UCXEndpoint.create( 66 | worker, 67 | get_address(), 68 | port, 69 | endpoint_error_handling=True, 70 | ) 71 | 72 | ret = [None] 73 | 74 | if transfer_api == "am": 75 | ucx_api.am_recv_nb(ep, cb_func=_handler, cb_args=(ret,)) 76 | 77 | match_msg = ".*am_recv.*" 78 | else: 79 | msg = Array(bytearray(1)) 80 | ucx_api.tag_recv_nb( 81 | worker, msg, msg.nbytes, tag=0, cb_func=_handler, cb_args=(ret,), ep=ep 82 | ) 83 | 84 | match_msg = ".*tag_recv_nb.*" 85 | 86 | while ep.is_alive(): 87 | worker.progress() 88 | 89 | canceled = worker.cancel_inflight_messages() 90 | 91 | while ret[0] is None: 92 | worker.progress() 93 | 94 | assert canceled == 1 95 | assert isinstance(ret[0], UCXCanceled) 96 | assert re.match(match_msg, ret[0].args[0]) 97 | 98 | 99 | @pytest.mark.parametrize("transfer_api", ["am", "tag"]) 100 | def test_message_probe(transfer_api): 101 | queue = mp.Queue() 102 | server = mp.Process( 103 | target=_server_cancel, 104 | args=(queue, transfer_api), 105 | ) 106 | server.start() 107 | client = mp.Process( 108 | target=_client_cancel, 109 | args=(queue, transfer_api), 110 | ) 111 | client.start() 112 | client.join(timeout=10) 113 | server.join(timeout=10) 114 | assert client.exitcode == 0 115 | assert server.exitcode == 0 116 | -------------------------------------------------------------------------------- /ucp/_libs/tests/test_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | from ucp._libs import ucx_api 6 | from ucp._libs.arr import Array 7 | from ucp._libs.exceptions import UCXConfigError 8 | 9 | 10 | def test_get_config(): 11 | # Cache user-defined UCX_TLS and unset it to test default value 12 | tls = 
os.environ.get("UCX_TLS", None)
    if tls is not None:
        del os.environ["UCX_TLS"]

    ctx = ucx_api.UCXContext()
    config = ctx.get_config()
    assert isinstance(config, dict)
    assert config["TLS"] == "all"

    # Restore user-defined UCX_TLS
    # NOTE(review): if an assert above fails, UCX_TLS is not restored for
    # later tests in the same process -- consider try/finally.
    if tls is not None:
        os.environ["UCX_TLS"] = tls


def test_set_env():
    # UCX picks up UCX_-prefixed environment variables at context creation.
    os.environ["UCX_SEG_SIZE"] = "2M"
    ctx = ucx_api.UCXContext()
    config = ctx.get_config()
    assert config["SEG_SIZE"] == os.environ["UCX_SEG_SIZE"]


def test_init_options():
    # Explicit constructor options take precedence over the environment.
    os.environ["UCX_SEG_SIZE"] = "2M"  # Should be ignored
    options = {"SEG_SIZE": "3M"}
    ctx = ucx_api.UCXContext(options)
    config = ctx.get_config()
    assert config["SEG_SIZE"] == options["SEG_SIZE"]


@pytest.mark.skipif(
    ucx_api.get_ucx_version() >= (1, 12, 0),
    reason="Beginning with UCX >= 1.12, it's only possible to validate "
    "UCP options but not options from other modules such as UCT. "
    "See https://github.com/openucx/ucx/issues/7519.",
)
def test_init_unknown_option():
    # An unrecognized option name must be rejected at context creation.
    options = {"UNKNOWN_OPTION": "3M"}
    with pytest.raises(UCXConfigError):
        ucx_api.UCXContext(options)


def test_init_invalid_option():
    # A recognized option with an unparsable value must be rejected.
    options = {"SEG_SIZE": "invalid-size"}
    with pytest.raises(UCXConfigError):
        ucx_api.UCXContext(options)


@pytest.mark.parametrize(
    "feature_flag", [ucx_api.Feature.TAG, ucx_api.Feature.STREAM, ucx_api.Feature.AM]
)
def test_feature_flags_mismatch(feature_flag):
    # Each transfer API must raise ValueError when its UCP feature was not
    # enabled on the context.
    ctx = ucx_api.UCXContext(feature_flags=(feature_flag,))
    worker = ucx_api.UCXWorker(ctx)
    addr = worker.get_address()
    ep = ucx_api.UCXEndpoint.create_from_worker_address(
        worker, addr, endpoint_error_handling=False
    )
    msg = Array(bytearray(10))
    if feature_flag != ucx_api.Feature.TAG:
        with pytest.raises(
            ValueError, match="UCXContext must be created with `Feature.TAG`"
        ):
            ucx_api.tag_send_nb(ep, msg, msg.nbytes, 0, None)
        with pytest.raises(
            ValueError, match="UCXContext must be created with `Feature.TAG`"
        ):
            ucx_api.tag_recv_nb(worker, msg, msg.nbytes, 0, None)
    if feature_flag != ucx_api.Feature.STREAM:
        with pytest.raises(
            ValueError, match="UCXContext must be created with `Feature.STREAM`"
        ):
            ucx_api.stream_send_nb(ep, msg, msg.nbytes, None)
        with pytest.raises(
            ValueError, match="UCXContext must be created with `Feature.STREAM`"
        ):
            ucx_api.stream_recv_nb(ep, msg, msg.nbytes, None)
    if feature_flag != ucx_api.Feature.AM:
        with pytest.raises(
            ValueError, match="UCXContext must be created with `Feature.AM`"
        ):
            ucx_api.am_send_nbx(ep, msg, msg.nbytes, None)
        with pytest.raises(
            ValueError, match="UCXContext must be created with `Feature.AM`"
        ):
            ucx_api.am_recv_nb(ep, None)
-------------------------------------------------------------------------------- /ucp/_libs/tests/test_endpoint.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import multiprocessing as mp 3 | 4 | import pytest 5 | 6 | from ucp._libs import ucx_api 7 | from ucp._libs.utils import get_address 8 | 9 | mp = mp.get_context("spawn") 10 | 11 | 12 | def _close_callback(closed): 13 | closed[0] = True 14 | 15 | 16 | def _server(queue, server_close_callback): 17 | """Server that send received message back to the client 18 | 19 | Notice, since it is illegal to call progress() in call-back functions, 20 | we use a "chain" of call-back functions. 21 | """ 22 | ctx = ucx_api.UCXContext(feature_flags=(ucx_api.Feature.TAG,)) 23 | worker = ucx_api.UCXWorker(ctx) 24 | 25 | listener_finished = [False] 26 | closed = [False] 27 | 28 | # A reference to listener's endpoint is stored to prevent it from going 29 | # out of scope too early. 30 | # ep = None 31 | 32 | def _listener_handler(conn_request): 33 | global ep 34 | ep = ucx_api.UCXEndpoint.create_from_conn_request( 35 | worker, 36 | conn_request, 37 | endpoint_error_handling=True, 38 | ) 39 | if server_close_callback is True: 40 | ep.set_close_callback(functools.partial(_close_callback, closed)) 41 | listener_finished[0] = True 42 | 43 | listener = ucx_api.UCXListener(worker=worker, port=0, cb_func=_listener_handler) 44 | queue.put(listener.port) 45 | 46 | if server_close_callback is True: 47 | while closed[0] is False: 48 | worker.progress() 49 | assert closed[0] is True 50 | else: 51 | while listener_finished[0] is False: 52 | worker.progress() 53 | 54 | 55 | def _client(port, server_close_callback): 56 | ctx = ucx_api.UCXContext(feature_flags=(ucx_api.Feature.TAG,)) 57 | worker = ucx_api.UCXWorker(ctx) 58 | ep = ucx_api.UCXEndpoint.create( 59 | worker, 60 | get_address(), 61 | port, 62 | endpoint_error_handling=True, 63 | ) 64 | if server_close_callback is True: 65 | 
ep.close() 66 | worker.progress() 67 | else: 68 | closed = [False] 69 | ep.set_close_callback(functools.partial(_close_callback, closed)) 70 | while closed[0] is False: 71 | worker.progress() 72 | 73 | 74 | @pytest.mark.parametrize("server_close_callback", [True, False]) 75 | def test_close_callback(server_close_callback): 76 | queue = mp.Queue() 77 | server = mp.Process( 78 | target=_server, 79 | args=(queue, server_close_callback), 80 | ) 81 | server.start() 82 | port = queue.get() 83 | client = mp.Process( 84 | target=_client, 85 | args=(port, server_close_callback), 86 | ) 87 | client.start() 88 | client.join(timeout=10) 89 | server.join(timeout=10) 90 | assert client.exitcode == 0 91 | assert server.exitcode == 0 92 | -------------------------------------------------------------------------------- /ucp/_libs/tests/test_listener.py: -------------------------------------------------------------------------------- 1 | from ucp._libs import ucx_api 2 | 3 | 4 | def test_listener_ip_port(): 5 | ctx = ucx_api.UCXContext() 6 | worker = ucx_api.UCXWorker(ctx) 7 | 8 | def _listener_handler(conn_request): 9 | pass 10 | 11 | listener = ucx_api.UCXListener(worker=worker, port=0, cb_func=_listener_handler) 12 | 13 | assert isinstance(listener.ip, str) and listener.ip 14 | assert ( 15 | isinstance(listener.port, int) and listener.port >= 0 and listener.port <= 65535 16 | ) 17 | -------------------------------------------------------------------------------- /ucp/_libs/tests/test_mem.py: -------------------------------------------------------------------------------- 1 | import array 2 | import io 3 | import mmap 4 | 5 | import pytest 6 | 7 | from ucp._libs import ucx_api 8 | 9 | builtin_buffers = [ 10 | b"", 11 | b"abcd", 12 | array.array("i", []), 13 | array.array("i", [0, 1, 2, 3]), 14 | array.array("I", [0, 1, 2, 3]), 15 | array.array("f", []), 16 | array.array("f", [0, 1, 2, 3]), 17 | array.array("d", [0, 1, 2, 3]), 18 | memoryview(array.array("B", [0, 1, 2, 3, 4, 
5])).cast("B", (3, 2)), 19 | memoryview(b"abcd"), 20 | memoryview(bytearray(b"abcd")), 21 | io.BytesIO(b"abcd").getbuffer(), 22 | mmap.mmap(-1, 5), 23 | ] 24 | 25 | 26 | def test_alloc(): 27 | ctx = ucx_api.UCXContext({}) 28 | mem = ucx_api.UCXMemoryHandle.alloc(ctx, 1024) 29 | rkey = mem.pack_rkey() 30 | assert rkey is not None 31 | 32 | 33 | @pytest.mark.parametrize("buffer", builtin_buffers) 34 | def test_map(buffer): 35 | ctx = ucx_api.UCXContext({}) 36 | mem = ucx_api.UCXMemoryHandle.map(ctx, buffer) 37 | rkey = mem.pack_rkey() 38 | assert rkey is not None 39 | 40 | 41 | def test_ctx_alloc(): 42 | ctx = ucx_api.UCXContext({}) 43 | mem = ctx.alloc(1024) 44 | rkey = mem.pack_rkey() 45 | assert rkey is not None 46 | 47 | 48 | @pytest.mark.parametrize("buffer", builtin_buffers) 49 | def test_ctx_map(buffer): 50 | ctx = ucx_api.UCXContext({}) 51 | mem = ctx.map(buffer) 52 | rkey = mem.pack_rkey() 53 | assert rkey is not None 54 | 55 | 56 | def test_rkey_unpack(): 57 | ctx = ucx_api.UCXContext({}) 58 | mem = ucx_api.UCXMemoryHandle.alloc(ctx, 1024) 59 | packed_rkey = mem.pack_rkey() 60 | worker = ucx_api.UCXWorker(ctx) 61 | ep = ucx_api.UCXEndpoint.create_from_worker_address( 62 | worker, 63 | worker.get_address(), 64 | endpoint_error_handling=True, 65 | ) 66 | rkey = ep.unpack_rkey(packed_rkey) 67 | assert rkey is not None 68 | -------------------------------------------------------------------------------- /ucp/_libs/tests/test_peer_send_recv.py: -------------------------------------------------------------------------------- 1 | import multiprocessing as mp 2 | import os 3 | from itertools import repeat 4 | 5 | import pytest 6 | 7 | from ucp._libs import ucx_api 8 | from ucp._libs.utils_test import blocking_flush, blocking_recv, blocking_send 9 | 10 | mp = mp.get_context("spawn") 11 | 12 | 13 | def _rma_setup(worker, address, prkey, base, msg_size): 14 | ep = ucx_api.UCXEndpoint.create_from_worker_address( 15 | worker, address, endpoint_error_handling=True 16 | ) 
17 | rkey = ep.unpack_rkey(prkey) 18 | mem = ucx_api.RemoteMemory(rkey, base, msg_size) 19 | return ep, mem 20 | 21 | 22 | def _test_peer_communication_rma(queue, rank, msg_size): 23 | ctx = ucx_api.UCXContext(feature_flags=(ucx_api.Feature.RMA, ucx_api.Feature.TAG)) 24 | worker = ucx_api.UCXWorker(ctx) 25 | self_address = worker.get_address() 26 | mem_handle = ctx.alloc(msg_size) 27 | self_base = mem_handle.address 28 | self_prkey = mem_handle.pack_rkey() 29 | 30 | self_ep, self_mem = _rma_setup( 31 | worker, self_address, self_prkey, self_base, msg_size 32 | ) 33 | send_msg = bytes(repeat(rank, msg_size)) 34 | if not self_mem.put_nbi(send_msg): 35 | blocking_flush(self_ep) 36 | 37 | queue.put((rank, self_address, self_prkey, self_base)) 38 | right_rank, right_address, right_prkey, right_base = queue.get() 39 | left_rank, left_address, left_prkey, left_base = queue.get() 40 | 41 | right_ep, right_mem = _rma_setup( 42 | worker, right_address, right_prkey, right_base, msg_size 43 | ) 44 | right_msg = bytearray(msg_size) 45 | right_mem.get_nbi(right_msg) 46 | 47 | left_ep, left_mem = _rma_setup( 48 | worker, left_address, left_prkey, left_base, msg_size 49 | ) 50 | left_msg = bytearray(msg_size) 51 | left_mem.get_nbi(left_msg) 52 | 53 | blocking_flush(worker) 54 | assert left_msg == bytes(repeat(left_rank, msg_size)) 55 | assert right_msg == bytes(repeat(right_rank, msg_size)) 56 | 57 | # We use the blocking tag send/recv as a barrier implementation 58 | recv_msg = bytearray(8) 59 | if rank == 0: 60 | send_msg = bytes(os.urandom(8)) 61 | blocking_send(worker, right_ep, send_msg, right_rank) 62 | blocking_recv(worker, left_ep, recv_msg, rank) 63 | else: 64 | blocking_recv(worker, left_ep, recv_msg, rank) 65 | blocking_send(worker, right_ep, recv_msg, right_rank) 66 | 67 | 68 | def _test_peer_communication_tag(queue, rank, msg_size): 69 | ctx = ucx_api.UCXContext(feature_flags=(ucx_api.Feature.TAG,)) 70 | worker = ucx_api.UCXWorker(ctx) 71 | queue.put((rank, 
worker.get_address())) 72 | right_rank, right_address = queue.get() 73 | left_rank, left_address = queue.get() 74 | 75 | right_ep = ucx_api.UCXEndpoint.create_from_worker_address( 76 | worker, 77 | right_address, 78 | endpoint_error_handling=True, 79 | ) 80 | left_ep = ucx_api.UCXEndpoint.create_from_worker_address( 81 | worker, 82 | left_address, 83 | endpoint_error_handling=True, 84 | ) 85 | recv_msg = bytearray(msg_size) 86 | if rank == 0: 87 | send_msg = bytes(os.urandom(msg_size)) 88 | blocking_send(worker, right_ep, send_msg, right_rank) 89 | blocking_recv(worker, left_ep, recv_msg, rank) 90 | assert send_msg == recv_msg 91 | else: 92 | blocking_recv(worker, left_ep, recv_msg, rank) 93 | blocking_send(worker, right_ep, recv_msg, right_rank) 94 | 95 | 96 | @pytest.mark.parametrize( 97 | "test_name", [_test_peer_communication_tag, _test_peer_communication_rma] 98 | ) 99 | @pytest.mark.parametrize("msg_size", [10, 2**24]) 100 | def test_peer_communication(test_name, msg_size, num_nodes=2): 101 | """Test peer communication by sending a message between each worker""" 102 | queues = [mp.Queue() for _ in range(num_nodes)] 103 | ps = [] 104 | addresses = [] 105 | for rank, queue in enumerate(queues): 106 | p = mp.Process(target=test_name, args=(queue, rank, msg_size)) 107 | p.start() 108 | ps.append(p) 109 | addresses.append(queue.get()) 110 | 111 | for i in range(num_nodes): 112 | queues[i].put(addresses[(i + 1) % num_nodes]) # Right peer 113 | queues[i].put(addresses[(i - 1) % num_nodes]) # Left peer 114 | 115 | for p in ps: 116 | p.join() 117 | assert not p.exitcode 118 | -------------------------------------------------------------------------------- /ucp/_libs/tests/test_probe.py: -------------------------------------------------------------------------------- 1 | import multiprocessing as mp 2 | 3 | import pytest 4 | 5 | from ucp._libs import ucx_api 6 | from ucp._libs.utils import get_address 7 | from ucp._libs.utils_test import ( 8 | blocking_am_recv, 9 | 
blocking_am_send, 10 | blocking_recv, 11 | blocking_send, 12 | ) 13 | 14 | mp = mp.get_context("spawn") 15 | 16 | WireupMessage = bytearray(b"wireup") 17 | DataMessage = bytearray(b"0" * 10) 18 | 19 | 20 | def _server_probe(queue, transfer_api): 21 | """Server that probes and receives message after client disconnected. 22 | 23 | Note that since it is illegal to call progress() in callback functions, 24 | we keep a reference to the endpoint after the listener callback has 25 | terminated, this way we can progress even after Python blocking calls. 26 | """ 27 | feature_flags = ( 28 | ucx_api.Feature.AM if transfer_api == "am" else ucx_api.Feature.TAG, 29 | ) 30 | ctx = ucx_api.UCXContext(feature_flags=feature_flags) 31 | worker = ucx_api.UCXWorker(ctx) 32 | 33 | # Keep endpoint to be used from outside the listener callback 34 | ep = [None] 35 | 36 | def _listener_handler(conn_request): 37 | ep[0] = ucx_api.UCXEndpoint.create_from_conn_request( 38 | worker, 39 | conn_request, 40 | endpoint_error_handling=True, 41 | ) 42 | 43 | listener = ucx_api.UCXListener(worker=worker, port=0, cb_func=_listener_handler) 44 | queue.put(listener.port), 45 | 46 | while ep[0] is None: 47 | worker.progress() 48 | 49 | ep = ep[0] 50 | 51 | # Ensure wireup and inform client before it can disconnect 52 | if transfer_api == "am": 53 | wireup = blocking_am_recv(worker, ep) 54 | else: 55 | wireup = bytearray(len(WireupMessage)) 56 | blocking_recv(worker, ep, wireup) 57 | queue.put("wireup completed") 58 | 59 | # Ensure client has disconnected -- endpoint is not alive anymore 60 | while ep.is_alive() is True: 61 | worker.progress() 62 | 63 | # Probe/receive message even after the remote endpoint has disconnected 64 | if transfer_api == "am": 65 | while ep.am_probe() is False: 66 | worker.progress() 67 | received = blocking_am_recv(worker, ep) 68 | else: 69 | while worker.tag_probe(0) is False: 70 | worker.progress() 71 | received = bytearray(len(DataMessage)) 72 | blocking_recv(worker, ep, 
received) 73 | 74 | assert wireup == WireupMessage 75 | assert received == DataMessage 76 | 77 | 78 | def _client_probe(queue, transfer_api): 79 | feature_flags = ( 80 | ucx_api.Feature.AM if transfer_api == "am" else ucx_api.Feature.TAG, 81 | ) 82 | ctx = ucx_api.UCXContext(feature_flags=feature_flags) 83 | worker = ucx_api.UCXWorker(ctx) 84 | port = queue.get() 85 | ep = ucx_api.UCXEndpoint.create( 86 | worker, 87 | get_address(), 88 | port, 89 | endpoint_error_handling=True, 90 | ) 91 | 92 | _send = blocking_am_send if transfer_api == "am" else blocking_send 93 | 94 | _send(worker, ep, WireupMessage) 95 | _send(worker, ep, DataMessage) 96 | 97 | # Wait for wireup before disconnecting 98 | assert queue.get() == "wireup completed" 99 | 100 | 101 | @pytest.mark.parametrize("transfer_api", ["am", "tag"]) 102 | def test_message_probe(transfer_api): 103 | queue = mp.Queue() 104 | server = mp.Process( 105 | target=_server_probe, 106 | args=(queue, transfer_api), 107 | ) 108 | server.start() 109 | client = mp.Process( 110 | target=_client_probe, 111 | args=(queue, transfer_api), 112 | ) 113 | client.start() 114 | client.join(timeout=10) 115 | server.join(timeout=10) 116 | assert client.exitcode == 0 117 | assert server.exitcode == 0 118 | -------------------------------------------------------------------------------- /ucp/_libs/tests/test_server_client.py: -------------------------------------------------------------------------------- 1 | import multiprocessing as mp 2 | import os 3 | from queue import Empty as QueueIsEmpty 4 | 5 | import pytest 6 | 7 | from ucp._libs import ucx_api 8 | from ucp._libs.arr import Array 9 | from ucp._libs.utils import get_address 10 | from ucp._libs.utils_test import blocking_recv, blocking_send 11 | 12 | mp = mp.get_context("spawn") 13 | 14 | 15 | def _echo_server(get_queue, put_queue, msg_size): 16 | """Server that send received message back to the client 17 | 18 | Notice, since it is illegal to call progress() in call-back functions, 
19 | we use a "chain" of call-back functions. 20 | """ 21 | ctx = ucx_api.UCXContext(feature_flags=(ucx_api.Feature.TAG,)) 22 | worker = ucx_api.UCXWorker(ctx) 23 | 24 | # A reference to listener's endpoint is stored to prevent it from going 25 | # out of scope too early. 26 | ep = None 27 | 28 | def _send_handle(request, exception, msg): 29 | # Notice, we pass `msg` to the handler in order to make sure 30 | # it doesn't go out of scope prematurely. 31 | assert exception is None 32 | 33 | def _recv_handle(request, exception, ep, msg): 34 | assert exception is None 35 | ucx_api.tag_send_nb( 36 | ep, msg, msg.nbytes, tag=0, cb_func=_send_handle, cb_args=(msg,) 37 | ) 38 | 39 | def _listener_handler(conn_request): 40 | global ep 41 | ep = ucx_api.UCXEndpoint.create_from_conn_request( 42 | worker, 43 | conn_request, 44 | endpoint_error_handling=True, 45 | ) 46 | msg = Array(bytearray(msg_size)) 47 | ucx_api.tag_recv_nb( 48 | worker, msg, msg.nbytes, tag=0, cb_func=_recv_handle, cb_args=(ep, msg) 49 | ) 50 | 51 | listener = ucx_api.UCXListener(worker=worker, port=0, cb_func=_listener_handler) 52 | put_queue.put(listener.port) 53 | 54 | while True: 55 | worker.progress() 56 | try: 57 | get_queue.get(block=False, timeout=0.1) 58 | except QueueIsEmpty: 59 | continue 60 | else: 61 | break 62 | 63 | 64 | def _echo_client(msg_size, port): 65 | ctx = ucx_api.UCXContext(feature_flags=(ucx_api.Feature.TAG,)) 66 | worker = ucx_api.UCXWorker(ctx) 67 | ep = ucx_api.UCXEndpoint.create( 68 | worker, 69 | get_address(), 70 | port, 71 | endpoint_error_handling=True, 72 | ) 73 | send_msg = bytes(os.urandom(msg_size)) 74 | recv_msg = bytearray(msg_size) 75 | blocking_send(worker, ep, send_msg) 76 | blocking_recv(worker, ep, recv_msg) 77 | assert send_msg == recv_msg 78 | 79 | 80 | @pytest.mark.parametrize("msg_size", [10, 2**24]) 81 | def test_server_client(msg_size): 82 | put_queue, get_queue = mp.Queue(), mp.Queue() 83 | server = mp.Process( 84 | target=_echo_server, 85 | 
args=(put_queue, get_queue, msg_size), 86 | ) 87 | server.start() 88 | port = get_queue.get() 89 | client = mp.Process(target=_echo_client, args=(msg_size, port)) 90 | client.start() 91 | client.join(timeout=10) 92 | assert not client.exitcode 93 | put_queue.put("Finished") 94 | server.join(timeout=10) 95 | assert not server.exitcode 96 | -------------------------------------------------------------------------------- /ucp/_libs/transfer_common.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 2 | # Copyright (c) 2020 UT-Battelle, LLC. All rights reserved. 3 | # See file LICENSE for terms. 4 | 5 | # cython: language_level=3 6 | 7 | from libc.stdint cimport uintptr_t 8 | 9 | from .exceptions import UCXCanceled, UCXError, log_errors 10 | from .ucx_api_dep cimport * 11 | 12 | 13 | # This callback function is currently needed by stream_send_nb and 14 | # tag_send_nb transfer functions, as well as UCXEndpoint and UCXWorker 15 | # flush methods. 
cdef void _send_callback(void *request, ucs_status_t status) with gil:
    # Completion callback for non-blocking sends: records completion on the
    # request, translates the UCS status into a Python exception, removes
    # the request from the in-flight set, and invokes the user callback.
    cdef UCXRequest req
    cdef dict req_info
    cdef str name, ucx_status_msg, msg
    cdef set inflight_msgs
    cdef tuple cb_args
    cdef dict cb_kwargs
    with log_errors():
        # NOTE(review): a Cython cast (e.g. `<uintptr_t>`) appears to have
        # been lost from this call during text extraction -- confirm
        # against upstream before building.
        req = UCXRequest( request)
        assert not req.closed()
        req_info = req._handle.info
        req_info["status"] = "finished"

        if "cb_func" not in req_info:
            # This callback function was called before ucp_tag_send_nb() returned
            return

        # Map UCS status to the exception the user callback will receive.
        exception = None
        if status == UCS_ERR_CANCELED:
            name = req_info["name"]
            msg = "<%s>: " % name
            exception = UCXCanceled(msg)
        elif status != UCS_OK:
            name = req_info["name"]
            ucx_status_msg = ucs_status_string(status).decode("utf-8")
            msg = "<%s>: %s" % (name, ucx_status_msg)
            exception = UCXError(msg)
        try:
            # Drop this request from the worker's in-flight tracking set.
            inflight_msgs = req_info["inflight_msgs"]
            inflight_msgs.discard(req)
            cb_func = req_info["cb_func"]
            if cb_func is not None:
                cb_args = req_info["cb_args"]
                if cb_args is None:
                    cb_args = ()
                cb_kwargs = req_info["cb_kwargs"]
                if cb_kwargs is None:
                    cb_kwargs = {}
                cb_func(req, exception, *cb_args, **cb_kwargs)
        finally:
            # Always release the request, even if the user callback raised.
            req.close()
-------------------------------------------------------------------------------- /ucp/_libs/typedefs.pyx: --------------------------------------------------------------------------------
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
# See file LICENSE for terms.
# cython: language_level=3

import enum

from cpython.ref cimport PyObject

from .ucx_api_dep cimport *


class Feature(enum.Enum):
    """Enum of the UCP_FEATURE_* constants"""
    TAG = UCP_FEATURE_TAG
    RMA = UCP_FEATURE_RMA
    AMO32 = UCP_FEATURE_AMO32
    AMO64 = UCP_FEATURE_AMO64
    WAKEUP = UCP_FEATURE_WAKEUP
    STREAM = UCP_FEATURE_STREAM
    AM = UCP_FEATURE_AM


class AllocatorType(enum.Enum):
    # Kind of memory an active-message allocator provides.
    HOST = 0
    CUDA = 1
    UNSUPPORTED = -1


# Struct used as requests by UCX
cdef struct ucx_py_request:
    bint finished  # Used by downstream projects such as cuML
    unsigned int uid
    PyObject *info
-------------------------------------------------------------------------------- /ucp/_libs/ucx_address.pyx: --------------------------------------------------------------------------------
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2020-2021, UT-Battelle, LLC. All rights reserved.
# See file LICENSE for terms.
# cython: language_level=3

from libc.stdint cimport uintptr_t
from libc.stdlib cimport free
from libc.string cimport memcpy

from .arr cimport Array
from .ucx_api_dep cimport *


def _ucx_address_finalizer(
    uintptr_t handle_as_int,
    uintptr_t worker_handle_as_int,
):
    # NOTE(review): the extracted source lost angle-bracket Cython casts
    # throughout this file; restored here -- confirm against upstream.
    cdef ucp_address_t *address = <ucp_address_t *>handle_as_int
    cdef ucp_worker_h worker = <ucp_worker_h>worker_handle_as_int
    if worker_handle_as_int != 0:
        ucp_worker_release_address(worker, address)
    else:
        free(address)


cdef class UCXAddress(UCXObject):
    """Python representation of ucp_address_t"""
    cdef ucp_address_t *_address
    cdef size_t _length

    def __cinit__(
        self,
        uintptr_t address_as_int,
        size_t length,
        UCXWorker worker=None,
    ):
        cdef ucp_address_t *address = <ucp_address_t *>address_as_int
        # Copy address to `self._address`
        self._address = <ucp_address_t *>malloc(length)
        self._length = length
        memcpy(self._address, address, length)

        # NOTE(review): the finalizer is registered on the malloc'd *copy*,
        # yet when `worker` is given it releases via
        # ucp_worker_release_address; the worker-owned original address is
        # never explicitly released here -- verify ownership against the
        # callers/upstream.
        self.add_handle_finalizer(
            _ucx_address_finalizer,
            int(<uintptr_t>self._address),
            0 if worker is None else worker.handle,
        )
        if worker is not None:
            worker.add_child(self)

    @classmethod
    def from_buffer(cls, buffer):
        # Wrap raw bytes (e.g. received over the wire) as a UCXAddress.
        buf = Array(buffer)
        assert buf.c_contiguous
        return UCXAddress(buf.ptr, buf.nbytes)

    @classmethod
    def from_worker(cls, UCXWorker worker):
        # Export the worker's own address (used for wireless rendezvous).
        cdef ucs_status_t status
        cdef ucp_worker_h ucp_worker = worker._handle
        cdef ucp_address_t *address
        cdef size_t length
        status = ucp_worker_get_address(ucp_worker, &address, &length)
        assert_ucs_status(status)
        return UCXAddress(<uintptr_t>address, length, worker=worker)

    @property
    def address(self):
        # Raw pointer value as a Python int (matches ucx_api.pyi).
        return int(<uintptr_t>self._address)

    @property
    def length(self):
        return int(self._length)

    def __getbuffer__(self, Py_buffer *buffer, int flags):
        get_ucx_object(buffer, flags, self._address, self._length, self)

    def __releasebuffer__(self, Py_buffer *buffer):
        pass

    def __reduce__(self):
        return (UCXAddress.from_buffer, (bytes(self),))

    def __hash__(self):
        return hash(bytes(self))
-------------------------------------------------------------------------------- /ucp/_libs/ucx_api.pyi: --------------------------------------------------------------------------------
import enum
from typing import Callable, Dict, Iterable, Mapping, Optional, Tuple

# typedefs.pyx

class AllocatorType(enum.Enum):
    HOST: int
    CUDA: int
    UNSUPPORTED: int

class Feature(enum.Enum):
    TAG: int
    RMA: int
    AMO32: int
    AMO64: int
    WAKEUP: int
    STREAM: int
    AM: int

# utils.pyx

def get_current_options() -> Dict[str, str]: ...

# FIX: ``Tuple[int]`` means a 1-tuple, but the version is compared against
# 3-tuples like ``(1, 12, 0)`` (see test_config.py); use ``Tuple[int, ...]``.
def get_ucx_version() -> Tuple[int, ...]: ...

# ucx_object.pyx

class UCXObject:
    def close(self) -> None: ...

# ucx_context.pyx

class UCXContext(UCXObject):
    def __init__(
        self, config_dict: Mapping = ..., feature_flags: Iterable[Feature] = ...
    ): ...

# ucx_address.pyx

class UCXAddress:
    @classmethod
    def from_buffer(cls, buffer) -> UCXAddress: ...
    @classmethod
    def from_worker(cls, worker: UCXWorker) -> UCXAddress: ...
    @property
    def address(self) -> int: ...
    @property
    def length(self) -> int: ...

# ucx_worker.pyx

class UCXWorker(UCXObject):
    def __init__(self, context: UCXContext): ...
    def progress(self) -> None: ...
    def ep_create(
        self, ip_address: str, port: int, endpoint_error_handling: bool
    ) -> UCXEndpoint: ...
    # NOTE(review): this stub's parameters (ip_address/port) look copy-pasted
    # from ep_create; a from-worker-address variant presumably takes an
    # address -- verify against ucx_worker.pyx.
    def ep_create_from_worker_address(
        self, ip_address: str, port: int, endpoint_error_handling: bool
    ) -> UCXEndpoint: ...
    def ep_create_from_conn_request(
        self, conn_request: int, endpoint_error_handling: bool
    ) -> UCXEndpoint: ...
63 | def register_am_allocator( 64 | self, allocator: Callable, allocator_type: AllocatorType 65 | ) -> None: ... 66 | 67 | # ucx_listener.pyx 68 | 69 | class UCXListener(UCXObject): 70 | port: int 71 | ip: str 72 | def __init__( 73 | self, 74 | worker: UCXWorker, 75 | port: int, 76 | cb_func: Callable, 77 | cb_args: Optional[tuple] = ..., 78 | cb_kwargs: dict = ..., 79 | ): ... 80 | 81 | # ucx_endpoint.pyx 82 | 83 | class UCXEndpoint(UCXObject): 84 | def info(self) -> str: ... 85 | @property 86 | def worker(self) -> UCXWorker: ... 87 | def unpack_rkey(self, rkey) -> UCXRkey: ... 88 | 89 | # ucx_memory_handle.pyx 90 | 91 | class UCXMemoryHandle(UCXObject): 92 | @classmethod 93 | def alloc(cls, ctx: UCXContext, size: int) -> UCXMemoryHandle: ... 94 | @classmethod 95 | def map(cls, ctx: UCXContext, buffer) -> UCXMemoryHandle: ... 96 | def pack_rkey(self) -> PackedRemoteKey: ... 97 | 98 | # transfer_am.pyx 99 | 100 | def am_send_nbx( 101 | ep: UCXEndpoint, 102 | buffer, 103 | nbytes: int, 104 | cb_func: Callable, 105 | cb_args: Optional[tuple] = ..., 106 | cb_kwargs: Optional[dict] = ..., 107 | name: Optional[str] = ..., 108 | ): ... 109 | def am_recv_nb( 110 | ep: UCXEndpoint, 111 | cb_func: Callable, 112 | cb_args: Optional[tuple] = ..., 113 | cb_kwargs: Optional[dict] = ..., 114 | name: Optional[str] = ..., 115 | ): ... 116 | 117 | # transfer_stream.pyx 118 | 119 | def stream_send_nb( 120 | ep: UCXEndpoint, 121 | buffer, 122 | nbytes: int, 123 | cb_func: Callable, 124 | cb_args: Optional[tuple] = ..., 125 | cb_kwargs: Optional[dict] = ..., 126 | name: Optional[str] = ..., 127 | ): ... 128 | def stream_recv_nb( 129 | ep: UCXEndpoint, 130 | buffer, 131 | nbytes: int, 132 | cb_func: Callable, 133 | cb_args: Optional[tuple] = ..., 134 | cb_kwargs: Optional[dict] = ..., 135 | name: Optional[str] = ..., 136 | ): ... 
137 | 138 | # transfer_tag.pyx 139 | 140 | def tag_send_nb( 141 | ep: UCXEndpoint, 142 | buffer, 143 | nbytes: int, 144 | tag: int, 145 | cb_func: Callable, 146 | cb_args: Optional[tuple] = ..., 147 | cb_kwargs: Optional[dict] = ..., 148 | name: Optional[str] = ..., 149 | ): ... 150 | def tag_recv_nb( 151 | worker: UCXWorker, 152 | buffer, 153 | nbytes: int, 154 | tag: int, 155 | cb_func: Callable, 156 | cb_args: Optional[tuple] = ..., 157 | cb_kwargs: Optional[dict] = ..., 158 | name: Optional[str] = ..., 159 | ep: Optional[UCXEndpoint] = ..., 160 | ): ... 161 | -------------------------------------------------------------------------------- /ucp/_libs/ucx_api.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 2 | # See file LICENSE for terms. 3 | 4 | include "packed_remote_key.pyx" 5 | include "transfer_am.pyx" 6 | include "transfer_common.pyx" 7 | include "transfer_stream.pyx" 8 | include "transfer_tag.pyx" 9 | include "typedefs.pyx" 10 | include "ucx_address.pyx" 11 | include "ucx_context.pyx" 12 | include "ucx_endpoint.pyx" 13 | include "ucx_listener.pyx" 14 | include "ucx_memory_handle.pyx" 15 | include "ucx_object.pyx" 16 | include "ucx_request.pyx" 17 | include "ucx_rkey.pyx" 18 | include "ucx_rma.pyx" 19 | include "ucx_worker.pyx" 20 | include "ucx_worker_cb.pyx" 21 | include "ucxio.pyx" 22 | include "utils.pyx" 23 | -------------------------------------------------------------------------------- /ucp/_libs/ucx_context.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 2 | # Copyright (c) 2021 UT-Battelle, LLC. All rights reserved. 3 | # See file LICENSE for terms. 
4 | 5 | # cython: language_level=3 6 | 7 | import functools 8 | import logging 9 | 10 | from libc.stdint cimport uintptr_t 11 | from libc.stdio cimport FILE 12 | from libc.string cimport memset 13 | 14 | from .ucx_api_dep cimport * 15 | 16 | logger = logging.getLogger("ucx") 17 | 18 | 19 | def _ucx_context_handle_finalizer(uintptr_t handle): 20 | ucp_cleanup( handle) 21 | 22 | 23 | cdef class UCXContext(UCXObject): 24 | """Python representation of `ucp_context_h` 25 | 26 | Parameters 27 | ---------- 28 | config_dict: Mapping[str, str] 29 | UCX options such as "MEMTYPE_CACHE=n" and "SEG_SIZE=3M" 30 | feature_flags: Iterable[Feature] 31 | Tuple of UCX feature flags 32 | """ 33 | cdef: 34 | ucp_context_h _handle 35 | dict _config 36 | tuple _feature_flags 37 | readonly bint cuda_support 38 | 39 | def __init__( 40 | self, 41 | config_dict={}, 42 | feature_flags=( 43 | Feature.TAG, 44 | Feature.WAKEUP, 45 | Feature.STREAM, 46 | Feature.AM, 47 | Feature.RMA 48 | ) 49 | ): 50 | cdef ucp_params_t ucp_params 51 | cdef ucp_worker_params_t worker_params 52 | cdef ucs_status_t status 53 | self._feature_flags = tuple(feature_flags) 54 | 55 | memset(&ucp_params, 0, sizeof(ucp_params)) 56 | ucp_params.field_mask = ( 57 | UCP_PARAM_FIELD_FEATURES | 58 | UCP_PARAM_FIELD_REQUEST_SIZE | 59 | UCP_PARAM_FIELD_REQUEST_INIT 60 | ) 61 | ucp_params.features = functools.reduce( 62 | lambda x, y: x | y.value, feature_flags, 0 63 | ) 64 | ucp_params.request_size = sizeof(ucx_py_request) 65 | ucp_params.request_init = ( 66 | ucx_py_request_reset 67 | ) 68 | 69 | cdef ucp_config_t *config = _read_ucx_config(config_dict) 70 | try: 71 | status = ucp_init(&ucp_params, config, &self._handle) 72 | assert_ucs_status(status) 73 | self._config = ucx_config_to_dict(config) 74 | finally: 75 | ucp_config_release(config) 76 | 77 | # UCX supports CUDA if "cuda" is part of the TLS or TLS is "all" 78 | cdef str tls = self._config["TLS"] 79 | cuda_transports = {"cuda", "cuda_copy"} 80 | if tls.startswith("^"): 
81 | # UCX_TLS=^x,y,z means "all \ {x, y, z}" 82 | disabled = set(tls[1:].split(",")) 83 | self.cuda_support = not (disabled & cuda_transports) 84 | else: 85 | enabled = set(tls.split(",")) 86 | self.cuda_support = bool( 87 | enabled & ({"all", "cuda_ipc"} | cuda_transports) 88 | ) 89 | 90 | self.add_handle_finalizer( 91 | _ucx_context_handle_finalizer, 92 | int(self._handle) 93 | ) 94 | 95 | logger.info("UCP initiated using config: ") 96 | cdef str k, v 97 | for k, v in self._config.items(): 98 | logger.info(f" {k}: {v}") 99 | 100 | cpdef dict get_config(self): 101 | return self._config 102 | 103 | @property 104 | def handle(self): 105 | assert self.initialized 106 | return int(self._handle) 107 | 108 | def info(self): 109 | assert self.initialized 110 | 111 | cdef FILE *text_fd = create_text_fd() 112 | ucp_context_print_info(self._handle, text_fd) 113 | return decode_text_fd(text_fd) 114 | 115 | def map(self, mem): 116 | return UCXMemoryHandle.map(self, mem) 117 | 118 | def alloc(self, size): 119 | return UCXMemoryHandle.alloc(self, size) 120 | -------------------------------------------------------------------------------- /ucp/_libs/ucx_listener.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 2 | # See file LICENSE for terms. 
3 | 4 | # cython: language_level=3 5 | 6 | from libc.stdint cimport uint16_t, uintptr_t 7 | 8 | from .exceptions import log_errors 9 | from .ucx_api_dep cimport * 10 | 11 | 12 | cdef void _listener_callback(ucp_conn_request_h conn_request, void *args) with gil: 13 | """Callback function used by UCXListener""" 14 | cdef dict cb_data = args 15 | 16 | with log_errors(): 17 | cb_data['cb_func']( 18 | int(conn_request), 19 | *cb_data['cb_args'], 20 | **cb_data['cb_kwargs'] 21 | ) 22 | 23 | 24 | def _ucx_listener_handle_finalizer(uintptr_t handle): 25 | ucp_listener_destroy( handle) 26 | 27 | 28 | cdef class UCXListener(UCXObject): 29 | """Python representation of `ucp_listener_h` 30 | 31 | Create and start a listener to accept incoming connections. 32 | 33 | Notice, the listening is closed when the returned Listener 34 | goes out of scope thus remember to keep a reference to the object. 35 | 36 | Parameters 37 | ---------- 38 | worker: UCXWorker 39 | Listening worker. 40 | port: int 41 | An unused port number for listening, or `0` to let UCX assign 42 | an unused port. 43 | callback_func: callable 44 | A callback function that gets invoked when an incoming 45 | connection is accepted. The arguments are `conn_request` 46 | followed by *cb_args and **cb_kwargs (if not None). 47 | cb_args: tuple, optional 48 | Extra arguments to the call-back function 49 | cb_kwargs: dict, optional 50 | Extra keyword arguments to the call-back function 51 | 52 | Returns 53 | ------- 54 | Listener: UCXListener 55 | The new listener. 
When this object is deleted, the listening stops 56 | """ 57 | 58 | cdef: 59 | ucp_listener_h _handle 60 | dict cb_data 61 | 62 | cdef public: 63 | uint16_t port 64 | str ip 65 | 66 | def __init__( 67 | self, 68 | UCXWorker worker, 69 | uint16_t port, 70 | cb_func, 71 | tuple cb_args=None, 72 | dict cb_kwargs=None 73 | ): 74 | if cb_args is None: 75 | cb_args = () 76 | if cb_kwargs is None: 77 | cb_kwargs = {} 78 | cdef ucp_listener_params_t params 79 | cdef ucp_listener_conn_callback_t _listener_cb = ( 80 | _listener_callback 81 | ) 82 | cdef ucp_listener_attr_t attr 83 | self.cb_data = { 84 | "cb_func": cb_func, 85 | "cb_args": cb_args, 86 | "cb_kwargs": cb_kwargs, 87 | } 88 | params.field_mask = ( 89 | UCP_LISTENER_PARAM_FIELD_SOCK_ADDR | UCP_LISTENER_PARAM_FIELD_CONN_HANDLER 90 | ) 91 | params.conn_handler.cb = _listener_cb 92 | params.conn_handler.arg = self.cb_data 93 | if c_util_set_sockaddr(¶ms.sockaddr, NULL, port): 94 | raise MemoryError("Failed allocation of sockaddr") 95 | 96 | cdef ucs_status_t status = ucp_listener_create( 97 | worker._handle, ¶ms, &self._handle 98 | ) 99 | c_util_sockaddr_free(¶ms.sockaddr) 100 | assert_ucs_status(status) 101 | 102 | attr.field_mask = UCP_LISTENER_ATTR_FIELD_SOCKADDR 103 | status = ucp_listener_query(self._handle, &attr) 104 | if status != UCS_OK: 105 | ucp_listener_destroy(self._handle) 106 | assert_ucs_status(status) 107 | 108 | DEF MAX_STR_LEN = 50 109 | cdef char ip_str[MAX_STR_LEN] 110 | cdef char port_str[MAX_STR_LEN] 111 | c_util_sockaddr_get_ip_port_str(&attr.sockaddr, 112 | ip_str, 113 | port_str, 114 | MAX_STR_LEN) 115 | 116 | self.port = int(port_str.decode(errors="ignore")) 117 | self.ip = ip_str.decode(errors="ignore") 118 | 119 | self.add_handle_finalizer( 120 | _ucx_listener_handle_finalizer, 121 | int(self._handle) 122 | ) 123 | worker.add_child(self) 124 | 125 | @property 126 | def handle(self): 127 | assert self.initialized 128 | return int(self._handle) 129 | 
-------------------------------------------------------------------------------- /ucp/_libs/ucx_memory_handle.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # Copyright (c) 2021, UT-Battelle, LLC. All rights reserved. 3 | # See file LICENSE for terms. 4 | 5 | # cython: language_level=3 6 | 7 | from libc.stdint cimport uintptr_t 8 | 9 | from .arr cimport Array 10 | from .ucx_api_dep cimport * 11 | 12 | 13 | def _ucx_mem_handle_finalizer(uintptr_t handle_as_int, UCXContext ctx): 14 | assert ctx.initialized 15 | cdef ucp_mem_h handle = handle_as_int 16 | cdef ucs_status_t status 17 | status = ucp_mem_unmap(ctx._handle, handle) 18 | assert_ucs_status(status) 19 | 20 | 21 | cdef class UCXMemoryHandle(UCXObject): 22 | """ Python representation for ucp_mem_h type. Users should not instance this class 23 | directly and instead use either the map or the alloc class methods 24 | """ 25 | cdef ucp_mem_h _mem_handle 26 | cdef UCXContext _context 27 | cdef uint64_t r_address 28 | cdef size_t _length 29 | 30 | def __cinit__(self, UCXContext ctx, uintptr_t par): 31 | cdef ucs_status_t status 32 | cdef ucp_context_h ctx_handle = ctx.handle 33 | cdef ucp_mem_map_params_t *params = par 34 | self._context = ctx 35 | status = ucp_mem_map(ctx_handle, params, &self._mem_handle) 36 | assert_ucs_status(status) 37 | self._populate_metadata() 38 | self.add_handle_finalizer( 39 | _ucx_mem_handle_finalizer, 40 | int(self._mem_handle), 41 | self._context 42 | ) 43 | ctx.add_child(self) 44 | 45 | @classmethod 46 | def alloc(cls, ctx, size): 47 | """ Allocate a new pool of registered memory. This memory can be used for 48 | RMA and AMO operations. This memory should not be accessed from outside 49 | these operations. 
50 | 51 | Parameters 52 | ---------- 53 | ctx: UCXContext 54 | The UCX context that this memory should be registered to 55 | size: int 56 | Minimum amount of memory to allocate 57 | """ 58 | cdef ucp_mem_map_params_t params 59 | cdef ucs_status_t status 60 | 61 | params.field_mask = ( 62 | UCP_MEM_MAP_PARAM_FIELD_FLAGS | 63 | UCP_MEM_MAP_PARAM_FIELD_LENGTH 64 | ) 65 | params.length = size 66 | params.flags = UCP_MEM_MAP_NONBLOCK | UCP_MEM_MAP_ALLOCATE 67 | 68 | return UCXMemoryHandle(ctx, ¶ms) 69 | 70 | @classmethod 71 | def map(cls, ctx, mem): 72 | """ Register an existing memory object to UCX for use in RMA and AMO operations 73 | It is not safe to access this memory from outside UCX while operations are 74 | outstanding 75 | 76 | Parameters 77 | ---------- 78 | ctx: UCXContext 79 | The UCX context that this memory should be registered to 80 | mem: buffer 81 | The memory object to be registered 82 | """ 83 | cdef ucp_mem_map_params_t params 84 | cdef ucs_status_t status 85 | 86 | buff = Array(mem) 87 | 88 | params.field_mask = ( 89 | UCP_MEM_MAP_PARAM_FIELD_ADDRESS | 90 | UCP_MEM_MAP_PARAM_FIELD_LENGTH 91 | ) 92 | params.address = buff.ptr 93 | params.length = buff.nbytes 94 | 95 | return UCXMemoryHandle(ctx, ¶ms) 96 | 97 | def pack_rkey(self): 98 | """ Returns an UCXRKey object that represents a packed key. This key is what 99 | allows the UCX API to associate this memory with an EP. 
100 | """ 101 | return PackedRemoteKey.from_mem_handle(self) 102 | 103 | @property 104 | def mem_handle(self): 105 | return self._mem_handle 106 | 107 | # Done as a separate function because some day I plan on making this loaded lazily 108 | # I believe this reports the actual registered space, rather than what was requested 109 | def _populate_metadata(self): 110 | cdef ucs_status_t status 111 | cdef ucp_mem_attr_t attr 112 | 113 | attr.field_mask = ( 114 | UCP_MEM_ATTR_FIELD_ADDRESS | 115 | UCP_MEM_ATTR_FIELD_LENGTH 116 | ) 117 | status = ucp_mem_query(self._mem_handle, &attr) 118 | assert_ucs_status(status) 119 | self.r_address = attr.address 120 | self._length = attr.length 121 | 122 | @property 123 | def address(self): 124 | """ Get base address for the memory registration """ 125 | return self.r_address 126 | 127 | @property 128 | def length(self): 129 | """ Get length of registered memory """ 130 | return self._length 131 | -------------------------------------------------------------------------------- /ucp/_libs/ucx_object.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 2 | # See file LICENSE for terms. 3 | 4 | # cython: language_level=3 5 | 6 | import weakref 7 | 8 | 9 | def _handle_finalizer_wrapper( 10 | children, handle_finalizer, handle_as_int, *extra_args, **extra_kargs 11 | ): 12 | for weakref_to_child in children: 13 | child = weakref_to_child() 14 | if child is not None: 15 | child.close() 16 | handle_finalizer(handle_as_int, *extra_args, **extra_kargs) 17 | 18 | 19 | cdef class UCXObject: 20 | """Base class for UCX classes 21 | 22 | This base class streamlines the cleanup of UCX objects and reduces duplicate code. 23 | """ 24 | cdef: 25 | object __weakref__ 26 | object _finalizer 27 | list _children 28 | 29 | def __cinit__(self): 30 | # The finalizer, which can be called multiple times but only 31 | # evoke the finalizer function once. 
# Is None when the underlying UCX handle hasn't been initialized.
22 | 23 | Notice, this class doesn't own the handle and multiple instances of 24 | UCXRequest can point to the same underlying UCX handle. 25 | Furthermore, UCX can modify/free the UCX handle without notice 26 | thus we use `_uid` to make sure the handle hasn't been modified. 27 | """ 28 | cdef: 29 | ucx_py_request *_handle 30 | unsigned int _uid 31 | 32 | def __init__(self, uintptr_t req_as_int): 33 | global _ucx_py_request_counter 34 | cdef ucx_py_request *req = req_as_int 35 | assert req != NULL 36 | self._handle = req 37 | 38 | cdef dict info = {"status": "pending"} 39 | if self._handle.info == NULL: # First time we are wrapping this UCX request 40 | Py_INCREF(info) 41 | self._handle.info = info 42 | _ucx_py_request_counter += 1 43 | self._uid = _ucx_py_request_counter 44 | assert self._handle.uid == 0 45 | self._handle.uid = _ucx_py_request_counter 46 | else: 47 | self._uid = self._handle.uid 48 | 49 | cpdef bint closed(self): 50 | return self._handle == NULL or self._uid != self._handle.uid 51 | 52 | cpdef void close(self) except *: 53 | """This routine releases the non-blocking request back to UCX, 54 | regardless of its current state. Communications operations associated with 55 | this request will make progress internally, however no further notifications or 56 | callbacks will be invoked for this request. 
""" 57 | 58 | if not self.closed(): 59 | Py_DECREF(self._handle.info) 60 | self._handle.info = NULL 61 | self._handle.uid = 0 62 | ucp_request_free(self._handle) 63 | self._handle = NULL 64 | 65 | @property 66 | def info(self): 67 | assert not self.closed() 68 | return self._handle.info 69 | 70 | @property 71 | def handle(self): 72 | assert not self.closed() 73 | return int(self._handle) 74 | 75 | def __hash__(self): 76 | if self.closed(): 77 | return id(self) 78 | else: 79 | return self._uid 80 | 81 | def __eq__(self, other): 82 | return hash(self) == hash(other) 83 | 84 | def __repr__(self): 85 | if self.closed(): 86 | return "" 87 | else: 88 | return ( 89 | f"" 91 | ) 92 | 93 | 94 | cdef UCXRequest _handle_status( 95 | ucs_status_ptr_t status, 96 | int64_t expected_receive, 97 | cb_func, 98 | cb_args, 99 | cb_kwargs, 100 | unicode name, 101 | set inflight_msgs 102 | ): 103 | if UCS_PTR_STATUS(status) == UCS_OK: 104 | return 105 | cdef str ucx_status_msg, msg 106 | if UCS_PTR_IS_ERR(status): 107 | ucx_status_msg = ( 108 | ucs_status_string(UCS_PTR_STATUS(status)).decode("utf-8") 109 | ) 110 | msg = "<%s>: %s" % (name, ucx_status_msg) 111 | raise UCXError(msg) 112 | cdef UCXRequest req = UCXRequest( status) 113 | assert not req.closed() 114 | cdef dict req_info = req._handle.info 115 | if req_info["status"] == "finished": 116 | try: 117 | # The callback function has already handled the request 118 | received = req_info.get("received", None) 119 | if received is not None and received != expected_receive: 120 | msg = "<%s>: length mismatch: %d (got) != %d (expected)" % ( 121 | name, received, expected_receive 122 | ) 123 | raise UCXMsgTruncated(msg) 124 | else: 125 | cb_func(req, None, *cb_args, **cb_kwargs) 126 | return 127 | finally: 128 | req.close() 129 | else: 130 | req_info["cb_func"] = cb_func 131 | req_info["cb_args"] = cb_args 132 | req_info["cb_kwargs"] = cb_kwargs 133 | req_info["expected_receive"] = expected_receive 134 | req_info["name"] = name 135 | 
inflight_msgs.add(req) 136 | req_info["inflight_msgs"] = inflight_msgs 137 | return req 138 | -------------------------------------------------------------------------------- /ucp/_libs/ucx_rkey.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # Copyright (c) 2021, UT-Battelle, LLC. All rights reserved. 3 | # See file LICENSE for terms. 4 | 5 | # cython: language_level=3 6 | 7 | import logging 8 | 9 | from libc.stdint cimport uintptr_t 10 | 11 | from .arr cimport Array 12 | from .ucx_api_dep cimport * 13 | 14 | logger = logging.getLogger("ucx") 15 | 16 | 17 | def _ucx_remote_mem_finalizer_post_flush(req, exception, UCXRkey rkey): 18 | if exception is not None: 19 | logger.debug("Remote memory finalizer exception: %s" % str(exception)) 20 | ucp_rkey_destroy(rkey._handle) 21 | 22 | 23 | def _ucx_rkey_finalizer(UCXRkey rkey, UCXEndpoint ep): 24 | req = ep.flush(_ucx_remote_mem_finalizer_post_flush, (rkey,)) 25 | 26 | # Flush completed immediately and callback wasn't called 27 | if req is None: 28 | ucp_rkey_destroy(rkey._handle) 29 | 30 | 31 | cdef class UCXRkey(UCXObject): 32 | cdef ucp_rkey_h _handle 33 | cdef UCXEndpoint ep 34 | 35 | def __init__(self, UCXEndpoint ep, PackedRemoteKey rkey): 36 | cdef ucs_status_t status 37 | rkey_arr = Array(rkey) 38 | cdef const void *key_data = rkey_arr.ptr 39 | status = ucp_ep_rkey_unpack(ep._handle, key_data, &self._handle) 40 | assert_ucs_status(status) 41 | self.ep = ep 42 | self.add_handle_finalizer( 43 | _ucx_rkey_finalizer, 44 | self, 45 | ep 46 | ) 47 | ep.add_child(self) 48 | 49 | @property 50 | def ep(self): 51 | return self.ep 52 | -------------------------------------------------------------------------------- /ucp/_libs/ucxio.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, UT-Battelle, LLC. All rights reserved. 2 | # See file LICENSE for terms. 
3 | 4 | from io import SEEK_CUR, SEEK_END, SEEK_SET, RawIOBase 5 | 6 | from .arr cimport Array 7 | from .ucx_api_dep cimport * 8 | 9 | 10 | def blocking_handler(request, exception, finished): 11 | assert exception is None 12 | finished[0] = True 13 | 14 | 15 | class UCXIO(RawIOBase): 16 | """A class to simulate python streams backed by UCX RMA operations 17 | 18 | Parameters 19 | ---------- 20 | dest: int 21 | A 64 bit number that represents the remote address that will be written to 22 | and read from. 23 | length: int 24 | Maximum length of the region that can be written to and read from. 25 | rkey: UCXRkey 26 | An unpacked UCXRkey that represents the remote memory that was unpacked by 27 | UCX for use in RMA operations. 28 | """ 29 | 30 | def __init__(self, dest, length, rkey): 31 | self.pos = 0 32 | self.remote_addr = dest 33 | self.length = length 34 | self.rkey = rkey 35 | self.cb_finished = [False] 36 | 37 | def block_on_request(self, req): 38 | if req is not None: 39 | while not self.cb_finished[0]: 40 | self.rkey.ep.worker.progress() 41 | self.cb_finished[0] = False 42 | 43 | def flush(self): 44 | req = self.rkey.ep.flush(blocking_handler, cb_args=(self.cb_finished,)) 45 | self.block_on_request(req) 46 | 47 | def seek(self, pos, whence=SEEK_SET): 48 | if whence == SEEK_SET: 49 | self.pos = min(max(pos, 0), self.length) 50 | elif whence == SEEK_CUR: 51 | if pos < 0: 52 | self.pos = max(self.pos + pos, 0) 53 | else: 54 | self.pos = min(self.pos + pos, self.length) 55 | elif whence == SEEK_END: 56 | self.pos = min(max(self.pos + pos, 0), self.length) 57 | else: 58 | raise ValueError("Invalid argument") 59 | return self.pos 60 | 61 | def _do_rma(self, op, buff): 62 | data = Array(buff) 63 | size = data.nbytes 64 | if self.pos + size > self.length: 65 | size = self.length - self.pos 66 | finished = op(data, size, self.remote_addr + self.pos, self.rkey) 67 | self.pos += size 68 | if not finished: 69 | self.flush() 70 | return size 71 | 72 | def readinto(self, 
buff): 73 | return self._do_rma(get_nbi, buff) 74 | 75 | def write(self, buff): 76 | return self._do_rma(put_nbi, buff) 77 | 78 | def seekable(self): 79 | return True 80 | 81 | def writable(self): 82 | return True 83 | 84 | def readable(self): 85 | return True 86 | -------------------------------------------------------------------------------- /ucp/_libs/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # See file LICENSE for terms. 3 | 4 | import fcntl 5 | import glob 6 | import os 7 | import socket 8 | import struct 9 | 10 | try: 11 | from nvtx import annotate as nvtx_annotate 12 | except ImportError: 13 | # If nvtx module is not installed, `annotate` yields only. 14 | from contextlib import contextmanager 15 | 16 | @contextmanager 17 | def nvtx_annotate(message=None, color=None, domain=None): 18 | yield 19 | 20 | 21 | try: 22 | from dask.utils import format_bytes, format_time, parse_bytes 23 | except ImportError: 24 | 25 | def format_time(x): 26 | if x < 1e-6: 27 | return f"{x * 1e9:.3f} ns" 28 | if x < 1e-3: 29 | return f"{x * 1e6:.3f} us" 30 | if x < 1: 31 | return f"{x * 1e3:.3f} ms" 32 | else: 33 | return f"{x:.3f} s" 34 | 35 | def format_bytes(x): 36 | """Return formatted string in B, KiB, MiB, GiB or TiB""" 37 | if x < 1024: 38 | return f"{x} B" 39 | elif x < 1024**2: 40 | return f"{x / 1024:.2f} KiB" 41 | elif x < 1024**3: 42 | return f"{x / 1024**2:.2f} MiB" 43 | elif x < 1024**4: 44 | return f"{x / 1024**3:.2f} GiB" 45 | else: 46 | return f"{x / 1024**4:.2f} TiB" 47 | 48 | parse_bytes = None 49 | 50 | 51 | def print_separator(separator="-", length=80): 52 | """Print a single separator character multiple times""" 53 | print(separator * length) 54 | 55 | 56 | def print_key_value(key, value, key_length=25): 57 | """Print a key and value with fixed key-field length""" 58 | print(f"{key: <{key_length}} | {value}") 59 | 60 | 61 | def 
print_multi(values, key_length=25): 62 | """Print a key and value with fixed key-field length""" 63 | assert isinstance(values, tuple) or isinstance(values, list) 64 | assert len(values) > 1 65 | 66 | print_str = "".join(f"{s: <{key_length}} | " for s in values[:-1]) 67 | print_str += values[-1] 68 | print(print_str) 69 | 70 | 71 | def get_address(ifname=None): 72 | """ 73 | Get the address associated with a network interface. 74 | 75 | Parameters 76 | ---------- 77 | ifname : str 78 | The network interface name to find the address for. 79 | If None, it uses the value of environment variable `UCXPY_IFNAME` 80 | and if `UCXPY_IFNAME` is not set it defaults to "ib0" 81 | An OSError is raised for invalid interfaces. 82 | 83 | Returns 84 | ------- 85 | address : str 86 | The inet addr associated with an interface. 87 | 88 | Raises 89 | ------ 90 | RuntimeError 91 | If a network address could not be determined. 92 | 93 | Examples 94 | -------- 95 | >>> get_address() 96 | '10.33.225.160' 97 | 98 | >>> get_address(ifname='lo') 99 | '127.0.0.1' 100 | """ 101 | 102 | def _get_address(ifname): 103 | ifname = ifname.encode() 104 | with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s: 105 | return socket.inet_ntoa( 106 | fcntl.ioctl( 107 | s.fileno(), 0x8915, struct.pack("256s", ifname[:15]) # SIOCGIFADDR 108 | )[20:24] 109 | ) 110 | 111 | def _try_interfaces(): 112 | prefix_priority = ["ib", "eth", "en", "docker"] 113 | iftypes = {p: [] for p in prefix_priority} 114 | for i in glob.glob("/sys/class/net/*"): 115 | name = i.split("/")[-1] 116 | for p in prefix_priority: 117 | if name.startswith(p): 118 | iftypes[p].append(name) 119 | for p in prefix_priority: 120 | iftype = iftypes[p] 121 | iftype.sort() 122 | for i in iftype: 123 | try: 124 | return _get_address(i) 125 | except OSError: 126 | pass 127 | 128 | raise RuntimeError( 129 | "A network address could not be determined, an interface that has a valid " 130 | "IP address with the environment variable 
`UCXPY_IFNAME`." 131 | ) 132 | 133 | if ifname is None: 134 | ifname = os.environ.get("UCXPY_IFNAME") 135 | 136 | if ifname is not None: 137 | return _get_address(ifname) 138 | else: 139 | return _try_interfaces() 140 | -------------------------------------------------------------------------------- /ucp/_libs/utils_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 2 | # See file LICENSE for terms. 3 | 4 | import multiprocessing as mp 5 | 6 | from ucp._libs import ucx_api 7 | from ucp._libs.arr import Array 8 | 9 | mp = mp.get_context("spawn") 10 | 11 | 12 | def blocking_handler(request, exception, finished): 13 | assert exception is None 14 | finished[0] = True 15 | 16 | 17 | def blocking_flush(obj): 18 | finished = [False] 19 | if not hasattr(obj, "progress"): 20 | progress = obj.worker.progress 21 | else: 22 | progress = obj.progress 23 | req = obj.flush(cb_func=blocking_handler, cb_args=(finished,)) 24 | if req is not None: 25 | while not finished[0]: 26 | progress() 27 | 28 | 29 | def blocking_send(worker, ep, msg, tag=0): 30 | msg = Array(msg) 31 | finished = [False] 32 | req = ucx_api.tag_send_nb( 33 | ep, 34 | msg, 35 | msg.nbytes, 36 | tag=tag, 37 | cb_func=blocking_handler, 38 | cb_args=(finished,), 39 | ) 40 | if req is not None: 41 | while not finished[0]: 42 | worker.progress() 43 | 44 | 45 | def blocking_recv(worker, ep, msg, tag=0): 46 | msg = Array(msg) 47 | finished = [False] 48 | req = ucx_api.tag_recv_nb( 49 | worker, 50 | msg, 51 | msg.nbytes, 52 | tag=tag, 53 | cb_func=blocking_handler, 54 | cb_args=(finished,), 55 | ep=ep, 56 | ) 57 | if req is not None: 58 | while not finished[0]: 59 | worker.progress() 60 | 61 | 62 | def non_blocking_handler(request, exception, completed_cb): 63 | if exception is not None: 64 | print(exception) 65 | assert exception is None 66 | completed_cb() 67 | 68 | 69 | def non_blocking_send(worker, ep, msg, 
def non_blocking_recv(worker, ep, msg, started_cb, completed_cb, tag=0):
    """Post a non-blocking tag receive, invoking the given callbacks.

    `started_cb` is called right before posting; `completed_cb` is called
    by the UCX callback on completion (or immediately if the receive
    completed inline).
    """
    buf = Array(msg)
    started_cb()
    request = ucx_api.tag_recv_nb(
        worker,
        buf,
        buf.nbytes,
        tag=tag,
        cb_func=non_blocking_handler,
        cb_args=(completed_cb,),
        ep=ep,
    )
    # A None request means the operation completed inline and the UCX
    # callback will never fire, so signal completion here.
    if request is None:
        completed_cb()
    return request


def blocking_am_send(worker, ep, msg):
    """Send an Active Message and progress the worker until it completes."""
    buf = Array(msg)
    done = [False]
    request = ucx_api.am_send_nbx(
        ep,
        buf,
        buf.nbytes,
        cb_func=blocking_handler,
        cb_args=(done,),
    )
    if request is None:
        # Completed inline; nothing to progress.
        return
    while not done[0]:
        worker.progress()


def blocking_am_recv_handler(recv_obj, exception, ret):
    """Callback for `blocking_am_recv`: store the received object in `ret`."""
    assert exception is None
    ret[0] = recv_obj


def blocking_am_recv(worker, ep):
    """Receive an Active Message, progressing the worker until data arrives.

    Returns the received object.
    """
    result = [None]
    ucx_api.am_recv_nb(
        ep,
        cb_func=blocking_am_recv_handler,
        cb_args=(result,),
    )
    while result[0] is None:
        worker.progress()
    return result[0]
class BaseServer(ABC):
    """Abstract base class for benchmark server implementations."""

    @abstractmethod
    def __init__(self, args: Namespace, queue: Queue):
        """
        Benchmark server.

        Parameters
        ----------
        args: argparse.Namespace
            Parsed command-line arguments that will be used as parameters during
            the `run` method.
        queue: Queue
            Queue object where server will put the port it is listening at.
        """
        pass

    @property
    @abstractmethod
    def has_cuda_support(self) -> bool:
        # NOTE: `self` was previously missing from this abstract property's
        # signature; subclasses may still override with a plain class
        # attribute (e.g. `has_cuda_support = False`).
        """
        Check whether server implementation supports CUDA memory transfers.

        Returns
        -------
        ret: bool
            `True` if CUDA is supported, `False` otherwise.
        """
        return False

    @abstractmethod
    def run(self):
        """
        Run the benchmark server.

        The server is executed as follows:
        1. Start the listener and put port where it is listening into the queue
           registered in constructor;
        2. Setup any additional context (Active Message registration, memory buffers
           to reuse, etc.);
        3. Transfer data back-and-forth with client;
        4. Shutdown server.
        """
        pass


class BaseClient(ABC):
    """Abstract base class for benchmark client implementations."""

    @abstractmethod
    def __init__(self, args: Namespace, queue: Queue, server_address: str, port: int):
        """
        Benchmark client.

        Parameters
        ----------
        args: argparse.Namespace
            Parsed command-line arguments that will be used as parameters during
            the `run` method.
        queue: Queue
            Queue object where to put timing results.
        server_address: str
            Hostname or IP address where server is listening at.
        port: int
            Port where server is listening at.
        """
        pass

    @property
    @abstractmethod
    def has_cuda_support(self) -> bool:
        # NOTE: `self` added here as well, mirroring `BaseServer`.
        """
        Check whether client implementation supports CUDA memory transfers.

        Returns
        -------
        ret: bool
            `True` if CUDA is supported, `False` otherwise.
        """
        return False

    @abstractmethod
    def run(self):
        """
        Run the benchmark client.

        The client is executed as follows:
        1. Connects to listener;
        2. Setup any additional context (Active Message registration, memory buffers
           to reuse, etc.);
        3. Transfer data back-and-forth with server;
        4. Shutdown client;
        5. Put timing results into the queue registered in constructor.
        """
        pass

    def print_backend_specific_config(self):
        """
        Pretty print configuration specific to backend implementation.
        """
        pass
class TornadoClient(BaseClient):
    """Tornado TCP benchmark client; host-memory transfers only."""

    has_cuda_support = False

    def __init__(self, args, queue, server_address, port):
        self.args = args
        self.queue = queue
        self.server_address = server_address
        self.port = port

    async def run(self) -> bool:
        """Connect to the benchmark server and time ping-pong transfers."""
        client = TCPClient()
        # Set max_buffer_size to 1 GiB for now
        stream = await client.connect(
            self.server_address, self.port, max_buffer_size=1024**3
        )

        nbytes = self.args.n_bytes
        outgoing = np.arange(nbytes, dtype="u1")
        assert outgoing.nbytes == nbytes
        if self.args.reuse_alloc:
            incoming = np.zeros(nbytes, dtype="u1")
            assert incoming.nbytes == nbytes

        if self.args.report_gil_contention:
            from gilknocker import KnockKnock

            # Use smallest polling interval possible to ensure, contention will always
            # be zero for small messages otherwise and inconsistent for large messages.
            knocker = KnockKnock(polling_interval_micros=1)
            knocker.start()

        timings = []
        total_iterations = self.args.n_iter + self.args.n_warmup_iter
        for iteration in range(total_iterations):
            t_begin = monotonic()

            if not self.args.reuse_alloc:
                incoming = np.zeros(nbytes, dtype="u1")

            await stream.write(outgoing.data)
            await stream.read_into(incoming.data)

            t_end = monotonic()
            # Warmup iterations are excluded from the reported timings.
            if iteration >= self.args.n_warmup_iter:
                timings.append(t_end - t_begin)

        if self.args.report_gil_contention:
            knocker.stop()

        self.queue.put(timings)
        if self.args.report_gil_contention:
            self.queue.put(knocker.contention_metric)
25 | """ 26 | event_loop = event_loop or get_event_loop() 27 | ret = event_loop.create_future() 28 | # All the comm functions takes the call-back function and its arguments 29 | kwargs["cb_func"] = _cb_func 30 | kwargs["cb_args"] = (event_loop, ret) 31 | req = func(*args, **kwargs) 32 | if req is None and not ret.done(): 33 | ret.set_result(True) 34 | return ret 35 | 36 | 37 | def _am_cb_func(recv_obj, exception, event_loop, future): 38 | if event_loop.is_closed() or future.done(): 39 | return 40 | if exception is not None: 41 | future.set_exception(exception) 42 | else: 43 | future.set_result(recv_obj) 44 | 45 | 46 | def tag_send( 47 | ep: ucx_api.UCXEndpoint, 48 | buffer: arr.Array, 49 | nbytes: int, 50 | tag: int, 51 | name="tag_send", 52 | event_loop=None, 53 | ) -> asyncio.Future: 54 | 55 | return _call_ucx_api( 56 | event_loop, ucx_api.tag_send_nb, ep, buffer, nbytes, tag, name=name 57 | ) 58 | 59 | 60 | def am_send( 61 | ep: ucx_api.UCXEndpoint, 62 | buffer: arr.Array, 63 | nbytes: int, 64 | name="am_send", 65 | event_loop=None, 66 | ) -> asyncio.Future: 67 | 68 | return _call_ucx_api(event_loop, ucx_api.am_send_nbx, ep, buffer, nbytes, name=name) 69 | 70 | 71 | def stream_send( 72 | ep: ucx_api.UCXEndpoint, 73 | buffer: arr.Array, 74 | nbytes: int, 75 | name="stream_send", 76 | event_loop=None, 77 | ) -> asyncio.Future: 78 | 79 | return _call_ucx_api( 80 | event_loop, ucx_api.stream_send_nb, ep, buffer, nbytes, name=name 81 | ) 82 | 83 | 84 | def tag_recv( 85 | obj: Union[ucx_api.UCXEndpoint, ucx_api.UCXWorker], 86 | buffer: arr.Array, 87 | nbytes: int, 88 | tag: int, 89 | name="tag_recv", 90 | event_loop=None, 91 | ) -> asyncio.Future: 92 | 93 | worker = obj if isinstance(obj, ucx_api.UCXWorker) else obj.worker 94 | ep = obj if isinstance(obj, ucx_api.UCXEndpoint) else None 95 | 96 | return _call_ucx_api( 97 | event_loop, 98 | ucx_api.tag_recv_nb, 99 | worker, 100 | buffer, 101 | nbytes, 102 | tag, 103 | name=name, 104 | ep=ep, 105 | ) 106 | 107 | 108 | 
def stream_recv(
    ep: ucx_api.UCXEndpoint,
    buffer: arr.Array,
    nbytes: int,
    name="stream_recv",
    event_loop=None,
) -> asyncio.Future:
    """Schedule a non-blocking stream receive into `buffer` on `ep`.

    Returns a future resolved when the receive completes.
    """
    recv_func = ucx_api.stream_recv_nb
    return _call_ucx_api(event_loop, recv_func, ep, buffer, nbytes, name=name)


def flush_worker(worker: ucx_api.UCXWorker, event_loop=None) -> asyncio.Future:
    """Flush all outstanding operations on `worker`; returns a completion future."""
    return _call_ucx_api(event_loop, worker.flush)


def flush_ep(ep: ucx_api.UCXEndpoint, event_loop=None) -> asyncio.Future:
    """Flush all outstanding operations on endpoint `ep`; returns a completion future."""
    return _call_ucx_api(event_loop, ep.flush)
23 | """ 24 | self.weakref_worker = weakref.ref(worker) 25 | self.event_loop = event_loop 26 | self.asyncio_task = None 27 | 28 | def __del__(self): 29 | if self.asyncio_task is not None: 30 | self.asyncio_task.cancel() 31 | 32 | # Hash and equality is based on the event loop 33 | def __hash__(self): 34 | return hash(self.event_loop) 35 | 36 | def __eq__(self, other): 37 | return hash(self) == hash(other) 38 | 39 | 40 | class NonBlockingMode(ProgressTask): 41 | def __init__(self, worker, event_loop): 42 | super().__init__(worker, event_loop) 43 | self.asyncio_task = event_loop.create_task(self._progress_task()) 44 | 45 | async def _progress_task(self): 46 | """This helper function maintains a UCX progress loop.""" 47 | while True: 48 | worker = self.weakref_worker() 49 | if worker is None or not worker.initialized: 50 | return 51 | worker.progress() 52 | del worker 53 | # Give other co-routines a chance to run. 54 | await asyncio.sleep(0) 55 | 56 | 57 | class BlockingMode(ProgressTask): 58 | def __init__(self, worker, event_loop, epoll_fd): 59 | super().__init__(worker, event_loop) 60 | 61 | # Creating a job that is ready straightaway but with low priority. 62 | # Calling `await self.event_loop.sock_recv(self.rsock, 1)` will 63 | # return when all non-IO tasks are finished. 64 | # See . 
65 | self.rsock, wsock = socket.socketpair() 66 | self.rsock.setblocking(0) 67 | wsock.setblocking(0) 68 | wsock.close() 69 | 70 | # Bind an asyncio reader to a UCX epoll file descripter 71 | event_loop.add_reader(epoll_fd, self._fd_reader_callback) 72 | 73 | # Remove the reader and close socket on finalization 74 | weakref.finalize(self, event_loop.remove_reader, epoll_fd) 75 | weakref.finalize(self, self.rsock.close) 76 | 77 | def _fd_reader_callback(self): 78 | worker = self.weakref_worker() 79 | if worker is None or not worker.initialized: 80 | return 81 | worker.progress() 82 | 83 | # Notice, we can safely overwrite `self.dangling_arm_task` 84 | # since previous arm task is finished by now. 85 | assert self.asyncio_task is None or self.asyncio_task.done() 86 | self.asyncio_task = self.event_loop.create_task(self._arm_worker()) 87 | 88 | async def _arm_worker(self): 89 | # When arming the worker, the following must be true: 90 | # - No more progress in UCX (see doc of ucp_worker_arm()) 91 | # - All asyncio tasks that isn't waiting on UCX must be executed 92 | # so that the asyncio's next state is epoll wait. 93 | # See 94 | while True: 95 | worker = self.weakref_worker() 96 | if worker is None or not worker.initialized: 97 | return 98 | worker.progress() 99 | 100 | # Cancel inflight messages that couldn't be completed. This may 101 | # happen if the user called ep.recv() but the remote worker 102 | # errored before sending the message. 103 | if worker.cancel_inflight_messages() > 0: 104 | worker.progress() 105 | 106 | del worker 107 | 108 | # This IO task returns when all non-IO tasks are finished. 109 | # Notice, we do NOT hold a reference to `worker` while waiting. 110 | await self.event_loop.sock_recv(self.rsock, 1) 111 | 112 | worker = self.weakref_worker() 113 | if worker is None or not worker.initialized: 114 | return 115 | if worker.arm(): 116 | # At this point we know that asyncio's next state is 117 | # epoll wait. 
def get_event_loop():
    """
    Get running or create new event loop

    In Python 3.10, the behavior of `get_event_loop()` is deprecated and in
    the future it will be an alias of `get_running_loop()`. In several
    situations, UCX-Py needs to create a new event loop, so this function
    will remain for now as an alternative to the behavior of `get_event_loop()`
    from Python < 3.10, returning the `get_running_loop()` if an event loop
    exists, or returning a new one with `new_event_loop()` otherwise.
    """
    try:
        return asyncio.get_running_loop()
    except RuntimeError:
        # No running loop in this thread; hand back a fresh one.
        return asyncio.new_event_loop()


def get_ucxpy_logger():
    """
    Get UCX-Py logger with custom formatting

    Returns
    -------
    logger : logging.Logger
        Logger object

    Examples
    --------
    >>> logger = get_ucxpy_logger()
    >>> logger.warning("Test")
    [1585175070.2911468] [dgx12:1054] UCXPY WARNING Test
    """

    # `getLevelName` maps a level name (e.g. "WARNING") to its numeric value.
    _level_enum = logging.getLevelName(os.getenv("UCXPY_LOG_LEVEL", "WARNING"))
    logger = logging.getLogger("ucx")

    # Avoid duplicate logging
    logger.propagate = False

    class LoggingFilter(logging.Filter):
        def filter(self, record):
            record.hostname = socket.gethostname()
            record.timestamp = str("%.6f" % time.time())
            return True

    # Attach the handler only once; calling this function repeatedly would
    # otherwise stack handlers and emit every record multiple times.
    if not logger.handlers:
        formatter = logging.Formatter(
            "[%(timestamp)s] [%(hostname)s:%(process)d] UCXPY %(levelname)s %(message)s"
        )

        handler = logging.StreamHandler()
        handler.setFormatter(formatter)
        handler.addFilter(LoggingFilter())
        logger.addHandler(handler)

    logger.setLevel(_level_enum)

    return logger


def hash64bits(*args):
    """64 bit unsigned hash of `args`"""
    # 64 bits hexdigest
    h = hashlib.sha1(bytes(repr(args), "utf-8")).hexdigest()[:16]
    # Convert to an integer and return
    return int(h, 16)


def hmean(a):
    """Harmonic mean of `a`.

    Parameters
    ----------
    a : array_like
        Numbers to average; accepts any array-like (list, tuple, ndarray),
        not only an ndarray. Elements should be non-zero, otherwise the
        result follows NumPy's division-by-zero semantics.

    Returns
    -------
    float
        The harmonic mean, or 0 for empty input.
    """
    a = np.asarray(a)
    if a.size:
        return 1 / np.mean(1 / a)
    else:
        return 0