├── .gitignore ├── doc ├── spec │ ├── source │ │ ├── spelling_wordlist.txt.license │ │ ├── spelling_wordlist.txt │ │ ├── views │ │ │ ├── all_view.rst │ │ │ ├── views.rst │ │ │ ├── iota_view.rst │ │ │ ├── drop_view.rst │ │ │ ├── take_view.rst │ │ │ ├── counted_view.rst │ │ │ ├── transform_view.rst │ │ │ ├── sliding_view.rst │ │ │ └── zip_view.rst │ │ ├── containers │ │ │ ├── sp_dense_matrix.rst │ │ │ ├── sp_sparse_matrix.rst │ │ │ ├── sp_distributed_vector.rst │ │ │ ├── containers.rst │ │ │ ├── mp_distributed_vector.rst │ │ │ └── mp_distributed_dense_matrix.rst │ │ ├── algorithms │ │ │ ├── sp_matrix_gemm.rst │ │ │ ├── algorithms.rst │ │ │ ├── sort.rst │ │ │ ├── iota.rst │ │ │ ├── sp_matrix_gemv.rst │ │ │ ├── for_each.rst │ │ │ ├── copy.rst │ │ │ ├── transform.rst │ │ │ ├── exclusive_scan.rst │ │ │ ├── fill.rst │ │ │ ├── inclusive_scan.rst │ │ │ └── reduce.rst │ │ ├── arch.rst │ │ ├── intro.rst │ │ └── index.rst │ ├── Makefile │ ├── README.rst │ └── make.bat ├── presentations │ ├── Distributed Ranges.pdf.license │ ├── Distributed Ranges.pdf │ ├── Distributed Ranges, why you need it.pdf.license │ └── Distributed Ranges, why you need it.pdf └── developer │ ├── testing │ ├── ctest.rst │ └── pre-commit.rst │ └── design │ ├── namespaces.rst │ └── include.rst ├── include ├── dr │ ├── detail │ │ ├── mdarray_shim.hpp │ │ ├── mdspan_shim.hpp │ │ ├── format_shim.hpp │ │ ├── ranges_shim.hpp │ │ ├── utils.hpp │ │ ├── ranges_utils.hpp │ │ ├── tuple_utils.hpp │ │ ├── remote_subrange.hpp │ │ ├── owning_view.hpp │ │ ├── enumerate.hpp │ │ └── view_detectors.hpp │ ├── sp │ │ ├── algorithms │ │ │ ├── matrix │ │ │ │ └── matrix_algorithms.hpp │ │ │ ├── algorithms.hpp │ │ │ ├── iota.hpp │ │ │ ├── execution_policy.hpp │ │ │ ├── equal.hpp │ │ │ └── for_each.hpp │ │ ├── range_adaptors.hpp │ │ ├── containers │ │ │ ├── detail.hpp │ │ │ └── duplicated_vector.hpp │ │ ├── views │ │ │ ├── views.hpp │ │ │ ├── standard_views.hpp │ │ │ └── enumerate.hpp │ │ ├── device_vector.hpp │ │ ├── future.hpp │ 
│ ├── device_ref.hpp │ │ └── span.hpp │ ├── views │ │ ├── views.hpp │ │ └── iota.hpp │ ├── mp │ │ ├── common_support.hpp │ │ ├── algorithms │ │ │ ├── fill.hpp │ │ │ ├── iota.hpp │ │ │ ├── transform.hpp │ │ │ ├── for_each.hpp │ │ │ ├── equal.hpp │ │ │ ├── exclusive_scan.hpp │ │ │ └── copy.hpp │ │ ├── containers │ │ │ ├── distribution.hpp │ │ │ └── broadcasted_vector.hpp │ │ ├── allocator.hpp │ │ ├── views │ │ │ ├── enumerate.hpp │ │ │ └── sliding.hpp │ │ └── alignment.hpp │ ├── sp.hpp │ └── concepts │ │ └── concepts.hpp ├── vendor │ └── source_location │ │ ├── README.rst │ │ └── source_location.hpp └── CMakeLists.txt ├── CHANGELOG.md ├── test ├── gtest │ ├── serial │ │ ├── CMakeLists.txt │ │ └── serial-tests.cpp │ ├── sp │ │ ├── copy.hpp │ │ ├── containers.hpp │ │ ├── fill.cpp │ │ ├── xp-tests.hpp │ │ ├── detail.cpp │ │ ├── CMakeLists.txt │ │ ├── sp-tests.cpp │ │ ├── copy-3.cpp │ │ └── containers-3.cpp │ ├── common │ │ ├── all.cpp │ │ ├── enumerate.cpp │ │ ├── fill.cpp │ │ ├── transform.cpp │ │ ├── equal.cpp │ │ ├── iota.cpp │ │ ├── iota_view.cpp │ │ ├── copy.cpp │ │ ├── reduce.cpp │ │ ├── count.cpp │ │ └── subrange.cpp │ └── mp │ │ ├── copy.cpp │ │ ├── mpsort.cpp │ │ ├── reduce.cpp │ │ ├── stencil.cpp │ │ ├── communicator-3.cpp │ │ ├── segments.cpp │ │ ├── communicator.cpp │ │ ├── alignment.cpp │ │ ├── xp-tests.hpp │ │ ├── broadcasted_vector.cpp │ │ └── mp-tests.cpp ├── cmake-application │ ├── CMakeLists.txt │ └── mp-app.cpp └── fuzz │ ├── README.rst │ └── cpu │ ├── cpu-fuzz.hpp │ ├── CMakeLists.txt │ ├── algorithms.cpp │ └── cpu-fuzz.cpp ├── examples ├── include │ ├── utils.hpp │ ├── vector-add-serial.hpp │ ├── mpi-utils.hpp │ ├── transpose-serial.hpp │ └── data-utils.hpp ├── serial │ ├── transpose-serial.cpp │ ├── vector-add-serial.cpp │ └── CMakeLists.txt ├── sp │ ├── sparse_test.cpp │ ├── sort.cpp │ ├── matrix_example.cpp │ ├── gemv_example.cpp │ ├── take_example.cpp │ ├── CMakeLists.txt │ ├── dot_product.cpp │ ├── vector_example.cpp │ ├── zip_example.cpp │ ├── 
inclusive_scan_example.cpp │ └── exclusive_scan_example.cpp └── mp │ ├── vector-add.cpp │ ├── hello_world.cpp │ ├── CMakeLists.txt │ ├── stencil-slide.cpp │ ├── vector-add-ref.cpp │ └── transpose-cpu.cpp ├── scripts ├── install-doxygen.sh ├── setvars-2023.2.1.sh ├── devcloud-benchmark.sh ├── regenerate-requirements-txt.sh ├── borealis-benchmark.sh ├── run_command_on_compute_node.sh └── devcloud-test.sh ├── benchmarks ├── gbench │ ├── common │ │ └── stream.cpp │ ├── mp │ │ ├── mpi.cpp │ │ ├── streammp.cpp │ │ ├── mdspan.cpp │ │ ├── rooted.cpp │ │ └── wave_utils.hpp │ ├── CMakeLists.txt │ ├── README.rst │ └── sp │ │ ├── CMakeLists.txt │ │ └── gemm.cpp └── README.rst ├── .cmake-format.py ├── base-requirements.txt ├── SECURITY.md ├── .github └── dependabot.yml ├── requirements.txt ├── LICENSES └── BSD-3-Clause.txt └── .pre-commit-config.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Intel Corporation 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | *egg-info 6 | ._* 7 | -------------------------------------------------------------------------------- /doc/spec/source/spelling_wordlist.txt.license: -------------------------------------------------------------------------------- 1 | SPDX-FileCopyrightText: Intel Corporation 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | -------------------------------------------------------------------------------- /doc/presentations/Distributed Ranges.pdf.license: -------------------------------------------------------------------------------- 1 | SPDX-FileCopyrightText: Intel Corporation 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | -------------------------------------------------------------------------------- /doc/presentations/Distributed Ranges.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oneapi-src/distributed-ranges/HEAD/doc/presentations/Distributed 
Ranges.pdf -------------------------------------------------------------------------------- /doc/presentations/Distributed Ranges, why you need it.pdf.license: -------------------------------------------------------------------------------- 1 | SPDX-FileCopyrightText: Intel Corporation 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | -------------------------------------------------------------------------------- /doc/presentations/Distributed Ranges, why you need it.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oneapi-src/distributed-ranges/HEAD/doc/presentations/Distributed Ranges, why you need it.pdf -------------------------------------------------------------------------------- /include/dr/detail/mdarray_shim.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | -------------------------------------------------------------------------------- /doc/spec/source/spelling_wordlist.txt: -------------------------------------------------------------------------------- 1 | accessor 2 | adaptor 3 | Allocator 4 | allocators 5 | Decompositions 6 | Dereference 7 | Dereferencing 8 | Instantiations 9 | scalability 10 | subspans 11 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 6 | 7 | # Distributed Ranges changelog 8 | 9 | ## v.X.X.X 10 | * Placeholder for the first release 11 | -------------------------------------------------------------------------------- /include/dr/sp/algorithms/matrix/matrix_algorithms.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 
5 | #pragma once 6 | 7 | #include 8 | #include 9 | -------------------------------------------------------------------------------- /include/dr/detail/mdspan_shim.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #define MDSPAN_NAMESPACE std::experimental 9 | namespace md = MDSPAN_NAMESPACE; 10 | -------------------------------------------------------------------------------- /include/vendor/source_location/README.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | ================= 6 | source_location 7 | ================= 8 | 9 | Source: github_ 10 | 11 | .. _github: https://github.com/paweldac/source_location 12 | -------------------------------------------------------------------------------- /test/gtest/serial/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Intel Corporation 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | add_executable(serial-tests serial-tests.cpp) 6 | 7 | target_link_libraries(serial-tests GTest::gtest_main) 8 | 9 | add_test(NAME serial-tests COMMAND ./serial-tests) 10 | -------------------------------------------------------------------------------- /include/dr/detail/format_shim.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #ifdef DR_FORMAT 8 | #include 9 | #include 10 | #endif 11 | 12 | // Workaround for doxygen warning about internal inconsistency 13 | namespace fmt {} 14 | -------------------------------------------------------------------------------- 
/examples/include/utils.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #ifdef MPI_VERSION 14 | #include "mpi-utils.hpp" 15 | #endif 16 | 17 | #include "data-utils.hpp" 18 | -------------------------------------------------------------------------------- /scripts/install-doxygen.sh: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Intel Corporation 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | set -xe 6 | curl -s https://www.doxygen.nl/files/doxygen-1.9.6.linux.bin.tar.gz -o /tmp/dox.tgz 7 | sudo tar zxf /tmp/dox.tgz -C /usr/local 8 | sudo ln -s /usr/local/doxygen*/bin/* /usr/bin 9 | # class diagrams need dot 10 | sudo apt install -y graphviz 11 | -------------------------------------------------------------------------------- /scripts/setvars-2023.2.1.sh: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Intel Corporation 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | source /opt/intel/oneapi/compiler/2023.2.1/env/vars.sh 6 | source /opt/intel/oneapi/tbb/2021.10.0/env/vars.sh 7 | source /opt/intel/oneapi/mkl/2023.2.0/env/vars.sh 8 | source /opt/intel/oneapi/mpi/2021.10.0/env/vars.sh 9 | source /opt/intel/oneapi/dpl/2022.2.0/env/vars.sh 10 | -------------------------------------------------------------------------------- /scripts/devcloud-benchmark.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | 3 | # SPDX-FileCopyrightText: Intel Corporation 4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | source scripts/setvars-2023.2.1.sh 8 | set -e 9 | hostname 10 | 11 | # SLURM/MPI integration is broken 12 | unset SLURM_TASKS_PER_NODE 13 | unset SLURM_JOBID 14 | 15 | cmake -B build -DENABLE_SYCL=on 16 | cmake --build build -j --target devcloud-bench 17 | -------------------------------------------------------------------------------- /doc/developer/testing/ctest.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | ======= 6 | CTest 7 | ======= 8 | 9 | We use ``ctest``, which is part of ``cmake``, as a top-level test 10 | runner. It runs Google Test and the examples. To invoke tests:: 11 | 12 | ctest 13 | 14 | or:: 15 | 16 | make test 17 | 18 | To see more output, do:: 19 | 20 | ctest -VV 21 | -------------------------------------------------------------------------------- /test/gtest/sp/copy.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | #pragma once 5 | #include "xp-tests.hpp" 6 | 7 | template class CopyTest : public testing::Test { 8 | public: 9 | using DistVec = 10 | dr::sp::distributed_vector; 11 | using LocalVec = std::vector; 12 | }; 13 | -------------------------------------------------------------------------------- /examples/serial/transpose-serial.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "transpose-serial.hpp" 6 | #include "utils.hpp" 7 | 8 | int main(int argc, char *argv[]) { 9 | transpose_serial t; 10 | 11 | t.init(2, 6); 12 | t.compute(); 13 | 14 | fmt::print("a: {}\n", t.a); 15 | fmt::print("b: 
{}\n", t.b); 16 | 17 | return 0; 18 | } 19 | -------------------------------------------------------------------------------- /test/gtest/sp/containers.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | #pragma once 5 | #include "xp-tests.hpp" 6 | 7 | template class DistributedVectorTest : public testing::Test { 8 | public: 9 | using DistVec = 10 | dr::sp::distributed_vector; 11 | using LocalVec = std::vector; 12 | }; 13 | -------------------------------------------------------------------------------- /doc/spec/source/views/all_view.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | .. include:: ../include/distributed-ranges.rst 6 | 7 | .. _all_view: 8 | 9 | ============= 10 | ``all_view`` 11 | ============= 12 | 13 | Interface 14 | ========= 15 | 16 | .. code-block:: cpp 17 | 18 | dr::mp::views::all 19 | dr::sp::views::all 20 | 21 | For interface, see `std::ranges::views::all`_ 22 | -------------------------------------------------------------------------------- /doc/spec/source/views/views.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | ======= 6 | Views 7 | ======= 8 | 9 | Views can reference collections of objects but do not own the storage. 10 | 11 | .. toctree:: 12 | :maxdepth: 1 13 | 14 | all_view 15 | counted_view 16 | drop_view 17 | iota_view 18 | take_view 19 | sliding_view 20 | transform_view 21 | zip_view 22 | -------------------------------------------------------------------------------- /doc/spec/source/views/iota_view.rst: -------------------------------------------------------------------------------- 1 | .. 
SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | .. include:: ../include/distributed-ranges.rst 6 | 7 | .. _iota_view: 8 | 9 | ============== 10 | ``iota_view`` 11 | ============== 12 | 13 | Interface 14 | ========= 15 | 16 | .. code-block:: cpp 17 | 18 | dr::mp::views::iota 19 | dr::sp::views::iota 20 | 21 | For interface, see `std::ranges::views::iota <https://en.cppreference.com/w/cpp/ranges/iota_view>`__ 22 | -------------------------------------------------------------------------------- /benchmarks/gbench/common/stream.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "stream.hpp" 6 | 7 | using VecT = xp::distributed_vector; 8 | DR_BENCHMARK(Stream_Copy)->Name("Stream_Copy_DR"); 9 | DR_BENCHMARK(Stream_Scale)->Name("Stream_Scale_DR"); 10 | DR_BENCHMARK(Stream_Add)->Name("Stream_Add_DR"); 11 | DR_BENCHMARK(Stream_Triad)->Name("Stream_Triad_DR"); 12 | -------------------------------------------------------------------------------- /doc/spec/source/views/drop_view.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | .. include:: ../include/distributed-ranges.rst 6 | 7 | .. _drop_view: 8 | 9 | ============== 10 | ``drop_view`` 11 | ============== 12 | 13 | Interface 14 | ========= 15 | 16 | .. code-block:: cpp 17 | 18 | dr::mp::views::drop 19 | dr::sp::views::drop 20 | 21 | For interface, see `std::ranges::views::drop`_ 22 | -------------------------------------------------------------------------------- /doc/spec/source/views/take_view.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | .. 
include:: ../include/distributed-ranges.rst 6 | 7 | .. _take_view: 8 | 9 | ============== 10 | ``take_view`` 11 | ============== 12 | 13 | Interface 14 | ========= 15 | 16 | .. code-block:: cpp 17 | 18 | dr::mp::views::take 19 | dr::sp::views::take 20 | 21 | For interface, see `std::ranges::views::take`_ 22 | -------------------------------------------------------------------------------- /scripts/regenerate-requirements-txt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # SPDX-FileCopyrightText: Intel Corporation 3 | # 4 | # SPDX-License-Identifier: BSD-3-Clause 5 | 6 | VDIR=regenerate-venv 7 | python3 -m venv --clear ${VDIR} 8 | source ${VDIR}/bin/activate 9 | pip install -r base-requirements.txt 10 | pip freeze > requirements.txt 11 | reuse addheader --exclude-year --license BSD-3-Clause --copyright "Intel Corporation" requirements.txt 12 | git diff requirements.txt 13 | -------------------------------------------------------------------------------- /include/dr/sp/range_adaptors.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | namespace dr::sp { 11 | 12 | template auto enumerate(R &&r) { 13 | auto i = rng::views::iota(uint32_t(0), uint32_t(rng::size(r))); 14 | return dr::sp::zip_view(i, r); 15 | } 16 | 17 | } // namespace dr::sp 18 | -------------------------------------------------------------------------------- /scripts/borealis-benchmark.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | 3 | # SPDX-FileCopyrightText: Intel Corporation 4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | set -e 7 | 8 | cd $PBS_O_WORKDIR 9 | 10 | echo "Host: " $(hostname) 11 | echo "CWD: " $(pwd) 12 | module list 13 | 14 | source venv/bin/activate 15 | 16 | printenv > build/envdump.txt 17 | 18 | # Builds dependencies and runs benchmarks 19 | cmake --build build -j --target aurora-bench > build/cmake-output-1.txt 2>&1 20 | -------------------------------------------------------------------------------- /doc/spec/source/containers/sp_dense_matrix.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | .. include:: ../include/distributed-ranges.rst 6 | 7 | .. _sp_dense_matrix: 8 | 9 | =============================== 10 | ``dr::sp::dense_matrix`` 11 | =============================== 12 | 13 | Interface 14 | ========= 15 | 16 | .. doxygenclass:: dr::sp::dense_matrix 17 | :members: 18 | 19 | Description 20 | =========== 21 | -------------------------------------------------------------------------------- /doc/spec/source/containers/sp_sparse_matrix.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | .. include:: ../include/distributed-ranges.rst 6 | 7 | .. _sp_sparse_matrix: 8 | 9 | =============================== 10 | ``dr::sp::sparse_matrix`` 11 | =============================== 12 | 13 | Interface 14 | ========= 15 | 16 | .. doxygenclass:: dr::sp::sparse_matrix 17 | :members: 18 | 19 | Description 20 | =========== 21 | -------------------------------------------------------------------------------- /doc/spec/source/views/counted_view.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 
3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | .. include:: ../include/distributed-ranges.rst 6 | 7 | .. _counted_view: 8 | 9 | ================= 10 | ``counted_view`` 11 | ================= 12 | 13 | Interface 14 | ========= 15 | 16 | .. code-block:: cpp 17 | 18 | dr::mp::views::counted 19 | dr::sp::views::counted 20 | 21 | For interface, see `std::ranges::views::counted`_ 22 | -------------------------------------------------------------------------------- /examples/serial/vector-add-serial.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "vector-add-serial.hpp" 6 | #include "utils.hpp" 7 | 8 | int main(int argc, char *argv[]) { 9 | vector_add_serial adder; 10 | 11 | adder.init(10); 12 | adder.compute(); 13 | 14 | fmt::print("a: {}\n", adder.a); 15 | fmt::print("b: {}\n", adder.b); 16 | fmt::print("c: {}\n", adder.c); 17 | 18 | return 0; 19 | } 20 | -------------------------------------------------------------------------------- /doc/spec/source/views/transform_view.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | .. include:: ../include/distributed-ranges.rst 6 | 7 | .. _transform_view: 8 | 9 | =================== 10 | ``transform_view`` 11 | =================== 12 | 13 | Interface 14 | ========= 15 | 16 | .. 
code-block:: cpp 17 | 18 | dr::mp::views::transform 19 | dr::sp::views::transform 20 | 21 | For interface, see `std::ranges::views::transform`_ 22 | -------------------------------------------------------------------------------- /.cmake-format.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Intel Corporation 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | # Useful links about creating this config file: 6 | # https://cmake-format.readthedocs.io/en/latest/configuration.html 7 | # https://cmake-format.readthedocs.io/en/latest/configopts.html?highlight=layout_passes 8 | # https://cmake-format.readthedocs.io/en/latest/format-algorithm.html 9 | 10 | with section("format"): # noqa: F821 11 | layout_passes = {"PargGroupNode": [(0, False)]} 12 | -------------------------------------------------------------------------------- /base-requirements.txt: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Intel Corporation 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | # procedure of adding a new python package to our repo 6 | # 1. edit base-requirements.txt 7 | # 2. run scripts/regenerate-requirements-txt.sh 8 | # 3. review changes made in requirements.txt and if ok, commit changes 9 | 10 | # Testing 11 | pre-commit 12 | reuse 13 | 14 | # Documentation 15 | breathe 16 | sphinx 17 | sphinx-book-theme 18 | pydata-sphinx-theme 19 | 20 | sphinxcontrib-spelling 21 | -------------------------------------------------------------------------------- /include/dr/views/views.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | namespace dr { 11 | 12 | // returns range: [(rank, element) ...] 
13 | auto ranked_view(const dr::distributed_range auto &r) { 14 | auto rank = [](auto &&v) { return dr::ranges::rank(&v); }; 15 | return rng::views::zip(rng::views::transform(r, rank), r); 16 | } 17 | 18 | } // namespace dr 19 | -------------------------------------------------------------------------------- /benchmarks/gbench/mp/mpi.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "../common/dr_bench.hpp" 6 | 7 | static void Barrier(benchmark::State &state) { 8 | for (auto _ : state) { 9 | dr::mp::barrier(); 10 | } 11 | } 12 | 13 | DR_BENCHMARK_BASE(Barrier)->Iterations(1000000); 14 | 15 | static void Fence(benchmark::State &state) { 16 | for (auto _ : state) { 17 | dr::mp::fence(); 18 | } 19 | } 20 | 21 | DR_BENCHMARK_BASE(Fence)->Iterations(100000000); 22 | -------------------------------------------------------------------------------- /doc/spec/source/algorithms/sp_matrix_gemm.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | .. include:: ../include/distributed-ranges.rst 6 | 7 | .. _gemm: 8 | 9 | ====================== 10 | ``gemm`` 11 | ====================== 12 | 13 | Interface 14 | ========= 15 | 16 | .. 
doxygenfunction:: dr::sp::gemm(distributed_dense_matrix &a, distributed_dense_matrix &b, distributed_dense_matrix &c) 17 | 18 | Description 19 | =========== 20 | 21 | 22 | Examples 23 | ======== 24 | -------------------------------------------------------------------------------- /include/dr/mp/common_support.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | // file for helper functions implemented for both SYCL and non-SYCL compilations 8 | 9 | namespace dr::mp::__detail { 10 | 11 | template void copy(const T *src, T *dst, std::size_t sz) { 12 | if (mp::use_sycl()) { 13 | sycl_copy(src, dst, sz); 14 | } else { 15 | memcpy(dst, src, sz * sizeof(T)); 16 | } 17 | } 18 | 19 | } // namespace dr::mp::__detail 20 | -------------------------------------------------------------------------------- /doc/spec/source/containers/sp_distributed_vector.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | .. include:: ../include/distributed-ranges.rst 6 | 7 | .. _sp_distributed_vector: 8 | 9 | =============================== 10 | ``dr::sp::distributed_vector`` 11 | =============================== 12 | 13 | Interface 14 | ========= 15 | 16 | .. doxygenstruct:: dr::sp::distributed_vector 17 | :members: 18 | 19 | Description 20 | =========== 21 | 22 | .. seealso:: 23 | 24 | `std::vector`_ 25 | -------------------------------------------------------------------------------- /test/gtest/serial/serial-tests.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | 7 | // Demonstrate some basic assertions. 
8 | TEST(CpuTest, BasicAssertions) { 9 | // Expect two strings not to be equal. 10 | EXPECT_STRNE("hello", "world"); 11 | // Expect equality. 12 | EXPECT_EQ(7 * 6, 42); 13 | } 14 | 15 | int main(int argc, char *argv[]) { 16 | ::testing::InitGoogleTest(&argc, argv); 17 | auto res = RUN_ALL_TESTS(); 18 | 19 | return res; 20 | } 21 | -------------------------------------------------------------------------------- /benchmarks/gbench/mp/streammp.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #ifdef DRISHMEM 6 | #include "../common/stream.hpp" 7 | 8 | using VectT = dr::mp::distributed_vector; 9 | DR_BENCHMARK(Stream_Copy)->Name("Stream_Copy_DR_ishmem"); 10 | DR_BENCHMARK(Stream_Scale)->Name("Stream_Scale_DR_ishmem"); 11 | DR_BENCHMARK(Stream_Add)->Name("Stream_Add_DR_ishmem"); 12 | DR_BENCHMARK(Stream_Triad)->Name("Stream_Triad_DR_ishmem"); 13 | #endif 14 | -------------------------------------------------------------------------------- /doc/spec/source/views/sliding_view.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | .. include:: ../include/distributed-ranges.rst 6 | 7 | .. _sliding_view: 8 | 9 | ================= 10 | ``sliding_view`` 11 | ================= 12 | 13 | Interface 14 | ========= 15 | 16 | 17 | .. code-block:: cpp 18 | 19 | dr::mp::views::sliding 20 | dr::sp::views::sliding 21 | 22 | Description 23 | =========== 24 | 25 | .. 
seealso:: 26 | 27 | `std::ranges::views::slide`_ 28 | Standard C++ view 29 | -------------------------------------------------------------------------------- /examples/serial/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Intel Corporation 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | add_executable(vector-add-serial vector-add-serial.cpp) 6 | target_link_libraries(vector-add-serial PRIVATE DR::mpi fmt::fmt) 7 | add_test(vector-add-serial ./vector-add-serial) 8 | 9 | add_executable(transpose-serial transpose-serial.cpp) 10 | target_link_libraries(transpose-serial PRIVATE MKL::MKL range-v3 DR::mpi 11 | fmt::fmt) 12 | add_test(transpose-serial ./transpose-serial) 13 | -------------------------------------------------------------------------------- /include/dr/detail/ranges_shim.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | // TODO: use libstdc++ 13.0 or greater if available. 8 | 9 | // #define DR_USE_STD_RANGES 10 | 11 | #ifdef DR_USE_STD_RANGES 12 | 13 | #include 14 | 15 | namespace rng = ::std::ranges; 16 | 17 | #define DR_RANGES_NAMESPACE std::ranges 18 | 19 | #else 20 | 21 | #include 22 | 23 | namespace rng = ::ranges; 24 | 25 | #define DR_RANGES_NAMESPACE ranges 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /doc/spec/source/containers/containers.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | ============ 6 | Containers 7 | ============ 8 | 9 | *Containers* own storage. 
10 | 11 | The storage for a *distributed container* is divided over multiple 12 | processes and can be accessed by any process in the team. 13 | 14 | .. toctree:: 15 | :maxdepth: 1 16 | 17 | mp_distributed_vector 18 | mp_distributed_dense_matrix 19 | 20 | sp_distributed_vector 21 | sp_dense_matrix 22 | sp_sparse_matrix 23 | -------------------------------------------------------------------------------- /test/cmake-application/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Intel Corporation 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | cmake_minimum_required(VERSION 3.20) 6 | project(cmake-applications) 7 | 8 | find_package(MPI REQUIRED) 9 | include(FetchContent) 10 | FetchContent_Declare( 11 | dr 12 | GIT_REPOSITORY https://github.com/oneapi-src/distributed-ranges.git 13 | GIT_TAG main) 14 | FetchContent_MakeAvailable(dr) 15 | 16 | set(CMAKE_CXX_STANDARD 20) 17 | add_executable(mp-app mp-app.cpp) 18 | target_link_libraries(mp-app MPI::MPI_CXX DR::mpi) 19 | -------------------------------------------------------------------------------- /test/fuzz/README.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | =========== 6 | Fuzz Test 7 | =========== 8 | 9 | Build the test:: 10 | 11 | CXX=clang++ cmake -B build 12 | cd build/fuzz/cpu 13 | make -j 14 | ./cpu-fuzz -max_len=16 15 | 16 | The command asserts when it finds an error. Otherwise it runs forever 17 | so kill it to stop testing. When it finds an error, it writes the 18 | input to a file in the current directory. To run again for just that 19 | input:: 20 | 21 | ./cpu-fuzz . . 
22 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 6 | 7 | # Security Policy 8 | 9 | Intel is committed to rapidly addressing security vulnerabilities 10 | affecting our customers and providing clear guidance on the solution, 11 | impact, severity and mitigation. 12 | 13 | ## Reporting a Vulnerability 14 | 15 | Please report any security vulnerabilities in this project 16 | [utilizing the guidelines here](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html). 17 | -------------------------------------------------------------------------------- /doc/spec/source/algorithms/algorithms.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | ============ 6 | Algorithms 7 | ============ 8 | 9 | Algorithms on distributed ranges. 10 | 11 | .. toctree:: 12 | :maxdepth: 1 13 | 14 | copy 15 | exclusive_scan 16 | fill 17 | for_each 18 | inclusive_scan 19 | iota 20 | reduce 21 | sort 22 | transform 23 | 24 | # transform_reduce 25 | 26 | Algorithms on matrices, dense and sparse 27 | 28 | .. toctree:: 29 | :maxdepth: 1 30 | 31 | sp_matrix_gemm 32 | sp_matrix_gemv 33 | -------------------------------------------------------------------------------- /doc/spec/source/arch.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | ============== 6 | Architecture 7 | ============== 8 | 9 | See `Distributed Ranges, why you need it`_ presentation 10 | and all other materials listed in `Documentation`_ section 11 | on Distributed Ranges repository main page. 12 | 13 | .. 
// Factor n into 2 roughly equal factors.
//
// Returns {p, q} with n == p * q and p >= q, where q is the largest divisor
// of n that does not exceed sqrt(n). A prime n therefore yields {n, 1}.
//
// n == 0 has no meaningful factorization; {0, 1} is returned so the function
// never divides by zero (this is the only case in which p < q).
inline std::tuple<std::size_t, std::size_t> factor(std::size_t n) {
  if (n == 0) {
    return {std::size_t{0}, std::size_t{1}};
  }

  // Initial estimate of floor(sqrt(n)). The root is computed in floating
  // point, so for very large n it can be off by one in either direction;
  // the two loops below correct it using exact integer arithmetic.
  std::size_t q = static_cast<std::size_t>(std::sqrt(static_cast<double>(n)));
  while (q > 1 && q > n / q) {
    q--;
  }
  while (q + 1 <= n / (q + 1)) {
    q++;
  }

  // Walk down to the nearest divisor of n. q == 1 always divides n, so the
  // loop terminates with q >= 1.
  while (q > 1 && n % q != 0) {
    q--;
  }

  return {n / q, q};
}
seealso:: 26 | 27 | `std::vector`_ 28 | -------------------------------------------------------------------------------- /examples/sp/sparse_test.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | #include 7 | 8 | int main(int argc, char **argv) { 9 | auto devices = dr::sp::get_numa_devices(sycl::default_selector_v); 10 | dr::sp::init(devices); 11 | 12 | dr::sp::sparse_matrix x({100, 100}, 0.01); 13 | 14 | printf("%lu x %lu matrix with %lu stored values.\n", x.shape()[0], 15 | x.shape()[1], x.size()); 16 | 17 | for (auto &&[idx, v] : x) { 18 | auto &&[i, j] = idx; 19 | printf("(%lu, %lu): %f\n", i, j, (float)v); 20 | } 21 | 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /doc/spec/source/containers/mp_distributed_dense_matrix.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | .. include:: ../include/distributed-ranges.rst 6 | 7 | .. _distributed_dense_matrix: 8 | 9 | ===================================== 10 | ``dr::mp::distributed_dense_matrix`` 11 | ===================================== 12 | 13 | Interface 14 | ========= 15 | 16 | .. 
// Round n up to the nearest multiple of `multiple`.
//
// A `multiple` of 0 means "no rounding": n is returned unchanged. If n is
// already a multiple it is returned as is. The result wraps around if the
// rounded value does not fit in std::size_t.
inline std::size_t round_up(std::size_t n, std::size_t multiple) {
  if (multiple == 0) {
    return n;
  }

  // Keep the remainder in std::size_t. Storing it in an int truncates the
  // value (and can make it negative) once it exceeds INT_MAX, which corrupts
  // the result for large multiples.
  const std::size_t remainder = n % multiple;
  if (remainder == 0) {
    return n;
  }

  return n + (multiple - remainder);
}
package manifests are located. 7 | # Please see the documentation for all configuration options: 8 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 9 | 10 | version: 2 11 | updates: 12 | - package-ecosystem: "pip" 13 | directory: "/" 14 | schedule: 15 | interval: "weekly" 16 | - package-ecosystem: "github-actions" 17 | directory: "/" 18 | schedule: 19 | interval: "weekly" 20 | -------------------------------------------------------------------------------- /include/dr/detail/ranges_utils.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | namespace dr::__detail { 8 | 9 | // 10 | // std::ranges::enumerate handles unbounded ranges and returns a range 11 | // where end() is a different type than begin(). Most of our code 12 | // assumes std::ranges::common_range. bounded_enumerate requires a 13 | // bounded range and returns a common_range. 
14 | // 15 | template auto bounded_enumerate(R &&r) { 16 | auto size = rng::distance(r); 17 | using W = std::uint32_t; 18 | return rng::views::zip(rng::views::iota(W(0), W(size)), r); 19 | } 20 | 21 | } // namespace dr::__detail 22 | -------------------------------------------------------------------------------- /examples/sp/sort.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | namespace sp = dr::sp; 13 | 14 | int main(int argc, char **argv) { 15 | auto devices = sp::get_numa_devices(sycl::default_selector_v); 16 | sp::init(devices); 17 | 18 | std::size_t n = 32; 19 | 20 | sp::distributed_vector v(n); 21 | 22 | srand48(time(0)); 23 | 24 | for (std::size_t i = 0; i < v.size(); i++) { 25 | v[i] = lrand48() % 1000; 26 | } 27 | 28 | sort(v); 29 | 30 | fmt::print("v: {}\n", v); 31 | 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /include/dr/sp/algorithms/algorithms.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | -------------------------------------------------------------------------------- /doc/spec/source/algorithms/sort.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | .. include:: ../include/distributed-ranges.rst 6 | 7 | .. 
_sort: 8 | 9 | ========== 10 | ``sort`` 11 | ========== 12 | 13 | Interface 14 | ========= 15 | 16 | MP 17 | --- 18 | 19 | SP 20 | --- 21 | 22 | .. doxygenfunction:: dr::sp::sort(R &&r, Compare comp = Compare()) 23 | :outline: 24 | .. doxygenfunction:: dr::sp::sort(RandomIt first, RandomIt last, Compare comp = Compare()) 25 | :outline: 26 | 27 | Description 28 | =========== 29 | 30 | .. seealso:: 31 | 32 | C++ model 33 | `std::sort`_ 34 | C++ model 35 | `std::ranges::sort`_ 36 | 37 | Usage 38 | ===== 39 | -------------------------------------------------------------------------------- /include/dr/sp/views/views.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace dr::sp::views { 13 | 14 | inline constexpr auto all = rng::views::all; 15 | 16 | inline constexpr auto counted = rng::views::counted; 17 | 18 | inline constexpr auto drop = rng::views::drop; 19 | 20 | inline constexpr auto iota = dr::views::iota; 21 | 22 | inline constexpr auto take = rng::views::take; 23 | 24 | inline constexpr auto transform = dr::views::transform; 25 | 26 | } // namespace dr::sp::views 27 | -------------------------------------------------------------------------------- /doc/spec/source/views/zip_view.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | 6 | .. include:: ../include/distributed-ranges.rst 7 | 8 | .. _zip_view: 9 | 10 | ============= 11 | ``zip_view`` 12 | ============= 13 | 14 | Interface 15 | ========= 16 | 17 | MP 18 | --- 19 | 20 | .. doxygenclass:: dr::mp::zip_view 21 | :members: 22 | .. doxygenfunction:: dr::mp::views::zip 23 | :outline: 24 | 25 | SP 26 | --- 27 | 28 | .. 
doxygenclass:: dr::sp::zip_view 29 | :members: 30 | .. doxygenfunction:: dr::sp::views::zip 31 | :outline: 32 | 33 | Description 34 | =========== 35 | 36 | .. seealso:: 37 | 38 | `std::ranges::views::zip`_ 39 | Standard C++ view 40 | -------------------------------------------------------------------------------- /doc/spec/source/intro.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | ============== 6 | Introduction 7 | ============== 8 | 9 | Distributed Ranges is a productivity library for distributed and partitioned memory based on C++ ranges. 10 | It offers a collection of data structures, views, and algorithms for building generic abstractions 11 | and provides interoperability with MPI, SHMEM, SYCL and OpenMP and portability on CPUs and GPUs. 12 | NUMA-aware allocators and distributed data structures facilitate development of C++ applications 13 | on heterogeneous nodes with multiple devices and achieve excellent performance and parallel scalability 14 | by exploiting local compute and data access. 15 | -------------------------------------------------------------------------------- /doc/spec/source/index.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | .. distributed-ranges documentation master file, created by 6 | sphinx-quickstart on Wed Sep 7 15:21:53 2022. 7 | You can adapt this file completely to your liking, but it should at least 8 | contain the root `toctree` directive. 9 | 10 | Distributed Ranges 11 | ================== 12 | 13 | Welcome to the Distributed Ranges Specification. 14 | 15 | .. 
toctree:: 16 | :maxdepth: 1 17 | 18 | intro 19 | arch 20 | concepts 21 | containers/containers 22 | algorithms/algorithms 23 | views/views 24 | 25 | 26 | Indices and tables 27 | ================== 28 | 29 | * :ref:`genindex` 30 | * :ref:`search` 31 | -------------------------------------------------------------------------------- /examples/include/vector-add-serial.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | #include "utils.hpp" 10 | 11 | template struct vector_add_serial { 12 | std::vector a, b, c; 13 | std::size_t size; 14 | 15 | void init(std::size_t n) { 16 | size = n; 17 | a.resize(n); 18 | b.resize(n); 19 | c.resize(n); 20 | 21 | set_step(a, 0); 22 | set_step(b, 10, 10); 23 | set_step(c, 0); 24 | } 25 | 26 | void compute() { 27 | for (std::size_t i = 0; i < size; i++) { 28 | c[i] = a[i] + b[i]; 29 | } 30 | } 31 | 32 | void check(std::vector &result) { assert(::check(result, c) == 0); } 33 | }; 34 | -------------------------------------------------------------------------------- /benchmarks/gbench/mp/mdspan.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "../common/dr_bench.hpp" 6 | 7 | using T = double; 8 | 9 | static void MdspanUtil_Pack(benchmark::State &state) { 10 | std::vector a(num_rows * num_columns); 11 | std::vector b(num_rows * num_columns); 12 | 13 | Stats stats(state, sizeof(T) * a.size(), sizeof(T) * b.size()); 14 | 15 | for (auto _ : state) { 16 | for (std::size_t i = 0; i < default_repetitions; i++) { 17 | stats.rep(); 18 | dr::__detail::mdspan_copy( 19 | md::mdspan(a.data(), std::array{num_rows, num_columns}), b.begin()) 20 | .wait(); 21 | } 22 | } 23 | } 24 | 25 | DR_BENCHMARK(MdspanUtil_Pack); 26 | 
-------------------------------------------------------------------------------- /include/dr/views/iota.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | namespace dr::views { 8 | 9 | // 10 | // range-v3 iota uses sentinels that are not the same type as the 11 | // iterator. A zip that uses an iota has the same issue. Make our own. 12 | // 13 | 14 | struct iota_fn_ { 15 | template auto operator()(W value) const { 16 | return rng::views::iota(value, std::numeric_limits::max()); 17 | } 18 | 19 | template 20 | auto operator()(W value, Bound bound) const { 21 | return rng::views::iota(value, W(bound)); 22 | } 23 | }; 24 | 25 | inline constexpr auto iota = iota_fn_{}; 26 | 27 | } // namespace dr::views 28 | -------------------------------------------------------------------------------- /test/gtest/common/all.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "xp-tests.hpp" 6 | 7 | // Fixture 8 | template class All : public testing::Test { 9 | public: 10 | }; 11 | 12 | TYPED_TEST_SUITE(All, AllTypes); 13 | 14 | TYPED_TEST(All, Basic) { 15 | Ops1 ops(10); 16 | 17 | auto local = rng::views::all(ops.vec); 18 | auto dist = xp::views::all(ops.dist_vec); 19 | static_assert(compliant_view); 20 | EXPECT_TRUE(check_view(local, dist)); 21 | } 22 | 23 | TYPED_TEST(All, Mutate) { 24 | Ops1 ops(10); 25 | 26 | EXPECT_TRUE(check_mutate_view(ops, rng::views::all(ops.vec), 27 | xp::views::all(ops.dist_vec))); 28 | } 29 | -------------------------------------------------------------------------------- /examples/include/mpi-utils.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // 
SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | template inline MPI_Datatype mpi_data_type() { 8 | assert(false); 9 | return MPI_CHAR; 10 | } 11 | template <> inline MPI_Datatype mpi_data_type() { return MPI_INT; } 12 | template <> inline MPI_Datatype mpi_data_type() { return MPI_INT; } 13 | template <> inline MPI_Datatype mpi_data_type() { return MPI_FLOAT; } 14 | template <> inline MPI_Datatype mpi_data_type() { 15 | return MPI_FLOAT; 16 | } 17 | template <> inline MPI_Datatype mpi_data_type() { return MPI_DOUBLE; } 18 | template <> inline MPI_Datatype mpi_data_type() { 19 | return MPI_DOUBLE; 20 | } 21 | -------------------------------------------------------------------------------- /test/gtest/mp/copy.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "xp-tests.hpp" 6 | 7 | // Fixture 8 | 9 | template class CopyMP : public testing::Test { 10 | public: 11 | }; 12 | 13 | TYPED_TEST_SUITE(CopyMP, AllTypes); 14 | 15 | const std::size_t root = 0; 16 | 17 | TYPED_TEST(CopyMP, Dist2Local) { 18 | Ops2 ops(10); 19 | 20 | dr::mp::copy(root, ops.dist_vec0, ops.vec1.begin()); 21 | 22 | if (comm_rank == root) { 23 | EXPECT_EQ(ops.vec0, ops.vec1); 24 | } 25 | } 26 | 27 | TYPED_TEST(CopyMP, Local2Dist) { 28 | Ops2 ops(10); 29 | 30 | dr::mp::copy(root, ops.vec0, ops.dist_vec1.begin()); 31 | 32 | if (comm_rank == root) { 33 | EXPECT_EQ(ops.vec0, ops.dist_vec1); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /include/dr/detail/tuple_utils.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | namespace dr::__detail { 8 | 9 | auto tuple_transform(auto tuple, auto op) { 10 | auto transform = [op](auto 
#!/bin/bash

# SPDX-FileCopyrightText: Intel Corporation
#
# SPDX-License-Identifier: BSD-3-Clause

# Run a command on a compute node and tee its combined stdout/stderr to a
# log file.
#
# Usage: run_command_on_compute_node.sh LOGFILE COMMAND [ARGS...]
set -e

# needed for tasks running under PBS
cd "${PBS_O_WORKDIR:-.}"

# needed for tasks running under SLURM, as SLURM/MPI integration is broken
unset SLURM_TASKS_PER_NODE
unset SLURM_JOBID

# workaround for missing libaccel-config.so.1 on jfpvc compute nodes.
# Only insert the ':' separator when LD_LIBRARY_PATH is already set: an empty
# leading entry would add the current directory to the library search path.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}/home/dmozog/usr/lib64/"

echo "Host: " $(hostname)
echo "CWD: " $(pwd)
module list

# Both a log file and a command to run are required.
if [ "$#" -le 1 ]; then
  echo "provide as first argument a log file, followed by the command to run" >&2
  exit 1
fi

LOGFILE=$1
mkdir -p "$(dirname "$LOGFILE")"

shift 1

# pipefail makes the script exit with the command's status, not tee's.
set -o pipefail
"$@" 2>&1 | tee "$LOGFILE"
std::vector; 11 | 12 | // disabled until the issue with Intel MPI is solved 13 | // https://github.com/orgs/oneapi-src/projects/15/views/2?pane=issue&itemId=38871430 14 | TEST(MpSort, DISABLED_BigRandom) { 15 | LV v = generate_random(2000000, 10000); 16 | auto size = v.size(); 17 | DV d_v(size); 18 | std::cout << "BigRandom: dv size " << size << std::endl; 19 | dr::mp::copy(0, v, d_v.begin()); 20 | 21 | std::sort(v.begin(), v.end()); 22 | dr::mp::sort(d_v); 23 | 24 | EXPECT_TRUE(equal_gtest(v, d_v)); 25 | } 26 | -------------------------------------------------------------------------------- /test/fuzz/cpu/cpu-fuzz.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include "cxxopts.hpp" 8 | 9 | #include "mpi.h" 10 | 11 | #include "dr/mp.hpp" 12 | 13 | extern MPI_Comm comm; 14 | extern int comm_rank; 15 | extern int comm_size; 16 | 17 | extern void check_copy(std::size_t n, std::size_t b, std::size_t e); 18 | extern void check_transform(std::size_t n, std::size_t b, std::size_t e); 19 | 20 | bool is_equal(rng::range auto &&expected, rng::range auto &&actual) { 21 | for (auto e : rng::zip_view(expected, actual)) { 22 | if (e.first != e.second) { 23 | fmt::print("Expected: {}\n" 24 | "Actual: {}\n", 25 | expected, actual); 26 | return false; 27 | } 28 | } 29 | 30 | return true; 31 | } 32 | -------------------------------------------------------------------------------- /doc/spec/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Intel Corporation 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | # Minimal makefile for Sphinx documentation 6 | # 7 | 8 | # You can set these variables from the command line, and also 9 | # from the environment for the first two. 
10 | SPHINXOPTS ?= -q -W 11 | SPHINXBUILD ?= sphinx-build 12 | SOURCEDIR = source 13 | BUILDDIR = build 14 | 15 | # Put it first so that "make" without argument is like "make help". 16 | help: 17 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 18 | 19 | .PHONY: help Makefile 20 | 21 | # Catch-all target: route all unknown targets to Sphinx using the new 22 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 23 | %: Makefile 24 | doxygen 25 | $(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 26 | -------------------------------------------------------------------------------- /test/cmake-application/mp-app.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "mpi.h" 6 | 7 | #include "dr/mp.hpp" 8 | 9 | using T = int; 10 | 11 | MPI_Comm comm; 12 | int comm_rank; 13 | 14 | const std::size_t n = 10; 15 | 16 | void vector_add() { 17 | dr::mp::distributed_vector a(n), b(n), c(n); 18 | 19 | // Initialize 20 | dr::mp::iota(a, 10); 21 | dr::mp::iota(b, 100); 22 | 23 | auto add = [](auto ops) { return ops.first + ops.second; }; 24 | 25 | dr::mp::transform(dr::mp::views::zip(a, b), c.begin(), add); 26 | } 27 | 28 | int main(int argc, char *argv[]) { 29 | MPI_Init(&argc, &argv); 30 | comm = MPI_COMM_WORLD; 31 | MPI_Comm_rank(comm, &comm_rank); 32 | dr::mp::init(); 33 | 34 | vector_add(); 35 | 36 | MPI_Finalize(); 37 | return 0; 38 | } 39 | -------------------------------------------------------------------------------- /doc/spec/source/algorithms/iota.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | .. include:: ../include/distributed-ranges.rst 6 | 7 | .. 
_iota: 8 | 9 | ========== 10 | ``iota`` 11 | ========== 12 | 13 | Interface 14 | ========= 15 | 16 | MP 17 | --- 18 | 19 | .. doxygenfunction:: dr::mp::iota(R &&r, T value) 20 | :outline: 21 | .. doxygenfunction:: dr::mp::iota(Iter begin, Iter end, T value) 22 | :outline: 23 | 24 | SP 25 | --- 26 | 27 | .. doxygenfunction:: dr::sp::iota(R &&r, T value) 28 | :outline: 29 | .. doxygenfunction:: dr::sp::iota(Iter begin, Iter end, T value) 30 | :outline: 31 | 32 | 33 | 34 | Description 35 | =========== 36 | 37 | .. seealso:: 38 | 39 | `std::ranges::iota`_ 40 | Standard C++ algorithm 41 | `std::iota`_ 42 | Standard C++ algorithm 43 | 44 | Usage 45 | ===== 46 | -------------------------------------------------------------------------------- /doc/spec/README.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | ================== 6 | Editing the Spec 7 | ================== 8 | 9 | Build the spec:: 10 | 11 | make -C doc/spec html 12 | 13 | Open in your browser: ``doc/spec/build/html/index.html`` 14 | 15 | Doxygen html is at: ``doc/spec/build/doxygen-html/index.html`` 16 | 17 | There are pre-commit checks for spelling and broken links. To run it manually:: 18 | 19 | make -C doc/spec linkcheck 20 | make -C doc/spec spelling SPHINXOPTS=-q 21 | 22 | The `SPHINXOPTS` is necessary to disable warning as errors, so you can 23 | see all the spelling errors instead of the first one. Add spelling 24 | exceptions to `spelling_wordlist.txt`. Do not add variable, class, 25 | function, etc to the exceptions. Spellcheck ignores them if they are 26 | properly delimited in the source doc. 
27 | -------------------------------------------------------------------------------- /include/dr/sp/algorithms/iota.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace dr::sp { 15 | 16 | template void iota(R &&r, T value) { 17 | auto iota_view = rng::views::iota(value, T(value + rng::distance(r))); 18 | 19 | for_each(par_unseq, views::zip(iota_view, r), [](auto &&elem) { 20 | auto &&[idx, v] = elem; 21 | v = idx; 22 | }); 23 | } 24 | 25 | template 26 | void iota(Iter begin, Iter end, T value) { 27 | auto r = rng::subrange(begin, end); 28 | iota(r, value); 29 | } 30 | 31 | } // namespace dr::sp 32 | -------------------------------------------------------------------------------- /doc/spec/source/algorithms/sp_matrix_gemv.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | .. include:: ../include/distributed-ranges.rst 6 | 7 | .. _gemv: 8 | 9 | ====================== 10 | ``gemv`` 11 | ====================== 12 | 13 | Interface 14 | ========= 15 | 16 | .. doxygenfunction:: dr::sp::flat_gemv(C &&c, dr::sp::sparse_matrix &a, B &&b) 17 | .. doxygenfunction:: dr::sp::gemv(C &&c, dr::sp::sparse_matrix &a, B &&b, sp::duplicated_vector> &scratch) 18 | .. doxygenfunction:: dr::sp::gemv(C &&c, dr::sp::sparse_matrix &a, B &&b) 19 | .. doxygenfunction:: dr::sp::gemv_square(C &&c, dr::sp::sparse_matrix &a, B &&b) 20 | .. 
doxygenfunction:: dr::sp::gemv_square_copy(C &&c, dr::sp::sparse_matrix &a, B &&b) 21 | 22 | Description 23 | =========== 24 | 25 | 26 | Examples 27 | ======== 28 | -------------------------------------------------------------------------------- /test/gtest/common/enumerate.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "xp-tests.hpp" 6 | 7 | // Fixture 8 | template class Enumerate : public testing::Test { 9 | public: 10 | }; 11 | 12 | TYPED_TEST_SUITE(Enumerate, AllTypes); 13 | 14 | TYPED_TEST(Enumerate, Basic) { 15 | Ops1 ops(10); 16 | 17 | EXPECT_TRUE(check_view(rng::views::enumerate(ops.vec), 18 | xp::views::enumerate(ops.dist_vec))); 19 | } 20 | 21 | TYPED_TEST(Enumerate, Mutate) { 22 | Ops1 ops(10); 23 | auto local = rng::views::enumerate(ops.vec); 24 | auto dist = xp::views::enumerate(ops.dist_vec); 25 | 26 | auto copy = [](auto &&v) { std::get<1>(v) = std::get<0>(v); }; 27 | xp::for_each(dist, copy); 28 | rng::for_each(local, copy); 29 | 30 | EXPECT_EQ(local, dist); 31 | EXPECT_EQ(ops.vec, ops.dist_vec); 32 | } 33 | -------------------------------------------------------------------------------- /examples/include/transpose-serial.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | #include "range/v3/all.hpp" 10 | 11 | #include "mkl.h" 12 | 13 | #include "utils.hpp" 14 | 15 | void transpose(std::size_t rows, std::size_t cols, double *src, double *dst) { 16 | mkl_domatcopy('R', 'T', rows, cols, 1.0, src, cols, dst, rows); 17 | } 18 | 19 | template struct transpose_serial { 20 | std::vector a, b; 21 | std::size_t rows, cols; 22 | 23 | void init(std::size_t r, std::size_t c) { 24 | rows = r; 25 | cols = c; 26 | a.resize(rows * 
cols); 27 | b.resize(rows * cols); 28 | 29 | set_step(a, 0); 30 | set_step(b, 100); 31 | } 32 | 33 | void compute() { transpose(rows, cols, a.data(), b.data()); } 34 | 35 | void check(std::vector &result) { assert(::check(result, b) == 0); } 36 | }; 37 | -------------------------------------------------------------------------------- /test/gtest/sp/fill.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | #include "xp-tests.hpp" 5 | 6 | template class FillTest : public testing::Test { 7 | public: 8 | using DistVec = 9 | dr::sp::distributed_vector; 10 | using LocalVec = std::vector; 11 | }; 12 | 13 | TYPED_TEST_SUITE(FillTest, AllocatorTypes); 14 | 15 | // tests of fill are WIP, below test will be refactored, new tests will be added 16 | TYPED_TEST(FillTest, fill_all) { 17 | typename TestFixture::DistVec dist_vec = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 18 | auto segments = dist_vec.segments(); 19 | int value = 1; 20 | for (auto &&segment : segments) { 21 | dr::sp::fill(segment.begin(), segment.end(), value); 22 | } 23 | EXPECT_TRUE(equal_gtest( 24 | dist_vec, typename TestFixture::DistVec{1, 1, 1, 1, 1, 1, 1, 1, 1, 1})); 25 | ; 26 | } 27 | -------------------------------------------------------------------------------- /include/dr/mp/algorithms/fill.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | namespace dr::mp { 19 | 20 | /// Collective fill on distributed range 21 | auto fill(dr::distributed_contiguous_range auto &&dr, auto value) { 22 | for_each(dr, [=](auto &v) { v = value; }); 23 | return rng::end(dr); 24 | } 25 | 26 | /// Collective 
fill on iterator/sentinel for a distributed range 27 | template 28 | auto fill(DI first, DI last, auto value) { 29 | mp::fill(rng::subrange(first, last), value); 30 | return last; 31 | } 32 | 33 | } // namespace dr::mp 34 | -------------------------------------------------------------------------------- /include/dr/sp/algorithms/execution_policy.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | namespace dr::sp { 12 | 13 | struct device_policy { 14 | device_policy(sycl::device device) : devices_({device}) {} 15 | device_policy(sycl::queue queue) : devices_({queue.get_device()}) {} 16 | 17 | device_policy() : devices_({sycl::queue{}.get_device()}) {} 18 | 19 | template 20 | requires(std::is_same_v, sycl::device>) 21 | device_policy(R &&devices) 22 | : devices_(rng::begin(devices), rng::end(devices)) {} 23 | 24 | std::span get_devices() noexcept { return devices_; } 25 | 26 | std::span get_devices() const noexcept { 27 | return devices_; 28 | } 29 | 30 | private: 31 | std::vector devices_; 32 | }; 33 | 34 | } // namespace dr::sp 35 | -------------------------------------------------------------------------------- /benchmarks/README.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | ============ 6 | Benchmarks 7 | ============ 8 | 9 | Streams 10 | ======= 11 | 12 | Used as a baseline for memory-bound applications. Assume we are using a 13 | 48-core machine and want each core to process :math:`10,000,000` 14 | elements of type `double`. 
To build:: 15 | 16 | git clone https://github.com/jeffhammond/STREAM.git 17 | cd STREAM 18 | gcc -fopenmp -O3 -mcmodel=medium -DSTREAM_TYPE=double -DSTREAM_ARRAY_SIZE=480000000 -DNTIMES=100 stream.c -o stream_cpu_openmp 19 | 20 | When running:: 21 | 22 | OMP_NUM_THREADS=48 ./stream_cpu_openmp 23 | 24 | To verify that you are using 48 cores, run ``top`` in another 25 | window. The stream process should show 4800 in the ``%CPU`` column. 26 | 27 | Google Benchmarks 28 | ================= 29 | 30 | Micro-benchmark framework for measuring primitive operations. See 31 | `gbench README`_. 32 | 33 | .. _`gbench README`: gbench/README.rst 34 | -------------------------------------------------------------------------------- /examples/mp/vector-add.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "mpi.h" 6 | 7 | #include "dr/mp.hpp" 8 | 9 | using T = int; 10 | 11 | MPI_Comm comm; 12 | int comm_rank; 13 | 14 | const std::size_t n = 10; 15 | 16 | void vector_add() { 17 | dr::mp::distributed_vector a(n), b(n), c(n); 18 | 19 | // Initialize 20 | dr::mp::iota(a, 10); 21 | dr::mp::iota(b, 100); 22 | 23 | auto add = [](auto ops) { return ops.first + ops.second; }; 24 | 25 | dr::mp::transform(dr::mp::views::zip(a, b), c.begin(), add); 26 | 27 | if (comm_rank == 0) { 28 | fmt::print("a: {}\n" 29 | "b: {}\n" 30 | "c: {}\n", 31 | a, b, c); 32 | } 33 | } 34 | 35 | int main(int argc, char *argv[]) { 36 | MPI_Init(&argc, &argv); 37 | comm = MPI_COMM_WORLD; 38 | MPI_Comm_rank(comm, &comm_rank); 39 | dr::mp::init(); 40 | 41 | vector_add(); 42 | 43 | dr::mp::finalize(); 44 | MPI_Finalize(); 45 | return 0; 46 | } 47 | -------------------------------------------------------------------------------- /test/gtest/sp/xp-tests.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel 
Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | #pragma once 5 | 6 | #include "cxxopts.hpp" 7 | #include "dr/sp.hpp" 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #define TEST_SP 14 | 15 | // To share tests with MP 16 | const std::size_t comm_rank = 0; 17 | const std::size_t comm_size = 1; 18 | 19 | // Namespace aliases and wrapper functions to make the tests uniform 20 | namespace xp = dr::sp; 21 | 22 | inline void barrier() {} 23 | inline void fence() {} 24 | inline void fence_on(auto &&) {} 25 | 26 | using AllocatorTypes = ::testing::Types>; 27 | 28 | template 29 | concept compliant_view = rng::forward_range && requires(V &v) { 30 | dr::ranges::segments(v); 31 | dr::ranges::rank(dr::ranges::segments(v)[0]); 32 | }; 33 | 34 | #include "common-tests.hpp" 35 | 36 | using AllTypes = ::testing::Types>; 37 | -------------------------------------------------------------------------------- /include/dr/sp/device_vector.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | namespace dr::sp { 11 | 12 | template 13 | class device_vector : public dr::sp::vector { 14 | public: 15 | constexpr device_vector() noexcept {} 16 | 17 | using base = dr::sp::vector; 18 | 19 | using value_type = T; 20 | using size_type = std::size_t; 21 | using difference_type = std::size_t; 22 | 23 | constexpr device_vector(size_type count, const Allocator &alloc, 24 | size_type rank) 25 | : base(count, alloc), rank_(rank) {} 26 | 27 | constexpr std::size_t rank() const noexcept { return rank_; } 28 | 29 | private: 30 | std::size_t rank_ = 0; 31 | }; 32 | 33 | template 34 | device_vector(std::size_t, const Alloc, std::size_t) 35 | -> device_vector; 36 | 37 | } // namespace dr::sp 38 | -------------------------------------------------------------------------------- 
/doc/spec/make.bat: -------------------------------------------------------------------------------- 1 | REM SPDX-FileCopyrightText: Intel Corporation 2 | REM 3 | REM SPDX-License-Identifier: BSD-3-Clause 4 | 5 | @ECHO OFF 6 | 7 | pushd %~dp0 8 | 9 | REM Command file for Sphinx documentation 10 | 11 | if "%SPHINXBUILD%" == "" ( 12 | set SPHINXBUILD=sphinx-build 13 | ) 14 | set SOURCEDIR=source 15 | set BUILDDIR=build 16 | 17 | if "%1" == "" goto help 18 | 19 | %SPHINXBUILD% >NUL 2>NUL 20 | if errorlevel 9009 ( 21 | echo. 22 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 23 | echo.installed, then set the SPHINXBUILD environment variable to point 24 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 25 | echo.may add the Sphinx directory to PATH. 26 | echo. 27 | echo.If you don't have Sphinx installed, grab it from 28 | echo.https://www.sphinx-doc.org/ 29 | exit /b 1 30 | ) 31 | 32 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | goto end 34 | 35 | :help 36 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 37 | 38 | :end 39 | popd 40 | -------------------------------------------------------------------------------- /include/dr/mp/algorithms/iota.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | namespace dr::mp { 19 | 20 | /// Collective iota on distributed range 21 | template void iota(R &&r, T value) { 22 | auto iota_view = rng::views::iota(value, T(value + rng::distance(r))); 23 | 24 | for_each(views::zip(iota_view, r), [](auto &&elem) { 25 | auto &&[idx, v] = elem; 26 | v = idx; 27 | }); 28 | } 29 | 30 | /// Collective iota on iterator/sentinel for a distributed range 31 | template 32 
| void iota(Iter begin, Iter end, T value) { 33 | auto r = rng::subrange(begin, end); 34 | iota(r, value); 35 | } 36 | 37 | } // namespace dr::mp 38 | -------------------------------------------------------------------------------- /benchmarks/gbench/mp/rooted.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "../common/dr_bench.hpp" 6 | 7 | using T = double; 8 | 9 | static void CopyDist2Local_DR(benchmark::State &state) { 10 | xp::distributed_vector src(default_vector_size); 11 | std::vector dst(default_vector_size); 12 | Stats stats(state, sizeof(T) * src.size(), sizeof(T) * dst.size()); 13 | for (auto _ : state) { 14 | for (std::size_t i = 0; i < default_repetitions; i++) { 15 | stats.rep(); 16 | xp::copy(0, src, dst.begin()); 17 | } 18 | } 19 | } 20 | 21 | DR_BENCHMARK(CopyDist2Local_DR); 22 | 23 | static void CopyLocal2Dist_DR(benchmark::State &state) { 24 | std::vector src(default_vector_size); 25 | xp::distributed_vector dst(default_vector_size); 26 | Stats stats(state, sizeof(T) * src.size(), sizeof(T) * dst.size()); 27 | for (auto _ : state) { 28 | for (std::size_t i = 0; i < default_repetitions; i++) { 29 | stats.rep(); 30 | xp::copy(0, src, dst.begin()); 31 | } 32 | } 33 | } 34 | 35 | DR_BENCHMARK(CopyLocal2Dist_DR); 36 | -------------------------------------------------------------------------------- /doc/spec/source/algorithms/for_each.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | .. include:: ../include/distributed-ranges.rst 6 | 7 | .. _for_each: 8 | 9 | ============== 10 | ``for_each`` 11 | ============== 12 | 13 | Interface 14 | ========= 15 | MP 16 | --- 17 | .. 
doxygenfunction:: dr::mp::for_each(dr::distributed_range auto &&dr, auto op) 18 | :outline: 19 | .. doxygenfunction:: dr::mp::for_each(DI first, DI last, auto op) 20 | :outline: 21 | 22 | SP 23 | --- 24 | 25 | .. doxygenfunction:: dr::sp::for_each(ExecutionPolicy &&policy, R &&r, Fn &&fn) 26 | :outline: 27 | .. doxygenfunction:: dr::sp::for_each(ExecutionPolicy &&policy, Iter begin, Iter end, Fn &&fn) 28 | :outline: 29 | .. doxygenfunction:: dr::sp::for_each(R &&r, Fn &&fn) 30 | :outline: 31 | .. doxygenfunction:: dr::sp::for_each(Iter begin, Iter end, Fn &&fn) 32 | :outline: 33 | 34 | Description 35 | =========== 36 | 37 | .. seealso:: 38 | 39 | `std::ranges::for_each`_ 40 | Standard C++ algorithm 41 | `std::for_each`_ 42 | Standard C++ algorithm 43 | 44 | Examples 45 | ======== 46 | -------------------------------------------------------------------------------- /include/dr/mp/containers/distribution.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace dr::mp { 10 | 11 | struct distribution { 12 | public: 13 | distribution &halo(std::size_t radius) { 14 | halo_bounds_.prev = radius; 15 | halo_bounds_.next = radius; 16 | return *this; 17 | } 18 | 19 | distribution &halo(std::size_t prev, std::size_t next) { 20 | halo_bounds_.prev = prev; 21 | halo_bounds_.next = next; 22 | return *this; 23 | } 24 | 25 | auto halo() const { return halo_bounds_; } 26 | 27 | distribution &periodic(bool periodic) { 28 | halo_bounds_.periodic = periodic; 29 | return *this; 30 | } 31 | 32 | auto periodic() const { return halo_bounds_.periodic; } 33 | 34 | distribution &granularity(std::size_t size) { 35 | granularity_ = size; 36 | return *this; 37 | } 38 | 39 | auto granularity() const { return granularity_; } 40 | 41 | private: 42 | halo_bounds halo_bounds_; 43 | std::size_t granularity_ = 1; 44 | 
}; 45 | 46 | } // namespace dr::mp 47 | -------------------------------------------------------------------------------- /examples/sp/matrix_example.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | 7 | int main(int argc, char **argv) { 8 | auto devices = dr::sp::get_numa_devices(sycl::gpu_selector_v); 9 | dr::sp::init(devices); 10 | 11 | auto partition = dr::sp::block_cyclic(); 12 | dr::sp::distributed_dense_matrix x({10, 10}, partition); 13 | 14 | x[{2, 3}] = 12; 15 | x[{5, 7}] = 42; 16 | x[{8, 9}] = 37; 17 | 18 | // Execute a parallel `for_each` algorithm across all the GPUs. 19 | // Each entry consists of an index tuple and value. (With a dense matrix, 20 | // the indices are not stored explicitly.) 21 | // 22 | // Here, we add `12` to each scalar value. 23 | dr::sp::for_each(dr::sp::par_unseq, x, [](auto &&entry) { 24 | auto &&[idx, v] = entry; 25 | v = v + 12; 26 | }); 27 | 28 | // Print out matrix. 
29 | for (auto iter = x.begin(); iter != x.end(); ++iter) { 30 | auto &&[idx, v] = *iter; 31 | auto &&[i, j] = idx; 32 | std::cout << "(" << i << ", " << j << "): " << v << std::endl; 33 | } 34 | 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /test/gtest/sp/detail.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "xp-tests.hpp" 6 | #include 7 | 8 | namespace sp = dr::sp; 9 | 10 | TEST(DetailTest, parallel_for) { 11 | std::size_t size = 2 * 1024 * 1024; 12 | std::size_t n = 4 * std::size_t(std::numeric_limits::max()); 13 | 14 | // Compute `v` 15 | std::vector v(size, 0); 16 | 17 | auto iota = ranges::views::iota(std::size_t(0), n); 18 | 19 | std::for_each(iota.begin(), iota.end(), [&](auto i) { v[i % size] += 1; }); 20 | 21 | auto &&q = sp::__detail::queue(0); 22 | 23 | sp::shared_allocator alloc(q); 24 | 25 | sp::vector> dvec(size, 0, alloc); 26 | 27 | auto dv = dvec.data(); 28 | 29 | dr::__detail::parallel_for(q, n, [=](auto i) { 30 | sycl::atomic_ref 32 | v(dv[i % size]); 33 | v += 1; 34 | }).wait(); 35 | 36 | std::vector dvec_local(size); 37 | sp::copy(dvec.begin(), dvec.end(), dvec_local.begin()); 38 | 39 | EXPECT_EQ(v, dvec_local); 40 | } 41 | -------------------------------------------------------------------------------- /scripts/devcloud-test.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | 3 | # SPDX-FileCopyrightText: Intel Corporation 4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | source scripts/setvars-2023.2.1.sh 8 | set -e 9 | hostname 10 | 11 | # SLURM/MPI integration is broken 12 | unset SLURM_TASKS_PER_NODE 13 | unset SLURM_JOBID 14 | unset ONEAPI_DEVICE_SELECTOR 15 | 16 | echo "::group::Generate" 17 | time cmake -B build -DENABLE_SYCL=on 18 | echo "::endgroup::" 19 | 20 | echo "::group::Build" 21 | time make -C build all -j 22 | echo "::endgroup::" 23 | 24 | echo "::group::SP GPU Test" 25 | # Use 1 device because p2p does not work 26 | ONEAPI_DEVICE_SELECTOR=level_zero:0 time ctest --test-dir build -L SP 27 | echo "::endgroup::" 28 | 29 | # disabled: very slow or fails when cryptominer is on devcloud 30 | #echo "::group::SP CPU Test" 31 | #ONEAPI_DEVICE_SELECTOR=opencl:cpu time ctest --test-dir build -L SP 32 | #echo "::endgroup::" 33 | 34 | echo "::group::MP GPU Test" 35 | ONEAPI_DEVICE_SELECTOR=level_zero:* time ctest --test-dir build -L MP 36 | echo "::endgroup::" 37 | 38 | echo "::group::MP CPU Test" 39 | ONEAPI_DEVICE_SELECTOR=opencl:cpu time ctest --test-dir build -L MP 40 | echo "::endgroup::" 41 | -------------------------------------------------------------------------------- /include/dr/mp/algorithms/transform.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | namespace dr::mp { 19 | 20 | void transform(rng::forward_range auto &&in, dr::distributed_iterator auto out, 21 | auto op) { 22 | if (rng::empty(in)) { 23 | return; 24 | } 25 | assert(aligned(in, out)); 26 | 27 | auto zip = mp::views::zip(in, rng::subrange(out, out + rng::size(in))); 28 | auto transform_op = [op](auto pair) { 29 | auto &[in, out] = pair; 30 
| out = op(in); 31 | }; 32 | for_each(zip, transform_op); 33 | } 34 | 35 | template 36 | void transform(DI_IN &&first, DI_IN &&last, dr::distributed_iterator auto &&out, 37 | auto op) { 38 | mp::transform(rng::subrange(first, last), out, op); 39 | } 40 | 41 | } // namespace dr::mp 42 | -------------------------------------------------------------------------------- /benchmarks/gbench/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Intel Corporation 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME) 6 | # 7 | # Google benchmark 8 | # 9 | set(BENCHMARK_ENABLE_TESTING off) 10 | set(BENCHMARK_ENABLE_WERROR off) 11 | # oneApi 2024.1 compiler refuses to compile code with standard less than C++17 12 | set(BENCHMARK_CXX_STANDARD 20) 13 | FetchContent_Declare( 14 | googlebench 15 | GIT_REPOSITORY https://github.com/lslusarczyk/benchmark.git 16 | GIT_TAG set-cxx-std) 17 | FetchContent_MakeAvailable(googlebench) 18 | 19 | if(ENABLE_CUDA) 20 | # because sort.cpp compilation fails with 21 | # dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort_one_wg.h warning: 22 | # attribute argument 16 is invalid and will be ignored; CUDA requires 23 | # sub_group size 32 24 | add_compile_options(-Wno-error=cuda-compat) 25 | endif() 26 | 27 | # mp is not under ENABLE_SYCL to check benchmarks also compilation in gcc 28 | add_subdirectory(mp) 29 | 30 | if(ENABLE_SYCL) 31 | add_subdirectory(sp) 32 | add_custom_target(xp-bench DEPENDS mp-bench sp-bench) 33 | endif() 34 | endif() 35 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Intel Corporation 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | accessible-pygments==0.0.5 6 | alabaster==1.0.0 7 | Babel==2.17.0 8 | beautifulsoup4==4.13.3 9 | 
binaryornot==0.4.4 10 | boolean.py==4.0 11 | breathe==4.35.0 12 | certifi==2024.12.14 13 | cfgv==3.4.0 14 | chardet==5.2.0 15 | charset-normalizer==3.4.1 16 | distlib==0.3.9 17 | docutils==0.21.2 18 | filelock==3.18.0 19 | identify==2.6.9 20 | idna==3.10 21 | imagesize==1.4.1 22 | Jinja2==3.1.6 23 | license-expression==30.4.1 24 | MarkupSafe==3.0.2 25 | nodeenv==1.9.1 26 | packaging==24.2 27 | platformdirs==4.3.6 28 | pre-commit==4.2.0 29 | pydata-sphinx-theme==0.16.1 30 | pyenchant==3.2.2 31 | Pygments==2.19.1 32 | python-debian==1.0.1 33 | PyYAML==6.0.2 34 | requests==2.32.4 35 | reuse==5.0.2 36 | snowballstemmer==2.2.0 37 | soupsieve==2.6 38 | Sphinx==8.1.3 39 | sphinx-book-theme==1.1.3 40 | sphinxcontrib-applehelp==2.0.0 41 | sphinxcontrib-devhelp==2.0.0 42 | sphinxcontrib-htmlhelp==2.1.0 43 | sphinxcontrib-jsmath==1.0.1 44 | sphinxcontrib-qthelp==2.0.0 45 | sphinxcontrib-serializinghtml==2.0.0 46 | sphinxcontrib-spelling==8.0.1 47 | typing_extensions==4.12.2 48 | urllib3==2.5.0 49 | virtualenv==20.29.3 50 | -------------------------------------------------------------------------------- /examples/mp/hello_world.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | #include 7 | 8 | namespace mp = dr::mp; 9 | 10 | int main(int argc, char **argv) { 11 | #ifdef SYCL_LANGUAGE_VERSION 12 | mp::init(sycl::default_selector_v); 13 | #else 14 | mp::init(); 15 | #endif 16 | 17 | { 18 | 19 | fmt::print("Hello, World! 
Distributed ranges is running on rank {} / {} on " 20 | "host {}\n", 21 | mp::rank(), mp::nprocs(), mp::hostname()); 22 | 23 | std::size_t n = 1000; 24 | 25 | mp::distributed_vector v(n); 26 | 27 | if (mp::rank() == 0) { 28 | auto &&segments = v.segments(); 29 | 30 | fmt::print("Created distributed_vector of size {} with {} segments.\n", 31 | v.size(), segments.size()); 32 | 33 | std::size_t segment_id = 0; 34 | for (auto &&segment : segments) { 35 | fmt::print("Rank {} owns segment {}, which is size {}\n", 36 | dr::ranges::rank(segment), segment_id, segment.size()); 37 | ++segment_id; 38 | } 39 | } 40 | } 41 | 42 | mp::finalize(); 43 | 44 | return 0; 45 | } 46 | -------------------------------------------------------------------------------- /include/dr/mp/allocator.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace dr::mp::__detail { 10 | 11 | template class allocator { 12 | 13 | public: 14 | T *allocate(std::size_t sz) { 15 | if (sz == 0) { 16 | return nullptr; 17 | } 18 | 19 | T *mem = nullptr; 20 | 21 | if (mp::use_sycl()) { 22 | #ifdef SYCL_LANGUAGE_VERSION 23 | mem = sycl::malloc(sz, sycl_queue(), sycl_mem_kind()); 24 | #else 25 | assert(false); 26 | #endif 27 | } else { 28 | mem = std_allocator_.allocate(sz); 29 | } 30 | 31 | assert(mem != nullptr); 32 | return mem; 33 | } 34 | 35 | void deallocate(T *ptr, std::size_t sz) { 36 | if (sz == 0) { 37 | assert(ptr == nullptr); 38 | return; 39 | } 40 | assert(ptr != nullptr); 41 | #ifdef SYCL_LANGUAGE_VERSION 42 | if (mp::use_sycl()) { 43 | sycl::free(ptr, sycl_queue()); 44 | return; 45 | } 46 | #endif 47 | 48 | std_allocator_.deallocate(ptr, sz); 49 | } 50 | 51 | private: 52 | std::allocator std_allocator_; 53 | }; 54 | 55 | } // namespace dr::mp::__detail 56 | 
-------------------------------------------------------------------------------- /include/dr/sp/future.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | namespace dr::sp { 13 | 14 | template class future { 15 | public: 16 | using event_type = Event; 17 | 18 | future(std::unique_ptr &&value, const std::vector &events) 19 | : value_(std::move(value)), events_(events) {} 20 | 21 | future(T &&value, const std::vector &events) 22 | : value_(new T(std::move(value))), events_(events) {} 23 | 24 | void update(const Event &event) { events_.push_back(event); } 25 | 26 | future(future &&) = default; 27 | future &operator=(future &&) = default; 28 | 29 | future(const future &) = delete; 30 | future &operator=(const future &) = delete; 31 | 32 | T get() { 33 | wait(); 34 | return std::move(*value_); 35 | } 36 | 37 | std::vector events() const { return events_; } 38 | 39 | T &value() const { return *value_; } 40 | 41 | void wait() { __detail::wait(events_); } 42 | 43 | private: 44 | std::unique_ptr value_; 45 | std::vector events_; 46 | }; 47 | 48 | } // namespace dr::sp 49 | -------------------------------------------------------------------------------- /doc/developer/design/namespaces.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | ============ 6 | Namespaces 7 | ============ 8 | 9 | See `oneAPI Style Guide`_. 
10 | 11 | Option 1 12 | ======== 13 | 14 | :: 15 | 16 | dr:: 17 | sp:: 18 | mp:: 19 | 20 | Option 2 21 | ======== 22 | 23 | :: 24 | 25 | dr:: 26 | dr::sp:: 27 | dr::mp:: 28 | 29 | Variation 1 30 | =========== 31 | 32 | ``sp.hpp`` does:: 33 | 34 | namespace dr { 35 | 36 | using namespace sp; 37 | 38 | } 39 | 40 | If you include ``sp.hpp``, everything you need is in ``dr::``. 41 | 42 | Variation 2 43 | =========== 44 | 45 | :: 46 | 47 | dr::spp:: 48 | dr::mpp:: 49 | 50 | Variation 3 51 | =========== 52 | 53 | :: 54 | 55 | dr::sp:: 56 | dr::mp:: 57 | 58 | Variation 4 59 | =========== 60 | 61 | :: 62 | 63 | dr::single:: 64 | dr::multi:: 65 | 66 | Variation 5 67 | =========== 68 | 69 | :: 70 | 71 | dr::spmd:: 72 | dr::smp:: 73 | 74 | It is not clear what the counterpart to SPMD_ is. 75 | 76 | .. _SPMD: https://en.wikipedia.org/wiki/Single_program,_multiple_data#:~:text=SPMD%20usually%20refers%20to%20message%20passing%20programming%20on%20distributed%20memory 77 | .. _`oneAPI Style Guide`: oneapi-cpp-style-guide.md 78 | -------------------------------------------------------------------------------- /doc/spec/source/algorithms/copy.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | .. include:: ../include/distributed-ranges.rst 6 | 7 | .. _copy: 8 | 9 | ========== 10 | ``copy`` 11 | ========== 12 | 13 | Interface 14 | ========= 15 | 16 | MP 17 | --- 18 | 19 | .. doxygenfunction:: dr::mp::copy(rng::forward_range auto &&in, dr::distributed_iterator auto out) 20 | :outline: 21 | .. doxygenfunction:: dr::mp::copy(DI_IN &&first, DI_IN &&last, dr::distributed_iterator auto &&out) 22 | :outline: 23 | .. doxygenfunction:: dr::mp::copy(std::size_t root, dr::distributed_contiguous_range auto &&in, std::contiguous_iterator auto out) 24 | :outline: 25 | .. 
doxygenfunction:: dr::mp::copy(std::size_t root, rng::contiguous_range auto &&in, dr::distributed_contiguous_iterator auto out) 26 | :outline: 27 | 28 | SP 29 | --- 30 | 31 | .. doxygenfunction:: dr::sp::copy(InputIt first, InputIt last, OutputIt d_first) 32 | :outline: 33 | .. doxygenfunction:: dr::sp::copy(device_ptr first, device_ptr last, Iter d_first) 34 | :outline: 35 | 36 | 37 | Description 38 | =========== 39 | 40 | .. seealso:: 41 | 42 | `std::ranges::copy`_ 43 | Standard C++ algorithm 44 | `std::copy`_ 45 | Standard C++ algorithm 46 | -------------------------------------------------------------------------------- /doc/spec/source/algorithms/transform.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | .. include:: ../include/distributed-ranges.rst 6 | 7 | .. _transform: 8 | 9 | =============== 10 | ``transform`` 11 | =============== 12 | 13 | Interface 14 | ========= 15 | 16 | MP 17 | --- 18 | 19 | .. doxygenfunction:: dr::mp::transform(rng::forward_range auto &&in, dr::distributed_iterator auto out, auto op) 20 | .. doxygenfunction:: dr::mp::transform(DI_IN &&first, DI_IN &&last, dr::distributed_iterator auto &&out, auto op) 21 | 22 | SP 23 | --- 24 | 25 | .. doxygenfunction:: dr::sp::transform(ExecutionPolicy &&policy, dr::distributed_range auto &&in, dr::distributed_iterator auto out, auto &&fn) 26 | .. doxygenfunction:: dr::sp::transform(R &&in, Iter out, Fn &&fn) 27 | .. doxygenfunction:: dr::sp::transform(ExecutionPolicy &&policy, Iter1 in_begin, Iter1 in_end, Iter2 out_end, Fn &&fn) 28 | .. doxygenfunction:: dr::sp::transform(Iter1 in_begin, Iter1 in_end, Iter2 out_end, Fn &&fn) 29 | 30 | 31 | Description 32 | =========== 33 | 34 | .. 
seealso:: 35 | 36 | `std::transform`_ 37 | C++ model 38 | `std::ranges::transform`_ 39 | C++ range-based model 40 | :ref:`reduce` 41 | related algorithm 42 | 43 | Usage 44 | ===== 45 | -------------------------------------------------------------------------------- /examples/sp/gemv_example.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | 7 | namespace sp = dr::sp; 8 | 9 | int main(int argc, char **argv) { 10 | auto devices = sp::get_numa_devices(sycl::gpu_selector_v); 11 | sp::init(devices); 12 | 13 | for (auto &device : devices) { 14 | std::cout << " Device: " << device.get_info() 15 | << "\n"; 16 | } 17 | 18 | using T = float; 19 | 20 | sp::distributed_vector> b(100); 21 | 22 | sp::duplicated_vector b_duplicated(100); 23 | 24 | sp::for_each(sp::par_unseq, sp::enumerate(b), [](auto &&tuple) { 25 | auto &&[idx, value] = tuple; 26 | value = 1; 27 | }); 28 | 29 | sp::distributed_vector> c(100); 30 | 31 | sp::for_each(sp::par_unseq, c, [](auto &&v) { v = 0; }); 32 | 33 | sp::sparse_matrix a( 34 | {100, 100}, 0.01, 35 | sp::block_cyclic({sp::tile::div, sp::tile::div}, {sp::nprocs(), 1})); 36 | 37 | printf("a tiles: %lu x %lu\n", a.grid_shape()[0], a.grid_shape()[1]); 38 | 39 | sp::print_range(b, "b"); 40 | 41 | sp::print_matrix(a, "a"); 42 | 43 | sp::gemv(c, a, b, b_duplicated); 44 | 45 | sp::print_range(c, "c"); 46 | 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /include/dr/sp/containers/duplicated_vector.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | namespace dr::sp { 11 | 12 | template > 13 | class duplicated_vector { 14 | public: 15 | using 
segment_type = dr::sp::device_vector; 16 | 17 | using value_type = T; 18 | using size_type = std::size_t; 19 | using difference_type = std::ptrdiff_t; 20 | 21 | using allocator_type = Allocator; 22 | 23 | duplicated_vector(std::size_t count = 0) { 24 | size_ = count; 25 | capacity_ = count; 26 | 27 | std::size_t rank = 0; 28 | for (auto &&device : dr::sp::devices()) { 29 | segments_.emplace_back( 30 | segment_type(size(), Allocator(dr::sp::context(), device), rank++)); 31 | } 32 | } 33 | 34 | size_type size() const noexcept { return size_; } 35 | 36 | segment_type &local_vector(std::size_t rank) { return segments_[rank]; } 37 | 38 | const segment_type &local_vector(std::size_t rank) const { 39 | return segments_[rank]; 40 | } 41 | 42 | private: 43 | std::vector segments_; 44 | std::size_t capacity_ = 0; 45 | std::size_t size_ = 0; 46 | }; 47 | 48 | } // namespace dr::sp 49 | -------------------------------------------------------------------------------- /test/fuzz/cpu/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Intel Corporation 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | # tested with a variable number of ranks 6 | add_executable(cpu-fuzz cpu-fuzz.cpp algorithms.cpp) 7 | 8 | target_compile_options(cpu-fuzz PUBLIC -fsanitize=fuzzer) 9 | target_link_options(cpu-fuzz PUBLIC -fsanitize=fuzzer) 10 | 11 | target_link_libraries(cpu-fuzz cxxopts DR::mpi) 12 | function(add_long_fuzz_mpi_test test_name name processes) 13 | add_test( 14 | NAME ${test_name} 15 | CONFIGURATIONS long_fuzz 16 | COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${processes} 17 | ${MPIEXEC_PREFLAGS} ./${name} ${ARGN} COMMAND_EXPAND_LISTS) 18 | endfunction() 19 | 20 | add_mp_ctest( 21 | cpu-fuzz-commit cpu-fuzz 1 -max_len=16 -runs=1000000 -ignore_remaining_args=1 22 | -controller=1) 23 | 24 | add_long_fuzz_mpi_test( 25 | cpu-fuzz-4-0 cpu-fuzz 4 -max_len=16 -runs=10000000 -ignore_remaining_args=1 26 
| -controller=0) 27 | add_long_fuzz_mpi_test( 28 | cpu-fuzz-4-1 cpu-fuzz 4 -max_len=16 -runs=10000000 -ignore_remaining_args=1 29 | -controller=1) 30 | add_long_fuzz_mpi_test( 31 | cpu-fuzz-4-2 cpu-fuzz 4 -max_len=16 -runs=10000000 -ignore_remaining_args=1 32 | -controller=2) 33 | add_long_fuzz_mpi_test( 34 | cpu-fuzz-4-3 cpu-fuzz 4 -max_len=16 -runs=10000000 -ignore_remaining_args=1 35 | -controller=3) 36 | -------------------------------------------------------------------------------- /include/dr/sp/views/standard_views.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace dr::sp { 15 | 16 | namespace views { 17 | 18 | template 19 | auto slice(R &&r, dr::index<> slice_indices) { 20 | return dr::sp::distributed_span(dr::ranges::segments(std::forward(r))) 21 | .subspan(slice_indices[0], slice_indices[1] - slice_indices[0]); 22 | } 23 | 24 | class slice_adaptor_closure { 25 | public: 26 | slice_adaptor_closure(dr::index<> slice_indices) : idx_(slice_indices) {} 27 | 28 | template auto operator()(R &&r) const { 29 | return slice(std::forward(r), idx_); 30 | } 31 | 32 | template 33 | friend auto operator|(R &&r, const slice_adaptor_closure &closure) { 34 | return closure(std::forward(r)); 35 | } 36 | 37 | private: 38 | dr::index<> idx_; 39 | }; 40 | 41 | inline auto slice(dr::index<> slice_indices) { 42 | return slice_adaptor_closure(slice_indices); 43 | } 44 | 45 | } // namespace views 46 | 47 | } // namespace dr::sp 48 | -------------------------------------------------------------------------------- /test/gtest/common/fill.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 
5 | #include "xp-tests.hpp" 6 | 7 | // Fixture 8 | template class Fill : public testing::Test { 9 | public: 10 | }; 11 | 12 | TYPED_TEST_SUITE(Fill, AllTypes); 13 | 14 | TYPED_TEST(Fill, Range) { 15 | Ops1 ops(10); 16 | 17 | auto input = ops.vec; 18 | 19 | xp::fill(ops.dist_vec, 33); 20 | rng::fill(ops.vec, 33); 21 | EXPECT_TRUE(check_unary_op(input, ops.vec, ops.dist_vec)); 22 | } 23 | 24 | TYPED_TEST(Fill, Iterators) { 25 | Ops1 ops(10); 26 | 27 | auto input = ops.vec; 28 | 29 | xp::fill(ops.dist_vec.begin() + 1, ops.dist_vec.end() - 1, 33); 30 | rng::fill(ops.vec.begin() + 1, ops.vec.end() - 1, 33); 31 | EXPECT_TRUE(check_unary_op(input, ops.vec, ops.dist_vec)); 32 | } 33 | 34 | TYPED_TEST(Fill, Iterators_large) { 35 | TypeParam large_dist_vec(80000); 36 | xp::fill(large_dist_vec.begin() + 71000, large_dist_vec.end(), 33); 37 | EXPECT_EQ(large_dist_vec[77777], 33); 38 | } 39 | 40 | TYPED_TEST(Fill, Iterators_large_segment) { 41 | TypeParam large_dist_vec(80000); 42 | auto last_segment = large_dist_vec.segments().back(); 43 | xp::fill(last_segment.begin(), last_segment.end(), 44 | static_cast(33)); 45 | EXPECT_EQ(large_dist_vec[79999], 33); 46 | } 47 | -------------------------------------------------------------------------------- /benchmarks/gbench/mp/wave_utils.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | #include 5 | 6 | // Arakawa C grid object 7 | // 8 | // T points at cell centers 9 | // U points at center of x edges 10 | // V points at center of y edges 11 | // F points at vertices 12 | // 13 | // | | | | | 14 | // f---v---f---v---f---v---f---v---f- 15 | // | | | | | 16 | // u t u t u t u t u 17 | // | | | | | 18 | // f---v---f---v---f---v---f---v---f- 19 | struct ArakawaCGrid { 20 | double xmin, xmax; // x limits in physical coordinates (U point min/max) 21 | double ymin, ymax; // y limits in physical
coordinates (V point min/max) 22 | std::size_t nx, ny; // number of cells (T points) 23 | double lx, ly; // grid size in physical coordinates 24 | double dx, dy; // cell size in physical coordinates 25 | double dx_inv, dy_inv; // reciprocal of dx and dy 26 | 27 | ArakawaCGrid(double _xmin, double _xmax, double _ymin, double _ymax, 28 | std::size_t _nx, std::size_t _ny) 29 | : xmin(_xmin), xmax(_xmax), ymin(_ymin), ymax(_ymax), nx(_nx), ny(_ny), 30 | lx(_xmax - _xmin), ly(_ymax - _ymin) { 31 | dx = lx / nx; 32 | dy = ly / ny; 33 | dx_inv = 1.0 / dx; 34 | dy_inv = 1.0 / dy; 35 | }; 36 | }; 37 | -------------------------------------------------------------------------------- /examples/sp/take_example.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | 7 | int main(int argc, char **argv) { 8 | auto devices = dr::sp::get_numa_devices(sycl::gpu_selector_v); 9 | dr::sp::init(devices); 10 | 11 | for (auto &device : devices) { 12 | std::cout << " Device: " << device.get_info() 13 | << "\n"; 14 | } 15 | 16 | dr::sp::distributed_vector> v(100); 17 | 18 | dr::sp::for_each(dr::sp::par_unseq, dr::sp::enumerate(v), [](auto &&tuple) { 19 | auto &&[idx, value] = tuple; 20 | value = idx; 21 | }); 22 | 23 | dr::sp::for_each(dr::sp::par_unseq, v, [](auto &&value) { value += 2; }); 24 | 25 | dr::sp::print_range(v, "Distributed vector"); 26 | 27 | // Create trimmed view. 28 | // `trimmed_view` is a distributed range.
29 | auto trimmed_view = dr::sp::views::take(v, 53); 30 | dr::sp::print_range(trimmed_view, "Trimmed View"); 31 | 32 | auto sum = dr::sp::reduce(dr::sp::par_unseq, v, 0, std::plus{}); 33 | std::cout << "Total sum: " << sum << std::endl; 34 | 35 | auto tsum = dr::sp::reduce(dr::sp::par_unseq, trimmed_view, 0, std::plus{}); 36 | std::cout << "Trimmed sum: " << tsum << std::endl; 37 | 38 | dr::sp::print_range(v | rng::views::drop(40) | dr::sp::views::slice({5, 10})); 39 | 40 | return 0; 41 | } 42 | -------------------------------------------------------------------------------- /doc/developer/testing/pre-commit.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | ============ 6 | Pre-commit 7 | ============ 8 | 9 | The ``checks`` job in CI runs some static tests with the 10 | ``pre-commit`` python package. You can resolve issues faster by 11 | running the checks locally before submitting the PR. 12 | 13 | Run pre-commit checks:: 14 | 15 | pre-commit run --all 16 | 17 | Do pre-commit testing as part of git commit:: 18 | 19 | pre-commit install 20 | 21 | ``pre-commit`` checks links in the documentation. If it is taking a 22 | long time, you can skip all checks as part of commit with ``-n``:: 23 | 24 | git commit -n -m 'Commit message' 25 | 26 | ``pre-commit`` will automatically fix most issues. Do a ``git add`` to 27 | add the changes and run ``pre-commit`` or ``git commit`` again. 28 | 29 | To skip the sphinx tests:: 30 | 31 | SKIP=sphinx pre-commit run --all 32 | 33 | This may be convenient if you are not changing the documentation. The 34 | Sphinx tests require installing some prerequisites, a working internet 35 | connection, and take longer to run. 
36 | 37 | Reuse 38 | ===== 39 | 40 | To fix a problem with a missing license do:: 41 | 42 | reuse annotate --exclude-year --license BSD-3-Clause --copyright "Intel Corporation" 43 | 44 | Or copy the license from the top of a file with the same filename 45 | extension. 46 | -------------------------------------------------------------------------------- /test/gtest/mp/reduce.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "xp-tests.hpp" 6 | 7 | // Fixture 8 | 9 | template class ReduceMP : public testing::Test { 10 | public: 11 | }; 12 | 13 | TYPED_TEST_SUITE(ReduceMP, AllTypes); 14 | 15 | const std::size_t root = 0; 16 | 17 | TYPED_TEST(ReduceMP, RootRange) { 18 | Ops1 ops(10); 19 | 20 | auto result = dr::mp::reduce(root, ops.dist_vec, 0, std::plus{}); 21 | 22 | if (comm_rank == root) { 23 | EXPECT_EQ(std::reduce(ops.vec.begin(), ops.vec.end(), 0, std::plus{}), 24 | result); 25 | } 26 | } 27 | 28 | TYPED_TEST(ReduceMP, RootIterators) { 29 | Ops1 ops(10); 30 | 31 | auto result = dr::mp::reduce(root, ops.dist_vec.begin() + 1, 32 | ops.dist_vec.end() - 1, 0, std::plus{}); 33 | 34 | if (comm_rank == root) { 35 | EXPECT_EQ( 36 | std::reduce(ops.vec.begin() + 1, ops.vec.end() - 1, 0, std::plus{}), 37 | result); 38 | } 39 | } 40 | 41 | TYPED_TEST(ReduceMP, TransformReduce) { 42 | Ops1 ops(10); 43 | 44 | auto add = [](auto &&elem) { return elem + 1; }; 45 | 46 | auto added = dr::mp::views::transform(ops.dist_vec, add); 47 | auto min = [](double x, double y) { return std::min(x, y); }; 48 | auto result = dr::mp::reduce(root, added, 1, min); 49 | if (comm_rank == root) { 50 | EXPECT_EQ(result, 1); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /test/gtest/common/transform.cpp: -------------------------------------------------------------------------------- 1 | // 
SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "xp-tests.hpp" 6 | 7 | // Fixture 8 | template class Transform : public testing::Test { 9 | public: 10 | }; 11 | 12 | TYPED_TEST_SUITE(Transform, AllTypes); 13 | 14 | TYPED_TEST(Transform, Range) { 15 | Ops2 ops(10); 16 | 17 | auto negate = [](auto &&v) { return -v; }; 18 | 19 | xp::transform(ops.dist_vec0, ops.dist_vec1.begin(), negate); 20 | rng::transform(ops.vec0, ops.vec1.begin(), negate); 21 | EXPECT_EQ(ops.vec0, ops.dist_vec0); 22 | EXPECT_EQ(ops.vec1, ops.dist_vec1); 23 | } 24 | 25 | TYPED_TEST(Transform, RangeMutate) { 26 | Ops2 ops(10); 27 | 28 | auto negate = [](auto &&v) { 29 | v++; 30 | return -v; 31 | }; 32 | 33 | xp::transform(ops.dist_vec0, ops.dist_vec1.begin(), negate); 34 | rng::transform(ops.vec0, ops.vec1.begin(), negate); 35 | EXPECT_EQ(ops.vec0, ops.dist_vec0); 36 | EXPECT_EQ(ops.vec1, ops.dist_vec1); 37 | } 38 | 39 | TYPED_TEST(Transform, Iterators) { 40 | Ops2 ops(10); 41 | 42 | auto negate = [](auto &&v) { return -v; }; 43 | 44 | xp::transform(ops.dist_vec0.begin(), ops.dist_vec0.end(), 45 | ops.dist_vec1.begin(), negate); 46 | rng::transform(ops.vec0.begin(), ops.vec0.end(), ops.vec1.begin(), negate); 47 | 48 | EXPECT_EQ(ops.vec0, ops.dist_vec0); 49 | EXPECT_EQ(ops.vec1, ops.dist_vec1); 50 | } 51 | -------------------------------------------------------------------------------- /test/gtest/mp/stencil.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "xp-tests.hpp" 6 | 7 | using T = int; 8 | using V = std::vector; 9 | using DV = dr::mp::distributed_vector; 10 | using DVI = typename DV::iterator; 11 | 12 | const std::size_t radius = 4; 13 | const std::size_t n = 10; 14 | 15 | TEST(Stencil, 1D) { 16 | auto dist = dr::mp::distribution().halo(radius); 17 | DV dv_in(n, dist); 18 | DV 
dv_out(n, dist); 19 | V v_in(n); 20 | 21 | dr::mp::iota(dv_in, 10); 22 | dv_in.halo().exchange(); 23 | 24 | dr::mp::fill(dv_out, 100); 25 | dv_out.halo().exchange(); 26 | 27 | if (comm_rank == 0) { 28 | rng::iota(v_in, 10); 29 | EXPECT_TRUE(check_segments(dv_in)); 30 | EXPECT_TRUE(equal_gtest(v_in, dv_in)); 31 | } 32 | 33 | MPI_Barrier(comm); 34 | 35 | auto sum = [](auto &&v) { 36 | T s = v; 37 | auto p = &v; 38 | for (std::size_t i = 0; i <= radius; i++) { 39 | s += p[-i]; 40 | s += p[i]; 41 | } 42 | 43 | return s; 44 | }; 45 | 46 | dr::mp::transform(dv_in.begin() + radius, dv_in.end() - radius, 47 | dv_out.begin() + radius, sum); 48 | 49 | if (comm_rank == 0) { 50 | V v_out(n); 51 | rng::fill(v_out, 100); 52 | std::transform(v_in.begin() + radius, v_in.end() - radius, 53 | v_out.begin() + radius, sum); 54 | EXPECT_TRUE(check_unary_op(v_in, v_out, dv_out)); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /test/gtest/mp/communicator-3.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "xp-tests.hpp" 6 | 7 | template class Communicator3 : public testing::Test {}; 8 | 9 | using T = int; 10 | 11 | TYPED_TEST_SUITE(Communicator3, AllTypes); 12 | 13 | TYPED_TEST(Communicator3, suite_works_for_3_processes_only) { 14 | EXPECT_EQ(dr::mp::default_comm().size(), 3); 15 | } 16 | 17 | TEST(Communicator3, AlltoallvThreeRanksOnly) { 18 | const std::size_t max_send_recv_size = 3; 19 | std::vector vec_src = {1, 2, 3, 0, 5, 6}; 20 | std::vector vec_dst(comm_size * max_send_recv_size, 0); 21 | 22 | std::vector sendsizes = {3, 2, 1}; 23 | std::vector recvsizes = {3, 3, 3}; 24 | 25 | std::vector senddispl = {0, 3, 5}; 26 | std::vector recvdispl = {0, 3, 6}; 27 | 28 | dr::mp::default_comm().alltoallv(vec_src, sendsizes, senddispl, vec_dst, 29 | recvsizes, recvdispl); 30 | 31 | 
std::vector vec_ref_0 = {1, 2, 3, 1, 2, 3, 1, 2, 3}; 32 | std::vector vec_ref_1 = {0, 5, 0, 0, 5, 0, 0, 5, 0}; 33 | std::vector vec_ref_2 = {6, 0, 0, 6, 0, 0, 6, 0, 0}; 34 | 35 | switch (comm_rank) { 36 | case 0: 37 | EXPECT_EQ(vec_ref_0, vec_dst); 38 | break; 39 | case 1: 40 | EXPECT_EQ(vec_ref_1, vec_dst); 41 | break; 42 | case 2: 43 | EXPECT_EQ(vec_ref_2, vec_dst); 44 | break; 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /test/gtest/sp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Intel Corporation 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | set(CMAKE_INCLUDE_CURRENT_DIR ON) 6 | 7 | add_executable( 8 | sp-tests 9 | sp-tests.cpp ../common/all.cpp ../common/copy.cpp ../common/counted.cpp 10 | ../common/distributed_vector.cpp ../common/drop.cpp ../common/enumerate.cpp 11 | ../common/equal.cpp ../common/fill.cpp ../common/for_each.cpp 12 | ../common/iota.cpp 13 | # ../common/iota_view.cpp 14 | ../common/reduce.cpp ../common/sort.cpp ../common/subrange.cpp 15 | ../common/take.cpp ../common/transform.cpp ../common/transform_view.cpp 16 | ../common/zip.cpp ../common/zip_local.cpp containers.cpp algorithms.cpp 17 | copy.cpp detail.cpp fill.cpp gemv.cpp sparse.cpp transform.cpp) 18 | 19 | add_executable(sp-tests-3 sp-tests.cpp containers-3.cpp copy-3.cpp) 20 | 21 | # skeleton for rapid builds of individual tests, feel free to change this 22 | add_executable(sp-quick-test sp-tests.cpp sparse.cpp) 23 | target_compile_definitions(sp-quick-test PRIVATE QUICK_TEST) 24 | 25 | foreach(test-exec IN ITEMS sp-tests sp-tests-3 sp-quick-test) 26 | target_link_libraries(${test-exec} GTest::gtest_main DR::sp fmt::fmt cxxopts) 27 | endforeach() 28 | 29 | # timeout in SP tests 30 | add_sp_ctest(sp-tests sp-tests) 31 | add_sp_ctest(sp-tests-3 sp-tests --devicesCount 3) 32 | add_sp_ctest(sp-tests-3-only sp-tests-3 --devicesCount 3) 33 | 
add_sp_disabled_ctest(sp-quick-test sp-quick-test --devicesCount 3) 34 | -------------------------------------------------------------------------------- /LICENSES/BSD-3-Clause.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) Intel Corporation. 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
12 | -------------------------------------------------------------------------------- /include/dr/sp/algorithms/equal.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace dr::sp { 16 | 17 | template 19 | requires std::equality_comparable_with, 20 | rng::range_value_t> 21 | bool equal(ExecutionPolicy &&policy, R1 &&r1, R2 &&r2) { 22 | 23 | if (rng::distance(r1) != rng::distance(r2)) { 24 | return false; 25 | } 26 | 27 | // we must use ints instead of bools, because distributed ranges do not 28 | // support bools 29 | auto compare = [](auto &&elems) { 30 | return elems.first == elems.second ? 1 : 0; 31 | }; 32 | 33 | auto zipped_views = views::zip(r1, r2); 34 | auto compared = sp::views::transform(zipped_views, compare); 35 | auto min = [](double x, double y) { return std::min(x, y); }; 36 | auto result = sp::reduce(policy, compared, 1, min); 37 | return result == 1; 38 | } 39 | 40 | template 41 | bool equal(R1 &&r1, R2 &&r2) { 42 | return equal(dr::sp::par_unseq, r1, r2); 43 | } 44 | } // namespace dr::sp 45 | -------------------------------------------------------------------------------- /test/gtest/sp/sp-tests.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "xp-tests.hpp" 6 | 7 | cxxopts::ParseResult options; 8 | 9 | int main(int argc, char *argv[]) { 10 | ::testing::InitGoogleTest(&argc, argv); 11 | cxxopts::Options options_spec(argv[0], "DR SP tests"); 12 | 13 | // clang-format off 14 | options_spec.add_options() 15 | ("drhelp", "Print help") 16 | ("d, devicesCount", "number of GPUs to create", cxxopts::value()->default_value("0")); 17 | 
// clang-format on 18 | 19 | try { 20 | options = options_spec.parse(argc, argv); 21 | } catch (const cxxopts::OptionParseException &e) { 22 | fmt::print("{}\n", options_spec.help()); 23 | exit(1); 24 | } 25 | 26 | if (options.count("drhelp")) { 27 | fmt::print("{}\n", options_spec.help()); 28 | exit(0); 29 | } 30 | 31 | const unsigned int dev_num = options["devicesCount"].as(); 32 | auto devices = dr::sp::get_numa_devices(sycl::default_selector_v); 33 | 34 | if (dev_num > 0) { 35 | unsigned int i = 0; 36 | while (devices.size() < dev_num) { 37 | devices.push_back(devices[i++]); 38 | } 39 | devices.resize(dev_num); // if too many devices 40 | } 41 | 42 | dr::sp::init(devices); 43 | 44 | for (auto &device : devices) { 45 | std::cout << " Device: " << device.get_info() 46 | << "\n"; 47 | } 48 | 49 | return RUN_ALL_TESTS(); 50 | } 51 | -------------------------------------------------------------------------------- /include/dr/sp/views/enumerate.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace dr::sp { 10 | 11 | namespace views { 12 | 13 | namespace { 14 | 15 | template struct range_size { 16 | using type = std::size_t; 17 | }; 18 | 19 | template struct range_size { 20 | using type = rng::range_size_t; 21 | }; 22 | 23 | template using range_size_t = typename range_size::type; 24 | 25 | } // namespace 26 | 27 | class enumerate_adapter_closure { 28 | public: 29 | template 30 | requires(rng::sized_range) 31 | auto operator()(R &&r) const { 32 | using W = std::uint32_t; 33 | return dr::sp::zip_view(rng::views::iota(W(0), W(rng::size(r))), 34 | std::forward(r)); 35 | } 36 | 37 | template 38 | friend auto operator|(R &&r, const enumerate_adapter_closure &closure) { 39 | return closure(std::forward(r)); 40 | } 41 | }; 42 | 43 | class enumerate_fn_ { 44 | public: 45 | template 
constexpr auto operator()(R &&r) const { 46 | return enumerate_adapter_closure{}(std::forward(r)); 47 | } 48 | 49 | inline auto enumerate() const { return enumerate_adapter_closure{}; } 50 | }; 51 | 52 | inline constexpr auto enumerate = enumerate_fn_{}; 53 | 54 | } // namespace views 55 | 56 | } // namespace dr::sp 57 | -------------------------------------------------------------------------------- /doc/spec/source/algorithms/exclusive_scan.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | .. include:: ../include/distributed-ranges.rst 6 | 7 | .. _exclusive_scan: 8 | 9 | ============================ 10 | ``exclusive_scan`` 11 | ============================ 12 | 13 | Interface 14 | ========= 15 | MP 16 | --- 17 | 18 | SP 19 | --- 20 | 21 | .. doxygenfunction:: dr::sp::exclusive_scan(ExecutionPolicy &&policy, R &&r, O &&o, T init, BinaryOp &&binary_op) 22 | :outline: 23 | .. doxygenfunction:: dr::sp::exclusive_scan(ExecutionPolicy &&policy, R &&r, O &&o, T init) 24 | :outline: 25 | .. doxygenfunction:: dr::sp::exclusive_scan(R &&r, O &&o, T init, BinaryOp &&binary_op) 26 | :outline: 27 | .. doxygenfunction:: dr::sp::exclusive_scan(R &&r, O &&o, T init) 28 | :outline: 29 | .. doxygenfunction:: dr::sp::exclusive_scan(ExecutionPolicy &&policy, Iter first, Iter last, OutputIter d_first, T init, BinaryOp &&binary_op) 30 | :outline: 31 | .. doxygenfunction:: dr::sp::exclusive_scan(ExecutionPolicy &&policy, Iter first, Iter last, OutputIter d_first, T init) 32 | :outline: 33 | .. doxygenfunction:: dr::sp::exclusive_scan(Iter first, Iter last, OutputIter d_first, T init, BinaryOp &&binary_op) 34 | :outline: 35 | .. doxygenfunction:: dr::sp::exclusive_scan(Iter first, Iter last, OutputIter d_first, T init) 36 | :outline: 37 | 38 | Description 39 | =========== 40 | 41 | ..
seealso:: 42 | 43 | 'std::exclusive_scan'_ 44 | 45 | Examples 46 | ======== 47 | -------------------------------------------------------------------------------- /include/dr/mp/views/enumerate.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace dr::mp { 10 | 11 | namespace views { 12 | 13 | namespace __detail { 14 | 15 | template struct range_size { 16 | using type = std::size_t; 17 | }; 18 | 19 | template struct range_size { 20 | using type = rng::range_size_t; 21 | }; 22 | 23 | template using range_size_t = typename range_size::type; 24 | 25 | } // namespace __detail 26 | 27 | class enumerate_adapter_closure { 28 | public: 29 | template 30 | requires(rng::sized_range) 31 | auto operator()(R &&r) const { 32 | using W = std::uint32_t; 33 | return mp::views::zip(mp::views::iota(W(0), W(rng::distance(r))), 34 | std::forward(r)); 35 | } 36 | 37 | template 38 | friend auto operator|(R &&r, const enumerate_adapter_closure &closure) { 39 | return closure(std::forward(r)); 40 | } 41 | }; 42 | 43 | class enumerate_fn_ { 44 | public: 45 | template constexpr auto operator()(R &&r) const { 46 | return enumerate_adapter_closure{}(std::forward(r)); 47 | } 48 | 49 | inline auto enumerate() const { return enumerate_adapter_closure{}; } 50 | }; 51 | 52 | inline constexpr auto enumerate = enumerate_fn_{}; 53 | 54 | } // namespace views 55 | 56 | } // namespace dr::mp 57 | -------------------------------------------------------------------------------- /test/gtest/common/equal.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "xp-tests.hpp" 6 | 7 | // Fixture 8 | template class Equals : public testing::Test { 9 | public: 10 | }; 11 | 12 | 
TYPED_TEST_SUITE(Equals, AllTypes); 13 | 14 | TYPED_TEST(Equals, Same) { 15 | Ops1 ops(10); 16 | 17 | xp::distributed_vector toCompareXhp(10); 18 | 19 | for (std::size_t idx = 0; idx < 10; idx++) { 20 | toCompareXhp[idx] = ops.dist_vec[idx]; 21 | } 22 | barrier(); 23 | 24 | bool xpEq = xp::equal(ops.dist_vec, toCompareXhp); 25 | 26 | EXPECT_TRUE(xpEq); 27 | } 28 | 29 | TYPED_TEST(Equals, Different) { 30 | Ops1 ops(10); 31 | 32 | xp::distributed_vector toCompareXhp(10); 33 | 34 | for (std::size_t idx = 0; idx < 10; idx++) { 35 | toCompareXhp[idx] = ops.dist_vec[idx]; 36 | } 37 | 38 | toCompareXhp[2] = ops.dist_vec[2] + 1; 39 | 40 | barrier(); 41 | 42 | bool xpEq = xp::equal(ops.dist_vec, toCompareXhp); 43 | 44 | EXPECT_TRUE(!xpEq); 45 | } 46 | 47 | TYPED_TEST(Equals, EmptiesEqual) { 48 | Ops1 ops(0); 49 | 50 | xp::distributed_vector toCompareXhp(0); 51 | 52 | bool xpEq = xp::equal(ops.dist_vec, toCompareXhp); 53 | 54 | EXPECT_TRUE(xpEq); 55 | } 56 | 57 | TYPED_TEST(Equals, EmptyNotEmptyDifferent) { 58 | Ops1 ops(0); 59 | 60 | xp::distributed_vector toCompareXhp(10); 61 | 62 | bool xpEq = xp::equal(ops.dist_vec, toCompareXhp); 63 | 64 | EXPECT_TRUE(!xpEq); 65 | } 66 | -------------------------------------------------------------------------------- /test/gtest/common/iota.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "xp-tests.hpp" 6 | 7 | // Fixture 8 | template class IotaTest : public testing::Test { 9 | public: 10 | }; 11 | 12 | TYPED_TEST_SUITE(IotaTest, AllTypes); 13 | 14 | TYPED_TEST(IotaTest, Range) { 15 | TypeParam v(10); 16 | xp::iota(v, 1); 17 | EXPECT_EQ(v, (LocalVec{1, 2, 3, 4, 5, 6, 7, 8, 9, 10})); 18 | } 19 | 20 | TYPED_TEST(IotaTest, Iter) { 21 | TypeParam v(10); 22 | xp::iota(v.begin(), v.end(), 10); 23 | EXPECT_EQ(v, (LocalVec{10, 11, 12, 13, 14, 15, 16, 17, 18, 19})); 24 | } 25 | 26 | 
TYPED_TEST(IotaTest, PartialIter) { 27 | TypeParam v(10, 99); 28 | xp::iota(++v.begin(), --v.end(), 0); 29 | EXPECT_EQ(v, (LocalVec{99, 0, 1, 2, 3, 4, 5, 6, 7, 99})); 30 | } 31 | 32 | TYPED_TEST(IotaTest, SlicedLeft) { 33 | TypeParam dist_vec(10, 0); 34 | xp::iota(dist_vec.begin() + 2, dist_vec.end(), 2); 35 | EXPECT_TRUE( 36 | equal_gtest(dist_vec, LocalVec{0, 0, 2, 3, 4, 5, 6, 7, 8, 9})); 37 | } 38 | 39 | TYPED_TEST(IotaTest, SlicedRight) { 40 | TypeParam dist_vec(10, 0); 41 | xp::iota(dist_vec.begin(), dist_vec.end() - 2, 2); 42 | EXPECT_TRUE( 43 | equal_gtest(dist_vec, LocalVec{2, 3, 4, 5, 6, 7, 8, 9, 0, 0})); 44 | } 45 | 46 | TYPED_TEST(IotaTest, Large) { 47 | TypeParam v(98765); 48 | xp::iota(v, 0); 49 | EXPECT_EQ(v[33000], 33000); 50 | EXPECT_EQ(v[66000], 66000); 51 | EXPECT_EQ(v[91000], 91000); 52 | } 53 | -------------------------------------------------------------------------------- /include/dr/detail/remote_subrange.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | namespace dr { 13 | 14 | template 15 | class remote_subrange : public rng::subrange { 16 | using base = rng::subrange; 17 | 18 | public: 19 | remote_subrange() 20 | requires std::default_initializable 21 | = default; 22 | 23 | constexpr remote_subrange(I first, I last, std::size_t rank) 24 | : base(first, last), rank_(rank) {} 25 | 26 | template 27 | constexpr remote_subrange(R &&r, std::size_t rank) 28 | : base(rng::begin(r), rng::end(r)), rank_(rank) {} 29 | 30 | template 31 | constexpr remote_subrange(R &&r) 32 | : base(rng::begin(r), rng::end(r)), rank_(dr::ranges::rank(r)) {} 33 | 34 | constexpr std::size_t rank() const noexcept { return rank_; } 35 | 36 | private: 37 | std::size_t rank_; 38 | }; 39 | 40 | template 41 | remote_subrange(R &&, std::size_t) -> remote_subrange>; 
42 | 43 | template 44 | remote_subrange(R &&) -> remote_subrange>; 45 | 46 | } // namespace dr 47 | 48 | #if !defined(DR_SPEC) 49 | 50 | // Needed to satisfy concepts for rng::begin 51 | template 52 | inline constexpr bool rng::enable_borrowed_range> = true; 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /include/dr/sp/device_ref.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | namespace dr::sp { 12 | 13 | template 14 | requires(std::is_trivially_copyable_v || std::is_void_v) 15 | class device_ref { 16 | public: 17 | device_ref() = delete; 18 | ~device_ref() = default; 19 | device_ref(const device_ref &) = default; 20 | 21 | device_ref(T *pointer) : pointer_(pointer) {} 22 | 23 | operator T() const { 24 | #ifdef __SYCL_DEVICE_ONLY__ 25 | return *pointer_; 26 | #else 27 | auto &&q = dr::sp::__detail::default_queue(); 28 | char buffer[sizeof(T)] __attribute__((aligned(sizeof(T)))); 29 | q.memcpy(reinterpret_cast(buffer), pointer_, sizeof(T)).wait(); 30 | return *reinterpret_cast(buffer); 31 | #endif 32 | } 33 | 34 | device_ref operator=(const T &value) const 35 | requires(!std::is_const_v) 36 | { 37 | #ifdef __SYCL_DEVICE_ONLY__ 38 | *pointer_ = value; 39 | #else 40 | auto &&q = dr::sp::__detail::default_queue(); 41 | q.memcpy(pointer_, &value, sizeof(T)).wait(); 42 | #endif 43 | return *this; 44 | } 45 | 46 | device_ref operator=(const device_ref &other) const { 47 | #ifdef __SYCL_DEVICE_ONLY__ 48 | *pointer_ = *other.pointer_; 49 | #else 50 | T value = other; 51 | *this = value; 52 | #endif 53 | return *this; 54 | } 55 | 56 | private: 57 | T *pointer_; 58 | }; 59 | 60 | } // namespace dr::sp 61 | -------------------------------------------------------------------------------- 
/examples/sp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Intel Corporation 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | function(add_sp_example_no_test example_name) 6 | add_executable(${example_name} ${example_name}.cpp) 7 | target_link_libraries(${example_name} DR::sp) 8 | endfunction() 9 | 10 | function(add_sp_example example_name) 11 | add_sp_example_no_test(${example_name}) 12 | add_sp_ctest(${example_name} ${example_name} ${ARGN}) 13 | endfunction() 14 | 15 | add_sp_example(vector_example) 16 | add_sp_example(dot_product) 17 | add_sp_example(sort) 18 | add_sp_example_no_test(sort_benchmark) 19 | add_sp_example(inclusive_scan_example) 20 | add_sp_example(exclusive_scan_example) 21 | # unsatisfied dependency of grb/grb.hpp add_sp_example(gemv_benchmark) 22 | add_sp_example_no_test(dot_product_benchmark) 23 | add_sp_example_no_test(inclusive_scan_benchmark) 24 | add_sp_example_no_test(exclusive_scan_benchmark) 25 | add_sp_example_no_test(copy_test) 26 | add_sp_example_no_test(gather_test) 27 | add_sp_example_no_test(gemv_example) # PI_ERROR_DEVICE_NOT_FOUND 28 | add_sp_example_no_test(matrix_example) # PI_ERROR_DEVICE_NOT_FOUND 29 | add_sp_example_no_test(gemm_example) # PI_ERROR_DEVICE_NOT_FOUND 30 | add_sp_example_no_test(gemm_benchmark) # PI_ERROR_DEVICE_NOT_FOUND 31 | add_sp_example(sparse_test) 32 | add_sp_example_no_test(take_example) # PI_ERROR_DEVICE_NOT_FOUND 33 | add_sp_example_no_test(test_range) # due to DRA-81 34 | add_sp_example_no_test(zip_example) # PI_ERROR_DEVICE_NOT_FOUND 35 | add_sp_example_no_test(black_scholes_benchmark) # PI_ERROR_DEVICE_NOT_FOUND 36 | -------------------------------------------------------------------------------- /include/dr/mp/containers/broadcasted_vector.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // 
SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | namespace dr::mp { 8 | 9 | template > 10 | class broadcasted_vector { 11 | public: 12 | broadcasted_vector() = default; 13 | 14 | template 15 | void broadcast_data(std::size_t data_size, std::size_t root, R root_data, 16 | dr::communicator comm) { 17 | if (_data != nullptr) { 18 | destroy_data(); 19 | } 20 | _data_size = data_size; 21 | _data = alloc.allocate(_data_size); 22 | if (comm.rank() == root) { 23 | if (use_sycl()) { 24 | __detail::sycl_copy(std::to_address(root_data.begin()), 25 | std::to_address(root_data.end()), _data); 26 | } else { 27 | rng::copy(root_data.begin(), root_data.end(), _data); 28 | } 29 | } 30 | comm.bcast(_data, sizeof(T) * _data_size, root); 31 | } 32 | 33 | void destroy_data() { 34 | alloc.deallocate(_data, _data_size); 35 | _data_size = 0; 36 | _data = nullptr; 37 | } 38 | 39 | T &operator[](std::size_t index) { return _data[index]; } 40 | 41 | T *broadcasted_data() { return _data; } 42 | 43 | auto size() { return _data_size; } 44 | 45 | auto begin() const { return _data; } 46 | auto end() const { return begin() + _data_size; } 47 | 48 | private: 49 | T *_data = nullptr; 50 | std::size_t _data_size = 0; 51 | Allocator alloc; 52 | }; 53 | } // namespace dr::mp 54 | -------------------------------------------------------------------------------- /doc/spec/source/algorithms/fill.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | .. include:: ../include/distributed-ranges.rst 6 | 7 | .. _fill: 8 | 9 | ========== 10 | ``fill`` 11 | ========== 12 | 13 | Interface 14 | ========= 15 | 16 | MP 17 | --- 18 | 19 | .. doxygenfunction:: dr::mp::fill(dr::distributed_contiguous_range auto &&dr, auto value); 20 | :outline: 21 | .. doxygenfunction:: dr::mp::fill(DI first, DI last, auto value) 22 | :outline: 23 | 24 | SP 25 | --- 26 | 27 | .. 
doxygenfunction:: dr::sp::fill_async(Iter first, Iter last, const std::iter_value_t &value) 28 | :outline: 29 | .. doxygenfunction:: dr::sp::fill(Iter first, Iter last, const std::iter_value_t &value) 30 | :outline: 31 | .. doxygenfunction:: dr::sp::fill_async(device_ptr first, device_ptr last, const U &value) 32 | :outline: 33 | .. doxygenfunction:: dr::sp::fill(device_ptr first, device_ptr last, const U &value) 34 | :outline: 35 | .. doxygenfunction:: dr::sp::fill_async(R &&r, const T &value) 36 | :outline: 37 | .. doxygenfunction:: dr::sp::fill(R &&r, const T &value) 38 | :outline: 39 | .. doxygenfunction:: dr::sp::fill_async(DR &&r, const T &value) 40 | :outline: 41 | .. doxygenfunction:: dr::sp::fill(DR &&r, const T &value) 42 | :outline: 43 | .. doxygenfunction:: dr::sp::fill(Iter first, Iter last, const T &value) 44 | :outline: 45 | 46 | Description 47 | =========== 48 | 49 | .. seealso:: 50 | 51 | C++ model 52 | `std::fill`_ 53 | C++ model 54 | `std::ranges::fill`_ 55 | 56 | Usage 57 | ===== 58 | -------------------------------------------------------------------------------- /test/gtest/common/iota_view.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "xp-tests.hpp" 6 | 7 | template class IotaView : public testing::Test { 8 | public: 9 | }; 10 | 11 | TYPED_TEST_SUITE(IotaView, AllTypes); 12 | 13 | TYPED_TEST(IotaView, ZipWithDR) { 14 | xp::distributed_vector dv(10); 15 | auto v = dr::views::iota(1, 11); 16 | 17 | auto z = xp::views::zip(dv, v); 18 | 19 | xp::for_each(z, [](auto ze) { 20 | auto [dve, ve] = ze; 21 | dve = ve; 22 | }); 23 | 24 | EXPECT_TRUE(equal_gtest(std::vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, dv)); 25 | } 26 | 27 | TYPED_TEST(IotaView, Copy) { 28 | TypeParam dv(10); 29 | auto v = dr::views::iota(1, 11); 30 | 31 | xp::copy(v, dv.begin()); 32 | 33 | barrier(); 34 | 
EXPECT_TRUE(equal_gtest(std::vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, dv)); 35 | } 36 | 37 | TYPED_TEST(IotaView, Transform) { 38 | TypeParam dv(10); 39 | auto v = dr::views::iota(1, 11); 40 | auto negate = [](auto v) { return -v; }; 41 | 42 | xp::transform(v, dv.begin(), negate); 43 | 44 | EXPECT_TRUE(equal_gtest( 45 | dv, std::vector{-1, -2, -3, -4, -5, -6, -7, -8, -9, -10})); 46 | } 47 | 48 | TYPED_TEST(IotaView, ForEach) { 49 | TypeParam dv(10); 50 | auto v = dr::views::iota(1, 11); 51 | 52 | auto negate = [](auto v) { 53 | auto &[in, out] = v; 54 | out = -in; 55 | }; 56 | 57 | auto z = xp::views::zip(v, dv); 58 | 59 | xp::for_each(z, negate); 60 | 61 | EXPECT_TRUE(equal_gtest( 62 | dv, std::vector{-1, -2, -3, -4, -5, -6, -7, -8, -9, -10})); 63 | } 64 | -------------------------------------------------------------------------------- /test/gtest/sp/copy-3.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | #include "copy.hpp" 5 | 6 | TYPED_TEST_SUITE(CopyTest, AllocatorTypes); 7 | 8 | TYPED_TEST(CopyTest, tests_from_this_file_run_on_3_devices) { 9 | EXPECT_EQ(dr::sp::nprocs(), 3); 10 | EXPECT_EQ(rng::size(dr::sp::devices()), 3); 11 | } 12 | 13 | TYPED_TEST(CopyTest, dist2local_wholesegment) { 14 | // when running on 3 devices copy exactly one segment 15 | const typename TestFixture::DistVec dist_vec = {1, 2, 3, 4, 5, 6, 16 | 7, 8, 9, 10, 11, 12}; 17 | typename TestFixture::LocalVec local_vec = {0, 0, 0, 0}; 18 | 19 | auto ret_it = dr::sp::copy(rng::begin(dist_vec) + 4, rng::begin(dist_vec) + 8, 20 | rng::begin(local_vec)); 21 | EXPECT_TRUE( 22 | equal_gtest(local_vec, typename TestFixture::LocalVec{5, 6, 7, 8})); 23 | EXPECT_EQ(ret_it, rng::end(local_vec)); 24 | } 25 | 26 | TYPED_TEST(CopyTest, local2dist_wholesegment) { 27 | // when running on 3 devices copy into exactly one segment 28 | const typename TestFixture::LocalVec 
local_vec = {50, 60, 70, 80}; 29 | typename TestFixture::DistVec dist_vec = {1, 2, 3, 4, 5, 6, 30 | 7, 8, 9, 10, 11, 12}; 31 | auto ret_it = dr::sp::copy(rng::begin(local_vec), rng::end(local_vec), 32 | rng::begin(dist_vec) + 4); 33 | EXPECT_TRUE(equal_gtest(dist_vec, 34 | typename TestFixture::LocalVec{1, 2, 3, 4, 50, 60, 70, 35 | 80, 9, 10, 11, 12})); 36 | EXPECT_EQ(*ret_it, 9); 37 | } 38 | -------------------------------------------------------------------------------- /test/fuzz/cpu/algorithms.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "cpu-fuzz.hpp" 6 | 7 | using V = std::vector; 8 | using DV = dr::mp::distributed_vector; 9 | 10 | void check_transform(std::size_t n, std::size_t b, std::size_t e) { 11 | auto op = [](auto n) { return n * n; }; 12 | int iota_base = 100; 13 | 14 | DV dvi1(n), dvr1(n, 0); 15 | dr::mp::iota(dvi1, iota_base); 16 | dr::mp::transform(dvi1.begin() + b, dvi1.begin() + e, dvr1.begin() + b, op); 17 | 18 | DV dvi2(n), dvr2(n, 0); 19 | dr::mp::iota(dvi2, iota_base); 20 | 21 | if (comm_rank == 0) { 22 | std::transform(dvi2.begin() + b, dvi2.begin() + e, dvr2.begin() + b, op); 23 | 24 | std::vector v(n, 0), vr(n, 0); 25 | rng::iota(v, iota_base); 26 | std::transform(v.begin() + b, v.begin() + e, vr.begin() + b, op); 27 | assert(is_equal(vr, dvr1)); 28 | assert(is_equal(vr, dvr2)); 29 | } 30 | } 31 | 32 | void check_copy(std::size_t n, std::size_t b, std::size_t e) { 33 | 34 | V v_in(n, 0), v(n, 0); 35 | rng::iota(v_in, 100); 36 | 37 | DV dv_in(n, 0), dv1(n, 0), dv2(n, 0), dv3(n, 0); 38 | dr::mp::iota(dv_in, 100); 39 | dr::mp::copy(dv_in.begin() + b, dv_in.begin() + e, dv1.begin() + b); 40 | dr::mp::copy(rng::subrange(dv_in.begin() + b, dv_in.begin() + e), 41 | dv2.begin() + b); 42 | 43 | if (comm_rank == 0) { 44 | std::copy(dv_in.begin() + b, dv_in.begin() + e, dv3.begin() + b); 45 | } 
46 | dr::mp::fence(); 47 | 48 | if (comm_rank == 0) { 49 | std::copy(v_in.begin() + b, v_in.begin() + e, v.begin() + b); 50 | assert(is_equal(dv1, v)); 51 | assert(is_equal(dv2, v)); 52 | assert(is_equal(dv3, v)); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /benchmarks/gbench/README.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | ============== 6 | Google Bench 7 | ============== 8 | 9 | We use google bench for micro-benchmarks. 10 | 11 | Show standard google bench options:: 12 | 13 | ./mp-bench --help 14 | 15 | Show custom options:: 16 | 17 | ./mp-bench --drhelp 18 | 19 | See `user guide`_ for more information on google benchmark. 20 | 21 | MP Sample Commands 22 | =================== 23 | 24 | Run all benchmarks with 2 ranks. Each rank uses a single thread:: 25 | 26 | mpirun -n 2 ./mp-bench --benchmark_counters_tabular=true 27 | 28 | Run all benchmarks with 2 ranks. Each rank uses a single SYCL device:: 29 | 30 | mpirun -n 2 ./mp-bench --benchmark_counters_tabular=true --sycl 31 | 32 | Run 2D stencil algorithms:: 33 | 34 | mpirun -n 2 ./mp-bench --benchmark_counters_tabular=true --benchmark_filter=Stencil2D 35 | 36 | Run distributed ranges algorithms:: 37 | 38 | mpirun -n 2 ./mp-bench --benchmark_counters_tabular=true --benchmark_filter=.*DR 39 | 40 | 41 | SP Sample Commands 42 | =================== 43 | 44 | By default, SP uses all available devices. When running on a 2 socket 45 | CPU system, SP partitions the root device into 2 devices. 
Use ``-d`` 46 | to explicitly control the number of devices:: 47 | 48 | ./sp-bench --benchmark_time_unit=ms --benchmark_counters_tabular=true -d 2 49 | 50 | Benchmark Variants 51 | ================== 52 | 53 | DR 54 | distributed ranges 55 | Serial 56 | single thread 57 | SYCL 58 | single SYCL device, direct coded in SYCL 59 | DPL 60 | single SYCL device using DPL 61 | 62 | 63 | .. _`user guide`: https://github.com/google/benchmark/blob/main/docs/user_guide.md#custom-counters 64 | -------------------------------------------------------------------------------- /examples/sp/dot_product.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | #include 7 | 8 | template 9 | auto dot_product_distributed(X &&x, Y &&y) { 10 | auto z = dr::sp::views::zip(x, y) | dr::views::transform([](auto &&elem) { 11 | auto &&[a, b] = elem; 12 | return a * b; 13 | }); 14 | 15 | return dr::sp::reduce(dr::sp::par_unseq, z, 0, std::plus()); 16 | } 17 | 18 | template 19 | auto dot_product_sequential(X &&x, Y &&y) { 20 | auto z = rng::views::zip(x, y) | rng::views::transform([](auto &&elem) { 21 | auto &&[a, b] = elem; 22 | return a * b; 23 | }); 24 | 25 | return std::reduce(z.begin(), z.end(), 0, std::plus()); 26 | } 27 | 28 | int main(int argc, char **argv) { 29 | auto devices = dr::sp::get_numa_devices(sycl::default_selector_v); 30 | dr::sp::init(devices); 31 | 32 | std::size_t n = 100; 33 | 34 | dr::sp::distributed_vector x(n); 35 | dr::sp::distributed_vector y(n); 36 | 37 | std::iota(x.begin(), x.end(), 0); 38 | std::iota(y.begin(), y.end(), 0); 39 | 40 | auto v = dot_product_distributed(x, y); 41 | 42 | fmt::print("{}\n", v); 43 | 44 | std::vector x_local(n); 45 | std::vector y_local(n); 46 | std::iota(x_local.begin(), x_local.end(), 0); 47 | std::iota(y_local.begin(), y_local.end(), 0); 48 | 49 | auto v_serial = 
dot_product_sequential(x_local, y_local); 50 | 51 | if (v != v_serial) { 52 | fmt::print("Wrong result. Expected {}, actual {}\n", v_serial, v); 53 | return 1; 54 | } 55 | 56 | return 0; 57 | } 58 | -------------------------------------------------------------------------------- /include/dr/detail/owning_view.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace dr { 10 | 11 | namespace __detail { 12 | 13 | // TODO: this `owning_view` is needed because range-v3 does not have an 14 | // `owning_view`. Ideally we would submit a PR to range-v3 and/or 15 | // switch to solely using libstdc++13. 16 | 17 | template 18 | class owning_view : public rng::view_interface> { 19 | public: 20 | owning_view(R &&range) : range_(std::move(range)) {} 21 | 22 | owning_view() 23 | requires std::default_initializable 24 | = default; 25 | owning_view(owning_view &&other) = default; 26 | owning_view(const owning_view &other) = default; 27 | 28 | owning_view &operator=(owning_view &&other) = default; 29 | owning_view &operator=(const owning_view &other) = default; 30 | 31 | auto size() const 32 | requires(rng::sized_range) 33 | { 34 | return rng::size(range_); 35 | } 36 | 37 | auto empty() const 38 | requires(rng::sized_range) 39 | { 40 | return size() == 0; 41 | } 42 | 43 | auto begin() { return rng::begin(range_); } 44 | 45 | auto begin() const 46 | requires(rng::range) 47 | { 48 | return rng::begin(range_); 49 | } 50 | 51 | auto end() { return rng::end(range_); } 52 | 53 | auto end() const 54 | requires(rng::range) 55 | { 56 | return rng::end(range_); 57 | } 58 | 59 | decltype(auto) base() { return range_; } 60 | 61 | decltype(auto) base() const { return range_; } 62 | 63 | private: 64 | R range_; 65 | }; 66 | 67 | } // namespace __detail 68 | 69 | } // namespace dr 70 | 
-------------------------------------------------------------------------------- /benchmarks/gbench/sp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Intel Corporation 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | set(CMAKE_INCLUDE_CURRENT_DIR ON) 6 | 7 | add_library(sp-benchmark INTERFACE) 8 | target_compile_definitions(sp-benchmark INTERFACE BENCH_SP) 9 | target_link_libraries(sp-benchmark INTERFACE benchmark::benchmark cxxopts 10 | DR::sp) 11 | 12 | if(ENABLE_CUDA) 13 | # because sort.cpp compilation fails with 14 | # dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort_one_wg.h warning: 15 | # attribute argument 16 is invalid and will be ignored; CUDA requires 16 | # sub_group size 32 17 | add_compile_options(-Wno-error=cuda-compat) 18 | endif() 19 | 20 | # cmake-format: off 21 | add_executable( 22 | sp-bench 23 | sp-bench.cpp 24 | fft3d.cpp 25 | gemm.cpp 26 | ../common/distributed_vector.cpp 27 | ../common/dot_product.cpp 28 | # ../common/inclusive_exclusive_scan.cpp disabled due to deadlock DRA-214 29 | ../common/sort.cpp 30 | ../common/stream.cpp) 31 | # cmake-format: on 32 | if(NOT ENABLE_CUDA) 33 | # black_scholes.cpp does not compile in CUDA because it uses std::log 34 | target_sources(sp-bench PRIVATE ../common/black_scholes.cpp) 35 | endif() 36 | target_link_libraries(sp-bench sp-benchmark) 37 | add_sp_ctest(sp-bench sp-bench --vector-size 200000 --check) 38 | 39 | # builds only 1 benchmark for quick testing. 
Change this to the benchmark you 40 | # are testing 41 | add_executable(sp-quick-bench sp-bench.cpp fft3d.cpp) 42 | target_link_libraries(sp-quick-bench sp-benchmark) 43 | 44 | # standalone as an example 45 | add_executable(fft3d-sp fft3d.cpp) 46 | target_compile_definitions(fft3d-sp PRIVATE STANDALONE_BENCHMARK) 47 | target_link_libraries(fft3d-sp cxxopts DR::sp) 48 | -------------------------------------------------------------------------------- /examples/mp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Intel Corporation 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | add_executable(stencil-slide stencil-slide.cpp) 6 | target_link_libraries(stencil-slide cxxopts DR::mpi) 7 | 8 | if(ENABLE_SYCL) 9 | add_executable(mp_dot_product_benchmark dot_product_benchmark.cpp) 10 | target_link_libraries(mp_dot_product_benchmark DR::mpi cxxopts) 11 | add_mp_ctest( 12 | TEST_NAME mp_dot_product NAME mp_dot_product_benchmark TARGS -n 1000) 13 | endif() 14 | 15 | add_executable(vector-add vector-add.cpp) 16 | target_link_libraries(vector-add DR::mpi) 17 | add_mp_ctest(TEST_NAME vector-add NAME vector-add NPROC 2) 18 | 19 | function(add_mp_example_no_test example_name) 20 | add_executable(${example_name} ${example_name}.cpp) 21 | target_link_libraries(${example_name} cxxopts DR::mpi) 22 | endfunction() 23 | 24 | function(add_mp_example example_name) 25 | add_mp_example_no_test(${example_name}) 26 | add_mp_ctest(TEST_NAME ${example_name} NAME ${example_name} NPROC 2) 27 | endfunction() 28 | 29 | add_mp_example(stencil-1d) 30 | add_mp_example(stencil-1d-array) 31 | add_mp_example(stencil-1d-pointer) 32 | add_mp_example(hello_world) 33 | add_mp_example_no_test(sparse_matrix) 34 | add_mp_example_no_test(sparse_benchmark) 35 | add_mp_example_no_test(sparse_matrix_matrix_mul) 36 | 37 | if(OpenMP_FOUND) 38 | add_executable(vector-add-ref vector-add-ref.cpp) 39 | 
target_link_libraries(vector-add-ref PRIVATE MPI::MPI_CXX OpenMP::OpenMP_CXX 40 | DR::mpi fmt::fmt) 41 | add_mp_ctest(TEST_NAME vector-add-ref NAME vector-add-ref NPROC 2) 42 | endif() 43 | 44 | add_executable(transpose-ref transpose-ref.cpp) 45 | target_link_libraries(transpose-ref PRIVATE MPI::MPI_CXX MKL::MKL) 46 | -------------------------------------------------------------------------------- /test/gtest/common/copy.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "xp-tests.hpp" 6 | 7 | // Fixture 8 | template class Copy : public testing::Test { 9 | public: 10 | }; 11 | 12 | TYPED_TEST_SUITE(Copy, AllTypes); 13 | 14 | TYPED_TEST(Copy, Range) { 15 | Ops2 ops(10); 16 | 17 | xp::copy(ops.dist_vec0, ops.dist_vec1.begin()); 18 | rng::copy(ops.vec0, ops.vec1.begin()); 19 | EXPECT_EQ(ops.vec0, ops.dist_vec0); 20 | EXPECT_EQ(ops.vec1, ops.dist_vec1); 21 | } 22 | 23 | TYPED_TEST(Copy, Iterator) { 24 | Ops2 ops(10); 25 | 26 | std::copy(ops.vec0.begin(), ops.vec0.end(), ops.vec1.begin()); 27 | xp::copy(ops.dist_vec0.begin(), ops.dist_vec0.end(), ops.dist_vec1.begin()); 28 | EXPECT_EQ(ops.vec0, ops.dist_vec0); 29 | EXPECT_EQ(ops.vec1, ops.dist_vec1); 30 | } 31 | 32 | TYPED_TEST(Copy, IteratorOffset) { 33 | Ops2 ops(10); 34 | 35 | std::copy(ops.vec0.begin() + 1, ops.vec0.end() - 1, ops.vec1.begin() + 1); 36 | xp::copy(ops.dist_vec0.begin() + 1, ops.dist_vec0.end() - 1, 37 | ops.dist_vec1.begin() + 1); 38 | EXPECT_EQ(ops.vec0, ops.dist_vec0); 39 | EXPECT_EQ(ops.vec1, ops.dist_vec1); 40 | } 41 | 42 | TYPED_TEST(Copy, RangeToDist) { 43 | Ops2 ops(10); 44 | 45 | xp::copy(ops.vec0, ops.dist_vec0.begin()); 46 | rng::copy(ops.vec1, ops.dist_vec1.begin()); 47 | EXPECT_EQ(ops.vec0, ops.dist_vec0); 48 | EXPECT_EQ(ops.vec1, ops.dist_vec1); 49 | } 50 | 51 | TYPED_TEST(Copy, DistToLocal) { 52 | Ops2 ops(10); 53 | 54 | 
xp::copy(ops.dist_vec0, ops.vec0.begin()); 55 | rng::copy(ops.dist_vec1, ops.vec1.begin()); 56 | EXPECT_EQ(ops.dist_vec0, ops.vec0); 57 | EXPECT_EQ(ops.dist_vec1, ops.vec1); 58 | } 59 | -------------------------------------------------------------------------------- /examples/include/data-utils.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | inline std::size_t partition_up(std::size_t num, std::size_t multiple) { 10 | return (num + multiple - 1) / multiple; 11 | } 12 | 13 | template 14 | void set_step(Seq &seq, typename Seq::value_type v = 0, 15 | typename Seq::value_type step = 1) { 16 | for (auto &r : seq) { 17 | r = v; 18 | v += step; 19 | } 20 | } 21 | 22 | int check(const auto &actual, const auto &reference, int max_errors = 10) { 23 | int errors = 0; 24 | 25 | for (std::size_t i = 0; i < std::min(actual.size(), reference.size()); i++) { 26 | if (actual[i] != reference[i]) { 27 | if (errors == 0) 28 | fmt::print("Value mismatches (actual):(reference)\n"); 29 | if (errors < max_errors) 30 | fmt::print(" {}: {}:{}\n", i, actual[i], reference[i]); 31 | errors++; 32 | } 33 | } 34 | if (actual.size() != reference.size()) { 35 | fmt::print("Size mismatch: {}(actual) {}(reference)\n", actual.size(), 36 | reference.size()); 37 | errors++; 38 | } 39 | 40 | return errors; 41 | } 42 | 43 | template bool is_equal(T a, T b) { return a == b; } 44 | 45 | template 46 | bool is_equal(Tp a, Tp b, 47 | Tp epsilon = 128 * std::numeric_limits::epsilon()) { 48 | if (a == b) { 49 | return true; 50 | } 51 | auto abs_th = std::numeric_limits::min(); 52 | auto diff = std::abs(a - b); 53 | auto norm = 54 | std::min(std::abs(a) + std::abs(b), std::numeric_limits::max()); 55 | 56 | return diff < std::max(abs_th, epsilon * norm); 57 | } 58 | 
-------------------------------------------------------------------------------- /examples/sp/vector_example.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | 7 | int main(int argc, char **argv) { 8 | printf("Creating NUMA devices...\n"); 9 | auto devices = dr::sp::get_numa_devices(sycl::default_selector_v); 10 | dr::sp::init(devices); 11 | 12 | for (auto &device : devices) { 13 | std::cout << " Device: " << device.get_info() 14 | << "\n"; 15 | } 16 | 17 | dr::sp::distributed_vector> v(100); 18 | 19 | dr::sp::for_each(dr::sp::par_unseq, dr::sp::enumerate(v), [](auto &&tuple) { 20 | auto &&[idx, value] = tuple; 21 | value = idx; 22 | }); 23 | 24 | dr::sp::for_each(dr::sp::par_unseq, v, 25 | [](auto &&value) { value = value + 2; }); 26 | 27 | std::size_t sum = dr::sp::reduce(dr::sp::par_unseq, v, int(0), std::plus{}); 28 | 29 | dr::sp::print_range(v); 30 | 31 | std::cout << "Sum: " << sum << std::endl; 32 | 33 | std::vector local_vec(v.size()); 34 | std::iota(local_vec.begin(), local_vec.end(), 0); 35 | 36 | dr::sp::print_range(local_vec, "local vec"); 37 | 38 | dr::sp::copy(local_vec.begin(), local_vec.end(), v.begin()); 39 | 40 | dr::sp::print_range(v, "vec after copy"); 41 | 42 | dr::sp::for_each(dr::sp::par_unseq, v, 43 | [](auto &&value) { value = value + 2; }); 44 | 45 | dr::sp::print_range(v, "vec after update"); 46 | 47 | dr::sp::copy(v.begin(), v.end(), local_vec.begin()); 48 | 49 | dr::sp::print_range(local_vec, "local vec after copy"); 50 | 51 | v.resize(200); 52 | dr::sp::print_range(v, "resized to 200"); 53 | 54 | v.resize(50); 55 | dr::sp::print_range(v, "resized to 50"); 56 | 57 | return 0; 58 | } 59 | -------------------------------------------------------------------------------- /examples/mp/stencil-slide.cpp: -------------------------------------------------------------------------------- 1 | 
// SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "dr/mp.hpp" 6 | 7 | #include "cxxopts.hpp" 8 | 9 | cxxopts::ParseResult options; 10 | 11 | auto stencil_op = [](auto &&r) { return r[0] + r[1] + r[2]; }; 12 | 13 | int stencil(auto n, auto steps) { 14 | std::vector a(n), b(n); 15 | rng::fill(a, 0); 16 | rng::fill(b, 0); 17 | 18 | // Input is a window 19 | auto in_curr = rng::views::sliding(a, 3); 20 | auto in_prev = rng::views::sliding(b, 3); 21 | 22 | // Output is an element 23 | auto out_curr = rng::subrange(b.begin() + 1, b.end() - 1); 24 | auto out_prev = rng::subrange(a.begin() + 1, a.end() - 1); 25 | 26 | // Initialize the input 27 | rng::iota(out_prev, 100); 28 | fmt::print("{}\n\n", a); 29 | 30 | for (std::size_t s = 0; s < steps; s++) { 31 | rng::transform(in_curr, out_curr.begin(), stencil_op); 32 | std::swap(in_curr, in_prev); 33 | std::swap(out_curr, out_prev); 34 | fmt::print("{}\n", s % 2 ? a : b); 35 | } 36 | 37 | return 0; 38 | } 39 | 40 | int main(int argc, char *argv[]) { 41 | cxxopts::Options options_spec(argv[0], "stencil 1d"); 42 | // clang-format off 43 | options_spec.add_options() 44 | ("n", "Size of array", cxxopts::value()->default_value("10")) 45 | ("s", "Number of time steps", cxxopts::value()->default_value("5")) 46 | ("help", "Print help"); 47 | // clang-format on 48 | 49 | try { 50 | options = options_spec.parse(argc, argv); 51 | } catch (const cxxopts::OptionParseException &e) { 52 | std::cout << options_spec.help() << "\n"; 53 | exit(1); 54 | } 55 | 56 | auto error = 57 | stencil(options["n"].as(), options["s"].as()); 58 | 59 | return error; 60 | } 61 | -------------------------------------------------------------------------------- /examples/mp/vector-add-ref.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "mpi.h" 6 | 
7 | #include "utils.hpp" 8 | #include "vector-add-serial.hpp" 9 | 10 | MPI_Comm comm; 11 | int comm_rank; 12 | int comm_size; 13 | 14 | void vector_add() { 15 | using T = int; 16 | 17 | const std::size_t slice_size = 5; 18 | const std::size_t full_size = slice_size * comm_size; 19 | 20 | // Compute the reference data 21 | vector_add_serial ref_adder; 22 | if (comm_rank == 0) { 23 | ref_adder.init(full_size); 24 | ref_adder.compute(); 25 | } 26 | 27 | // Distribute the data 28 | auto data_type = mpi_data_type(); 29 | std::vector a(slice_size), b(slice_size), c(slice_size); 30 | MPI_Scatter(ref_adder.a.data(), slice_size, data_type, a.data(), slice_size, 31 | data_type, 0, comm); 32 | MPI_Scatter(ref_adder.b.data(), slice_size, data_type, b.data(), slice_size, 33 | data_type, 0, comm); 34 | 35 | // multi-threaded vector add on slice 36 | #pragma omp parallel for 37 | for (std::size_t i = 0; i < slice_size; i++) { 38 | c[i] = a[i] + b[i]; 39 | } 40 | 41 | // Collect the results 42 | std::vector c_full(full_size); 43 | MPI_Gather(c.data(), slice_size, data_type, c_full.data(), slice_size, 44 | data_type, 0, comm); 45 | 46 | // Check 47 | if (comm_rank == 0) { 48 | fmt::print("a: {}\n", ref_adder.a); 49 | fmt::print("b: {}\n", ref_adder.b); 50 | fmt::print("c: {}\n", c_full); 51 | ref_adder.check(c_full); 52 | } 53 | } 54 | 55 | int main(int argc, char *argv[]) { 56 | MPI_Init(&argc, &argv); 57 | comm = MPI_COMM_WORLD; 58 | MPI_Comm_rank(comm, &comm_rank); 59 | MPI_Comm_size(comm, &comm_size); 60 | 61 | vector_add(); 62 | 63 | MPI_Finalize(); 64 | return 0; 65 | } 66 | -------------------------------------------------------------------------------- /include/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Intel Corporation 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | FetchContent_Declare( 6 | dpl 7 | GIT_REPOSITORY https://github.com/uxlfoundation/oneDPL.git 8 | GIT_TAG main) 9 | 
FetchContent_MakeAvailable(dpl) 10 | 11 | # mpi: multi-process 12 | 13 | add_library(dr_mpi INTERFACE) 14 | add_library(DR::mpi ALIAS dr_mpi) 15 | 16 | find_package(MPI REQUIRED) 17 | find_package(MKL REQUIRED) 18 | find_package(TBB REQUIRED) 19 | target_link_libraries(dr_mpi INTERFACE MPI::MPI_CXX range-v3 std::mdspan 20 | TBB::tbb) 21 | 22 | # gcc 10 uses TBB API that were removed 23 | target_compile_definitions(dr_mpi INTERFACE _GLIBCXX_USE_TBB_PAR_BACKEND=0) 24 | 25 | if(ENABLE_FORMAT) 26 | target_compile_definitions(dr_mpi INTERFACE DR_FORMAT) 27 | target_link_libraries(dr_mpi INTERFACE fmt::fmt) 28 | endif() 29 | if(ENABLE_SYCL) 30 | target_link_libraries(dr_mpi INTERFACE oneDPL) 31 | endif() 32 | target_include_directories(dr_mpi INTERFACE . vendor) 33 | 34 | # sp: sycl, single process, multi-gpu 35 | 36 | add_library(dr_sp INTERFACE) 37 | add_library(DR::sp ALIAS dr_sp) 38 | 39 | target_include_directories(dr_sp INTERFACE . vendor) 40 | target_compile_definitions(dr_sp INTERFACE USE_MKL 41 | _GLIBCXX_USE_TBB_PAR_BACKEND=0) 42 | target_link_libraries(dr_sp INTERFACE range-v3 oneDPL fmt::fmt MKL::MKL_DPCPP) 43 | 44 | # For use, see: 45 | # https://github.com/illuhad/hipSYCL/blob/develop/doc/using-hipsycl.md#using-the-cmake-integration 46 | # example: cmake .. 
-DhipSYCL_DIR= 47 | # -DHIPSYCL_TARGETS="" 48 | if(HIPSYCL_TARGETS) 49 | find_package(hipSYCL REQUIRED) 50 | add_sycl_to_target(TARGET dr_sp) 51 | target_compile_options(dr_sp INTERFACE --hipsycl-targets='cuda:sm_75') 52 | endif() 53 | -------------------------------------------------------------------------------- /test/gtest/common/reduce.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "xp-tests.hpp" 6 | 7 | // Fixture 8 | template class Reduce : public testing::Test { 9 | protected: 10 | }; 11 | 12 | TYPED_TEST_SUITE(Reduce, AllTypes); 13 | 14 | TYPED_TEST(Reduce, Range) { 15 | Ops1 ops(10); 16 | 17 | auto max = [](double x, double y) { return std::max(x, y); }; 18 | EXPECT_EQ(std::reduce(ops.vec.begin(), ops.vec.end(), 3, max), 19 | xp::reduce(ops.dist_vec, 3, max)); 20 | } 21 | 22 | TYPED_TEST(Reduce, Max) { 23 | Ops1 ops(10); 24 | 25 | EXPECT_EQ(std::reduce(ops.vec.begin(), ops.vec.end(), 3, std::plus{}), 26 | xp::reduce(ops.dist_vec, 3, std::plus{})); 27 | } 28 | 29 | TYPED_TEST(Reduce, Iterators) { 30 | Ops1 ops(10); 31 | 32 | EXPECT_EQ( 33 | std::reduce(ops.vec.begin(), ops.vec.end(), 3, std::plus{}), 34 | xp::reduce(ops.dist_vec.begin(), ops.dist_vec.end(), 3, std::plus{})); 35 | } 36 | 37 | TYPED_TEST(Reduce, RangeDefaultOp) { 38 | Ops1 ops(10); 39 | 40 | EXPECT_EQ(std::reduce(ops.vec.begin(), ops.vec.end(), 3), 41 | xp::reduce(ops.dist_vec, 3)); 42 | } 43 | 44 | TYPED_TEST(Reduce, IteratorsDefaultOp) { 45 | Ops1 ops(10); 46 | 47 | EXPECT_EQ(std::reduce(ops.vec.begin(), ops.vec.end(), 3), 48 | xp::reduce(ops.dist_vec.begin(), ops.dist_vec.end(), 3)); 49 | } 50 | 51 | TYPED_TEST(Reduce, RangeDefaultInit) { 52 | Ops1 ops(10); 53 | 54 | EXPECT_EQ(std::reduce(ops.vec.begin(), ops.vec.end()), 55 | xp::reduce(ops.dist_vec)); 56 | } 57 | 58 | TYPED_TEST(Reduce, IteratorsDefaultInit) { 59 | Ops1 ops(10); 
60 | 61 | EXPECT_EQ(std::reduce(ops.vec.begin(), ops.vec.end()), 62 | xp::reduce(ops.dist_vec.begin(), ops.dist_vec.end())); 63 | } 64 | -------------------------------------------------------------------------------- /include/dr/detail/enumerate.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace dr { 10 | 11 | namespace __detail { 12 | 13 | namespace { 14 | 15 | template struct range_size { 16 | using type = std::size_t; 17 | }; 18 | 19 | template struct range_size { 20 | using type = rng::range_size_t; 21 | }; 22 | 23 | template using range_size_t = typename range_size::type; 24 | 25 | } // namespace 26 | 27 | class enumerate_adapter_closure { 28 | public: 29 | template auto operator()(R &&r) const { 30 | using S = range_size_t; 31 | // NOTE: This line is only necessary due to a bug in range-v3 where views 32 | // have non-weakly-incrementable size types. (Standard mandates 33 | // size type must be weakly incrementable.) 
34 | using W = std::conditional_t, S, std::size_t>; 35 | if constexpr (rng::sized_range) { 36 | return rng::views::zip(rng::views::iota(W{0}, W{rng::size(r)}), 37 | std::forward(r)); 38 | } else { 39 | return rng::views::zip(rng::views::iota(W{0}), std::forward(r)); 40 | } 41 | } 42 | 43 | template 44 | friend auto operator|(R &&r, const enumerate_adapter_closure &closure) { 45 | return closure(std::forward(r)); 46 | } 47 | }; 48 | 49 | class enumerate_fn_ { 50 | public: 51 | template constexpr auto operator()(R &&r) const { 52 | return enumerate_adapter_closure{}(std::forward(r)); 53 | } 54 | 55 | inline auto enumerate() const { return enumerate_adapter_closure{}; } 56 | }; 57 | 58 | inline constexpr auto enumerate = enumerate_fn_{}; 59 | 60 | } // namespace __detail 61 | 62 | } // namespace dr 63 | -------------------------------------------------------------------------------- /test/gtest/mp/segments.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "xp-tests.hpp" 6 | 7 | #include 8 | 9 | template class Segmented : public testing::Test { 10 | public: 11 | }; 12 | 13 | TYPED_TEST_SUITE(Segmented, AllTypesWithoutIshmem); 14 | 15 | TYPED_TEST(Segmented, StaticAssert) { 16 | Ops1 ops(10); 17 | auto segmented = dr::mp::segmented_view(rng::views::iota(100), 18 | dr::ranges::segments(ops.dist_vec)); 19 | static_assert(std::forward_iterator); 20 | static_assert(rng::forward_range); 21 | } 22 | 23 | TYPED_TEST(Segmented, Basic) { 24 | Ops1 ops(10); 25 | auto segmented = dr::mp::segmented_view(rng::views::iota(100), 26 | dr::ranges::segments(ops.dist_vec)); 27 | EXPECT_EQ(dr::ranges::segments(ops.dist_vec), segmented); 28 | } 29 | 30 | template class SegmentUtils : public testing::Test { 31 | public: 32 | }; 33 | 34 | // traversing on host over local_segment does not work in case of both: 35 | // device_memory and 
IshmemBackend (which uses device memory) 36 | TYPED_TEST_SUITE(SegmentUtils, AllTypesWithoutIshmem); 37 | 38 | TYPED_TEST(SegmentUtils, LocalSegment) { 39 | if (options.count("device-memory")) { 40 | return; 41 | } 42 | Ops1 ops(10); 43 | auto segments = dr::mp::local_segments(ops.dist_vec); 44 | auto ls = dr::mp::local_segment(ops.dist_vec); 45 | if (ls.size() == 0) // comparison would not be possible 46 | return; 47 | EXPECT_EQ(ls, *rng::begin(segments)); 48 | } 49 | 50 | TYPED_TEST(SegmentUtils, OnlyRank0Data) { 51 | // Only first rank gets data 52 | TypeParam dist(10, dr::mp::distribution().granularity(10)); 53 | EXPECT_EQ(dr::mp::local_segment(dist).empty(), comm_rank != 0); 54 | } 55 | -------------------------------------------------------------------------------- /test/gtest/mp/communicator.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "xp-tests.hpp" 6 | 7 | template class Communicator : public testing::Test {}; 8 | 9 | using T = int; 10 | using DV = dr::mp::distributed_vector; 11 | 12 | TEST(Communicator, Alltoallv) { 13 | if (comm_size <= 1) 14 | return; 15 | 16 | const std::size_t SIZE = 2; 17 | std::vector vec_src(comm_size * SIZE); 18 | std::vector vec_dst(comm_size * SIZE); 19 | 20 | rng::fill(vec_src, comm_rank * 10 + 1); 21 | 22 | std::vector sendsizes(comm_size, SIZE); 23 | std::vector recvsizes(comm_size, SIZE); 24 | 25 | std::vector senddispl(comm_size); 26 | std::vector recvdispl(comm_size); 27 | 28 | for (std::size_t i = 0; i < comm_size; i++) { 29 | senddispl[i] = recvdispl[i] = i * SIZE; 30 | } 31 | 32 | dr::mp::default_comm().alltoallv(vec_src, sendsizes, senddispl, vec_dst, 33 | recvsizes, recvdispl); 34 | 35 | std::vector vec_ref(comm_size * SIZE); 36 | 37 | for (std::size_t i = 0; i < comm_size; i++) 38 | for (std::size_t j = 0; j < SIZE; j++) { 39 | vec_ref[i * SIZE + j] = 10 * i + 
1; 40 | } 41 | 42 | EXPECT_TRUE(equal_gtest(vec_ref, vec_dst)); 43 | } 44 | 45 | TEST(Communicator, Allgather) { 46 | if (comm_size <= 1) 47 | return; 48 | 49 | const std::size_t SIZE = 2; 50 | std::vector vec_src(SIZE); 51 | std::vector vec_dst(comm_size * SIZE); 52 | 53 | rng::fill(vec_src, comm_rank * 10 + 1); 54 | 55 | dr::mp::default_comm().all_gather(vec_src, vec_dst); 56 | 57 | std::vector vec_ref(comm_size * SIZE); 58 | 59 | for (std::size_t i = 0; i < comm_size; i++) 60 | for (std::size_t j = 0; j < SIZE; j++) { 61 | vec_ref[i * SIZE + j] = 10 * i + 1; 62 | } 63 | 64 | EXPECT_TRUE(equal_gtest(vec_ref, vec_dst)); 65 | } 66 | -------------------------------------------------------------------------------- /include/dr/mp/algorithms/for_each.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | namespace dr::mp { 20 | 21 | /// Collective for_each on distributed range 22 | void for_each(dr::distributed_range auto &&dr, auto op) { 23 | dr::drlog.debug(dr::logger::for_each, "for_each: parallel execution\n"); 24 | if (rng::empty(dr)) { 25 | return; 26 | } 27 | assert(aligned(dr)); 28 | 29 | for (const auto &s : local_segments(dr)) { 30 | if (mp::use_sycl()) { 31 | dr::drlog.debug(" using sycl\n"); 32 | 33 | assert(rng::distance(s) > 0); 34 | #ifdef SYCL_LANGUAGE_VERSION 35 | dr::__detail::parallel_for( 36 | dr::mp::sycl_queue(), sycl::range<1>(rng::distance(s)), 37 | [first = rng::begin(s), op](auto idx) { op(first[idx]); }) 38 | .wait(); 39 | #else 40 | assert(false); 41 | #endif 42 | } else { 43 | dr::drlog.debug(" using cpu\n"); 44 | rng::for_each(s, op); 45 | } 46 | } 47 | barrier(); 48 | } 49 | 50 | /// Collective for_each on iterator/sentinel for a 
distributed range 51 | template 52 | void for_each(DI first, DI last, auto op) { 53 | mp::for_each(rng::subrange(first, last), op); 54 | } 55 | 56 | /// Collective for_each_n: applies op to the n elements starting at first and returns the iterator past them 57 | template 58 | DI for_each_n(DI first, I n, auto op) { 59 | auto last = first; 60 | rng::advance(last, n); 61 | mp::for_each(first, last, op); 62 | return last; 63 | } 64 | 65 | } // namespace dr::mp 66 | -------------------------------------------------------------------------------- /examples/sp/zip_example.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | 7 | int main(int argc, char **argv) { 8 | printf("Creating NUMA devices...\n"); 9 | // auto devices = dr::sp::get_duplicated_devices(sycl::gpu_selector_v, 8); 10 | auto devices = dr::sp::get_numa_devices(sycl::gpu_selector_v); 11 | dr::sp::init(devices); 12 | 13 | using DV = dr::sp::distributed_vector>; 14 | DV v(100); 15 | 16 | DV v2(50); 17 | 18 | dr::sp::for_each(dr::sp::par_unseq, dr::sp::enumerate(v), [](auto &&tuple) { 19 | auto &&[idx, value] = tuple; 20 | value = idx; 21 | }); 22 | 23 | dr::sp::for_each(dr::sp::par_unseq, v, [](auto &&value) { value += 2; }); 24 | 25 | std::size_t sum [[maybe_unused]] = 26 | dr::sp::reduce(dr::sp::par_unseq, v, int(0), std::plus{}); 27 | 28 | dr::sp::print_range(v); 29 | 30 | dr::sp::distributed_span dspan(v.segments()); 31 | dr::sp::print_range(dspan); 32 | 33 | auto i = rng::views::iota(int32_t(0), int32_t(rng::size(v))); 34 | dr::sp::zip_view zip_v(i, v); 35 | 36 | auto segments = zip_v.segments(); 37 | 38 | dr::sp::for_each(dr::sp::par_unseq, zip_v, [](auto &&tuple) { 39 | auto &&[i, v] = tuple; 40 | v = i; 41 | }); 42 | 43 | dr::sp::zip_view zip_v2(i, v2); 44 | 45 | dr::sp::for_each(dr::sp::par_unseq, zip_v2, [](auto &&tuple) { 46 | auto &&[i, v2] = tuple; 47 | v2 = i; 48 | }); 49 | 50 |
dr::sp::zip_view view2(v, v2); 51 | 52 | dr::sp::print_range(v, "v"); 53 | dr::sp::print_range(v2, "v2"); 54 | 55 | dr::sp::print_range_details(v, "v"); 56 | dr::sp::print_range_details(v2, "v2"); 57 | 58 | printf("Writing to zip_view...\n"); 59 | dr::sp::for_each(dr::sp::par_unseq, view2, [](auto &&tuple) { 60 | auto &&[v, v2] = tuple; 61 | v2 = 1; 62 | }); 63 | 64 | dr::sp::print_range(v); 65 | dr::sp::print_range(v2); 66 | 67 | return 0; 68 | } 69 | -------------------------------------------------------------------------------- /include/dr/concepts/concepts.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace dr { 10 | 11 | template 12 | concept remote_iterator = 13 | std::forward_iterator && requires(I &iter) { dr::ranges::rank(iter); }; 14 | 15 | template 16 | concept remote_range = 17 | rng::forward_range && requires(R &r) { dr::ranges::rank(r); }; 18 | 19 | template 20 | concept distributed_range = 21 | rng::forward_range && requires(R &r) { dr::ranges::segments(r); }; 22 | 23 | template 24 | concept remote_contiguous_iterator = 25 | std::random_access_iterator && requires(I &iter) { 26 | dr::ranges::rank(iter); 27 | { dr::ranges::local(iter) } -> std::contiguous_iterator; 28 | }; 29 | 30 | template 31 | concept distributed_iterator = std::forward_iterator && requires(I &iter) { 32 | dr::ranges::segments(iter); 33 | }; 34 | 35 | template 36 | concept remote_contiguous_range = 37 | remote_range && rng::random_access_range && requires(R &r) { 38 | { dr::ranges::local(r) } -> rng::contiguous_range; 39 | }; 40 | 41 | template 42 | concept distributed_contiguous_range = 43 | distributed_range && rng::random_access_range && 44 | requires(R &r) { 45 | { dr::ranges::segments(r) } -> rng::random_access_range; 46 | } && 47 | remote_contiguous_range< 48 | rng::range_value_t()))>>; 
49 | 50 | template 51 | concept distributed_contiguous_iterator = 52 | distributed_iterator && rng::random_access_iterator && 53 | requires(Iter &iter) { 54 | { dr::ranges::segments(iter) } -> rng::random_access_range; 55 | } && 56 | remote_contiguous_range()))>>; 58 | 59 | } // namespace dr 60 | -------------------------------------------------------------------------------- /include/dr/mp/algorithms/equal.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace dr::mp::_detail { 13 | template 14 | requires std::equality_comparable_with, 15 | rng::range_value_t> 16 | bool equal(std::size_t root, bool root_provided, R1 &&r1, R2 &&r2) { 17 | 18 | if (rng::distance(r1) != rng::distance(r2)) { 19 | return false; 20 | } 21 | 22 | // we must use ints instead of bools, because distributed ranges do not 23 | // support bools 24 | auto compare = [](auto &&elems) { 25 | return elems.first == elems.second ? 
1 : 0; 26 | }; 27 | 28 | auto zipped_views = views::zip(r1, r2); 29 | auto compared = dr::mp::views::transform(zipped_views, compare); 30 | 31 | auto min = [](double x, double y) { return std::min(x, y); }; 32 | if (root_provided) { 33 | auto result = mp::reduce(root, compared, 1, min); 34 | return result == 1; 35 | } 36 | auto result = mp::reduce(compared, 1, min); 37 | return result == 1; 38 | } 39 | 40 | } // namespace dr::mp::_detail 41 | 42 | namespace dr::mp { 43 | template 44 | requires std::equality_comparable_with, 45 | rng::range_value_t> 46 | bool equal(std::size_t root, R1 &&r1, R2 &&r2) { 47 | return _detail::equal(root, true, r1, r2); 48 | } 49 | 50 | template 51 | requires std::equality_comparable_with, 52 | rng::range_value_t> 53 | bool equal(R1 &&r1, R2 &&r2) { 54 | return _detail::equal(0, false, r1, r2); 55 | } 56 | 57 | } // namespace dr::mp 58 | -------------------------------------------------------------------------------- /include/dr/mp/algorithms/exclusive_scan.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace dr::mp { 10 | 11 | template 13 | auto exclusive_scan(R &&r, O &&o, T init, BinaryOp &&binary_op) { 14 | return __detail::inclusive_exclusive_scan_impl_( 15 | std::forward(r), rng::begin(std::forward(o)), 16 | std::forward(binary_op), 17 | std::optional>(init)); 18 | } 19 | 20 | template 22 | auto exclusive_scan(R &&r, O &&o, T init) { 23 | return dr::mp::exclusive_scan(std::forward(r), std::forward(o), 24 | static_cast>(init), 25 | std::plus>()); 26 | } 27 | 28 | // Distributed iterator versions 29 | /// Scans [first, last) with binary_op into the output starting at d_first and returns the end of that output; forwards to the range overload, which expects an output range and takes init before binary_op 30 | template 32 | OutputIter exclusive_scan(Iter first, Iter last, OutputIter d_first, T init, 33 | BinaryOp &&binary_op) { 34 | auto d_last = rng::next(d_first, rng::distance(first, last)); 35 | dr::mp::exclusive_scan(rng::subrange(first, last), rng::subrange(d_first, d_last), init, std::forward<BinaryOp>(binary_op)); 36 | return d_last; 37 | } 38
| 39 | template 41 | OutputIter exclusive_scan(Iter first, Iter last, OutputIter d_first, T init) { 42 | auto dist = rng::distance(first, last); 43 | auto d_last = d_first; 44 | rng::advance(d_last, dist); 45 | dr::mp::exclusive_scan(rng::subrange(first, last), 46 | rng::subrange(d_first, d_last), init); 47 | 48 | return d_last; 49 | } 50 | 51 | } // namespace dr::mp 52 | -------------------------------------------------------------------------------- /include/dr/sp/span.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | #include 10 | 11 | namespace dr::sp { 12 | 13 | template 14 | class span : public rng::view_interface> { 15 | public: 16 | static_assert(std::is_same_v, T>); 17 | 18 | using value_type = std::iter_value_t; 19 | using size_type = std::size_t; 20 | using difference_type = std::iter_difference_t; 21 | using reference = std::iter_reference_t; 22 | using iterator = Iter; 23 | 24 | template 25 | span(R &&r) : begin_(rng::begin(r)), end_(rng::end(r)) {} 26 | span(Iter first, Iter last) : begin_(first), end_(last) {} 27 | span(Iter first, std::size_t count) : begin_(first), end_(first + count) {} 28 | 29 | span() = default; 30 | span(const span &) noexcept = default; 31 | span &operator=(const span &) noexcept = default; 32 | 33 | std::size_t size() const noexcept { return std::size_t(end() - begin()); } 34 | 35 | bool empty() const noexcept { return size() == 0; } 36 | 37 | Iter begin() const noexcept { return begin_; } 38 | 39 | Iter end() const noexcept { return end_; } 40 | 41 | reference operator[](size_type index) const { return *(begin() + index); } 42 | 43 | span first(size_type n) const { return span(begin(), begin() + n); } 44 | 45 | span last(size_type n) const { return span(end() - n, end()); } 46 | 47 | span subspan(std::size_t offset, std::size_t count) const { 48 
| return span(begin() + offset, begin() + offset + count); 49 | } 50 | 51 | private: 52 | Iter begin_, end_; 53 | }; 54 | 55 | template 56 | span(R &&) -> span, rng::iterator_t>; 57 | 58 | template 59 | span(Iter first, std::size_t count) -> span, Iter>; 60 | 61 | } // namespace dr::sp 62 | -------------------------------------------------------------------------------- /test/gtest/common/count.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "xp-tests.hpp" 6 | 7 | // Fixture 8 | template class Count : public testing::Test { 9 | protected: 10 | }; 11 | 12 | TYPED_TEST_SUITE(Count, AllTypes); 13 | 14 | TYPED_TEST(Count, EmptyIf) { 15 | std::vector vec; 16 | 17 | Ops1 ops(0); 18 | 19 | auto pred = [=](auto &&v) { return true; }; 20 | 21 | EXPECT_EQ(xp::count_if(ops.dist_vec, pred), 0); 22 | EXPECT_EQ(std::count_if(ops.vec.begin(), ops.vec.end(), pred), 23 | xp::count_if(ops.dist_vec, pred)); 24 | } 25 | 26 | TYPED_TEST(Count, BasicFirstElem) { 27 | std::vector vec{1, 2, 3, 1, 1, 3, 4, 1, 5, 6, 7}; 28 | 29 | Ops1 ops(vec.size()); 30 | ops.vec = vec; 31 | xp::copy(ops.vec, ops.dist_vec.begin()); 32 | 33 | auto value = *ops.vec.begin(); 34 | 35 | EXPECT_EQ(xp::count(ops.dist_vec, value), 4); 36 | EXPECT_EQ(std::count(ops.vec.begin(), ops.vec.end(), value), 37 | xp::count(ops.dist_vec, value)); 38 | } 39 | 40 | TYPED_TEST(Count, BasicFirstElemIf) { 41 | std::vector vec{1, 2, 3, 1, 1, 3, 4, 1, 5, 6, 7}; 42 | 43 | Ops1 ops(vec.size()); 44 | ops.vec = vec; 45 | xp::copy(ops.vec, ops.dist_vec.begin()); 46 | 47 | auto value = *vec.begin(); 48 | auto pred = [=](auto &&v) { return v == value; }; 49 | 50 | EXPECT_EQ(xp::count_if(ops.dist_vec, pred), 4); 51 | EXPECT_EQ(std::count_if(ops.vec.begin(), ops.vec.end(), pred), 52 | xp::count_if(ops.dist_vec, pred)); 53 | } 54 | 55 | TYPED_TEST(Count, FirstElemsIf) { 56 | std::vector 
vec(20); 57 | std::iota(vec.begin(), vec.end(), 0); 58 | 59 | Ops1 ops(vec.size()); 60 | ops.vec = vec; 61 | xp::copy(ops.vec, ops.dist_vec.begin()); 62 | 63 | auto pred = [=](auto &&v) { return v < 5; }; 64 | 65 | EXPECT_EQ(xp::count_if(ops.dist_vec, pred), 5); 66 | EXPECT_EQ(std::count_if(ops.vec.begin(), ops.vec.end(), pred), 67 | xp::count_if(ops.dist_vec, pred)); 68 | } 69 | -------------------------------------------------------------------------------- /doc/spec/source/algorithms/inclusive_scan.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | .. include:: ../include/distributed-ranges.rst 6 | 7 | .. _inclusive_scan: 8 | 9 | ============================ 10 | ``inclusive_scan`` 11 | ============================ 12 | 13 | Interface 14 | ========= 15 | 16 | MP 17 | --- 18 | 19 | SP 20 | --- 21 | 22 | .. doxygenfunction:: dr::sp::inclusive_scan(ExecutionPolicy &&policy, R &&r, O &&o, BinaryOp &&binary_op, T init) 23 | :outline: 24 | .. doxygenfunction:: dr::sp::inclusive_scan(ExecutionPolicy &&policy, R &&r, O &&o, BinaryOp &&binary_op) 25 | :outline: 26 | .. doxygenfunction:: dr::sp::inclusive_scan(ExecutionPolicy &&policy, R &&r, O &&o) 27 | :outline: 28 | .. doxygenfunction:: dr::sp::inclusive_scan(ExecutionPolicy &&policy, Iter first, Iter last, OutputIter d_first, BinaryOp &&binary_op, T init) 29 | :outline: 30 | .. doxygenfunction:: dr::sp::inclusive_scan(ExecutionPolicy &&policy, Iter first, Iter last, OutputIter d_first, BinaryOp &&binary_op) 31 | :outline: 32 | .. doxygenfunction:: dr::sp::inclusive_scan(ExecutionPolicy &&policy, Iter first, Iter last, OutputIter d_first) 33 | :outline: 34 | 35 | Execution policy-less versions 36 | 37 | .. doxygenfunction:: dr::sp::inclusive_scan(R &&r, O &&o) 38 | :outline: 39 | .. 
doxygenfunction:: dr::sp::inclusive_scan(R &&r, O &&o, BinaryOp &&binary_op) 40 | :outline: 41 | .. doxygenfunction:: dr::sp::inclusive_scan(R &&r, O &&o, BinaryOp &&binary_op, T init) 42 | :outline: 43 | 44 | Distributed iterator versions 45 | 46 | .. doxygenfunction:: dr::sp::inclusive_scan(Iter first, Iter last, OutputIter d_first) 47 | :outline: 48 | .. doxygenfunction:: dr::sp::inclusive_scan(Iter first, Iter last, OutputIter d_first, BinaryOp &&binary_op) 49 | :outline: 50 | .. doxygenfunction:: dr::sp::inclusive_scan(Iter first, Iter last, OutputIter d_first, BinaryOp &&binary_op, T init) 51 | :outline: 52 | 53 | Description 54 | =========== 55 | 56 | .. seealso:: 57 | 58 | `std::inclusive_scan`_ 59 | 60 | Examples 61 | ======== 62 | -------------------------------------------------------------------------------- /examples/sp/inclusive_scan_example.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | int main(int argc, char **argv) { 14 | printf("Creating NUMA devices...\n"); 15 | auto devices = dr::sp::get_numa_devices(sycl::default_selector_v); 16 | dr::sp::init(devices); 17 | 18 | for (auto &device : devices) { 19 | std::cout << " Device: " << device.get_info() 20 | << "\n"; 21 | } 22 | 23 | dr::sp::distributed_vector> v(100); 24 | 25 | std::vector lv(100); 26 | 27 | std::iota(lv.begin(), lv.end(), 0); 28 | dr::sp::copy(lv.begin(), lv.end(), v.begin()); 29 | 30 | fmt::print(" v: {}\n", v); 31 | fmt::print("lv: {}\n", lv); 32 | 33 | std::inclusive_scan(lv.begin(), lv.end(), lv.begin()); 34 | dr::sp::inclusive_scan(dr::sp::par_unseq, v, v); 35 | 36 | fmt::print(" (after) v: {}\n", v); 37 | fmt::print(" (after) lv: {}\n", lv); 38 | 39 | for (std::size_t i = 0; i < lv.size(); i++) { 40 | int x = lv[i]; 41 | int y = v[i]; 42 | if (x != y) {
43 | printf("(%lu) %d != %d\n", i, x, y); 44 | } 45 | } 46 | 47 | std::iota(lv.begin(), lv.end(), 0); 48 | dr::sp::copy(lv.begin(), lv.end(), v.begin()); 49 | 50 | dr::sp::distributed_vector> o(v.size() + 51 | 100); 52 | 53 | std::inclusive_scan(lv.begin(), lv.end(), lv.begin(), std::plus<>(), 12); 54 | dr::sp::inclusive_scan(dr::sp::par_unseq, v, o, std::plus<>(), 12); 55 | 56 | fmt::print(" (after) v: {}\n", 57 | rng::subrange(o.begin(), o.begin() + v.size())); 58 | fmt::print(" (after) lv: {}\n", lv); 59 | 60 | for (std::size_t i = 0; i < lv.size(); i++) { 61 | int x = lv[i]; 62 | int y = o[i]; 63 | if (x != y) { 64 | printf("(%lu) %d != %d\n", i, x, y); 65 | } 66 | } 67 | 68 | return 0; 69 | } 70 | -------------------------------------------------------------------------------- /examples/sp/exclusive_scan_example.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | int main(int argc, char **argv) { 14 | printf("Creating NUMA devices...\n"); 15 | auto devices = dr::sp::get_numa_devices(sycl::default_selector_v); 16 | dr::sp::init(devices); 17 | 18 | for (auto &device : devices) { 19 | std::cout << " Device: " << device.get_info() 20 | << "\n"; 21 | } 22 | 23 | dr::sp::distributed_vector> v(100); 24 | 25 | std::vector lv(100); 26 | 27 | std::iota(lv.begin(), lv.end(), 0); 28 | dr::sp::copy(lv.begin(), lv.end(), v.begin()); 29 | 30 | fmt::print(" v: {}\n", v); 31 | fmt::print("lv: {}\n", lv); 32 | 33 | std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), 0); 34 | dr::sp::exclusive_scan(dr::sp::par_unseq, v, v, 0); 35 | 36 | fmt::print(" (after) v: {}\n", v); 37 | fmt::print(" (after) lv: {}\n", lv); 38 | 39 | for (std::size_t i = 0; i < lv.size(); i++) { 40 | int x = lv[i]; 41 | int y = v[i]; 42 | if (x != y) { 43 | printf("(%lu) %d != %d\n", 
i, x, y); 44 | } 45 | } 46 | 47 | std::iota(lv.begin(), lv.end(), 0); 48 | dr::sp::copy(lv.begin(), lv.end(), v.begin()); 49 | 50 | dr::sp::distributed_vector> o(v.size() + 51 | 100); 52 | 53 | std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), 12, std::plus<>()); 54 | dr::sp::exclusive_scan(dr::sp::par_unseq, v, o, 12, std::plus<>()); 55 | 56 | fmt::print(" (after) v: {}\n", 57 | rng::subrange(o.begin(), o.begin() + v.size())); 58 | fmt::print(" (after) lv: {}\n", lv); 59 | 60 | for (std::size_t i = 0; i < lv.size(); i++) { 61 | int x = lv[i]; 62 | int y = o[i]; 63 | if (x != y) { 64 | printf("(%lu) %d != %d\n", i, x, y); 65 | } 66 | } 67 | 68 | return 0; 69 | } 70 | -------------------------------------------------------------------------------- /include/dr/detail/view_detectors.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace dr { 10 | 11 | template struct is_ref_view : std::false_type {}; 12 | template 13 | struct is_ref_view> : std::true_type {}; 14 | 15 | template inline constexpr bool is_ref_view_v = is_ref_view{}; 16 | 17 | template struct is_iota_view : std::false_type {}; 18 | template 19 | struct is_iota_view> : std::true_type {}; 20 | 21 | template inline constexpr bool is_iota_view_v = is_iota_view{}; 22 | 23 | template struct is_take_view : std::false_type {}; 24 | template 25 | struct is_take_view> : std::true_type {}; 26 | 27 | template 28 | inline constexpr bool is_take_view_v = is_take_view::value; 29 | 30 | template struct is_drop_view : std::false_type {}; 31 | template 32 | struct is_drop_view> : std::true_type {}; 33 | 34 | template 35 | inline constexpr bool is_drop_view_v = is_drop_view::value; 36 | 37 | template struct is_subrange_view : std::false_type {}; 38 | template 39 | struct is_subrange_view> : std::true_type {}; 40 | 41 | template 42 | 
inline constexpr bool is_subrange_view_v = is_subrange_view::value; 43 | 44 | template struct is_sliding_view : std::false_type {}; 45 | template 46 | struct is_sliding_view> : std::true_type {}; 47 | template 48 | inline constexpr bool is_sliding_view_v = 49 | is_sliding_view>::value; 50 | 51 | template struct is_zip_view : std::false_type {}; 52 | 53 | template 54 | struct is_zip_view> : std::true_type {}; 55 | 56 | template 57 | inline constexpr bool is_zip_view_v = is_zip_view::value; 58 | 59 | } // namespace dr 60 | -------------------------------------------------------------------------------- /include/dr/mp/views/sliding.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | namespace dr::mp { 12 | namespace views { 13 | namespace __detail { 14 | 15 | struct sliding_fn { 16 | 17 | // one can not use local algorithms if n is not equal to halo_bounds.prev + 1 18 | // + halo_bounds.next 19 | template 20 | requires rng::viewable_range && rng::forward_range && 21 | rng::detail::integer_like_ 22 | auto operator()(Rng &&r, Int n) const { 23 | return rng::views::sliding(static_cast(r), n); 24 | } 25 | }; 26 | 27 | } // namespace __detail 28 | 29 | inline constexpr __detail::sliding_fn sliding{}; 30 | 31 | } // namespace views 32 | } // namespace dr::mp 33 | 34 | namespace DR_RANGES_NAMESPACE { 35 | 36 | template 37 | requires(dr::is_sliding_view_v) 38 | auto segments_(V &&v) { 39 | 40 | auto base_segments = dr::ranges::segments(v.base()); 41 | auto elements_to_skip_in_base = rng::size(v.base()); 42 | auto elements_to_take = 0; 43 | if (!rng::empty(v)) { 44 | // need to reverse engineer `n` which was passed to sliding_view 45 | elements_to_take = rng::size(v); 46 | const auto slide_size = elements_to_skip_in_base - elements_to_take + 1; 47 | // TODO: this code 
assumes that halo is symmetric, thus odd (center + 2n) 48 | // note, it is not an assertion preventing all wrong use cases 49 | // other ones are caught by assert during attempt to read outside halo 50 | assert(slide_size % 2 == 1); 51 | elements_to_skip_in_base = slide_size / 2; 52 | } 53 | 54 | return dr::mp::views::segmented( 55 | v, 56 | dr::__detail::take_segments( 57 | dr::__detail::drop_segments(base_segments, elements_to_skip_in_base), 58 | elements_to_take)); 59 | } 60 | 61 | // TODO: add support for dr::mp::halo(dr::mp::views::sliding(r)).exchange() 62 | } // namespace DR_RANGES_NAMESPACE 63 | -------------------------------------------------------------------------------- /test/gtest/mp/alignment.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "xp-tests.hpp" 6 | 7 | using DV = dr::mp::distributed_vector; 8 | 9 | TEST(Alignment, One) { 10 | Ops1 ops(10); 11 | EXPECT_TRUE(dr::mp::aligned(ops.dist_vec)); 12 | } 13 | 14 | TEST(Alignment, Two) { 15 | Ops2 ops(10); 16 | EXPECT_TRUE(dr::mp::aligned(ops.dist_vec0, ops.dist_vec1)); 17 | } 18 | 19 | TEST(Alignment, Three) { 20 | Ops3 ops(10); 21 | EXPECT_TRUE(dr::mp::aligned(ops.dist_vec0, ops.dist_vec1, ops.dist_vec2)); 22 | } 23 | 24 | TEST(Alignment, OffsetBy1) { 25 | Ops2 ops(10); 26 | bool is_aligned = 27 | dr::mp::aligned(rng::views::drop(ops.dist_vec0, 1), ops.dist_vec1); 28 | if (comm_size == 1) { 29 | // If there is a single segment, then it is aligned 30 | EXPECT_TRUE(is_aligned); 31 | } else { 32 | EXPECT_FALSE(is_aligned); 33 | } 34 | } 35 | 36 | TEST(Alignment, Subrange) { 37 | Ops2 ops(10); 38 | auto is_aligned = dr::mp::aligned( 39 | rng::subrange(ops.dist_vec0.begin() + 1, ops.dist_vec0.end() - 1), 40 | rng::views::drop(ops.dist_vec1, 2)); 41 | if (comm_size == 1) { 42 | // If there is a single segment, then it is aligned 43 | 
EXPECT_TRUE(is_aligned); 44 | } else { 45 | EXPECT_FALSE(is_aligned); 46 | } 47 | } 48 | 49 | TEST(Alignment, Iota) { EXPECT_TRUE(dr::mp::aligned(rng::views::iota(100))); } 50 | 51 | TEST(Alignment, Iota2) { 52 | Ops1 ops(10); 53 | EXPECT_TRUE(dr::mp::aligned(ops.dist_vec, rng::views::iota(100))); 54 | } 55 | 56 | TEST(Alignment, ZipAligned) { 57 | Ops2 ops(10); 58 | EXPECT_TRUE( 59 | dr::mp::aligned(dr::mp::views::zip(ops.dist_vec0, ops.dist_vec1))); 60 | } 61 | 62 | TEST(Alignment, ZipMisaligned) { 63 | Ops2 ops(10); 64 | auto is_aligned = dr::mp::aligned( 65 | dr::mp::views::zip(dr::mp::views::drop(ops.dist_vec0, 1), ops.dist_vec1)); 66 | if (comm_size == 1) { 67 | // If there is a single segment, then it is aligned 68 | EXPECT_TRUE(is_aligned); 69 | } else { 70 | EXPECT_FALSE(is_aligned); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /test/gtest/common/subrange.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "xp-tests.hpp" 6 | 7 | // Fixture 8 | template class Subrange : public testing::Test { 9 | public: 10 | }; 11 | 12 | TYPED_TEST_SUITE(Subrange, AllTypes); 13 | 14 | TYPED_TEST(Subrange, Basic) { 15 | Ops1 ops(10); 16 | 17 | auto local = rng::subrange(ops.vec.begin() + 1, ops.vec.end() - 1); 18 | auto dist = rng::subrange(ops.dist_vec.begin() + 1, ops.dist_vec.end() - 1); 19 | static_assert(compliant_view); 20 | EXPECT_TRUE(check_view(local, dist)); 21 | } 22 | 23 | TYPED_TEST(Subrange, Mutate) { 24 | Ops1 ops(10); 25 | 26 | EXPECT_TRUE(check_mutate_view( 27 | ops, rng::subrange(ops.vec.begin() + 1, ops.vec.end() - 1), 28 | rng::subrange(ops.dist_vec.begin() + 1, ops.dist_vec.end() - 1))); 29 | } 30 | 31 | TYPED_TEST(Subrange, ForEach) { 32 | Ops1 ops(23); 33 | 34 | auto local = rng::subrange(ops.vec.begin() + 1, ops.vec.end() - 2); 35 | auto dist = 
rng::subrange(ops.dist_vec.begin() + 1, ops.dist_vec.end() - 2); 36 | 37 | auto negate = [](auto v) { return -v; }; 38 | rng::for_each(local, negate); 39 | xp::for_each(dist, negate); 40 | EXPECT_EQ(ops.vec, ops.dist_vec); 41 | } 42 | 43 | TYPED_TEST(Subrange, Transform) { 44 | TypeParam v1(13), v2(13); 45 | xp::iota(v1, 10); 46 | xp::fill(v2, -1); 47 | 48 | auto s1 = rng::subrange(v1.begin() + 1, v1.end() - 2); 49 | auto s2 = rng::subrange(v2.begin() + 1, v2.end() - 2); 50 | 51 | auto null_op = [](auto v) { return v; }; 52 | xp::transform(s1, s2.begin(), null_op); 53 | 54 | EXPECT_TRUE(equal_gtest(v2, std::vector{-1, 11, 12, 13, 14, 15, 16, 17, 55 | 18, 19, 20, -1, -1})); 56 | } 57 | 58 | TYPED_TEST(Subrange, Reduce) { 59 | Ops1 ops(23); 60 | 61 | auto local = rng::subrange(ops.vec.begin() + 1, ops.vec.end() - 2); 62 | auto dist = rng::subrange(ops.dist_vec.begin() + 1, ops.dist_vec.end() - 2); 63 | 64 | EXPECT_EQ(std::reduce(local.begin(), local.end(), 3, std::plus{}), 65 | xp::reduce(dist, 3, std::plus{})); 66 | } 67 | -------------------------------------------------------------------------------- /test/gtest/mp/xp-tests.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | #pragma once 5 | 6 | #include "cxxopts.hpp" 7 | #include "dr/mp.hpp" 8 | #include 9 | #include 10 | #include 11 | 12 | #define TEST_MP 13 | 14 | extern MPI_Comm comm; 15 | extern std::size_t comm_rank; 16 | extern std::size_t comm_size; 17 | extern cxxopts::ParseResult options; 18 | 19 | namespace xp = dr::mp; 20 | 21 | template 22 | concept compliant_view = rng::forward_range && rng::random_access_range && 23 | rng::viewable_range && requires(V &v) { 24 | // test one at a time so error is apparent 25 | dr::ranges::segments(v); 26 | dr::ranges::segments(v).begin(); 27 | *dr::ranges::segments(v).begin(); 28 | 
dr::ranges::rank(*dr::ranges::segments(v).begin()); 29 | // dr::ranges::local(rng::begin(dr::ranges::segments(v)[0])); 30 | // dr::mp::local_segments(v); 31 | }; 32 | 33 | inline void barrier() { dr::mp::barrier(); } 34 | inline void fence() { dr::mp::fence(); } 35 | inline void fence_on(auto &&obj) { obj.fence(); } 36 | 37 | #include "common-tests.hpp" 38 | 39 | // minimal testing for quick builds 40 | #ifdef DRISHMEM 41 | using AllTypes = 42 | ::testing::Types, 43 | dr::mp::distributed_vector>; 44 | using IshmemTypes = 45 | ::testing::Types>; 46 | #else 47 | using AllTypes = ::testing::Types>; 48 | using IshmemTypes = ::testing::Types>; 49 | 50 | #endif 51 | using AllTypesWithoutIshmem = ::testing::Types>; 52 | 53 | namespace dr::mp { 54 | 55 | template 56 | inline std::ostream &operator<<(std::ostream &os, 57 | const dv_segment &segment) { 58 | os << fmt::format("{}", segment); 59 | return os; 60 | } 61 | 62 | } // namespace dr::mp 63 | -------------------------------------------------------------------------------- /test/gtest/sp/containers-3.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | #include "containers.hpp" 5 | 6 | TYPED_TEST_SUITE(DistributedVectorTest, AllocatorTypes); 7 | 8 | TYPED_TEST(DistributedVectorTest, tests_from_this_file_run_on_3_devices) { 9 | EXPECT_EQ(dr::sp::nprocs(), 3); 10 | EXPECT_EQ(std::size(dr::sp::devices()), 3); 11 | } 12 | 13 | TYPED_TEST(DistributedVectorTest, segments_sizes_in_uneven_distribution) { 14 | typename TestFixture::DistVec dv(10); 15 | EXPECT_EQ(rng::size(dv.segments()), 3); 16 | EXPECT_EQ(rng::size(dv.segments()[0]), 4); 17 | EXPECT_EQ(rng::size(dv.segments()[1]), 4); 18 | EXPECT_EQ(rng::size(dv.segments()[2]), 2); 19 | } 20 | 21 | TYPED_TEST(DistributedVectorTest, segments_sizes_in_even_distribution) { 22 | typename TestFixture::DistVec dv(12); 23 | 
EXPECT_EQ(rng::size(dv.segments()), 3); 24 | EXPECT_EQ(rng::size(dv.segments()[0]), 4); 25 | EXPECT_EQ(rng::size(dv.segments()[1]), 4); 26 | EXPECT_EQ(rng::size(dv.segments()[2]), 4); 27 | } 28 | 29 | TYPED_TEST(DistributedVectorTest, 30 | segments_sizes_in_uneven_zeroending_distribution) { 31 | typename TestFixture::DistVec dv(4); 32 | EXPECT_EQ(rng::size(dv.segments()), 2); 33 | EXPECT_EQ(rng::size(dv.segments()[0]), 2); 34 | EXPECT_EQ(rng::size(dv.segments()[1]), 2); 35 | } 36 | 37 | TYPED_TEST(DistributedVectorTest, segments_sizes_in_empty_vec) { 38 | // this is not consistent, for non-zero sizes we do not return empty segments 39 | // but in case of empty vec we return one empty segment, IMO it should be made 40 | // consistent in some way 41 | typename TestFixture::DistVec dv(0); 42 | EXPECT_EQ(rng::size(dv.segments()), 1); 43 | EXPECT_EQ(rng::size(dv.segments()[0]), 0); 44 | } 45 | 46 | TYPED_TEST(DistributedVectorTest, segments_sizes_in_oneitem_vec) { 47 | typename TestFixture::DistVec dv(1); 48 | EXPECT_EQ(rng::size(dv.segments()), 1); 49 | EXPECT_EQ(rng::size(dv.segments()[0]), 1); 50 | } 51 | 52 | TYPED_TEST(DistributedVectorTest, segments_joint_view_same_as_all_view) { 53 | using DV = typename TestFixture::DistVec; 54 | check_segments(DV(0)); 55 | check_segments(DV(1)); 56 | check_segments(DV(4)); 57 | check_segments(DV(10)); 58 | check_segments(DV(12)); 59 | } 60 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Intel Corporation 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | # See https://pre-commit.com for more information 6 | # See https://pre-commit.com/hooks.html for more hooks 7 | repos: 8 | 9 | - repo: https://github.com/pre-commit/mirrors-clang-format 10 | rev: v16.0.6 11 | hooks: 12 | - id: clang-format 13 | 14 | - repo: https://github.com/ambv/black 15 | rev: 23.7.0
16 | hooks: 17 | - id: black 18 | args: ['--line-length=79'] 19 | 20 | 21 | - repo: https://github.com/pre-commit/pre-commit-hooks 22 | rev: v4.4.0 23 | hooks: 24 | - id: trailing-whitespace 25 | - id: end-of-file-fixer 26 | - id: mixed-line-ending 27 | - id: check-xml 28 | - id: check-yaml 29 | - id: check-case-conflict 30 | - id: check-toml 31 | - id: check-json 32 | - id: check-added-large-files 33 | args: ['--maxkb=800'] 34 | 35 | - repo: https://github.com/pycqa/flake8 36 | rev: 6.1.0 37 | hooks: 38 | - id: flake8 39 | 40 | - repo: https://github.com/pycqa/doc8 41 | rev: v1.1.1 42 | hooks: 43 | - id: doc8 44 | args: ['--max-line-length=120'] 45 | 46 | - repo: https://github.com/pycqa/isort 47 | rev: 5.12.0 48 | hooks: 49 | - id: isort 50 | 51 | - repo: https://github.com/cheshirekow/cmake-format-precommit 52 | rev: v0.6.13 53 | hooks: 54 | - id: cmake-format 55 | 56 | - repo: https://github.com/fsfe/reuse-tool 57 | rev: v2.1.0 58 | hooks: 59 | - id: reuse 60 | 61 | - repo: https://github.com/codespell-project/codespell 62 | rev: v2.2.4 63 | hooks: 64 | - id: codespell 65 | entry: codespell --ignore-words doc/spec/source/spelling_wordlist.txt 66 | types: [text] 67 | 68 | - repo: local 69 | hooks: 70 | - id: dr-style-all 71 | name: dr-style-all 72 | entry: python3 scripts/dr-style.py --Werror include test examples 73 | language: system 74 | pass_filenames: false 75 | always_run: true 76 | - id: dr-style-include 77 | name: dr-style-include 78 | entry: python3 scripts/dr-style.py --Werror --include include/dr/mp 79 | language: system 80 | pass_filenames: false 81 | always_run: true 82 | - id: sphinx 83 | name: sphinx 84 | entry: make -C doc/spec spelling linkcheck html 85 | language: system 86 | pass_filenames: false 87 | always_run: true 88 | -------------------------------------------------------------------------------- /benchmarks/gbench/sp/gemm.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 
Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "../common/dr_bench.hpp" 6 | 7 | using T = float; 8 | 9 | #include 10 | 11 | #include 12 | 13 | template void fill_random(X &&x) { 14 | for (auto &&value : x) { 15 | value = drand48() * 100; 16 | } 17 | } 18 | 19 | const std::size_t m = 16000; 20 | const std::size_t n = m; 21 | const std::size_t k = m; 22 | 23 | static void Gemm_DR(benchmark::State &state) { 24 | auto q = get_queue(); 25 | 26 | auto partitions = dr::sp::partition_matmul(m, n, k); 27 | dr::sp::distributed_dense_matrix a({m, k}, partitions[0]); 28 | dr::sp::distributed_dense_matrix b({k, n}, partitions[1]); 29 | dr::sp::distributed_dense_matrix result({m, n}, partitions[2]); 30 | 31 | Stats stats(state, (m * k + k * n) * sizeof(T), m * n * sizeof(T), m * n * k); 32 | a[{2, 3}] = 12; 33 | a[{5, 7}] = 42; 34 | a[{8, 9}] = 37; 35 | 36 | b[{2, 3}] = 12; 37 | b[{5, 7}] = 42; 38 | b[{8, 9}] = 37; 39 | 40 | for (auto _ : state) { 41 | stats.rep(); 42 | dr::sp::gemm(a, b, result); 43 | } 44 | } 45 | 46 | DR_BENCHMARK(Gemm_DR); 47 | 48 | static void Gemm_Reference(benchmark::State &state) { 49 | auto q = get_queue(); 50 | 51 | std::vector a_local(m * k); 52 | std::vector b_local(k * n); 53 | std::vector c_local(m * n); 54 | 55 | fill_random(a_local); 56 | fill_random(b_local); 57 | fill_random(c_local); 58 | 59 | T *a = sycl::malloc_device(m * k, q); 60 | T *b = sycl::malloc_device(k * n, q); 61 | T *c = sycl::malloc_device(m * n, q); 62 | 63 | q.memcpy(a, a_local.data(), m * k * sizeof(T)).wait(); 64 | q.memcpy(b, b_local.data(), k * n * sizeof(T)).wait(); 65 | q.memcpy(c, c_local.data(), m * n * sizeof(T)).wait(); 66 | 67 | Stats stats(state, (m * k + k * n) * sizeof(T), m * n * sizeof(T), m * n * k); 68 | 69 | for (auto _ : state) { 70 | stats.rep(); 71 | oneapi::mkl::blas::row_major::gemm(q, oneapi::mkl::transpose::nontrans, 72 | oneapi::mkl::transpose::nontrans, m, n, 73 | k, T(1), a, m, b, n, T(1), c, k) 74 | .wait(); 
75 | } 76 | } 77 | 78 | DR_BENCHMARK(Gemm_Reference); 79 | -------------------------------------------------------------------------------- /include/dr/mp/algorithms/copy.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace dr::mp { 10 | 11 | /// Copy 12 | void copy(rng::forward_range auto &&in, dr::distributed_iterator auto out) { 13 | if (rng::empty(in)) { 14 | return; 15 | } 16 | 17 | auto copy = [](auto &&v) { std::get<1>(v) = std::get<0>(v); }; 18 | 19 | for_each(views::zip(in, views::counted(out, rng::size(in))), copy); 20 | } 21 | 22 | /// Copy 23 | template 24 | void copy(DI_IN &&first, DI_IN &&last, dr::distributed_iterator auto &&out) { 25 | copy(rng::subrange(first, last), out); 26 | } 27 | 28 | template 29 | void copy(CI_IN &&first, CI_IN &&last, 30 | dr::distributed_contiguous_iterator auto out) { 31 | copy(0, rng::subrange(first, last), out); 32 | } 33 | 34 | void copy(rng::contiguous_range auto &&in, 35 | dr::distributed_contiguous_iterator auto out) { 36 | copy(0, in, out); 37 | } 38 | 39 | void copy(dr::distributed_contiguous_range auto &&in, 40 | std::contiguous_iterator auto out) { 41 | copy(0, in, out); 42 | } 43 | 44 | /// Copy distributed to local 45 | void copy(std::size_t root, dr::distributed_contiguous_range auto &&in, 46 | std::contiguous_iterator auto out) { 47 | if (default_comm().rank() == root) { 48 | for (const auto &segment : dr::ranges::segments(in)) { 49 | auto sz = rng::size(segment); 50 | rng::begin(segment).get(std::to_address(out), sz); 51 | out += sz; 52 | } 53 | } 54 | barrier(); 55 | } 56 | 57 | /// Copy local to distributed 58 | void copy(std::size_t root, rng::contiguous_range auto &&in, 59 | dr::distributed_contiguous_iterator auto out) { 60 | if (default_comm().rank() == root) { 61 | auto in_ptr = std::to_address(in.begin()); 62 | 
for (auto remainder = rng::size(in); remainder > 0;) { 63 | auto segment = *(dr::ranges::segments(out).begin()); 64 | auto sz = std::min(rng::size(segment), remainder); 65 | assert(sz > 0); 66 | rng::begin(segment).put(in_ptr, sz); 67 | in_ptr += sz; 68 | out += sz; 69 | remainder -= sz; 70 | } 71 | } 72 | barrier(); 73 | } 74 | 75 | } // namespace dr::mp 76 | -------------------------------------------------------------------------------- /test/gtest/mp/broadcasted_vector.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "xp-tests.hpp" 6 | 7 | TEST(BroadcastedVector, BroadcastData) { 8 | std::size_t n = 100; 9 | auto rank = dr::mp::default_comm().rank(); 10 | std::vector data(n); 11 | if (rank == 0) { 12 | for (int i = 0; i < n; i++) { 13 | data[i] = i; 14 | } 15 | } 16 | dr::mp::broadcasted_vector broadcasted; 17 | if (rank == 0) { 18 | broadcasted.broadcast_data(n, 0, data, dr::mp::default_comm()); 19 | } else { 20 | broadcasted.broadcast_data(n, 0, rng::empty_view(), 21 | dr::mp::default_comm()); 22 | } 23 | 24 | std::vector ref(n); 25 | for (int i = 0; i < n; i++) { 26 | ref[i] = i; 27 | } 28 | 29 | EXPECT_EQ(rng::subrange(broadcasted.broadcasted_data(), 30 | broadcasted.broadcasted_data() + n), 31 | ref); 32 | broadcasted.destroy_data(); 33 | } 34 | 35 | TEST(BroadcastedVector, BroadcastDataReuse) { 36 | std::size_t n = 100; 37 | auto rank = dr::mp::default_comm().rank(); 38 | std::vector data(n); 39 | if (rank == 0) { 40 | for (int i = 0; i < n; i++) { 41 | data[i] = i; 42 | } 43 | } 44 | dr::mp::broadcasted_vector broadcasted; 45 | if (rank == 0) { 46 | broadcasted.broadcast_data(n, 0, data, dr::mp::default_comm()); 47 | } else { 48 | broadcasted.broadcast_data(n, 0, rng::empty_view(), 49 | dr::mp::default_comm()); 50 | } 51 | 52 | std::vector ref(n); 53 | for (int i = 0; i < n; i++) { 54 | ref[i] = i; 55 
| } 56 | 57 | EXPECT_EQ(rng::subrange(broadcasted.broadcasted_data(), 58 | broadcasted.broadcasted_data() + n), 59 | ref); 60 | broadcasted.destroy_data(); 61 | EXPECT_EQ(broadcasted.broadcasted_data(), nullptr); 62 | if (rank == 0) { 63 | broadcasted.broadcast_data(n, 0, data, dr::mp::default_comm()); 64 | } else { 65 | broadcasted.broadcast_data(n, 0, rng::empty_view(), 66 | dr::mp::default_comm()); 67 | } 68 | EXPECT_EQ(rng::subrange(broadcasted.broadcasted_data(), 69 | broadcasted.broadcasted_data() + n), 70 | ref); 71 | broadcasted.destroy_data(); 72 | } 73 | -------------------------------------------------------------------------------- /include/dr/mp/alignment.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | namespace dr::mp { 12 | 13 | template 14 | concept has_segments = requires(T &t) { dr::ranges::segments(t); }; 15 | 16 | template 17 | concept no_segments = !has_segments; 18 | 19 | auto sub_aligned(has_segments auto &&r) { 20 | if (rng::empty(dr::ranges::segments(r))) { 21 | dr::drlog.debug("unaligned: empty segments\n"); 22 | return false; 23 | } else { 24 | return true; 25 | } 26 | } 27 | 28 | auto sub_aligned(auto &&r) { return true; } 29 | 30 | // iter1 is aligned with iter2, and iter2 is aligned with the rest 31 | bool sub_aligned(has_segments auto &&r1, has_segments auto &&r2, 32 | auto &&...rest) { 33 | auto z = rng::views::zip(dr::ranges::segments(r1), dr::ranges::segments(r2)); 34 | auto i = rng::distance(z) - 1; 35 | for (auto seg : z) { 36 | if (dr::ranges::rank(seg.first) != dr::ranges::rank(seg.second)) { 37 | dr::drlog.debug("unaligned: ranks: {} {}\n", dr::ranges::rank(seg.first), 38 | dr::ranges::rank(seg.second)); 39 | return false; 40 | } 41 | // Size mismatch would misalign following segments. 
Skip test if this is the 42 | // last segment 43 | if (i > 0 && rng::distance(seg.first) != rng::distance(seg.second)) { 44 | dr::drlog.debug("unaligned: size: {} {}\n", rng::distance(seg.first), 45 | rng::distance(seg.second)); 46 | return false; 47 | } 48 | i--; 49 | } 50 | 51 | return sub_aligned(r2, rest...); 52 | } 53 | 54 | // Skip local iterators 55 | bool sub_aligned(no_segments auto &&r1, has_segments auto &&r2, auto... rest) { 56 | return sub_aligned(r2, rest...); 57 | } 58 | 59 | bool sub_aligned(has_segments auto &&r1, no_segments auto &&r2, 60 | auto &&...rest) { 61 | return sub_aligned(r1, rest...); 62 | } 63 | 64 | // This was added to allow passing state down the call tree, but it is 65 | // no longer needed. I did not delete it in case we need it again. 66 | template bool aligned(Args &&...args) { 67 | return sub_aligned(std::forward(args)...); 68 | } 69 | 70 | } // namespace dr::mp 71 | -------------------------------------------------------------------------------- /examples/mp/transpose-cpu.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "mpi.h" 6 | 7 | #include "dr/distributed-ranges.hpp" 8 | 9 | #include "transpose-serial.hpp" 10 | #include "utils.hpp" 11 | 12 | MPI_Comm comm; 13 | int comm_rank; 14 | int comm_size; 15 | const int root_rank = 0; 16 | 17 | bool is_root() { return root_rank == comm_rank; } 18 | void transpose() { 19 | using T = double; 20 | 21 | const std::size_t m_segment = 2; 22 | // segment has entire row, but it must divide evenly because of the transpose 23 | const std::size_t n_segment = 6; 24 | const std::size_t m = m_segment * comm_size; 25 | const std::size_t n = n_segment * comm_size; 26 | 27 | dr::distributed_vector dv_a(n), dv_b(n); 28 | dr::distributed_mdspan> dm_aT(dv_b.data(), m, n); 29 | 30 | // root initializes dv_a 31 | transpose_serial ref_transpose; 32 | 
if (is_root()) { 33 | ref_transpose.init(m, n); 34 | ref_transpose.compute(); 35 | std::copy(ref_transpose.a.begin(), ref_transpose.a.end(), dv_a); 36 | } 37 | 38 | // Transpose my segment 39 | std::vector local(m_segment * n); 40 | stdex::mdarray> lm( 41 | transpose(m_segment, n, dv_a.local_segment().data(), local.data()); 42 | 43 | // Copy my segment to target blocks 44 | auto block_size = m_segment * n_segment; 45 | for (int r = 0; r < comm_size; r++) { 46 | // Create a view for the remote target block 47 | // We have block (src_rank, target_rank), store at (target_rank, src_rank) 48 | auto target_block = dr::distributed_submdspan( 49 | dm, std::vector({std::pair(r * m_segment, (r + 1) * m_segment), 50 | std::pair(comm_rank * n_segment, 51 | (comm_rank + 1) * n_segment)})); 52 | 53 | std::copy(local.data() + r * block_size, 54 | local.data() + (r + 1) * block_size, target_block); 55 | } 56 | 57 | std::vector result(m * n); 58 | std::copy(dv_b.begin(), dv_b.end(), result.begin()); 59 | ref_transpose.check(result); 60 | } 61 | 62 | int main(int argc, char *argv[]) { 63 | MPI_Init(&argc, &argv); 64 | comm = MPI_COMM_WORLD; 65 | MPI_Comm_rank(comm, &comm_rank); 66 | MPI_Comm_size(comm, &comm_size); 67 | 68 | transpose(); 69 | 70 | dr::mp::finalize(); 71 | MPI_Finalize(); 72 | return 0; 73 | } 74 | -------------------------------------------------------------------------------- /doc/developer/design/include.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | .. SPDX-License-Identifier: BSD-3-Clause 4 | 5 | =============== 6 | Include Files 7 | =============== 8 | 9 | External 10 | ======== 11 | 12 | Application developers using distributed ranges library. 13 | 14 | Use:: 15 | 16 | #include "mp.hpp" 17 | 18 | or:: 19 | 20 | #include "sp.hpp" 21 | 22 | No other includes. The names ``mp.hpp`` and ``sp.hpp`` are likely to 23 | change. 
In the future, we may support selective includes. Externally 24 | exposed include paths cannot be changed without breaking compatibility. 25 | 26 | 27 | Internal 28 | ======== 29 | 30 | Distributed ranges library developer. 31 | 32 | Header files can be included in any order and therefore should include 33 | their dependencies (internal and external). Include paths are always 34 | relative to root (``-I`` path) and use ``<>``. Example:: 35 | 36 | #pragma once 37 | 38 | #include 39 | #include 40 | 41 | #include 42 | 43 | #include 44 | #include 45 | #include 46 | 47 | #include 48 | #include 49 | #include 50 | #include 51 | #include 52 | 53 | Use ``pragma once`` to protect against multiple inclusion. Start with 54 | C++ headers as a block, blank line, external dependencies as 1 or more 55 | blocks, blank line, internal dependencies as a block. ``clang-format`` 56 | sorts include blocks that are not broken by blank lines. 57 | 58 | Directory Structure 59 | ------------------- 60 | 61 | ``dr`` 62 | The top level ``dr`` directory protects against header file name 63 | collisions with other software. 64 | 65 | ``vendor`` 66 | External header files that we distribute. 67 | 68 | ``dr/detail`` 69 | Does not fit elsewhere 70 | 71 | ``dr/sp`` 72 | Single process, multi GPU model 73 | 74 | ``dr/mp`` 75 | Multi-process, single XPU model 76 | 77 | ``dr/views`` 78 | Views shared between SP/MP 79 | 80 | ``dr/sp/algorithms`` 81 | Algorithm implementations specific to SP (e.g. ``sp::for_each``) 82 | 83 | ``dr/sp/containers`` 84 | Container implementations specific to SP 85 | (e.g. ``sp::distributed_vector``) 86 | 87 | ``dr/sp/views`` 88 | View implementations specific to SP (e.g. ``sp::views::slice``) 89 | -------------------------------------------------------------------------------- /doc/spec/source/algorithms/reduce.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-FileCopyrightText: Intel Corporation 2 | .. 3 | ..
SPDX-License-Identifier: BSD-3-Clause 4 | 5 | .. include:: ../include/distributed-ranges.rst 6 | 7 | .. _reduce: 8 | 9 | ============ 10 | ``reduce`` 11 | ============ 12 | 13 | Interface 14 | ========= 15 | 16 | MP 17 | --- 18 | 19 | .. doxygenfunction:: dr::mp::reduce(std::size_t root, DR &&dr, T init, auto &&binary_op) 20 | :outline: 21 | .. doxygenfunction:: dr::mp::reduce(DR &&dr, T init, auto &&binary_op) 22 | :outline: 23 | .. doxygenfunction:: dr::mp::reduce(std::size_t root, DR &&dr, T init) 24 | :outline: 25 | .. doxygenfunction:: dr::mp::reduce(DR &&dr, T init) 26 | :outline: 27 | .. doxygenfunction:: dr::mp::reduce(std::size_t root, DR &&dr) 28 | :outline: 29 | .. doxygenfunction:: dr::mp::reduce(DR &&dr) 30 | :outline: 31 | .. doxygenfunction:: dr::mp::reduce(std::size_t root, DI first, DI last, T init, auto &&binary_op) 32 | :outline: 33 | .. doxygenfunction:: dr::mp::reduce(DI first, DI last, T init, auto &&binary_op) 34 | :outline: 35 | .. doxygenfunction:: dr::mp::reduce(std::size_t root, DI first, DI last, T init) 36 | :outline: 37 | .. doxygenfunction:: dr::mp::reduce(DI first, DI last, T init) 38 | :outline: 39 | .. doxygenfunction:: dr::mp::reduce(std::size_t root, DI first, DI last) 40 | :outline: 41 | .. doxygenfunction:: dr::mp::reduce(DI first, DI last) 42 | :outline: 43 | 44 | SP 45 | --- 46 | 47 | .. doxygenfunction:: dr::sp::reduce(ExecutionPolicy &&policy, R &&r, T init, BinaryOp &&binary_op) 48 | :outline: 49 | .. doxygenfunction:: dr::sp::reduce(ExecutionPolicy &&policy, R &&r, T init) 50 | :outline: 51 | .. doxygenfunction:: dr::sp::reduce(ExecutionPolicy &&policy, R &&r) 52 | :outline: 53 | 54 | Iterator versions 55 | 56 | .. doxygenfunction:: dr::sp::reduce(ExecutionPolicy &&policy, Iter first, Iter last) 57 | :outline: 58 | .. doxygenfunction:: dr::sp::reduce(ExecutionPolicy &&policy, Iter first, Iter last, T init) 59 | :outline: 60 | .. 
doxygenfunction:: dr::sp::reduce(ExecutionPolicy &&policy, Iter first, Iter last, T init, BinaryOp &&binary_op) 61 | :outline: 62 | 63 | Execution policy-less algorithms 64 | 65 | .. doxygenfunction:: dr::sp::reduce(R &&r) 66 | :outline: 67 | .. doxygenfunction:: dr::sp::reduce(R &&r, T init) 68 | :outline: 69 | .. doxygenfunction:: dr::sp::reduce(R &&r, T init, BinaryOp &&binary_op) 70 | :outline: 71 | 72 | Description 73 | =========== 74 | 75 | .. seealso:: `std::reduce`_ 76 | 77 | Examples 78 | ======== 79 | -------------------------------------------------------------------------------- /include/dr/sp/algorithms/for_each.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace dr::sp { 17 | 18 | template 19 | void for_each(ExecutionPolicy &&policy, R &&r, Fn &&fn) { 20 | static_assert( // currently only one policy supported 21 | std::is_same_v, device_policy>); 22 | 23 | std::vector events; 24 | 25 | for (auto &&segment : dr::ranges::segments(r)) { 26 | auto &&q = __detail::queue(dr::ranges::rank(segment)); 27 | 28 | assert(rng::distance(segment) > 0); 29 | 30 | auto local_segment = __detail::local(segment); 31 | 32 | auto first = rng::begin(local_segment); 33 | 34 | auto event = dr::__detail::parallel_for( 35 | q, sycl::range<>(rng::distance(local_segment)), 36 | [=](auto idx) { fn(*(first + idx)); }); 37 | events.emplace_back(event); 38 | } 39 | __detail::wait(events); 40 | } 41 | 42 | template 43 | void for_each(ExecutionPolicy &&policy, Iter begin, Iter end, Fn &&fn) { 44 | for_each(std::forward(policy), rng::subrange(begin, end), 45 | std::forward(fn)); 46 | } 47 | 48 | template void for_each(R &&r, Fn &&fn) { 49 | for_each(dr::sp::par_unseq, std::forward(r), std::forward(fn)); 50 | } 
51 | 52 | template 53 | void for_each(Iter begin, Iter end, Fn &&fn) { 54 | for_each(dr::sp::par_unseq, begin, end, std::forward(fn)); 55 | } 56 | 57 | template 59 | Iter for_each_n(ExecutionPolicy &&policy, Iter begin, I n, Fn fn) { 60 | auto end = begin; 61 | rng::advance(end, n); 62 | for_each(std::forward(policy), begin, end, 63 | std::forward(fn)); 64 | return end; 65 | } 66 | 67 | template 68 | Iter for_each_n(Iter &&r, I n, Fn fn) { 69 | return for_each_n(dr::sp::par_unseq, std::forward(r), n, fn); 70 | } 71 | 72 | } // namespace dr::sp 73 | -------------------------------------------------------------------------------- /test/gtest/mp/mp-tests.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "xp-tests.hpp" 6 | 7 | MPI_Comm comm; 8 | std::size_t comm_rank; 9 | std::size_t comm_size; 10 | 11 | cxxopts::ParseResult options; 12 | 13 | void dr_init() { 14 | #ifdef SYCL_LANGUAGE_VERSION 15 | if (options.count("sycl")) { 16 | sycl::queue q; 17 | if (comm_rank == 0) { 18 | fmt::print("Enable sycl device: {}\n", 19 | q.get_device().get_info()); 20 | } 21 | dr::mp::init(q, options.count("device-memory") ? 
sycl::usm::alloc::device 22 | : sycl::usm::alloc::shared); 23 | return; 24 | } 25 | #endif 26 | 27 | if (comm_rank == 0) { 28 | fmt::print("Enable CPU\n"); 29 | } 30 | dr::mp::init(); 31 | } 32 | 33 | int main(int argc, char *argv[]) { 34 | MPI_Init(&argc, &argv); 35 | comm = MPI_COMM_WORLD; 36 | int rank, size; 37 | MPI_Comm_rank(comm, &rank); 38 | MPI_Comm_size(comm, &size); 39 | comm_rank = rank; 40 | comm_size = size; 41 | ::testing::InitGoogleTest(&argc, argv); 42 | 43 | cxxopts::Options options_spec(argv[0], "DR MP tests"); 44 | 45 | // clang-format off 46 | options_spec.add_options() 47 | ("drhelp", "Print help") 48 | ("log", "Enable logging") 49 | ("logprefix", "appended .RANK.log", cxxopts::value()->default_value("dr")) 50 | ("log-filter", "Filter the log", cxxopts::value>()) 51 | ("device-memory", "Use device memory") 52 | ("sycl", "Execute on SYCL device"); 53 | // clang-format on 54 | 55 | try { 56 | options = options_spec.parse(argc, argv); 57 | } catch (const cxxopts::OptionParseException &e) { 58 | std::cout << options_spec.help() << "\n"; 59 | exit(1); 60 | } 61 | 62 | if (options.count("drhelp")) { 63 | std::cout << options_spec.help() << "\n"; 64 | exit(0); 65 | } 66 | 67 | std::unique_ptr logfile; 68 | if (options.count("log")) { 69 | logfile.reset(new std::ofstream(options["logprefix"].as() + 70 | fmt::format(".{}.log", comm_rank))); 71 | dr::drlog.set_file(*logfile); 72 | if (options.count("log-filter")) { 73 | dr::drlog.filter(options["log-filter"].as>()); 74 | } 75 | } 76 | 77 | dr_init(); 78 | dr::drlog.debug("Rank: {}\n", comm_rank); 79 | 80 | auto res = RUN_ALL_TESTS(); 81 | 82 | dr::mp::finalize(); 83 | MPI_Finalize(); 84 | 85 | return res; 86 | } 87 | -------------------------------------------------------------------------------- /include/vendor/source_location/source_location.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // 
SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #ifndef NOSTD_SOURCE_LOCATION_HPP 6 | #define NOSTD_SOURCE_LOCATION_HPP 7 | 8 | #pragma once 9 | 10 | #include 11 | 12 | namespace nostd { 13 | struct source_location { 14 | public: 15 | #if not defined(__apple_build_version__) and defined(__clang__) and \ 16 | (__clang_major__ >= 9) 17 | static constexpr source_location 18 | current(const char *fileName = __builtin_FILE(), 19 | const char *functionName = __builtin_FUNCTION(), 20 | const uint_least32_t lineNumber = __builtin_LINE(), 21 | const uint_least32_t columnOffset = __builtin_COLUMN()) noexcept 22 | #elif defined(__GNUC__) and \ 23 | (__GNUC__ > 4 or (__GNUC__ == 4 and __GNUC_MINOR__ >= 8)) 24 | static constexpr source_location 25 | current(const char *fileName = __builtin_FILE(), 26 | const char *functionName = __builtin_FUNCTION(), 27 | const uint_least32_t lineNumber = __builtin_LINE(), 28 | const uint_least32_t columnOffset = 0) noexcept 29 | #else 30 | static constexpr source_location 31 | current(const char *fileName = "unsupported", 32 | const char *functionName = "unsupported", 33 | const uint_least32_t lineNumber = 0, 34 | const uint_least32_t columnOffset = 0) noexcept 35 | #endif 36 | { 37 | return source_location(fileName, functionName, lineNumber, columnOffset); 38 | } 39 | 40 | source_location(const source_location &) = default; 41 | source_location(source_location &&) = default; 42 | 43 | constexpr const char *file_name() const noexcept { return fileName; } 44 | 45 | constexpr const char *function_name() const noexcept { return functionName; } 46 | 47 | constexpr uint_least32_t line() const noexcept { return lineNumber; } 48 | 49 | constexpr std::uint_least32_t column() const noexcept { return columnOffset; } 50 | 51 | private: 52 | constexpr source_location(const char *fileName, const char *functionName, 53 | const uint_least32_t lineNumber, 54 | const uint_least32_t columnOffset) noexcept 55 | : fileName(fileName), functionName(functionName), 
lineNumber(lineNumber), 56 | columnOffset(columnOffset) {} 57 | 58 | const char *fileName; 59 | const char *functionName; 60 | const std::uint_least32_t lineNumber; 61 | const std::uint_least32_t columnOffset; 62 | }; 63 | } // namespace nostd 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /test/fuzz/cpu/cpu-fuzz.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Intel Corporation 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "cpu-fuzz.hpp" 6 | 7 | MPI_Comm comm; 8 | int comm_rank; 9 | int comm_size; 10 | 11 | cxxopts::ParseResult options; 12 | int controller_rank; 13 | 14 | extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { 15 | MPI_Init(argc, argv); 16 | comm = MPI_COMM_WORLD; 17 | MPI_Comm_rank(comm, &comm_rank); 18 | MPI_Comm_size(comm, &comm_size); 19 | 20 | dr::mp::init(); 21 | 22 | cxxopts::Options options_spec((*argv)[0], "Fuzz CPU tests"); 23 | 24 | // clang-format off 25 | options_spec.add_options() 26 | ("controller", "Rank that controls fuzzing", cxxopts::value()->default_value("0")) 27 | ("log", "Enable logging") 28 | ("fuzz_help", "Print help"); 29 | // clang-format on 30 | options_spec.allow_unrecognised_options(); 31 | 32 | try { 33 | options = options_spec.parse(*argc, *argv); 34 | } catch (const cxxopts::OptionParseException &e) { 35 | std::cout << options_spec.help() << "\n"; 36 | exit(1); 37 | } 38 | 39 | if (options.count("fuzz_help")) { 40 | std::cout << options_spec.help() << "\n"; 41 | exit(0); 42 | } 43 | 44 | controller_rank = options["controller"].as(); 45 | if (comm_rank == controller_rank) { 46 | fmt::print("Controller: {}\n", controller_rank); 47 | } 48 | return 0; 49 | } 50 | 51 | struct fuzz_spec { 52 | uint8_t algorithm; 53 | uint8_t n, b, e; 54 | }; 55 | 56 | enum class Algorithms { 57 | Copy, 58 | Transform, 59 | Last, 60 | }; 61 | 62 | extern "C" int 
LLVMFuzzerTestOneInput(const fuzz_spec *my_spec, 63 | std::size_t size) { 64 | // Controller broadcasts its fuzz spec 65 | MPI_Bcast(&size, sizeof(size), MPI_BYTE, controller_rank, comm); 66 | if (sizeof(fuzz_spec) < size) 67 | return 0; 68 | 69 | auto spec = *my_spec; 70 | MPI_Bcast(&spec, sizeof(spec), MPI_BYTE, controller_rank, comm); 71 | 72 | auto n = spec.n; 73 | auto b = spec.b; 74 | auto e = spec.e; 75 | if (n > 64 || b > n || e > n || b > e || n == 0) 76 | return 0; 77 | 78 | // fmt::print("n: {} b: {} e: {}\n", n, b, e); 79 | 80 | // Algorithm number is 8 bits. Mod it so we don't generate many test 81 | // cases that do nothing. 82 | switch (Algorithms(spec.algorithm % std::size_t(Algorithms::Last))) { 83 | case Algorithms::Copy: 84 | check_copy(n, b, e); 85 | break; 86 | case Algorithms::Transform: 87 | check_transform(n, b, e); 88 | break; 89 | default: 90 | break; 91 | } 92 | 93 | return 0; 94 | } 95 | --------------------------------------------------------------------------------