├── .github ├── CODEOWNERS ├── copy-pr-bot.yaml ├── actions │ ├── compute-matrix │ │ ├── action.yml │ │ └── compute-matrix.sh │ └── configure_cccl_sccache │ │ └── action.yml └── workflows │ ├── build-and-test.yml │ ├── dispatch-build-and-test.yml │ ├── run-as-coder.yml │ └── verify-devcontainers.yml ├── versions.json ├── ._upstream ├── .github │ ├── ISSUE_TEMPLATE │ │ ├── config.yml │ │ └── feature_request.yml │ └── PULL_REQUEST_TEMPLATE.md └── CONTRIBUTING.md ├── ci ├── test.sh ├── sccache_hit_rate.sh ├── sccache_stats.sh ├── matrix.yml └── pre-commit │ └── doxygen.sh ├── cmake └── thirdparty │ └── get_cccl.cmake ├── tests ├── dynamic_bitset │ ├── size_test.cu │ ├── rank_test.cu │ ├── find_next_test.cu │ ├── get_test.cu │ └── select_test.cu ├── static_set │ ├── size_test.cu │ ├── rehash_test.cu │ └── retrieve_all_test.cu ├── static_map │ ├── rehash_test.cu │ └── hash_test.cu ├── utility │ ├── fast_int_test.cu │ └── extent_test.cu ├── hyperloglog │ ├── unique_sequence_test.cu │ └── device_ref_test.cu ├── test_utils.cuh └── static_multiset │ └── count_test.cu ├── include └── cuco │ ├── detail │ ├── probing_scheme │ │ └── probing_scheme_base.cuh │ ├── utility │ │ ├── math.cuh │ │ └── strong_type.cuh │ ├── hash_functions │ │ ├── utils.cuh │ │ └── identity_hash.cuh │ ├── storage │ │ ├── kernels.cuh │ │ ├── storage.cuh │ │ ├── storage_base.cuh │ │ └── bucket_storage_base.cuh │ ├── operator.inl │ ├── bloom_filter │ │ └── default_filter_policy.inl │ ├── utils.hpp │ ├── __config │ ├── bitwise_compare.cuh │ └── pair │ │ ├── pair.inl │ │ └── tuple_helpers.inl │ ├── cuda_runtime.h │ ├── utility │ ├── cuda_thread_scope.cuh │ ├── error.hpp │ ├── allocator.hpp │ ├── traits.hpp │ └── reduction_functors.cuh │ ├── storage.cuh │ ├── operator.hpp │ ├── types.cuh │ ├── probe_sequences.cuh │ └── hash_functions.cuh ├── rocm-docs ├── build.sh ├── index.md ├── .sphinx │ └── _toc.yml.in └── conf.py ├── benchmarks ├── bloom_filter │ ├── defaults.hpp │ └── utils.hpp ├── 
benchmark_defaults.hpp ├── benchmark_utils.hpp ├── static_set │ ├── size_bench.cu │ ├── rehash_bench.cu │ ├── retrieve_all_bench.cu │ ├── contains_bench.cu │ ├── find_bench.cu │ └── insert_bench.cu └── static_multiset │ ├── contains_bench.cu │ └── find_bench.cu ├── overrides.cmake ├── .pre-commit-config.yaml ├── .devcontainer ├── devcontainer.json ├── cuda11.8-gcc11 │ └── devcontainer.json ├── cuda12.6-gcc12 │ └── devcontainer.json ├── cuda12.6-gcc13 │ └── devcontainer.json ├── launch.sh └── verify_devcontainer.sh ├── examples ├── hyperloglog │ └── host_bulk_example.cu ├── static_set │ └── host_bulk_example.cu ├── bloom_filter │ └── host_bulk_example.cu ├── static_multiset │ └── host_bulk_example.cu └── static_map │ └── host_bulk_example.cu └── README.md /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @sleeepyjack @PointKernel 2 | -------------------------------------------------------------------------------- /versions.json: -------------------------------------------------------------------------------- 1 | { 2 | "packages" : { 3 | } 4 | } 5 | -------------------------------------------------------------------------------- /.github/copy-pr-bot.yaml: -------------------------------------------------------------------------------- 1 | # Configuration file for `copy-pr-bot` GitHub App 2 | # https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/ 3 | 4 | enabled: true 5 | -------------------------------------------------------------------------------- /._upstream/.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: Question 4 | url: https://github.com/NVIDIA/cuCollections/discussions 5 | about: Check out our Discussions page to ask and answer questions. 
6 | -------------------------------------------------------------------------------- /ci/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Ensure the script is being executed in its containing directory 18 | cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"; 19 | 20 | source ./build.sh "$@" 21 | 22 | ctest --test-dir ${BUILD_DIR}/tests --output-on-failure --timeout 60 23 | 24 | echo "Test complete" -------------------------------------------------------------------------------- /cmake/thirdparty/get_cccl.cmake: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # Copyright (c) 2021-2023, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | # in compliance with the License. 
You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software distributed under the License 10 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | # or implied. See the License for the specific language governing permissions and limitations under 12 | # the License. 13 | # ============================================================================= 14 | 15 | # Use CPM to find or clone CCCL 16 | function(find_and_configure_cccl) 17 | include(${rapids-cmake-dir}/cpm/cccl.cmake) 18 | rapids_cpm_cccl(INSTALL_EXPORT_SET cuco-exports BUILD_EXPORT_SET cuco-exports) 19 | endfunction() 20 | 21 | find_and_configure_cccl() 22 | -------------------------------------------------------------------------------- /tests/dynamic_bitset/size_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | TEST_CASE("dynamic_bitset size computation test", "") 22 | { 23 | cuco::experimental::detail::dynamic_bitset bv; 24 | using size_type = std::size_t; 25 | constexpr size_type num_elements{400}; 26 | 27 | for (size_type i = 0; i < num_elements; i++) { 28 | bv.push_back(i % 2 == 0); // Alternate 0s and 1s pattern 29 | } 30 | 31 | auto size = bv.size(); 32 | REQUIRE(size == num_elements); 33 | } 34 | -------------------------------------------------------------------------------- /include/cuco/detail/probing_scheme/probing_scheme_base.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | namespace cuco { 22 | namespace detail { 23 | 24 | /** 25 | * @brief Base class of public probing scheme. 26 | * 27 | * This class should not be used directly. 28 | * 29 | * @tparam CGSize Size of CUDA Cooperative Groups 30 | */ 31 | template 32 | class probing_scheme_base { 33 | public: 34 | /** 35 | * @brief The size of the CUDA cooperative thread group. 
36 | */ 37 | static constexpr int32_t cg_size = CGSize; 38 | }; 39 | } // namespace detail 40 | } // namespace cuco 41 | -------------------------------------------------------------------------------- /rocm-docs/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # MIT License 4 | # 5 | # Copyright (c) 2025 Advanced Micro Devices, Inc. 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in all 15 | # copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | # SOFTWARE. 24 | 25 | python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html 26 | -------------------------------------------------------------------------------- /include/cuco/cuda_runtime.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. 
4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #pragma once 24 | 25 | #include 26 | 27 | #include 28 | -------------------------------------------------------------------------------- /rocm-docs/index.md: -------------------------------------------------------------------------------- 1 | 24 | 25 | # hipCollections 26 | 27 | Todo 28 | 29 | ## Overview 30 | 31 | ::::{grid} 1 1 2 2 32 | :gutter: 1 33 | 34 | :::{grid-item-card} Source Code Documentation 35 | - {doc}`/doxygen/html/index` 36 | ::: 37 | 38 | :::: 39 | -------------------------------------------------------------------------------- /tests/static_set/size_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2024, NVIDIA CORPORATION. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #include 24 | 25 | TEST_CASE("static_set size test", "") 26 | { 27 | constexpr std::size_t num_keys{400}; 28 | 29 | cuco::static_set set{cuco::extent{400}, cuco::empty_key{-1}}; 30 | 31 | thrust::device_vector d_keys(num_keys); 32 | 33 | thrust::sequence(thrust::device, d_keys.begin(), d_keys.end()); 34 | 35 | auto const num_successes = set.insert(d_keys.begin(), d_keys.end()); 36 | 37 | REQUIRE(set.size() == num_keys); 38 | REQUIRE(num_successes == num_keys); 39 | 40 | set.clear(); 41 | 42 | REQUIRE(set.size() == 0); 43 | } 44 | -------------------------------------------------------------------------------- /rocm-docs/.sphinx/_toc.yml.in: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. 
4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | defaults: 24 | numbered: False 25 | maxdepth: 6 26 | root: index.md 27 | subtrees: 28 | - caption: Source Code Documentation 29 | entries: 30 | - file: doxygen/html/index 31 | title: Library API Doxygen Documentation 32 | -------------------------------------------------------------------------------- /benchmarks/bloom_filter/defaults.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | #include 22 | 23 | #include 24 | 25 | #include 26 | 27 | namespace cuco::benchmark::defaults { 28 | 29 | using BF_KEY = nvbench::int64_t; 30 | using BF_HASH = cuco::xxhash_64; 31 | using BF_WORD = nvbench::uint32_t; 32 | 33 | static constexpr auto BF_N = 400'000'000; 34 | static constexpr auto BF_SIZE_MB = 2'000; 35 | static constexpr auto BF_WORDS_PER_BLOCK = 8; 36 | 37 | auto const BF_SIZE_MB_RANGE_CACHE = 38 | std::vector{1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048}; 39 | auto const BF_PATTERN_BITS_RANGE = std::vector{1, 2, 4, 6, 8, 16}; 40 | 41 | } // namespace cuco::benchmark::defaults 42 | -------------------------------------------------------------------------------- /include/cuco/detail/utility/math.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | 18 | #include 19 | 20 | namespace cuco { 21 | namespace detail { 22 | 23 | /** 24 | * @brief Ceiling of an integer division 25 | * 26 | * @tparam T Type of dividend 27 | * @tparam U Type of divisor 28 | * 29 | * @throw If `T` is not an integral type 30 | * @throw If `U` is not an integral type 31 | * 32 | * @param dividend Numerator 33 | * @param divisor Denominator 34 | * 35 | * @return Ceiling of the integer division 36 | */ 37 | template 38 | __host__ __device__ constexpr T int_div_ceil(T dividend, U divisor) noexcept 39 | { 40 | static_assert(cuda::std::is_integral_v); 41 | static_assert(cuda::std::is_integral_v); 42 | return (dividend + divisor - 1) / divisor; 43 | } 44 | 45 | } // namespace detail 46 | } // namespace cuco 47 | -------------------------------------------------------------------------------- /include/cuco/detail/hash_functions/utils.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | namespace cuco::detail { 22 | 23 | template 24 | constexpr __host__ __device__ T load_chunk(U const* const data, Extent index) noexcept 25 | { 26 | auto const bytes = reinterpret_cast(data); 27 | T chunk; 28 | memcpy(&chunk, bytes + index * sizeof(T), sizeof(T)); 29 | return chunk; 30 | } 31 | 32 | constexpr __host__ __device__ std::uint32_t rotl32(std::uint32_t x, std::int8_t r) noexcept 33 | { 34 | return (x << r) | (x >> (32 - r)); 35 | } 36 | 37 | constexpr __host__ __device__ std::uint64_t rotl64(std::uint64_t x, std::int8_t r) noexcept 38 | { 39 | return (x << r) | (x >> (64 - r)); 40 | } 41 | 42 | }; // namespace cuco::detail 43 | -------------------------------------------------------------------------------- /overrides.cmake: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (C) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # NOTE(HIP/AMD): These overrides must be placed in a separate file according to https://discourse.cmake.org/t/extend-list-of-file-extensions-for-a-language-globally-for-the-whole-project/7307/6 24 | 25 | set(CMAKE_HIP_SOURCE_FILE_EXTENSIONS hip;cu) 26 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | autofix_commit_msg: | 3 | [pre-commit.ci] auto code formatting 4 | autofix_prs: true 5 | autoupdate_branch: '' 6 | autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate' 7 | autoupdate_schedule: quarterly 8 | skip: [] 9 | submodules: false 10 | 11 | repos: 12 | - repo: https://github.com/pre-commit/mirrors-clang-format 13 | rev: v18.1.8 14 | hooks: 15 | - id: clang-format 16 | types_or: [c, c++, cuda] 17 | args: ['-fallback-style=none', '-style=file', '-i'] 18 | - repo: local 19 | hooks: 20 | - id: check-doxygen 21 | name: check-doxygen 22 | entry: ./ci/pre-commit/doxygen.sh 23 | files: ^include/ 24 | types_or: [file] 25 | language: system 26 | pass_filenames: false 27 | verbose: true 28 | - id: check-example-links 29 | name: check-example-links 30 | entry: ./ci/pre-commit/example_links.py 31 | files: ^examples/ 32 | types: [cuda] 33 | language: python 34 | pass_filenames: false 35 | verbose: true 36 | additional_dependencies: 37 | - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple 38 | - gitpython 39 | 40 | default_language_version: 41 | python: python3 42 | -------------------------------------------------------------------------------- /.github/actions/compute-matrix/action.yml: 
-------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | name: Compute Matrix 17 | description: "Compute the matrix for a given matrix type from the specified matrix file" 18 | 19 | inputs: 20 | matrix_query: 21 | description: "The jq query used to specify the desired matrix. e.g., .pull_request.nvcc" 22 | required: true 23 | matrix_file: 24 | description: 'The file containing the matrix' 25 | required: true 26 | outputs: 27 | matrix: 28 | description: 'The requested matrix' 29 | value: ${{ steps.compute-matrix.outputs.MATRIX }} 30 | 31 | runs: 32 | using: "composite" 33 | steps: 34 | - name: Compute matrix 35 | id: compute-matrix 36 | run: | 37 | MATRIX=$(./.github/actions/compute-matrix/compute-matrix.sh ${{inputs.matrix_file}} ${{inputs.matrix_query}} ) 38 | echo "matrix=$MATRIX" | tee -a $GITHUB_OUTPUT 39 | shell: bash -euxo pipefail {0} -------------------------------------------------------------------------------- /tests/static_set/rehash_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | 22 | #include 23 | 24 | TEST_CASE("static_set rehash test", "") 25 | { 26 | using key_type = int; 27 | 28 | constexpr std::size_t num_keys{400}; 29 | constexpr std::size_t num_erased_keys{100}; 30 | 31 | cuco::static_set set{num_keys, cuco::empty_key{-1}, cuco::erased_key{-2}}; 32 | 33 | thrust::device_vector d_keys(num_keys); 34 | 35 | thrust::sequence(d_keys.begin(), d_keys.end()); 36 | 37 | set.insert(d_keys.begin(), d_keys.end()); 38 | 39 | set.rehash(); 40 | REQUIRE(set.size() == num_keys); 41 | 42 | set.rehash(num_keys * 2); 43 | REQUIRE(set.size() == num_keys); 44 | 45 | set.erase(d_keys.begin(), d_keys.begin() + num_erased_keys); 46 | set.rehash(); 47 | REQUIRE(set.size() == num_keys - num_erased_keys); 48 | } 49 | -------------------------------------------------------------------------------- /.github/actions/configure_cccl_sccache/action.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | name: Set up AWS credentials and environment variables for sccache 17 | description: "Set up AWS credentials and environment variables for sccache" 18 | runs: 19 | using: "composite" 20 | steps: 21 | - name: Get AWS credentials for sccache bucket 22 | uses: aws-actions/configure-aws-credentials@v2 23 | with: 24 | role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA 25 | aws-region: us-east-2 26 | role-duration-seconds: 43200 # 12 hours 27 | - name: Set environment variables 28 | run: | 29 | echo "SCCACHE_BUCKET=rapids-sccache-east" >> $GITHUB_ENV 30 | echo "SCCACHE_REGION=us-east-2" >> $GITHUB_ENV 31 | echo "SCCACHE_IDLE_TIMEOUT=32768" >> $GITHUB_ENV 32 | echo "SCCACHE_S3_USE_SSL=true" >> $GITHUB_ENV 33 | echo "SCCACHE_S3_NO_CREDENTIALS=false" >> $GITHUB_ENV 34 | shell: bash -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "shutdownAction": "stopContainer", 3 | "image": "rapidsai/devcontainers:24.12-cpp-gcc13-cuda12.6-ubuntu22.04", 4 | "hostRequirements": { 5 | "gpu": true 6 | }, 7 | "initializeCommand": [ 8 | "/bin/bash", 9 | "-c", 10 | "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" 11 | ], 12 | "containerEnv": { 13 | "SCCACHE_REGION": "us-east-2", 14 | "SCCACHE_BUCKET": "rapids-sccache-devs", 15 | "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", 16 | "HISTFILE": 
"${containerWorkspaceFolder}/.cache/._bash_history", 17 | "DEVCONTAINER_NAME": "cuda12.6-gcc13", 18 | "CUCO_CUDA_VERSION": "12.6", 19 | "CUCO_HOST_COMPILER": "gcc", 20 | "CUCO_HOST_COMPILER_VERSION": "13" 21 | }, 22 | "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", 23 | "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", 24 | "mounts": [ 25 | "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", 26 | "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", 27 | "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" 28 | ], 29 | "customizations": { 30 | "vscode": { 31 | "extensions": [ 32 | "llvm-vs-code-extensions.vscode-clangd" 33 | ], 34 | "settings": { 35 | "clangd.arguments": [ 36 | "--compile-commands-dir=${workspaceFolder}/build/latest" 37 | ] 38 | } 39 | } 40 | }, 41 | "name": "cuda12.6-gcc13" 42 | } 43 | -------------------------------------------------------------------------------- /rocm-docs/conf.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (c) 2025 Advanced Micro Devices, Inc. 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 
14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | from rocm_docs import ROCmDocs 24 | 25 | 26 | external_projects_remote_repository = "" 27 | external_projects_current_project = "hipCollections" 28 | 29 | docs_core = ROCmDocs("hipCollections") 30 | docs_core.run_doxygen(doxygen_root="./doxygen/", doxygen_path=".") # Only if Doxygen is required for this project 31 | docs_core.enable_api_reference() 32 | docs_core.setup() 33 | 34 | for sphinx_var in ROCmDocs.SPHINX_VARS: 35 | globals()[sphinx_var] = getattr(docs_core, sphinx_var) 36 | -------------------------------------------------------------------------------- /.devcontainer/cuda11.8-gcc11/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "shutdownAction": "stopContainer", 3 | "image": "rapidsai/devcontainers:24.12-cpp-gcc11-cuda11.8-ubuntu22.04", 4 | "hostRequirements": { 5 | "gpu": true 6 | }, 7 | "initializeCommand": [ 8 | "/bin/bash", 9 | "-c", 10 | "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" 11 | ], 12 | "containerEnv": { 13 | "SCCACHE_REGION": "us-east-2", 14 | "SCCACHE_BUCKET": "rapids-sccache-devs", 15 | "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", 16 | "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", 17 | "DEVCONTAINER_NAME": "cuda11.8-gcc11", 18 | "CUCO_CUDA_VERSION": "11.8", 19 | "CUCO_HOST_COMPILER": "gcc", 20 | "CUCO_HOST_COMPILER_VERSION": "11" 21 | }, 22 | "workspaceFolder": 
"/home/coder/${localWorkspaceFolderBasename}", 23 | "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", 24 | "mounts": [ 25 | "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", 26 | "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", 27 | "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" 28 | ], 29 | "customizations": { 30 | "vscode": { 31 | "extensions": [ 32 | "llvm-vs-code-extensions.vscode-clangd" 33 | ], 34 | "settings": { 35 | "clangd.arguments": [ 36 | "--compile-commands-dir=${workspaceFolder}/build/latest" 37 | ] 38 | } 39 | } 40 | }, 41 | "name": "cuda11.8-gcc11" 42 | } 43 | -------------------------------------------------------------------------------- /.devcontainer/cuda12.6-gcc12/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "shutdownAction": "stopContainer", 3 | "image": "rapidsai/devcontainers:24.12-cpp-gcc12-cuda12.6-ubuntu22.04", 4 | "hostRequirements": { 5 | "gpu": true 6 | }, 7 | "initializeCommand": [ 8 | "/bin/bash", 9 | "-c", 10 | "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" 11 | ], 12 | "containerEnv": { 13 | "SCCACHE_REGION": "us-east-2", 14 | "SCCACHE_BUCKET": "rapids-sccache-devs", 15 | "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", 16 | "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", 17 | "DEVCONTAINER_NAME": "cuda12.6-gcc12", 18 | "CUCO_CUDA_VERSION": "12.6", 19 | "CUCO_HOST_COMPILER": "gcc", 20 | "CUCO_HOST_COMPILER_VERSION": "12" 21 | }, 22 | "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", 23 | "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", 24 | "mounts": [ 25 | 
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", 26 | "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", 27 | "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" 28 | ], 29 | "customizations": { 30 | "vscode": { 31 | "extensions": [ 32 | "llvm-vs-code-extensions.vscode-clangd" 33 | ], 34 | "settings": { 35 | "clangd.arguments": [ 36 | "--compile-commands-dir=${workspaceFolder}/build/latest" 37 | ] 38 | } 39 | } 40 | }, 41 | "name": "cuda12.6-gcc12" 42 | } 43 | -------------------------------------------------------------------------------- /.devcontainer/cuda12.6-gcc13/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "shutdownAction": "stopContainer", 3 | "image": "rapidsai/devcontainers:24.12-cpp-gcc13-cuda12.6-ubuntu22.04", 4 | "hostRequirements": { 5 | "gpu": true 6 | }, 7 | "initializeCommand": [ 8 | "/bin/bash", 9 | "-c", 10 | "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" 11 | ], 12 | "containerEnv": { 13 | "SCCACHE_REGION": "us-east-2", 14 | "SCCACHE_BUCKET": "rapids-sccache-devs", 15 | "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", 16 | "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", 17 | "DEVCONTAINER_NAME": "cuda12.6-gcc13", 18 | "CUCO_CUDA_VERSION": "12.6", 19 | "CUCO_HOST_COMPILER": "gcc", 20 | "CUCO_HOST_COMPILER_VERSION": "13" 21 | }, 22 | "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", 23 | "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", 24 | "mounts": [ 25 | "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", 26 | "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", 27 | 
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" 28 | ], 29 | "customizations": { 30 | "vscode": { 31 | "extensions": [ 32 | "llvm-vs-code-extensions.vscode-clangd" 33 | ], 34 | "settings": { 35 | "clangd.arguments": [ 36 | "--compile-commands-dir=${workspaceFolder}/build/latest" 37 | ] 38 | } 39 | } 40 | }, 41 | "name": "cuda12.6-gcc13" 42 | } 43 | -------------------------------------------------------------------------------- /include/cuco/detail/storage/kernels.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #pragma once 17 | 18 | #include 19 | 20 | #include 21 | 22 | namespace cuco { 23 | namespace detail { 24 | 25 | CUCO_SUPPRESS_KERNEL_WARNINGS 26 | 27 | /** 28 | * @brief Initializes each slot in the bucket storage to contain `value`. 
29 | * 30 | * @tparam BucketT Bucket type 31 | * 32 | * @param buckets Pointer to flat storage for buckets 33 | * @param n Number of input buckets 34 | * @param value Value to which every slot of each bucket in `buckets` is initialized 35 | */ 36 | template 37 | CUCO_KERNEL void initialize(BucketT* buckets, 38 | cuco::detail::index_type n, 39 | typename BucketT::value_type value) 40 | { 41 | auto const loop_stride = cuco::detail::grid_stride(); 42 | auto idx = cuco::detail::global_thread_id(); 43 | 44 | while (idx < n) { 45 | auto& bucket_slots = *(buckets + idx); 46 | #pragma unroll 47 | for (auto& slot : bucket_slots) { 48 | slot = value; 49 | } 50 | idx += loop_stride; 51 | } 52 | } 53 | 54 | } // namespace detail 55 | } // namespace cuco 56 | -------------------------------------------------------------------------------- /include/cuco/utility/cuda_thread_scope.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include // cuda::thread_scope 20 | 21 | namespace cuco { 22 | 23 | /** 24 | * @brief Strongly-typed wrapper for `cuda::thread_scope`. 25 | * 26 | * @tparam Scope `cuda::thread_scope` to be wrapped 27 | */ 28 | template 29 | struct cuda_thread_scope { 30 | /** 31 | * @brief Implicit conversion to `cuda::thread_scope`.
32 | * 33 | * @return The wrapped `cuda::thread_scope` 34 | */ 35 | __host__ __device__ constexpr operator cuda::thread_scope() const noexcept { return Scope; } 36 | }; 37 | 38 | // alias definitions 39 | inline constexpr auto thread_scope_system = 40 | cuda_thread_scope{}; ///< `cuco::thread_scope_system` 41 | inline constexpr auto thread_scope_device = 42 | cuda_thread_scope{}; ///< `cuco::thread_scope_device` 43 | inline constexpr auto thread_scope_block = 44 | cuda_thread_scope{}; ///< `cuco::thread_scope_block` 45 | inline constexpr auto thread_scope_thread = 46 | cuda_thread_scope{}; ///< `cuco::thread_scope_thread` 47 | 48 | } // namespace cuco 49 | -------------------------------------------------------------------------------- /tests/dynamic_bitset/rank_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #include 26 | 27 | extern bool modulo_bitgen(uint64_t i); // Defined in get_test.cu 28 | 29 | TEST_CASE("dynamic_bitset rank test", "") 30 | { 31 | cuco::experimental::detail::dynamic_bitset bv; 32 | 33 | using size_type = std::size_t; 34 | constexpr size_type num_elements{4000}; 35 | 36 | for (size_type i = 0; i < num_elements; i++) { 37 | bv.push_back(modulo_bitgen(i)); 38 | } 39 | 40 | thrust::device_vector keys(num_elements); 41 | thrust::sequence(keys.begin(), keys.end(), 0); 42 | 43 | thrust::device_vector d_ranks(num_elements); 44 | 45 | bv.rank(keys.begin(), keys.end(), d_ranks.begin()); 46 | 47 | thrust::host_vector h_ranks = d_ranks; 48 | 49 | size_type cur_rank = 0; 50 | size_type num_matches = 0; 51 | for (size_type i = 0; i < num_elements; i++) { 52 | num_matches += cur_rank == h_ranks[i]; 53 | if (modulo_bitgen(i)) { cur_rank++; } 54 | } 55 | REQUIRE(num_matches == num_elements); 56 | } 57 | -------------------------------------------------------------------------------- /include/cuco/detail/operator.inl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | #include 22 | 23 | namespace cuco { 24 | namespace detail { 25 | 26 | /** 27 | * @brief CRTP mixin which augments a given `Reference` with an `Operator`. 28 | * 29 | * @throw If the operator is not defined in `include/cuco/operator.hpp` 30 | * 31 | * @tparam Operator Operator type, i.e., `cuco::op::*_tag` 32 | * @tparam Reference The reference type. 33 | * 34 | * @note This primary template should never be instantiated. 35 | */ 36 | template 37 | class operator_impl { 38 | static_assert(cuco::dependent_false, 39 | "Operator type is not supported by reference type."); 40 | }; 41 | 42 | /** 43 | * @brief Checks if the given `Operator` is contained in a list of `Operators`. 44 | * 45 | * @tparam Operator Operator type, i.e., `cuco::op::*_tag` 46 | * @tparam Operators List of operators to search in 47 | * 48 | * @return `true` if `Operator` is contained in `Operators`, `false` otherwise. 49 | */ 50 | template 51 | static constexpr bool has_operator() 52 | { 53 | return ((std::is_same_v) || ...); 54 | } 55 | 56 | } // namespace detail 57 | } // namespace cuco 58 | -------------------------------------------------------------------------------- /._upstream/.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Enhancement 2 | description: Suggest an idea to improve cuCollections 3 | title: '[ENHANCEMENT]: ' 4 | labels: ['type: enhancement'] 5 | body: 6 | - type: textarea 7 | id: description 8 | attributes: 9 | label: Is your feature request related to a problem? Please describe. 10 | description: A clear and concise description of what the problem is, e.g., "I would like to be able to..." 11 | placeholder: I would like an overload of `cuco::static_map::insert` that returns the success of each insertion. 
12 | validations: 13 | required: true 14 | - type: textarea 15 | id: proposed-solution 16 | attributes: 17 | label: Describe the solution you'd like 18 | description: A clear and concise description of what you want to happen. 19 | placeholder: | 20 | Add a new overload of `insert` that takes an output iterator range assignable from `bool` that indicates the success of each insert. 21 | Example API: 22 | template 23 | void insert(InputIt first_input, InputIt last_input, OutputIt first_output, cudaStream_t stream = 0); 24 | validations: 25 | required: true 26 | - type: textarea 27 | id: alternatives 28 | attributes: 29 | label: Describe alternatives you've considered 30 | description: 31 | If applicable, please add a clear and concise description of any alternative solutions or features you've 32 | considered. 33 | placeholder: You can implement this yourself with the device-side API, but it would be more convenient as a bulk function. 34 | validations: 35 | required: false 36 | - type: textarea 37 | id: additional-context 38 | attributes: 39 | label: Additional context 40 | description: Add any other context about the request here. 41 | placeholder: This would be useful for sparse embedding tables in DL usecases. 42 | validations: 43 | required: false 44 | -------------------------------------------------------------------------------- /include/cuco/storage.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | namespace cuco { 22 | 23 | /** 24 | * @brief Public storage class. 25 | * 26 | * @note This is a public interface used to control storage bucket size. A bucket consists of one 27 | * or multiple contiguous slots. The bucket size defines the workload granularity for each CUDA 28 | * thread, i.e., how many slots a thread would concurrently operate on when performing modify or 29 | * lookup operations. cuCollections uses the array of bucket storage to supersede the raw flat slot 30 | * storage due to its superior granularity control: When bucket size equals one, array of buckets 31 | * performs the same as the flat storage. If the underlying operation is more memory bandwidth 32 | * bound, e.g., high occupancy multimap operations, a larger bucket size can reduce the length of 33 | * probing sequences and thus improve runtime performance. 34 | * 35 | * @tparam BucketSize Number of slots per bucket storage 36 | */ 37 | template 38 | class storage { 39 | public: 40 | /// Number of slots per bucket storage 41 | static constexpr int32_t bucket_size = BucketSize; 42 | 43 | /// Type of implementation details 44 | template 45 | using impl = bucket_storage; 46 | }; 47 | 48 | } // namespace cuco 49 | -------------------------------------------------------------------------------- /tests/static_map/rehash_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION.
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #include 24 | 25 | TEST_CASE("static_map rehash test", "") 26 | { 27 | using key_type = int; 28 | using mapped_type = long; 29 | 30 | constexpr std::size_t num_keys{400}; 31 | constexpr std::size_t num_erased_keys{100}; 32 | 33 | cuco::static_map map{num_keys, 34 | cuco::empty_key{-1}, 35 | cuco::empty_value{-1}, 36 | cuco::erased_key{-2}}; 37 | 38 | auto keys_begin = thrust::counting_iterator(1); 39 | 40 | auto pairs_begin = thrust::make_transform_iterator( 41 | keys_begin, 42 | cuda::proclaim_return_type>([] __device__(key_type const& x) { 43 | return cuco::pair(x, static_cast(x)); 44 | })); 45 | 46 | map.insert(pairs_begin, pairs_begin + num_keys); 47 | 48 | map.rehash(); 49 | REQUIRE(map.size() == num_keys); 50 | 51 | map.rehash(num_keys * 2); 52 | REQUIRE(map.size() == num_keys); 53 | 54 | map.erase(keys_begin, keys_begin + num_erased_keys); 55 | map.rehash(); 56 | REQUIRE(map.size() == num_keys - num_erased_keys); 57 | } 58 | -------------------------------------------------------------------------------- /.github/workflows/build-and-test.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | name: build and test 17 | 18 | defaults: 19 | run: 20 | shell: bash -eo pipefail {0} 21 | 22 | on: 23 | workflow_call: 24 | inputs: 25 | cpu: {type: string, required: true} 26 | test_name: {type: string, required: false} 27 | build_script: {type: string, required: false} 28 | test_script: {type: string, required: false} 29 | container_image: {type: string, required: false} 30 | run_tests: {type: boolean, required: false, default: true} 31 | 32 | jobs: 33 | build: 34 | name: Build ${{inputs.test_name}} 35 | uses: ./.github/workflows/run-as-coder.yml 36 | with: 37 | name: Build ${{inputs.test_name}} 38 | runner: linux-${{inputs.cpu}}-cpu16 39 | image: ${{ inputs.container_image }} 40 | command: | 41 | ${{ inputs.build_script }} 42 | 43 | test: 44 | needs: build 45 | if: ${{ !cancelled() && ( needs.build.result == 'success' || needs.build.result == 'skipped' ) && inputs.run_tests}} 46 | name: Test ${{inputs.test_name}} 47 | uses: ./.github/workflows/run-as-coder.yml 48 | with: 49 | name: Test ${{inputs.test_name}} 50 | runner: linux-${{inputs.cpu}}-gpu-v100-latest-1 51 | image: ${{inputs.container_image}} 52 | command: | 53 | nvidia-smi 54 | ${{ inputs.test_script }} -------------------------------------------------------------------------------- /include/cuco/detail/hash_functions/identity_hash.cuh: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | namespace cuco::detail { 23 | 24 | /** 25 | * @brief An Identity hash function to hash the given argument on host and device 26 | * 27 | * @note `identity_hash` is perfect if `hash_table_capacity >= |input set|` 28 | * 29 | * @note `identity_hash` is only intended to be used as a perfect hash function. 30 | * 31 | * @note Perfect hashes are deterministic, and thus do not need seeds. 32 | * 33 | * @tparam Key The type of the values to hash 34 | */ 35 | template 36 | struct identity_hash : private thrust::identity { 37 | using argument_type = Key; ///< The type of the values taken as argument 38 | /// The type of the hash values produced 39 | using result_type = cuda::std::conditional_t; 40 | 41 | static_assert(cuda::std::is_convertible_v, 42 | "Key type must be convertible to result_type"); 43 | 44 | /** 45 | * @brief Returns a hash value for its argument, as a value of type `result_type`.
46 | * 47 | * @param x The input argument to hash 48 | * @return A resulting hash value for `x` 49 | */ 50 | __host__ __device__ result_type operator()(Key const& x) const 51 | { 52 | return static_cast(thrust::identity::operator()(x)); 53 | } 54 | }; // identity_hash 55 | 56 | } // namespace cuco::detail 57 | -------------------------------------------------------------------------------- /ci/sccache_hit_rate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | set -euo pipefail 18 | 19 | # Ensure two arguments are provided 20 | if [ $# -ne 2 ]; then 21 | echo "Usage: $0 " >&2 22 | exit 1 23 | fi 24 | 25 | # Print the contents of the before file (path quoted so names with spaces or glob characters work) 26 | echo "=== Contents of $1 ===" >&2 27 | cat "$1" >&2 28 | echo "=== End of $1 ===" >&2 29 | 30 | # Print the contents of the after file (path quoted for the same reason) 31 | echo "=== Contents of $2 ===" >&2 32 | cat "$2" >&2 33 | echo "=== End of $2 ===" >&2 34 | 35 | # Extract compile requests and cache hits from the before and after files 36 | requests_before=$(awk '/^[ \t]*Compile requests[ \t]+[0-9]+/ {print $3}' "$1") 37 | hits_before=$(awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}' "$1") 38 | requests_after=$(awk '/^[ \t]*Compile requests[ \t]+[0-9]+/ {print $3}' "$2") 39 | hits_after=$(awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}' "$2") 40 | 41 | # Calculate the differences to find out how many new requests and hits 42 | requests_diff=$((requests_after - requests_before)) 43 | hits_diff=$((hits_after - hits_before)) 44 | 45 | echo "New Compile Requests: $requests_diff" >&2 46 | echo "New Hits: $hits_diff" >&2 47 | 48 | # Calculate and print the hit rate; only the bare percentage goes to stdout in the non-zero case 49 | if [ "$requests_diff" -eq 0 ]; then 50 | echo "No new compile requests, hit rate is not applicable" 51 | else 52 | hit_rate=$(awk -v hits="$hits_diff" -v requests="$requests_diff" 'BEGIN {printf "%.2f", hits/requests * 100}') 53 | echo "sccache hit rate: $hit_rate%" >&2 54 | echo "$hit_rate" 55 | fi -------------------------------------------------------------------------------- /include/cuco/utility/error.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2023, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | namespace cuco { 23 | /** 24 | * @brief Exception thrown when logical precondition is violated. 25 | * 26 | * This exception should not be thrown directly and is instead thrown by the 27 | * CUCO_EXPECTS macro. 28 | */ 29 | struct logic_error : public std::logic_error { 30 | /** 31 | * @brief Constructs a logic_error with the error message. 32 | * 33 | * @param message Message to be associated with the exception 34 | */ 35 | logic_error(char const* const message) : std::logic_error(message) {} 36 | 37 | /** 38 | * @brief Construct a new logic error object with error message 39 | * 40 | * @param message Message to be associated with the exception 41 | */ 42 | logic_error(std::string const& message) : std::logic_error(message) {} 43 | }; 44 | /** 45 | * @brief Exception thrown when a CUDA error is encountered. 46 | * 47 | */ 48 | struct cuda_error : public std::runtime_error { 49 | /** 50 | * @brief Constructs a `cuda_error` object with the given `message`. 51 | * 52 | * @param message The error char array used to construct `cuda_error` 53 | */ 54 | cuda_error(const char* message) : std::runtime_error(message) {} 55 | /** 56 | * @brief Constructs a `cuda_error` object with the given `message` string. 
57 | * 58 | * @param message The `std::string` used to construct `cuda_error` 59 | */ 60 | cuda_error(std::string const& message) : cuda_error{message.c_str()} {} 61 | }; 62 | } // namespace cuco 63 | -------------------------------------------------------------------------------- /.devcontainer/launch.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | launch_devcontainer() { 18 | 19 | # Ensure we're in the repo root 20 | cd "$( cd "$( dirname "$(realpath -m "${BASH_SOURCE[0]}")" )" && pwd )/.."; 21 | 22 | if [[ -z $1 ]] || [[ -z $2 ]]; then 23 | echo "Usage: $0 [CUDA version] [Host compiler]" 24 | echo "Example: $0 12.6 gcc12" 25 | return 1 26 | fi 27 | 28 | local cuda_version="$1" 29 | local host_compiler="$2" 30 | local workspace="$(basename "$(pwd)")"; 31 | local tmpdir="$(mktemp -d)/${workspace}"; 32 | local path="$(pwd)/.devcontainer/cuda${cuda_version}-${host_compiler}"; 33 | 34 | mkdir -p "${tmpdir}"; 35 | mkdir -p "${tmpdir}/.devcontainer"; 36 | cp -arL "$path/devcontainer.json" "${tmpdir}/.devcontainer"; 37 | sed -i "s@\${localWorkspaceFolder}@$(pwd)@g" "${tmpdir}/.devcontainer/devcontainer.json"; 38 | path="${tmpdir}"; 39 | # URL = hex-encoded host folder + in-container workspace path (/home/coder/<repo dir name>, matching workspaceFolder in devcontainer.json) 40 | local hash="$(echo -n "${path}" | xxd -pu - | tr -d '[:space:]')"; 41 | local url="vscode://vscode-remote/dev-container+${hash}/home/coder/${workspace}"; 42 | 43 | echo "devcontainer URL: ${url}"; 44 | 45 | local launch=""; 46 | if type open >/dev/null 2>&1; then 47 | launch="open"; 48 | elif type xdg-open >/dev/null 2>&1; then 49 | launch="xdg-open"; 50 | fi 51 | 52 | if [ -n "${launch}" ]; then 53 | code --new-window "${tmpdir}"; 54 | exec "${launch}" "${url}" >/dev/null 2>&1; 55 | fi 56 | } 57 | 58 | launch_devcontainer "$@"; -------------------------------------------------------------------------------- /include/cuco/detail/bloom_filter/default_filter_policy.inl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | namespace cuco { 22 | 23 | template 24 | __host__ 25 | __device__ constexpr default_filter_policy::default_filter_policy( 26 | uint32_t pattern_bits, Hash hash) 27 | : impl_{pattern_bits, hash} 28 | { 29 | } 30 | 31 | template 32 | __device__ constexpr typename default_filter_policy::hash_result_type 33 | default_filter_policy::hash( 34 | typename default_filter_policy::hash_argument_type const& key) const 35 | { 36 | return impl_.hash(key); 37 | } 38 | 39 | template 40 | template 41 | __device__ constexpr auto default_filter_policy::block_index( 42 | typename default_filter_policy::hash_result_type hash, 43 | Extent num_blocks) const 44 | { 45 | return impl_.block_index(hash, num_blocks); 46 | } 47 | 48 | template 49 | __device__ constexpr typename default_filter_policy::word_type 50 | default_filter_policy::word_pattern( 51 | default_filter_policy::hash_result_type hash, 52 | std::uint32_t word_index) const 53 | { 54 | return impl_.word_pattern(hash, word_index); 55 | } 56 | 57 | } // namespace cuco -------------------------------------------------------------------------------- /examples/hyperloglog/host_bulk_example.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | /** 26 | * @file host_bulk_example.cu 27 | * @brief Demonstrates usage of `cuco::hyperloglog` "bulk" host APIs. 28 | */ 29 | int main(void) 30 | { 31 | using T = int; 32 | constexpr std::size_t num_items = 1ull << 28; // 1GB 33 | 34 | thrust::device_vector items(num_items); 35 | 36 | // Generate `num_items` distinct items 37 | thrust::sequence(items.begin(), items.end(), 0); 38 | 39 | // We define the desired standard deviation of the approximation error 40 | // 0.0122197 is the default value and corresponds to a 32KB sketch size 41 | auto const sd = cuco::standard_deviation{0.0122197}; 42 | 43 | // Initialize the estimator 44 | cuco::hyperloglog estimator{sd}; 45 | 46 | // Add all items to the estimator 47 | estimator.add(items.begin(), items.end()); 48 | 49 | // Adding the same items again will not affect the result 50 | estimator.add(items.begin(), items.begin() + num_items / 2); 51 | 52 | // Calculate the cardinality estimate 53 | std::size_t const estimated_cardinality = estimator.estimate(); 54 | 55 | std::cout << "True cardinality: " << num_items 56 | << "\nEstimated cardinality: " << estimated_cardinality << "\nError: " 57 | << std::abs( 58 | static_cast(estimated_cardinality) / static_cast(num_items) - 1.0) 59 | << std::endl; 60 | 61 | return 0; 62 | } -------------------------------------------------------------------------------- /tests/utility/fast_int_test.cu: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | 24 | #include 25 | #include 26 | /* For every listed integer type: checks that a fast_int converts back to the wrapped value and that division and modulo with a fast_int right-hand side match the built-in operators. */ 27 | TEMPLATE_TEST_CASE( 28 | "utility::fast_int tests", "", std::int32_t, std::uint32_t, std::int64_t, std::uint64_t) 29 | { 30 | TestType value = GENERATE(1, 2, 9, 32, 4123, 8192, 4312456); 31 | TestType lhs = GENERATE(1, 2, 9, 32, 4123, 8192, 4312456); 32 | constexpr auto max_value = std::numeric_limits::max(); 33 | 34 | cuco::utility::fast_int fast_value{value}; 35 | 36 | SECTION("Should be explicitly convertible to the underlying integer type.") 37 | { 38 | REQUIRE(static_cast(fast_value) == value); 39 | } 40 | 41 | SECTION("Fast div/mod should produce correct result.") 42 | { 43 | INFO(lhs << " /% " << value); 44 | REQUIRE(lhs / fast_value == lhs / value); 45 | REQUIRE(lhs % fast_value == lhs % value); 46 | } 47 | 48 | SECTION("Fast div/mod with maximum rhs value should produce correct result.") 49 | { 50 | INFO(lhs << " /% " << max_value); 51 | cuco::utility::fast_int fast_max{max_value}; 52 | REQUIRE(lhs / fast_max == lhs / max_value); 53 | REQUIRE(lhs % fast_max == lhs % max_value); 54 | } 55 | 56 | SECTION("Fast div/mod with maximum lhs value should produce correct result.") 57 | { 58 | INFO(max_value << " /% " << value); 59
| REQUIRE(max_value / fast_value == max_value / value); 60 | REQUIRE(max_value % fast_value == max_value % value); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /benchmarks/benchmark_defaults.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | #include 22 | 23 | #include 24 | #include 25 | 26 | namespace cuco::benchmark::defaults { 27 | /* Type lists */ 28 | using KEY_TYPE_RANGE = nvbench::type_list; 29 | using VALUE_TYPE_RANGE = nvbench::type_list; 30 | using HASH_RANGE = nvbench::type_list, 31 | cuco::xxhash_32, 32 | cuco::xxhash_64, 33 | cuco::murmurhash3_32>; //, 34 | // cuco::murmurhash3_x86_128, 35 | // cuco::murmurhash3_x64_128>; // TODO handle tuple-like hash value 36 | /* Single default values */ 37 | auto constexpr N = 100'000'000; 38 | auto constexpr OCCUPANCY = 0.5; 39 | auto constexpr MULTIPLICITY = 1; 40 | auto constexpr MATCHING_RATE = 1.0; 41 | auto constexpr MAX_NOISE = 3; 42 | auto constexpr SKEW = 0.5; 43 | auto constexpr BATCH_SIZE = 1'000'000; 44 | auto constexpr INITIAL_SIZE = 50'000'000; 45 | /* Value ranges */ 46 | auto const N_RANGE = nvbench::range(10'000'000, 100'000'000, 20'000'000); 47 | auto const N_RANGE_CACHE = 48 | std::vector{8'000, 80'000, 800'000, 8'000'000, 80'000'000}; 49 | auto const OCCUPANCY_RANGE =
nvbench::range(0.1, 0.9, 0.1); 50 | auto const MULTIPLICITY_RANGE = std::vector{1, 2, 4, 8, 16}; 51 | auto const MATCHING_RATE_RANGE = nvbench::range(0.1, 1., 0.1); 52 | auto const SKEW_RANGE = nvbench::range(0.1, 1., 0.1); 53 | 54 | } // namespace cuco::benchmark::defaults 55 | -------------------------------------------------------------------------------- /._upstream/.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 45 | -------------------------------------------------------------------------------- /include/cuco/detail/utility/strong_type.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | 18 | namespace cuco::detail { 19 | 20 | /** 21 | * @brief A strong type wrapper 22 | * 23 | * @tparam T Type of the value 24 | * 25 | */ 26 | template 27 | struct strong_type { 28 | /** 29 | * @brief Constructs a strong type 30 | * 31 | * @param v Value to be wrapped as a strong type 32 | */ 33 | __host__ __device__ explicit constexpr strong_type(T v) : value{v} {} 34 | 35 | /** 36 | * @brief Implicit conversion operator to the underlying value.
37 | * 38 | * @return Underlying value 39 | */ 40 | __host__ __device__ constexpr operator T() const noexcept { return value; } 41 | 42 | T value; ///< Underlying data value 43 | }; 44 | 45 | } // namespace cuco::detail 46 | 47 | /** 48 | * @brief Convenience wrapper for defining a strong type `Name` that wraps a value of type `Type` 49 | */ 50 | #define CUCO_DEFINE_STRONG_TYPE(Name, Type) \ 51 | struct Name : public cuco::detail::strong_type { \ 52 | __host__ __device__ explicit constexpr Name(Type value) \ 53 | : cuco::detail::strong_type(value) \ 54 | { \ 55 | } \ 56 | }; 57 | 58 | /** 59 | * @brief Convenience wrapper for defining a templated strong type `Name` that wraps a value of its template parameter type `T` 60 | */ 61 | #define CUCO_DEFINE_TEMPLATE_STRONG_TYPE(Name) \ 62 | template \ 63 | struct Name : public cuco::detail::strong_type { \ 64 | __host__ __device__ explicit constexpr Name(T value) : cuco::detail::strong_type(value) {} \ 65 | }; 66 | -------------------------------------------------------------------------------- /benchmarks/benchmark_utils.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | #include 23 | 24 | namespace cuco::benchmark { 25 | /* Builds a `Dist` for the benchmark `state`: the first branch default-constructs it, the second constructs it from the int64 value named "Multiplicity", the third from the float64 value named "Skew"; any other `Dist` reaches CUCO_FAIL. */ 26 | template 27 | auto dist_from_state(nvbench::state const& state) 28 | { 29 | if constexpr (std::is_same_v) { 30 | return Dist{}; 31 | } else if constexpr (std::is_same_v) { 32 | auto const multiplicity = state.get_int64("Multiplicity"); 33 | return Dist{multiplicity}; 34 | } else if constexpr (std::is_same_v) { 35 | auto const skew = state.get_float64("Skew"); 36 | return Dist{skew}; 37 | } else { 38 | CUCO_FAIL("Unexpected distribution type"); 39 | } 40 | } 41 | /* `rebind_hasher` is only declared here, not defined. */ 42 | template 43 | struct rebind_hasher; 44 | 45 | template