├── .github ├── CODEOWNERS ├── copy-pr-bot.yaml ├── ISSUE_TEMPLATE │ ├── config.yml │ └── feature_request.yml ├── actions │ ├── compute-matrix │ │ ├── action.yml │ │ └── compute-matrix.sh │ └── configure_cccl_sccache │ │ └── action.yml ├── workflows │ ├── build-and-test.yml │ ├── dispatch-build-and-test.yml │ ├── run-as-coder.yml │ └── verify-devcontainers.yml └── PULL_REQUEST_TEMPLATE.md ├── SECURITY.md ├── ci ├── test.sh ├── update_rapids_version.sh ├── sccache_hit_rate.sh ├── pre-commit │ └── doxygen.sh ├── sccache_stats.sh └── matrix.yml ├── cmake ├── thirdparty │ └── get_cccl.cmake ├── roaring_testdata.cmake └── header_testing.cmake ├── tests ├── dynamic_bitset │ ├── size_test.cu │ ├── rank_test.cu │ ├── find_next_test.cu │ ├── get_test.cu │ └── select_test.cu ├── static_set │ ├── size_test.cu │ ├── rehash_test.cu │ ├── atomic_storage_test.cu │ ├── insert_and_find_test.cu │ ├── large_input_test.cu │ └── retrieve_all_test.cu ├── hyperloglog │ ├── type_deduction_test.cu │ ├── unique_sequence_test.cu │ └── device_ref_test.cu ├── test_utils.cuh ├── static_map │ ├── rehash_test.cu │ ├── hash_test.cu │ └── key_sentinel_test.cu ├── utility │ ├── fast_int_test.cu │ └── extent_test.cu ├── static_multiset │ ├── load_factor_test.cu │ └── large_input_test.cu └── dynamic_map │ └── unique_sequence_test_experimental.cu ├── include └── cuco │ ├── detail │ ├── probing_scheme │ │ └── probing_scheme_base.cuh │ ├── utility │ │ ├── math.cuh │ │ ├── strong_type.cuh │ │ ├── cuda.hpp │ │ └── cuda.cuh │ ├── hash_functions │ │ ├── utils.cuh │ │ └── identity_hash.cuh │ ├── storage │ │ ├── functors.cuh │ │ ├── storage.cuh │ │ └── storage_base.cuh │ ├── __config │ ├── operator.inl │ ├── pair │ │ ├── pair.inl │ │ └── traits.hpp │ ├── bloom_filter │ │ └── default_filter_policy.inl │ ├── utils.hpp │ └── roaring_bitmap │ │ ├── roaring_bitmap_ref.inl │ │ └── roaring_bitmap.inl │ ├── types.cuh │ ├── utility │ ├── cuda_thread_scope.cuh │ ├── error.hpp │ ├── traits.hpp │ ├── reduction_functors.cuh │ └── allocator.hpp │ ├── storage.cuh │ ├── probe_sequences.cuh │ └── hash_functions.cuh ├── benchmarks ├── bloom_filter │ ├── defaults.hpp │ └── utils.hpp ├── benchmark_defaults.hpp └── static_set │ ├── size_bench.cu │ ├── rehash_bench.cu │ └── retrieve_all_bench.cu ├── .pre-commit-config.yaml ├── .devcontainer ├── devcontainer.json ├── cuda12.0-gcc11 │ └── devcontainer.json ├── cuda12.9-gcc13 │ └── devcontainer.json ├── cuda13.0-gcc13 │ └── devcontainer.json ├── cuda12.0-llvm14 │ └── devcontainer.json ├── cuda12.9-llvm18 │ └── devcontainer.json ├── cuda13.0-llvm20 │ └── devcontainer.json ├── launch.sh └── verify_devcontainer.sh ├── examples ├── hyperloglog │ └── host_bulk_example.cu ├── static_set │ └── host_bulk_example.cu ├── bloom_filter │ └── host_bulk_example.cu ├── static_multimap │ └── host_bulk_example.cu ├── static_multiset │ └── host_bulk_example.cu └── static_map │ └── host_bulk_example.cu ├── .gitignore └── CONTRIBUTING.md /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @sleeepyjack @PointKernel 2 | -------------------------------------------------------------------------------- /.github/copy-pr-bot.yaml: -------------------------------------------------------------------------------- 1 | # Configuration file for `copy-pr-bot` GitHub App 2 | # https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/ 3 | 4 | enabled: true 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: 
-------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: Question 4 | url: https://github.com/NVIDIA/cuCollections/discussions 5 | about: Check out our Discussions page to ask and answer questions. 6 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | ## Security 2 | 3 | NVIDIA is dedicated to the security and trust of our software products and services, including all source code repositories managed through our organization. 4 | 5 | If you need to report a security issue, please use the appropriate contact points outlined below. **Please do not report security vulnerabilities through GitHub.** 6 | 7 | ## Reporting Potential Security Vulnerability in cuCollections 8 | 9 | To report a potential security vulnerability in cuCollections: 10 | - Web: [Security Vulnerability Submission Form](https://www.nvidia.com/object/submit-security-vulnerability.html) 11 | - E-Mail: psirt@nvidia.com 12 | - We encourage you to use the following PGP key for secure email communication: [NVIDIA public PGP Key for communication](https://www.nvidia.com/en-us/security/pgp-key) 13 | - Please include the following information: 14 | - Product/Driver name and version/branch that contains the vulnerability 15 | -------------------------------------------------------------------------------- /ci/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Ensure the script is being executed in its containing directory 18 | cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"; 19 | 20 | source ./build.sh "$@" 21 | 22 | ctest --test-dir ${BUILD_DIR}/tests --output-on-failure --timeout 60 23 | 24 | echo "Test complete" -------------------------------------------------------------------------------- /cmake/thirdparty/get_cccl.cmake: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # Copyright (c) 2021-2023, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | # in compliance with the License. You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software distributed under the License 10 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | # or implied. See the License for the specific language governing permissions and limitations under 12 | # the License. 
13 | # ============================================================================= 14 | 15 | # Use CPM to find or clone CCCL 16 | function(find_and_configure_cccl) 17 | include(${rapids-cmake-dir}/cpm/cccl.cmake) 18 | rapids_cpm_cccl(INSTALL_EXPORT_SET cuco-exports BUILD_EXPORT_SET cuco-exports) 19 | endfunction() 20 | 21 | find_and_configure_cccl() 22 | -------------------------------------------------------------------------------- /tests/dynamic_bitset/size_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | TEST_CASE("dynamic_bitset size computation test", "") 22 | { 23 | cuco::experimental::detail::dynamic_bitset bv; 24 | using size_type = std::size_t; 25 | constexpr size_type num_elements{400}; 26 | 27 | for (size_type i = 0; i < num_elements; i++) { 28 | bv.push_back(i % 2 == 0); // Alternate 0s and 1s pattern 29 | } 30 | 31 | auto size = bv.size(); 32 | REQUIRE(size == num_elements); 33 | } 34 | -------------------------------------------------------------------------------- /include/cuco/detail/probing_scheme/probing_scheme_base.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | namespace cuco { 22 | namespace detail { 23 | 24 | /** 25 | * @brief Base class of public probing scheme. 26 | * 27 | * This class should not be used directly. 28 | * 29 | * @tparam CGSize Size of CUDA Cooperative Groups 30 | */ 31 | template 32 | class probing_scheme_base { 33 | public: 34 | /** 35 | * @brief The size of the CUDA cooperative thread group. 36 | */ 37 | static constexpr int32_t cg_size = CGSize; 38 | }; 39 | } // namespace detail 40 | } // namespace cuco 41 | -------------------------------------------------------------------------------- /tests/static_set/size_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #include 24 | 25 | TEST_CASE("static_set size test", "") 26 | { 27 | constexpr std::size_t num_keys{400}; 28 | 29 | cuco::static_set set{cuco::extent{400}, cuco::empty_key{-1}}; 30 | 31 | thrust::device_vector d_keys(num_keys); 32 | 33 | thrust::sequence(thrust::device, d_keys.begin(), d_keys.end()); 34 | 35 | auto const num_successes = set.insert(d_keys.begin(), d_keys.end()); 36 | 37 | REQUIRE(set.size() == num_keys); 38 | REQUIRE(num_successes == num_keys); 39 | 40 | set.clear(); 41 | 42 | REQUIRE(set.size() == 0); 43 | } 44 | -------------------------------------------------------------------------------- /include/cuco/detail/utility/math.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | */ 15 | 16 | #pragma once 17 | 18 | #include 19 | 20 | namespace cuco { 21 | namespace detail { 22 | 23 | /** 24 | * @brief Ceiling of an integer division 25 | * 26 | * @tparam T Type of dividend 27 | * @tparam U Type of divisor 28 | * 29 | * @throw If `T` is not an integral type 30 | * @throw If `U` is not an integral type 31 | * 32 | * @param dividend Numerator 33 | * @param divisor Denominator 34 | * 35 | * @return Ceiling of the integer division 36 | */ 37 | template 38 | __host__ __device__ constexpr T int_div_ceil(T dividend, U divisor) noexcept 39 | { 40 | static_assert(cuda::std::is_integral_v); 41 | static_assert(cuda::std::is_integral_v); 42 | return (dividend + divisor - 1) / divisor; 43 | } 44 | 45 | } // namespace detail 46 | } // namespace cuco 47 | -------------------------------------------------------------------------------- /benchmarks/bloom_filter/defaults.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
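As a side note on the `int_div_ceil` helper from include/cuco/detail/utility/math.cuh shown above: the following is a minimal usage sketch, not part of the repository, assuming the detail header is reachable on the include path and the translation unit is compiled with nvcc. Its typical purpose is computing launch grid sizes.

#include <cuco/detail/utility/math.cuh>

int main()
{
  // e.g. 400 work items processed by 128-thread blocks -> 4 blocks
  constexpr auto num_blocks = cuco::detail::int_div_ceil(400, 128);
  static_assert(num_blocks == 4, "ceil(400 / 128) == 4");
  return 0;
}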
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | #include 22 | 23 | #include 24 | 25 | #include 26 | 27 | namespace cuco::benchmark::defaults { 28 | 29 | using BF_KEY = nvbench::int64_t; 30 | using BF_HASH = cuco::xxhash_64; 31 | using BF_WORD = nvbench::uint32_t; 32 | 33 | static constexpr auto BF_N = 1'000'000'000; 34 | static constexpr auto BF_SIZE_MB = 2'000; 35 | static constexpr auto BF_WORDS_PER_BLOCK = 8; 36 | 37 | auto const BF_SIZE_MB_RANGE_CACHE = 38 | std::vector{1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048}; 39 | auto const BF_PATTERN_BITS_RANGE = std::vector{1, 2, 4, 6, 8, 16}; 40 | 41 | } // namespace cuco::benchmark::defaults 42 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | autofix_commit_msg: | 3 | [pre-commit.ci] auto code formatting 4 | autofix_prs: true 5 | autoupdate_branch: '' 6 | autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate' 7 | autoupdate_schedule: quarterly 8 | skip: [] 9 | submodules: false 10 | 11 | repos: 12 | - repo: https://github.com/pre-commit/mirrors-clang-format 13 | rev: v20.1.4 14 | hooks: 15 | - id: clang-format 16 | types_or: [c, c++, cuda] 17 | args: ['-fallback-style=none', '-style=file', '-i'] 18 | - repo: local 19 | hooks: 20 | - id: check-doxygen 21 | name: check-doxygen 22 | entry: ./ci/pre-commit/doxygen.sh 23 | files: ^include/ 24 | types_or: [file] 25 | language: system 26 | pass_filenames: false 27 | verbose: true 28 | - id: check-example-links 29 | name: check-example-links 30 | entry: ./ci/pre-commit/example_links.py 31 | files: ^examples/ 32 | types: [cuda] 33 | language: python 34 | pass_filenames: false 35 | verbose: true 36 | additional_dependencies: 37 | - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple 38 | - gitpython 39 | 40 | default_language_version: 41 | python: python3 42 | -------------------------------------------------------------------------------- /ci/update_rapids_version.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2025, NVIDIA CORPORATION. 3 | ########################## 4 | # RAPIDS Version Updater # 5 | ########################## 6 | 7 | ## Usage 8 | # bash update_rapids_version.sh 9 | 10 | # Format is YY.MM.PP - no leading 'v' or trailing 'a' 11 | NEXT_FULL_TAG=$1 12 | 13 | #Get . 
for next version 14 | NEXT_MAJOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[1]}') 15 | NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}') 16 | NEXT_PATCH=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[3]}') 17 | NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR} 18 | 19 | # Need to distutils-normalize the versions for some use cases 20 | NEXT_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_SHORT_TAG}'))") 21 | 22 | echo "Updating RAPIDS and devcontainers to $NEXT_FULL_TAG" 23 | 24 | # Inplace sed replace; workaround for Linux and Mac 25 | function sed_runner() { 26 | sed -i.bak ''"$1"'' $2 && rm -f ${2}.bak 27 | } 28 | 29 | # Update CI files 30 | sed_runner "/devcontainer_version/ s/'[0-9.]*'/'${NEXT_SHORT_TAG}'/g" ci/matrix.yml 31 | 32 | # Update CMakeLists.txt 33 | sed_runner "s/set(rapids-cmake-version [0-9.]*)/set(rapids-cmake-version ${NEXT_SHORT_TAG})/g" CMakeLists.txt 34 | 35 | # Update .devcontainer files 36 | find .devcontainer/ -type f -name devcontainer.json -print0 | while IFS= read -r -d '' filename; do 37 | sed_runner "s@rapidsai/devcontainers:[0-9.]*@rapidsai/devcontainers:${NEXT_SHORT_TAG}@g" "${filename}" 38 | done 39 | -------------------------------------------------------------------------------- /.github/actions/compute-matrix/action.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | name: Compute Matrix 17 | description: "Compute the matrix for a given matrix type from the specified matrix file" 18 | 19 | inputs: 20 | matrix_query: 21 | description: "The jq query used to specify the desired matrix. e.g., .pull_request.nvcc" 22 | required: true 23 | matrix_file: 24 | description: 'The file containing the matrix' 25 | required: true 26 | outputs: 27 | matrix: 28 | description: 'The requested matrix' 29 | value: ${{ steps.compute-matrix.outputs.MATRIX }} 30 | 31 | runs: 32 | using: "composite" 33 | steps: 34 | - name: Compute matrix 35 | id: compute-matrix 36 | run: | 37 | MATRIX=$(./.github/actions/compute-matrix/compute-matrix.sh ${{inputs.matrix_file}} ${{inputs.matrix_query}} ) 38 | echo "matrix=$MATRIX" | tee -a $GITHUB_OUTPUT 39 | shell: bash -euxo pipefail {0} -------------------------------------------------------------------------------- /tests/static_set/rehash_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | 22 | #include 23 | 24 | TEST_CASE("static_set rehash test", "") 25 | { 26 | using key_type = int; 27 | 28 | constexpr std::size_t num_keys{400}; 29 | constexpr std::size_t num_erased_keys{100}; 30 | 31 | cuco::static_set set{num_keys, cuco::empty_key{-1}, cuco::erased_key{-2}}; 32 | 33 | thrust::device_vector d_keys(num_keys); 34 | 35 | thrust::sequence(d_keys.begin(), d_keys.end()); 36 | 37 | set.insert(d_keys.begin(), d_keys.end()); 38 | 39 | set.rehash(); 40 | REQUIRE(set.size() == num_keys); 41 | 42 | set.rehash(num_keys * 2); 43 | REQUIRE(set.size() == num_keys); 44 | 45 | set.erase(d_keys.begin(), d_keys.begin() + num_erased_keys); 46 | set.rehash(); 47 | REQUIRE(set.size() == num_keys - num_erased_keys); 48 | } 49 | -------------------------------------------------------------------------------- /.github/actions/configure_cccl_sccache/action.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | name: Set up AWS credentials and environment variables for sccache 17 | description: "Set up AWS credentials and environment variables for sccache" 18 | runs: 19 | using: "composite" 20 | steps: 21 | - name: Get AWS credentials for sccache bucket 22 | uses: aws-actions/configure-aws-credentials@v2 23 | with: 24 | role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA 25 | aws-region: us-east-2 26 | role-duration-seconds: 43200 # 12 hours 27 | - name: Set environment variables 28 | run: | 29 | echo "SCCACHE_BUCKET=rapids-sccache-east" >> $GITHUB_ENV 30 | echo "SCCACHE_REGION=us-east-2" >> $GITHUB_ENV 31 | echo "SCCACHE_IDLE_TIMEOUT=32768" >> $GITHUB_ENV 32 | echo "SCCACHE_S3_USE_SSL=true" >> $GITHUB_ENV 33 | echo "SCCACHE_S3_NO_CREDENTIALS=false" >> $GITHUB_ENV 34 | shell: bash -------------------------------------------------------------------------------- /include/cuco/detail/hash_functions/utils.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | namespace cuco::detail { 23 | 24 | template 25 | constexpr __host__ __device__ T load_chunk(U const* const data, Extent index) noexcept 26 | { 27 | auto const bytes = reinterpret_cast(data); 28 | T chunk; 29 | memcpy(&chunk, bytes + index * sizeof(T), sizeof(T)); 30 | return chunk; 31 | } 32 | 33 | constexpr __host__ __device__ cuda::std::uint32_t rotl32(cuda::std::uint32_t x, 34 | cuda::std::int8_t r) noexcept 35 | { 36 | return (x << r) | (x >> (32 - r)); 37 | } 38 | 39 | constexpr __host__ __device__ cuda::std::uint64_t rotl64(cuda::std::uint64_t x, 40 | cuda::std::int8_t r) noexcept 41 | { 42 | return (x << r) | (x >> (64 - r)); 43 | } 44 | 45 | }; // namespace cuco::detail 46 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "shutdownAction": "stopContainer", 3 | "image": "rapidsai/devcontainers:25.12-cpp-gcc13-cuda13.0-ubuntu24.04", 4 | "hostRequirements": { 5 | "gpu": true 6 | }, 7 | "initializeCommand": [ 8 | "/bin/bash", 9 | "-c", 10 | "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" 11 | ], 12 | "containerEnv": { 13 | "SCCACHE_REGION": "us-east-2", 14 | "SCCACHE_BUCKET": "rapids-sccache-devs", 15 | "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", 16 | "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", 17 | "DEVCONTAINER_NAME": "cuda13.0-gcc13", 18 | "CUCO_CUDA_VERSION": "13.0", 19 | "CUCO_HOST_COMPILER": "gcc", 20 | "CUCO_HOST_COMPILER_VERSION": "13" 21 | }, 22 | "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", 23 | "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", 24 | "mounts": [ 25 | "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", 26 | "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", 27 | "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" 28 | ], 29 | "customizations": { 30 | "vscode": { 31 | "extensions": [ 32 | "llvm-vs-code-extensions.vscode-clangd" 33 | ], 34 | "settings": { 35 | "clangd.arguments": [ 36 | "--compile-commands-dir=${workspaceFolder}/build/latest" 37 | ] 38 | } 39 | } 40 | }, 41 | "name": "cuda13.0-gcc13" 42 | } 43 | -------------------------------------------------------------------------------- /.devcontainer/cuda12.0-gcc11/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "shutdownAction": "stopContainer", 3 | "image": "rapidsai/devcontainers:25.12-cpp-gcc11-cuda12.0-ubuntu22.04", 4 | "hostRequirements": { 5 | "gpu": true 6 | }, 7 | "initializeCommand": [ 8 | "/bin/bash", 9 | "-c", 10 | "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" 11 | ], 12 | "containerEnv": { 13 | "SCCACHE_REGION": "us-east-2", 14 | 
"SCCACHE_BUCKET": "rapids-sccache-devs", 15 | "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", 16 | "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", 17 | "DEVCONTAINER_NAME": "cuda12.0-gcc11", 18 | "CUCO_CUDA_VERSION": "12.0", 19 | "CUCO_HOST_COMPILER": "gcc", 20 | "CUCO_HOST_COMPILER_VERSION": "11" 21 | }, 22 | "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", 23 | "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", 24 | "mounts": [ 25 | "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", 26 | "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", 27 | "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" 28 | ], 29 | "customizations": { 30 | "vscode": { 31 | "extensions": [ 32 | "llvm-vs-code-extensions.vscode-clangd" 33 | ], 34 | "settings": { 35 | "clangd.arguments": [ 36 | "--compile-commands-dir=${workspaceFolder}/build/latest" 37 | ] 38 | } 39 | } 40 | }, 41 | "name": "cuda12.0-gcc11" 42 | } 43 | -------------------------------------------------------------------------------- /.devcontainer/cuda12.9-gcc13/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "shutdownAction": "stopContainer", 3 | "image": "rapidsai/devcontainers:25.12-cpp-gcc13-cuda12.9-ubuntu24.04", 4 | "hostRequirements": { 5 | "gpu": true 6 | }, 7 | "initializeCommand": [ 8 | "/bin/bash", 9 | "-c", 10 | "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" 11 | ], 12 | "containerEnv": { 13 | "SCCACHE_REGION": "us-east-2", 14 | "SCCACHE_BUCKET": "rapids-sccache-devs", 15 | "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", 16 | "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", 17 | "DEVCONTAINER_NAME": "cuda12.9-gcc13", 18 | "CUCO_CUDA_VERSION": "12.9", 19 | "CUCO_HOST_COMPILER": "gcc", 20 | "CUCO_HOST_COMPILER_VERSION": "13" 21 | }, 22 | "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", 23 | "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", 24 | "mounts": [ 25 | "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", 26 | "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", 27 | "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" 28 | ], 29 | "customizations": { 30 | "vscode": { 31 | "extensions": [ 32 | "llvm-vs-code-extensions.vscode-clangd" 33 | ], 34 | "settings": { 35 | "clangd.arguments": [ 36 | "--compile-commands-dir=${workspaceFolder}/build/latest" 37 | ] 38 | } 39 | } 40 | }, 41 | "name": "cuda12.9-gcc13" 42 | } 43 | -------------------------------------------------------------------------------- /.devcontainer/cuda13.0-gcc13/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "shutdownAction": "stopContainer", 3 | "image": "rapidsai/devcontainers:25.12-cpp-gcc13-cuda13.0-ubuntu24.04", 4 | "hostRequirements": { 5 | "gpu": true 6 | }, 7 | "initializeCommand": [ 8 | "/bin/bash", 9 | "-c", 10 | "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" 11 | ], 12 | "containerEnv": { 13 | "SCCACHE_REGION": "us-east-2", 14 | 
"SCCACHE_BUCKET": "rapids-sccache-devs", 15 | "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", 16 | "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", 17 | "DEVCONTAINER_NAME": "cuda13.0-gcc13", 18 | "CUCO_CUDA_VERSION": "13.0", 19 | "CUCO_HOST_COMPILER": "gcc", 20 | "CUCO_HOST_COMPILER_VERSION": "13" 21 | }, 22 | "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", 23 | "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", 24 | "mounts": [ 25 | "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", 26 | "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", 27 | "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" 28 | ], 29 | "customizations": { 30 | "vscode": { 31 | "extensions": [ 32 | "llvm-vs-code-extensions.vscode-clangd" 33 | ], 34 | "settings": { 35 | "clangd.arguments": [ 36 | "--compile-commands-dir=${workspaceFolder}/build/latest" 37 | ] 38 | } 39 | } 40 | }, 41 | "name": "cuda13.0-gcc13" 42 | } 43 | -------------------------------------------------------------------------------- /.devcontainer/cuda12.0-llvm14/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "shutdownAction": "stopContainer", 3 | "image": "rapidsai/devcontainers:25.12-cpp-llvm14-cuda12.0-ubuntu20.04", 4 | "hostRequirements": { 5 | "gpu": true 6 | }, 7 | "initializeCommand": [ 8 | "/bin/bash", 9 | "-c", 10 | "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" 11 | ], 12 | "containerEnv": { 13 | "SCCACHE_REGION": "us-east-2", 14 | "SCCACHE_BUCKET": "rapids-sccache-devs", 15 | "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", 16 | "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", 17 | "DEVCONTAINER_NAME": "cuda12.0-llvm14", 18 | "CUCO_CUDA_VERSION": "12.0", 19 | "CUCO_HOST_COMPILER": "llvm", 20 | "CUCO_HOST_COMPILER_VERSION": "14" 21 | }, 22 | "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", 23 | "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", 24 | "mounts": [ 25 | "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", 26 | "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", 27 | "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" 28 | ], 29 | "customizations": { 30 | "vscode": { 31 | "extensions": [ 32 | "llvm-vs-code-extensions.vscode-clangd" 33 | ], 34 | "settings": { 35 | "clangd.arguments": [ 36 | "--compile-commands-dir=${workspaceFolder}/build/latest" 37 | ] 38 | } 39 | } 40 | }, 41 | "name": "cuda12.0-llvm14" 42 | } 43 | -------------------------------------------------------------------------------- /.devcontainer/cuda12.9-llvm18/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "shutdownAction": "stopContainer", 3 | "image": "rapidsai/devcontainers:25.12-cpp-llvm18-cuda12.9-ubuntu22.04", 4 | "hostRequirements": { 5 | "gpu": true 6 | }, 7 | "initializeCommand": [ 8 | "/bin/bash", 9 | "-c", 10 | "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" 11 | ], 12 | "containerEnv": { 13 | "SCCACHE_REGION": "us-east-2", 14 | 
"SCCACHE_BUCKET": "rapids-sccache-devs", 15 | "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", 16 | "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", 17 | "DEVCONTAINER_NAME": "cuda12.9-llvm18", 18 | "CUCO_CUDA_VERSION": "12.9", 19 | "CUCO_HOST_COMPILER": "llvm", 20 | "CUCO_HOST_COMPILER_VERSION": "18" 21 | }, 22 | "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", 23 | "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", 24 | "mounts": [ 25 | "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", 26 | "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", 27 | "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" 28 | ], 29 | "customizations": { 30 | "vscode": { 31 | "extensions": [ 32 | "llvm-vs-code-extensions.vscode-clangd" 33 | ], 34 | "settings": { 35 | "clangd.arguments": [ 36 | "--compile-commands-dir=${workspaceFolder}/build/latest" 37 | ] 38 | } 39 | } 40 | }, 41 | "name": "cuda12.9-llvm18" 42 | } 43 | -------------------------------------------------------------------------------- /.devcontainer/cuda13.0-llvm20/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "shutdownAction": "stopContainer", 3 | "image": "rapidsai/devcontainers:25.12-cpp-llvm20-cuda13.0ext-ubuntu24.04", 4 | "hostRequirements": { 5 | "gpu": true 6 | }, 7 | "initializeCommand": [ 8 | "/bin/bash", 9 | "-c", 10 | "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" 11 | ], 12 | "containerEnv": { 13 | "SCCACHE_REGION": "us-east-2", 14 | "SCCACHE_BUCKET": "rapids-sccache-devs", 15 | "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", 16 | "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", 17 | "DEVCONTAINER_NAME": "cuda13.0-llvm20", 18 | "CUCO_CUDA_VERSION": "13.0", 19 | "CUCO_HOST_COMPILER": "llvm", 20 | "CUCO_HOST_COMPILER_VERSION": "20" 21 | }, 22 | "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", 23 | "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", 24 | "mounts": [ 25 | "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", 26 | "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", 27 | "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" 28 | ], 29 | "customizations": { 30 | "vscode": { 31 | "extensions": [ 32 | "llvm-vs-code-extensions.vscode-clangd" 33 | ], 34 | "settings": { 35 | "clangd.arguments": [ 36 | "--compile-commands-dir=${workspaceFolder}/build/latest" 37 | ] 38 | } 39 | } 40 | }, 41 | "name": "cuda13.0-llvm20" 42 | } 43 | -------------------------------------------------------------------------------- /include/cuco/detail/storage/functors.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #pragma once 17 | 18 | namespace cuco::detail { 19 | /** 20 | * @brief Functor for initializing device memory with a given value 21 | * 22 | * @tparam SizeType Type used for indexing 23 | * @tparam T Type of value being initialized 24 | */ 25 | template 26 | struct initialize_functor { 27 | T* const _d_ptr; ///< Pointer to device memory 28 | T const _key; ///< Value to initialize memory with 29 | 30 | /** 31 | * @brief Constructs functor for initializing device memory 32 | * 33 | * @param d_ptr Pointer to device memory to initialize 34 | * @param key Value to initialize memory with 35 | */ 36 | __host__ __device__ initialize_functor(T* d_ptr, T key) noexcept : _d_ptr{d_ptr}, _key{key} {} 37 | 38 | /** 39 | * @brief Device function to initialize memory at given index 40 | * 41 | * @param idx Index into device memory 42 | */ 43 | __device__ __forceinline__ void operator()(SizeType idx) const noexcept { _d_ptr[idx] = _key; } 44 | }; 45 | } // namespace cuco::detail 46 | -------------------------------------------------------------------------------- /tests/hyperloglog/type_deduction_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | 28 | #include 29 | 30 | TEST_CASE("hyperloglog: type deduction bug with hash functions returning references") 31 | { 32 | auto constexpr sketch_size_kb = 1; 33 | auto constexpr num_items = 1000; 34 | 35 | auto first = thrust::make_transform_iterator(thrust::counting_iterator(0), 36 | cuco::xxhash_64{}); 37 | auto last = first + num_items; 38 | 39 | cuco::hyperloglog estimator{ 40 | cuco::sketch_size_kb(sketch_size_kb)}; 41 | 42 | REQUIRE(estimator.estimate() == 0); 43 | 44 | estimator.add(first, last); 45 | 46 | auto const estimate = estimator.estimate(); 47 | 48 | REQUIRE(estimate > 0); 49 | } 50 | -------------------------------------------------------------------------------- /include/cuco/types.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
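Regarding the `initialize_functor` from include/cuco/detail/storage/functors.cuh above: its template parameter list was lost in this dump, but per its documentation it is parameterized on an index type and a value type, assumed here as initialize_functor<SizeType, T>. Below is a hypothetical host-side sketch (not repository code) of driving it with Thrust to fill device memory with a sentinel.

#include <cuco/detail/storage/functors.cuh>

#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>

#include <cstddef>

int main()
{
  thrust::device_vector<int> slots(1024);

  // Fill every slot with the empty-key sentinel -1.
  cuco::detail::initialize_functor<std::size_t, int> init{slots.data().get(), -1};
  thrust::for_each_n(thrust::device,
                     thrust::counting_iterator<std::size_t>{0},
                     slots.size(),
                     init);
  return 0;
}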
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | /** 22 | * @brief Defines various strong type wrappers used across this library. 23 | * 24 | * @note Each strong type inherits from `cuco::detail::strong_type`. `CUCO_DEFINE_STRONG_TYPE` 25 | * and `CUCO_DEFINE_TEMPLATE_STRONG_TYPE` are convenience macros used to define a named type in a 26 | * single line, e.g., `CUCO_DEFINE_STRONG_TYPE(foo, double)` defines `struct foo : public 27 | * cuco::detail::strong_type {...};`, where `cuco::foo{42.0}` is implicitly convertible to 28 | * `double{42.0}`. 29 | */ 30 | 31 | namespace cuco { 32 | /** 33 | * @brief A strong type wrapper `cuco::empty_key` used to denote the empty key sentinel. 34 | */ 35 | CUCO_DEFINE_TEMPLATE_STRONG_TYPE(empty_key); 36 | 37 | /** 38 | * @brief A strong type wrapper `cuco::empty_value` used to denote the empty value sentinel. 39 | */ 40 | CUCO_DEFINE_TEMPLATE_STRONG_TYPE(empty_value); 41 | 42 | /** 43 | * @brief A strong type wrapper `cuco::erased_key` used to denote the erased key sentinel. 44 | */ 45 | CUCO_DEFINE_TEMPLATE_STRONG_TYPE(erased_key); 46 | 47 | } // namespace cuco 48 | -------------------------------------------------------------------------------- /include/cuco/utility/cuda_thread_scope.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include // cuda::thread_scope 20 | 21 | namespace cuco { 22 | 23 | /** 24 | * @brief Strongly-typed wrapper for `cuda::thread_scope`. 25 | * 26 | * @tparam Scope `cuda::thread_scope` to be wrapped 27 | */ 28 | template 29 | struct cuda_thread_scope { 30 | /** 31 | * @brief Implicit conversion to `cuda::thread_scope`. 
32 | * 33 | * @return The wrapped `cuda::thread_scope` 34 | */ 35 | __host__ __device__ constexpr operator cuda::thread_scope() const noexcept { return Scope; } 36 | }; 37 | 38 | // alias definitions 39 | inline constexpr auto thread_scope_system = 40 | cuda_thread_scope{}; ///< `cuco::thread_scope_system` 41 | inline constexpr auto thread_scope_device = 42 | cuda_thread_scope{}; ///< `cuco::thread_scope_device` 43 | inline constexpr auto thread_scope_block = 44 | cuda_thread_scope{}; ///< `cuco::thread_scope_block` 45 | inline constexpr auto thread_scope_thread = 46 | cuda_thread_scope{}; ///< `cuco::thread_scope_thread` 47 | 48 | } // namespace cuco 49 | -------------------------------------------------------------------------------- /tests/dynamic_bitset/rank_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #include 26 | 27 | using cuco::test::modulo_bitgen; 28 | 29 | TEST_CASE("dynamic_bitset rank test", "") 30 | { 31 | cuco::experimental::detail::dynamic_bitset bv; 32 | 33 | using size_type = std::size_t; 34 | constexpr size_type num_elements{4000}; 35 | 36 | for (size_type i = 0; i < num_elements; i++) { 37 | bv.push_back(modulo_bitgen(i)); 38 | } 39 | 40 | thrust::device_vector keys(num_elements); 41 | thrust::sequence(keys.begin(), keys.end(), 0); 42 | 43 | thrust::device_vector d_ranks(num_elements); 44 | 45 | bv.rank(keys.begin(), keys.end(), d_ranks.begin()); 46 | 47 | thrust::host_vector h_ranks = d_ranks; 48 | 49 | size_type cur_rank = 0; 50 | size_type num_matches = 0; 51 | for (size_type i = 0; i < num_elements; i++) { 52 | num_matches += cur_rank == h_ranks[i]; 53 | if (modulo_bitgen(i)) { cur_rank++; } 54 | } 55 | REQUIRE(num_matches == num_elements); 56 | } 57 | -------------------------------------------------------------------------------- /include/cuco/detail/__config: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
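A quick illustration of the `cuda_thread_scope` wrapper documented above — a sketch, not repository code; the template arguments of the predefined aliases are elided in this dump, but the example relies only on the conversion operator shown:

#include <cuco/utility/cuda_thread_scope.cuh>

// The strong-type tag converts back to the underlying cuda::thread_scope enumerator.
constexpr cuda::thread_scope scope = cuco::thread_scope_device;
static_assert(scope == cuda::thread_scope_device);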
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | #if !defined(__CUDACC_VER_MAJOR__) || !defined(__CUDACC_VER_MINOR__) 23 | #error "NVCC version not found" 24 | #elif __CUDACC_VER_MAJOR__ < 12 25 | #error "NVCC version 12.0 or later is required" 26 | #endif 27 | 28 | #if !defined(__CUDACC_EXTENDED_LAMBDA__) 29 | #error "Support for extended device lambdas is required (nvcc flag --expt-extended-lambda)" 30 | #endif 31 | 32 | #if !defined(CCCL_VERSION) || (CCCL_VERSION < 3000000) 33 | #error "CCCL version 3.0.0 or later is required" 34 | #endif 35 | 36 | // WAR for libcudacxx/296 37 | #define CUCO_CUDA_MINIMUM_ARCH _NV_FIRST_ARG(__CUDA_ARCH_LIST__) 38 | 39 | #if defined(CUDART_VERSION) && (CUCO_CUDA_MINIMUM_ARCH >= 700) 40 | #define CUCO_HAS_CUDA_BARRIER 41 | #endif 42 | 43 | #if defined(CUDART_VERSION) && (CUDART_VERSION >= 12010) 44 | #define CUCO_HAS_CG_INVOKE_ONE 45 | #endif 46 | 47 | #if (CUCO_CUDA_MINIMUM_ARCH >= 700) 48 | #define CUCO_HAS_INDEPENDENT_THREADS 49 | #endif 50 | 51 | #if defined(__SIZEOF_INT128__) 52 | #define CUCO_HAS_INT128 53 | #endif 54 | 55 | #if defined(CUDART_VERSION) && (CUDART_VERSION >= 12000) 56 | #define CUCO_HAS_CG_REDUCE_UPDATE_ASYNC 57 | #endif 58 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Enhancement 2 | description: Suggest an idea to improve cuCollections 3 | title: '[ENHANCEMENT]: ' 4 | labels: ['type: enhancement'] 5 | body: 6 | - type: textarea 7 | id: description 8 | attributes: 9 | label: Is your feature request related to a problem? Please describe. 10 | description: A clear and concise description of what the problem is, e.g., "I would like to be able to..." 11 | placeholder: I would like an overload of `cuco::static_map::insert` that returns the success of each insertion. 12 | validations: 13 | required: true 14 | - type: textarea 15 | id: proposed-solution 16 | attributes: 17 | label: Describe the solution you'd like 18 | description: A clear and concise description of what you want to happen. 19 | placeholder: | 20 | Add a new overload of `insert` that takes an output iterator range assignable from `bool` that indicates the success of each insert. 21 | Example API: 22 | template 23 | void insert(InputIt first_input, InputIt last_input, OutputIt first_input, cudaStream_t stream = 0); 24 | validations: 25 | required: true 26 | - type: textarea 27 | id: alternatives 28 | attributes: 29 | label: Describe alternatives you've considered 30 | description: 31 | If applicable, please add a clear and concise description of any alternative solutions or features you've 32 | considered. 33 | placeholder: You can implement this yourself with the device-side API, but it would be more convenient as a bulk function. 34 | validations: 35 | required: false 36 | - type: textarea 37 | id: additional-context 38 | attributes: 39 | label: Additional context 40 | description: Add any other context about the request here. 41 | placeholder: This would be useful for sparse embedding tables in DL usecases. 42 | validations: 43 | required: false 44 | -------------------------------------------------------------------------------- /include/cuco/detail/operator.inl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2025, NVIDIA CORPORATION. 
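The feature macros defined in include/cuco/detail/__config above gate optional CUDA features. As a hypothetical illustration (not repository code) of how a macro such as CUCO_HAS_CG_INVOKE_ONE would typically be consumed, assuming cooperative_groups::invoke_one from recent CUDA toolkits:

#include <cuco/detail/__config>

#include <cooperative_groups.h>

namespace cg = cooperative_groups;

// Let exactly one thread of a tile perform a side effect, preferring the newer CG API when present.
template <class Tile>
__device__ void store_once(Tile const& tile, int* out, int value)
{
#if defined(CUCO_HAS_CG_INVOKE_ONE)
  cg::invoke_one(tile, [&]() { *out = value; });
#else
  if (tile.thread_rank() == 0) { *out = value; }
#endif
}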
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | #include 22 | 23 | namespace cuco { 24 | namespace detail { 25 | 26 | /** 27 | * @brief CRTP mixin which augments a given `Reference` with an `Operator`. 28 | * 29 | * @throw If the operator is not defined in `include/cuco/operator.hpp` 30 | * 31 | * @tparam Operator Operator type, i.e., `cuco::op::*_tag` 32 | * @tparam Reference The reference type. 33 | * 34 | * @note This primary template should never be instantiated. 35 | */ 36 | template 37 | class operator_impl { 38 | static_assert(cuco::dependent_false, 39 | "Operator type is not supported by reference type."); 40 | }; 41 | 42 | /** 43 | * @brief Checks if the given `Operator` is contained in a list of `Operators`. 44 | * 45 | * @tparam Operator Operator type, i.e., `cuco::op::*_tag` 46 | * @tparam Operators List of operators to search in 47 | * 48 | * @return `true` if `Operator` is contained in `Operators`, `false` otherwise. 49 | */ 50 | template 51 | __host__ __device__ static constexpr bool has_operator() 52 | { 53 | return ((std::is_same_v) || ...); 54 | } 55 | 56 | } // namespace detail 57 | } // namespace cuco 58 | -------------------------------------------------------------------------------- /include/cuco/detail/pair/pair.inl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
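The pair helpers implemented above (cuco::make_pair and the equality operator from include/cuco/detail/pair/pair.inl) can be exercised as in the sketch below; this is not repository code and assumes the public header pulling in these definitions is <cuco/pair.cuh>.

#include <cuco/pair.cuh>

constexpr auto p = cuco::make_pair(1, 2.0f);  // deduces cuco::pair<int, float>
static_assert(p.first == 1 and p.second == 2.0f);
static_assert(p == cuco::pair<int, float>{1, 2.0f});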
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | namespace cuco { 23 | 24 | template 25 | __host__ __device__ constexpr pair::pair(First const& f, Second const& s) 26 | : first{f}, second{s} 27 | { 28 | } 29 | 30 | template 31 | template 32 | __host__ __device__ constexpr pair::pair(pair const& p) 33 | : first{p.first}, second{p.second} 34 | { 35 | } 36 | 37 | template 38 | __host__ __device__ constexpr pair, cuda::std::decay_t> make_pair( 39 | F&& f, S&& s) noexcept 40 | { 41 | return pair, cuda::std::decay_t>(cuda::std::forward(f), 42 | cuda::std::forward(s)); 43 | } 44 | 45 | template 46 | __host__ __device__ constexpr bool operator==(cuco::pair const& lhs, 47 | cuco::pair const& rhs) noexcept 48 | { 49 | return lhs.first == rhs.first and lhs.second == rhs.second; 50 | } 51 | 52 | } // namespace cuco 53 | 54 | namespace cuda::std { 55 | #include 56 | } // namespace cuda::std 57 | -------------------------------------------------------------------------------- /tests/test_utils.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | namespace cuco { 22 | namespace test { 23 | namespace detail { 24 | 25 | template 26 | __global__ void count_if(Iterator begin, 27 | Iterator end, 28 | cuda::atomic* count, 29 | Predicate p) 30 | { 31 | auto tid = blockDim.x * blockIdx.x + threadIdx.x; 32 | auto it = begin + tid; 33 | 34 | while (it < end) { 35 | count->fetch_add(static_cast(p(*it))); 36 | it += gridDim.x * blockDim.x; 37 | } 38 | } 39 | 40 | template 41 | __global__ void count_if(Iterator1 begin1, 42 | Iterator1 end1, 43 | Iterator2 begin2, 44 | cuda::atomic* count, 45 | Predicate p) 46 | { 47 | auto const n = end1 - begin1; 48 | auto tid = blockDim.x * blockIdx.x + threadIdx.x; 49 | 50 | while (tid < n) { 51 | auto cmp = begin1 + tid; 52 | auto ref = begin2 + tid; 53 | count->fetch_add(static_cast(p(*cmp, *ref))); 54 | tid += gridDim.x * blockDim.x; 55 | } 56 | } 57 | 58 | } // namespace detail 59 | } // namespace test 60 | } // namespace cuco 61 | -------------------------------------------------------------------------------- /tests/static_map/rehash_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #include 24 | 25 | TEST_CASE("static_map rehash test", "") 26 | { 27 | using key_type = int; 28 | using mapped_type = long; 29 | 30 | constexpr std::size_t num_keys{400}; 31 | constexpr std::size_t num_erased_keys{100}; 32 | 33 | cuco::static_map map{num_keys, 34 | cuco::empty_key{-1}, 35 | cuco::empty_value{-1}, 36 | cuco::erased_key{-2}}; 37 | 38 | auto keys_begin = thrust::counting_iterator(1); 39 | 40 | auto pairs_begin = thrust::make_transform_iterator( 41 | keys_begin, 42 | cuda::proclaim_return_type>([] __device__(key_type const& x) { 43 | return cuco::pair(x, static_cast(x)); 44 | })); 45 | 46 | map.insert(pairs_begin, pairs_begin + num_keys); 47 | 48 | map.rehash(); 49 | REQUIRE(map.size() == num_keys); 50 | 51 | map.rehash(num_keys * 2); 52 | REQUIRE(map.size() == num_keys); 53 | 54 | map.erase(keys_begin, keys_begin + num_erased_keys); 55 | map.rehash(); 56 | REQUIRE(map.size() == num_keys - num_erased_keys); 57 | } 58 | -------------------------------------------------------------------------------- /.github/workflows/build-and-test.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | name: build and test 17 | 18 | defaults: 19 | run: 20 | shell: bash -eo pipefail {0} 21 | 22 | on: 23 | workflow_call: 24 | inputs: 25 | cpu: {type: string, required: true} 26 | test_name: {type: string, required: false} 27 | build_script: {type: string, required: false} 28 | test_script: {type: string, required: false} 29 | container_image: {type: string, required: false} 30 | run_tests: {type: boolean, required: false, default: true} 31 | 32 | jobs: 33 | build: 34 | name: Build ${{inputs.test_name}} 35 | uses: ./.github/workflows/run-as-coder.yml 36 | with: 37 | name: Build ${{inputs.test_name}} 38 | runner: linux-${{inputs.cpu}}-cpu16 39 | image: ${{ inputs.container_image }} 40 | command: | 41 | ${{ inputs.build_script }} 42 | 43 | test: 44 | needs: build 45 | if: ${{ !cancelled() && ( needs.build.result == 'success' || needs.build.result == 'skipped' ) && inputs.run_tests}} 46 | name: Test ${{inputs.test_name}} 47 | uses: ./.github/workflows/run-as-coder.yml 48 | with: 49 | name: Test ${{inputs.test_name}} 50 | runner: linux-${{inputs.cpu}}-gpu-v100-latest-1 51 | image: ${{inputs.container_image}} 52 | command: | 53 | nvidia-smi 54 | ${{ inputs.test_script }} -------------------------------------------------------------------------------- /include/cuco/detail/hash_functions/identity_hash.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | namespace cuco::detail { 23 | 24 | /** 25 | * @brief An Identity hash function to hash the given argument on host and device 26 | * 27 | * @note `identity_hash` is perfect if `hash_table_capacity >= |input set|` 28 | * 29 | * @note `identity_hash` is only intended to be used as a perfect hash, i.e. when the capacity condition above holds. 30 | * 31 | * @note Perfect hashes are deterministic, and thus do not need seeds. 32 | * 33 | * @tparam Key The type of the values to hash 34 | */ 35 | template <typename Key> 36 | struct identity_hash : private cuda::std::identity { 37 | using argument_type = Key; ///< The type of the values taken as argument 38 | /// The type of the hash values produced 39 | using result_type = cuda::std::conditional_t; 40 | 41 | static_assert(cuda::std::is_convertible_v<Key, result_type>, 42 | "Key type must be convertible to result_type"); 43 | 44 | /** 45 | * @brief Returns a hash value for its argument, as a value of type `result_type`. 46 | * 47 | * @param x The input argument to hash 48 | * @return A resulting hash value for `x` 49 | */ 50 | __host__ __device__ result_type operator()(Key const& x) const 51 | { 52 | return static_cast<result_type>(cuda::std::identity::operator()(x)); 53 | } 54 | }; // identity_hash 55 | 56 | } // namespace cuco::detail 57 | -------------------------------------------------------------------------------- /include/cuco/detail/pair/traits.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
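// Illustrative aside, not part of the original header: `identity_hash` forwards the key as its
// own hash value, so it is only useful when the table capacity covers the whole input set (the
// "perfect" case noted above). A minimal host-side sketch; requires <cassert>.
inline void identity_hash_sketch()
{
  cuco::detail::identity_hash<int> hash{};
  assert(hash(42) == 42);      // the key is its own hash value
  assert(hash(7) == hash(7));  // deterministic, no seed involved
}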
13 | * See the License for the specific language governing permissions and 14 | */ 15 | 16 | #pragma once 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | 24 | namespace cuco::detail { 25 | 26 | template 27 | struct is_std_pair_like : cuda::std::false_type {}; 28 | 29 | template 30 | struct is_std_pair_like(cuda::std::declval())), 32 | decltype(std::get<1>(cuda::std::declval()))>> 33 | : cuda::std:: 34 | conditional_t::value == 2, cuda::std::true_type, cuda::std::false_type> {}; 35 | 36 | template 37 | struct is_cuda_std_pair_like_impl : cuda::std::false_type {}; 38 | 39 | template 40 | struct is_cuda_std_pair_like_impl< 41 | T, 42 | cuda::std::void_t(cuda::std::declval())), 43 | decltype(cuda::std::get<1>(cuda::std::declval())), 44 | decltype(cuda::std::tuple_size::value)>> 45 | : cuda::std::conditional_t::value == 2, 46 | cuda::std::true_type, 47 | cuda::std::false_type> {}; 48 | 49 | template 50 | struct is_cuda_std_pair_like 51 | : is_cuda_std_pair_like_impl()))>> {}; 53 | 54 | } // namespace cuco::detail 55 | -------------------------------------------------------------------------------- /ci/sccache_hit_rate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | set -euo pipefail 18 | 19 | # Ensure two arguments are provided 20 | if [ $# -ne 2 ]; then 21 | echo "Usage: $0 " >&2 22 | exit 1 23 | fi 24 | 25 | # Print the contents of the before file 26 | echo "=== Contents of $1 ===" >&2 27 | cat $1 >&2 28 | echo "=== End of $1 ===" >&2 29 | 30 | # Print the contents of the after file 31 | echo "=== Contents of $2 ===" >&2 32 | cat $2 >&2 33 | echo "=== End of $2 ===" >&2 34 | 35 | # Extract compile requests and cache hits from the before and after files 36 | requests_before=$(awk '/^[ \t]*Compile requests[ \t]+[0-9]+/ {print $3}' "$1") 37 | hits_before=$(awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}' "$1") 38 | requests_after=$(awk '/^[ \t]*Compile requests[ \t]+[0-9]+/ {print $3}' "$2") 39 | hits_after=$(awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}' "$2") 40 | 41 | # Calculate the differences to find out how many new requests and hits 42 | requests_diff=$((requests_after - requests_before)) 43 | hits_diff=$((hits_after - hits_before)) 44 | 45 | echo "New Compile Requests: $requests_diff" >&2 46 | echo "New Hits: $hits_diff" >&2 47 | 48 | # Calculate and print the hit rate 49 | if [ $requests_diff -eq 0 ]; then 50 | echo "No new compile requests, hit rate is not applicable" 51 | else 52 | hit_rate=$(awk -v hits=$hits_diff -v requests=$requests_diff 'BEGIN {printf "%.2f", hits/requests * 100}') 53 | echo "sccache hit rate: $hit_rate%" >&2 54 | echo "$hit_rate" 55 | fi -------------------------------------------------------------------------------- /include/cuco/utility/error.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2023, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | namespace cuco { 23 | /** 24 | * @brief Exception thrown when logical precondition is violated. 25 | * 26 | * This exception should not be thrown directly and is instead thrown by the 27 | * CUCO_EXPECTS macro. 28 | */ 29 | struct logic_error : public std::logic_error { 30 | /** 31 | * @brief Constructs a logic_error with the error message. 32 | * 33 | * @param message Message to be associated with the exception 34 | */ 35 | logic_error(char const* const message) : std::logic_error(message) {} 36 | 37 | /** 38 | * @brief Construct a new logic error object with error message 39 | * 40 | * @param message Message to be associated with the exception 41 | */ 42 | logic_error(std::string const& message) : std::logic_error(message) {} 43 | }; 44 | /** 45 | * @brief Exception thrown when a CUDA error is encountered. 46 | * 47 | */ 48 | struct cuda_error : public std::runtime_error { 49 | /** 50 | * @brief Constructs a `cuda_error` object with the given `message`. 
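// Illustrative aside, not part of the original header: `cuco::logic_error` and `cuco::cuda_error`
// plug into the standard exception hierarchy, so host code can handle precondition violations and
// CUDA failures separately. Which cuco calls throw is an assumption here; the snippet only shows
// the catch pattern. Requires <iostream>.
inline void cuco_error_handling_sketch()
{
  try {
    // ... invoke a throwing cuco host API here, e.g. a container constructor ...
  } catch (cuco::logic_error const& e) {
    std::cerr << "precondition violated: " << e.what() << '\n';
  } catch (cuco::cuda_error const& e) {
    std::cerr << "CUDA failure: " << e.what() << '\n';
  }
}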
51 | * 52 | * @param message The error char array used to construct `cuda_error` 53 | */ 54 | cuda_error(const char* message) : std::runtime_error(message) {} 55 | /** 56 | * @brief Constructs a `cuda_error` object with the given `message` string. 57 | * 58 | * @param message The `std::string` used to construct `cuda_error` 59 | */ 60 | cuda_error(std::string const& message) : cuda_error{message.c_str()} {} 61 | }; 62 | } // namespace cuco 63 | -------------------------------------------------------------------------------- /.devcontainer/launch.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | launch_devcontainer() { 18 | 19 | # Ensure we're in the repo root 20 | cd "$( cd "$( dirname "$(realpath -m "${BASH_SOURCE[0]}")" )" && pwd )/.."; 21 | 22 | if [[ -z $1 ]] || [[ -z $2 ]]; then 23 | echo "Usage: $0 [CUDA version] [Host compiler]" 24 | echo "Example: $0 12.1 gcc12" 25 | return 1 26 | fi 27 | 28 | local cuda_version="$1" 29 | local host_compiler="$2" 30 | local workspace="$(basename "$(pwd)")"; 31 | local tmpdir="$(mktemp -d)/${workspace}"; 32 | local path="$(pwd)/.devcontainer/cuda${cuda_version}-${host_compiler}"; 33 | 34 | mkdir -p "${tmpdir}"; 35 | mkdir -p "${tmpdir}/.devcontainer"; 36 | cp -arL "$path/devcontainer.json" "${tmpdir}/.devcontainer"; 37 | sed -i "s@\${localWorkspaceFolder}@$(pwd)@g" "${tmpdir}/.devcontainer/devcontainer.json"; 38 | path="${tmpdir}"; 39 | 40 | local hash="$(echo -n "${path}" | xxd -pu - | tr -d '[:space:]')"; 41 | local url="vscode://vscode-remote/dev-container+${hash}/home/coder/cuCollections"; 42 | 43 | echo "devcontainer URL: ${url}"; 44 | 45 | local launch=""; 46 | if type open >/dev/null 2>&1; then 47 | launch="open"; 48 | elif type xdg-open >/dev/null 2>&1; then 49 | launch="xdg-open"; 50 | fi 51 | 52 | if [ -n "${launch}" ]; then 53 | code --new-window "${tmpdir}"; 54 | exec "${launch}" "${url}" >/dev/null 2>&1; 55 | fi 56 | } 57 | 58 | launch_devcontainer "$@"; -------------------------------------------------------------------------------- /cmake/roaring_testdata.cmake: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # Copyright (c) 2025, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | # in compliance with the License. You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software distributed under the License 10 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | # or implied. 
See the License for the specific language governing permissions and limitations under 12 | # the License. 13 | # ============================================================================= 14 | 15 | # Only act if enabled 16 | if(NOT CUCO_DOWNLOAD_ROARING_TESTDATA) 17 | return() 18 | endif() 19 | 20 | set(CUCO_ROARING_DATA_DIR "${CMAKE_BINARY_DIR}/data/roaring_bitmap") 21 | 22 | file(MAKE_DIRECTORY "${CUCO_ROARING_DATA_DIR}") 23 | 24 | set(ROARING_FORMATSPEC_BASE "https://raw.githubusercontent.com/RoaringBitmap/RoaringFormatSpec/5177ad9") 25 | 26 | rapids_cmake_download_with_retry("${ROARING_FORMATSPEC_BASE}/testdata/bitmapwithoutruns.bin" 27 | "${CUCO_ROARING_DATA_DIR}/bitmapwithoutruns.bin" 28 | "d719ae2e0150a362ef7cf51c361527585891f01460b1a92bcfb6a7257282a442") 29 | 30 | rapids_cmake_download_with_retry("${ROARING_FORMATSPEC_BASE}/testdata/bitmapwithruns.bin" 31 | "${CUCO_ROARING_DATA_DIR}/bitmapwithruns.bin" 32 | "1f1909bfdd354fa2f0694fe88b8076833ca5383ad9fc3f68f2709c84a2ab70e3") 33 | 34 | rapids_cmake_download_with_retry("${ROARING_FORMATSPEC_BASE}/testdata64/portable_bitmap64.bin" 35 | "${CUCO_ROARING_DATA_DIR}/portable_bitmap64.bin" 36 | "b5a553a759167f5f9ccb3fa21552d943b4c73235635b753376f4faf62067d178") 37 | 38 | message(STATUS "Roaring Bitmap test data downloaded to: ${CUCO_ROARING_DATA_DIR}") 39 | 40 | # Define macro only when data is available 41 | add_compile_definitions(CUCO_ROARING_DATA_DIR="${CUCO_ROARING_DATA_DIR}") -------------------------------------------------------------------------------- /include/cuco/detail/bloom_filter/default_filter_policy.inl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
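// Illustrative aside, not part of the original sources: the CMake snippet above defines the
// CUCO_ROARING_DATA_DIR macro only when the test data has been downloaded, so consuming tests can
// guard on it. A minimal sketch (the file name comes from the download list above); requires
// <fstream>, <iterator>, <string>, and <vector>.
#ifdef CUCO_ROARING_DATA_DIR
inline std::vector<char> read_roaring_testdata_sketch()
{
  std::ifstream in{std::string{CUCO_ROARING_DATA_DIR} + "/bitmapwithruns.bin", std::ios::binary};
  return {std::istreambuf_iterator<char>{in}, std::istreambuf_iterator<char>{}};
}
#endif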
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | namespace cuco { 22 | 23 | template 24 | __host__ 25 | __device__ constexpr default_filter_policy::default_filter_policy( 26 | uint32_t pattern_bits, Hash hash) 27 | : impl_{pattern_bits, hash} 28 | { 29 | } 30 | 31 | template 32 | __device__ constexpr typename default_filter_policy::hash_result_type 33 | default_filter_policy::hash( 34 | typename default_filter_policy::hash_argument_type const& key) const 35 | { 36 | return impl_.hash(key); 37 | } 38 | 39 | template 40 | template 41 | __device__ constexpr auto default_filter_policy::block_index( 42 | typename default_filter_policy::hash_result_type hash, 43 | Extent num_blocks) const 44 | { 45 | return impl_.block_index(hash, num_blocks); 46 | } 47 | 48 | template 49 | __device__ constexpr typename default_filter_policy::word_type 50 | default_filter_policy::word_pattern( 51 | default_filter_policy::hash_result_type hash, 52 | std::uint32_t word_index) const 53 | { 54 | return impl_.word_pattern(hash, word_index); 55 | } 56 | 57 | } // namespace cuco -------------------------------------------------------------------------------- /examples/hyperloglog/host_bulk_example.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | /** 26 | * @file host_bulk_example.cu 27 | * @brief Demonstrates usage of `cuco::hyperloglog` "bulk" host APIs. 
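// Illustrative aside, not part of the original sources, referring to the `default_filter_policy`
// interface shown above: a filter policy exposes three hooks, applied in sequence: hash the key
// once, select a filter block, then produce one bit pattern per word of that block. The generic
// sketch below only demonstrates that call order; the policy and word types as well as
// `num_blocks` and `words_per_block` are assumptions.
template <typename Policy, typename Word>
__device__ void apply_filter_policy_sketch(Policy const& policy,
                                           typename Policy::hash_argument_type const& key,
                                           Word* words,
                                           std::size_t num_blocks,
                                           std::uint32_t words_per_block)
{
  auto const h     = policy.hash(key);                   // 1. hash the key once
  auto const block = policy.block_index(h, num_blocks);  // 2. choose a filter block
  for (std::uint32_t i = 0; i < words_per_block; ++i) {
    words[block * words_per_block + i] |= policy.word_pattern(h, i);  // 3. set the per-word bits
  }
}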
28 | */ 29 | int main(void) 30 | { 31 | using T = int; 32 | constexpr std::size_t num_items = 1ull << 28; // 1GB 33 | 34 | thrust::device_vector items(num_items); 35 | 36 | // Generate `num_items` distinct items 37 | thrust::sequence(items.begin(), items.end(), 0); 38 | 39 | // We define the desired standard deviation of the approximation error 40 | // 0.0122197 is the default value and corresponds to a 32KB sketch size 41 | auto const sd = cuco::standard_deviation{0.0122197}; 42 | 43 | // Initialize the estimator 44 | cuco::hyperloglog estimator{sd}; 45 | 46 | // Add all items to the estimator 47 | estimator.add(items.begin(), items.end()); 48 | 49 | // Adding the same items again will not affect the result 50 | estimator.add(items.begin(), items.begin() + num_items / 2); 51 | 52 | // Calculate the cardinality estimate 53 | std::size_t const estimated_cardinality = estimator.estimate(); 54 | 55 | std::cout << "True cardinality: " << num_items 56 | << "\nEstimated cardinality: " << estimated_cardinality << "\nError: " 57 | << std::abs( 58 | static_cast(estimated_cardinality) / static_cast(num_items) - 1.0) 59 | << std::endl; 60 | 61 | return 0; 62 | } -------------------------------------------------------------------------------- /ci/pre-commit/doxygen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Skip if doxygen is not installed 18 | if ! [ -x "$(command -v doxygen)" ]; then 19 | echo -e "Warning: Doxygen is not installed - skipping check" 20 | exit 0 21 | fi 22 | 23 | # Utility to return version as number for comparison 24 | function version { echo "$@" | awk -F. '{ printf("%d%03d%03d%03d\n", $1,$2,$3,$4); }'; } 25 | 26 | # Doxygen supported version 1.8.20 to 1.9.1 27 | DOXYGEN_VERSION=$(doxygen --version) 28 | if [ $(version "$DOXYGEN_VERSION") -lt $(version "1.8.20") ] || [ $(version $DOXYGEN_VERSION) -gt $(version "1.9.1") ]; then 29 | echo -e "Warning: Unsupported Doxygen version $DOXYGEN_VERSION - skipping check" 30 | echo -e "Expected Doxygen version from 1.8.20 to 1.9.1" 31 | exit 0 32 | fi 33 | 34 | echo "Using doxygen version: ${DOXYGEN_VERSION}" 35 | 36 | # Run doxygen, ignore missing tag files error 37 | TAG_ERROR1="error: Tag file '.*.tag' does not exist or is not a file. Skipping it..." 38 | TAG_ERROR2="error: cannot open tag file .*.tag for writing" 39 | DOXYGEN_STDERR=`cd doxygen && { cat Doxyfile ; echo QUIET = YES; echo GENERATE_HTML = NO; } | doxygen - 2>&1 | sed "/\($TAG_ERROR1\|$TAG_ERROR2\)/d"` 40 | RETVAL=$? 41 | 42 | if [ "$RETVAL" != "0" ] || [ ! 
-z "$DOXYGEN_STDERR" ]; then 43 | echo -e "\n>>>> FAILED: doxygen check; begin output\n" 44 | echo -e "$DOXYGEN_STDERR" 45 | echo -e "\n>>>> FAILED: doxygen check; end output\n" 46 | RETVAL=1 #because return value is not generated by doxygen 1.8.17 47 | else 48 | echo -e "\n>>>> PASSED: doxygen check\n" 49 | fi 50 | 51 | exit $RETVAL 52 | -------------------------------------------------------------------------------- /.github/workflows/dispatch-build-and-test.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | name: Dispatch build and test 17 | 18 | on: 19 | workflow_call: 20 | inputs: 21 | per_cuda_compiler_matrix: {type: string, required: true} 22 | devcontainer_version: {type: string, required: true} 23 | 24 | jobs: 25 | # Using a matrix to dispatch to the build-and-test reusable workflow for each build configuration 26 | # ensures that the build/test steps can overlap across different configurations. 27 | build_and_test: 28 | name: ${{matrix.cpu}} 29 | uses: ./.github/workflows/build-and-test.yml 30 | strategy: 31 | fail-fast: false 32 | matrix: 33 | include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }} 34 | with: 35 | cpu: ${{ matrix.cpu }} 36 | test_name: ${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}} 37 | build_script: "./ci/build.sh --cxx ${{matrix.compiler.exe}} --std ${{matrix.std}} --arch ${{matrix.gpu_build_archs}} --infix ${{matrix.cpu}}-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}" 38 | test_script: "./ci/test.sh --tests --cxx ${{matrix.compiler.exe}} --std ${{matrix.std}} --arch ${{matrix.gpu_build_archs}} --infix ${{matrix.cpu}}-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}" 39 | container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} 40 | run_tests: ${{ contains(matrix.jobs, 'test') && !contains(github.event.head_commit.message, 'skip-tests') }} 41 | -------------------------------------------------------------------------------- /tests/utility/fast_int_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | 24 | #include 25 | #include 26 | 27 | TEMPLATE_TEST_CASE( 28 | "utility::fast_int tests", "", std::int32_t, std::uint32_t, std::int64_t, std::uint64_t) 29 | { 30 | TestType value = GENERATE(1, 2, 9, 32, 4123, 8192, 4312456); 31 | TestType lhs = GENERATE(1, 2, 9, 32, 4123, 8192, 4312456); 32 | constexpr auto max_value = std::numeric_limits::max(); 33 | 34 | cuco::utility::fast_int fast_value{value}; 35 | 36 | SECTION("Should be explicitly convertible to the underlying integer type.") 37 | { 38 | REQUIRE(static_cast(fast_value) == value); 39 | } 40 | 41 | SECTION("Fast div/mod should produce correct result.") 42 | { 43 | INFO(lhs << " /% " << value); 44 | REQUIRE(lhs / fast_value == lhs / value); 45 | REQUIRE(lhs % fast_value == lhs % value); 46 | } 47 | 48 | SECTION("Fast div/mod with maximum rhs value should produce correct result.") 49 | { 50 | INFO(lhs << " /% " << max_value); 51 | cuco::utility::fast_int fast_max{max_value}; 52 | REQUIRE(lhs / fast_max == lhs / max_value); 53 | REQUIRE(lhs % fast_max == lhs % max_value); 54 | } 55 | 56 | SECTION("Fast div/mod with maximum lhs value should produce correct result.") 57 | { 58 | INFO(max_value << " /% " << value); 59 | REQUIRE(max_value / fast_value == max_value / value); 60 | REQUIRE(max_value % fast_value == max_value % value); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /include/cuco/storage.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | #include 22 | 23 | namespace cuco { 24 | // Forward declaration to avoid circular dependency 25 | template 26 | class bucket_storage; 27 | } // namespace cuco 28 | 29 | namespace cuco { 30 | 31 | /** 32 | * @brief Public storage class. 33 | * 34 | * @note This is a public interface used to control storage bucket size. A bucket consists of one 35 | * or multiple contiguous slots. The bucket size defines the workload granularity for each CUDA 36 | * thread, i.e., how many slots a thread would concurrently operate on when performing modify or 37 | * lookup operations. cuCollections uses the array of bucket storage to supersede the raw flat slot 38 | * storage due to its superior granularity control: When bucket size equals one, array of buckets 39 | * performs the same as the flat storage. If the underlying operation is more memory bandwidth 40 | * bound, e.g., high occupancy multimap operations, a larger bucket size can reduce the length of 41 | * probing sequences thus improve runtime performance. 
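// Illustrative aside, not part of the original header: the bucket size is chosen purely at the
// type level and handed to a container as its storage policy (the multiset tests elsewhere in
// this repository pass `cuco::storage<2>{}`, for example). A minimal sketch:
static_assert(cuco::storage<2>::bucket_size == 2, "two contiguous slots per probing step");
using two_slot_storage = cuco::storage<2>;  // pass this (or cuco::storage<1>) as a container's storage parameter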
42 | * 43 | * @tparam BucketSize Number of elements per bucket storage 44 | */ 45 | template 46 | class storage { 47 | public: 48 | /// Number of slots per bucket storage 49 | static constexpr cuda::std::int32_t bucket_size = BucketSize; 50 | 51 | /// Type of implementation details 52 | template 53 | using impl = bucket_storage; 54 | }; 55 | } // namespace cuco 56 | -------------------------------------------------------------------------------- /benchmarks/benchmark_defaults.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | #include 22 | 23 | #include 24 | #include 25 | 26 | namespace cuco::benchmark::defaults { 27 | 28 | using KEY_TYPE_RANGE = nvbench::type_list; 29 | using VALUE_TYPE_RANGE = nvbench::type_list; 30 | using HASH_RANGE = nvbench::type_list, 31 | cuco::xxhash_32, 32 | cuco::xxhash_64, 33 | cuco::murmurhash3_32>; //, 34 | // cuco::murmurhash3_x86_128, 35 | // cuco::murmurhash3_x64_128>; // TODO handle tuple-like hash value 36 | 37 | auto constexpr N = 100'000'000; 38 | auto constexpr OCCUPANCY = 0.5; 39 | auto constexpr MULTIPLICITY = 1; 40 | auto constexpr MATCHING_RATE = 1.0; 41 | auto constexpr SKEW = 0.5; 42 | auto constexpr BATCH_SIZE = 1'000'000; 43 | auto constexpr INITIAL_SIZE = 50'000'000; 44 | 45 | auto const N_RANGE = nvbench::range(10'000'000, 100'000'000, 20'000'000); 46 | auto const N_RANGE_CACHE = 47 | std::vector{8'000, 80'000, 800'000, 8'000'000, 80'000'000}; 48 | auto const OCCUPANCY_RANGE = nvbench::range(0.1, 0.9, 0.1); 49 | auto const MULTIPLICITY_RANGE = std::vector{1, 2, 4, 8, 16}; 50 | auto const MATCHING_RATE_RANGE = nvbench::range(0.1, 1., 0.1); 51 | auto const SKEW_RANGE = nvbench::range(0.1, 1., 0.1); 52 | 53 | } // namespace cuco::benchmark::defaults 54 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Common 2 | __pycache__ 3 | *.py[cod] 4 | *$py.class 5 | *.a 6 | *.o 7 | *.so 8 | *.dylib 9 | .cache 10 | .vscode 11 | *.code-workspace 12 | *.swp 13 | *.pytest_cache 14 | DartConfiguration.tcl 15 | .DS_Store 16 | *.manifest 17 | *.spec 18 | 19 | ## Python build directories & artifacts 20 | dist/ 21 | python/build 22 | python/*/build 23 | python/*/record.txt 24 | .Python 25 | env/ 26 | develop-eggs/ 27 | downloads/ 28 | eggs/ 29 | .eggs/ 30 | lib/ 31 | lib64/ 32 | parts/ 33 | sdist/ 34 | var/ 35 | wheels/ 36 | *.egg-info/ 37 | .installed.cfg 38 | *.egg 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | 
.coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | /tests/Testing/ 53 | 54 | ## Patching 55 | *.diff 56 | *.orig 57 | *.rej 58 | 59 | ## C++ build directories & artifacts 60 | CMakeFiles/ 61 | Debug 62 | build/ 63 | cpp/build/ 64 | cpp/thirdparty/googletest/ 65 | 66 | ## Eclipse IDE 67 | .project 68 | .cproject 69 | .settings 70 | 71 | ## IntelliJ IDE 72 | .idea/ 73 | .idea_modules/ 74 | *.iml 75 | *.ipr 76 | *.iws 77 | 78 | ## Doxygen 79 | /doxygen/html 80 | /doxygen/latex 81 | /html 82 | /latex 83 | 84 | #Java 85 | target 86 | 87 | # Translations 88 | *.mo 89 | *.pot 90 | 91 | # Django stuff: 92 | *.log 93 | local_settings.py 94 | 95 | # Flask stuff: 96 | instance/ 97 | .webassets-cache 98 | 99 | # Scrapy stuff: 100 | .scrapy 101 | 102 | # Sphinx documentation 103 | docs/_build/ 104 | 105 | # PyBuilder 106 | target/ 107 | 108 | # Jupyter Notebook 109 | .ipynb_checkpoints 110 | 111 | # pyenv 112 | .python-version 113 | 114 | # celery beat schedule file 115 | celerybeat-schedule 116 | 117 | # SageMath parsed files 118 | *.sage.py 119 | 120 | # dotenv 121 | .env 122 | 123 | # virtualenv 124 | .venv 125 | venv/ 126 | ENV/ 127 | 128 | # Spyder project settings 129 | .spyderproject 130 | .spyproject 131 | 132 | # Rope project settings 133 | .ropeproject 134 | 135 | # mkdocs documentation 136 | /site 137 | 138 | # mypy 139 | .mypy_cache/ 140 | 141 | # clang 142 | compile_commands.json 143 | /.clangd/ 144 | 145 | # figures 146 | *.eps 147 | 148 | # Github 149 | /.config/ 150 | /.devcontainer.json 151 | 152 | # AWS cache 153 | /.aws/ 154 | -------------------------------------------------------------------------------- /include/cuco/detail/utility/strong_type.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | */ 15 | 16 | #pragma once 17 | 18 | namespace cuco::detail { 19 | 20 | /** 21 | * @brief A strong type wrapper 22 | * 23 | * @tparam T Type of the value 24 | * 25 | */ 26 | template 27 | struct strong_type { 28 | /** 29 | * @brief Constructs a strong type 30 | * 31 | * @param v Value to be wrapped as a strong type 32 | */ 33 | __host__ __device__ explicit constexpr strong_type(T v) : value{v} {} 34 | 35 | /** 36 | * @brief Implicit conversion operator to the underlying value. 
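// Illustrative aside, not part of the original header: a strong type wraps a single value and
// converts back to it implicitly, which is how sentinel wrappers such as `cuco::empty_key` are
// used throughout this repository (that they are built on these helpers is an assumption here).
// A minimal sketch:
inline int strong_type_sketch()
{
  cuco::detail::strong_type<int> wrapped{42};
  int const raw = wrapped;  // implicit conversion back to the underlying value
  return raw;               // returns 42
}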
37 | * 38 | * @return Underlying value 39 | */ 40 | __host__ __device__ constexpr operator T() const noexcept { return value; } 41 | 42 | T value; ///< Underlying data value 43 | }; 44 | 45 | } // namespace cuco::detail 46 | 47 | /** 48 | * @brief Convenience wrapper for defining a strong type 49 | */ 50 | #define CUCO_DEFINE_STRONG_TYPE(Name, Type) \ 51 | struct Name : public cuco::detail::strong_type { \ 52 | __host__ __device__ explicit constexpr Name(Type value) \ 53 | : cuco::detail::strong_type(value) \ 54 | { \ 55 | } \ 56 | }; 57 | 58 | /** 59 | * @brief Convenience wrapper for defining a templated strong type 60 | */ 61 | #define CUCO_DEFINE_TEMPLATE_STRONG_TYPE(Name) \ 62 | template \ 63 | struct Name : public cuco::detail::strong_type { \ 64 | __host__ __device__ explicit constexpr Name(T value) : cuco::detail::strong_type(value) {} \ 65 | }; 66 | -------------------------------------------------------------------------------- /benchmarks/static_set/size_bench.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | #include 19 | 20 | #include 21 | #include 22 | 23 | #include 24 | 25 | #include 26 | 27 | using namespace cuco::benchmark; // defaults, dist_from_state 28 | using namespace cuco::utility; // key_generator, distribution 29 | 30 | /** 31 | * @brief A benchmark evaluating `cuco::static_set::size` performance 32 | */ 33 | template 34 | void static_set_size(nvbench::state& state, nvbench::type_list) 35 | { 36 | auto const num_keys = state.get_int64("NumInputs"); 37 | auto const occupancy = state.get_float64("Occupancy"); 38 | 39 | std::size_t const size = num_keys / occupancy; 40 | 41 | thrust::device_vector keys(num_keys); 42 | 43 | [[maybe_unused]] key_generator gen{}; 44 | gen.generate(dist_from_state(state), keys.begin(), keys.end()); 45 | 46 | state.add_element_count(num_keys); 47 | 48 | cuco::static_set set{size, cuco::empty_key{-1}}; 49 | 50 | set.insert(keys.begin(), keys.end()); 51 | 52 | state.exec(nvbench::exec_tag::sync, 53 | [&](nvbench::launch& launch) { auto const size = set.size({launch.get_stream()}); }); 54 | } 55 | 56 | NVBENCH_BENCH_TYPES(static_set_size, 57 | NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE, 58 | nvbench::type_list)) 59 | .set_name("static_set_size_unique_occupancy") 60 | .set_type_axes_names({"Key", "Distribution"}) 61 | .add_int64_axis("NumInputs", {defaults::N}) 62 | .add_float64_axis("Occupancy", defaults::OCCUPANCY_RANGE); 63 | -------------------------------------------------------------------------------- /.github/actions/compute-matrix/compute-matrix.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | set -euo pipefail 18 | 19 | write_output() { 20 | local key="$1" 21 | local value="$2" 22 | echo "$key=$value" | tee --append "${GITHUB_OUTPUT:-/dev/null}" 23 | } 24 | 25 | explode_std_versions() { 26 | jq -cr 'map(. as $o | {std: $o.std[]} + del($o.std))' 27 | } 28 | 29 | extract_matrix() { 30 | local file="$1" 31 | local type="$2" 32 | local matrix=$(yq -o=json "$file" | jq -cr ".$type") 33 | write_output "DEVCONTAINER_VERSION" "$(yq -o json "$file" | jq -cr '.devcontainer_version')" 34 | local nvcc_full_matrix="$(echo "$matrix" | jq -cr '.nvcc' | explode_std_versions )" 35 | write_output "NVCC_FULL_MATRIX" "$nvcc_full_matrix" 36 | write_output "CUDA_VERSIONS" "$(echo "$nvcc_full_matrix" | jq -cr '[.[] | .cuda] | unique')" 37 | write_output "HOST_COMPILERS" "$(echo "$nvcc_full_matrix" | jq -cr '[.[] | .compiler.name] | unique')" 38 | write_output "PER_CUDA_COMPILER_MATRIX" "$(echo "$nvcc_full_matrix" | jq -cr ' group_by(.cuda + .compiler.name) | map({(.[0].cuda + "-" + .[0].compiler.name): .}) | add')" 39 | } 40 | 41 | main() { 42 | if [ "$1" == "-v" ]; then 43 | set -x 44 | shift 45 | fi 46 | 47 | if [ $# -ne 2 ] || [ "$2" != "pull_request" ]; then 48 | echo "Usage: $0 [-v] MATRIX_FILE MATRIX_TYPE" 49 | echo " -v : Enable verbose output" 50 | echo " MATRIX_FILE : The path to the matrix file." 51 | echo " MATRIX_TYPE : The desired matrix. Supported values: 'pull_request'" 52 | exit 1 53 | fi 54 | 55 | echo "Input matrix file:" >&2 56 | cat "$1" >&2 57 | echo "Matrix Type: $2" >&2 58 | 59 | extract_matrix "$1" "$2" 60 | } 61 | 62 | main "$@" -------------------------------------------------------------------------------- /benchmarks/static_set/rehash_bench.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | #include 19 | 20 | #include 21 | #include 22 | 23 | #include 24 | 25 | #include 26 | 27 | using namespace cuco::benchmark; // defaults, dist_from_state 28 | using namespace cuco::utility; // key_generator, distribution 29 | 30 | /** 31 | * @brief A benchmark evaluating `cuco::static_set::rehash` performance 32 | */ 33 | template 34 | void static_set_rehash(nvbench::state& state, nvbench::type_list) 35 | { 36 | std::size_t const capacity = state.get_int64("Capacity"); 37 | auto const occupancy = state.get_float64("Occupancy"); 38 | 39 | std::size_t const num_keys = capacity * occupancy; 40 | 41 | thrust::device_vector keys(num_keys); // slots per second 42 | 43 | [[maybe_unused]] key_generator gen{}; 44 | gen.generate(dist_from_state(state), keys.begin(), keys.end()); 45 | 46 | state.add_element_count(capacity); 47 | 48 | cuco::static_set set{capacity, cuco::empty_key{-1}}; 49 | 50 | set.insert(keys.begin(), keys.end()); 51 | 52 | state.exec(nvbench::exec_tag::sync, 53 | [&](nvbench::launch& launch) { set.rehash({launch.get_stream()}); }); 54 | } 55 | 56 | NVBENCH_BENCH_TYPES(static_set_rehash, 57 | NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE, 58 | nvbench::type_list)) 59 | .set_name("static_set_rehash_unique_occupancy") 60 | .set_type_axes_names({"Key", "Distribution"}) 61 | .add_int64_axis("Capacity", {defaults::N}) 62 | .add_float64_axis("Occupancy", defaults::OCCUPANCY_RANGE); 63 | -------------------------------------------------------------------------------- /benchmarks/static_set/retrieve_all_bench.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | #include 19 | 20 | #include 21 | #include 22 | 23 | #include 24 | 25 | #include 26 | 27 | using namespace cuco::benchmark; // defaults, dist_from_state 28 | using namespace cuco::utility; // key_generator, distribution 29 | 30 | /** 31 | * @brief A benchmark evaluating `cuco::static_set::retrieve_all` performance 32 | */ 33 | template 34 | void static_set_retrieve_all(nvbench::state& state, nvbench::type_list) 35 | { 36 | auto const num_keys = state.get_int64("NumInputs"); 37 | auto const occupancy = state.get_float64("Occupancy"); 38 | 39 | std::size_t const size = num_keys / occupancy; 40 | 41 | thrust::device_vector keys(num_keys); 42 | 43 | [[maybe_unused]] key_generator gen{}; 44 | gen.generate(dist_from_state(state), keys.begin(), keys.end()); 45 | 46 | cuco::static_set set{size, cuco::empty_key{-1}}; 47 | set.insert(keys.begin(), keys.end()); 48 | 49 | thrust::device_vector result(num_keys); 50 | 51 | state.add_element_count(num_keys); 52 | state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { 53 | [[maybe_unused]] auto end = set.retrieve_all(result.begin(), {launch.get_stream()}); 54 | }); 55 | } 56 | 57 | NVBENCH_BENCH_TYPES(static_set_retrieve_all, 58 | NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE, 59 | nvbench::type_list)) 60 | .set_name("static_set_retrieve_all_unique_occupancy") 61 | .set_type_axes_names({"Key", "Distribution"}) 62 | .add_int64_axis("NumInputs", {defaults::N}) 63 | .add_float64_axis("Occupancy", defaults::OCCUPANCY_RANGE); 64 | -------------------------------------------------------------------------------- /ci/sccache_stats.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # This script prints the sccache hit rate between two calls to sccache --show-stats. 18 | # It should be sourced in your script before and after the operations you want to profile, 19 | # with the 'start' or 'end' argument respectively. 20 | 21 | mode=$1 22 | 23 | if [[ "$mode" != "start" && "$mode" != "end" ]]; then 24 | echo "Invalid mode: $mode" 25 | echo "Usage: $0 {start|end}" 26 | exit 1 27 | fi 28 | 29 | case $mode in 30 | start) 31 | export SCCACHE_START_HITS=$(sccache --show-stats | awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}') 32 | export SCCACHE_START_MISSES=$(sccache --show-stats | awk '/^[ \t]*Cache misses[ \t]+[0-9]+/ {print $3}') 33 | ;; 34 | end) 35 | if [[ -z ${SCCACHE_START_HITS+x} || -z ${SCCACHE_START_MISSES+x} ]]; then 36 | echo "Error: start stats not collected. Did you call this script with 'start' before your operations?" 
37 | exit 1 38 | fi 39 | 40 | final_hits=$(sccache --show-stats | awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}') 41 | final_misses=$(sccache --show-stats | awk '/^[ \t]*Cache misses[ \t]+[0-9]+/ {print $3}') 42 | hits=$((final_hits - SCCACHE_START_HITS)) 43 | misses=$((final_misses - SCCACHE_START_MISSES)) 44 | total=$((hits + misses)) 45 | 46 | prefix="" 47 | if [ ${GITHUB_ACTIONS:-false} = "true" ]; then 48 | prefix="::notice::" 49 | fi 50 | 51 | if (( total > 0 )); then 52 | hit_rate=$(awk -v hits="$hits" -v total="$total" 'BEGIN { printf "%.2f", (hits / total) * 100 }') 53 | echo ${prefix}"sccache hits: $hits | misses: $misses | hit rate: $hit_rate%" 54 | else 55 | echo ${prefix}"sccache stats: N/A No new compilation requests" 56 | fi 57 | unset SCCACHE_START_HITS 58 | unset SCCACHE_START_MISSES 59 | ;; 60 | esac -------------------------------------------------------------------------------- /include/cuco/detail/storage/storage.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | #include 22 | 23 | namespace cuco::detail { 24 | /** 25 | * @brief Intermediate class internally used by data structures 26 | * 27 | * @tparam StorageImpl Storage implementation class 28 | * @tparam T Storage element type 29 | * @tparam Extent Type of extent denoting number of buckets 30 | * @tparam Allocator Type of allocator used for device storage 31 | */ 32 | template 33 | class storage : StorageImpl::template impl { 34 | public: 35 | /// Storage implementation type 36 | using impl_type = typename StorageImpl::template impl; 37 | using ref_type = typename impl_type::ref_type; ///< Storage ref type 38 | using value_type = typename impl_type::value_type; ///< Storage value type 39 | using allocator_type = typename impl_type::allocator_type; ///< Storage value type 40 | 41 | /// Number of elements per bucket 42 | static constexpr int bucket_size = impl_type::bucket_size; 43 | 44 | using impl_type::allocator; 45 | using impl_type::capacity; 46 | using impl_type::data; 47 | using impl_type::extent; 48 | using impl_type::initialize; 49 | using impl_type::initialize_async; 50 | using impl_type::num_buckets; 51 | using impl_type::ref; 52 | 53 | /** 54 | * @brief Constructs storage. 
55 | * 56 | * @param size Number of slots to (de)allocate 57 | * @param allocator Allocator used for (de)allocating device storage 58 | * @param stream Stream to use for (de)allocating device storage 59 | */ 60 | explicit constexpr storage(Extent size, Allocator const& allocator, cuda::stream_ref stream) 61 | : impl_type{size, allocator, stream} 62 | { 63 | } 64 | }; 65 | } // namespace cuco::detail 66 | -------------------------------------------------------------------------------- /tests/static_multiset/load_factor_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | 23 | using size_type = int32_t; 24 | 25 | TEMPLATE_TEST_CASE_SIG( 26 | "static_multiset load factor tests", 27 | "", 28 | ((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize), 29 | (int32_t, cuco::test::probe_sequence::double_hashing, 1), 30 | (int32_t, cuco::test::probe_sequence::double_hashing, 2), 31 | (int64_t, cuco::test::probe_sequence::double_hashing, 1), 32 | (int64_t, cuco::test::probe_sequence::double_hashing, 2), 33 | (int32_t, cuco::test::probe_sequence::linear_probing, 1), 34 | (int32_t, cuco::test::probe_sequence::linear_probing, 2), 35 | (int64_t, cuco::test::probe_sequence::linear_probing, 1), 36 | (int64_t, cuco::test::probe_sequence::linear_probing, 2)) 37 | { 38 | constexpr size_type num_keys{10}; 39 | 40 | using probe = std::conditional_t>, 42 | cuco::double_hashing>>; 43 | 44 | SECTION("Negative load factor will throw exception") 45 | { 46 | REQUIRE_THROWS(cuco::static_multiset{ 47 | num_keys, -0.1, cuco::empty_key{-1}, {}, probe{}, {}, cuco::storage<2>{}}); 48 | } 49 | 50 | SECTION("Zero load factor will throw exception") 51 | { 52 | REQUIRE_THROWS(cuco::static_multiset{ 53 | num_keys, 0.0, cuco::empty_key{-1}, {}, probe{}, {}, cuco::storage<2>{}}); 54 | } 55 | 56 | SECTION("Load factor larger than one will throw exception") 57 | { 58 | REQUIRE_THROWS(cuco::static_multiset{ 59 | num_keys, 1.1, cuco::empty_key{-1}, {}, probe{}, {}, cuco::storage<2>{}}); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /include/cuco/detail/utils.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
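// Illustrative aside, not part of the original test: with a load factor inside (0, 1] the same
// constructor used in the REQUIRE_THROWS checks above succeeds. The concrete probing scheme below
// mirrors the test's linear-probing configuration; the hash functor name and the choice of 0.5 as
// a representative valid load factor are assumptions.
inline void valid_load_factor_sketch()
{
  using probe = cuco::linear_probing<1, cuco::default_hash_function<int>>;
  cuco::static_multiset multiset{
    10, 0.5, cuco::empty_key{-1}, {}, probe{}, {}, cuco::storage<2>{}};
}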
13 | * See the License for the specific language governing permissions and 14 | */ 15 | 16 | #pragma once 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | 24 | namespace cuco { 25 | namespace detail { 26 | 27 | template <typename Iterator> 28 | __host__ __device__ constexpr inline index_type distance(Iterator begin, Iterator end) 29 | { 30 | using category = typename cuda::std::iterator_traits<Iterator>::iterator_category; 31 | static_assert(cuda::std::is_base_of_v<cuda::std::random_access_iterator_tag, category>, 32 | "Input iterator should be a random access iterator."); 33 | // `int64_t` instead of arch-dependent `long int` 34 | return static_cast<index_type>(cuda::std::distance(begin, end)); 35 | } 36 | 37 | /** 38 | * @brief C++17 constexpr backport of `std::lower_bound`. 39 | * 40 | * @tparam ForwardIt Type of input iterator 41 | * @tparam T Type of `value` 42 | * 43 | * @param first Iterator defining the start of the range to examine 44 | * @param last Iterator defining the end of the range to examine 45 | * @param value Value to compare the elements to 46 | * 47 | * @return Iterator pointing to the first element in the range [first, last) that does not satisfy 48 | * element < value 49 | */ 50 | template <typename ForwardIt, typename T> 51 | constexpr ForwardIt lower_bound(ForwardIt first, ForwardIt last, const T& value) 52 | { 53 | using diff_type = typename std::iterator_traits<ForwardIt>::difference_type; 54 | 55 | ForwardIt it{}; 56 | diff_type count = std::distance(first, last); 57 | diff_type step{}; 58 | 59 | while (count > 0) { 60 | it = first; 61 | step = count / 2; 62 | std::advance(it, step); 63 | 64 | if (static_cast<T>(*it) < value) { 65 | first = ++it; 66 | count -= step + 1; 67 | } else 68 | count = step; 69 | } 70 | 71 | return first; 72 | } 73 | 74 | } // namespace detail 75 | } // namespace cuco 76 | -------------------------------------------------------------------------------- /tests/dynamic_bitset/find_next_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
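// Illustrative aside, not part of the original header: `cuco::detail::lower_bound` mirrors
// `std::lower_bound` but is usable in constexpr context. A minimal compile-time sketch:
constexpr int sorted_values_sketch[] = {1, 3, 3, 7, 9};
static_assert(*cuco::detail::lower_bound(sorted_values_sketch, sorted_values_sketch + 5, 3) == 3);
static_assert(cuco::detail::lower_bound(sorted_values_sketch, sorted_values_sketch + 5, 8) ==
              sorted_values_sketch + 4);  // the first element not less than 8 is 9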
15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #include 26 | 27 | template 28 | __global__ void find_next_kernel(BitsetRef ref, size_type num_elements, OutputIt output) 29 | { 30 | cuco::detail::index_type index = blockIdx.x * blockDim.x + threadIdx.x; 31 | cuco::detail::index_type stride = gridDim.x * blockDim.x; 32 | while (index < num_elements) { 33 | output[index] = ref.find_next(index); 34 | index += stride; 35 | } 36 | } 37 | 38 | using cuco::test::modulo_bitgen; 39 | 40 | TEST_CASE("dynamic_bitset find next set test", "") 41 | { 42 | cuco::experimental::detail::dynamic_bitset bv; 43 | 44 | using size_type = std::size_t; 45 | constexpr size_type num_elements{400}; 46 | 47 | for (size_type i = 0; i < num_elements; i++) { 48 | bv.push_back(modulo_bitgen(i)); 49 | } 50 | 51 | thrust::device_vector device_result(num_elements); 52 | auto ref = bv.ref(); 53 | find_next_kernel<<<1, 1024>>>(ref, num_elements, device_result.data()); 54 | 55 | thrust::host_vector host_result = device_result; 56 | size_type num_matches = 0; 57 | 58 | size_type next_set_pos = -1lu; 59 | do { 60 | next_set_pos++; 61 | } while (next_set_pos < num_elements and !modulo_bitgen(next_set_pos)); 62 | 63 | for (size_type key = 0; key < num_elements; key++) { 64 | num_matches += host_result[key] == next_set_pos; 65 | 66 | if (key == next_set_pos) { 67 | do { 68 | next_set_pos++; 69 | } while (next_set_pos < num_elements and !modulo_bitgen(next_set_pos)); 70 | } 71 | } 72 | REQUIRE(num_matches == num_elements); 73 | } 74 | -------------------------------------------------------------------------------- /examples/static_set/host_bulk_example.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | #include 26 | 27 | /** 28 | * @file host_bulk_example.cu 29 | * @brief Demonstrates usage of the static_set "bulk" host APIs. 30 | * 31 | * The bulk APIs are only invocable from the host and are used for doing operations like `insert` or 32 | * `contains` on a set of keys. 33 | * 34 | */ 35 | int main(void) 36 | { 37 | using Key = int; 38 | 39 | // Empty slots are represented by reserved "sentinel" values. These values should be selected such 40 | // that they never occur in your input data. 41 | Key constexpr empty_key_sentinel = -1; 42 | 43 | // Number of keys to be inserted 44 | std::size_t constexpr num_keys = 50'000; 45 | 46 | // Compute capacity based on a 50% load factor 47 | auto constexpr load_factor = 0.5; 48 | std::size_t const capacity = std::ceil(num_keys / load_factor); 49 | 50 | // Constructs a set with at least `capacity` slots using -1 as the empty keys sentinel. 
51 | cuco::static_set set{capacity, cuco::empty_key{empty_key_sentinel}}; 52 | 53 | // Create a sequence of keys {0, 1, 2, .., i} 54 | thrust::device_vector keys(num_keys); 55 | thrust::sequence(keys.begin(), keys.end(), 0); 56 | 57 | // Inserts all keys into the hash set 58 | set.insert(keys.begin(), keys.end()); 59 | 60 | // Storage for result 61 | thrust::device_vector found(num_keys); 62 | 63 | // Check if all keys are contained in the set 64 | set.contains(keys.begin(), keys.end(), found.begin()); 65 | 66 | // Verify that all keys have been found 67 | bool const all_keys_found = thrust::all_of(found.begin(), found.end(), cuda::std::identity{}); 68 | 69 | if (all_keys_found) { std::cout << "Success! Found all keys.\n"; } 70 | 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /tests/dynamic_bitset/get_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #include 26 | 27 | template 28 | __global__ void test_kernel(BitsetRef ref, size_type num_elements, OutputIt output) 29 | { 30 | cuco::detail::index_type index = blockIdx.x * blockDim.x + threadIdx.x; 31 | cuco::detail::index_type stride = gridDim.x * blockDim.x; 32 | while (index < num_elements) { 33 | output[index] = ref.test(index); 34 | index += stride; 35 | } 36 | } 37 | 38 | using cuco::test::modulo_bitgen; 39 | 40 | TEST_CASE("dynamic_bitset get test", "") 41 | { 42 | cuco::experimental::detail::dynamic_bitset bv; 43 | 44 | using size_type = std::size_t; 45 | constexpr size_type num_elements{400}; 46 | 47 | size_type num_set_ref = 0; 48 | for (size_type i = 0; i < num_elements; i++) { 49 | bv.push_back(modulo_bitgen(i)); 50 | num_set_ref += modulo_bitgen(i); 51 | } 52 | 53 | // Host-bulk test 54 | thrust::device_vector keys(num_elements); 55 | thrust::sequence(keys.begin(), keys.end(), 0); 56 | 57 | thrust::device_vector test_result(num_elements); 58 | thrust::fill(test_result.begin(), test_result.end(), 0); 59 | 60 | bv.test(keys.begin(), keys.end(), test_result.begin()); 61 | 62 | size_type num_set = thrust::reduce(thrust::device, test_result.begin(), test_result.end(), 0); 63 | REQUIRE(num_set == num_set_ref); 64 | 65 | // Device-ref test 66 | auto ref = bv.ref(); 67 | thrust::fill(test_result.begin(), test_result.end(), 0); 68 | test_kernel<<<1, 1024>>>(ref, num_elements, test_result.data()); 69 | 70 | num_set = thrust::reduce(thrust::device, test_result.begin(), test_result.end(), 0); 71 | REQUIRE(num_set == num_set_ref); 72 | } 73 | -------------------------------------------------------------------------------- /tests/static_map/hash_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. 
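// --- Editor's illustrative sketch (not part of the original sources) -----------------
// A small helper one could append to examples/static_set/host_bulk_example.cu above:
// keys that were never inserted must not be found. Beyond the example's own includes it
// assumes <thrust/logical.h> and <cassert>; the [num_keys, 2 * num_keys) probe range is
// an arbitrary choice of keys that are guaranteed to be absent from the set.
#include <thrust/logical.h>

#include <cassert>

template <typename SetType>
void check_missing_keys(SetType& set, std::size_t num_keys)
{
  using key_type = typename SetType::key_type;

  // Keys in [num_keys, 2 * num_keys) were never inserted by the example
  thrust::device_vector<key_type> missing(num_keys);
  thrust::sequence(missing.begin(), missing.end(), static_cast<key_type>(num_keys));

  thrust::device_vector<bool> found(num_keys);
  set.contains(missing.begin(), missing.end(), found.begin());

  // None of the lookups should report a match
  assert(thrust::none_of(found.begin(), found.end(), cuda::std::identity{}));
}
// --------------------------------------------------------------------------------------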
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include 28 | 29 | using size_type = std::size_t; 30 | 31 | template 32 | void test_hash_function() 33 | { 34 | using Value = int64_t; 35 | 36 | constexpr size_type num_keys{400}; 37 | 38 | auto map = cuco::static_map, 41 | cuda::thread_scope_device, 42 | cuda::std::equal_to, 43 | cuco::linear_probing<1, Hash>, 44 | cuco::cuda_allocator, 45 | cuco::storage<2>>{ 46 | num_keys, cuco::empty_key{-1}, cuco::empty_value{-1}}; 47 | 48 | auto keys_begin = thrust::counting_iterator(1); 49 | 50 | auto pairs_begin = thrust::make_transform_iterator( 51 | keys_begin, cuda::proclaim_return_type>([] __device__(auto i) { 52 | return cuco::pair(i, i); 53 | })); 54 | 55 | thrust::device_vector d_keys_exist(num_keys); 56 | 57 | map.insert(pairs_begin, pairs_begin + num_keys); 58 | 59 | REQUIRE(map.size() == num_keys); 60 | 61 | map.contains(keys_begin, keys_begin + num_keys, d_keys_exist.begin()); 62 | 63 | REQUIRE(cuco::test::all_of(d_keys_exist.begin(), d_keys_exist.end(), cuda::std::identity{})); 64 | } 65 | 66 | TEMPLATE_TEST_CASE_SIG("static_map hash tests", "", ((typename Key)), (int32_t), (int64_t)) 67 | { 68 | test_hash_function>(); 69 | test_hash_function>(); 70 | test_hash_function>(); 71 | test_hash_function>(); 72 | } 73 | -------------------------------------------------------------------------------- /tests/static_set/atomic_storage_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
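// --- Editor's illustrative sketch (not part of the original sources) -----------------
// The hashers exercised by tests/static_map/hash_test.cu above are plain callables and
// can also be invoked directly on the host. The hasher names follow the public aliases
// visible elsewhere in this repo (e.g. benchmarks/bloom_filter/utils.hpp); the
// seed-taking constructor is an assumption based on the usual cuco hasher interface.
void hash_function_example()
{
  cuco::xxhash_32<int> hasher{};
  auto const h1 = hasher(42);                       // 32-bit hash of the key 42
  auto const h2 = cuco::murmurhash3_32<int>{}(42);  // same key, different hash family

  cuco::xxhash_32<int> seeded_hasher{7};            // assumed seed-taking constructor
  auto const h3 = seeded_hasher(42);

  (void)h1;
  (void)h2;
  (void)h3;
}
// --------------------------------------------------------------------------------------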
15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | 26 | using T = int32_t; 27 | using Hash = uint32_t; 28 | using Key = cuco::pair; 29 | 30 | struct hasher { 31 | __device__ Hash operator()(Key const& k) const { return k.first; } 32 | }; 33 | 34 | struct always_not_equal { 35 | __device__ constexpr bool operator()(Key const&, Key const&) const noexcept 36 | { 37 | // All build table keys are distinct thus `false` no matter what 38 | return false; 39 | } 40 | }; 41 | 42 | class build_fn { 43 | public: 44 | __device__ __forceinline__ auto operator()(T i) const noexcept { return cuco::pair{_hash(i), i}; } 45 | 46 | private: 47 | cuco::default_hash_function _hash{}; 48 | }; 49 | 50 | // This test exercise is designed to replicate a Spark runtime failure scenario 51 | // https://github.com/NVIDIA/spark-rapids/issues/12586 and 52 | // https://github.com/rapidsai/cudf/issues/18587 53 | // that is not addressed by the current test suite. It will result in a runtime 54 | // crash if the CCCL atomic storage is not managed correctly. 55 | TEST_CASE("atomic_storage_test", "") 56 | { 57 | using probe = cuco::linear_probing<1, hasher>; 58 | 59 | auto const num_keys = 100'000; 60 | 61 | auto set = cuco::static_set{cuco::extent{num_keys}, 62 | 0.5, 63 | cuco::empty_key{Key{std::numeric_limits::max(), -1}}, 64 | always_not_equal{}, 65 | probe{}, 66 | {}, 67 | cuco::storage<1>{}}; 68 | 69 | auto keys_begin = thrust::make_transform_iterator(thrust::counting_iterator{0}, build_fn{}); 70 | 71 | set.insert_async(keys_begin, keys_begin + num_keys); 72 | auto const count = set.size(); 73 | 74 | REQUIRE(count == num_keys); 75 | } 76 | -------------------------------------------------------------------------------- /.devcontainer/verify_devcontainer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function usage { 4 | echo "Usage: $0" 5 | echo 6 | echo "This script is intended to be run within one of CUCO's Dev Containers." 7 | echo "It verifies that the expected environment variables and binary versions match what is expected." 8 | } 9 | 10 | check_envvars() { 11 | for var_name in "$@"; do 12 | if [[ -z "${!var_name:-}" ]]; then 13 | echo "::error:: ${var_name} variable is not set." 14 | exit 1 15 | else 16 | echo "$var_name=${!var_name}" 17 | fi 18 | done 19 | } 20 | 21 | check_host_compiler_version() { 22 | local version_output=$($CXX --version) 23 | 24 | if [[ "$CXX" == "g++" ]]; then 25 | local actual_version=$(echo "$version_output" | head -n 1 | cut -d ' ' -f 4 | cut -d '.' -f 1) 26 | local expected_compiler="gcc" 27 | elif [[ "$CXX" == "clang++" ]]; then 28 | if [[ $version_output =~ clang\ version\ ([0-9]+) ]]; then 29 | actual_version=${BASH_REMATCH[1]} 30 | else 31 | echo "::error:: Unable to determine clang version." 32 | exit 1 33 | fi 34 | expected_compiler="llvm" 35 | else 36 | echo "::error:: Unexpected CXX value ($CXX)." 37 | exit 1 38 | fi 39 | 40 | if [[ "$expected_compiler" != "${CUCO_HOST_COMPILER}" || "$actual_version" != "$CUCO_HOST_COMPILER_VERSION" ]]; then 41 | echo "::error:: CXX ($CXX) version ($actual_version) does not match the expected compiler (${CUCO_HOST_COMPILER}) and version (${CUCO_HOST_COMPILER_VERSION})." 
42 | exit 1 43 | else 44 | echo "Detected host compiler: $CXX version $actual_version" 45 | fi 46 | } 47 | 48 | check_cuda_version() { 49 | local cuda_version_output=$(nvcc --version) 50 | if [[ $cuda_version_output =~ release\ ([0-9]+\.[0-9]+) ]]; then 51 | local actual_cuda_version=${BASH_REMATCH[1]} 52 | else 53 | echo "::error:: Unable to determine CUDA version from nvcc." 54 | exit 1 55 | fi 56 | 57 | if [[ "$actual_cuda_version" != "$CUCO_CUDA_VERSION" ]]; then 58 | echo "::error:: CUDA version ($actual_cuda_version) does not match the expected CUDA version ($CUCO_CUDA_VERSION)." 59 | exit 1 60 | else 61 | echo "Detected CUDA version: $actual_cuda_version" 62 | fi 63 | } 64 | 65 | main() { 66 | if [[ "$1" == "-h" || "$1" == "--help" ]]; then 67 | usage 68 | exit 0 69 | fi 70 | 71 | set -euo pipefail 72 | 73 | check_envvars DEVCONTAINER_NAME CXX CUCO_HOST_COMPILER CUCO_CUDA_VERSION CUCO_HOST_COMPILER_VERSION 74 | 75 | check_host_compiler_version 76 | 77 | check_cuda_version 78 | 79 | echo "Dev Container successfully verified!" 80 | } 81 | 82 | main "$@" -------------------------------------------------------------------------------- /.github/workflows/run-as-coder.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | name: Run as coder user 17 | 18 | defaults: 19 | run: 20 | shell: bash -exo pipefail {0} 21 | 22 | 23 | on: 24 | workflow_call: 25 | inputs: 26 | name: {type: string, required: true} 27 | image: {type: string, required: true} 28 | runner: {type: string, required: true} 29 | command: {type: string, required: true} 30 | env: { type: string, required: false, default: "" } 31 | 32 | jobs: 33 | run-as-coder: 34 | name: ${{inputs.name}} 35 | runs-on: ${{inputs.runner}} 36 | container: 37 | options: -u root 38 | image: ${{inputs.image}} 39 | env: 40 | NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} 41 | permissions: 42 | id-token: write 43 | steps: 44 | - name: Checkout repo 45 | uses: actions/checkout@v3 46 | with: 47 | path: cuCollections 48 | persist-credentials: false 49 | - name: Move files to coder user home directory 50 | run: | 51 | cp -R cuCollections /home/coder/cuCollections 52 | chown -R coder:coder /home/coder/ 53 | - name: Configure credentials and environment variables for sccache 54 | uses: ./cuCollections/.github/actions/configure_cccl_sccache 55 | - name: Run command 56 | shell: su coder {0} 57 | run: | 58 | set -exo pipefail 59 | cd ~/cuCollections 60 | eval "${{inputs.command}}" || exit_code=$? 61 | if [ ! -z "$exit_code" ]; then 62 | echo "::error::Error! 
To checkout the corresponding code and reproduce locally, run the following commands:" 63 | echo "git clone --branch $GITHUB_REF_NAME --single-branch --recurse-submodules https://github.com/$GITHUB_REPOSITORY.git && cd $(echo $GITHUB_REPOSITORY | cut -d'/' -f2) && git checkout $GITHUB_SHA" 64 | echo "docker run --rm -it --gpus all --pull=always --volume \$PWD:/repo --workdir /repo ${{ inputs.image }} ${{inputs.command}}" 65 | exit $exit_code 66 | fi 67 | -------------------------------------------------------------------------------- /tests/static_multiset/large_input_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include 28 | 29 | #include 30 | #include 31 | 32 | template 33 | void test_unique_sequence(Set& set, typename Set::value_type* res_begin, std::size_t num_keys) 34 | { 35 | using Key = typename Set::key_type; 36 | 37 | auto const keys_begin = thrust::counting_iterator(0); 38 | auto const keys_end = keys_begin + num_keys; 39 | 40 | set.insert(keys_begin, keys_end); 41 | REQUIRE(set.size() == num_keys); 42 | 43 | SECTION("All inserted keys can be retrieved.") 44 | { 45 | auto const [_, res_end] = 46 | set.retrieve(keys_begin, keys_end, thrust::make_discard_iterator(), res_begin); 47 | REQUIRE(static_cast(std::distance(res_begin, res_end)) == num_keys); 48 | 49 | thrust::sort(thrust::device, res_begin, res_end); 50 | 51 | REQUIRE(cuco::test::equal(res_begin, res_end, keys_begin, cuda::std::equal_to{})); 52 | } 53 | } 54 | 55 | TEMPLATE_TEST_CASE_SIG( 56 | "cuco::static_multiset large input test", 57 | "", 58 | ((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize), 59 | (int64_t, cuco::test::probe_sequence::double_hashing, 1), 60 | (int64_t, cuco::test::probe_sequence::double_hashing, 2)) 61 | { 62 | constexpr std::size_t num_keys{1'200'000'000}; 63 | 64 | using extent_type = cuco::extent; 65 | using probe = cuco::double_hashing>; 66 | 67 | try { 68 | auto set = cuco::static_multiset{num_keys * 2, cuco::empty_key{-1}, {}, probe{}}; 69 | 70 | thrust::device_vector d_retrieved(num_keys); 71 | test_unique_sequence(set, d_retrieved.data().get(), num_keys); 72 | } catch (cuco::cuda_error&) { 73 | SKIP("Out of memory"); 74 | } catch (std::bad_alloc&) { 75 | SKIP("Out of memory"); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /include/cuco/utility/traits.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | namespace cuco { 23 | 24 | /** 25 | * @brief Customization point that can be specialized to indicate that it is safe to perform bitwise 26 | * equality comparisons on the object-representation of objects of type `T`. 27 | * 28 | * By default, only types where `std::has_unique_object_representations_v` is true are safe for 29 | * bitwise equality. However, this can be too restrictive for some types, e.g., floating point 30 | * types. 31 | * 32 | * User-defined specializations of `is_bitwise_comparable` are allowed, but it is the users 33 | * responsibility to ensure values do not occur that would lead to unexpected behavior. For example, 34 | * if a `NaN` bit pattern were used as the empty sentinel value, it may not compare bitwise equal to 35 | * other `NaN` bit patterns. 36 | * 37 | */ 38 | template 39 | struct is_bitwise_comparable : cuda::std::false_type {}; 40 | 41 | /// By default, only types with unique object representations are allowed 42 | template 43 | struct is_bitwise_comparable< 44 | T, 45 | cuda::std::enable_if_t>> 46 | : cuda::std::true_type {}; 47 | 48 | template 49 | inline constexpr bool is_bitwise_comparable_v = 50 | is_bitwise_comparable::value; ///< Shortcut definition 51 | 52 | /** 53 | * @brief Declares that a type `Type` is bitwise comparable. 54 | * 55 | */ 56 | #define CUCO_DECLARE_BITWISE_COMPARABLE(Type) \ 57 | namespace cuco { \ 58 | template <> \ 59 | struct is_bitwise_comparable : cuda::std::true_type {}; \ 60 | } 61 | 62 | template 63 | inline constexpr bool dependent_bool_value = value; ///< Unpacked dependent bool value 64 | 65 | template 66 | inline constexpr bool dependent_false = 67 | dependent_bool_value; ///< Emits a `false` value which is dependent on the given 68 | ///< argument types 69 | 70 | } // namespace cuco 71 | -------------------------------------------------------------------------------- /examples/bloom_filter/host_bulk_example.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | 26 | int main(void) 27 | { 28 | int constexpr num_keys = 10'000; ///< Generate 10'000 keys 29 | int constexpr num_tp = num_keys * 0.5; ///< Insert the first half keys into the filter. 
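// --- Editor's illustrative sketch (not part of the original sources) -----------------
// Usage of the customization point from include/cuco/utility/traits.hpp above.
// `large_key` is a hypothetical user-defined key: its padding bytes make
// cuda::std::has_unique_object_representations_v<large_key> false, so the user opts in
// explicitly and thereby takes responsibility for the padding never differing between
// otherwise-equal values.
struct large_key {
  int32_t a;  // followed by 4 padding bytes on typical 64-bit ABIs
  int64_t b;
};
CUCO_DECLARE_BITWISE_COMPARABLE(large_key)

static_assert(cuco::is_bitwise_comparable_v<large_key>);
// --------------------------------------------------------------------------------------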
30 | int constexpr num_tn = num_keys - num_tp; 31 | int constexpr sub_filters = 200; ///< 200 sub-filters per bloom filter 32 | 33 | // key type for bloom filter 34 | using key_type = int; 35 | 36 | // Spawn a bloom filter with default policy and 200 sub-filters. 37 | cuco::bloom_filter filter{sub_filters}; 38 | 39 | std::cout << "Bulk insert into bloom filter with default fingerprint generation policy: " 40 | << std::endl; 41 | 42 | thrust::device_vector keys(num_keys); 43 | thrust::sequence(keys.begin(), keys.end(), 1); 44 | 45 | auto tp_begin = keys.begin(); 46 | auto tp_end = tp_begin + num_tp; 47 | auto tn_begin = tp_end; 48 | auto tn_end = keys.end(); 49 | 50 | // Insert the first half of the keys. 51 | filter.add(tp_begin, tp_end); 52 | 53 | thrust::device_vector tp_result(num_tp, false); 54 | thrust::device_vector tn_result(num_keys - num_tp, false); 55 | 56 | // Query the filter for the previously inserted keys. 57 | // This should result in a true-positive rate of TPR=1. 58 | filter.contains(tp_begin, tp_end, tp_result.begin()); 59 | 60 | // Query the filter for the keys that are not present in the filter. 61 | // Since bloom filters are probalistic data structures, the filter 62 | // exhibits a false-positive rate FPR>0 depending on the number of bits in 63 | // the filter and the number of hashes used per key. 64 | filter.contains(tn_begin, tn_end, tn_result.begin()); 65 | 66 | float tp_rate = 67 | float(thrust::count(thrust::device, tp_result.begin(), tp_result.end(), true)) / float(num_tp); 68 | float fp_rate = 69 | float(thrust::count(thrust::device, tn_result.begin(), tn_result.end(), true)) / float(num_tn); 70 | 71 | std::cout << "TPR=" << tp_rate << " FPR=" << fp_rate << std::endl; 72 | 73 | return 0; 74 | } -------------------------------------------------------------------------------- /include/cuco/detail/utility/cuda.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
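// --- Editor's illustrative sketch (not part of the original sources) -----------------
// A rough cross-check for the FPR printed by examples/bloom_filter/host_bulk_example.cu
// above. This is the textbook estimate for a classic Bloom filter with m bits, n keys
// and k hash functions; cuco's blocked sub-filter layout deviates from it somewhat, so
// treat the result only as a ballpark figure.
#include <cmath>

double approx_bloom_fpr(double num_bits, double num_keys, double num_hashes)
{
  // FPR ≈ (1 - e^(-k * n / m))^k
  return std::pow(1.0 - std::exp(-num_hashes * num_keys / num_bits), num_hashes);
}
// --------------------------------------------------------------------------------------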
13 | * See the License for the specific language governing permissions and 14 | */ 15 | 16 | #pragma once 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | 23 | namespace cuco { 24 | namespace detail { 25 | 26 | constexpr std::int32_t default_block_size() noexcept { return 128; } 27 | /// Default stride 28 | constexpr std::int32_t default_stride() noexcept { return 1; } 29 | 30 | /** 31 | * @brief Computes the desired 1D grid size with the given parameters 32 | * 33 | * @param num Number of elements to handle in the kernel 34 | * @param cg_size Number of threads per CUDA Cooperative Group 35 | * @param stride Number of elements to be handled by each thread 36 | * @param block_size Number of threads in each thread block 37 | * 38 | * @return The resulting grid size 39 | */ 40 | constexpr auto grid_size(index_type num, 41 | std::int32_t cg_size = 1, 42 | std::int32_t stride = default_stride(), 43 | std::int32_t block_size = default_block_size()) noexcept 44 | { 45 | return (cg_size * num + stride * block_size - 1) / (stride * block_size); 46 | } 47 | 48 | /** 49 | * @brief Computes the ideal 1D grid size with the given parameters 50 | * 51 | * @tparam Kernel Kernel type 52 | * 53 | * @param block_size Number of threads in each thread block 54 | * @param kernel CUDA kernel to launch 55 | * @param dynamic_shm_size Dynamic shared memory size 56 | * 57 | * @return The grid size that delivers the highest occupancy 58 | */ 59 | template 60 | constexpr auto max_occupancy_grid_size(std::int32_t block_size, 61 | Kernel kernel, 62 | std::size_t dynamic_shm_size = 0) 63 | { 64 | int device = 0; 65 | CUCO_CUDA_TRY(cudaGetDevice(&device)); 66 | 67 | int num_multiprocessors = -1; 68 | CUCO_CUDA_TRY( 69 | cudaDeviceGetAttribute(&num_multiprocessors, cudaDevAttrMultiProcessorCount, device)); 70 | 71 | int max_active_blocks_per_multiprocessor{}; 72 | CUCO_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor( 73 | &max_active_blocks_per_multiprocessor, kernel, block_size, dynamic_shm_size)); 74 | 75 | return max_active_blocks_per_multiprocessor * num_multiprocessors; 76 | } 77 | 78 | } // namespace detail 79 | } // namespace cuco 80 | -------------------------------------------------------------------------------- /tests/dynamic_map/unique_sequence_test_experimental.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
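// --- Editor's illustrative sketch (not part of the original sources) -----------------
// How the internal launch helpers from include/cuco/detail/utility/cuda.hpp above are
// typically combined. `fill_kernel` and `launch_fill` are made-up names; only
// `default_block_size`, `grid_size` and `index_type` come from the header.
__global__ void fill_kernel(int* out, cuco::detail::index_type n)
{
  cuco::detail::index_type const idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx < n) { out[idx] = 42; }
}

void launch_fill(int* out, cuco::detail::index_type n, cudaStream_t stream)
{
  auto const block = cuco::detail::default_block_size();  // 128 threads per block
  auto const grid  = cuco::detail::grid_size(n);          // ceil(n / 128) blocks
  fill_kernel<<<grid, block, 0, stream>>>(out, n);
}
// --------------------------------------------------------------------------------------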
15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | #include 29 | 30 | TEMPLATE_TEST_CASE_SIG("experimental::dynamic_map: unique sequence", 31 | "", 32 | ((typename Key, typename T), Key, T), 33 | (int32_t, int32_t), 34 | (int32_t, int64_t), 35 | (int64_t, int32_t), 36 | (int64_t, int64_t)) 37 | { 38 | constexpr std::size_t num_keys{1'000'000}; 39 | 40 | cuco::experimental::dynamic_map map{ 41 | 30'000'000, cuco::empty_key{-1}, cuco::empty_value{-1}}; 42 | 43 | thrust::device_vector d_keys(num_keys); 44 | thrust::device_vector d_values(num_keys); 45 | 46 | thrust::sequence(thrust::device, d_keys.begin(), d_keys.end()); 47 | thrust::sequence(thrust::device, d_values.begin(), d_values.end()); 48 | 49 | auto pairs_begin = 50 | thrust::make_transform_iterator(thrust::make_counting_iterator(0), 51 | cuda::proclaim_return_type>( 52 | [] __device__(auto i) { return cuco::pair(i, i); })); 53 | 54 | thrust::device_vector d_results(num_keys); 55 | thrust::device_vector d_contained(num_keys); 56 | 57 | // bulk function test cases 58 | 59 | SECTION("All inserted keys-value pairs should be contained") 60 | { 61 | map.insert(pairs_begin, pairs_begin + num_keys); 62 | map.contains(d_keys.begin(), d_keys.end(), d_contained.begin()); 63 | 64 | REQUIRE(cuco::test::all_of(d_contained.begin(), d_contained.end(), cuda::std::identity{})); 65 | } 66 | 67 | SECTION("Non-inserted keys-value pairs should not be contained") 68 | { 69 | // segfaults 70 | map.contains(d_keys.begin(), d_keys.end(), d_contained.begin()); 71 | 72 | REQUIRE(cuco::test::none_of(d_contained.begin(), d_contained.end(), cuda::std::identity{})); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /tests/utility/extent_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | 25 | #include 26 | 27 | auto constexpr cg_size = 2; 28 | auto constexpr bucket_size = 4; 29 | 30 | using storage_t = cuco::storage; 31 | template 32 | using probing_t = cuco::double_hashing; 33 | 34 | TEMPLATE_TEST_CASE_SIG( 35 | "utility extent tests", "", ((typename SizeType), SizeType), (int32_t), (int64_t), (std::size_t)) 36 | { 37 | SizeType constexpr num = 1234; 38 | SizeType constexpr gold_reference = 1256; // 157 x 2 x 4 39 | 40 | SECTION("Static extent must be evaluated at compile time.") 41 | { 42 | auto const size = cuco::extent{}; 43 | STATIC_REQUIRE(num == size); 44 | } 45 | 46 | SECTION("Dynamic extent is evaluated at run time.") 47 | { 48 | auto const size = cuco::extent(num); 49 | REQUIRE(size == num); 50 | } 51 | 52 | SECTION("Compute static valid extent at compile time.") 53 | { 54 | auto constexpr size = cuco::extent{}; 55 | auto constexpr res = cuco::make_valid_extent(size); 56 | STATIC_REQUIRE(gold_reference == res.value()); 57 | } 58 | 59 | SECTION("Compute dynamic valid extent at run time.") 60 | { 61 | auto const size = cuco::extent{num}; 62 | auto const res = cuco::make_valid_extent(size); 63 | REQUIRE(gold_reference == res.value()); 64 | } 65 | 66 | SECTION("Invalid desired load factor throws exception") 67 | { 68 | using probing_scheme_type = cuco::linear_probing>; 69 | using storage_type = cuco::storage; 70 | 71 | auto const size = cuco::extent{num}; 72 | 73 | // Test load factor <= 0 74 | REQUIRE_THROWS(cuco::make_valid_extent(size, 0.0)); 75 | REQUIRE_THROWS(cuco::make_valid_extent(size, -0.5)); 76 | 77 | // Test load factor > 1 78 | REQUIRE_THROWS(cuco::make_valid_extent(size, 1.5)); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /benchmarks/bloom_filter/utils.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
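// --- Editor's note (illustrative reasoning, not part of the original sources) --------
// Where the `gold_reference` of 1256 in tests/utility/extent_test.cu above comes from,
// assuming the double-hashing scheme requires a prime number of buckets: the requested
// size 1234 is first divided by cg_size * bucket_size = 2 * 4 = 8, giving
// ceil(1234 / 8) = 155; the next prime >= 155 is 157; and 157 buckets * 8 slots per
// bucket = 1256 total slots.
// --------------------------------------------------------------------------------------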
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | #include 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | #include 29 | 30 | NVBENCH_DECLARE_TYPE_STRINGS(cuco::detail::XXHash_64, "xxhash_64", "cuco::xxhash_64"); 31 | NVBENCH_DECLARE_TYPE_STRINGS(cuco::detail::XXHash_32, "xxhash_32", "cuco::xxhash_32"); 32 | NVBENCH_DECLARE_TYPE_STRINGS(cuco::detail::MurmurHash3_32, 33 | "murmurhash3_32", 34 | "cuco::murmurhash3_32"); 35 | NVBENCH_DECLARE_TYPE_STRINGS(cuco::detail::MurmurHash3_x86_128, 36 | "murmurhash3_x86_128", 37 | "cuco::murmurhash3_x86_128"); 38 | NVBENCH_DECLARE_TYPE_STRINGS(cuco::detail::MurmurHash3_x64_128, 39 | "murmurhash3_x64_128", 40 | "cuco::murmurhash3_x64_128"); 41 | NVBENCH_DECLARE_TYPE_STRINGS(cuco::detail::identity_hash, 42 | "identity_hash", 43 | "cuco::identity_hash"); 44 | 45 | namespace cuco::benchmark { 46 | 47 | template 48 | void add_fpr_summary(nvbench::state& state, FilterType& filter) 49 | { 50 | filter.clear(); 51 | 52 | auto const num_keys = state.get_int64("NumInputs"); 53 | 54 | thrust::device_vector keys(num_keys * 2); 55 | thrust::sequence(thrust::device, keys.begin(), keys.end(), 1); 56 | thrust::device_vector result(num_keys, false); 57 | 58 | auto tp_begin = keys.begin(); 59 | auto tp_end = tp_begin + num_keys; 60 | auto tn_begin = tp_end; 61 | auto tn_end = keys.end(); 62 | filter.add(tp_begin, tp_end); 63 | filter.contains(tn_begin, tn_end, result.begin()); 64 | 65 | float fp = thrust::count(thrust::device, result.begin(), result.end(), true); 66 | 67 | auto& summ = state.add_summary("FalsePositiveRate"); 68 | summ.set_string("hint", "FPR"); 69 | summ.set_string("short_name", "FPR"); 70 | summ.set_string("description", "False-positive rate of the bloom filter."); 71 | summ.set_float64("value", fp / num_keys); 72 | 73 | filter.clear(); 74 | } 75 | 76 | } // namespace cuco::benchmark -------------------------------------------------------------------------------- /include/cuco/detail/roaring_bitmap/roaring_bitmap_ref.inl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025 NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | namespace cuco::experimental { 26 | 27 | template 28 | __host__ __device__ roaring_bitmap_ref::roaring_bitmap_ref(storage_ref_type const& storage_ref) 29 | : impl_{storage_ref} 30 | { 31 | } 32 | 33 | template 34 | template > */> 36 | __device__ roaring_bitmap_ref::roaring_bitmap_ref(cuda::std::byte const* bitmap) : impl_{bitmap} 37 | { 38 | } 39 | 40 | template 41 | template 42 | __host__ void roaring_bitmap_ref::contains(InputIt first, 43 | InputIt last, 44 | OutputIt output, 45 | cuda::stream_ref stream) const 46 | { 47 | impl_.contains(first, last, output, stream); 48 | } 49 | 50 | template 51 | template 52 | __host__ void roaring_bitmap_ref::contains_async(InputIt first, 53 | InputIt last, 54 | OutputIt output, 55 | cuda::stream_ref stream) const noexcept 56 | { 57 | impl_.contains_async(first, last, output, stream); 58 | } 59 | 60 | template 61 | __device__ bool roaring_bitmap_ref::contains(T value) const 62 | { 63 | return impl_.contains(value); 64 | } 65 | 66 | template 67 | __host__ __device__ cuda::std::size_t roaring_bitmap_ref::size() const noexcept 68 | { 69 | return impl_.size(); 70 | } 71 | 72 | template 73 | __host__ __device__ bool roaring_bitmap_ref::empty() const noexcept 74 | { 75 | return impl_.empty(); 76 | } 77 | 78 | template 79 | __host__ __device__ cuda::std::byte const* roaring_bitmap_ref::data() const noexcept 80 | { 81 | return impl_.data(); 82 | } 83 | 84 | template 85 | __host__ __device__ cuda::std::size_t roaring_bitmap_ref::size_bytes() const noexcept 86 | { 87 | return impl_.size_bytes(); 88 | } 89 | 90 | } // namespace cuco::experimental -------------------------------------------------------------------------------- /include/cuco/probe_sequences.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | namespace cuco::legacy { 22 | 23 | /** 24 | * @brief Public linear probing scheme class. 25 | * 26 | * Linear probing is efficient when few collisions are present. Performance hints: 27 | * - Use linear probing when collisions are rare. e.g. low occupancy or low multiplicity. 28 | * - `CGSize` = 1 or 2 when hash map is small (10'000'000 or less), 4 or 8 otherwise. 29 | * 30 | * `Hash` should be callable object type. 
31 | * 32 | * @tparam CGSize Size of CUDA Cooperative Groups 33 | * @tparam Hash Unary callable type 34 | */ 35 | template 36 | class linear_probing : public detail::probe_sequence_base { 37 | public: 38 | using probe_sequence_base_type = 39 | detail::probe_sequence_base; ///< The base probe scheme type 40 | using probe_sequence_base_type::cg_size; 41 | using probe_sequence_base_type::vector_width; 42 | 43 | /// Type of implementation details 44 | template 45 | using impl = detail::linear_probing_impl; 46 | }; 47 | 48 | /** 49 | * 50 | * @brief Public double hashing scheme class. 51 | * 52 | * Default probe sequence for `cuco::static_multimap`. Double hashing shows superior 53 | * performance when dealing with high multiplicty and/or high occupancy use cases. Performance 54 | * hints: 55 | * - `CGSize` = 1 or 2 when hash map is small (10'000'000 or less), 4 or 8 otherwise. 56 | * 57 | * `Hash1` and `Hash2` should be callable object type. 58 | * 59 | * @tparam CGSize Size of CUDA Cooperative Groups 60 | * @tparam Hash1 Unary callable type 61 | * @tparam Hash2 Unary callable type 62 | */ 63 | template 64 | class double_hashing : public detail::probe_sequence_base { 65 | public: 66 | using probe_sequence_base_type = 67 | detail::probe_sequence_base; ///< The base probe scheme type 68 | using probe_sequence_base_type::cg_size; 69 | using probe_sequence_base_type::vector_width; 70 | 71 | /// Type of implementation details 72 | template 73 | using impl = detail::double_hashing_impl; 74 | }; 75 | 76 | } // namespace cuco::legacy 77 | -------------------------------------------------------------------------------- /include/cuco/utility/reduction_functors.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #pragma once 17 | 18 | #include 19 | 20 | namespace cuco::reduce { 21 | 22 | /** 23 | * @brief Device functor performing sum reduction, used with `insert-or-apply` 24 | */ 25 | struct plus { 26 | /** 27 | * @brief Performs atomic fetch_add on payload and the new value to be inserted 28 | * 29 | * @tparam T The payload type 30 | * @tparam Scope The cuda::thread_scope used for atomic_ref 31 | * 32 | * @param payload_ref The atomic_ref pointing to payload part of the slot 33 | * @param val The new value to be applied as reduction to the current value 34 | * in the payload. 
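// --- Editor's illustrative sketch (not part of the original sources) -----------------
// Acting on the performance hints above, but with the non-legacy probing schemes used
// elsewhere in this repo (e.g. tests/static_set/atomic_storage_test.cu and
// tests/static_multiset/large_input_test.cu). The constructor shape mirrors those tests;
// the sizes and hash choices are arbitrary illustrations.
void probing_scheme_example()
{
  // Few collisions expected: linear probing with a scalar (CG size 1) probe
  using small_probe = cuco::linear_probing<1, cuco::default_hash_function<int>>;
  auto small_set    = cuco::static_set{
    cuco::extent<std::size_t>{10'000}, cuco::empty_key{-1}, {}, small_probe{}};

  // Large table / higher multiplicity: double hashing with a wider cooperative group
  using large_probe = cuco::double_hashing<4, cuco::default_hash_function<int>>;
  auto large_set    = cuco::static_set{
    cuco::extent<std::size_t>{50'000'000}, cuco::empty_key{-1}, {}, large_probe{}};
}
// --------------------------------------------------------------------------------------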
35 | */ 36 | template 37 | __device__ void operator()(cuda::atomic_ref payload_ref, const T& val) 38 | { 39 | payload_ref.fetch_add(val, cuda::memory_order_relaxed); 40 | } 41 | }; 42 | 43 | /** 44 | * @brief Device functor performing max reduction, used with `insert-or-apply` 45 | */ 46 | struct max { 47 | /** 48 | * @brief Performs atomic fetch_max on payload and the new value to be inserted 49 | * 50 | * @tparam T The payload type 51 | * @tparam Scope The cuda::thread_scope used for atomic_ref 52 | * 53 | * @param payload_ref The atomic_ref pointing to payload part of the slot 54 | * @param val The new value to be applied as reduction to the current value 55 | * in the payload. 56 | */ 57 | template 58 | __device__ void operator()(cuda::atomic_ref payload_ref, const T& val) 59 | { 60 | payload_ref.fetch_max(val, cuda::memory_order_relaxed); 61 | } 62 | }; 63 | 64 | /** 65 | * @brief Device functor performing min reduction, used with `insert-or-apply` 66 | */ 67 | struct min { 68 | /** 69 | * @brief Performs atomic fetch_min on payload and the new value to be inserted 70 | * 71 | * @tparam T The payload type 72 | * @tparam Scope The cuda::thread_scope used for atomic_ref 73 | * 74 | * @param payload_ref The atomic_ref pointing to payload part of the slot 75 | * @param val The new value to be applied as reduction to the current value 76 | * in the payload. 77 | */ 78 | template 79 | __device__ void operator()(cuda::atomic_ref payload_ref, const T& val) 80 | { 81 | payload_ref.fetch_min(val, cuda::memory_order_relaxed); 82 | } 83 | }; 84 | 85 | } // namespace cuco::reduce -------------------------------------------------------------------------------- /examples/static_multimap/host_bulk_example.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | 26 | int main(void) 27 | { 28 | using key_type = int; 29 | using value_type = int; 30 | 31 | key_type empty_key_sentinel = -1; 32 | value_type empty_value_sentinel = -1; 33 | 34 | constexpr std::size_t N = 50'000; 35 | 36 | // Constructs a multimap with 100,000 slots using -1 and -1 as the empty key/value 37 | // sentinels. Note the capacity is chosen knowing we will insert 50,000 keys, 38 | // for an load factor of 50%. 39 | cuco::static_multimap map{ 40 | N * 2, cuco::empty_key{empty_key_sentinel}, cuco::empty_value{empty_value_sentinel}}; 41 | 42 | thrust::device_vector> pairs(N); 43 | 44 | // Create a sequence of pairs. Eeach key has two matches. 45 | // E.g., {{0,0}, {1,1}, ... 
{0,25'000}, {1, 25'001}, ...} 46 | thrust::transform( 47 | thrust::make_counting_iterator(0), 48 | thrust::make_counting_iterator(pairs.size()), 49 | pairs.begin(), 50 | [] __device__(auto i) { return cuco::pair{i % (N / 2), i}; }); 51 | 52 | // Inserts all pairs into the map 53 | map.insert(pairs.begin(), pairs.end()); 54 | 55 | // Sequence of probe keys {0, 1, 2, ... 49'999} 56 | thrust::device_vector keys_to_find(N); 57 | thrust::sequence(keys_to_find.begin(), keys_to_find.end(), 0); 58 | 59 | // Counts the occurrences of keys in [0, 50'000) contained in the multimap. 60 | // The `_outer` suffix indicates that the occurrence of a non-match is 1. 61 | auto const output_size = map.count_outer(keys_to_find.begin(), keys_to_find.end()); 62 | 63 | thrust::device_vector> d_results(output_size); 64 | 65 | // Finds all keys {0, 1, 2, ...} and stores associated key/value pairs into `d_results` 66 | // If a key `keys_to_find[i]` doesn't exist, `d_results[i].second == empty_value_sentinel` 67 | auto output_end = map.retrieve_outer(keys_to_find.begin(), keys_to_find.end(), d_results.begin()); 68 | auto retrieve_size = output_end - d_results.begin(); 69 | 70 | // The total number of outer matches should be `N + N / 2` 71 | assert(not(output_size == retrieve_size == N + N / 2)); 72 | 73 | return 0; 74 | } 75 | -------------------------------------------------------------------------------- /tests/hyperloglog/unique_sequence_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
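// --- Editor's illustrative sketch (not part of the original sources) -----------------
// What the functors in include/cuco/utility/reduction_functors.cuh above are meant for:
// "insert-or-apply" style accumulation into a map payload. The host API name
// `insert_or_apply` and the exact constructor/iterator shapes below are assumptions
// based on the doc comments and on the other map examples in this repo, not verbatim
// library code; `pairs_begin`, `num_pairs` and `num_distinct_keys` are hypothetical.
template <typename PairIt>
void build_histogram(PairIt pairs_begin, std::size_t num_pairs, std::size_t num_distinct_keys)
{
  // Payload starts at the empty-value sentinel; colliding keys get their payloads
  // atomically combined via cuco::reduce::plus (fetch_add under the hood).
  auto histogram = cuco::static_map<int, int>{
    2 * num_distinct_keys, cuco::empty_key{-1}, cuco::empty_value{0}};

  histogram.insert_or_apply(pairs_begin, pairs_begin + num_pairs, cuco::reduce::plus{});
}
// --------------------------------------------------------------------------------------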
15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | 25 | #include 26 | #include 27 | 28 | #include 29 | #include 30 | #include 31 | 32 | TEMPLATE_TEST_CASE_SIG("hyperloglog: unique sequence", 33 | "", 34 | ((typename T, typename Hash), T, Hash), 35 | (int32_t, cuco::xxhash_64), 36 | (int64_t, cuco::xxhash_64), 37 | (__int128_t, cuco::xxhash_64<__int128_t>)) 38 | { 39 | auto num_items_pow2 = GENERATE(25, 26, 28); 40 | auto hll_precision = GENERATE(8, 10, 12, 13, 18, 20); 41 | auto sketch_size_kb = 4 * (1ull << hll_precision) / 1024; 42 | INFO("hll_precision=" << hll_precision); 43 | INFO("sketch_size_kb=" << sketch_size_kb); 44 | INFO("num_items=2^" << num_items_pow2); 45 | auto num_items = 1ull << num_items_pow2; 46 | 47 | // This factor determines the error threshold for passing the test 48 | double constexpr tolerance_factor = 2.5; 49 | // RSD for a given precision is given by the following formula 50 | double const relative_standard_deviation = 51 | 1.04 / std::sqrt(static_cast(1ull << hll_precision)); 52 | 53 | thrust::device_vector items(num_items); 54 | 55 | // Generate `num_items` distinct items 56 | thrust::sequence(items.begin(), items.end(), 0); 57 | 58 | // Initialize the estimator 59 | cuco::hyperloglog estimator{ 60 | cuco::sketch_size_kb(sketch_size_kb)}; 61 | 62 | REQUIRE(estimator.estimate() == 0); 63 | 64 | // Add all items to the estimator 65 | estimator.add(items.begin(), items.end()); 66 | 67 | auto const estimate = estimator.estimate(); 68 | 69 | // Adding the same items again should not affect the result 70 | estimator.add(items.begin(), items.begin() + num_items / 2); 71 | REQUIRE(estimator.estimate() == estimate); 72 | 73 | // Clearing the estimator should reset the estimate 74 | estimator.clear(); 75 | REQUIRE(estimator.estimate() == 0); 76 | 77 | double const relative_error = 78 | std::abs((static_cast(estimate) / static_cast(num_items)) - 1.0); 79 | 80 | // Check if the error is acceptable 81 | REQUIRE(relative_error < tolerance_factor * relative_standard_deviation); 82 | } 83 | -------------------------------------------------------------------------------- /include/cuco/utility/allocator.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | #include 22 | 23 | #include 24 | 25 | namespace cuco { 26 | /** 27 | * @brief A stream-ordered device allocator using `cudaMallocAsync`/`cudaFreeAsync`. 28 | * 29 | * @tparam T The allocator's value type 30 | */ 31 | template 32 | class cuda_allocator { 33 | public: 34 | using value_type = T; ///< Allocator's value type 35 | 36 | cuda_allocator() = default; 37 | 38 | /** 39 | * @brief Copy constructor. 
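// --- Editor's note (illustrative arithmetic, not part of the original sources) -------
// Concretely, for hll_precision = 12 in tests/hyperloglog/unique_sequence_test.cu above:
// the sketch holds 2^12 = 4096 registers, so the relative standard deviation is
// 1.04 / sqrt(4096) ≈ 0.016, and with tolerance_factor = 2.5 the test accepts relative
// errors of up to roughly 4.1%.
// --------------------------------------------------------------------------------------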
40 | */ 41 | template 42 | cuda_allocator(cuda_allocator const&) noexcept 43 | { 44 | } 45 | 46 | /** 47 | * @brief Allocates storage for `n` objects of type `T` using `cudaMallocAsync`. 48 | * 49 | * @param n The number of objects to allocate storage for 50 | * @param stream The stream to order the allocation on 51 | * @return Pointer to the allocated storage 52 | */ 53 | value_type* allocate(std::size_t n, cuda::stream_ref stream) 54 | { 55 | value_type* p; 56 | CUCO_CUDA_TRY(cudaMallocAsync(&p, sizeof(value_type) * n, stream.get())); 57 | return p; 58 | } 59 | 60 | /** 61 | * @brief Deallocates storage pointed to by `p` using `cudaFreeAsync`. 62 | * 63 | * @param p Pointer to memory to deallocate 64 | * @param stream The stream to order the deallocation on 65 | */ 66 | void deallocate(value_type* p, std::size_t, cuda::stream_ref stream) 67 | { 68 | CUCO_CUDA_TRY(cudaFreeAsync(p, stream.get())); 69 | } 70 | }; 71 | 72 | /** 73 | * @brief Equality comparison operator. 74 | * 75 | * @tparam T Value type of LHS object 76 | * @tparam U Value type of RHS object 77 | * 78 | * @return `true` iff given arguments are equal 79 | */ 80 | template 81 | bool operator==(cuda_allocator const&, cuda_allocator const&) noexcept 82 | { 83 | return true; 84 | } 85 | 86 | /** 87 | * @brief Inequality comparison operator. 88 | * 89 | * @tparam T Value type of LHS object 90 | * @tparam U Value type of RHS object 91 | * 92 | * @param lhs Left-hand side object to compare 93 | * @param rhs Right-hand side object to compare 94 | * 95 | * @return `true` iff given arguments are not equal 96 | */ 97 | template 98 | bool operator!=(cuda_allocator const& lhs, cuda_allocator const& rhs) noexcept 99 | { 100 | return not(lhs == rhs); 101 | } 102 | 103 | } // namespace cuco 104 | -------------------------------------------------------------------------------- /include/cuco/detail/roaring_bitmap/roaring_bitmap.inl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025 NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
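// --- Editor's illustrative sketch (not part of the original sources) -----------------
// Minimal stream-ordered usage of cuco::cuda_allocator from
// include/cuco/utility/allocator.hpp above. Stream creation and cleanup are ordinary
// CUDA runtime calls and not part of the allocator interface itself.
void allocator_example()
{
  cudaStream_t stream;
  cudaStreamCreate(&stream);

  cuco::cuda_allocator<int> alloc;
  int* buffer = alloc.allocate(1024, cuda::stream_ref{stream});

  // ... launch work on `stream` that reads/writes `buffer` ...

  alloc.deallocate(buffer, 1024, cuda::stream_ref{stream});

  cudaStreamSynchronize(stream);
  cudaStreamDestroy(stream);
}
// --------------------------------------------------------------------------------------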
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | namespace cuco::experimental { 23 | 24 | template 25 | roaring_bitmap::roaring_bitmap(cuda::std::byte const* bitmap, 26 | Allocator const& alloc, 27 | cuda::stream_ref stream) 28 | : storage_{bitmap, alloc, stream} 29 | { 30 | } 31 | 32 | template 33 | template 34 | void roaring_bitmap::contains(InputIt first, 35 | InputIt last, 36 | OutputIt output, 37 | cuda::stream_ref stream) const 38 | { 39 | ref_type{storage_.ref()}.contains(first, last, output, stream); 40 | } 41 | 42 | template 43 | template 44 | void roaring_bitmap::contains_async(InputIt first, 45 | InputIt last, 46 | OutputIt output, 47 | cuda::stream_ref stream) const noexcept 48 | { 49 | ref_type{storage_.ref()}.contains_async(first, last, output, stream); 50 | } 51 | 52 | template 53 | cuda::std::size_t roaring_bitmap::size() const noexcept 54 | { 55 | return ref_type{storage_.ref()}.size(); 56 | } 57 | 58 | template 59 | bool roaring_bitmap::empty() const noexcept 60 | { 61 | return ref_type{storage_.ref()}.empty(); 62 | } 63 | 64 | template 65 | cuda::std::byte const* roaring_bitmap::data() const noexcept 66 | { 67 | return ref_type{storage_.ref()}.data(); 68 | } 69 | 70 | template 71 | cuda::std::size_t roaring_bitmap::size_bytes() const noexcept 72 | { 73 | return ref_type{storage_.ref()}.size_bytes(); 74 | } 75 | 76 | template 77 | typename roaring_bitmap::allocator_type roaring_bitmap::allocator() 78 | const noexcept 79 | { 80 | return storage_.allocator(); 81 | } 82 | 83 | template 84 | typename roaring_bitmap::ref_type roaring_bitmap::ref() const noexcept 85 | { 86 | return ref_type{storage_.ref()}; 87 | } 88 | } // namespace cuco::experimental -------------------------------------------------------------------------------- /include/cuco/detail/utility/cuda.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | */ 15 | 16 | #pragma once 17 | 18 | #include 19 | 20 | #include 21 | 22 | #if defined(CUCO_DISABLE_KERNEL_VISIBILITY_WARNING_SUPPRESSION) 23 | #define CUCO_SUPPRESS_KERNEL_WARNINGS 24 | #elif defined(__NVCC__) && (defined(__GNUC__) || defined(__clang__)) 25 | // handle when nvcc is the CUDA compiler and gcc or clang is host 26 | #define CUCO_SUPPRESS_KERNEL_WARNINGS _Pragma("nv_diag_suppress 1407") 27 | _Pragma("GCC diagnostic ignored \"-Wattributes\"") 28 | #elif defined(__clang__) 29 | // handle when clang is the CUDA compiler 30 | #define CUCO_SUPPRESS_KERNEL_WARNINGS _Pragma("clang diagnostic ignored \"-Wattributes\"") 31 | #elif defined(__NVCOMPILER) 32 | #define CUCO_SUPPRESS_KERNEL_WARNINGS #pragma diag_suppress attribute_requires_external_linkage 33 | #endif 34 | 35 | #ifndef CUCO_KERNEL 36 | #define CUCO_KERNEL __attribute__((visibility("hidden"))) __global__ 37 | #endif 38 | namespace cuco { 39 | namespace detail { 40 | 41 | using index_type = cuda::std::int64_t; ///< CUDA thread index type 42 | 43 | /// Default block size 44 | /// CUDA warp size 45 | [[nodiscard]] __device__ constexpr cuda::std::int32_t warp_size() noexcept { return 32; } 46 | 47 | /** 48 | * @brief Returns the global thread index in a 1D scalar grid 49 | * 50 | * @return The global thread index 51 | */ 52 | [[nodiscard]] __device__ inline index_type global_thread_id() noexcept 53 | { 54 | return index_type{threadIdx.x} + index_type{blockDim.x} * index_type{blockIdx.x}; 55 | } 56 | 57 | /** 58 | * @brief Returns the grid stride of a 1D grid 59 | * 60 | * @return The grid stride 61 | */ 62 | [[nodiscard]] __device__ inline index_type grid_stride() noexcept 63 | { 64 | return index_type{gridDim.x} * index_type{blockDim.x}; 65 | } 66 | 67 | /** 68 | * @brief Constexpr helper to extract the size of a Cooperative Group. 69 | * 70 | * @tparam Tile The Cooperative Group type 71 | */ 72 | template 73 | struct tile_size; 74 | 75 | /** 76 | * @brief Specialization of `cuco::detail::tile_size` for 'cooperative_groups::thread_block_tile'. 77 | * 78 | * @tparam CGSize The Cooperative Group size 79 | * @tparam ParentCG The Cooperative Group the tile has been created from 80 | */ 81 | template 82 | struct tile_size> { 83 | static constexpr uint32_t value = CGSize; ///< Size of the `thread_block_tile` 84 | }; 85 | 86 | template 87 | __device__ constexpr uint32_t tile_size_v = tile_size::value; 88 | 89 | } // namespace detail 90 | } // namespace cuco 91 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to cuCollections 2 | 3 | If you are interested in contributing to cuCollections, your contributions will fall 4 | into three categories: 5 | 1. You want to report a bug, feature request, or documentation issue 6 | - File an [issue](https://github.com/NVIDIA/cuCollections/issues/new/choose) 7 | describing what you encountered or what you want to see changed. 8 | - The RAPIDS team will evaluate the issues and triage them, scheduling 9 | them for a release. If you believe the issue needs priority attention 10 | comment on the issue to notify the team. 11 | 2. You want to propose a new Feature and implement it 12 | - Post about your intended feature, and we shall discuss the design and 13 | implementation. 
14 | - Once we agree that the plan looks good, go ahead and implement it, using 15 | the [code contributions](https://github.com/NVIDIA/cuCollections/blob/master/CONTRIBUTING.md#code-contributions) guide below. 16 | 3. You want to implement a feature or bug-fix for an outstanding issue 17 | - Follow the [code contributions](https://github.com/NVIDIA/cuCollections/blob/master/CONTRIBUTING.md#code-contributions) guide below. 18 | - If you need more context on a particular issue, please ask and we shall 19 | provide. 20 | 21 | ## Code contributions 22 | 23 | ### Your first issue 24 | 25 | 1. Read the project's [README.md](https://github.com/NVIDIA/cuCollections/blob/master/README.md) 26 | to learn how to setup the development environment 27 | 2. Find an issue to work on. The best way is to look for the [good first issue](https://github.com/NVIDIA/cuCollections/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) 28 | or [help wanted](https://github.com/NVIDIA/cuCollections/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) labels 29 | 3. Comment on the issue saying you are going to work on it 30 | 4. Code! Make sure to update unit tests! 31 | 5. When done, [create your pull request](https://github.com/NVIDIA/cuCollections/compare) 32 | 6. Verify that CI passes all [status checks](https://help.github.com/articles/about-status-checks/). Fix if needed 33 | 7. Wait for other developers to review your code and update code as needed 34 | 8. Once reviewed and approved, a RAPIDS developer will merge your pull request 35 | 36 | Remember, if you are unsure about anything, don't hesitate to comment on issues 37 | and ask for clarifications! 38 | 39 | ### Seasoned developers 40 | 41 | Once you have gotten your feet wet and are more comfortable with the code, you 42 | can look at the prioritized issues of our next release in our [project boards](https://github.com/NVIDIA/cuCollections/projects). 43 | 44 | > **Pro Tip:** Always look at the release board with the highest number for 45 | issues to work on. This is where RAPIDS developers also focus their efforts. 46 | 47 | Look at the unassigned issues, and find an issue you are comfortable with 48 | contributing to. Start with _Step 3_ from above, commenting on the issue to let 49 | others know you are working on it. If you have any questions related to the 50 | implementation of the issue, ask them in the issue instead of the PR. 51 | 52 | ## Attribution 53 | Portions adopted from https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md 54 | -------------------------------------------------------------------------------- /ci/matrix.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | cuda_12_0: &cuda_12_0 '12.0' 17 | cuda_12_9: &cuda_12_9 '12.9' 18 | cuda_13_0: &cuda_13_0 '13.0' 19 | 20 | # The GPUs to test on 21 | # Note: This assumes that the appropriate gpu_build_archs are set to include building for the GPUs listed here 22 | gpus: 23 | - 'a100' 24 | - 'v100' 25 | 26 | # The version of the devcontainer images to use from https://hub.docker.com/r/rapidsai/devcontainers 27 | devcontainer_version: '25.12' 28 | 29 | # Each environment below will generate a unique build/test job 30 | # See the "compute-matrix" job in the workflow for how this is parsed and used 31 | # cuda: The CUDA Toolkit version 32 | # os: The operating system used 33 | # cpu: The CPU architecture 34 | # compiler: The compiler to use 35 | # name: The compiler name 36 | # version: The compiler version 37 | # exe: The unversioned compiler binary name 38 | # To use the system's default compiler set "exe: 'c++'" or "name: 'cc'" 39 | # gpu_build_archs: The GPU architectures to build for (comma-separated list) 40 | # std: The C++ standards to build for 41 | # This field is unique as it will generate an independent build/test job for each value 42 | 43 | # Configurations that will run for every PR 44 | pull_request: 45 | nvcc: 46 | - {cuda: *cuda_12_0, os: 'ubuntu22.04', cpu: 'amd64', compiler: {name: 'gcc', version: '11', exe: 'g++'}, gpu_build_archs: '70', std: [17], jobs: ['build', 'test']} 47 | - {cuda: *cuda_12_9, os: 'ubuntu24.04', cpu: 'amd64', compiler: {name: 'gcc', version: '13', exe: 'g++'}, gpu_build_archs: '70', std: [17], jobs: ['build', 'test']} 48 | - {cuda: *cuda_12_9, os: 'ubuntu24.04', cpu: 'amd64', compiler: {name: 'gcc', version: '13', exe: 'g++'}, gpu_build_archs: '80,90', std: [17], jobs: ['build']} 49 | - {cuda: *cuda_12_0, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'llvm', version: '14', exe: 'clang++'}, gpu_build_archs: '70', std: [17], jobs: ['build']} 50 | - {cuda: *cuda_12_9, os: 'ubuntu22.04', cpu: 'amd64', compiler: {name: 'llvm', version: '18', exe: 'clang++'}, gpu_build_archs: '90', std: [17], jobs: ['build']} 51 | - {cuda: *cuda_13_0, os: 'ubuntu24.04', cpu: 'amd64', compiler: {name: 'gcc', version: '13', exe: 'g++'}, gpu_build_archs: '80', std: [17], jobs: ['build']} 52 | - {cuda: *cuda_13_0, os: 'ubuntu24.04', cpu: 'amd64', compiler: {name: 'gcc', version: '13', exe: 'g++'}, gpu_build_archs: '80,90', std: [17], jobs: ['build']} 53 | - {cuda: *cuda_13_0, os: 'ubuntu24.04', cpu: 'amd64', compiler: {name: 'llvm', version: '20', exe: 'clang++'}, gpu_build_archs: '90', std: [17], jobs: ['build']} 54 | -------------------------------------------------------------------------------- /tests/dynamic_bitset/select_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #include 26 | 27 | template 28 | __global__ void select_false_kernel(BitsetRef ref, size_type num_elements, OutputIt output) 29 | { 30 | cuco::detail::index_type index = blockIdx.x * blockDim.x + threadIdx.x; 31 | cuco::detail::index_type stride = gridDim.x * blockDim.x; 32 | while (index < num_elements) { 33 | output[index] = ref.select_false(index); 34 | index += stride; 35 | } 36 | } 37 | 38 | using cuco::test::modulo_bitgen; 39 | 40 | TEST_CASE("dynamic_bitset select test", "") 41 | { 42 | cuco::experimental::detail::dynamic_bitset bv; 43 | 44 | using size_type = std::size_t; 45 | constexpr size_type num_elements{4000}; 46 | 47 | size_type num_set = 0; 48 | for (size_type i = 0; i < num_elements; i++) { 49 | bv.push_back(modulo_bitgen(i)); 50 | num_set += modulo_bitgen(i); 51 | } 52 | 53 | // Check select 54 | { 55 | thrust::device_vector keys(num_set); 56 | thrust::sequence(keys.begin(), keys.end(), 0); 57 | 58 | thrust::device_vector d_selects(num_set); 59 | 60 | bv.select(keys.begin(), keys.end(), d_selects.begin()); 61 | 62 | thrust::host_vector h_selects = d_selects; 63 | 64 | size_type num_matches = 0; 65 | size_type cur_set_pos = -1lu; 66 | for (size_type i = 0; i < num_set; i++) { 67 | do { 68 | cur_set_pos++; 69 | } while (cur_set_pos < num_elements and !modulo_bitgen(cur_set_pos)); 70 | 71 | num_matches += cur_set_pos == h_selects[i]; 72 | } 73 | REQUIRE(num_matches == num_set); 74 | } 75 | 76 | // Check select_false 77 | { 78 | size_type num_not_set = num_elements - num_set; 79 | 80 | auto ref = bv.ref(); 81 | thrust::device_vector device_result(num_not_set); 82 | select_false_kernel<<<1, 1024>>>(ref, num_not_set, device_result.data()); 83 | thrust::host_vector host_result = device_result; 84 | 85 | size_type num_matches = 0; 86 | size_type cur_not_set_pos = -1lu; 87 | for (size_type i = 0; i < num_not_set; i++) { 88 | do { 89 | cur_not_set_pos++; 90 | } while (cur_not_set_pos < num_elements and modulo_bitgen(cur_not_set_pos)); 91 | 92 | num_matches += cur_not_set_pos == host_result[i]; 93 | } 94 | REQUIRE(num_matches == num_not_set); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /examples/static_multiset/host_bulk_example.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | #include 26 | 27 | /** 28 | * @file host_bulk_example.cu 29 | * @brief Demonstrates usage of the static_multiset "bulk" host APIs. 30 | * 31 | * The bulk APIs are only invocable from the host and are used for doing operations like `insert` or 32 | * `retrieve` on a multiset of keys. 
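 *
 * In outline, the flow demonstrated below is (a condensed sketch of this example; iterator and
 * variable names are placeholders):
 * @code{.cpp}
 * cuco::static_multiset<key_type> multiset{capacity, cuco::empty_key{empty_key_sentinel}};
 * multiset.insert(first, last);                     // bulk insert (performed twice below)
 * auto const n = multiset.count(first, last);       // count matches to size the output buffers
 * multiset.retrieve(first, last, probes_out, matches_out);
 * @endcode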
33 | * 34 | */ 35 | int main(void) 36 | { 37 | using key_type = int; 38 | 39 | // Empty slots are represented by reserved "sentinel" values. These values should be selected such 40 | // that they never occur in your input data. 41 | key_type constexpr empty_key_sentinel = -1; 42 | 43 | // Number of keys to be inserted 44 | std::size_t constexpr num_keys = 50'000; 45 | 46 | // Compute capacity based on a 50% load factor 47 | auto constexpr load_factor = 0.5; 48 | std::size_t const capacity = std::ceil(num_keys / load_factor); 49 | 50 | // Constructs a set with at least `capacity` slots using -1 as the empty keys sentinel. 51 | cuco::static_multiset multiset{capacity, cuco::empty_key{empty_key_sentinel}}; 52 | 53 | // Create a sequence of keys {0, 1, 2, .., i} 54 | // We're going to insert each key twice so we only need 'num_keys / 2' distinct keys. 55 | thrust::device_vector keys(num_keys / 2); 56 | thrust::sequence(keys.begin(), keys.end(), 0); 57 | 58 | // Inserts all keys into the hash set 59 | multiset.insert(keys.begin(), keys.end()); 60 | // Insert the same set of keys again, so each distinct key should occur twice in the multiset 61 | multiset.insert(keys.begin(), keys.end()); 62 | 63 | // Counts the occurrences of matching keys contained in the multiset. 64 | std::size_t const counted_output_size = multiset.count(keys.begin(), keys.end()); 65 | 66 | // Storage for result 67 | thrust::device_vector output_probes(counted_output_size); 68 | thrust::device_vector output_matches(counted_output_size); 69 | 70 | // Retrieve all matching keys 71 | auto const [output_probes_end, _] = 72 | multiset.retrieve(keys.begin(), keys.end(), output_probes.begin(), output_matches.begin()); 73 | std::size_t const retrieved_output_size = output_probes_end - output_probes.begin(); 74 | 75 | if ((retrieved_output_size == counted_output_size) and (retrieved_output_size == num_keys)) { 76 | std::cout << "Success! Found all keys.\n"; 77 | } else { 78 | std::cout << "Fail! Something went wrong.\n"; 79 | } 80 | 81 | return 0; 82 | } 83 | -------------------------------------------------------------------------------- /tests/static_set/insert_and_find_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #include 26 | 27 | template 28 | void test_insert_and_find(Set& set, std::size_t num_keys) 29 | { 30 | using Key = typename Set::key_type; 31 | static auto constexpr cg_size = Set::cg_size; 32 | 33 | auto const keys_begin = thrust::counting_iterator(0); 34 | auto const keys_end = thrust::counting_iterator(num_keys); 35 | 36 | thrust::device_vector iters1(num_keys); 37 | thrust::device_vector iters2(num_keys); 38 | 39 | thrust::device_vector inserted(num_keys); 40 | 41 | // insert first time, fills inserted with true 42 | set.insert_and_find(keys_begin, keys_end, iters1.begin(), inserted.begin()); 43 | REQUIRE(cuco::test::all_of(inserted.begin(), inserted.end(), cuda::std::identity{})); 44 | 45 | // insert second time, fills inserted with false as keys already in set 46 | set.insert_and_find(keys_begin, keys_end, iters2.begin(), inserted.begin()); 47 | REQUIRE(cuco::test::none_of(inserted.begin(), inserted.end(), cuda::std::identity{})); 48 | 49 | // both iters1 and iters2 should be same, as keys will be referring to same slot 50 | REQUIRE( 51 | cuco::test::equal(iters1.begin(), iters1.end(), iters2.begin(), cuda::std::equal_to{})); 52 | } 53 | 54 | TEMPLATE_TEST_CASE_SIG( 55 | "static_set Insert and find", 56 | "", 57 | ((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize), 58 | (int32_t, cuco::test::probe_sequence::double_hashing, 1), 59 | (int32_t, cuco::test::probe_sequence::double_hashing, 2), 60 | (int64_t, cuco::test::probe_sequence::double_hashing, 1), 61 | (int64_t, cuco::test::probe_sequence::double_hashing, 2), 62 | (int32_t, cuco::test::probe_sequence::linear_probing, 1), 63 | (int32_t, cuco::test::probe_sequence::linear_probing, 2), 64 | (int64_t, cuco::test::probe_sequence::linear_probing, 1), 65 | (int64_t, cuco::test::probe_sequence::linear_probing, 2)) 66 | { 67 | constexpr std::size_t num_keys{400}; 68 | 69 | using probe = std::conditional_t>, 71 | cuco::double_hashing>>; 72 | 73 | auto set = 74 | cuco::static_set{num_keys, cuco::empty_key{-1}, {}, probe{}, {}, cuco::storage<2>{}}; 75 | 76 | test_insert_and_find(set, num_keys); 77 | } 78 | -------------------------------------------------------------------------------- /tests/static_map/key_sentinel_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | 28 | #define SIZE 10 29 | __device__ int A[SIZE]; 30 | 31 | template 32 | struct custom_equals { 33 | __device__ bool operator()(T lhs, T rhs) const { return A[lhs] == A[rhs]; } 34 | }; 35 | 36 | TEMPLATE_TEST_CASE_SIG("static_map key sentinel tests", "", ((typename T), T), (int32_t), (int64_t)) 37 | { 38 | using Key = T; 39 | using Value = T; 40 | 41 | constexpr std::size_t num_keys{SIZE}; 42 | auto map = cuco::static_map{SIZE * 2, 43 | cuco::empty_key{-1}, 44 | cuco::empty_value{-1}, 45 | custom_equals{}, 46 | cuco::linear_probing<1, cuco::default_hash_function>{}}; 47 | 48 | auto insert_ref = map.ref(cuco::op::insert); 49 | auto find_ref = map.ref(cuco::op::find); 50 | 51 | int h_A[SIZE]; 52 | for (int i = 0; i < SIZE; i++) { 53 | h_A[i] = i; 54 | } 55 | CUCO_CUDA_TRY(cudaMemcpyToSymbol(A, h_A, SIZE * sizeof(int))); 56 | 57 | auto pairs_begin = thrust::make_transform_iterator( 58 | thrust::make_counting_iterator(0), 59 | cuda::proclaim_return_type>( 60 | [] __device__(auto i) { return cuco::pair(i, i); })); 61 | 62 | SECTION( 63 | "Tests of non-CG insert: The custom `key_equal` can never be used to compare against sentinel") 64 | { 65 | REQUIRE( 66 | cuco::test::all_of(pairs_begin, 67 | pairs_begin + num_keys, 68 | cuda::proclaim_return_type( 69 | [insert_ref] __device__(cuco::pair const& pair) mutable { 70 | return insert_ref.insert(pair); 71 | }))); 72 | } 73 | 74 | SECTION( 75 | "Tests of CG insert: The custom `key_equal` can never be used to compare against sentinel") 76 | { 77 | map.insert(pairs_begin, pairs_begin + num_keys); 78 | // All keys inserted via custom `key_equal` should be found 79 | REQUIRE(cuco::test::all_of( 80 | pairs_begin, 81 | pairs_begin + num_keys, 82 | cuda::proclaim_return_type([find_ref] __device__(cuco::pair const& pair) { 83 | auto const found = find_ref.find(pair.first); 84 | return (found != find_ref.end()) and 85 | (found->first == pair.first and found->second == pair.second); 86 | }))); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /tests/static_set/large_input_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #include 30 | 31 | template 32 | void test_unique_sequence(Set& set, bool* res_begin, std::size_t num_keys) 33 | { 34 | using Key = typename Set::key_type; 35 | 36 | auto const keys_begin = thrust::counting_iterator(0); 37 | auto const keys_end = thrust::counting_iterator(num_keys); 38 | 39 | SECTION("Non-inserted keys should not be contained.") 40 | { 41 | REQUIRE(set.size() == 0); 42 | 43 | set.contains(keys_begin, keys_end, res_begin); 44 | REQUIRE(cuco::test::none_of(res_begin, res_begin + num_keys, cuda::std::identity{})); 45 | } 46 | 47 | set.insert(keys_begin, keys_end); 48 | REQUIRE(set.size() == num_keys); 49 | 50 | SECTION("All inserted key/value pairs should be contained.") 51 | { 52 | set.contains(keys_begin, keys_end, res_begin); 53 | REQUIRE(cuco::test::all_of(res_begin, res_begin + num_keys, cuda::std::identity{})); 54 | } 55 | 56 | SECTION("All inserted key/value pairs can be retrieved.") 57 | { 58 | auto output_keys = thrust::device_vector(num_keys); 59 | 60 | auto const keys_end = set.retrieve_all(output_keys.begin()); 61 | REQUIRE(static_cast(std::distance(output_keys.begin(), keys_end)) == num_keys); 62 | 63 | thrust::sort(output_keys.begin(), keys_end); 64 | 65 | REQUIRE(cuco::test::equal(output_keys.begin(), 66 | output_keys.end(), 67 | thrust::counting_iterator(0), 68 | cuda::std::equal_to{})); 69 | } 70 | } 71 | 72 | TEMPLATE_TEST_CASE_SIG( 73 | "cuco::static_set large input test", 74 | "", 75 | ((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize), 76 | (int64_t, cuco::test::probe_sequence::double_hashing, 1), 77 | (int64_t, cuco::test::probe_sequence::double_hashing, 2)) 78 | { 79 | constexpr std::size_t num_keys{1'200'000'000}; 80 | 81 | using extent_type = cuco::extent; 82 | using probe = cuco::double_hashing>; 83 | 84 | try { 85 | auto set = cuco::static_set{num_keys * 2, cuco::empty_key{-1}, {}, probe{}}; 86 | 87 | thrust::device_vector d_contained(num_keys); 88 | test_unique_sequence(set, d_contained.data().get(), num_keys); 89 | } catch (cuco::cuda_error&) { 90 | SKIP("Out of memory"); 91 | } catch (std::bad_alloc&) { 92 | SKIP("Out of memory"); 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /include/cuco/detail/storage/storage_base.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | #include 22 | 23 | namespace cuco { 24 | namespace detail { 25 | /** 26 | * @brief Custom deleter for unique pointer. 
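 *
 * Illustrative pairing with `std::unique_ptr` (a sketch only; the allocator type, its
 * stream-ordered `allocate`/`deallocate` signature, and the variable names are assumed):
 * @code{.cpp}
 * allocator_type alloc;
 * auto* p = alloc.allocate(n, stream);  // assumed stream-ordered allocation of `n` values
 * custom_deleter<std::size_t, allocator_type> del{n, alloc, stream};
 * // on destruction, the deleter returns the memory through the same allocator and stream
 * std::unique_ptr<value_type, custom_deleter<std::size_t, allocator_type>> slots{p, del};
 * @endcode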
27 | * 28 | * @tparam SizeType Type of device storage size 29 | * @tparam Allocator Type of allocator used for device storage 30 | */ 31 | template 32 | struct custom_deleter { 33 | using pointer = typename Allocator::value_type*; ///< Value pointer type 34 | 35 | /** 36 | * @brief Constructor of custom deleter. 37 | * 38 | * @param size Number of values to deallocate 39 | * @param allocator Allocator used for deallocating device storage 40 | * @param stream Stream to use for deallocation 41 | */ 42 | explicit constexpr custom_deleter(SizeType size, Allocator& allocator, cuda::stream_ref stream) 43 | : size_{size}, allocator_{allocator}, stream_{stream} 44 | { 45 | } 46 | 47 | /** 48 | * @brief Operator for deallocation 49 | * 50 | * @param ptr Pointer to the first value for deallocation 51 | */ 52 | void operator()(pointer ptr) { allocator_.deallocate(ptr, size_, stream_); } 53 | 54 | SizeType size_; ///< Number of values to delete 55 | Allocator& allocator_; ///< Allocator used deallocating values 56 | cuda::stream_ref stream_; ///< Stream used for deallocation 57 | }; 58 | 59 | /** 60 | * @brief Base class of open addressing storage. 61 | * 62 | * This class should not be used directly. 63 | * 64 | * @tparam Extent Type of extent denoting storage capacity 65 | */ 66 | template 67 | class storage_base { 68 | public: 69 | using extent_type = Extent; ///< Storage extent type 70 | using size_type = typename extent_type::value_type; ///< Storage size type 71 | 72 | /** 73 | * @brief Constructor of base storage. 74 | * 75 | * @param size Number of elements to (de)allocate 76 | */ 77 | __host__ __device__ explicit constexpr storage_base(Extent size) : extent_{size} {} 78 | 79 | /** 80 | * @brief Gets the total number of elements in the current storage. 81 | * 82 | * @return The total number of elements 83 | */ 84 | [[nodiscard]] __host__ __device__ constexpr size_type capacity() const noexcept 85 | { 86 | return static_cast(extent_); 87 | } 88 | 89 | /** 90 | * @brief Gets the extent of the current storage. 91 | * 92 | * @return The extent. 93 | */ 94 | [[nodiscard]] __host__ __device__ constexpr extent_type extent() const noexcept 95 | { 96 | return extent_; 97 | } 98 | 99 | protected: 100 | extent_type extent_; ///< Total number of elements 101 | }; 102 | 103 | } // namespace detail 104 | } // namespace cuco 105 | -------------------------------------------------------------------------------- /.github/workflows/verify-devcontainers.yml: -------------------------------------------------------------------------------- 1 | name: Verify devcontainers 2 | 3 | on: 4 | workflow_call: 5 | 6 | defaults: 7 | run: 8 | shell: bash -euo pipefail {0} 9 | 10 | jobs: 11 | verify-make-devcontainers: 12 | name: Verify devcontainers 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout repository 16 | uses: actions/checkout@v3 17 | - name: Setup jq and yq 18 | run: | 19 | sudo apt-get update 20 | sudo apt-get install jq -y 21 | sudo wget -O /usr/local/bin/yq https://github.com/mikefarah/yq/releases/download/v4.34.2/yq_linux_amd64 22 | sudo chmod +x /usr/local/bin/yq 23 | - name: Run the script to generate devcontainer files 24 | run: | 25 | ./.devcontainer/make_devcontainers.sh --verbose 26 | - name: Check for changes 27 | run: | 28 | if [[ $(git diff --stat) != '' ]]; then 29 | git diff --minimal 30 | echo "::error:: Dev Container files are out of date. Run the .devcontainer/make_devcontainers.sh script and commit the changes." 
31 | exit 1 32 | else 33 | echo "::note::Dev Container files are up-to-date." 34 | fi 35 | get-devcontainer-list: 36 | needs: verify-make-devcontainers 37 | name: List devcontainers 38 | runs-on: ubuntu-latest 39 | outputs: 40 | devcontainers: ${{ steps.get-list.outputs.devcontainers }} 41 | steps: 42 | - name: Check out the code 43 | uses: actions/checkout@v3 44 | - name: Get list of devcontainer.json paths and names 45 | id: get-list 46 | run: | 47 | devcontainers=$(find .devcontainer/ -name 'devcontainer.json' | while read -r devcontainer; do 48 | jq --arg path "$devcontainer" '{path: $path, name: .name}' "$devcontainer" 49 | done | jq -s -c .) 50 | echo "devcontainers=${devcontainers}" | tee --append "${GITHUB_OUTPUT}" 51 | verify-devcontainers: 52 | needs: get-devcontainer-list 53 | name: ${{matrix.devcontainer.name}} 54 | runs-on: ubuntu-latest 55 | strategy: 56 | fail-fast: false 57 | matrix: 58 | devcontainer: ${{fromJson(needs.get-devcontainer-list.outputs.devcontainers)}} 59 | permissions: 60 | id-token: write 61 | contents: read 62 | steps: 63 | - name: Check out the code 64 | uses: actions/checkout@v3 65 | # devcontainer/ci doesn't supported nested devcontainer.json files, so we need to copy the devcontainer.json 66 | # file to the top level .devcontainer/ directory 67 | - name: Copy devcontainer.json to .devcontainer/ 68 | run: | 69 | src="${{ matrix.devcontainer.path }}" 70 | dst=".devcontainer/devcontainer.json" 71 | if [[ "$src" != "$dst" ]]; then 72 | cp "$src" "$dst" 73 | fi 74 | # We don't really need sccache configured, but we need the AWS credentials envvars to be set 75 | # in order to avoid the devcontainer hanging waiting for GitHub authentication 76 | - name: Configure credentials and environment variables for sccache 77 | uses: ./.github/actions/configure_cccl_sccache 78 | - name: Run in devcontainer 79 | uses: devcontainers/ci@v0.3 80 | with: 81 | push: never 82 | env: | 83 | SCCACHE_REGION=${{ env.SCCACHE_REGION }} 84 | AWS_ACCESS_KEY_ID=${{ env.AWS_ACCESS_KEY_ID }} 85 | AWS_SESSION_TOKEN=${{ env.AWS_SESSION_TOKEN }} 86 | AWS_SECRET_ACCESS_KEY=${{ env.AWS_SECRET_ACCESS_KEY }} 87 | runCmd: | 88 | .devcontainer/verify_devcontainer.sh -------------------------------------------------------------------------------- /cmake/header_testing.cmake: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright (c) 2025, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | #============================================================================= 16 | 17 | # For every public header, build a translation unit containing `#include
` 18 | # to let the compiler try to figure out warnings in that header if it is not otherwise 19 | # included in tests, and also to verify if the headers are modular enough. 20 | # .inl files are not globbed for, because they are not supposed to be used as public 21 | # entrypoints. 22 | 23 | function(cuco_add_header_tests) 24 | file(GLOB_RECURSE headers 25 | RELATIVE "${CUCO_SOURCE_DIR}/include" 26 | CONFIGURE_DEPENDS 27 | "${CUCO_SOURCE_DIR}/include/cuco/*.cuh" 28 | "${CUCO_SOURCE_DIR}/include/cuco/*.hpp" 29 | ) 30 | 31 | list(LENGTH headers headers_count) 32 | message(STATUS "Found ${headers_count} headers for testing") 33 | 34 | # List of headers that have known issues or are not meant to be included directly 35 | set(excluded_headers 36 | # Add any headers that should be excluded from testing here 37 | # Example: cuco/internal_header.cuh 38 | ) 39 | 40 | # Remove excluded headers 41 | if(excluded_headers) 42 | list(REMOVE_ITEM headers ${excluded_headers}) 43 | endif() 44 | 45 | foreach (header IN LISTS headers) 46 | # Create a safe target name by replacing path separators and dots 47 | string(REPLACE "/" "_" header_target_name "${header}") 48 | string(REPLACE "." "_" header_target_name "${header_target_name}") 49 | # Use a hash to ensure uniqueness in case of similar names 50 | string(MD5 header_hash "${header}") 51 | string(SUBSTRING "${header_hash}" 0 8 header_hash_short) 52 | set(headertest_target "cuco_header_${header_target_name}_${header_hash_short}") 53 | 54 | set(header_src "${CMAKE_CURRENT_BINARY_DIR}/headers/${headertest_target}/${header}.cu") 55 | 56 | # Create the directory if it doesn't exist 57 | get_filename_component(header_dir "${header_src}" DIRECTORY) 58 | file(MAKE_DIRECTORY "${header_dir}") 59 | 60 | # Write simple test file that includes the header 61 | file(WRITE "${header_src}" "#include <${header}>\nint main() { return 0; }\n") 62 | 63 | # Create executable test for this specific header 64 | add_executable(${headertest_target} ${header_src}) 65 | target_link_libraries(${headertest_target} PRIVATE cuco::cuco CUDA::cudart) 66 | 67 | # Use common compile options (includes all compiler-specific warning suppressions) 68 | cuco_set_common_compile_options(${headertest_target}) 69 | 70 | set_target_properties(${headertest_target} PROPERTIES 71 | RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/tests/headers" 72 | ) 73 | 74 | # Add as a CTest test 75 | add_test(NAME ${headertest_target} COMMAND ${headertest_target}) 76 | endforeach() 77 | endfunction() 78 | 79 | cuco_add_header_tests() 80 | -------------------------------------------------------------------------------- /include/cuco/hash_functions.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2023, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #include 24 | 25 | namespace cuco { 26 | 27 | /** 28 | * @brief An Identity hash function to hash the given argument on host and device 29 | * 30 | * @throw A key must not be larger than uint64_t 31 | * 32 | * @tparam Key The type of the values to hash 33 | */ 34 | template 35 | using identity_hash = detail::identity_hash; 36 | 37 | /** 38 | * @brief The 32-bit integer finalizer function of `MurmurHash3` to hash the given argument on host 39 | * and device. 40 | * 41 | * @throw Key type must be 4 bytes in size 42 | * 43 | * @tparam Key The type of the values to hash 44 | */ 45 | template 46 | using murmurhash3_fmix_32 = detail::MurmurHash3_fmix32; 47 | 48 | /** 49 | * @brief The 64-bit integer finalizer function of `MurmurHash3` to hash the given argument on host 50 | * and device. 51 | * 52 | * @throw Key type must be 8 bytes in size 53 | * 54 | * @tparam Key The type of the values to hash 55 | */ 56 | template 57 | using murmurhash3_fmix_64 = detail::MurmurHash3_fmix64; 58 | 59 | /** 60 | * @brief A 32-bit `MurmurHash3` hash function to hash the given argument on host and device. 61 | * 62 | * @tparam Key The type of the values to hash 63 | */ 64 | template 65 | using murmurhash3_32 = detail::MurmurHash3_32; 66 | 67 | /** 68 | * @brief A 128-bit `MurmurHash3` hash function to hash the given argument on host and device. 69 | * 70 | * @tparam Key The type of the values to hash 71 | */ 72 | template 73 | using murmurhash3_x64_128 = detail::MurmurHash3_x64_128; 74 | 75 | /** 76 | * @brief A 128-bit `MurmurHash3` hash function to hash the given argument on host and device. 77 | * 78 | * @tparam Key The type of the values to hash 79 | */ 80 | template 81 | using murmurhash3_x86_128 = detail::MurmurHash3_x86_128; 82 | 83 | /** 84 | * @brief A 32-bit `XXH32` hash function to hash the given argument on host and device. 85 | * 86 | * @tparam Key The type of the values to hash 87 | */ 88 | template 89 | using xxhash_32 = detail::XXHash_32; 90 | 91 | /** 92 | * @brief A 64-bit `XXH64` hash function to hash the given argument on host and device. 93 | * 94 | * @tparam Key The type of the values to hash 95 | */ 96 | template 97 | using xxhash_64 = detail::XXHash_64; 98 | 99 | /** 100 | * @brief Default hash function. 101 | * 102 | * @tparam Key The type of the values to hash 103 | */ 104 | template 105 | using default_hash_function = xxhash_32; 106 | 107 | } // namespace cuco 108 | -------------------------------------------------------------------------------- /tests/hyperloglog/device_ref_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | 29 | #include 30 | #include 31 | 32 | template 33 | __global__ void estimate_kernel(cuco::sketch_size_kb sketch_size_kb, 34 | InputIt in, 35 | size_t n, 36 | OutputIt out) 37 | { 38 | extern __shared__ cuda::std::byte local_sketch[]; 39 | 40 | auto const block = cooperative_groups::this_thread_block(); 41 | 42 | // only a single block computes the estimate 43 | if (block.group_index().x == 0) { 44 | Ref estimator(cuda::std::span(local_sketch, Ref::sketch_bytes(sketch_size_kb))); 45 | 46 | estimator.clear(block); 47 | block.sync(); 48 | 49 | for (int i = block.thread_rank(); i < n; i += block.num_threads()) { 50 | estimator.add(*(in + i)); 51 | } 52 | block.sync(); 53 | auto const estimate = estimator.estimate(block); 54 | if (block.thread_rank() == 0) { *out = estimate; } 55 | } 56 | } 57 | 58 | TEMPLATE_TEST_CASE_SIG("hyperloglog: device ref", 59 | "", 60 | ((typename T, typename Hash), T, Hash), 61 | (int32_t, cuco::xxhash_64), 62 | (int64_t, cuco::xxhash_64), 63 | (__int128_t, cuco::xxhash_64<__int128_t>)) 64 | { 65 | using estimator_type = cuco::hyperloglog; 66 | 67 | auto num_items_pow2 = GENERATE(25, 26, 28); 68 | auto hll_precision = GENERATE(8, 10, 12, 13); 69 | auto sketch_size_kb = 4 * (1ull << hll_precision) / 1024; 70 | INFO("hll_precision=" << hll_precision); 71 | INFO("sketch_size_kb=" << sketch_size_kb); 72 | INFO("num_items=2^" << num_items_pow2); 73 | auto num_items = 1ull << num_items_pow2; 74 | 75 | thrust::device_vector items(num_items); 76 | 77 | // Generate `num_items` distinct items 78 | thrust::sequence(items.begin(), items.end(), 0); 79 | 80 | // Initialize the estimator 81 | estimator_type estimator{cuco::sketch_size_kb(sketch_size_kb)}; 82 | 83 | // Add all items to the estimator 84 | estimator.add(items.begin(), items.end()); 85 | 86 | auto const host_estimate = estimator.estimate(); 87 | 88 | thrust::device_vector device_estimate(1); 89 | estimate_kernel> 90 | <<<1, 512, estimator.sketch_bytes()>>>( 91 | cuco::sketch_size_kb(sketch_size_kb), items.begin(), num_items, device_estimate.begin()); 92 | 93 | REQUIRE(device_estimate[0] == host_estimate); 94 | } 95 | -------------------------------------------------------------------------------- /tests/static_set/retrieve_all_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include 28 | 29 | template 30 | void test_unique_sequence(Set& set, std::size_t num_keys) 31 | { 32 | using Key = typename Set::key_type; 33 | 34 | thrust::device_vector d_keys(num_keys); 35 | thrust::sequence(d_keys.begin(), d_keys.end()); 36 | auto keys_begin = d_keys.begin(); 37 | 38 | SECTION("Non-inserted keys should not be contained.") 39 | { 40 | REQUIRE(set.size() == 0); 41 | 42 | auto keys_end = set.retrieve_all(keys_begin); 43 | REQUIRE(std::distance(keys_begin, keys_end) == 0); 44 | } 45 | 46 | set.insert(keys_begin, keys_begin + num_keys); 47 | REQUIRE(set.size() == num_keys); 48 | 49 | SECTION("All inserted key/value pairs should be contained.") 50 | { 51 | thrust::device_vector d_res(num_keys); 52 | auto d_res_end = set.retrieve_all(d_res.begin()); 53 | thrust::sort(d_res.begin(), d_res_end); 54 | REQUIRE(cuco::test::equal( 55 | d_res.begin(), d_res_end, thrust::counting_iterator(0), cuda::std::equal_to{})); 56 | } 57 | } 58 | 59 | TEMPLATE_TEST_CASE_SIG( 60 | "static_set::retrieve_all tests", 61 | "", 62 | ((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize), 63 | (int32_t, cuco::test::probe_sequence::double_hashing, 1), 64 | (int32_t, cuco::test::probe_sequence::double_hashing, 2), 65 | (int64_t, cuco::test::probe_sequence::double_hashing, 1), 66 | (int64_t, cuco::test::probe_sequence::double_hashing, 2), 67 | (int32_t, cuco::test::probe_sequence::linear_probing, 1), 68 | (int32_t, cuco::test::probe_sequence::linear_probing, 2), 69 | (int64_t, cuco::test::probe_sequence::linear_probing, 1), 70 | (int64_t, cuco::test::probe_sequence::linear_probing, 2)) 71 | { 72 | constexpr std::size_t num_keys{400}; 73 | constexpr double desired_load_factor = 1.; 74 | 75 | using probe = std::conditional_t>, 77 | cuco::double_hashing>>; 78 | 79 | constexpr std::size_t gold_capacity = [&]() { 80 | if constexpr (cuco::is_double_hashing::value) { 81 | return (CGSize == 1) ? 409 // 409 x 1 x 2 82 | : 422; // 211 x 2 x 2 83 | } else { 84 | return 400; 85 | } 86 | }(); 87 | 88 | auto set = cuco::static_set{num_keys, desired_load_factor, cuco::empty_key{-1}, {}, probe{}}; 89 | 90 | REQUIRE(set.capacity() == gold_capacity); 91 | 92 | test_unique_sequence(set, num_keys); 93 | } 94 | -------------------------------------------------------------------------------- /examples/static_map/host_bulk_example.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | /** 32 | * @file host_bulk_example.cu 33 | * @brief Demonstrates usage of the static_map "bulk" host APIs. 
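 *
 * In outline, the flow demonstrated below is (a condensed sketch of this example; variable
 * names are placeholders):
 * @code{.cpp}
 * cuco::static_map<Key, Value> map{
 *   capacity, cuco::empty_key{empty_key_sentinel}, cuco::empty_value{empty_value_sentinel}};
 * map.insert(pairs_first, pairs_last);           // bulk insert of cuco::pair<Key, Value>
 * map.find(keys_first, keys_last, values_out);   // a missing key yields the empty value sentinel
 * @endcode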
34 | * 35 | * The bulk APIs are only invocable from the host and are used for doing operations like insert or 36 | * find on a set of keys. 37 | * 38 | */ 39 | 40 | int main(void) 41 | { 42 | using Key = int; 43 | using Value = int; 44 | 45 | // Empty slots are represented by reserved "sentinel" values. These values should be selected such 46 | // that they never occur in your input data. 47 | Key constexpr empty_key_sentinel = -1; 48 | Value constexpr empty_value_sentinel = -1; 49 | 50 | // Number of key/value pairs to be inserted 51 | std::size_t constexpr num_keys = 50'000; 52 | 53 | // Compute capacity based on a 50% load factor 54 | auto constexpr load_factor = 0.5; 55 | std::size_t const capacity = std::ceil(num_keys / load_factor); 56 | 57 | // Constructs a map with "capacity" slots using -1 and -1 as the empty key/value sentinels. 58 | auto map = cuco::static_map{ 59 | capacity, cuco::empty_key{empty_key_sentinel}, cuco::empty_value{empty_value_sentinel}}; 60 | 61 | // Create a sequence of keys and values 62 | thrust::device_vector insert_keys(num_keys); 63 | thrust::sequence(insert_keys.begin(), insert_keys.end(), 0); 64 | thrust::device_vector insert_values(num_keys); 65 | thrust::sequence(insert_values.begin(), insert_values.end(), 0); 66 | // Combine keys and values into pairs {{0,0}, {1,1}, ... {i,i}} 67 | auto pairs = thrust::make_transform_iterator( 68 | thrust::counting_iterator{0}, 69 | cuda::proclaim_return_type>( 70 | [keys = insert_keys.begin(), values = insert_values.begin()] __device__(auto i) { 71 | return cuco::pair{keys[i], values[i]}; 72 | })); 73 | 74 | // Inserts all pairs into the map 75 | map.insert(pairs, pairs + num_keys); 76 | 77 | // Storage for found values 78 | thrust::device_vector found_values(num_keys); 79 | 80 | // Finds all keys {0, 1, 2, ...} and stores associated values into `found_values` 81 | // If a key `keys_to_find[i]` doesn't exist, `found_values[i] == empty_value_sentinel` 82 | map.find(insert_keys.begin(), insert_keys.end(), found_values.begin()); 83 | 84 | // Verify that all the found values match the inserted values 85 | bool const all_values_match = 86 | thrust::equal(found_values.begin(), found_values.end(), insert_values.begin()); 87 | 88 | if (all_values_match) { std::cout << "Success! Found all values.\n"; } 89 | 90 | return 0; 91 | } 92 | --------------------------------------------------------------------------------