├── .github ├── CODEOWNERS ├── copy-pr-bot.yaml ├── ISSUE_TEMPLATE │ ├── config.yml │ └── feature_request.yml ├── actions │ ├── compute-matrix │ │ ├── action.yml │ │ └── compute-matrix.sh │ └── configure_cccl_sccache │ │ └── action.yml ├── workflows │ ├── build-and-test.yml │ ├── dispatch-build-and-test.yml │ ├── run-as-coder.yml │ └── verify-devcontainers.yml └── PULL_REQUEST_TEMPLATE.md ├── SECURITY.md ├── ci ├── test.sh ├── update_rapids_version.sh ├── sccache_hit_rate.sh ├── pre-commit │ └── doxygen.sh ├── sccache_stats.sh └── matrix.yml ├── cmake ├── thirdparty │ └── get_cccl.cmake ├── roaring_testdata.cmake └── header_testing.cmake ├── tests ├── dynamic_bitset │ ├── size_test.cu │ ├── rank_test.cu │ ├── find_next_test.cu │ ├── get_test.cu │ └── select_test.cu ├── static_set │ ├── size_test.cu │ ├── rehash_test.cu │ ├── atomic_storage_test.cu │ ├── insert_and_find_test.cu │ ├── large_input_test.cu │ └── retrieve_all_test.cu ├── hyperloglog │ ├── type_deduction_test.cu │ ├── unique_sequence_test.cu │ └── device_ref_test.cu ├── test_utils.cuh ├── static_map │ ├── rehash_test.cu │ ├── hash_test.cu │ └── key_sentinel_test.cu ├── utility │ ├── fast_int_test.cu │ └── extent_test.cu ├── static_multiset │ ├── load_factor_test.cu │ └── large_input_test.cu └── dynamic_map │ └── unique_sequence_test_experimental.cu ├── include └── cuco │ ├── detail │ ├── probing_scheme │ │ └── probing_scheme_base.cuh │ ├── utility │ │ ├── math.cuh │ │ ├── strong_type.cuh │ │ ├── cuda.hpp │ │ └── cuda.cuh │ ├── hash_functions │ │ ├── utils.cuh │ │ └── identity_hash.cuh │ ├── storage │ │ ├── functors.cuh │ │ ├── storage.cuh │ │ └── storage_base.cuh │ ├── __config │ ├── operator.inl │ ├── pair │ │ ├── pair.inl │ │ └── traits.hpp │ ├── bloom_filter │ │ └── default_filter_policy.inl │ ├── utils.hpp │ └── roaring_bitmap │ │ ├── roaring_bitmap_ref.inl │ │ └── roaring_bitmap.inl │ ├── types.cuh │ ├── utility │ ├── cuda_thread_scope.cuh │ ├── error.hpp │ ├── traits.hpp │ ├── reduction_functors.cuh │ └── allocator.hpp │ ├── storage.cuh │ ├── probe_sequences.cuh │ └── hash_functions.cuh ├── benchmarks ├── bloom_filter │ ├── defaults.hpp │ └── utils.hpp ├── benchmark_defaults.hpp └── static_set │ ├── size_bench.cu │ ├── rehash_bench.cu │ └── retrieve_all_bench.cu ├── .pre-commit-config.yaml ├── .devcontainer ├── devcontainer.json ├── cuda12.0-gcc11 │ └── devcontainer.json ├── cuda12.9-gcc13 │ └── devcontainer.json ├── cuda13.0-gcc13 │ └── devcontainer.json ├── cuda12.0-llvm14 │ └── devcontainer.json ├── cuda12.9-llvm18 │ └── devcontainer.json ├── cuda13.0-llvm20 │ └── devcontainer.json ├── launch.sh └── verify_devcontainer.sh ├── examples ├── hyperloglog │ └── host_bulk_example.cu ├── static_set │ └── host_bulk_example.cu ├── bloom_filter │ └── host_bulk_example.cu ├── static_multimap │ └── host_bulk_example.cu ├── static_multiset │ └── host_bulk_example.cu └── static_map │ └── host_bulk_example.cu ├── .gitignore └── CONTRIBUTING.md /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @sleeepyjack @PointKernel 2 | -------------------------------------------------------------------------------- /.github/copy-pr-bot.yaml: -------------------------------------------------------------------------------- 1 | # Configuration file for `copy-pr-bot` GitHub App 2 | # https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/ 3 | 4 | enabled: true 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: 
-------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: Question 4 | url: https://github.com/NVIDIA/cuCollections/discussions 5 | about: Check out our Discussions page to ask and answer questions. 6 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | ## Security 2 | 3 | NVIDIA is dedicated to the security and trust of our software products and services, including all source code repositories managed through our organization. 4 | 5 | If you need to report a security issue, please use the appropriate contact points outlined below. **Please do not report security vulnerabilities through GitHub.** 6 | 7 | ## Reporting Potential Security Vulnerability in cuCollections 8 | 9 | To report a potential security vulnerability in cuCollections: 10 | - Web: [Security Vulnerability Submission Form](https://www.nvidia.com/object/submit-security-vulnerability.html) 11 | - E-Mail: psirt@nvidia.com 12 | - We encourage you to use the following PGP key for secure email communication: [NVIDIA public PGP Key for communication](https://www.nvidia.com/en-us/security/pgp-key) 13 | - Please include the following information: 14 | - Product/Driver name and version/branch that contains the vulnerability 15 | -------------------------------------------------------------------------------- /ci/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Ensure the script is being executed in its containing directory 18 | cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"; 19 | 20 | source ./build.sh "$@" 21 | 22 | ctest --test-dir ${BUILD_DIR}/tests --output-on-failure --timeout 60 23 | 24 | echo "Test complete" -------------------------------------------------------------------------------- /cmake/thirdparty/get_cccl.cmake: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # Copyright (c) 2021-2023, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | # in compliance with the License. You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software distributed under the License 10 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | # or implied. See the License for the specific language governing permissions and limitations under 12 | # the License. 
13 | # ============================================================================= 14 | 15 | # Use CPM to find or clone CCCL 16 | function(find_and_configure_cccl) 17 | include(${rapids-cmake-dir}/cpm/cccl.cmake) 18 | rapids_cpm_cccl(INSTALL_EXPORT_SET cuco-exports BUILD_EXPORT_SET cuco-exports) 19 | endfunction() 20 | 21 | find_and_configure_cccl() 22 | -------------------------------------------------------------------------------- /tests/dynamic_bitset/size_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | TEST_CASE("dynamic_bitset size computation test", "") 22 | { 23 | cuco::experimental::detail::dynamic_bitset bv; 24 | using size_type = std::size_t; 25 | constexpr size_type num_elements{400}; 26 | 27 | for (size_type i = 0; i < num_elements; i++) { 28 | bv.push_back(i % 2 == 0); // Alternate 0s and 1s pattern 29 | } 30 | 31 | auto size = bv.size(); 32 | REQUIRE(size == num_elements); 33 | } 34 | -------------------------------------------------------------------------------- /include/cuco/detail/probing_scheme/probing_scheme_base.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | namespace cuco { 22 | namespace detail { 23 | 24 | /** 25 | * @brief Base class of public probing scheme. 26 | * 27 | * This class should not be used directly. 28 | * 29 | * @tparam CGSize Size of CUDA Cooperative Groups 30 | */ 31 | template 32 | class probing_scheme_base { 33 | public: 34 | /** 35 | * @brief The size of the CUDA cooperative thread group. 36 | */ 37 | static constexpr int32_t cg_size = CGSize; 38 | }; 39 | } // namespace detail 40 | } // namespace cuco 41 | -------------------------------------------------------------------------------- /tests/static_set/size_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #include 24 | 25 | TEST_CASE("static_set size test", "") 26 | { 27 | constexpr std::size_t num_keys{400}; 28 | 29 | cuco::static_set set{cuco::extent{400}, cuco::empty_key{-1}}; 30 | 31 | thrust::device_vector d_keys(num_keys); 32 | 33 | thrust::sequence(thrust::device, d_keys.begin(), d_keys.end()); 34 | 35 | auto const num_successes = set.insert(d_keys.begin(), d_keys.end()); 36 | 37 | REQUIRE(set.size() == num_keys); 38 | REQUIRE(num_successes == num_keys); 39 | 40 | set.clear(); 41 | 42 | REQUIRE(set.size() == 0); 43 | } 44 | -------------------------------------------------------------------------------- /include/cuco/detail/utility/math.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | */ 15 | 16 | #pragma once 17 | 18 | #include 19 | 20 | namespace cuco { 21 | namespace detail { 22 | 23 | /** 24 | * @brief Ceiling of an integer division 25 | * 26 | * @tparam T Type of dividend 27 | * @tparam U Type of divisor 28 | * 29 | * @throw If `T` is not an integral type 30 | * @throw If `U` is not an integral type 31 | * 32 | * @param dividend Numerator 33 | * @param divisor Denominator 34 | * 35 | * @return Ceiling of the integer division 36 | */ 37 | template 38 | __host__ __device__ constexpr T int_div_ceil(T dividend, U divisor) noexcept 39 | { 40 | static_assert(cuda::std::is_integral_v); 41 | static_assert(cuda::std::is_integral_v); 42 | return (dividend + divisor - 1) / divisor; 43 | } 44 | 45 | } // namespace detail 46 | } // namespace cuco 47 | -------------------------------------------------------------------------------- /benchmarks/bloom_filter/defaults.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
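As a side note on the `int_div_ceil` helper from include/cuco/detail/utility/math.cuh shown above: the following is a minimal usage sketch, not part of the repository, assuming the detail header is reachable on the include path and the translation unit is compiled with nvcc. Its typical purpose is computing launch grid sizes.

#include <cuco/detail/utility/math.cuh>

int main()
{
  // e.g. 400 work items processed by 128-thread blocks -> 4 blocks
  constexpr auto num_blocks = cuco::detail::int_div_ceil(400, 128);
  static_assert(num_blocks == 4, "ceil(400 / 128) == 4");
  return 0;
}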
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | #include 22 | 23 | #include 24 | 25 | #include 26 | 27 | namespace cuco::benchmark::defaults { 28 | 29 | using BF_KEY = nvbench::int64_t; 30 | using BF_HASH = cuco::xxhash_64; 31 | using BF_WORD = nvbench::uint32_t; 32 | 33 | static constexpr auto BF_N = 1'000'000'000; 34 | static constexpr auto BF_SIZE_MB = 2'000; 35 | static constexpr auto BF_WORDS_PER_BLOCK = 8; 36 | 37 | auto const BF_SIZE_MB_RANGE_CACHE = 38 | std::vector{1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048}; 39 | auto const BF_PATTERN_BITS_RANGE = std::vector{1, 2, 4, 6, 8, 16}; 40 | 41 | } // namespace cuco::benchmark::defaults 42 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | autofix_commit_msg: | 3 | [pre-commit.ci] auto code formatting 4 | autofix_prs: true 5 | autoupdate_branch: '' 6 | autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate' 7 | autoupdate_schedule: quarterly 8 | skip: [] 9 | submodules: false 10 | 11 | repos: 12 | - repo: https://github.com/pre-commit/mirrors-clang-format 13 | rev: v20.1.4 14 | hooks: 15 | - id: clang-format 16 | types_or: [c, c++, cuda] 17 | args: ['-fallback-style=none', '-style=file', '-i'] 18 | - repo: local 19 | hooks: 20 | - id: check-doxygen 21 | name: check-doxygen 22 | entry: ./ci/pre-commit/doxygen.sh 23 | files: ^include/ 24 | types_or: [file] 25 | language: system 26 | pass_filenames: false 27 | verbose: true 28 | - id: check-example-links 29 | name: check-example-links 30 | entry: ./ci/pre-commit/example_links.py 31 | files: ^examples/ 32 | types: [cuda] 33 | language: python 34 | pass_filenames: false 35 | verbose: true 36 | additional_dependencies: 37 | - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple 38 | - gitpython 39 | 40 | default_language_version: 41 | python: python3 42 | -------------------------------------------------------------------------------- /ci/update_rapids_version.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2025, NVIDIA CORPORATION. 3 | ########################## 4 | # RAPIDS Version Updater # 5 | ########################## 6 | 7 | ## Usage 8 | # bash update_rapids_version.sh 9 | 10 | # Format is YY.MM.PP - no leading 'v' or trailing 'a' 11 | NEXT_FULL_TAG=$1 12 | 13 | #Get . 
for next version 14 | NEXT_MAJOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[1]}') 15 | NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}') 16 | NEXT_PATCH=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[3]}') 17 | NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR} 18 | 19 | # Need to distutils-normalize the versions for some use cases 20 | NEXT_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_SHORT_TAG}'))") 21 | 22 | echo "Updating RAPIDS and devcontainers to $NEXT_FULL_TAG" 23 | 24 | # Inplace sed replace; workaround for Linux and Mac 25 | function sed_runner() { 26 | sed -i.bak ''"$1"'' $2 && rm -f ${2}.bak 27 | } 28 | 29 | # Update CI files 30 | sed_runner "/devcontainer_version/ s/'[0-9.]*'/'${NEXT_SHORT_TAG}'/g" ci/matrix.yml 31 | 32 | # Update CMakeLists.txt 33 | sed_runner "s/set(rapids-cmake-version [0-9.]*)/set(rapids-cmake-version ${NEXT_SHORT_TAG})/g" CMakeLists.txt 34 | 35 | # Update .devcontainer files 36 | find .devcontainer/ -type f -name devcontainer.json -print0 | while IFS= read -r -d '' filename; do 37 | sed_runner "s@rapidsai/devcontainers:[0-9.]*@rapidsai/devcontainers:${NEXT_SHORT_TAG}@g" "${filename}" 38 | done 39 | -------------------------------------------------------------------------------- /.github/actions/compute-matrix/action.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | name: Compute Matrix 17 | description: "Compute the matrix for a given matrix type from the specified matrix file" 18 | 19 | inputs: 20 | matrix_query: 21 | description: "The jq query used to specify the desired matrix. e.g., .pull_request.nvcc" 22 | required: true 23 | matrix_file: 24 | description: 'The file containing the matrix' 25 | required: true 26 | outputs: 27 | matrix: 28 | description: 'The requested matrix' 29 | value: ${{ steps.compute-matrix.outputs.MATRIX }} 30 | 31 | runs: 32 | using: "composite" 33 | steps: 34 | - name: Compute matrix 35 | id: compute-matrix 36 | run: | 37 | MATRIX=$(./.github/actions/compute-matrix/compute-matrix.sh ${{inputs.matrix_file}} ${{inputs.matrix_query}} ) 38 | echo "matrix=$MATRIX" | tee -a $GITHUB_OUTPUT 39 | shell: bash -euxo pipefail {0} -------------------------------------------------------------------------------- /tests/static_set/rehash_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | 22 | #include 23 | 24 | TEST_CASE("static_set rehash test", "") 25 | { 26 | using key_type = int; 27 | 28 | constexpr std::size_t num_keys{400}; 29 | constexpr std::size_t num_erased_keys{100}; 30 | 31 | cuco::static_set set{num_keys, cuco::empty_key{-1}, cuco::erased_key{-2}}; 32 | 33 | thrust::device_vector d_keys(num_keys); 34 | 35 | thrust::sequence(d_keys.begin(), d_keys.end()); 36 | 37 | set.insert(d_keys.begin(), d_keys.end()); 38 | 39 | set.rehash(); 40 | REQUIRE(set.size() == num_keys); 41 | 42 | set.rehash(num_keys * 2); 43 | REQUIRE(set.size() == num_keys); 44 | 45 | set.erase(d_keys.begin(), d_keys.begin() + num_erased_keys); 46 | set.rehash(); 47 | REQUIRE(set.size() == num_keys - num_erased_keys); 48 | } 49 | -------------------------------------------------------------------------------- /.github/actions/configure_cccl_sccache/action.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | name: Set up AWS credentials and environment variables for sccache 17 | description: "Set up AWS credentials and environment variables for sccache" 18 | runs: 19 | using: "composite" 20 | steps: 21 | - name: Get AWS credentials for sccache bucket 22 | uses: aws-actions/configure-aws-credentials@v2 23 | with: 24 | role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA 25 | aws-region: us-east-2 26 | role-duration-seconds: 43200 # 12 hours 27 | - name: Set environment variables 28 | run: | 29 | echo "SCCACHE_BUCKET=rapids-sccache-east" >> $GITHUB_ENV 30 | echo "SCCACHE_REGION=us-east-2" >> $GITHUB_ENV 31 | echo "SCCACHE_IDLE_TIMEOUT=32768" >> $GITHUB_ENV 32 | echo "SCCACHE_S3_USE_SSL=true" >> $GITHUB_ENV 33 | echo "SCCACHE_S3_NO_CREDENTIALS=false" >> $GITHUB_ENV 34 | shell: bash -------------------------------------------------------------------------------- /include/cuco/detail/hash_functions/utils.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | namespace cuco::detail { 23 | 24 | template 25 | constexpr __host__ __device__ T load_chunk(U const* const data, Extent index) noexcept 26 | { 27 | auto const bytes = reinterpret_cast(data); 28 | T chunk; 29 | memcpy(&chunk, bytes + index * sizeof(T), sizeof(T)); 30 | return chunk; 31 | } 32 | 33 | constexpr __host__ __device__ cuda::std::uint32_t rotl32(cuda::std::uint32_t x, 34 | cuda::std::int8_t r) noexcept 35 | { 36 | return (x << r) | (x >> (32 - r)); 37 | } 38 | 39 | constexpr __host__ __device__ cuda::std::uint64_t rotl64(cuda::std::uint64_t x, 40 | cuda::std::int8_t r) noexcept 41 | { 42 | return (x << r) | (x >> (64 - r)); 43 | } 44 | 45 | }; // namespace cuco::detail 46 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "shutdownAction": "stopContainer", 3 | "image": "rapidsai/devcontainers:25.12-cpp-gcc13-cuda13.0-ubuntu24.04", 4 | "hostRequirements": { 5 | "gpu": true 6 | }, 7 | "initializeCommand": [ 8 | "/bin/bash", 9 | "-c", 10 | "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" 11 | ], 12 | "containerEnv": { 13 | "SCCACHE_REGION": "us-east-2", 14 | "SCCACHE_BUCKET": "rapids-sccache-devs", 15 | "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", 16 | "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", 17 | "DEVCONTAINER_NAME": "cuda13.0-gcc13", 18 | "CUCO_CUDA_VERSION": "13.0", 19 | "CUCO_HOST_COMPILER": "gcc", 20 | "CUCO_HOST_COMPILER_VERSION": "13" 21 | }, 22 | "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", 23 | "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", 24 | "mounts": [ 25 | "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", 26 | "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", 27 | "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" 28 | ], 29 | "customizations": { 30 | "vscode": { 31 | "extensions": [ 32 | "llvm-vs-code-extensions.vscode-clangd" 33 | ], 34 | "settings": { 35 | "clangd.arguments": [ 36 | "--compile-commands-dir=${workspaceFolder}/build/latest" 37 | ] 38 | } 39 | } 40 | }, 41 | "name": "cuda13.0-gcc13" 42 | } 43 | -------------------------------------------------------------------------------- /.devcontainer/cuda12.0-gcc11/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "shutdownAction": "stopContainer", 3 | "image": "rapidsai/devcontainers:25.12-cpp-gcc11-cuda12.0-ubuntu22.04", 4 | "hostRequirements": { 5 | "gpu": true 6 | }, 7 | "initializeCommand": [ 8 | "/bin/bash", 9 | "-c", 10 | "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" 11 | ], 12 | "containerEnv": { 13 | "SCCACHE_REGION": "us-east-2", 14 | 
"SCCACHE_BUCKET": "rapids-sccache-devs", 15 | "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", 16 | "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", 17 | "DEVCONTAINER_NAME": "cuda12.0-gcc11", 18 | "CUCO_CUDA_VERSION": "12.0", 19 | "CUCO_HOST_COMPILER": "gcc", 20 | "CUCO_HOST_COMPILER_VERSION": "11" 21 | }, 22 | "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", 23 | "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", 24 | "mounts": [ 25 | "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", 26 | "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", 27 | "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" 28 | ], 29 | "customizations": { 30 | "vscode": { 31 | "extensions": [ 32 | "llvm-vs-code-extensions.vscode-clangd" 33 | ], 34 | "settings": { 35 | "clangd.arguments": [ 36 | "--compile-commands-dir=${workspaceFolder}/build/latest" 37 | ] 38 | } 39 | } 40 | }, 41 | "name": "cuda12.0-gcc11" 42 | } 43 | -------------------------------------------------------------------------------- /.devcontainer/cuda12.9-gcc13/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "shutdownAction": "stopContainer", 3 | "image": "rapidsai/devcontainers:25.12-cpp-gcc13-cuda12.9-ubuntu24.04", 4 | "hostRequirements": { 5 | "gpu": true 6 | }, 7 | "initializeCommand": [ 8 | "/bin/bash", 9 | "-c", 10 | "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" 11 | ], 12 | "containerEnv": { 13 | "SCCACHE_REGION": "us-east-2", 14 | "SCCACHE_BUCKET": "rapids-sccache-devs", 15 | "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", 16 | "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", 17 | "DEVCONTAINER_NAME": "cuda12.9-gcc13", 18 | "CUCO_CUDA_VERSION": "12.9", 19 | "CUCO_HOST_COMPILER": "gcc", 20 | "CUCO_HOST_COMPILER_VERSION": "13" 21 | }, 22 | "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", 23 | "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", 24 | "mounts": [ 25 | "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", 26 | "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", 27 | "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" 28 | ], 29 | "customizations": { 30 | "vscode": { 31 | "extensions": [ 32 | "llvm-vs-code-extensions.vscode-clangd" 33 | ], 34 | "settings": { 35 | "clangd.arguments": [ 36 | "--compile-commands-dir=${workspaceFolder}/build/latest" 37 | ] 38 | } 39 | } 40 | }, 41 | "name": "cuda12.9-gcc13" 42 | } 43 | -------------------------------------------------------------------------------- /.devcontainer/cuda13.0-gcc13/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "shutdownAction": "stopContainer", 3 | "image": "rapidsai/devcontainers:25.12-cpp-gcc13-cuda13.0-ubuntu24.04", 4 | "hostRequirements": { 5 | "gpu": true 6 | }, 7 | "initializeCommand": [ 8 | "/bin/bash", 9 | "-c", 10 | "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" 11 | ], 12 | "containerEnv": { 13 | "SCCACHE_REGION": "us-east-2", 14 | 
"SCCACHE_BUCKET": "rapids-sccache-devs", 15 | "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", 16 | "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", 17 | "DEVCONTAINER_NAME": "cuda13.0-gcc13", 18 | "CUCO_CUDA_VERSION": "13.0", 19 | "CUCO_HOST_COMPILER": "gcc", 20 | "CUCO_HOST_COMPILER_VERSION": "13" 21 | }, 22 | "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", 23 | "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", 24 | "mounts": [ 25 | "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", 26 | "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", 27 | "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" 28 | ], 29 | "customizations": { 30 | "vscode": { 31 | "extensions": [ 32 | "llvm-vs-code-extensions.vscode-clangd" 33 | ], 34 | "settings": { 35 | "clangd.arguments": [ 36 | "--compile-commands-dir=${workspaceFolder}/build/latest" 37 | ] 38 | } 39 | } 40 | }, 41 | "name": "cuda13.0-gcc13" 42 | } 43 | -------------------------------------------------------------------------------- /.devcontainer/cuda12.0-llvm14/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "shutdownAction": "stopContainer", 3 | "image": "rapidsai/devcontainers:25.12-cpp-llvm14-cuda12.0-ubuntu20.04", 4 | "hostRequirements": { 5 | "gpu": true 6 | }, 7 | "initializeCommand": [ 8 | "/bin/bash", 9 | "-c", 10 | "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" 11 | ], 12 | "containerEnv": { 13 | "SCCACHE_REGION": "us-east-2", 14 | "SCCACHE_BUCKET": "rapids-sccache-devs", 15 | "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", 16 | "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", 17 | "DEVCONTAINER_NAME": "cuda12.0-llvm14", 18 | "CUCO_CUDA_VERSION": "12.0", 19 | "CUCO_HOST_COMPILER": "llvm", 20 | "CUCO_HOST_COMPILER_VERSION": "14" 21 | }, 22 | "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", 23 | "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", 24 | "mounts": [ 25 | "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", 26 | "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", 27 | "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" 28 | ], 29 | "customizations": { 30 | "vscode": { 31 | "extensions": [ 32 | "llvm-vs-code-extensions.vscode-clangd" 33 | ], 34 | "settings": { 35 | "clangd.arguments": [ 36 | "--compile-commands-dir=${workspaceFolder}/build/latest" 37 | ] 38 | } 39 | } 40 | }, 41 | "name": "cuda12.0-llvm14" 42 | } 43 | -------------------------------------------------------------------------------- /.devcontainer/cuda12.9-llvm18/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "shutdownAction": "stopContainer", 3 | "image": "rapidsai/devcontainers:25.12-cpp-llvm18-cuda12.9-ubuntu22.04", 4 | "hostRequirements": { 5 | "gpu": true 6 | }, 7 | "initializeCommand": [ 8 | "/bin/bash", 9 | "-c", 10 | "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" 11 | ], 12 | "containerEnv": { 13 | "SCCACHE_REGION": "us-east-2", 14 | 
"SCCACHE_BUCKET": "rapids-sccache-devs", 15 | "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", 16 | "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", 17 | "DEVCONTAINER_NAME": "cuda12.9-llvm18", 18 | "CUCO_CUDA_VERSION": "12.9", 19 | "CUCO_HOST_COMPILER": "llvm", 20 | "CUCO_HOST_COMPILER_VERSION": "18" 21 | }, 22 | "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", 23 | "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", 24 | "mounts": [ 25 | "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", 26 | "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", 27 | "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" 28 | ], 29 | "customizations": { 30 | "vscode": { 31 | "extensions": [ 32 | "llvm-vs-code-extensions.vscode-clangd" 33 | ], 34 | "settings": { 35 | "clangd.arguments": [ 36 | "--compile-commands-dir=${workspaceFolder}/build/latest" 37 | ] 38 | } 39 | } 40 | }, 41 | "name": "cuda12.9-llvm18" 42 | } 43 | -------------------------------------------------------------------------------- /.devcontainer/cuda13.0-llvm20/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "shutdownAction": "stopContainer", 3 | "image": "rapidsai/devcontainers:25.12-cpp-llvm20-cuda13.0ext-ubuntu24.04", 4 | "hostRequirements": { 5 | "gpu": true 6 | }, 7 | "initializeCommand": [ 8 | "/bin/bash", 9 | "-c", 10 | "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" 11 | ], 12 | "containerEnv": { 13 | "SCCACHE_REGION": "us-east-2", 14 | "SCCACHE_BUCKET": "rapids-sccache-devs", 15 | "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", 16 | "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", 17 | "DEVCONTAINER_NAME": "cuda13.0-llvm20", 18 | "CUCO_CUDA_VERSION": "13.0", 19 | "CUCO_HOST_COMPILER": "llvm", 20 | "CUCO_HOST_COMPILER_VERSION": "20" 21 | }, 22 | "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", 23 | "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", 24 | "mounts": [ 25 | "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", 26 | "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", 27 | "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" 28 | ], 29 | "customizations": { 30 | "vscode": { 31 | "extensions": [ 32 | "llvm-vs-code-extensions.vscode-clangd" 33 | ], 34 | "settings": { 35 | "clangd.arguments": [ 36 | "--compile-commands-dir=${workspaceFolder}/build/latest" 37 | ] 38 | } 39 | } 40 | }, 41 | "name": "cuda13.0-llvm20" 42 | } 43 | -------------------------------------------------------------------------------- /include/cuco/detail/storage/functors.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #pragma once 17 | 18 | namespace cuco::detail { 19 | /** 20 | * @brief Functor for initializing device memory with a given value 21 | * 22 | * @tparam SizeType Type used for indexing 23 | * @tparam T Type of value being initialized 24 | */ 25 | template 26 | struct initialize_functor { 27 | T* const _d_ptr; ///< Pointer to device memory 28 | T const _key; ///< Value to initialize memory with 29 | 30 | /** 31 | * @brief Constructs functor for initializing device memory 32 | * 33 | * @param d_ptr Pointer to device memory to initialize 34 | * @param key Value to initialize memory with 35 | */ 36 | __host__ __device__ initialize_functor(T* d_ptr, T key) noexcept : _d_ptr{d_ptr}, _key{key} {} 37 | 38 | /** 39 | * @brief Device function to initialize memory at given index 40 | * 41 | * @param idx Index into device memory 42 | */ 43 | __device__ __forceinline__ void operator()(SizeType idx) const noexcept { _d_ptr[idx] = _key; } 44 | }; 45 | } // namespace cuco::detail 46 | -------------------------------------------------------------------------------- /tests/hyperloglog/type_deduction_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | 28 | #include 29 | 30 | TEST_CASE("hyperloglog: type deduction bug with hash functions returning references") 31 | { 32 | auto constexpr sketch_size_kb = 1; 33 | auto constexpr num_items = 1000; 34 | 35 | auto first = thrust::make_transform_iterator(thrust::counting_iterator(0), 36 | cuco::xxhash_64{}); 37 | auto last = first + num_items; 38 | 39 | cuco::hyperloglog estimator{ 40 | cuco::sketch_size_kb(sketch_size_kb)}; 41 | 42 | REQUIRE(estimator.estimate() == 0); 43 | 44 | estimator.add(first, last); 45 | 46 | auto const estimate = estimator.estimate(); 47 | 48 | REQUIRE(estimate > 0); 49 | } 50 | -------------------------------------------------------------------------------- /include/cuco/types.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
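Regarding the `initialize_functor` from include/cuco/detail/storage/functors.cuh above: its template parameter list was lost in this dump, but per its documentation it is parameterized on an index type and a value type, assumed here as initialize_functor<SizeType, T>. Below is a hypothetical host-side sketch (not repository code) of driving it with Thrust to fill device memory with a sentinel.

#include <cuco/detail/storage/functors.cuh>

#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>

#include <cstddef>

int main()
{
  thrust::device_vector<int> slots(1024);

  // Fill every slot with the empty-key sentinel -1.
  cuco::detail::initialize_functor<std::size_t, int> init{slots.data().get(), -1};
  thrust::for_each_n(thrust::device,
                     thrust::counting_iterator<std::size_t>{0},
                     slots.size(),
                     init);
  return 0;
}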
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | /** 22 | * @brief Defines various strong type wrappers used across this library. 23 | * 24 | * @note Each strong type inherits from `cuco::detail::strong_type`. `CUCO_DEFINE_STRONG_TYPE` 25 | * and `CUCO_DEFINE_TEMPLATE_STRONG_TYPE` are convenience macros used to define a named type in a 26 | * single line, e.g., `CUCO_DEFINE_STRONG_TYPE(foo, double)` defines `struct foo : public 27 | * cuco::detail::strong_type {...};`, where `cuco::foo{42.0}` is implicitly convertible to 28 | * `double{42.0}`. 29 | */ 30 | 31 | namespace cuco { 32 | /** 33 | * @brief A strong type wrapper `cuco::empty_key` used to denote the empty key sentinel. 34 | */ 35 | CUCO_DEFINE_TEMPLATE_STRONG_TYPE(empty_key); 36 | 37 | /** 38 | * @brief A strong type wrapper `cuco::empty_value` used to denote the empty value sentinel. 39 | */ 40 | CUCO_DEFINE_TEMPLATE_STRONG_TYPE(empty_value); 41 | 42 | /** 43 | * @brief A strong type wrapper `cuco::erased_key` used to denote the erased key sentinel. 44 | */ 45 | CUCO_DEFINE_TEMPLATE_STRONG_TYPE(erased_key); 46 | 47 | } // namespace cuco 48 | -------------------------------------------------------------------------------- /include/cuco/utility/cuda_thread_scope.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include // cuda::thread_scope 20 | 21 | namespace cuco { 22 | 23 | /** 24 | * @brief Strongly-typed wrapper for `cuda::thread_scope`. 25 | * 26 | * @tparam Scope `cuda::thread_scope` to be wrapped 27 | */ 28 | template 29 | struct cuda_thread_scope { 30 | /** 31 | * @brief Implicit conversion to `cuda::thread_scope`. 
32 | * 33 | * @return The wrapped `cuda::thread_scope` 34 | */ 35 | __host__ __device__ constexpr operator cuda::thread_scope() const noexcept { return Scope; } 36 | }; 37 | 38 | // alias definitions 39 | inline constexpr auto thread_scope_system = 40 | cuda_thread_scope{}; ///< `cuco::thread_scope_system` 41 | inline constexpr auto thread_scope_device = 42 | cuda_thread_scope{}; ///< `cuco::thread_scope_device` 43 | inline constexpr auto thread_scope_block = 44 | cuda_thread_scope{}; ///< `cuco::thread_scope_block` 45 | inline constexpr auto thread_scope_thread = 46 | cuda_thread_scope{}; ///< `cuco::thread_scope_thread` 47 | 48 | } // namespace cuco 49 | -------------------------------------------------------------------------------- /tests/dynamic_bitset/rank_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #include 26 | 27 | using cuco::test::modulo_bitgen; 28 | 29 | TEST_CASE("dynamic_bitset rank test", "") 30 | { 31 | cuco::experimental::detail::dynamic_bitset bv; 32 | 33 | using size_type = std::size_t; 34 | constexpr size_type num_elements{4000}; 35 | 36 | for (size_type i = 0; i < num_elements; i++) { 37 | bv.push_back(modulo_bitgen(i)); 38 | } 39 | 40 | thrust::device_vector keys(num_elements); 41 | thrust::sequence(keys.begin(), keys.end(), 0); 42 | 43 | thrust::device_vector d_ranks(num_elements); 44 | 45 | bv.rank(keys.begin(), keys.end(), d_ranks.begin()); 46 | 47 | thrust::host_vector h_ranks = d_ranks; 48 | 49 | size_type cur_rank = 0; 50 | size_type num_matches = 0; 51 | for (size_type i = 0; i < num_elements; i++) { 52 | num_matches += cur_rank == h_ranks[i]; 53 | if (modulo_bitgen(i)) { cur_rank++; } 54 | } 55 | REQUIRE(num_matches == num_elements); 56 | } 57 | -------------------------------------------------------------------------------- /include/cuco/detail/__config: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
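A quick illustration of the `cuda_thread_scope` wrapper documented above — a sketch, not repository code; the template arguments of the predefined aliases are elided in this dump, but the example relies only on the conversion operator shown:

#include <cuco/utility/cuda_thread_scope.cuh>

// The strong-type tag converts back to the underlying cuda::thread_scope enumerator.
constexpr cuda::thread_scope scope = cuco::thread_scope_device;
static_assert(scope == cuda::thread_scope_device);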
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | #if !defined(__CUDACC_VER_MAJOR__) || !defined(__CUDACC_VER_MINOR__) 23 | #error "NVCC version not found" 24 | #elif __CUDACC_VER_MAJOR__ < 12 25 | #error "NVCC version 12.0 or later is required" 26 | #endif 27 | 28 | #if !defined(__CUDACC_EXTENDED_LAMBDA__) 29 | #error "Support for extended device lambdas is required (nvcc flag --expt-extended-lambda)" 30 | #endif 31 | 32 | #if !defined(CCCL_VERSION) || (CCCL_VERSION < 3000000) 33 | #error "CCCL version 3.0.0 or later is required" 34 | #endif 35 | 36 | // WAR for libcudacxx/296 37 | #define CUCO_CUDA_MINIMUM_ARCH _NV_FIRST_ARG(__CUDA_ARCH_LIST__) 38 | 39 | #if defined(CUDART_VERSION) && (CUCO_CUDA_MINIMUM_ARCH >= 700) 40 | #define CUCO_HAS_CUDA_BARRIER 41 | #endif 42 | 43 | #if defined(CUDART_VERSION) && (CUDART_VERSION >= 12010) 44 | #define CUCO_HAS_CG_INVOKE_ONE 45 | #endif 46 | 47 | #if (CUCO_CUDA_MINIMUM_ARCH >= 700) 48 | #define CUCO_HAS_INDEPENDENT_THREADS 49 | #endif 50 | 51 | #if defined(__SIZEOF_INT128__) 52 | #define CUCO_HAS_INT128 53 | #endif 54 | 55 | #if defined(CUDART_VERSION) && (CUDART_VERSION >= 12000) 56 | #define CUCO_HAS_CG_REDUCE_UPDATE_ASYNC 57 | #endif 58 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Enhancement 2 | description: Suggest an idea to improve cuCollections 3 | title: '[ENHANCEMENT]: ' 4 | labels: ['type: enhancement'] 5 | body: 6 | - type: textarea 7 | id: description 8 | attributes: 9 | label: Is your feature request related to a problem? Please describe. 10 | description: A clear and concise description of what the problem is, e.g., "I would like to be able to..." 11 | placeholder: I would like an overload of `cuco::static_map::insert` that returns the success of each insertion. 12 | validations: 13 | required: true 14 | - type: textarea 15 | id: proposed-solution 16 | attributes: 17 | label: Describe the solution you'd like 18 | description: A clear and concise description of what you want to happen. 19 | placeholder: | 20 | Add a new overload of `insert` that takes an output iterator range assignable from `bool` that indicates the success of each insert. 21 | Example API: 22 | template 23 | void insert(InputIt first_input, InputIt last_input, OutputIt first_input, cudaStream_t stream = 0); 24 | validations: 25 | required: true 26 | - type: textarea 27 | id: alternatives 28 | attributes: 29 | label: Describe alternatives you've considered 30 | description: 31 | If applicable, please add a clear and concise description of any alternative solutions or features you've 32 | considered. 33 | placeholder: You can implement this yourself with the device-side API, but it would be more convenient as a bulk function. 34 | validations: 35 | required: false 36 | - type: textarea 37 | id: additional-context 38 | attributes: 39 | label: Additional context 40 | description: Add any other context about the request here. 41 | placeholder: This would be useful for sparse embedding tables in DL usecases. 42 | validations: 43 | required: false 44 | -------------------------------------------------------------------------------- /include/cuco/detail/operator.inl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2025, NVIDIA CORPORATION. 
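The feature macros defined in include/cuco/detail/__config above gate optional CUDA features. As a hypothetical illustration (not repository code) of how a macro such as CUCO_HAS_CG_INVOKE_ONE would typically be consumed, assuming cooperative_groups::invoke_one from recent CUDA toolkits:

#include <cuco/detail/__config>

#include <cooperative_groups.h>

namespace cg = cooperative_groups;

// Let exactly one thread of a tile perform a side effect, preferring the newer CG API when present.
template <class Tile>
__device__ void store_once(Tile const& tile, int* out, int value)
{
#if defined(CUCO_HAS_CG_INVOKE_ONE)
  cg::invoke_one(tile, [&]() { *out = value; });
#else
  if (tile.thread_rank() == 0) { *out = value; }
#endif
}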
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | #include 22 | 23 | namespace cuco { 24 | namespace detail { 25 | 26 | /** 27 | * @brief CRTP mixin which augments a given `Reference` with an `Operator`. 28 | * 29 | * @throw If the operator is not defined in `include/cuco/operator.hpp` 30 | * 31 | * @tparam Operator Operator type, i.e., `cuco::op::*_tag` 32 | * @tparam Reference The reference type. 33 | * 34 | * @note This primary template should never be instantiated. 35 | */ 36 | template 37 | class operator_impl { 38 | static_assert(cuco::dependent_false, 39 | "Operator type is not supported by reference type."); 40 | }; 41 | 42 | /** 43 | * @brief Checks if the given `Operator` is contained in a list of `Operators`. 44 | * 45 | * @tparam Operator Operator type, i.e., `cuco::op::*_tag` 46 | * @tparam Operators List of operators to search in 47 | * 48 | * @return `true` if `Operator` is contained in `Operators`, `false` otherwise. 49 | */ 50 | template 51 | __host__ __device__ static constexpr bool has_operator() 52 | { 53 | return ((std::is_same_v) || ...); 54 | } 55 | 56 | } // namespace detail 57 | } // namespace cuco 58 | -------------------------------------------------------------------------------- /include/cuco/detail/pair/pair.inl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
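The pair helpers implemented above (cuco::make_pair and the equality operator from include/cuco/detail/pair/pair.inl) can be exercised as in the sketch below; this is not repository code and assumes the public header pulling in these definitions is <cuco/pair.cuh>.

#include <cuco/pair.cuh>

constexpr auto p = cuco::make_pair(1, 2.0f);  // deduces cuco::pair<int, float>
static_assert(p.first == 1 and p.second == 2.0f);
static_assert(p == cuco::pair<int, float>{1, 2.0f});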
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | namespace cuco { 23 | 24 | template 25 | __host__ __device__ constexpr pair::pair(First const& f, Second const& s) 26 | : first{f}, second{s} 27 | { 28 | } 29 | 30 | template 31 | template 32 | __host__ __device__ constexpr pair::pair(pair const& p) 33 | : first{p.first}, second{p.second} 34 | { 35 | } 36 | 37 | template 38 | __host__ __device__ constexpr pair, cuda::std::decay_t> make_pair( 39 | F&& f, S&& s) noexcept 40 | { 41 | return pair, cuda::std::decay_t>(cuda::std::forward(f), 42 | cuda::std::forward(s)); 43 | } 44 | 45 | template 46 | __host__ __device__ constexpr bool operator==(cuco::pair const& lhs, 47 | cuco::pair const& rhs) noexcept 48 | { 49 | return lhs.first == rhs.first and lhs.second == rhs.second; 50 | } 51 | 52 | } // namespace cuco 53 | 54 | namespace cuda::std { 55 | #include 56 | } // namespace cuda::std 57 | -------------------------------------------------------------------------------- /tests/test_utils.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | namespace cuco { 22 | namespace test { 23 | namespace detail { 24 | 25 | template 26 | __global__ void count_if(Iterator begin, 27 | Iterator end, 28 | cuda::atomic* count, 29 | Predicate p) 30 | { 31 | auto tid = blockDim.x * blockIdx.x + threadIdx.x; 32 | auto it = begin + tid; 33 | 34 | while (it < end) { 35 | count->fetch_add(static_cast(p(*it))); 36 | it += gridDim.x * blockDim.x; 37 | } 38 | } 39 | 40 | template 41 | __global__ void count_if(Iterator1 begin1, 42 | Iterator1 end1, 43 | Iterator2 begin2, 44 | cuda::atomic* count, 45 | Predicate p) 46 | { 47 | auto const n = end1 - begin1; 48 | auto tid = blockDim.x * blockIdx.x + threadIdx.x; 49 | 50 | while (tid < n) { 51 | auto cmp = begin1 + tid; 52 | auto ref = begin2 + tid; 53 | count->fetch_add(static_cast(p(*cmp, *ref))); 54 | tid += gridDim.x * blockDim.x; 55 | } 56 | } 57 | 58 | } // namespace detail 59 | } // namespace test 60 | } // namespace cuco 61 | -------------------------------------------------------------------------------- /tests/static_map/rehash_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #include 24 | 25 | TEST_CASE("static_map rehash test", "") 26 | { 27 | using key_type = int; 28 | using mapped_type = long; 29 | 30 | constexpr std::size_t num_keys{400}; 31 | constexpr std::size_t num_erased_keys{100}; 32 | 33 | cuco::static_map map{num_keys, 34 | cuco::empty_key{-1}, 35 | cuco::empty_value{-1}, 36 | cuco::erased_key{-2}}; 37 | 38 | auto keys_begin = thrust::counting_iterator(1); 39 | 40 | auto pairs_begin = thrust::make_transform_iterator( 41 | keys_begin, 42 | cuda::proclaim_return_type>([] __device__(key_type const& x) { 43 | return cuco::pair(x, static_cast(x)); 44 | })); 45 | 46 | map.insert(pairs_begin, pairs_begin + num_keys); 47 | 48 | map.rehash(); 49 | REQUIRE(map.size() == num_keys); 50 | 51 | map.rehash(num_keys * 2); 52 | REQUIRE(map.size() == num_keys); 53 | 54 | map.erase(keys_begin, keys_begin + num_erased_keys); 55 | map.rehash(); 56 | REQUIRE(map.size() == num_keys - num_erased_keys); 57 | } 58 | -------------------------------------------------------------------------------- /.github/workflows/build-and-test.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | name: build and test 17 | 18 | defaults: 19 | run: 20 | shell: bash -eo pipefail {0} 21 | 22 | on: 23 | workflow_call: 24 | inputs: 25 | cpu: {type: string, required: true} 26 | test_name: {type: string, required: false} 27 | build_script: {type: string, required: false} 28 | test_script: {type: string, required: false} 29 | container_image: {type: string, required: false} 30 | run_tests: {type: boolean, required: false, default: true} 31 | 32 | jobs: 33 | build: 34 | name: Build ${{inputs.test_name}} 35 | uses: ./.github/workflows/run-as-coder.yml 36 | with: 37 | name: Build ${{inputs.test_name}} 38 | runner: linux-${{inputs.cpu}}-cpu16 39 | image: ${{ inputs.container_image }} 40 | command: | 41 | ${{ inputs.build_script }} 42 | 43 | test: 44 | needs: build 45 | if: ${{ !cancelled() && ( needs.build.result == 'success' || needs.build.result == 'skipped' ) && inputs.run_tests}} 46 | name: Test ${{inputs.test_name}} 47 | uses: ./.github/workflows/run-as-coder.yml 48 | with: 49 | name: Test ${{inputs.test_name}} 50 | runner: linux-${{inputs.cpu}}-gpu-v100-latest-1 51 | image: ${{inputs.container_image}} 52 | command: | 53 | nvidia-smi 54 | ${{ inputs.test_script }} -------------------------------------------------------------------------------- /include/cuco/detail/hash_functions/identity_hash.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | namespace cuco::detail { 23 | 24 | /** 25 | * @brief An Identity hash function to hash the given argument on host and device 26 | * 27 | * @note `identity_hash` is perfect if `hash_table_capacity >= |input set|` 28 | * 29 | * @note `identity_hash` is only intended to be used as a perfect hash, i.e. when the capacity condition above holds. 30 | * 31 | * @note Perfect hashes are deterministic, and thus do not need seeds. 32 | * 33 | * @tparam Key The type of the values to hash 34 | */ 35 | template <typename Key> 36 | struct identity_hash : private cuda::std::identity { 37 | using argument_type = Key; ///< The type of the values taken as argument 38 | /// The type of the hash values produced 39 | using result_type = cuda::std::conditional_t; 40 | 41 | static_assert(cuda::std::is_convertible_v<Key, result_type>, 42 | "Key type must be convertible to result_type"); 43 | 44 | /** 45 | * @brief Returns a hash value for its argument, as a value of type `result_type`. 46 | * 47 | * @param x The input argument to hash 48 | * @return A resulting hash value for `x` 49 | */ 50 | __host__ __device__ result_type operator()(Key const& x) const 51 | { 52 | return static_cast<result_type>(cuda::std::identity::operator()(x)); 53 | } 54 | }; // identity_hash 55 | 56 | } // namespace cuco::detail 57 | -------------------------------------------------------------------------------- /include/cuco/detail/pair/traits.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
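// Illustrative aside, not part of the original header: `identity_hash` forwards the key as its
// own hash value, so it is only useful when the table capacity covers the whole input set (the
// "perfect" case noted above). A minimal host-side sketch; requires <cassert>.
inline void identity_hash_sketch()
{
  cuco::detail::identity_hash<int> hash{};
  assert(hash(42) == 42);      // the key is its own hash value
  assert(hash(7) == hash(7));  // deterministic, no seed involved
}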
13 | * See the License for the specific language governing permissions and 14 | */ 15 | 16 | #pragma once 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | 24 | namespace cuco::detail { 25 | 26 | template 27 | struct is_std_pair_like : cuda::std::false_type {}; 28 | 29 | template 30 | struct is_std_pair_like(cuda::std::declval())), 32 | decltype(std::get<1>(cuda::std::declval()))>> 33 | : cuda::std:: 34 | conditional_t::value == 2, cuda::std::true_type, cuda::std::false_type> {}; 35 | 36 | template 37 | struct is_cuda_std_pair_like_impl : cuda::std::false_type {}; 38 | 39 | template 40 | struct is_cuda_std_pair_like_impl< 41 | T, 42 | cuda::std::void_t(cuda::std::declval())), 43 | decltype(cuda::std::get<1>(cuda::std::declval())), 44 | decltype(cuda::std::tuple_size::value)>> 45 | : cuda::std::conditional_t::value == 2, 46 | cuda::std::true_type, 47 | cuda::std::false_type> {}; 48 | 49 | template 50 | struct is_cuda_std_pair_like 51 | : is_cuda_std_pair_like_impl()))>> {}; 53 | 54 | } // namespace cuco::detail 55 | -------------------------------------------------------------------------------- /ci/sccache_hit_rate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | set -euo pipefail 18 | 19 | # Ensure two arguments are provided 20 | if [ $# -ne 2 ]; then 21 | echo "Usage: $0 " >&2 22 | exit 1 23 | fi 24 | 25 | # Print the contents of the before file 26 | echo "=== Contents of $1 ===" >&2 27 | cat $1 >&2 28 | echo "=== End of $1 ===" >&2 29 | 30 | # Print the contents of the after file 31 | echo "=== Contents of $2 ===" >&2 32 | cat $2 >&2 33 | echo "=== End of $2 ===" >&2 34 | 35 | # Extract compile requests and cache hits from the before and after files 36 | requests_before=$(awk '/^[ \t]*Compile requests[ \t]+[0-9]+/ {print $3}' "$1") 37 | hits_before=$(awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}' "$1") 38 | requests_after=$(awk '/^[ \t]*Compile requests[ \t]+[0-9]+/ {print $3}' "$2") 39 | hits_after=$(awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}' "$2") 40 | 41 | # Calculate the differences to find out how many new requests and hits 42 | requests_diff=$((requests_after - requests_before)) 43 | hits_diff=$((hits_after - hits_before)) 44 | 45 | echo "New Compile Requests: $requests_diff" >&2 46 | echo "New Hits: $hits_diff" >&2 47 | 48 | # Calculate and print the hit rate 49 | if [ $requests_diff -eq 0 ]; then 50 | echo "No new compile requests, hit rate is not applicable" 51 | else 52 | hit_rate=$(awk -v hits=$hits_diff -v requests=$requests_diff 'BEGIN {printf "%.2f", hits/requests * 100}') 53 | echo "sccache hit rate: $hit_rate%" >&2 54 | echo "$hit_rate" 55 | fi -------------------------------------------------------------------------------- /include/cuco/utility/error.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2023, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | namespace cuco { 23 | /** 24 | * @brief Exception thrown when logical precondition is violated. 25 | * 26 | * This exception should not be thrown directly and is instead thrown by the 27 | * CUCO_EXPECTS macro. 28 | */ 29 | struct logic_error : public std::logic_error { 30 | /** 31 | * @brief Constructs a logic_error with the error message. 32 | * 33 | * @param message Message to be associated with the exception 34 | */ 35 | logic_error(char const* const message) : std::logic_error(message) {} 36 | 37 | /** 38 | * @brief Construct a new logic error object with error message 39 | * 40 | * @param message Message to be associated with the exception 41 | */ 42 | logic_error(std::string const& message) : std::logic_error(message) {} 43 | }; 44 | /** 45 | * @brief Exception thrown when a CUDA error is encountered. 46 | * 47 | */ 48 | struct cuda_error : public std::runtime_error { 49 | /** 50 | * @brief Constructs a `cuda_error` object with the given `message`. 
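// Illustrative aside, not part of the original header: `cuco::logic_error` and `cuco::cuda_error`
// plug into the standard exception hierarchy, so host code can handle precondition violations and
// CUDA failures separately. Which cuco calls throw is an assumption here; the snippet only shows
// the catch pattern. Requires <iostream>.
inline void cuco_error_handling_sketch()
{
  try {
    // ... invoke a throwing cuco host API here, e.g. a container constructor ...
  } catch (cuco::logic_error const& e) {
    std::cerr << "precondition violated: " << e.what() << '\n';
  } catch (cuco::cuda_error const& e) {
    std::cerr << "CUDA failure: " << e.what() << '\n';
  }
}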
51 | * 52 | * @param message The error char array used to construct `cuda_error` 53 | */ 54 | cuda_error(const char* message) : std::runtime_error(message) {} 55 | /** 56 | * @brief Constructs a `cuda_error` object with the given `message` string. 57 | * 58 | * @param message The `std::string` used to construct `cuda_error` 59 | */ 60 | cuda_error(std::string const& message) : cuda_error{message.c_str()} {} 61 | }; 62 | } // namespace cuco 63 | -------------------------------------------------------------------------------- /.devcontainer/launch.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | launch_devcontainer() { 18 | 19 | # Ensure we're in the repo root 20 | cd "$( cd "$( dirname "$(realpath -m "${BASH_SOURCE[0]}")" )" && pwd )/.."; 21 | 22 | if [[ -z $1 ]] || [[ -z $2 ]]; then 23 | echo "Usage: $0 [CUDA version] [Host compiler]" 24 | echo "Example: $0 12.1 gcc12" 25 | return 1 26 | fi 27 | 28 | local cuda_version="$1" 29 | local host_compiler="$2" 30 | local workspace="$(basename "$(pwd)")"; 31 | local tmpdir="$(mktemp -d)/${workspace}"; 32 | local path="$(pwd)/.devcontainer/cuda${cuda_version}-${host_compiler}"; 33 | 34 | mkdir -p "${tmpdir}"; 35 | mkdir -p "${tmpdir}/.devcontainer"; 36 | cp -arL "$path/devcontainer.json" "${tmpdir}/.devcontainer"; 37 | sed -i "s@\${localWorkspaceFolder}@$(pwd)@g" "${tmpdir}/.devcontainer/devcontainer.json"; 38 | path="${tmpdir}"; 39 | 40 | local hash="$(echo -n "${path}" | xxd -pu - | tr -d '[:space:]')"; 41 | local url="vscode://vscode-remote/dev-container+${hash}/home/coder/cuCollections"; 42 | 43 | echo "devcontainer URL: ${url}"; 44 | 45 | local launch=""; 46 | if type open >/dev/null 2>&1; then 47 | launch="open"; 48 | elif type xdg-open >/dev/null 2>&1; then 49 | launch="xdg-open"; 50 | fi 51 | 52 | if [ -n "${launch}" ]; then 53 | code --new-window "${tmpdir}"; 54 | exec "${launch}" "${url}" >/dev/null 2>&1; 55 | fi 56 | } 57 | 58 | launch_devcontainer "$@"; -------------------------------------------------------------------------------- /cmake/roaring_testdata.cmake: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # Copyright (c) 2025, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | # in compliance with the License. You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software distributed under the License 10 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | # or implied. 
See the License for the specific language governing permissions and limitations under 12 | # the License. 13 | # ============================================================================= 14 | 15 | # Only act if enabled 16 | if(NOT CUCO_DOWNLOAD_ROARING_TESTDATA) 17 | return() 18 | endif() 19 | 20 | set(CUCO_ROARING_DATA_DIR "${CMAKE_BINARY_DIR}/data/roaring_bitmap") 21 | 22 | file(MAKE_DIRECTORY "${CUCO_ROARING_DATA_DIR}") 23 | 24 | set(ROARING_FORMATSPEC_BASE "https://raw.githubusercontent.com/RoaringBitmap/RoaringFormatSpec/5177ad9") 25 | 26 | rapids_cmake_download_with_retry("${ROARING_FORMATSPEC_BASE}/testdata/bitmapwithoutruns.bin" 27 | "${CUCO_ROARING_DATA_DIR}/bitmapwithoutruns.bin" 28 | "d719ae2e0150a362ef7cf51c361527585891f01460b1a92bcfb6a7257282a442") 29 | 30 | rapids_cmake_download_with_retry("${ROARING_FORMATSPEC_BASE}/testdata/bitmapwithruns.bin" 31 | "${CUCO_ROARING_DATA_DIR}/bitmapwithruns.bin" 32 | "1f1909bfdd354fa2f0694fe88b8076833ca5383ad9fc3f68f2709c84a2ab70e3") 33 | 34 | rapids_cmake_download_with_retry("${ROARING_FORMATSPEC_BASE}/testdata64/portable_bitmap64.bin" 35 | "${CUCO_ROARING_DATA_DIR}/portable_bitmap64.bin" 36 | "b5a553a759167f5f9ccb3fa21552d943b4c73235635b753376f4faf62067d178") 37 | 38 | message(STATUS "Roaring Bitmap test data downloaded to: ${CUCO_ROARING_DATA_DIR}") 39 | 40 | # Define macro only when data is available 41 | add_compile_definitions(CUCO_ROARING_DATA_DIR="${CUCO_ROARING_DATA_DIR}") -------------------------------------------------------------------------------- /include/cuco/detail/bloom_filter/default_filter_policy.inl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
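// Illustrative aside, not part of the original sources: the CMake snippet above defines the
// CUCO_ROARING_DATA_DIR macro only when the test data has been downloaded, so consuming tests can
// guard on it. A minimal sketch (the file name comes from the download list above); requires
// <fstream>, <iterator>, <string>, and <vector>.
#ifdef CUCO_ROARING_DATA_DIR
inline std::vector<char> read_roaring_testdata_sketch()
{
  std::ifstream in{std::string{CUCO_ROARING_DATA_DIR} + "/bitmapwithruns.bin", std::ios::binary};
  return {std::istreambuf_iterator<char>{in}, std::istreambuf_iterator<char>{}};
}
#endif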
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | namespace cuco { 22 | 23 | template 24 | __host__ 25 | __device__ constexpr default_filter_policy::default_filter_policy( 26 | uint32_t pattern_bits, Hash hash) 27 | : impl_{pattern_bits, hash} 28 | { 29 | } 30 | 31 | template 32 | __device__ constexpr typename default_filter_policy::hash_result_type 33 | default_filter_policy::hash( 34 | typename default_filter_policy::hash_argument_type const& key) const 35 | { 36 | return impl_.hash(key); 37 | } 38 | 39 | template 40 | template 41 | __device__ constexpr auto default_filter_policy::block_index( 42 | typename default_filter_policy::hash_result_type hash, 43 | Extent num_blocks) const 44 | { 45 | return impl_.block_index(hash, num_blocks); 46 | } 47 | 48 | template 49 | __device__ constexpr typename default_filter_policy::word_type 50 | default_filter_policy::word_pattern( 51 | default_filter_policy::hash_result_type hash, 52 | std::uint32_t word_index) const 53 | { 54 | return impl_.word_pattern(hash, word_index); 55 | } 56 | 57 | } // namespace cuco -------------------------------------------------------------------------------- /examples/hyperloglog/host_bulk_example.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | /** 26 | * @file host_bulk_example.cu 27 | * @brief Demonstrates usage of `cuco::hyperloglog` "bulk" host APIs. 
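// Illustrative aside, not part of the original sources, referring to the `default_filter_policy`
// interface shown above: a filter policy exposes three hooks, applied in sequence: hash the key
// once, select a filter block, then produce one bit pattern per word of that block. The generic
// sketch below only demonstrates that call order; the policy and word types as well as
// `num_blocks` and `words_per_block` are assumptions.
template <typename Policy, typename Word>
__device__ void apply_filter_policy_sketch(Policy const& policy,
                                           typename Policy::hash_argument_type const& key,
                                           Word* words,
                                           std::size_t num_blocks,
                                           std::uint32_t words_per_block)
{
  auto const h     = policy.hash(key);                   // 1. hash the key once
  auto const block = policy.block_index(h, num_blocks);  // 2. choose a filter block
  for (std::uint32_t i = 0; i < words_per_block; ++i) {
    words[block * words_per_block + i] |= policy.word_pattern(h, i);  // 3. set the per-word bits
  }
}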
28 | */ 29 | int main(void) 30 | { 31 | using T = int; 32 | constexpr std::size_t num_items = 1ull << 28; // 1GB 33 | 34 | thrust::device_vector items(num_items); 35 | 36 | // Generate `num_items` distinct items 37 | thrust::sequence(items.begin(), items.end(), 0); 38 | 39 | // We define the desired standard deviation of the approximation error 40 | // 0.0122197 is the default value and corresponds to a 32KB sketch size 41 | auto const sd = cuco::standard_deviation{0.0122197}; 42 | 43 | // Initialize the estimator 44 | cuco::hyperloglog estimator{sd}; 45 | 46 | // Add all items to the estimator 47 | estimator.add(items.begin(), items.end()); 48 | 49 | // Adding the same items again will not affect the result 50 | estimator.add(items.begin(), items.begin() + num_items / 2); 51 | 52 | // Calculate the cardinality estimate 53 | std::size_t const estimated_cardinality = estimator.estimate(); 54 | 55 | std::cout << "True cardinality: " << num_items 56 | << "\nEstimated cardinality: " << estimated_cardinality << "\nError: " 57 | << std::abs( 58 | static_cast(estimated_cardinality) / static_cast(num_items) - 1.0) 59 | << std::endl; 60 | 61 | return 0; 62 | } -------------------------------------------------------------------------------- /ci/pre-commit/doxygen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Skip if doxygen is not installed 18 | if ! [ -x "$(command -v doxygen)" ]; then 19 | echo -e "Warning: Doxygen is not installed - skipping check" 20 | exit 0 21 | fi 22 | 23 | # Utility to return version as number for comparison 24 | function version { echo "$@" | awk -F. '{ printf("%d%03d%03d%03d\n", $1,$2,$3,$4); }'; } 25 | 26 | # Doxygen supported version 1.8.20 to 1.9.1 27 | DOXYGEN_VERSION=$(doxygen --version) 28 | if [ $(version "$DOXYGEN_VERSION") -lt $(version "1.8.20") ] || [ $(version $DOXYGEN_VERSION) -gt $(version "1.9.1") ]; then 29 | echo -e "Warning: Unsupported Doxygen version $DOXYGEN_VERSION - skipping check" 30 | echo -e "Expected Doxygen version from 1.8.20 to 1.9.1" 31 | exit 0 32 | fi 33 | 34 | echo "Using doxygen version: ${DOXYGEN_VERSION}" 35 | 36 | # Run doxygen, ignore missing tag files error 37 | TAG_ERROR1="error: Tag file '.*.tag' does not exist or is not a file. Skipping it..." 38 | TAG_ERROR2="error: cannot open tag file .*.tag for writing" 39 | DOXYGEN_STDERR=`cd doxygen && { cat Doxyfile ; echo QUIET = YES; echo GENERATE_HTML = NO; } | doxygen - 2>&1 | sed "/\($TAG_ERROR1\|$TAG_ERROR2\)/d"` 40 | RETVAL=$? 41 | 42 | if [ "$RETVAL" != "0" ] || [ ! 
-z "$DOXYGEN_STDERR" ]; then 43 | echo -e "\n>>>> FAILED: doxygen check; begin output\n" 44 | echo -e "$DOXYGEN_STDERR" 45 | echo -e "\n>>>> FAILED: doxygen check; end output\n" 46 | RETVAL=1 #because return value is not generated by doxygen 1.8.17 47 | else 48 | echo -e "\n>>>> PASSED: doxygen check\n" 49 | fi 50 | 51 | exit $RETVAL 52 | -------------------------------------------------------------------------------- /.github/workflows/dispatch-build-and-test.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | name: Dispatch build and test 17 | 18 | on: 19 | workflow_call: 20 | inputs: 21 | per_cuda_compiler_matrix: {type: string, required: true} 22 | devcontainer_version: {type: string, required: true} 23 | 24 | jobs: 25 | # Using a matrix to dispatch to the build-and-test reusable workflow for each build configuration 26 | # ensures that the build/test steps can overlap across different configurations. 27 | build_and_test: 28 | name: ${{matrix.cpu}} 29 | uses: ./.github/workflows/build-and-test.yml 30 | strategy: 31 | fail-fast: false 32 | matrix: 33 | include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }} 34 | with: 35 | cpu: ${{ matrix.cpu }} 36 | test_name: ${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}} 37 | build_script: "./ci/build.sh --cxx ${{matrix.compiler.exe}} --std ${{matrix.std}} --arch ${{matrix.gpu_build_archs}} --infix ${{matrix.cpu}}-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}" 38 | test_script: "./ci/test.sh --tests --cxx ${{matrix.compiler.exe}} --std ${{matrix.std}} --arch ${{matrix.gpu_build_archs}} --infix ${{matrix.cpu}}-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}" 39 | container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} 40 | run_tests: ${{ contains(matrix.jobs, 'test') && !contains(github.event.head_commit.message, 'skip-tests') }} 41 | -------------------------------------------------------------------------------- /tests/utility/fast_int_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | 24 | #include 25 | #include 26 | 27 | TEMPLATE_TEST_CASE( 28 | "utility::fast_int tests", "", std::int32_t, std::uint32_t, std::int64_t, std::uint64_t) 29 | { 30 | TestType value = GENERATE(1, 2, 9, 32, 4123, 8192, 4312456); 31 | TestType lhs = GENERATE(1, 2, 9, 32, 4123, 8192, 4312456); 32 | constexpr auto max_value = std::numeric_limits::max(); 33 | 34 | cuco::utility::fast_int fast_value{value}; 35 | 36 | SECTION("Should be explicitly convertible to the underlying integer type.") 37 | { 38 | REQUIRE(static_cast(fast_value) == value); 39 | } 40 | 41 | SECTION("Fast div/mod should produce correct result.") 42 | { 43 | INFO(lhs << " /% " << value); 44 | REQUIRE(lhs / fast_value == lhs / value); 45 | REQUIRE(lhs % fast_value == lhs % value); 46 | } 47 | 48 | SECTION("Fast div/mod with maximum rhs value should produce correct result.") 49 | { 50 | INFO(lhs << " /% " << max_value); 51 | cuco::utility::fast_int fast_max{max_value}; 52 | REQUIRE(lhs / fast_max == lhs / max_value); 53 | REQUIRE(lhs % fast_max == lhs % max_value); 54 | } 55 | 56 | SECTION("Fast div/mod with maximum lhs value should produce correct result.") 57 | { 58 | INFO(max_value << " /% " << value); 59 | REQUIRE(max_value / fast_value == max_value / value); 60 | REQUIRE(max_value % fast_value == max_value % value); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /include/cuco/storage.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | #include 22 | 23 | namespace cuco { 24 | // Forward declaration to avoid circular dependency 25 | template 26 | class bucket_storage; 27 | } // namespace cuco 28 | 29 | namespace cuco { 30 | 31 | /** 32 | * @brief Public storage class. 33 | * 34 | * @note This is a public interface used to control storage bucket size. A bucket consists of one 35 | * or multiple contiguous slots. The bucket size defines the workload granularity for each CUDA 36 | * thread, i.e., how many slots a thread would concurrently operate on when performing modify or 37 | * lookup operations. cuCollections uses the array of bucket storage to supersede the raw flat slot 38 | * storage due to its superior granularity control: When bucket size equals one, array of buckets 39 | * performs the same as the flat storage. If the underlying operation is more memory bandwidth 40 | * bound, e.g., high occupancy multimap operations, a larger bucket size can reduce the length of 41 | * probing sequences thus improve runtime performance. 
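// Illustrative aside, not part of the original header: the bucket size is chosen purely at the
// type level and handed to a container as its storage policy (the multiset tests elsewhere in
// this repository pass `cuco::storage<2>{}`, for example). A minimal sketch:
static_assert(cuco::storage<2>::bucket_size == 2, "two contiguous slots per probing step");
using two_slot_storage = cuco::storage<2>;  // pass this (or cuco::storage<1>) as a container's storage parameter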
42 | * 43 | * @tparam BucketSize Number of elements per bucket storage 44 | */ 45 | template 46 | class storage { 47 | public: 48 | /// Number of slots per bucket storage 49 | static constexpr cuda::std::int32_t bucket_size = BucketSize; 50 | 51 | /// Type of implementation details 52 | template 53 | using impl = bucket_storage; 54 | }; 55 | } // namespace cuco 56 | -------------------------------------------------------------------------------- /benchmarks/benchmark_defaults.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | #include 22 | 23 | #include 24 | #include 25 | 26 | namespace cuco::benchmark::defaults { 27 | 28 | using KEY_TYPE_RANGE = nvbench::type_list; 29 | using VALUE_TYPE_RANGE = nvbench::type_list; 30 | using HASH_RANGE = nvbench::type_list, 31 | cuco::xxhash_32, 32 | cuco::xxhash_64, 33 | cuco::murmurhash3_32>; //, 34 | // cuco::murmurhash3_x86_128, 35 | // cuco::murmurhash3_x64_128>; // TODO handle tuple-like hash value 36 | 37 | auto constexpr N = 100'000'000; 38 | auto constexpr OCCUPANCY = 0.5; 39 | auto constexpr MULTIPLICITY = 1; 40 | auto constexpr MATCHING_RATE = 1.0; 41 | auto constexpr SKEW = 0.5; 42 | auto constexpr BATCH_SIZE = 1'000'000; 43 | auto constexpr INITIAL_SIZE = 50'000'000; 44 | 45 | auto const N_RANGE = nvbench::range(10'000'000, 100'000'000, 20'000'000); 46 | auto const N_RANGE_CACHE = 47 | std::vector{8'000, 80'000, 800'000, 8'000'000, 80'000'000}; 48 | auto const OCCUPANCY_RANGE = nvbench::range(0.1, 0.9, 0.1); 49 | auto const MULTIPLICITY_RANGE = std::vector{1, 2, 4, 8, 16}; 50 | auto const MATCHING_RATE_RANGE = nvbench::range(0.1, 1., 0.1); 51 | auto const SKEW_RANGE = nvbench::range(0.1, 1., 0.1); 52 | 53 | } // namespace cuco::benchmark::defaults 54 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Common 2 | __pycache__ 3 | *.py[cod] 4 | *$py.class 5 | *.a 6 | *.o 7 | *.so 8 | *.dylib 9 | .cache 10 | .vscode 11 | *.code-workspace 12 | *.swp 13 | *.pytest_cache 14 | DartConfiguration.tcl 15 | .DS_Store 16 | *.manifest 17 | *.spec 18 | 19 | ## Python build directories & artifacts 20 | dist/ 21 | python/build 22 | python/*/build 23 | python/*/record.txt 24 | .Python 25 | env/ 26 | develop-eggs/ 27 | downloads/ 28 | eggs/ 29 | .eggs/ 30 | lib/ 31 | lib64/ 32 | parts/ 33 | sdist/ 34 | var/ 35 | wheels/ 36 | *.egg-info/ 37 | .installed.cfg 38 | *.egg 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | 
.coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | /tests/Testing/ 53 | 54 | ## Patching 55 | *.diff 56 | *.orig 57 | *.rej 58 | 59 | ## C++ build directories & artifacts 60 | CMakeFiles/ 61 | Debug 62 | build/ 63 | cpp/build/ 64 | cpp/thirdparty/googletest/ 65 | 66 | ## Eclipse IDE 67 | .project 68 | .cproject 69 | .settings 70 | 71 | ## IntelliJ IDE 72 | .idea/ 73 | .idea_modules/ 74 | *.iml 75 | *.ipr 76 | *.iws 77 | 78 | ## Doxygen 79 | /doxygen/html 80 | /doxygen/latex 81 | /html 82 | /latex 83 | 84 | #Java 85 | target 86 | 87 | # Translations 88 | *.mo 89 | *.pot 90 | 91 | # Django stuff: 92 | *.log 93 | local_settings.py 94 | 95 | # Flask stuff: 96 | instance/ 97 | .webassets-cache 98 | 99 | # Scrapy stuff: 100 | .scrapy 101 | 102 | # Sphinx documentation 103 | docs/_build/ 104 | 105 | # PyBuilder 106 | target/ 107 | 108 | # Jupyter Notebook 109 | .ipynb_checkpoints 110 | 111 | # pyenv 112 | .python-version 113 | 114 | # celery beat schedule file 115 | celerybeat-schedule 116 | 117 | # SageMath parsed files 118 | *.sage.py 119 | 120 | # dotenv 121 | .env 122 | 123 | # virtualenv 124 | .venv 125 | venv/ 126 | ENV/ 127 | 128 | # Spyder project settings 129 | .spyderproject 130 | .spyproject 131 | 132 | # Rope project settings 133 | .ropeproject 134 | 135 | # mkdocs documentation 136 | /site 137 | 138 | # mypy 139 | .mypy_cache/ 140 | 141 | # clang 142 | compile_commands.json 143 | /.clangd/ 144 | 145 | # figures 146 | *.eps 147 | 148 | # Github 149 | /.config/ 150 | /.devcontainer.json 151 | 152 | # AWS cache 153 | /.aws/ 154 | -------------------------------------------------------------------------------- /include/cuco/detail/utility/strong_type.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | */ 15 | 16 | #pragma once 17 | 18 | namespace cuco::detail { 19 | 20 | /** 21 | * @brief A strong type wrapper 22 | * 23 | * @tparam T Type of the value 24 | * 25 | */ 26 | template 27 | struct strong_type { 28 | /** 29 | * @brief Constructs a strong type 30 | * 31 | * @param v Value to be wrapped as a strong type 32 | */ 33 | __host__ __device__ explicit constexpr strong_type(T v) : value{v} {} 34 | 35 | /** 36 | * @brief Implicit conversion operator to the underlying value. 
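// Illustrative aside, not part of the original header: a strong type wraps a single value and
// converts back to it implicitly, which is how sentinel wrappers such as `cuco::empty_key` are
// used throughout this repository (that they are built on these helpers is an assumption here).
// A minimal sketch:
inline int strong_type_sketch()
{
  cuco::detail::strong_type<int> wrapped{42};
  int const raw = wrapped;  // implicit conversion back to the underlying value
  return raw;               // returns 42
}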
37 | * 38 | * @return Underlying value 39 | */ 40 | __host__ __device__ constexpr operator T() const noexcept { return value; } 41 | 42 | T value; ///< Underlying data value 43 | }; 44 | 45 | } // namespace cuco::detail 46 | 47 | /** 48 | * @brief Convenience wrapper for defining a strong type 49 | */ 50 | #define CUCO_DEFINE_STRONG_TYPE(Name, Type) \ 51 | struct Name : public cuco::detail::strong_type { \ 52 | __host__ __device__ explicit constexpr Name(Type value) \ 53 | : cuco::detail::strong_type(value) \ 54 | { \ 55 | } \ 56 | }; 57 | 58 | /** 59 | * @brief Convenience wrapper for defining a templated strong type 60 | */ 61 | #define CUCO_DEFINE_TEMPLATE_STRONG_TYPE(Name) \ 62 | template \ 63 | struct Name : public cuco::detail::strong_type { \ 64 | __host__ __device__ explicit constexpr Name(T value) : cuco::detail::strong_type(value) {} \ 65 | }; 66 | -------------------------------------------------------------------------------- /benchmarks/static_set/size_bench.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | #include 19 | 20 | #include 21 | #include 22 | 23 | #include 24 | 25 | #include 26 | 27 | using namespace cuco::benchmark; // defaults, dist_from_state 28 | using namespace cuco::utility; // key_generator, distribution 29 | 30 | /** 31 | * @brief A benchmark evaluating `cuco::static_set::size` performance 32 | */ 33 | template 34 | void static_set_size(nvbench::state& state, nvbench::type_list) 35 | { 36 | auto const num_keys = state.get_int64("NumInputs"); 37 | auto const occupancy = state.get_float64("Occupancy"); 38 | 39 | std::size_t const size = num_keys / occupancy; 40 | 41 | thrust::device_vector keys(num_keys); 42 | 43 | [[maybe_unused]] key_generator gen{}; 44 | gen.generate(dist_from_state(state), keys.begin(), keys.end()); 45 | 46 | state.add_element_count(num_keys); 47 | 48 | cuco::static_set set{size, cuco::empty_key{-1}}; 49 | 50 | set.insert(keys.begin(), keys.end()); 51 | 52 | state.exec(nvbench::exec_tag::sync, 53 | [&](nvbench::launch& launch) { auto const size = set.size({launch.get_stream()}); }); 54 | } 55 | 56 | NVBENCH_BENCH_TYPES(static_set_size, 57 | NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE, 58 | nvbench::type_list)) 59 | .set_name("static_set_size_unique_occupancy") 60 | .set_type_axes_names({"Key", "Distribution"}) 61 | .add_int64_axis("NumInputs", {defaults::N}) 62 | .add_float64_axis("Occupancy", defaults::OCCUPANCY_RANGE); 63 | -------------------------------------------------------------------------------- /.github/actions/compute-matrix/compute-matrix.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | set -euo pipefail 18 | 19 | write_output() { 20 | local key="$1" 21 | local value="$2" 22 | echo "$key=$value" | tee --append "${GITHUB_OUTPUT:-/dev/null}" 23 | } 24 | 25 | explode_std_versions() { 26 | jq -cr 'map(. as $o | {std: $o.std[]} + del($o.std))' 27 | } 28 | 29 | extract_matrix() { 30 | local file="$1" 31 | local type="$2" 32 | local matrix=$(yq -o=json "$file" | jq -cr ".$type") 33 | write_output "DEVCONTAINER_VERSION" "$(yq -o json "$file" | jq -cr '.devcontainer_version')" 34 | local nvcc_full_matrix="$(echo "$matrix" | jq -cr '.nvcc' | explode_std_versions )" 35 | write_output "NVCC_FULL_MATRIX" "$nvcc_full_matrix" 36 | write_output "CUDA_VERSIONS" "$(echo "$nvcc_full_matrix" | jq -cr '[.[] | .cuda] | unique')" 37 | write_output "HOST_COMPILERS" "$(echo "$nvcc_full_matrix" | jq -cr '[.[] | .compiler.name] | unique')" 38 | write_output "PER_CUDA_COMPILER_MATRIX" "$(echo "$nvcc_full_matrix" | jq -cr ' group_by(.cuda + .compiler.name) | map({(.[0].cuda + "-" + .[0].compiler.name): .}) | add')" 39 | } 40 | 41 | main() { 42 | if [ "$1" == "-v" ]; then 43 | set -x 44 | shift 45 | fi 46 | 47 | if [ $# -ne 2 ] || [ "$2" != "pull_request" ]; then 48 | echo "Usage: $0 [-v] MATRIX_FILE MATRIX_TYPE" 49 | echo " -v : Enable verbose output" 50 | echo " MATRIX_FILE : The path to the matrix file." 51 | echo " MATRIX_TYPE : The desired matrix. Supported values: 'pull_request'" 52 | exit 1 53 | fi 54 | 55 | echo "Input matrix file:" >&2 56 | cat "$1" >&2 57 | echo "Matrix Type: $2" >&2 58 | 59 | extract_matrix "$1" "$2" 60 | } 61 | 62 | main "$@" -------------------------------------------------------------------------------- /benchmarks/static_set/rehash_bench.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | #include 19 | 20 | #include 21 | #include 22 | 23 | #include 24 | 25 | #include 26 | 27 | using namespace cuco::benchmark; // defaults, dist_from_state 28 | using namespace cuco::utility; // key_generator, distribution 29 | 30 | /** 31 | * @brief A benchmark evaluating `cuco::static_set::rehash` performance 32 | */ 33 | template 34 | void static_set_rehash(nvbench::state& state, nvbench::type_list) 35 | { 36 | std::size_t const capacity = state.get_int64("Capacity"); 37 | auto const occupancy = state.get_float64("Occupancy"); 38 | 39 | std::size_t const num_keys = capacity * occupancy; 40 | 41 | thrust::device_vector keys(num_keys); // slots per second 42 | 43 | [[maybe_unused]] key_generator gen{}; 44 | gen.generate(dist_from_state(state), keys.begin(), keys.end()); 45 | 46 | state.add_element_count(capacity); 47 | 48 | cuco::static_set set{capacity, cuco::empty_key{-1}}; 49 | 50 | set.insert(keys.begin(), keys.end()); 51 | 52 | state.exec(nvbench::exec_tag::sync, 53 | [&](nvbench::launch& launch) { set.rehash({launch.get_stream()}); }); 54 | } 55 | 56 | NVBENCH_BENCH_TYPES(static_set_rehash, 57 | NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE, 58 | nvbench::type_list)) 59 | .set_name("static_set_rehash_unique_occupancy") 60 | .set_type_axes_names({"Key", "Distribution"}) 61 | .add_int64_axis("Capacity", {defaults::N}) 62 | .add_float64_axis("Occupancy", defaults::OCCUPANCY_RANGE); 63 | -------------------------------------------------------------------------------- /benchmarks/static_set/retrieve_all_bench.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | #include 19 | 20 | #include 21 | #include 22 | 23 | #include 24 | 25 | #include 26 | 27 | using namespace cuco::benchmark; // defaults, dist_from_state 28 | using namespace cuco::utility; // key_generator, distribution 29 | 30 | /** 31 | * @brief A benchmark evaluating `cuco::static_set::retrieve_all` performance 32 | */ 33 | template 34 | void static_set_retrieve_all(nvbench::state& state, nvbench::type_list) 35 | { 36 | auto const num_keys = state.get_int64("NumInputs"); 37 | auto const occupancy = state.get_float64("Occupancy"); 38 | 39 | std::size_t const size = num_keys / occupancy; 40 | 41 | thrust::device_vector keys(num_keys); 42 | 43 | [[maybe_unused]] key_generator gen{}; 44 | gen.generate(dist_from_state(state), keys.begin(), keys.end()); 45 | 46 | cuco::static_set set{size, cuco::empty_key{-1}}; 47 | set.insert(keys.begin(), keys.end()); 48 | 49 | thrust::device_vector result(num_keys); 50 | 51 | state.add_element_count(num_keys); 52 | state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { 53 | [[maybe_unused]] auto end = set.retrieve_all(result.begin(), {launch.get_stream()}); 54 | }); 55 | } 56 | 57 | NVBENCH_BENCH_TYPES(static_set_retrieve_all, 58 | NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE, 59 | nvbench::type_list)) 60 | .set_name("static_set_retrieve_all_unique_occupancy") 61 | .set_type_axes_names({"Key", "Distribution"}) 62 | .add_int64_axis("NumInputs", {defaults::N}) 63 | .add_float64_axis("Occupancy", defaults::OCCUPANCY_RANGE); 64 | -------------------------------------------------------------------------------- /ci/sccache_stats.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # This script prints the sccache hit rate between two calls to sccache --show-stats. 18 | # It should be sourced in your script before and after the operations you want to profile, 19 | # with the 'start' or 'end' argument respectively. 20 | 21 | mode=$1 22 | 23 | if [[ "$mode" != "start" && "$mode" != "end" ]]; then 24 | echo "Invalid mode: $mode" 25 | echo "Usage: $0 {start|end}" 26 | exit 1 27 | fi 28 | 29 | case $mode in 30 | start) 31 | export SCCACHE_START_HITS=$(sccache --show-stats | awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}') 32 | export SCCACHE_START_MISSES=$(sccache --show-stats | awk '/^[ \t]*Cache misses[ \t]+[0-9]+/ {print $3}') 33 | ;; 34 | end) 35 | if [[ -z ${SCCACHE_START_HITS+x} || -z ${SCCACHE_START_MISSES+x} ]]; then 36 | echo "Error: start stats not collected. Did you call this script with 'start' before your operations?" 
37 | exit 1 38 | fi 39 | 40 | final_hits=$(sccache --show-stats | awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}') 41 | final_misses=$(sccache --show-stats | awk '/^[ \t]*Cache misses[ \t]+[0-9]+/ {print $3}') 42 | hits=$((final_hits - SCCACHE_START_HITS)) 43 | misses=$((final_misses - SCCACHE_START_MISSES)) 44 | total=$((hits + misses)) 45 | 46 | prefix="" 47 | if [ ${GITHUB_ACTIONS:-false} = "true" ]; then 48 | prefix="::notice::" 49 | fi 50 | 51 | if (( total > 0 )); then 52 | hit_rate=$(awk -v hits="$hits" -v total="$total" 'BEGIN { printf "%.2f", (hits / total) * 100 }') 53 | echo ${prefix}"sccache hits: $hits | misses: $misses | hit rate: $hit_rate%" 54 | else 55 | echo ${prefix}"sccache stats: N/A No new compilation requests" 56 | fi 57 | unset SCCACHE_START_HITS 58 | unset SCCACHE_START_MISSES 59 | ;; 60 | esac -------------------------------------------------------------------------------- /include/cuco/detail/storage/storage.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | #include 22 | 23 | namespace cuco::detail { 24 | /** 25 | * @brief Intermediate class internally used by data structures 26 | * 27 | * @tparam StorageImpl Storage implementation class 28 | * @tparam T Storage element type 29 | * @tparam Extent Type of extent denoting number of buckets 30 | * @tparam Allocator Type of allocator used for device storage 31 | */ 32 | template 33 | class storage : StorageImpl::template impl { 34 | public: 35 | /// Storage implementation type 36 | using impl_type = typename StorageImpl::template impl; 37 | using ref_type = typename impl_type::ref_type; ///< Storage ref type 38 | using value_type = typename impl_type::value_type; ///< Storage value type 39 | using allocator_type = typename impl_type::allocator_type; ///< Storage value type 40 | 41 | /// Number of elements per bucket 42 | static constexpr int bucket_size = impl_type::bucket_size; 43 | 44 | using impl_type::allocator; 45 | using impl_type::capacity; 46 | using impl_type::data; 47 | using impl_type::extent; 48 | using impl_type::initialize; 49 | using impl_type::initialize_async; 50 | using impl_type::num_buckets; 51 | using impl_type::ref; 52 | 53 | /** 54 | * @brief Constructs storage. 
55 | * 56 | * @param size Number of slots to (de)allocate 57 | * @param allocator Allocator used for (de)allocating device storage 58 | * @param stream Stream to use for (de)allocating device storage 59 | */ 60 | explicit constexpr storage(Extent size, Allocator const& allocator, cuda::stream_ref stream) 61 | : impl_type{size, allocator, stream} 62 | { 63 | } 64 | }; 65 | } // namespace cuco::detail 66 | -------------------------------------------------------------------------------- /tests/static_multiset/load_factor_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | 23 | using size_type = int32_t; 24 | 25 | TEMPLATE_TEST_CASE_SIG( 26 | "static_multiset load factor tests", 27 | "", 28 | ((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize), 29 | (int32_t, cuco::test::probe_sequence::double_hashing, 1), 30 | (int32_t, cuco::test::probe_sequence::double_hashing, 2), 31 | (int64_t, cuco::test::probe_sequence::double_hashing, 1), 32 | (int64_t, cuco::test::probe_sequence::double_hashing, 2), 33 | (int32_t, cuco::test::probe_sequence::linear_probing, 1), 34 | (int32_t, cuco::test::probe_sequence::linear_probing, 2), 35 | (int64_t, cuco::test::probe_sequence::linear_probing, 1), 36 | (int64_t, cuco::test::probe_sequence::linear_probing, 2)) 37 | { 38 | constexpr size_type num_keys{10}; 39 | 40 | using probe = std::conditional_t>, 42 | cuco::double_hashing>>; 43 | 44 | SECTION("Negative load factor will throw exception") 45 | { 46 | REQUIRE_THROWS(cuco::static_multiset{ 47 | num_keys, -0.1, cuco::empty_key{-1}, {}, probe{}, {}, cuco::storage<2>{}}); 48 | } 49 | 50 | SECTION("Zero load factor will throw exception") 51 | { 52 | REQUIRE_THROWS(cuco::static_multiset{ 53 | num_keys, 0.0, cuco::empty_key{-1}, {}, probe{}, {}, cuco::storage<2>{}}); 54 | } 55 | 56 | SECTION("Load factor larger than one will throw exception") 57 | { 58 | REQUIRE_THROWS(cuco::static_multiset{ 59 | num_keys, 1.1, cuco::empty_key{-1}, {}, probe{}, {}, cuco::storage<2>{}}); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /include/cuco/detail/utils.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
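// Illustrative aside, not part of the original test: with a load factor inside (0, 1] the same
// constructor used in the REQUIRE_THROWS checks above succeeds. The concrete probing scheme below
// mirrors the test's linear-probing configuration; the hash functor name and the choice of 0.5 as
// a representative valid load factor are assumptions.
inline void valid_load_factor_sketch()
{
  using probe = cuco::linear_probing<1, cuco::default_hash_function<int>>;
  cuco::static_multiset multiset{
    10, 0.5, cuco::empty_key{-1}, {}, probe{}, {}, cuco::storage<2>{}};
}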
13 | * See the License for the specific language governing permissions and 14 | */ 15 | 16 | #pragma once 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | 24 | namespace cuco { 25 | namespace detail { 26 | 27 | template <typename Iterator> 28 | __host__ __device__ constexpr inline index_type distance(Iterator begin, Iterator end) 29 | { 30 | using category = typename cuda::std::iterator_traits<Iterator>::iterator_category; 31 | static_assert(cuda::std::is_base_of_v<cuda::std::random_access_iterator_tag, category>, 32 | "Input iterator should be a random access iterator."); 33 | // `int64_t` instead of arch-dependent `long int` 34 | return static_cast<index_type>(cuda::std::distance(begin, end)); 35 | } 36 | 37 | /** 38 | * @brief C++17 constexpr backport of `std::lower_bound`. 39 | * 40 | * @tparam ForwardIt Type of input iterator 41 | * @tparam T Type of `value` 42 | * 43 | * @param first Iterator defining the start of the range to examine 44 | * @param last Iterator defining the end of the range to examine 45 | * @param value Value to compare the elements to 46 | * 47 | * @return Iterator pointing to the first element in the range [first, last) that does not satisfy 48 | * element < value 49 | */ 50 | template <typename ForwardIt, typename T> 51 | constexpr ForwardIt lower_bound(ForwardIt first, ForwardIt last, const T& value) 52 | { 53 | using diff_type = typename std::iterator_traits<ForwardIt>::difference_type; 54 | 55 | ForwardIt it{}; 56 | diff_type count = std::distance(first, last); 57 | diff_type step{}; 58 | 59 | while (count > 0) { 60 | it = first; 61 | step = count / 2; 62 | std::advance(it, step); 63 | 64 | if (static_cast<T>(*it) < value) { 65 | first = ++it; 66 | count -= step + 1; 67 | } else 68 | count = step; 69 | } 70 | 71 | return first; 72 | } 73 | 74 | } // namespace detail 75 | } // namespace cuco 76 | -------------------------------------------------------------------------------- /tests/dynamic_bitset/find_next_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
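// Illustrative aside, not part of the original header: `cuco::detail::lower_bound` mirrors
// `std::lower_bound` but is usable in constexpr context. A minimal compile-time sketch:
constexpr int sorted_values_sketch[] = {1, 3, 3, 7, 9};
static_assert(*cuco::detail::lower_bound(sorted_values_sketch, sorted_values_sketch + 5, 3) == 3);
static_assert(cuco::detail::lower_bound(sorted_values_sketch, sorted_values_sketch + 5, 8) ==
              sorted_values_sketch + 4);  // the first element not less than 8 is 9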
15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #include 26 | 27 | template 28 | __global__ void find_next_kernel(BitsetRef ref, size_type num_elements, OutputIt output) 29 | { 30 | cuco::detail::index_type index = blockIdx.x * blockDim.x + threadIdx.x; 31 | cuco::detail::index_type stride = gridDim.x * blockDim.x; 32 | while (index < num_elements) { 33 | output[index] = ref.find_next(index); 34 | index += stride; 35 | } 36 | } 37 | 38 | using cuco::test::modulo_bitgen; 39 | 40 | TEST_CASE("dynamic_bitset find next set test", "") 41 | { 42 | cuco::experimental::detail::dynamic_bitset bv; 43 | 44 | using size_type = std::size_t; 45 | constexpr size_type num_elements{400}; 46 | 47 | for (size_type i = 0; i < num_elements; i++) { 48 | bv.push_back(modulo_bitgen(i)); 49 | } 50 | 51 | thrust::device_vector device_result(num_elements); 52 | auto ref = bv.ref(); 53 | find_next_kernel<<<1, 1024>>>(ref, num_elements, device_result.data()); 54 | 55 | thrust::host_vector host_result = device_result; 56 | size_type num_matches = 0; 57 | 58 | size_type next_set_pos = -1lu; 59 | do { 60 | next_set_pos++; 61 | } while (next_set_pos < num_elements and !modulo_bitgen(next_set_pos)); 62 | 63 | for (size_type key = 0; key < num_elements; key++) { 64 | num_matches += host_result[key] == next_set_pos; 65 | 66 | if (key == next_set_pos) { 67 | do { 68 | next_set_pos++; 69 | } while (next_set_pos < num_elements and !modulo_bitgen(next_set_pos)); 70 | } 71 | } 72 | REQUIRE(num_matches == num_elements); 73 | } 74 | -------------------------------------------------------------------------------- /examples/static_set/host_bulk_example.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | #include 26 | 27 | /** 28 | * @file host_bulk_example.cu 29 | * @brief Demonstrates usage of the static_set "bulk" host APIs. 30 | * 31 | * The bulk APIs are only invocable from the host and are used for doing operations like `insert` or 32 | * `contains` on a set of keys. 33 | * 34 | */ 35 | int main(void) 36 | { 37 | using Key = int; 38 | 39 | // Empty slots are represented by reserved "sentinel" values. These values should be selected such 40 | // that they never occur in your input data. 41 | Key constexpr empty_key_sentinel = -1; 42 | 43 | // Number of keys to be inserted 44 | std::size_t constexpr num_keys = 50'000; 45 | 46 | // Compute capacity based on a 50% load factor 47 | auto constexpr load_factor = 0.5; 48 | std::size_t const capacity = std::ceil(num_keys / load_factor); 49 | 50 | // Constructs a set with at least `capacity` slots using -1 as the empty keys sentinel. 
51 | cuco::static_set set{capacity, cuco::empty_key{empty_key_sentinel}}; 52 | 53 | // Create a sequence of keys {0, 1, 2, .., i} 54 | thrust::device_vector keys(num_keys); 55 | thrust::sequence(keys.begin(), keys.end(), 0); 56 | 57 | // Inserts all keys into the hash set 58 | set.insert(keys.begin(), keys.end()); 59 | 60 | // Storage for result 61 | thrust::device_vector found(num_keys); 62 | 63 | // Check if all keys are contained in the set 64 | set.contains(keys.begin(), keys.end(), found.begin()); 65 | 66 | // Verify that all keys have been found 67 | bool const all_keys_found = thrust::all_of(found.begin(), found.end(), cuda::std::identity{}); 68 | 69 | if (all_keys_found) { std::cout << "Success! Found all keys.\n"; } 70 | 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /tests/dynamic_bitset/get_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #include 26 | 27 | template 28 | __global__ void test_kernel(BitsetRef ref, size_type num_elements, OutputIt output) 29 | { 30 | cuco::detail::index_type index = blockIdx.x * blockDim.x + threadIdx.x; 31 | cuco::detail::index_type stride = gridDim.x * blockDim.x; 32 | while (index < num_elements) { 33 | output[index] = ref.test(index); 34 | index += stride; 35 | } 36 | } 37 | 38 | using cuco::test::modulo_bitgen; 39 | 40 | TEST_CASE("dynamic_bitset get test", "") 41 | { 42 | cuco::experimental::detail::dynamic_bitset bv; 43 | 44 | using size_type = std::size_t; 45 | constexpr size_type num_elements{400}; 46 | 47 | size_type num_set_ref = 0; 48 | for (size_type i = 0; i < num_elements; i++) { 49 | bv.push_back(modulo_bitgen(i)); 50 | num_set_ref += modulo_bitgen(i); 51 | } 52 | 53 | // Host-bulk test 54 | thrust::device_vector keys(num_elements); 55 | thrust::sequence(keys.begin(), keys.end(), 0); 56 | 57 | thrust::device_vector test_result(num_elements); 58 | thrust::fill(test_result.begin(), test_result.end(), 0); 59 | 60 | bv.test(keys.begin(), keys.end(), test_result.begin()); 61 | 62 | size_type num_set = thrust::reduce(thrust::device, test_result.begin(), test_result.end(), 0); 63 | REQUIRE(num_set == num_set_ref); 64 | 65 | // Device-ref test 66 | auto ref = bv.ref(); 67 | thrust::fill(test_result.begin(), test_result.end(), 0); 68 | test_kernel<<<1, 1024>>>(ref, num_elements, test_result.data()); 69 | 70 | num_set = thrust::reduce(thrust::device, test_result.begin(), test_result.end(), 0); 71 | REQUIRE(num_set == num_set_ref); 72 | } 73 | -------------------------------------------------------------------------------- /tests/static_map/hash_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. 
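// --- Editor's illustrative sketch (not part of the original sources) -----------------
// A small helper one could append to examples/static_set/host_bulk_example.cu above:
// keys that were never inserted must not be found. Beyond the example's own includes it
// assumes <thrust/logical.h> and <cassert>; the [num_keys, 2 * num_keys) probe range is
// an arbitrary choice of keys that are guaranteed to be absent from the set.
#include <thrust/logical.h>

#include <cassert>

template <typename SetType>
void check_missing_keys(SetType& set, std::size_t num_keys)
{
  using key_type = typename SetType::key_type;

  // Keys in [num_keys, 2 * num_keys) were never inserted by the example
  thrust::device_vector<key_type> missing(num_keys);
  thrust::sequence(missing.begin(), missing.end(), static_cast<key_type>(num_keys));

  thrust::device_vector<bool> found(num_keys);
  set.contains(missing.begin(), missing.end(), found.begin());

  // None of the lookups should report a match
  assert(thrust::none_of(found.begin(), found.end(), cuda::std::identity{}));
}
// --------------------------------------------------------------------------------------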
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include 28 | 29 | using size_type = std::size_t; 30 | 31 | template 32 | void test_hash_function() 33 | { 34 | using Value = int64_t; 35 | 36 | constexpr size_type num_keys{400}; 37 | 38 | auto map = cuco::static_map, 41 | cuda::thread_scope_device, 42 | cuda::std::equal_to, 43 | cuco::linear_probing<1, Hash>, 44 | cuco::cuda_allocator, 45 | cuco::storage<2>>{ 46 | num_keys, cuco::empty_key{-1}, cuco::empty_value{-1}}; 47 | 48 | auto keys_begin = thrust::counting_iterator(1); 49 | 50 | auto pairs_begin = thrust::make_transform_iterator( 51 | keys_begin, cuda::proclaim_return_type>([] __device__(auto i) { 52 | return cuco::pair(i, i); 53 | })); 54 | 55 | thrust::device_vector d_keys_exist(num_keys); 56 | 57 | map.insert(pairs_begin, pairs_begin + num_keys); 58 | 59 | REQUIRE(map.size() == num_keys); 60 | 61 | map.contains(keys_begin, keys_begin + num_keys, d_keys_exist.begin()); 62 | 63 | REQUIRE(cuco::test::all_of(d_keys_exist.begin(), d_keys_exist.end(), cuda::std::identity{})); 64 | } 65 | 66 | TEMPLATE_TEST_CASE_SIG("static_map hash tests", "", ((typename Key)), (int32_t), (int64_t)) 67 | { 68 | test_hash_function>(); 69 | test_hash_function>(); 70 | test_hash_function>(); 71 | test_hash_function>(); 72 | } 73 | -------------------------------------------------------------------------------- /tests/static_set/atomic_storage_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
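// --- Editor's illustrative sketch (not part of the original sources) -----------------
// The hashers exercised by tests/static_map/hash_test.cu above are plain callables and
// can also be invoked directly on the host. The hasher names follow the public aliases
// visible elsewhere in this repo (e.g. benchmarks/bloom_filter/utils.hpp); the
// seed-taking constructor is an assumption based on the usual cuco hasher interface.
void hash_function_example()
{
  cuco::xxhash_32<int> hasher{};
  auto const h1 = hasher(42);                       // 32-bit hash of the key 42
  auto const h2 = cuco::murmurhash3_32<int>{}(42);  // same key, different hash family

  cuco::xxhash_32<int> seeded_hasher{7};            // assumed seed-taking constructor
  auto const h3 = seeded_hasher(42);

  (void)h1;
  (void)h2;
  (void)h3;
}
// --------------------------------------------------------------------------------------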
15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | 26 | using T = int32_t; 27 | using Hash = uint32_t; 28 | using Key = cuco::pair; 29 | 30 | struct hasher { 31 | __device__ Hash operator()(Key const& k) const { return k.first; } 32 | }; 33 | 34 | struct always_not_equal { 35 | __device__ constexpr bool operator()(Key const&, Key const&) const noexcept 36 | { 37 | // All build table keys are distinct thus `false` no matter what 38 | return false; 39 | } 40 | }; 41 | 42 | class build_fn { 43 | public: 44 | __device__ __forceinline__ auto operator()(T i) const noexcept { return cuco::pair{_hash(i), i}; } 45 | 46 | private: 47 | cuco::default_hash_function _hash{}; 48 | }; 49 | 50 | // This test exercise is designed to replicate a Spark runtime failure scenario 51 | // https://github.com/NVIDIA/spark-rapids/issues/12586 and 52 | // https://github.com/rapidsai/cudf/issues/18587 53 | // that is not addressed by the current test suite. It will result in a runtime 54 | // crash if the CCCL atomic storage is not managed correctly. 55 | TEST_CASE("atomic_storage_test", "") 56 | { 57 | using probe = cuco::linear_probing<1, hasher>; 58 | 59 | auto const num_keys = 100'000; 60 | 61 | auto set = cuco::static_set{cuco::extent{num_keys}, 62 | 0.5, 63 | cuco::empty_key{Key{std::numeric_limits::max(), -1}}, 64 | always_not_equal{}, 65 | probe{}, 66 | {}, 67 | cuco::storage<1>{}}; 68 | 69 | auto keys_begin = thrust::make_transform_iterator(thrust::counting_iterator{0}, build_fn{}); 70 | 71 | set.insert_async(keys_begin, keys_begin + num_keys); 72 | auto const count = set.size(); 73 | 74 | REQUIRE(count == num_keys); 75 | } 76 | -------------------------------------------------------------------------------- /.devcontainer/verify_devcontainer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function usage { 4 | echo "Usage: $0" 5 | echo 6 | echo "This script is intended to be run within one of CUCO's Dev Containers." 7 | echo "It verifies that the expected environment variables and binary versions match what is expected." 8 | } 9 | 10 | check_envvars() { 11 | for var_name in "$@"; do 12 | if [[ -z "${!var_name:-}" ]]; then 13 | echo "::error:: ${var_name} variable is not set." 14 | exit 1 15 | else 16 | echo "$var_name=${!var_name}" 17 | fi 18 | done 19 | } 20 | 21 | check_host_compiler_version() { 22 | local version_output=$($CXX --version) 23 | 24 | if [[ "$CXX" == "g++" ]]; then 25 | local actual_version=$(echo "$version_output" | head -n 1 | cut -d ' ' -f 4 | cut -d '.' -f 1) 26 | local expected_compiler="gcc" 27 | elif [[ "$CXX" == "clang++" ]]; then 28 | if [[ $version_output =~ clang\ version\ ([0-9]+) ]]; then 29 | actual_version=${BASH_REMATCH[1]} 30 | else 31 | echo "::error:: Unable to determine clang version." 32 | exit 1 33 | fi 34 | expected_compiler="llvm" 35 | else 36 | echo "::error:: Unexpected CXX value ($CXX)." 37 | exit 1 38 | fi 39 | 40 | if [[ "$expected_compiler" != "${CUCO_HOST_COMPILER}" || "$actual_version" != "$CUCO_HOST_COMPILER_VERSION" ]]; then 41 | echo "::error:: CXX ($CXX) version ($actual_version) does not match the expected compiler (${CUCO_HOST_COMPILER}) and version (${CUCO_HOST_COMPILER_VERSION})." 
42 | exit 1 43 | else 44 | echo "Detected host compiler: $CXX version $actual_version" 45 | fi 46 | } 47 | 48 | check_cuda_version() { 49 | local cuda_version_output=$(nvcc --version) 50 | if [[ $cuda_version_output =~ release\ ([0-9]+\.[0-9]+) ]]; then 51 | local actual_cuda_version=${BASH_REMATCH[1]} 52 | else 53 | echo "::error:: Unable to determine CUDA version from nvcc." 54 | exit 1 55 | fi 56 | 57 | if [[ "$actual_cuda_version" != "$CUCO_CUDA_VERSION" ]]; then 58 | echo "::error:: CUDA version ($actual_cuda_version) does not match the expected CUDA version ($CUCO_CUDA_VERSION)." 59 | exit 1 60 | else 61 | echo "Detected CUDA version: $actual_cuda_version" 62 | fi 63 | } 64 | 65 | main() { 66 | if [[ "$1" == "-h" || "$1" == "--help" ]]; then 67 | usage 68 | exit 0 69 | fi 70 | 71 | set -euo pipefail 72 | 73 | check_envvars DEVCONTAINER_NAME CXX CUCO_HOST_COMPILER CUCO_CUDA_VERSION CUCO_HOST_COMPILER_VERSION 74 | 75 | check_host_compiler_version 76 | 77 | check_cuda_version 78 | 79 | echo "Dev Container successfully verified!" 80 | } 81 | 82 | main "$@" -------------------------------------------------------------------------------- /.github/workflows/run-as-coder.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | name: Run as coder user 17 | 18 | defaults: 19 | run: 20 | shell: bash -exo pipefail {0} 21 | 22 | 23 | on: 24 | workflow_call: 25 | inputs: 26 | name: {type: string, required: true} 27 | image: {type: string, required: true} 28 | runner: {type: string, required: true} 29 | command: {type: string, required: true} 30 | env: { type: string, required: false, default: "" } 31 | 32 | jobs: 33 | run-as-coder: 34 | name: ${{inputs.name}} 35 | runs-on: ${{inputs.runner}} 36 | container: 37 | options: -u root 38 | image: ${{inputs.image}} 39 | env: 40 | NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} 41 | permissions: 42 | id-token: write 43 | steps: 44 | - name: Checkout repo 45 | uses: actions/checkout@v3 46 | with: 47 | path: cuCollections 48 | persist-credentials: false 49 | - name: Move files to coder user home directory 50 | run: | 51 | cp -R cuCollections /home/coder/cuCollections 52 | chown -R coder:coder /home/coder/ 53 | - name: Configure credentials and environment variables for sccache 54 | uses: ./cuCollections/.github/actions/configure_cccl_sccache 55 | - name: Run command 56 | shell: su coder {0} 57 | run: | 58 | set -exo pipefail 59 | cd ~/cuCollections 60 | eval "${{inputs.command}}" || exit_code=$? 61 | if [ ! -z "$exit_code" ]; then 62 | echo "::error::Error! 
To checkout the corresponding code and reproduce locally, run the following commands:" 63 | echo "git clone --branch $GITHUB_REF_NAME --single-branch --recurse-submodules https://github.com/$GITHUB_REPOSITORY.git && cd $(echo $GITHUB_REPOSITORY | cut -d'/' -f2) && git checkout $GITHUB_SHA" 64 | echo "docker run --rm -it --gpus all --pull=always --volume \$PWD:/repo --workdir /repo ${{ inputs.image }} ${{inputs.command}}" 65 | exit $exit_code 66 | fi 67 | -------------------------------------------------------------------------------- /tests/static_multiset/large_input_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include 28 | 29 | #include 30 | #include 31 | 32 | template 33 | void test_unique_sequence(Set& set, typename Set::value_type* res_begin, std::size_t num_keys) 34 | { 35 | using Key = typename Set::key_type; 36 | 37 | auto const keys_begin = thrust::counting_iterator(0); 38 | auto const keys_end = keys_begin + num_keys; 39 | 40 | set.insert(keys_begin, keys_end); 41 | REQUIRE(set.size() == num_keys); 42 | 43 | SECTION("All inserted keys can be retrieved.") 44 | { 45 | auto const [_, res_end] = 46 | set.retrieve(keys_begin, keys_end, thrust::make_discard_iterator(), res_begin); 47 | REQUIRE(static_cast(std::distance(res_begin, res_end)) == num_keys); 48 | 49 | thrust::sort(thrust::device, res_begin, res_end); 50 | 51 | REQUIRE(cuco::test::equal(res_begin, res_end, keys_begin, cuda::std::equal_to{})); 52 | } 53 | } 54 | 55 | TEMPLATE_TEST_CASE_SIG( 56 | "cuco::static_multiset large input test", 57 | "", 58 | ((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize), 59 | (int64_t, cuco::test::probe_sequence::double_hashing, 1), 60 | (int64_t, cuco::test::probe_sequence::double_hashing, 2)) 61 | { 62 | constexpr std::size_t num_keys{1'200'000'000}; 63 | 64 | using extent_type = cuco::extent; 65 | using probe = cuco::double_hashing>; 66 | 67 | try { 68 | auto set = cuco::static_multiset{num_keys * 2, cuco::empty_key{-1}, {}, probe{}}; 69 | 70 | thrust::device_vector d_retrieved(num_keys); 71 | test_unique_sequence(set, d_retrieved.data().get(), num_keys); 72 | } catch (cuco::cuda_error&) { 73 | SKIP("Out of memory"); 74 | } catch (std::bad_alloc&) { 75 | SKIP("Out of memory"); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /include/cuco/utility/traits.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | namespace cuco { 23 | 24 | /** 25 | * @brief Customization point that can be specialized to indicate that it is safe to perform bitwise 26 | * equality comparisons on the object-representation of objects of type `T`. 27 | * 28 | * By default, only types where `std::has_unique_object_representations_v` is true are safe for 29 | * bitwise equality. However, this can be too restrictive for some types, e.g., floating point 30 | * types. 31 | * 32 | * User-defined specializations of `is_bitwise_comparable` are allowed, but it is the users 33 | * responsibility to ensure values do not occur that would lead to unexpected behavior. For example, 34 | * if a `NaN` bit pattern were used as the empty sentinel value, it may not compare bitwise equal to 35 | * other `NaN` bit patterns. 36 | * 37 | */ 38 | template 39 | struct is_bitwise_comparable : cuda::std::false_type {}; 40 | 41 | /// By default, only types with unique object representations are allowed 42 | template 43 | struct is_bitwise_comparable< 44 | T, 45 | cuda::std::enable_if_t>> 46 | : cuda::std::true_type {}; 47 | 48 | template 49 | inline constexpr bool is_bitwise_comparable_v = 50 | is_bitwise_comparable::value; ///< Shortcut definition 51 | 52 | /** 53 | * @brief Declares that a type `Type` is bitwise comparable. 54 | * 55 | */ 56 | #define CUCO_DECLARE_BITWISE_COMPARABLE(Type) \ 57 | namespace cuco { \ 58 | template <> \ 59 | struct is_bitwise_comparable : cuda::std::true_type {}; \ 60 | } 61 | 62 | template 63 | inline constexpr bool dependent_bool_value = value; ///< Unpacked dependent bool value 64 | 65 | template 66 | inline constexpr bool dependent_false = 67 | dependent_bool_value; ///< Emits a `false` value which is dependent on the given 68 | ///< argument types 69 | 70 | } // namespace cuco 71 | -------------------------------------------------------------------------------- /examples/bloom_filter/host_bulk_example.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | 26 | int main(void) 27 | { 28 | int constexpr num_keys = 10'000; ///< Generate 10'000 keys 29 | int constexpr num_tp = num_keys * 0.5; ///< Insert the first half keys into the filter. 
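// --- Editor's illustrative sketch (not part of the original sources) -----------------
// Usage of the customization point from include/cuco/utility/traits.hpp above.
// `large_key` is a hypothetical user-defined key: its padding bytes make
// cuda::std::has_unique_object_representations_v<large_key> false, so the user opts in
// explicitly and thereby takes responsibility for the padding never differing between
// otherwise-equal values.
struct large_key {
  int32_t a;  // followed by 4 padding bytes on typical 64-bit ABIs
  int64_t b;
};
CUCO_DECLARE_BITWISE_COMPARABLE(large_key)

static_assert(cuco::is_bitwise_comparable_v<large_key>);
// --------------------------------------------------------------------------------------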
30 | int constexpr num_tn = num_keys - num_tp; 31 | int constexpr sub_filters = 200; ///< 200 sub-filters per bloom filter 32 | 33 | // key type for bloom filter 34 | using key_type = int; 35 | 36 | // Spawn a bloom filter with default policy and 200 sub-filters. 37 | cuco::bloom_filter filter{sub_filters}; 38 | 39 | std::cout << "Bulk insert into bloom filter with default fingerprint generation policy: " 40 | << std::endl; 41 | 42 | thrust::device_vector keys(num_keys); 43 | thrust::sequence(keys.begin(), keys.end(), 1); 44 | 45 | auto tp_begin = keys.begin(); 46 | auto tp_end = tp_begin + num_tp; 47 | auto tn_begin = tp_end; 48 | auto tn_end = keys.end(); 49 | 50 | // Insert the first half of the keys. 51 | filter.add(tp_begin, tp_end); 52 | 53 | thrust::device_vector tp_result(num_tp, false); 54 | thrust::device_vector tn_result(num_keys - num_tp, false); 55 | 56 | // Query the filter for the previously inserted keys. 57 | // This should result in a true-positive rate of TPR=1. 58 | filter.contains(tp_begin, tp_end, tp_result.begin()); 59 | 60 | // Query the filter for the keys that are not present in the filter. 61 | // Since bloom filters are probalistic data structures, the filter 62 | // exhibits a false-positive rate FPR>0 depending on the number of bits in 63 | // the filter and the number of hashes used per key. 64 | filter.contains(tn_begin, tn_end, tn_result.begin()); 65 | 66 | float tp_rate = 67 | float(thrust::count(thrust::device, tp_result.begin(), tp_result.end(), true)) / float(num_tp); 68 | float fp_rate = 69 | float(thrust::count(thrust::device, tn_result.begin(), tn_result.end(), true)) / float(num_tn); 70 | 71 | std::cout << "TPR=" << tp_rate << " FPR=" << fp_rate << std::endl; 72 | 73 | return 0; 74 | } -------------------------------------------------------------------------------- /include/cuco/detail/utility/cuda.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
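// --- Editor's illustrative sketch (not part of the original sources) -----------------
// A rough cross-check for the FPR printed by examples/bloom_filter/host_bulk_example.cu
// above. This is the textbook estimate for a classic Bloom filter with m bits, n keys
// and k hash functions; cuco's blocked sub-filter layout deviates from it somewhat, so
// treat the result only as a ballpark figure.
#include <cmath>

double approx_bloom_fpr(double num_bits, double num_keys, double num_hashes)
{
  // FPR ≈ (1 - e^(-k * n / m))^k
  return std::pow(1.0 - std::exp(-num_hashes * num_keys / num_bits), num_hashes);
}
// --------------------------------------------------------------------------------------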
13 | * See the License for the specific language governing permissions and 14 | */ 15 | 16 | #pragma once 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | 23 | namespace cuco { 24 | namespace detail { 25 | 26 | constexpr std::int32_t default_block_size() noexcept { return 128; } 27 | /// Default stride 28 | constexpr std::int32_t default_stride() noexcept { return 1; } 29 | 30 | /** 31 | * @brief Computes the desired 1D grid size with the given parameters 32 | * 33 | * @param num Number of elements to handle in the kernel 34 | * @param cg_size Number of threads per CUDA Cooperative Group 35 | * @param stride Number of elements to be handled by each thread 36 | * @param block_size Number of threads in each thread block 37 | * 38 | * @return The resulting grid size 39 | */ 40 | constexpr auto grid_size(index_type num, 41 | std::int32_t cg_size = 1, 42 | std::int32_t stride = default_stride(), 43 | std::int32_t block_size = default_block_size()) noexcept 44 | { 45 | return (cg_size * num + stride * block_size - 1) / (stride * block_size); 46 | } 47 | 48 | /** 49 | * @brief Computes the ideal 1D grid size with the given parameters 50 | * 51 | * @tparam Kernel Kernel type 52 | * 53 | * @param block_size Number of threads in each thread block 54 | * @param kernel CUDA kernel to launch 55 | * @param dynamic_shm_size Dynamic shared memory size 56 | * 57 | * @return The grid size that delivers the highest occupancy 58 | */ 59 | template 60 | constexpr auto max_occupancy_grid_size(std::int32_t block_size, 61 | Kernel kernel, 62 | std::size_t dynamic_shm_size = 0) 63 | { 64 | int device = 0; 65 | CUCO_CUDA_TRY(cudaGetDevice(&device)); 66 | 67 | int num_multiprocessors = -1; 68 | CUCO_CUDA_TRY( 69 | cudaDeviceGetAttribute(&num_multiprocessors, cudaDevAttrMultiProcessorCount, device)); 70 | 71 | int max_active_blocks_per_multiprocessor{}; 72 | CUCO_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor( 73 | &max_active_blocks_per_multiprocessor, kernel, block_size, dynamic_shm_size)); 74 | 75 | return max_active_blocks_per_multiprocessor * num_multiprocessors; 76 | } 77 | 78 | } // namespace detail 79 | } // namespace cuco 80 | -------------------------------------------------------------------------------- /tests/dynamic_map/unique_sequence_test_experimental.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
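// --- Editor's illustrative sketch (not part of the original sources) -----------------
// How the internal launch helpers from include/cuco/detail/utility/cuda.hpp above are
// typically combined. `fill_kernel` and `launch_fill` are made-up names; only
// `default_block_size`, `grid_size` and `index_type` come from the header.
__global__ void fill_kernel(int* out, cuco::detail::index_type n)
{
  cuco::detail::index_type const idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx < n) { out[idx] = 42; }
}

void launch_fill(int* out, cuco::detail::index_type n, cudaStream_t stream)
{
  auto const block = cuco::detail::default_block_size();  // 128 threads per block
  auto const grid  = cuco::detail::grid_size(n);          // ceil(n / 128) blocks
  fill_kernel<<<grid, block, 0, stream>>>(out, n);
}
// --------------------------------------------------------------------------------------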
15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | #include 29 | 30 | TEMPLATE_TEST_CASE_SIG("experimental::dynamic_map: unique sequence", 31 | "", 32 | ((typename Key, typename T), Key, T), 33 | (int32_t, int32_t), 34 | (int32_t, int64_t), 35 | (int64_t, int32_t), 36 | (int64_t, int64_t)) 37 | { 38 | constexpr std::size_t num_keys{1'000'000}; 39 | 40 | cuco::experimental::dynamic_map map{ 41 | 30'000'000, cuco::empty_key{-1}, cuco::empty_value{-1}}; 42 | 43 | thrust::device_vector d_keys(num_keys); 44 | thrust::device_vector d_values(num_keys); 45 | 46 | thrust::sequence(thrust::device, d_keys.begin(), d_keys.end()); 47 | thrust::sequence(thrust::device, d_values.begin(), d_values.end()); 48 | 49 | auto pairs_begin = 50 | thrust::make_transform_iterator(thrust::make_counting_iterator(0), 51 | cuda::proclaim_return_type>( 52 | [] __device__(auto i) { return cuco::pair(i, i); })); 53 | 54 | thrust::device_vector d_results(num_keys); 55 | thrust::device_vector d_contained(num_keys); 56 | 57 | // bulk function test cases 58 | 59 | SECTION("All inserted keys-value pairs should be contained") 60 | { 61 | map.insert(pairs_begin, pairs_begin + num_keys); 62 | map.contains(d_keys.begin(), d_keys.end(), d_contained.begin()); 63 | 64 | REQUIRE(cuco::test::all_of(d_contained.begin(), d_contained.end(), cuda::std::identity{})); 65 | } 66 | 67 | SECTION("Non-inserted keys-value pairs should not be contained") 68 | { 69 | // segfaults 70 | map.contains(d_keys.begin(), d_keys.end(), d_contained.begin()); 71 | 72 | REQUIRE(cuco::test::none_of(d_contained.begin(), d_contained.end(), cuda::std::identity{})); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /tests/utility/extent_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | 25 | #include 26 | 27 | auto constexpr cg_size = 2; 28 | auto constexpr bucket_size = 4; 29 | 30 | using storage_t = cuco::storage; 31 | template 32 | using probing_t = cuco::double_hashing; 33 | 34 | TEMPLATE_TEST_CASE_SIG( 35 | "utility extent tests", "", ((typename SizeType), SizeType), (int32_t), (int64_t), (std::size_t)) 36 | { 37 | SizeType constexpr num = 1234; 38 | SizeType constexpr gold_reference = 1256; // 157 x 2 x 4 39 | 40 | SECTION("Static extent must be evaluated at compile time.") 41 | { 42 | auto const size = cuco::extent{}; 43 | STATIC_REQUIRE(num == size); 44 | } 45 | 46 | SECTION("Dynamic extent is evaluated at run time.") 47 | { 48 | auto const size = cuco::extent(num); 49 | REQUIRE(size == num); 50 | } 51 | 52 | SECTION("Compute static valid extent at compile time.") 53 | { 54 | auto constexpr size = cuco::extent{}; 55 | auto constexpr res = cuco::make_valid_extent(size); 56 | STATIC_REQUIRE(gold_reference == res.value()); 57 | } 58 | 59 | SECTION("Compute dynamic valid extent at run time.") 60 | { 61 | auto const size = cuco::extent{num}; 62 | auto const res = cuco::make_valid_extent(size); 63 | REQUIRE(gold_reference == res.value()); 64 | } 65 | 66 | SECTION("Invalid desired load factor throws exception") 67 | { 68 | using probing_scheme_type = cuco::linear_probing>; 69 | using storage_type = cuco::storage; 70 | 71 | auto const size = cuco::extent{num}; 72 | 73 | // Test load factor <= 0 74 | REQUIRE_THROWS(cuco::make_valid_extent(size, 0.0)); 75 | REQUIRE_THROWS(cuco::make_valid_extent(size, -0.5)); 76 | 77 | // Test load factor > 1 78 | REQUIRE_THROWS(cuco::make_valid_extent(size, 1.5)); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /benchmarks/bloom_filter/utils.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
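// --- Editor's note (illustrative reasoning, not part of the original sources) --------
// Where the `gold_reference` of 1256 in tests/utility/extent_test.cu above comes from,
// assuming the double-hashing scheme requires a prime number of buckets: the requested
// size 1234 is first divided by cg_size * bucket_size = 2 * 4 = 8, giving
// ceil(1234 / 8) = 155; the next prime >= 155 is 157; and 157 buckets * 8 slots per
// bucket = 1256 total slots.
// --------------------------------------------------------------------------------------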
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | #include 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | #include 29 | 30 | NVBENCH_DECLARE_TYPE_STRINGS(cuco::detail::XXHash_64, "xxhash_64", "cuco::xxhash_64"); 31 | NVBENCH_DECLARE_TYPE_STRINGS(cuco::detail::XXHash_32, "xxhash_32", "cuco::xxhash_32"); 32 | NVBENCH_DECLARE_TYPE_STRINGS(cuco::detail::MurmurHash3_32, 33 | "murmurhash3_32", 34 | "cuco::murmurhash3_32"); 35 | NVBENCH_DECLARE_TYPE_STRINGS(cuco::detail::MurmurHash3_x86_128, 36 | "murmurhash3_x86_128", 37 | "cuco::murmurhash3_x86_128"); 38 | NVBENCH_DECLARE_TYPE_STRINGS(cuco::detail::MurmurHash3_x64_128, 39 | "murmurhash3_x64_128", 40 | "cuco::murmurhash3_x64_128"); 41 | NVBENCH_DECLARE_TYPE_STRINGS(cuco::detail::identity_hash, 42 | "identity_hash", 43 | "cuco::identity_hash"); 44 | 45 | namespace cuco::benchmark { 46 | 47 | template 48 | void add_fpr_summary(nvbench::state& state, FilterType& filter) 49 | { 50 | filter.clear(); 51 | 52 | auto const num_keys = state.get_int64("NumInputs"); 53 | 54 | thrust::device_vector keys(num_keys * 2); 55 | thrust::sequence(thrust::device, keys.begin(), keys.end(), 1); 56 | thrust::device_vector result(num_keys, false); 57 | 58 | auto tp_begin = keys.begin(); 59 | auto tp_end = tp_begin + num_keys; 60 | auto tn_begin = tp_end; 61 | auto tn_end = keys.end(); 62 | filter.add(tp_begin, tp_end); 63 | filter.contains(tn_begin, tn_end, result.begin()); 64 | 65 | float fp = thrust::count(thrust::device, result.begin(), result.end(), true); 66 | 67 | auto& summ = state.add_summary("FalsePositiveRate"); 68 | summ.set_string("hint", "FPR"); 69 | summ.set_string("short_name", "FPR"); 70 | summ.set_string("description", "False-positive rate of the bloom filter."); 71 | summ.set_float64("value", fp / num_keys); 72 | 73 | filter.clear(); 74 | } 75 | 76 | } // namespace cuco::benchmark -------------------------------------------------------------------------------- /include/cuco/detail/roaring_bitmap/roaring_bitmap_ref.inl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025 NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | namespace cuco::experimental { 26 | 27 | template 28 | __host__ __device__ roaring_bitmap_ref::roaring_bitmap_ref(storage_ref_type const& storage_ref) 29 | : impl_{storage_ref} 30 | { 31 | } 32 | 33 | template 34 | template > */> 36 | __device__ roaring_bitmap_ref::roaring_bitmap_ref(cuda::std::byte const* bitmap) : impl_{bitmap} 37 | { 38 | } 39 | 40 | template 41 | template 42 | __host__ void roaring_bitmap_ref::contains(InputIt first, 43 | InputIt last, 44 | OutputIt output, 45 | cuda::stream_ref stream) const 46 | { 47 | impl_.contains(first, last, output, stream); 48 | } 49 | 50 | template 51 | template 52 | __host__ void roaring_bitmap_ref::contains_async(InputIt first, 53 | InputIt last, 54 | OutputIt output, 55 | cuda::stream_ref stream) const noexcept 56 | { 57 | impl_.contains_async(first, last, output, stream); 58 | } 59 | 60 | template 61 | __device__ bool roaring_bitmap_ref::contains(T value) const 62 | { 63 | return impl_.contains(value); 64 | } 65 | 66 | template 67 | __host__ __device__ cuda::std::size_t roaring_bitmap_ref::size() const noexcept 68 | { 69 | return impl_.size(); 70 | } 71 | 72 | template 73 | __host__ __device__ bool roaring_bitmap_ref::empty() const noexcept 74 | { 75 | return impl_.empty(); 76 | } 77 | 78 | template 79 | __host__ __device__ cuda::std::byte const* roaring_bitmap_ref::data() const noexcept 80 | { 81 | return impl_.data(); 82 | } 83 | 84 | template 85 | __host__ __device__ cuda::std::size_t roaring_bitmap_ref::size_bytes() const noexcept 86 | { 87 | return impl_.size_bytes(); 88 | } 89 | 90 | } // namespace cuco::experimental -------------------------------------------------------------------------------- /include/cuco/probe_sequences.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | namespace cuco::legacy { 22 | 23 | /** 24 | * @brief Public linear probing scheme class. 25 | * 26 | * Linear probing is efficient when few collisions are present. Performance hints: 27 | * - Use linear probing when collisions are rare. e.g. low occupancy or low multiplicity. 28 | * - `CGSize` = 1 or 2 when hash map is small (10'000'000 or less), 4 or 8 otherwise. 29 | * 30 | * `Hash` should be callable object type. 
31 | * 32 | * @tparam CGSize Size of CUDA Cooperative Groups 33 | * @tparam Hash Unary callable type 34 | */ 35 | template 36 | class linear_probing : public detail::probe_sequence_base { 37 | public: 38 | using probe_sequence_base_type = 39 | detail::probe_sequence_base; ///< The base probe scheme type 40 | using probe_sequence_base_type::cg_size; 41 | using probe_sequence_base_type::vector_width; 42 | 43 | /// Type of implementation details 44 | template 45 | using impl = detail::linear_probing_impl; 46 | }; 47 | 48 | /** 49 | * 50 | * @brief Public double hashing scheme class. 51 | * 52 | * Default probe sequence for `cuco::static_multimap`. Double hashing shows superior 53 | * performance when dealing with high multiplicty and/or high occupancy use cases. Performance 54 | * hints: 55 | * - `CGSize` = 1 or 2 when hash map is small (10'000'000 or less), 4 or 8 otherwise. 56 | * 57 | * `Hash1` and `Hash2` should be callable object type. 58 | * 59 | * @tparam CGSize Size of CUDA Cooperative Groups 60 | * @tparam Hash1 Unary callable type 61 | * @tparam Hash2 Unary callable type 62 | */ 63 | template 64 | class double_hashing : public detail::probe_sequence_base { 65 | public: 66 | using probe_sequence_base_type = 67 | detail::probe_sequence_base; ///< The base probe scheme type 68 | using probe_sequence_base_type::cg_size; 69 | using probe_sequence_base_type::vector_width; 70 | 71 | /// Type of implementation details 72 | template 73 | using impl = detail::double_hashing_impl; 74 | }; 75 | 76 | } // namespace cuco::legacy 77 | -------------------------------------------------------------------------------- /include/cuco/utility/reduction_functors.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #pragma once 17 | 18 | #include 19 | 20 | namespace cuco::reduce { 21 | 22 | /** 23 | * @brief Device functor performing sum reduction, used with `insert-or-apply` 24 | */ 25 | struct plus { 26 | /** 27 | * @brief Performs atomic fetch_add on payload and the new value to be inserted 28 | * 29 | * @tparam T The payload type 30 | * @tparam Scope The cuda::thread_scope used for atomic_ref 31 | * 32 | * @param payload_ref The atomic_ref pointing to payload part of the slot 33 | * @param val The new value to be applied as reduction to the current value 34 | * in the payload. 
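// --- Editor's illustrative sketch (not part of the original sources) -----------------
// Acting on the performance hints above, but with the non-legacy probing schemes used
// elsewhere in this repo (e.g. tests/static_set/atomic_storage_test.cu and
// tests/static_multiset/large_input_test.cu). The constructor shape mirrors those tests;
// the sizes and hash choices are arbitrary illustrations.
void probing_scheme_example()
{
  // Few collisions expected: linear probing with a scalar (CG size 1) probe
  using small_probe = cuco::linear_probing<1, cuco::default_hash_function<int>>;
  auto small_set    = cuco::static_set{
    cuco::extent<std::size_t>{10'000}, cuco::empty_key{-1}, {}, small_probe{}};

  // Large table / higher multiplicity: double hashing with a wider cooperative group
  using large_probe = cuco::double_hashing<4, cuco::default_hash_function<int>>;
  auto large_set    = cuco::static_set{
    cuco::extent<std::size_t>{50'000'000}, cuco::empty_key{-1}, {}, large_probe{}};
}
// --------------------------------------------------------------------------------------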
35 | */ 36 | template 37 | __device__ void operator()(cuda::atomic_ref payload_ref, const T& val) 38 | { 39 | payload_ref.fetch_add(val, cuda::memory_order_relaxed); 40 | } 41 | }; 42 | 43 | /** 44 | * @brief Device functor performing max reduction, used with `insert-or-apply` 45 | */ 46 | struct max { 47 | /** 48 | * @brief Performs atomic fetch_max on payload and the new value to be inserted 49 | * 50 | * @tparam T The payload type 51 | * @tparam Scope The cuda::thread_scope used for atomic_ref 52 | * 53 | * @param payload_ref The atomic_ref pointing to payload part of the slot 54 | * @param val The new value to be applied as reduction to the current value 55 | * in the payload. 56 | */ 57 | template 58 | __device__ void operator()(cuda::atomic_ref payload_ref, const T& val) 59 | { 60 | payload_ref.fetch_max(val, cuda::memory_order_relaxed); 61 | } 62 | }; 63 | 64 | /** 65 | * @brief Device functor performing min reduction, used with `insert-or-apply` 66 | */ 67 | struct min { 68 | /** 69 | * @brief Performs atomic fetch_min on payload and the new value to be inserted 70 | * 71 | * @tparam T The payload type 72 | * @tparam Scope The cuda::thread_scope used for atomic_ref 73 | * 74 | * @param payload_ref The atomic_ref pointing to payload part of the slot 75 | * @param val The new value to be applied as reduction to the current value 76 | * in the payload. 77 | */ 78 | template 79 | __device__ void operator()(cuda::atomic_ref payload_ref, const T& val) 80 | { 81 | payload_ref.fetch_min(val, cuda::memory_order_relaxed); 82 | } 83 | }; 84 | 85 | } // namespace cuco::reduce -------------------------------------------------------------------------------- /examples/static_multimap/host_bulk_example.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | 26 | int main(void) 27 | { 28 | using key_type = int; 29 | using value_type = int; 30 | 31 | key_type empty_key_sentinel = -1; 32 | value_type empty_value_sentinel = -1; 33 | 34 | constexpr std::size_t N = 50'000; 35 | 36 | // Constructs a multimap with 100,000 slots using -1 and -1 as the empty key/value 37 | // sentinels. Note the capacity is chosen knowing we will insert 50,000 keys, 38 | // for an load factor of 50%. 39 | cuco::static_multimap map{ 40 | N * 2, cuco::empty_key{empty_key_sentinel}, cuco::empty_value{empty_value_sentinel}}; 41 | 42 | thrust::device_vector> pairs(N); 43 | 44 | // Create a sequence of pairs. Eeach key has two matches. 45 | // E.g., {{0,0}, {1,1}, ... 
{0,25'000}, {1, 25'001}, ...} 46 | thrust::transform( 47 | thrust::make_counting_iterator(0), 48 | thrust::make_counting_iterator(pairs.size()), 49 | pairs.begin(), 50 | [] __device__(auto i) { return cuco::pair{i % (N / 2), i}; }); 51 | 52 | // Inserts all pairs into the map 53 | map.insert(pairs.begin(), pairs.end()); 54 | 55 | // Sequence of probe keys {0, 1, 2, ... 49'999} 56 | thrust::device_vector keys_to_find(N); 57 | thrust::sequence(keys_to_find.begin(), keys_to_find.end(), 0); 58 | 59 | // Counts the occurrences of keys in [0, 50'000) contained in the multimap. 60 | // The `_outer` suffix indicates that the occurrence of a non-match is 1. 61 | auto const output_size = map.count_outer(keys_to_find.begin(), keys_to_find.end()); 62 | 63 | thrust::device_vector> d_results(output_size); 64 | 65 | // Finds all keys {0, 1, 2, ...} and stores associated key/value pairs into `d_results` 66 | // If a key `keys_to_find[i]` doesn't exist, `d_results[i].second == empty_value_sentinel` 67 | auto output_end = map.retrieve_outer(keys_to_find.begin(), keys_to_find.end(), d_results.begin()); 68 | auto retrieve_size = output_end - d_results.begin(); 69 | 70 | // The total number of outer matches should be `N + N / 2` 71 | assert(not(output_size == retrieve_size == N + N / 2)); 72 | 73 | return 0; 74 | } 75 | -------------------------------------------------------------------------------- /tests/hyperloglog/unique_sequence_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
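// --- Editor's illustrative sketch (not part of the original sources) -----------------
// What the functors in include/cuco/utility/reduction_functors.cuh above are meant for:
// "insert-or-apply" style accumulation into a map payload. The host API name
// `insert_or_apply` and the exact constructor/iterator shapes below are assumptions
// based on the doc comments and on the other map examples in this repo, not verbatim
// library code; `pairs_begin`, `num_pairs` and `num_distinct_keys` are hypothetical.
template <typename PairIt>
void build_histogram(PairIt pairs_begin, std::size_t num_pairs, std::size_t num_distinct_keys)
{
  // Payload starts at the empty-value sentinel; colliding keys get their payloads
  // atomically combined via cuco::reduce::plus (fetch_add under the hood).
  auto histogram = cuco::static_map<int, int>{
    2 * num_distinct_keys, cuco::empty_key{-1}, cuco::empty_value{0}};

  histogram.insert_or_apply(pairs_begin, pairs_begin + num_pairs, cuco::reduce::plus{});
}
// --------------------------------------------------------------------------------------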
15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | 25 | #include 26 | #include 27 | 28 | #include 29 | #include 30 | #include 31 | 32 | TEMPLATE_TEST_CASE_SIG("hyperloglog: unique sequence", 33 | "", 34 | ((typename T, typename Hash), T, Hash), 35 | (int32_t, cuco::xxhash_64), 36 | (int64_t, cuco::xxhash_64), 37 | (__int128_t, cuco::xxhash_64<__int128_t>)) 38 | { 39 | auto num_items_pow2 = GENERATE(25, 26, 28); 40 | auto hll_precision = GENERATE(8, 10, 12, 13, 18, 20); 41 | auto sketch_size_kb = 4 * (1ull << hll_precision) / 1024; 42 | INFO("hll_precision=" << hll_precision); 43 | INFO("sketch_size_kb=" << sketch_size_kb); 44 | INFO("num_items=2^" << num_items_pow2); 45 | auto num_items = 1ull << num_items_pow2; 46 | 47 | // This factor determines the error threshold for passing the test 48 | double constexpr tolerance_factor = 2.5; 49 | // RSD for a given precision is given by the following formula 50 | double const relative_standard_deviation = 51 | 1.04 / std::sqrt(static_cast(1ull << hll_precision)); 52 | 53 | thrust::device_vector items(num_items); 54 | 55 | // Generate `num_items` distinct items 56 | thrust::sequence(items.begin(), items.end(), 0); 57 | 58 | // Initialize the estimator 59 | cuco::hyperloglog estimator{ 60 | cuco::sketch_size_kb(sketch_size_kb)}; 61 | 62 | REQUIRE(estimator.estimate() == 0); 63 | 64 | // Add all items to the estimator 65 | estimator.add(items.begin(), items.end()); 66 | 67 | auto const estimate = estimator.estimate(); 68 | 69 | // Adding the same items again should not affect the result 70 | estimator.add(items.begin(), items.begin() + num_items / 2); 71 | REQUIRE(estimator.estimate() == estimate); 72 | 73 | // Clearing the estimator should reset the estimate 74 | estimator.clear(); 75 | REQUIRE(estimator.estimate() == 0); 76 | 77 | double const relative_error = 78 | std::abs((static_cast(estimate) / static_cast(num_items)) - 1.0); 79 | 80 | // Check if the error is acceptable 81 | REQUIRE(relative_error < tolerance_factor * relative_standard_deviation); 82 | } 83 | -------------------------------------------------------------------------------- /include/cuco/utility/allocator.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | #include 22 | 23 | #include 24 | 25 | namespace cuco { 26 | /** 27 | * @brief A stream-ordered device allocator using `cudaMallocAsync`/`cudaFreeAsync`. 28 | * 29 | * @tparam T The allocator's value type 30 | */ 31 | template 32 | class cuda_allocator { 33 | public: 34 | using value_type = T; ///< Allocator's value type 35 | 36 | cuda_allocator() = default; 37 | 38 | /** 39 | * @brief Copy constructor. 
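// --- Editor's note (illustrative arithmetic, not part of the original sources) -------
// Concretely, for hll_precision = 12 in tests/hyperloglog/unique_sequence_test.cu above:
// the sketch holds 2^12 = 4096 registers, so the relative standard deviation is
// 1.04 / sqrt(4096) ≈ 0.016, and with tolerance_factor = 2.5 the test accepts relative
// errors of up to roughly 4.1%.
// --------------------------------------------------------------------------------------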
40 | */ 41 | template 42 | cuda_allocator(cuda_allocator const&) noexcept 43 | { 44 | } 45 | 46 | /** 47 | * @brief Allocates storage for `n` objects of type `T` using `cudaMallocAsync`. 48 | * 49 | * @param n The number of objects to allocate storage for 50 | * @param stream The stream to order the allocation on 51 | * @return Pointer to the allocated storage 52 | */ 53 | value_type* allocate(std::size_t n, cuda::stream_ref stream) 54 | { 55 | value_type* p; 56 | CUCO_CUDA_TRY(cudaMallocAsync(&p, sizeof(value_type) * n, stream.get())); 57 | return p; 58 | } 59 | 60 | /** 61 | * @brief Deallocates storage pointed to by `p` using `cudaFreeAsync`. 62 | * 63 | * @param p Pointer to memory to deallocate 64 | * @param stream The stream to order the deallocation on 65 | */ 66 | void deallocate(value_type* p, std::size_t, cuda::stream_ref stream) 67 | { 68 | CUCO_CUDA_TRY(cudaFreeAsync(p, stream.get())); 69 | } 70 | }; 71 | 72 | /** 73 | * @brief Equality comparison operator. 74 | * 75 | * @tparam T Value type of LHS object 76 | * @tparam U Value type of RHS object 77 | * 78 | * @return `true` iff given arguments are equal 79 | */ 80 | template 81 | bool operator==(cuda_allocator const&, cuda_allocator const&) noexcept 82 | { 83 | return true; 84 | } 85 | 86 | /** 87 | * @brief Inequality comparison operator. 88 | * 89 | * @tparam T Value type of LHS object 90 | * @tparam U Value type of RHS object 91 | * 92 | * @param lhs Left-hand side object to compare 93 | * @param rhs Right-hand side object to compare 94 | * 95 | * @return `true` iff given arguments are not equal 96 | */ 97 | template 98 | bool operator!=(cuda_allocator const& lhs, cuda_allocator const& rhs) noexcept 99 | { 100 | return not(lhs == rhs); 101 | } 102 | 103 | } // namespace cuco 104 | -------------------------------------------------------------------------------- /include/cuco/detail/roaring_bitmap/roaring_bitmap.inl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025 NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
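// --- Editor's illustrative sketch (not part of the original sources) -----------------
// Minimal stream-ordered usage of cuco::cuda_allocator from
// include/cuco/utility/allocator.hpp above. Stream creation and cleanup are ordinary
// CUDA runtime calls and not part of the allocator interface itself.
void allocator_example()
{
  cudaStream_t stream;
  cudaStreamCreate(&stream);

  cuco::cuda_allocator<int> alloc;
  int* buffer = alloc.allocate(1024, cuda::stream_ref{stream});

  // ... launch work on `stream` that reads/writes `buffer` ...

  alloc.deallocate(buffer, 1024, cuda::stream_ref{stream});

  cudaStreamSynchronize(stream);
  cudaStreamDestroy(stream);
}
// --------------------------------------------------------------------------------------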
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | namespace cuco::experimental { 23 | 24 | template 25 | roaring_bitmap::roaring_bitmap(cuda::std::byte const* bitmap, 26 | Allocator const& alloc, 27 | cuda::stream_ref stream) 28 | : storage_{bitmap, alloc, stream} 29 | { 30 | } 31 | 32 | template 33 | template 34 | void roaring_bitmap::contains(InputIt first, 35 | InputIt last, 36 | OutputIt output, 37 | cuda::stream_ref stream) const 38 | { 39 | ref_type{storage_.ref()}.contains(first, last, output, stream); 40 | } 41 | 42 | template 43 | template 44 | void roaring_bitmap::contains_async(InputIt first, 45 | InputIt last, 46 | OutputIt output, 47 | cuda::stream_ref stream) const noexcept 48 | { 49 | ref_type{storage_.ref()}.contains_async(first, last, output, stream); 50 | } 51 | 52 | template 53 | cuda::std::size_t roaring_bitmap::size() const noexcept 54 | { 55 | return ref_type{storage_.ref()}.size(); 56 | } 57 | 58 | template 59 | bool roaring_bitmap::empty() const noexcept 60 | { 61 | return ref_type{storage_.ref()}.empty(); 62 | } 63 | 64 | template 65 | cuda::std::byte const* roaring_bitmap::data() const noexcept 66 | { 67 | return ref_type{storage_.ref()}.data(); 68 | } 69 | 70 | template 71 | cuda::std::size_t roaring_bitmap::size_bytes() const noexcept 72 | { 73 | return ref_type{storage_.ref()}.size_bytes(); 74 | } 75 | 76 | template 77 | typename roaring_bitmap::allocator_type roaring_bitmap::allocator() 78 | const noexcept 79 | { 80 | return storage_.allocator(); 81 | } 82 | 83 | template 84 | typename roaring_bitmap::ref_type roaring_bitmap::ref() const noexcept 85 | { 86 | return ref_type{storage_.ref()}; 87 | } 88 | } // namespace cuco::experimental -------------------------------------------------------------------------------- /include/cuco/detail/utility/cuda.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | */ 15 | 16 | #pragma once 17 | 18 | #include 19 | 20 | #include 21 | 22 | #if defined(CUCO_DISABLE_KERNEL_VISIBILITY_WARNING_SUPPRESSION) 23 | #define CUCO_SUPPRESS_KERNEL_WARNINGS 24 | #elif defined(__NVCC__) && (defined(__GNUC__) || defined(__clang__)) 25 | // handle when nvcc is the CUDA compiler and gcc or clang is host 26 | #define CUCO_SUPPRESS_KERNEL_WARNINGS _Pragma("nv_diag_suppress 1407") 27 | _Pragma("GCC diagnostic ignored \"-Wattributes\"") 28 | #elif defined(__clang__) 29 | // handle when clang is the CUDA compiler 30 | #define CUCO_SUPPRESS_KERNEL_WARNINGS _Pragma("clang diagnostic ignored \"-Wattributes\"") 31 | #elif defined(__NVCOMPILER) 32 | #define CUCO_SUPPRESS_KERNEL_WARNINGS #pragma diag_suppress attribute_requires_external_linkage 33 | #endif 34 | 35 | #ifndef CUCO_KERNEL 36 | #define CUCO_KERNEL __attribute__((visibility("hidden"))) __global__ 37 | #endif 38 | namespace cuco { 39 | namespace detail { 40 | 41 | using index_type = cuda::std::int64_t; ///< CUDA thread index type 42 | 43 | /// Default block size 44 | /// CUDA warp size 45 | [[nodiscard]] __device__ constexpr cuda::std::int32_t warp_size() noexcept { return 32; } 46 | 47 | /** 48 | * @brief Returns the global thread index in a 1D scalar grid 49 | * 50 | * @return The global thread index 51 | */ 52 | [[nodiscard]] __device__ inline index_type global_thread_id() noexcept 53 | { 54 | return index_type{threadIdx.x} + index_type{blockDim.x} * index_type{blockIdx.x}; 55 | } 56 | 57 | /** 58 | * @brief Returns the grid stride of a 1D grid 59 | * 60 | * @return The grid stride 61 | */ 62 | [[nodiscard]] __device__ inline index_type grid_stride() noexcept 63 | { 64 | return index_type{gridDim.x} * index_type{blockDim.x}; 65 | } 66 | 67 | /** 68 | * @brief Constexpr helper to extract the size of a Cooperative Group. 69 | * 70 | * @tparam Tile The Cooperative Group type 71 | */ 72 | template 73 | struct tile_size; 74 | 75 | /** 76 | * @brief Specialization of `cuco::detail::tile_size` for 'cooperative_groups::thread_block_tile'. 77 | * 78 | * @tparam CGSize The Cooperative Group size 79 | * @tparam ParentCG The Cooperative Group the tile has been created from 80 | */ 81 | template 82 | struct tile_size> { 83 | static constexpr uint32_t value = CGSize; ///< Size of the `thread_block_tile` 84 | }; 85 | 86 | template 87 | __device__ constexpr uint32_t tile_size_v = tile_size::value; 88 | 89 | } // namespace detail 90 | } // namespace cuco 91 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to cuCollections 2 | 3 | If you are interested in contributing to cuCollections, your contributions will fall 4 | into three categories: 5 | 1. You want to report a bug, feature request, or documentation issue 6 | - File an [issue](https://github.com/NVIDIA/cuCollections/issues/new/choose) 7 | describing what you encountered or what you want to see changed. 8 | - The RAPIDS team will evaluate the issues and triage them, scheduling 9 | them for a release. If you believe the issue needs priority attention 10 | comment on the issue to notify the team. 11 | 2. You want to propose a new Feature and implement it 12 | - Post about your intended feature, and we shall discuss the design and 13 | implementation. 
14 | - Once we agree that the plan looks good, go ahead and implement it, using 15 | the [code contributions](https://github.com/NVIDIA/cuCollections/blob/master/CONTRIBUTING.md#code-contributions) guide below. 16 | 3. You want to implement a feature or bug-fix for an outstanding issue 17 | - Follow the [code contributions](https://github.com/NVIDIA/cuCollections/blob/master/CONTRIBUTING.md#code-contributions) guide below. 18 | - If you need more context on a particular issue, please ask and we shall 19 | provide. 20 | 21 | ## Code contributions 22 | 23 | ### Your first issue 24 | 25 | 1. Read the project's [README.md](https://github.com/NVIDIA/cuCollections/blob/master/README.md) 26 | to learn how to setup the development environment 27 | 2. Find an issue to work on. The best way is to look for the [good first issue](https://github.com/NVIDIA/cuCollections/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) 28 | or [help wanted](https://github.com/NVIDIA/cuCollections/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) labels 29 | 3. Comment on the issue saying you are going to work on it 30 | 4. Code! Make sure to update unit tests! 31 | 5. When done, [create your pull request](https://github.com/NVIDIA/cuCollections/compare) 32 | 6. Verify that CI passes all [status checks](https://help.github.com/articles/about-status-checks/). Fix if needed 33 | 7. Wait for other developers to review your code and update code as needed 34 | 8. Once reviewed and approved, a RAPIDS developer will merge your pull request 35 | 36 | Remember, if you are unsure about anything, don't hesitate to comment on issues 37 | and ask for clarifications! 38 | 39 | ### Seasoned developers 40 | 41 | Once you have gotten your feet wet and are more comfortable with the code, you 42 | can look at the prioritized issues of our next release in our [project boards](https://github.com/NVIDIA/cuCollections/projects). 43 | 44 | > **Pro Tip:** Always look at the release board with the highest number for 45 | issues to work on. This is where RAPIDS developers also focus their efforts. 46 | 47 | Look at the unassigned issues, and find an issue you are comfortable with 48 | contributing to. Start with _Step 3_ from above, commenting on the issue to let 49 | others know you are working on it. If you have any questions related to the 50 | implementation of the issue, ask them in the issue instead of the PR. 51 | 52 | ## Attribution 53 | Portions adopted from https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md 54 | -------------------------------------------------------------------------------- /ci/matrix.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | cuda_12_0: &cuda_12_0 '12.0' 17 | cuda_12_9: &cuda_12_9 '12.9' 18 | cuda_13_0: &cuda_13_0 '13.0' 19 | 20 | # The GPUs to test on 21 | # Note: This assumes that the appropriate gpu_build_archs are set to include building for the GPUs listed here 22 | gpus: 23 | - 'a100' 24 | - 'v100' 25 | 26 | # The version of the devcontainer images to use from https://hub.docker.com/r/rapidsai/devcontainers 27 | devcontainer_version: '25.12' 28 | 29 | # Each environment below will generate a unique build/test job 30 | # See the "compute-matrix" job in the workflow for how this is parsed and used 31 | # cuda: The CUDA Toolkit version 32 | # os: The operating system used 33 | # cpu: The CPU architecture 34 | # compiler: The compiler to use 35 | # name: The compiler name 36 | # version: The compiler version 37 | # exe: The unversioned compiler binary name 38 | # To use the system's default compiler set "exe: 'c++'" or "name: 'cc'" 39 | # gpu_build_archs: The GPU architectures to build for (comma-separated list) 40 | # std: The C++ standards to build for 41 | # This field is unique as it will generate an independent build/test job for each value 42 | 43 | # Configurations that will run for every PR 44 | pull_request: 45 | nvcc: 46 | - {cuda: *cuda_12_0, os: 'ubuntu22.04', cpu: 'amd64', compiler: {name: 'gcc', version: '11', exe: 'g++'}, gpu_build_archs: '70', std: [17], jobs: ['build', 'test']} 47 | - {cuda: *cuda_12_9, os: 'ubuntu24.04', cpu: 'amd64', compiler: {name: 'gcc', version: '13', exe: 'g++'}, gpu_build_archs: '70', std: [17], jobs: ['build', 'test']} 48 | - {cuda: *cuda_12_9, os: 'ubuntu24.04', cpu: 'amd64', compiler: {name: 'gcc', version: '13', exe: 'g++'}, gpu_build_archs: '80,90', std: [17], jobs: ['build']} 49 | - {cuda: *cuda_12_0, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'llvm', version: '14', exe: 'clang++'}, gpu_build_archs: '70', std: [17], jobs: ['build']} 50 | - {cuda: *cuda_12_9, os: 'ubuntu22.04', cpu: 'amd64', compiler: {name: 'llvm', version: '18', exe: 'clang++'}, gpu_build_archs: '90', std: [17], jobs: ['build']} 51 | - {cuda: *cuda_13_0, os: 'ubuntu24.04', cpu: 'amd64', compiler: {name: 'gcc', version: '13', exe: 'g++'}, gpu_build_archs: '80', std: [17], jobs: ['build']} 52 | - {cuda: *cuda_13_0, os: 'ubuntu24.04', cpu: 'amd64', compiler: {name: 'gcc', version: '13', exe: 'g++'}, gpu_build_archs: '80,90', std: [17], jobs: ['build']} 53 | - {cuda: *cuda_13_0, os: 'ubuntu24.04', cpu: 'amd64', compiler: {name: 'llvm', version: '20', exe: 'clang++'}, gpu_build_archs: '90', std: [17], jobs: ['build']} 54 | -------------------------------------------------------------------------------- /tests/dynamic_bitset/select_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #include 26 | 27 | template 28 | __global__ void select_false_kernel(BitsetRef ref, size_type num_elements, OutputIt output) 29 | { 30 | cuco::detail::index_type index = blockIdx.x * blockDim.x + threadIdx.x; 31 | cuco::detail::index_type stride = gridDim.x * blockDim.x; 32 | while (index < num_elements) { 33 | output[index] = ref.select_false(index); 34 | index += stride; 35 | } 36 | } 37 | 38 | using cuco::test::modulo_bitgen; 39 | 40 | TEST_CASE("dynamic_bitset select test", "") 41 | { 42 | cuco::experimental::detail::dynamic_bitset bv; 43 | 44 | using size_type = std::size_t; 45 | constexpr size_type num_elements{4000}; 46 | 47 | size_type num_set = 0; 48 | for (size_type i = 0; i < num_elements; i++) { 49 | bv.push_back(modulo_bitgen(i)); 50 | num_set += modulo_bitgen(i); 51 | } 52 | 53 | // Check select 54 | { 55 | thrust::device_vector keys(num_set); 56 | thrust::sequence(keys.begin(), keys.end(), 0); 57 | 58 | thrust::device_vector d_selects(num_set); 59 | 60 | bv.select(keys.begin(), keys.end(), d_selects.begin()); 61 | 62 | thrust::host_vector h_selects = d_selects; 63 | 64 | size_type num_matches = 0; 65 | size_type cur_set_pos = -1lu; 66 | for (size_type i = 0; i < num_set; i++) { 67 | do { 68 | cur_set_pos++; 69 | } while (cur_set_pos < num_elements and !modulo_bitgen(cur_set_pos)); 70 | 71 | num_matches += cur_set_pos == h_selects[i]; 72 | } 73 | REQUIRE(num_matches == num_set); 74 | } 75 | 76 | // Check select_false 77 | { 78 | size_type num_not_set = num_elements - num_set; 79 | 80 | auto ref = bv.ref(); 81 | thrust::device_vector device_result(num_not_set); 82 | select_false_kernel<<<1, 1024>>>(ref, num_not_set, device_result.data()); 83 | thrust::host_vector host_result = device_result; 84 | 85 | size_type num_matches = 0; 86 | size_type cur_not_set_pos = -1lu; 87 | for (size_type i = 0; i < num_not_set; i++) { 88 | do { 89 | cur_not_set_pos++; 90 | } while (cur_not_set_pos < num_elements and modulo_bitgen(cur_not_set_pos)); 91 | 92 | num_matches += cur_not_set_pos == host_result[i]; 93 | } 94 | REQUIRE(num_matches == num_not_set); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /examples/static_multiset/host_bulk_example.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | #include 26 | 27 | /** 28 | * @file host_bulk_example.cu 29 | * @brief Demonstrates usage of the static_multiset "bulk" host APIs. 30 | * 31 | * The bulk APIs are only invocable from the host and are used for doing operations like `insert` or 32 | * `retrieve` on a multiset of keys. 
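 *
 * In outline, the flow demonstrated below is (a condensed sketch of this example; iterator and
 * variable names are placeholders):
 * @code{.cpp}
 * cuco::static_multiset<key_type> multiset{capacity, cuco::empty_key{empty_key_sentinel}};
 * multiset.insert(first, last);                     // bulk insert (performed twice below)
 * auto const n = multiset.count(first, last);       // count matches to size the output buffers
 * multiset.retrieve(first, last, probes_out, matches_out);
 * @endcode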
33 | * 34 | */ 35 | int main(void) 36 | { 37 | using key_type = int; 38 | 39 | // Empty slots are represented by reserved "sentinel" values. These values should be selected such 40 | // that they never occur in your input data. 41 | key_type constexpr empty_key_sentinel = -1; 42 | 43 | // Number of keys to be inserted 44 | std::size_t constexpr num_keys = 50'000; 45 | 46 | // Compute capacity based on a 50% load factor 47 | auto constexpr load_factor = 0.5; 48 | std::size_t const capacity = std::ceil(num_keys / load_factor); 49 | 50 | // Constructs a set with at least `capacity` slots using -1 as the empty keys sentinel. 51 | cuco::static_multiset multiset{capacity, cuco::empty_key{empty_key_sentinel}}; 52 | 53 | // Create a sequence of keys {0, 1, 2, .., i} 54 | // We're going to insert each key twice so we only need 'num_keys / 2' distinct keys. 55 | thrust::device_vector keys(num_keys / 2); 56 | thrust::sequence(keys.begin(), keys.end(), 0); 57 | 58 | // Inserts all keys into the hash set 59 | multiset.insert(keys.begin(), keys.end()); 60 | // Insert the same set of keys again, so each distinct key should occur twice in the multiset 61 | multiset.insert(keys.begin(), keys.end()); 62 | 63 | // Counts the occurrences of matching keys contained in the multiset. 64 | std::size_t const counted_output_size = multiset.count(keys.begin(), keys.end()); 65 | 66 | // Storage for result 67 | thrust::device_vector output_probes(counted_output_size); 68 | thrust::device_vector output_matches(counted_output_size); 69 | 70 | // Retrieve all matching keys 71 | auto const [output_probes_end, _] = 72 | multiset.retrieve(keys.begin(), keys.end(), output_probes.begin(), output_matches.begin()); 73 | std::size_t const retrieved_output_size = output_probes_end - output_probes.begin(); 74 | 75 | if ((retrieved_output_size == counted_output_size) and (retrieved_output_size == num_keys)) { 76 | std::cout << "Success! Found all keys.\n"; 77 | } else { 78 | std::cout << "Fail! Something went wrong.\n"; 79 | } 80 | 81 | return 0; 82 | } 83 | -------------------------------------------------------------------------------- /tests/static_set/insert_and_find_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #include 26 | 27 | template 28 | void test_insert_and_find(Set& set, std::size_t num_keys) 29 | { 30 | using Key = typename Set::key_type; 31 | static auto constexpr cg_size = Set::cg_size; 32 | 33 | auto const keys_begin = thrust::counting_iterator(0); 34 | auto const keys_end = thrust::counting_iterator(num_keys); 35 | 36 | thrust::device_vector iters1(num_keys); 37 | thrust::device_vector iters2(num_keys); 38 | 39 | thrust::device_vector inserted(num_keys); 40 | 41 | // insert first time, fills inserted with true 42 | set.insert_and_find(keys_begin, keys_end, iters1.begin(), inserted.begin()); 43 | REQUIRE(cuco::test::all_of(inserted.begin(), inserted.end(), cuda::std::identity{})); 44 | 45 | // insert second time, fills inserted with false as keys already in set 46 | set.insert_and_find(keys_begin, keys_end, iters2.begin(), inserted.begin()); 47 | REQUIRE(cuco::test::none_of(inserted.begin(), inserted.end(), cuda::std::identity{})); 48 | 49 | // both iters1 and iters2 should be same, as keys will be referring to same slot 50 | REQUIRE( 51 | cuco::test::equal(iters1.begin(), iters1.end(), iters2.begin(), cuda::std::equal_to{})); 52 | } 53 | 54 | TEMPLATE_TEST_CASE_SIG( 55 | "static_set Insert and find", 56 | "", 57 | ((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize), 58 | (int32_t, cuco::test::probe_sequence::double_hashing, 1), 59 | (int32_t, cuco::test::probe_sequence::double_hashing, 2), 60 | (int64_t, cuco::test::probe_sequence::double_hashing, 1), 61 | (int64_t, cuco::test::probe_sequence::double_hashing, 2), 62 | (int32_t, cuco::test::probe_sequence::linear_probing, 1), 63 | (int32_t, cuco::test::probe_sequence::linear_probing, 2), 64 | (int64_t, cuco::test::probe_sequence::linear_probing, 1), 65 | (int64_t, cuco::test::probe_sequence::linear_probing, 2)) 66 | { 67 | constexpr std::size_t num_keys{400}; 68 | 69 | using probe = std::conditional_t>, 71 | cuco::double_hashing>>; 72 | 73 | auto set = 74 | cuco::static_set{num_keys, cuco::empty_key{-1}, {}, probe{}, {}, cuco::storage<2>{}}; 75 | 76 | test_insert_and_find(set, num_keys); 77 | } 78 | -------------------------------------------------------------------------------- /tests/static_map/key_sentinel_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | 28 | #define SIZE 10 29 | __device__ int A[SIZE]; 30 | 31 | template 32 | struct custom_equals { 33 | __device__ bool operator()(T lhs, T rhs) const { return A[lhs] == A[rhs]; } 34 | }; 35 | 36 | TEMPLATE_TEST_CASE_SIG("static_map key sentinel tests", "", ((typename T), T), (int32_t), (int64_t)) 37 | { 38 | using Key = T; 39 | using Value = T; 40 | 41 | constexpr std::size_t num_keys{SIZE}; 42 | auto map = cuco::static_map{SIZE * 2, 43 | cuco::empty_key{-1}, 44 | cuco::empty_value{-1}, 45 | custom_equals{}, 46 | cuco::linear_probing<1, cuco::default_hash_function>{}}; 47 | 48 | auto insert_ref = map.ref(cuco::op::insert); 49 | auto find_ref = map.ref(cuco::op::find); 50 | 51 | int h_A[SIZE]; 52 | for (int i = 0; i < SIZE; i++) { 53 | h_A[i] = i; 54 | } 55 | CUCO_CUDA_TRY(cudaMemcpyToSymbol(A, h_A, SIZE * sizeof(int))); 56 | 57 | auto pairs_begin = thrust::make_transform_iterator( 58 | thrust::make_counting_iterator(0), 59 | cuda::proclaim_return_type>( 60 | [] __device__(auto i) { return cuco::pair(i, i); })); 61 | 62 | SECTION( 63 | "Tests of non-CG insert: The custom `key_equal` can never be used to compare against sentinel") 64 | { 65 | REQUIRE( 66 | cuco::test::all_of(pairs_begin, 67 | pairs_begin + num_keys, 68 | cuda::proclaim_return_type( 69 | [insert_ref] __device__(cuco::pair const& pair) mutable { 70 | return insert_ref.insert(pair); 71 | }))); 72 | } 73 | 74 | SECTION( 75 | "Tests of CG insert: The custom `key_equal` can never be used to compare against sentinel") 76 | { 77 | map.insert(pairs_begin, pairs_begin + num_keys); 78 | // All keys inserted via custom `key_equal` should be found 79 | REQUIRE(cuco::test::all_of( 80 | pairs_begin, 81 | pairs_begin + num_keys, 82 | cuda::proclaim_return_type([find_ref] __device__(cuco::pair const& pair) { 83 | auto const found = find_ref.find(pair.first); 84 | return (found != find_ref.end()) and 85 | (found->first == pair.first and found->second == pair.second); 86 | }))); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /tests/static_set/large_input_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #include 30 | 31 | template 32 | void test_unique_sequence(Set& set, bool* res_begin, std::size_t num_keys) 33 | { 34 | using Key = typename Set::key_type; 35 | 36 | auto const keys_begin = thrust::counting_iterator(0); 37 | auto const keys_end = thrust::counting_iterator(num_keys); 38 | 39 | SECTION("Non-inserted keys should not be contained.") 40 | { 41 | REQUIRE(set.size() == 0); 42 | 43 | set.contains(keys_begin, keys_end, res_begin); 44 | REQUIRE(cuco::test::none_of(res_begin, res_begin + num_keys, cuda::std::identity{})); 45 | } 46 | 47 | set.insert(keys_begin, keys_end); 48 | REQUIRE(set.size() == num_keys); 49 | 50 | SECTION("All inserted key/value pairs should be contained.") 51 | { 52 | set.contains(keys_begin, keys_end, res_begin); 53 | REQUIRE(cuco::test::all_of(res_begin, res_begin + num_keys, cuda::std::identity{})); 54 | } 55 | 56 | SECTION("All inserted key/value pairs can be retrieved.") 57 | { 58 | auto output_keys = thrust::device_vector(num_keys); 59 | 60 | auto const keys_end = set.retrieve_all(output_keys.begin()); 61 | REQUIRE(static_cast(std::distance(output_keys.begin(), keys_end)) == num_keys); 62 | 63 | thrust::sort(output_keys.begin(), keys_end); 64 | 65 | REQUIRE(cuco::test::equal(output_keys.begin(), 66 | output_keys.end(), 67 | thrust::counting_iterator(0), 68 | cuda::std::equal_to{})); 69 | } 70 | } 71 | 72 | TEMPLATE_TEST_CASE_SIG( 73 | "cuco::static_set large input test", 74 | "", 75 | ((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize), 76 | (int64_t, cuco::test::probe_sequence::double_hashing, 1), 77 | (int64_t, cuco::test::probe_sequence::double_hashing, 2)) 78 | { 79 | constexpr std::size_t num_keys{1'200'000'000}; 80 | 81 | using extent_type = cuco::extent; 82 | using probe = cuco::double_hashing>; 83 | 84 | try { 85 | auto set = cuco::static_set{num_keys * 2, cuco::empty_key{-1}, {}, probe{}}; 86 | 87 | thrust::device_vector d_contained(num_keys); 88 | test_unique_sequence(set, d_contained.data().get(), num_keys); 89 | } catch (cuco::cuda_error&) { 90 | SKIP("Out of memory"); 91 | } catch (std::bad_alloc&) { 92 | SKIP("Out of memory"); 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /include/cuco/detail/storage/storage_base.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | #include 22 | 23 | namespace cuco { 24 | namespace detail { 25 | /** 26 | * @brief Custom deleter for unique pointer. 
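 *
 * Illustrative pairing with `std::unique_ptr` (a sketch only; the allocator type, its
 * stream-ordered `allocate`/`deallocate` signature, and the variable names are assumed):
 * @code{.cpp}
 * allocator_type alloc;
 * auto* p = alloc.allocate(n, stream);  // assumed stream-ordered allocation of `n` values
 * custom_deleter<std::size_t, allocator_type> del{n, alloc, stream};
 * // on destruction, the deleter returns the memory through the same allocator and stream
 * std::unique_ptr<value_type, custom_deleter<std::size_t, allocator_type>> slots{p, del};
 * @endcode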
27 | * 28 | * @tparam SizeType Type of device storage size 29 | * @tparam Allocator Type of allocator used for device storage 30 | */ 31 | template 32 | struct custom_deleter { 33 | using pointer = typename Allocator::value_type*; ///< Value pointer type 34 | 35 | /** 36 | * @brief Constructor of custom deleter. 37 | * 38 | * @param size Number of values to deallocate 39 | * @param allocator Allocator used for deallocating device storage 40 | * @param stream Stream to use for deallocation 41 | */ 42 | explicit constexpr custom_deleter(SizeType size, Allocator& allocator, cuda::stream_ref stream) 43 | : size_{size}, allocator_{allocator}, stream_{stream} 44 | { 45 | } 46 | 47 | /** 48 | * @brief Operator for deallocation 49 | * 50 | * @param ptr Pointer to the first value for deallocation 51 | */ 52 | void operator()(pointer ptr) { allocator_.deallocate(ptr, size_, stream_); } 53 | 54 | SizeType size_; ///< Number of values to delete 55 | Allocator& allocator_; ///< Allocator used deallocating values 56 | cuda::stream_ref stream_; ///< Stream used for deallocation 57 | }; 58 | 59 | /** 60 | * @brief Base class of open addressing storage. 61 | * 62 | * This class should not be used directly. 63 | * 64 | * @tparam Extent Type of extent denoting storage capacity 65 | */ 66 | template 67 | class storage_base { 68 | public: 69 | using extent_type = Extent; ///< Storage extent type 70 | using size_type = typename extent_type::value_type; ///< Storage size type 71 | 72 | /** 73 | * @brief Constructor of base storage. 74 | * 75 | * @param size Number of elements to (de)allocate 76 | */ 77 | __host__ __device__ explicit constexpr storage_base(Extent size) : extent_{size} {} 78 | 79 | /** 80 | * @brief Gets the total number of elements in the current storage. 81 | * 82 | * @return The total number of elements 83 | */ 84 | [[nodiscard]] __host__ __device__ constexpr size_type capacity() const noexcept 85 | { 86 | return static_cast(extent_); 87 | } 88 | 89 | /** 90 | * @brief Gets the extent of the current storage. 91 | * 92 | * @return The extent. 93 | */ 94 | [[nodiscard]] __host__ __device__ constexpr extent_type extent() const noexcept 95 | { 96 | return extent_; 97 | } 98 | 99 | protected: 100 | extent_type extent_; ///< Total number of elements 101 | }; 102 | 103 | } // namespace detail 104 | } // namespace cuco 105 | -------------------------------------------------------------------------------- /.github/workflows/verify-devcontainers.yml: -------------------------------------------------------------------------------- 1 | name: Verify devcontainers 2 | 3 | on: 4 | workflow_call: 5 | 6 | defaults: 7 | run: 8 | shell: bash -euo pipefail {0} 9 | 10 | jobs: 11 | verify-make-devcontainers: 12 | name: Verify devcontainers 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout repository 16 | uses: actions/checkout@v3 17 | - name: Setup jq and yq 18 | run: | 19 | sudo apt-get update 20 | sudo apt-get install jq -y 21 | sudo wget -O /usr/local/bin/yq https://github.com/mikefarah/yq/releases/download/v4.34.2/yq_linux_amd64 22 | sudo chmod +x /usr/local/bin/yq 23 | - name: Run the script to generate devcontainer files 24 | run: | 25 | ./.devcontainer/make_devcontainers.sh --verbose 26 | - name: Check for changes 27 | run: | 28 | if [[ $(git diff --stat) != '' ]]; then 29 | git diff --minimal 30 | echo "::error:: Dev Container files are out of date. Run the .devcontainer/make_devcontainers.sh script and commit the changes." 
31 | exit 1 32 | else 33 | echo "::note::Dev Container files are up-to-date." 34 | fi 35 | get-devcontainer-list: 36 | needs: verify-make-devcontainers 37 | name: List devcontainers 38 | runs-on: ubuntu-latest 39 | outputs: 40 | devcontainers: ${{ steps.get-list.outputs.devcontainers }} 41 | steps: 42 | - name: Check out the code 43 | uses: actions/checkout@v3 44 | - name: Get list of devcontainer.json paths and names 45 | id: get-list 46 | run: | 47 | devcontainers=$(find .devcontainer/ -name 'devcontainer.json' | while read -r devcontainer; do 48 | jq --arg path "$devcontainer" '{path: $path, name: .name}' "$devcontainer" 49 | done | jq -s -c .) 50 | echo "devcontainers=${devcontainers}" | tee --append "${GITHUB_OUTPUT}" 51 | verify-devcontainers: 52 | needs: get-devcontainer-list 53 | name: ${{matrix.devcontainer.name}} 54 | runs-on: ubuntu-latest 55 | strategy: 56 | fail-fast: false 57 | matrix: 58 | devcontainer: ${{fromJson(needs.get-devcontainer-list.outputs.devcontainers)}} 59 | permissions: 60 | id-token: write 61 | contents: read 62 | steps: 63 | - name: Check out the code 64 | uses: actions/checkout@v3 65 | # devcontainer/ci doesn't supported nested devcontainer.json files, so we need to copy the devcontainer.json 66 | # file to the top level .devcontainer/ directory 67 | - name: Copy devcontainer.json to .devcontainer/ 68 | run: | 69 | src="${{ matrix.devcontainer.path }}" 70 | dst=".devcontainer/devcontainer.json" 71 | if [[ "$src" != "$dst" ]]; then 72 | cp "$src" "$dst" 73 | fi 74 | # We don't really need sccache configured, but we need the AWS credentials envvars to be set 75 | # in order to avoid the devcontainer hanging waiting for GitHub authentication 76 | - name: Configure credentials and environment variables for sccache 77 | uses: ./.github/actions/configure_cccl_sccache 78 | - name: Run in devcontainer 79 | uses: devcontainers/ci@v0.3 80 | with: 81 | push: never 82 | env: | 83 | SCCACHE_REGION=${{ env.SCCACHE_REGION }} 84 | AWS_ACCESS_KEY_ID=${{ env.AWS_ACCESS_KEY_ID }} 85 | AWS_SESSION_TOKEN=${{ env.AWS_SESSION_TOKEN }} 86 | AWS_SECRET_ACCESS_KEY=${{ env.AWS_SECRET_ACCESS_KEY }} 87 | runCmd: | 88 | .devcontainer/verify_devcontainer.sh -------------------------------------------------------------------------------- /cmake/header_testing.cmake: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright (c) 2025, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | #============================================================================= 16 | 17 | # For every public header, build a translation unit containing `#include
` 18 | # to let the compiler try to figure out warnings in that header if it is not otherwise 19 | # included in tests, and also to verify if the headers are modular enough. 20 | # .inl files are not globbed for, because they are not supposed to be used as public 21 | # entrypoints. 22 | 23 | function(cuco_add_header_tests) 24 | file(GLOB_RECURSE headers 25 | RELATIVE "${CUCO_SOURCE_DIR}/include" 26 | CONFIGURE_DEPENDS 27 | "${CUCO_SOURCE_DIR}/include/cuco/*.cuh" 28 | "${CUCO_SOURCE_DIR}/include/cuco/*.hpp" 29 | ) 30 | 31 | list(LENGTH headers headers_count) 32 | message(STATUS "Found ${headers_count} headers for testing") 33 | 34 | # List of headers that have known issues or are not meant to be included directly 35 | set(excluded_headers 36 | # Add any headers that should be excluded from testing here 37 | # Example: cuco/internal_header.cuh 38 | ) 39 | 40 | # Remove excluded headers 41 | if(excluded_headers) 42 | list(REMOVE_ITEM headers ${excluded_headers}) 43 | endif() 44 | 45 | foreach (header IN LISTS headers) 46 | # Create a safe target name by replacing path separators and dots 47 | string(REPLACE "/" "_" header_target_name "${header}") 48 | string(REPLACE "." "_" header_target_name "${header_target_name}") 49 | # Use a hash to ensure uniqueness in case of similar names 50 | string(MD5 header_hash "${header}") 51 | string(SUBSTRING "${header_hash}" 0 8 header_hash_short) 52 | set(headertest_target "cuco_header_${header_target_name}_${header_hash_short}") 53 | 54 | set(header_src "${CMAKE_CURRENT_BINARY_DIR}/headers/${headertest_target}/${header}.cu") 55 | 56 | # Create the directory if it doesn't exist 57 | get_filename_component(header_dir "${header_src}" DIRECTORY) 58 | file(MAKE_DIRECTORY "${header_dir}") 59 | 60 | # Write simple test file that includes the header 61 | file(WRITE "${header_src}" "#include <${header}>\nint main() { return 0; }\n") 62 | 63 | # Create executable test for this specific header 64 | add_executable(${headertest_target} ${header_src}) 65 | target_link_libraries(${headertest_target} PRIVATE cuco::cuco CUDA::cudart) 66 | 67 | # Use common compile options (includes all compiler-specific warning suppressions) 68 | cuco_set_common_compile_options(${headertest_target}) 69 | 70 | set_target_properties(${headertest_target} PROPERTIES 71 | RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/tests/headers" 72 | ) 73 | 74 | # Add as a CTest test 75 | add_test(NAME ${headertest_target} COMMAND ${headertest_target}) 76 | endforeach() 77 | endfunction() 78 | 79 | cuco_add_header_tests() 80 | -------------------------------------------------------------------------------- /include/cuco/hash_functions.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2023, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #include 24 | 25 | namespace cuco { 26 | 27 | /** 28 | * @brief An Identity hash function to hash the given argument on host and device 29 | * 30 | * @throw A key must not be larger than uint64_t 31 | * 32 | * @tparam Key The type of the values to hash 33 | */ 34 | template 35 | using identity_hash = detail::identity_hash; 36 | 37 | /** 38 | * @brief The 32-bit integer finalizer function of `MurmurHash3` to hash the given argument on host 39 | * and device. 40 | * 41 | * @throw Key type must be 4 bytes in size 42 | * 43 | * @tparam Key The type of the values to hash 44 | */ 45 | template 46 | using murmurhash3_fmix_32 = detail::MurmurHash3_fmix32; 47 | 48 | /** 49 | * @brief The 64-bit integer finalizer function of `MurmurHash3` to hash the given argument on host 50 | * and device. 51 | * 52 | * @throw Key type must be 8 bytes in size 53 | * 54 | * @tparam Key The type of the values to hash 55 | */ 56 | template 57 | using murmurhash3_fmix_64 = detail::MurmurHash3_fmix64; 58 | 59 | /** 60 | * @brief A 32-bit `MurmurHash3` hash function to hash the given argument on host and device. 61 | * 62 | * @tparam Key The type of the values to hash 63 | */ 64 | template 65 | using murmurhash3_32 = detail::MurmurHash3_32; 66 | 67 | /** 68 | * @brief A 128-bit `MurmurHash3` hash function to hash the given argument on host and device. 69 | * 70 | * @tparam Key The type of the values to hash 71 | */ 72 | template 73 | using murmurhash3_x64_128 = detail::MurmurHash3_x64_128; 74 | 75 | /** 76 | * @brief A 128-bit `MurmurHash3` hash function to hash the given argument on host and device. 77 | * 78 | * @tparam Key The type of the values to hash 79 | */ 80 | template 81 | using murmurhash3_x86_128 = detail::MurmurHash3_x86_128; 82 | 83 | /** 84 | * @brief A 32-bit `XXH32` hash function to hash the given argument on host and device. 85 | * 86 | * @tparam Key The type of the values to hash 87 | */ 88 | template 89 | using xxhash_32 = detail::XXHash_32; 90 | 91 | /** 92 | * @brief A 64-bit `XXH64` hash function to hash the given argument on host and device. 93 | * 94 | * @tparam Key The type of the values to hash 95 | */ 96 | template 97 | using xxhash_64 = detail::XXHash_64; 98 | 99 | /** 100 | * @brief Default hash function. 101 | * 102 | * @tparam Key The type of the values to hash 103 | */ 104 | template 105 | using default_hash_function = xxhash_32; 106 | 107 | } // namespace cuco 108 | -------------------------------------------------------------------------------- /tests/hyperloglog/device_ref_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | 29 | #include 30 | #include 31 | 32 | template 33 | __global__ void estimate_kernel(cuco::sketch_size_kb sketch_size_kb, 34 | InputIt in, 35 | size_t n, 36 | OutputIt out) 37 | { 38 | extern __shared__ cuda::std::byte local_sketch[]; 39 | 40 | auto const block = cooperative_groups::this_thread_block(); 41 | 42 | // only a single block computes the estimate 43 | if (block.group_index().x == 0) { 44 | Ref estimator(cuda::std::span(local_sketch, Ref::sketch_bytes(sketch_size_kb))); 45 | 46 | estimator.clear(block); 47 | block.sync(); 48 | 49 | for (int i = block.thread_rank(); i < n; i += block.num_threads()) { 50 | estimator.add(*(in + i)); 51 | } 52 | block.sync(); 53 | auto const estimate = estimator.estimate(block); 54 | if (block.thread_rank() == 0) { *out = estimate; } 55 | } 56 | } 57 | 58 | TEMPLATE_TEST_CASE_SIG("hyperloglog: device ref", 59 | "", 60 | ((typename T, typename Hash), T, Hash), 61 | (int32_t, cuco::xxhash_64), 62 | (int64_t, cuco::xxhash_64), 63 | (__int128_t, cuco::xxhash_64<__int128_t>)) 64 | { 65 | using estimator_type = cuco::hyperloglog; 66 | 67 | auto num_items_pow2 = GENERATE(25, 26, 28); 68 | auto hll_precision = GENERATE(8, 10, 12, 13); 69 | auto sketch_size_kb = 4 * (1ull << hll_precision) / 1024; 70 | INFO("hll_precision=" << hll_precision); 71 | INFO("sketch_size_kb=" << sketch_size_kb); 72 | INFO("num_items=2^" << num_items_pow2); 73 | auto num_items = 1ull << num_items_pow2; 74 | 75 | thrust::device_vector items(num_items); 76 | 77 | // Generate `num_items` distinct items 78 | thrust::sequence(items.begin(), items.end(), 0); 79 | 80 | // Initialize the estimator 81 | estimator_type estimator{cuco::sketch_size_kb(sketch_size_kb)}; 82 | 83 | // Add all items to the estimator 84 | estimator.add(items.begin(), items.end()); 85 | 86 | auto const host_estimate = estimator.estimate(); 87 | 88 | thrust::device_vector device_estimate(1); 89 | estimate_kernel> 90 | <<<1, 512, estimator.sketch_bytes()>>>( 91 | cuco::sketch_size_kb(sketch_size_kb), items.begin(), num_items, device_estimate.begin()); 92 | 93 | REQUIRE(device_estimate[0] == host_estimate); 94 | } 95 | -------------------------------------------------------------------------------- /tests/static_set/retrieve_all_test.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include 28 | 29 | template 30 | void test_unique_sequence(Set& set, std::size_t num_keys) 31 | { 32 | using Key = typename Set::key_type; 33 | 34 | thrust::device_vector d_keys(num_keys); 35 | thrust::sequence(d_keys.begin(), d_keys.end()); 36 | auto keys_begin = d_keys.begin(); 37 | 38 | SECTION("Non-inserted keys should not be contained.") 39 | { 40 | REQUIRE(set.size() == 0); 41 | 42 | auto keys_end = set.retrieve_all(keys_begin); 43 | REQUIRE(std::distance(keys_begin, keys_end) == 0); 44 | } 45 | 46 | set.insert(keys_begin, keys_begin + num_keys); 47 | REQUIRE(set.size() == num_keys); 48 | 49 | SECTION("All inserted key/value pairs should be contained.") 50 | { 51 | thrust::device_vector d_res(num_keys); 52 | auto d_res_end = set.retrieve_all(d_res.begin()); 53 | thrust::sort(d_res.begin(), d_res_end); 54 | REQUIRE(cuco::test::equal( 55 | d_res.begin(), d_res_end, thrust::counting_iterator(0), cuda::std::equal_to{})); 56 | } 57 | } 58 | 59 | TEMPLATE_TEST_CASE_SIG( 60 | "static_set::retrieve_all tests", 61 | "", 62 | ((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize), 63 | (int32_t, cuco::test::probe_sequence::double_hashing, 1), 64 | (int32_t, cuco::test::probe_sequence::double_hashing, 2), 65 | (int64_t, cuco::test::probe_sequence::double_hashing, 1), 66 | (int64_t, cuco::test::probe_sequence::double_hashing, 2), 67 | (int32_t, cuco::test::probe_sequence::linear_probing, 1), 68 | (int32_t, cuco::test::probe_sequence::linear_probing, 2), 69 | (int64_t, cuco::test::probe_sequence::linear_probing, 1), 70 | (int64_t, cuco::test::probe_sequence::linear_probing, 2)) 71 | { 72 | constexpr std::size_t num_keys{400}; 73 | constexpr double desired_load_factor = 1.; 74 | 75 | using probe = std::conditional_t>, 77 | cuco::double_hashing>>; 78 | 79 | constexpr std::size_t gold_capacity = [&]() { 80 | if constexpr (cuco::is_double_hashing::value) { 81 | return (CGSize == 1) ? 409 // 409 x 1 x 2 82 | : 422; // 211 x 2 x 2 83 | } else { 84 | return 400; 85 | } 86 | }(); 87 | 88 | auto set = cuco::static_set{num_keys, desired_load_factor, cuco::empty_key{-1}, {}, probe{}}; 89 | 90 | REQUIRE(set.capacity() == gold_capacity); 91 | 92 | test_unique_sequence(set, num_keys); 93 | } 94 | -------------------------------------------------------------------------------- /examples/static_map/host_bulk_example.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | /** 32 | * @file host_bulk_example.cu 33 | * @brief Demonstrates usage of the static_map "bulk" host APIs. 
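 *
 * In outline, the flow demonstrated below is (a condensed sketch of this example; variable
 * names are placeholders):
 * @code{.cpp}
 * cuco::static_map<Key, Value> map{
 *   capacity, cuco::empty_key{empty_key_sentinel}, cuco::empty_value{empty_value_sentinel}};
 * map.insert(pairs_first, pairs_last);           // bulk insert of cuco::pair<Key, Value>
 * map.find(keys_first, keys_last, values_out);   // a missing key yields the empty value sentinel
 * @endcode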
34 | * 35 | * The bulk APIs are only invocable from the host and are used for doing operations like insert or 36 | * find on a set of keys. 37 | * 38 | */ 39 | 40 | int main(void) 41 | { 42 | using Key = int; 43 | using Value = int; 44 | 45 | // Empty slots are represented by reserved "sentinel" values. These values should be selected such 46 | // that they never occur in your input data. 47 | Key constexpr empty_key_sentinel = -1; 48 | Value constexpr empty_value_sentinel = -1; 49 | 50 | // Number of key/value pairs to be inserted 51 | std::size_t constexpr num_keys = 50'000; 52 | 53 | // Compute capacity based on a 50% load factor 54 | auto constexpr load_factor = 0.5; 55 | std::size_t const capacity = std::ceil(num_keys / load_factor); 56 | 57 | // Constructs a map with "capacity" slots using -1 and -1 as the empty key/value sentinels. 58 | auto map = cuco::static_map{ 59 | capacity, cuco::empty_key{empty_key_sentinel}, cuco::empty_value{empty_value_sentinel}}; 60 | 61 | // Create a sequence of keys and values 62 | thrust::device_vector insert_keys(num_keys); 63 | thrust::sequence(insert_keys.begin(), insert_keys.end(), 0); 64 | thrust::device_vector insert_values(num_keys); 65 | thrust::sequence(insert_values.begin(), insert_values.end(), 0); 66 | // Combine keys and values into pairs {{0,0}, {1,1}, ... {i,i}} 67 | auto pairs = thrust::make_transform_iterator( 68 | thrust::counting_iterator{0}, 69 | cuda::proclaim_return_type>( 70 | [keys = insert_keys.begin(), values = insert_values.begin()] __device__(auto i) { 71 | return cuco::pair{keys[i], values[i]}; 72 | })); 73 | 74 | // Inserts all pairs into the map 75 | map.insert(pairs, pairs + num_keys); 76 | 77 | // Storage for found values 78 | thrust::device_vector found_values(num_keys); 79 | 80 | // Finds all keys {0, 1, 2, ...} and stores associated values into `found_values` 81 | // If a key `keys_to_find[i]` doesn't exist, `found_values[i] == empty_value_sentinel` 82 | map.find(insert_keys.begin(), insert_keys.end(), found_values.begin()); 83 | 84 | // Verify that all the found values match the inserted values 85 | bool const all_values_match = 86 | thrust::equal(found_values.begin(), found_values.end(), insert_values.begin()); 87 | 88 | if (all_values_match) { std::cout << "Success! Found all values.\n"; } 89 | 90 | return 0; 91 | } 92 | --------------------------------------------------------------------------------