├── .github
    ├── CODEOWNERS
    ├── copy-pr-bot.yaml
    ├── actions
    │   ├── compute-matrix
    │   │   ├── action.yml
    │   │   └── compute-matrix.sh
    │   └── configure_cccl_sccache
    │   │   └── action.yml
    └── workflows
    │   ├── build-and-test.yml
    │   ├── dispatch-build-and-test.yml
    │   ├── run-as-coder.yml
    │   └── verify-devcontainers.yml
├── versions.json
├── ._upstream
    ├── .github
    │   ├── ISSUE_TEMPLATE
    │   │   ├── config.yml
    │   │   └── feature_request.yml
    │   └── PULL_REQUEST_TEMPLATE.md
    └── CONTRIBUTING.md
├── ci
    ├── test.sh
    ├── sccache_hit_rate.sh
    ├── sccache_stats.sh
    ├── matrix.yml
    └── pre-commit
    │   └── doxygen.sh
├── cmake
    └── thirdparty
    │   └── get_cccl.cmake
├── tests
    ├── dynamic_bitset
    │   ├── size_test.cu
    │   ├── rank_test.cu
    │   ├── find_next_test.cu
    │   ├── get_test.cu
    │   └── select_test.cu
    ├── static_set
    │   ├── size_test.cu
    │   ├── rehash_test.cu
    │   └── retrieve_all_test.cu
    ├── static_map
    │   ├── rehash_test.cu
    │   └── hash_test.cu
    ├── utility
    │   ├── fast_int_test.cu
    │   └── extent_test.cu
    ├── hyperloglog
    │   ├── unique_sequence_test.cu
    │   └── device_ref_test.cu
    ├── test_utils.cuh
    └── static_multiset
    │   └── count_test.cu
├── include
    └── cuco
    │   ├── detail
    │       ├── probing_scheme
    │       │   └── probing_scheme_base.cuh
    │       ├── utility
    │       │   ├── math.cuh
    │       │   └── strong_type.cuh
    │       ├── hash_functions
    │       │   ├── utils.cuh
    │       │   └── identity_hash.cuh
    │       ├── storage
    │       │   ├── kernels.cuh
    │       │   ├── storage.cuh
    │       │   ├── storage_base.cuh
    │       │   └── bucket_storage_base.cuh
    │       ├── operator.inl
    │       ├── bloom_filter
    │       │   └── default_filter_policy.inl
    │       ├── utils.hpp
    │       ├── __config
    │       ├── bitwise_compare.cuh
    │       └── pair
    │       │   ├── pair.inl
    │       │   └── tuple_helpers.inl
    │   ├── cuda_runtime.h
    │   ├── utility
    │       ├── cuda_thread_scope.cuh
    │       ├── error.hpp
    │       ├── allocator.hpp
    │       ├── traits.hpp
    │       └── reduction_functors.cuh
    │   ├── storage.cuh
    │   ├── operator.hpp
    │   ├── types.cuh
    │   ├── probe_sequences.cuh
    │   └── hash_functions.cuh
├── rocm-docs
    ├── build.sh
    ├── index.md
    ├── .sphinx
    │   └── _toc.yml.in
    └── conf.py
├── benchmarks
    ├── bloom_filter
    │   ├── defaults.hpp
    │   └── utils.hpp
    ├── benchmark_defaults.hpp
    ├── benchmark_utils.hpp
    ├── static_set
    │   ├── size_bench.cu
    │   ├── rehash_bench.cu
    │   ├── retrieve_all_bench.cu
    │   ├── contains_bench.cu
    │   ├── find_bench.cu
    │   └── insert_bench.cu
    └── static_multiset
    │   ├── contains_bench.cu
    │   └── find_bench.cu
├── overrides.cmake
├── .pre-commit-config.yaml
├── .devcontainer
    ├── devcontainer.json
    ├── cuda11.8-gcc11
    │   └── devcontainer.json
    ├── cuda12.6-gcc12
    │   └── devcontainer.json
    ├── cuda12.6-gcc13
    │   └── devcontainer.json
    ├── launch.sh
    └── verify_devcontainer.sh
├── examples
    ├── hyperloglog
    │   └── host_bulk_example.cu
    ├── static_set
    │   └── host_bulk_example.cu
    ├── bloom_filter
    │   └── host_bulk_example.cu
    ├── static_multiset
    │   └── host_bulk_example.cu
    └── static_map
    │   └── host_bulk_example.cu
└── README.md


/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @sleeepyjack @PointKernel
2 | 


--------------------------------------------------------------------------------
/versions.json:
--------------------------------------------------------------------------------
1 | {
2 |   "packages" : {
3 |   }
4 | }
5 | 


--------------------------------------------------------------------------------
/.github/copy-pr-bot.yaml:
--------------------------------------------------------------------------------
1 | # Configuration file for `copy-pr-bot` GitHub App
2 | # https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/
3 | 
4 | enabled: true
5 | 


--------------------------------------------------------------------------------
/._upstream/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: true
2 | contact_links:
3 |   - name: Question
4 |     url: https://github.com/NVIDIA/cuCollections/discussions
5 |     about: Check out our Discussions page to ask and answer questions. 
6 | 


--------------------------------------------------------------------------------
/ci/test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # Run from the directory that contains this script so the relative path to
18 | # build.sh below resolves regardless of the caller's working directory.
19 | cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" || exit 1
20 | 
21 | # Build first. build.sh is sourced (not executed) so variables it sets stay
22 | # visible here; BUILD_DIR is presumably defined by it -- TODO confirm.
23 | source ./build.sh "$@"
24 | 
25 | # Run the test suite; quoting keeps a BUILD_DIR containing spaces intact.
26 | ctest --test-dir "${BUILD_DIR}/tests" --output-on-failure --timeout 60
27 | 
28 | echo "Test complete"


--------------------------------------------------------------------------------
/cmake/thirdparty/get_cccl.cmake:
--------------------------------------------------------------------------------
 1 | # =============================================================================
 2 | # Copyright (c) 2021-2023, NVIDIA CORPORATION.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 5 | # in compliance with the License. You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software distributed under the License
10 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11 | # or implied. See the License for the specific language governing permissions and limitations under
12 | # the License.
13 | # =============================================================================
14 | 
15 | # Find an existing CCCL or clone it through rapids-cmake's CPM helper, registering it in the cuco-exports sets
16 | function(find_and_configure_cccl)
17 |     include(${rapids-cmake-dir}/cpm/cccl.cmake)
18 |     rapids_cpm_cccl(INSTALL_EXPORT_SET cuco-exports BUILD_EXPORT_SET cuco-exports)
19 | endfunction()
20 | # Called at file scope, so including this file is all that is needed to run the lookup
21 | find_and_configure_cccl()
22 | 


--------------------------------------------------------------------------------
/tests/dynamic_bitset/size_test.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <cuco/detail/trie/dynamic_bitset/dynamic_bitset.cuh>
18 | 
19 | #include <catch2/catch_test_macros.hpp>
20 | 
21 | TEST_CASE("dynamic_bitset size computation test", "")
22 | {
23 |   using size_type = std::size_t;
24 |   constexpr size_type num_elements{400};
25 | 
26 |   // Append num_elements bits in an alternating pattern: set at even positions, clear at odd ones
27 |   cuco::experimental::detail::dynamic_bitset bv;
28 |   size_type pos = 0;
29 |   while (pos < num_elements) {
30 |     bool const is_even = (pos % 2 == 0);
31 |     bv.push_back(is_even);
32 |     ++pos;
33 |   }
34 | 
35 |   // The reported size must equal the number of appended bits
36 |   auto size = bv.size();
37 |   REQUIRE(size == num_elements);
38 | }
39 | 


--------------------------------------------------------------------------------
/include/cuco/detail/probing_scheme/probing_scheme_base.cuh:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cstdint>
20 | 
21 | namespace cuco {
22 | namespace detail {
23 | 
24 | /**
25 |  * @brief Common base of the public probing scheme classes.
26 |  *
27 |  * Not intended to be used directly.
28 |  *
29 |  * @tparam CGSize Size of CUDA Cooperative Groups
30 |  */
31 | template <int32_t CGSize>
32 | class probing_scheme_base {
33 |  public:
34 |   /**
35 |    * @brief Size of the CUDA cooperative thread group, i.e. the value of `CGSize`.
36 |    */
37 |   static constexpr int32_t cg_size{CGSize};
38 | };
39 | }  // namespace detail
40 | }  // namespace cuco
41 | 


--------------------------------------------------------------------------------
/rocm-docs/build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # MIT License
 4 | #
 5 | # Copyright (c) 2025 Advanced Micro Devices, Inc.
 6 | #
 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 8 | # of this software and associated documentation files (the "Software"), to deal
 9 | # in the Software without restriction, including without limitation the rights
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | # copies of the Software, and to permit persons to whom the Software is
12 | # furnished to do so, subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in all
15 | # copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | # SOFTWARE.
24 | 
25 | # Run from this script's directory so that "." below is the folder that
26 | # holds conf.py, no matter where the script is invoked from.
27 | cd "$( dirname "${BASH_SOURCE[0]}" )" || exit 1
28 | 
29 | # Build the HTML documentation with Sphinx:
30 | #   -T  print the full traceback on errors
31 | #   -E  ignore the saved environment and re-read all source files
32 | #   -d  keep the doctree cache in _build/doctrees
33 | #   -D  override the `language` setting from conf.py
34 | python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html
35 | 


--------------------------------------------------------------------------------
/include/cuco/cuda_runtime.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #pragma once
24 | 
25 | #include <hip/hip_runtime.h>
26 | 
27 | #include <cuco/cuda_runtime_api.h>
28 | 


--------------------------------------------------------------------------------
/rocm-docs/index.md:
--------------------------------------------------------------------------------
 1 | <!---
 2 | MIT License
 3 | 
 4 | Copyright (c) 2024-2025 Advanced Micro Devices, Inc.
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | -->
24 | 
25 | # hipCollections
26 | 
27 | Todo
28 | 
29 | ## Overview
30 | 
31 | ::::{grid} 1 1 2 2
32 | :gutter: 1
33 | 
34 | :::{grid-item-card} Source Code Documentation
35 | - {doc}`/doxygen/html/index`
36 | :::
37 | 
38 | ::::
39 | 


--------------------------------------------------------------------------------
/tests/static_set/size_test.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2022-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <cuco/static_set.cuh>
18 | 
19 | #include <thrust/device_vector.h>
20 | #include <thrust/execution_policy.h>
21 | #include <thrust/sequence.h>
22 | 
23 | #include <catch2/catch_test_macros.hpp>
24 | 
25 | TEST_CASE("static_set size test", "")
26 | {
27 |   constexpr std::size_t num_keys{400};
28 | 
29 |   // Build the extent from num_keys so the two values cannot drift apart; -1 is the empty-key sentinel
30 |   cuco::static_set<int> set{cuco::extent<std::size_t>{num_keys}, cuco::empty_key{-1}};
31 | 
32 |   // Device keys 0, 1, ..., num_keys - 1: all distinct and different from the sentinel
33 |   thrust::device_vector<int> d_keys(num_keys);
34 |   thrust::sequence(thrust::device, d_keys.begin(), d_keys.end());
35 | 
36 |   auto const num_successes = set.insert(d_keys.begin(), d_keys.end());
37 | 
38 |   // Every key is unique, so each insertion is expected to succeed and be counted
39 |   REQUIRE(set.size() == num_keys);
40 |   REQUIRE(num_successes == num_keys);
41 | 
42 |   // clear() must leave the set empty
43 |   set.clear();
44 | 
45 |   REQUIRE(set.size() == 0);
46 | }
47 | 


--------------------------------------------------------------------------------
/rocm-docs/.sphinx/_toc.yml.in:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | #
 3 | # Copyright (c) 2024-2025 Advanced Micro Devices, Inc.
 4 | #
 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | # of this software and associated documentation files (the "Software"), to deal
 7 | # in the Software without restriction, including without limitation the rights
 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 | 
23 | defaults:
24 |   numbered: False
25 |   maxdepth: 6
26 | root: index.md
27 | subtrees:
28 |   - caption: Source Code Documentation
29 |     entries: 
30 |        - file: doxygen/html/index
31 |          title: Library API Doxygen Documentation
32 | 


--------------------------------------------------------------------------------
/benchmarks/bloom_filter/defaults.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cuco/hash_functions.cuh>
20 | 
21 | #include <nvbench/nvbench.cuh>
22 | 
23 | #include <cuda/std/array>
24 | 
25 | #include <vector>
26 | 
27 | namespace cuco::benchmark::defaults {
28 | 
29 | // Default key, hash and word types for the bloom filter benchmarks
30 | // (roles inferred from the BF_* names -- confirm against the benchmark sources)
31 | using BF_KEY  = nvbench::int64_t;
32 | using BF_HASH = cuco::xxhash_64<char>;
33 | using BF_WORD = nvbench::uint32_t;
34 | 
35 | // Default scalar parameters; presumably key count, filter size in MB and words per filter block
36 | static constexpr int BF_N               = 400'000'000;
37 | static constexpr int BF_SIZE_MB         = 2'000;
38 | static constexpr int BF_WORDS_PER_BLOCK = 8;
39 | 
40 | // Value ranges for benchmark axes: powers of two from 1 to 2048, and a set of pattern-bit counts
41 | std::vector<nvbench::int64_t> const BF_SIZE_MB_RANGE_CACHE{
42 |   1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048};
43 | std::vector<nvbench::int64_t> const BF_PATTERN_BITS_RANGE{1, 2, 4, 6, 8, 16};
44 | 
45 | }  // namespace cuco::benchmark::defaults
46 | 


--------------------------------------------------------------------------------
/include/cuco/detail/utility/math.cuh:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and limitations under the License.
14 |  */
15 | 
16 | #pragma once
17 | 
18 | #include <cuda/std/type_traits>
19 | 
20 | namespace cuco {
21 | namespace detail {
22 | 
23 | /**
24 |  * @brief Ceiling of an integer division
25 |  *
26 |  * @tparam T Type of dividend
27 |  * @tparam U Type of divisor
28 |  *
29 |  * @note Compilation fails via `static_assert` if `T` is not an integral type
30 |  * @note Compilation fails via `static_assert` if `U` is not an integral type
31 |  *
32 |  * @param dividend Numerator
33 |  * @param divisor Denominator
34 |  *
35 |  * @return `(dividend + divisor - 1) / divisor` as `T`: the quotient rounded up for `dividend >= 0`, `divisor > 0`
36 |  */
37 | template <typename T, typename U>
38 | __host__ __device__ constexpr T int_div_ceil(T dividend, U divisor) noexcept
39 | {
40 |   static_assert(cuda::std::is_integral_v<T>);
41 |   static_assert(cuda::std::is_integral_v<U>);
42 |   return (dividend + divisor - 1) / divisor;
43 | }
44 | 
45 | }  // namespace detail
46 | }  // namespace cuco
47 | 


--------------------------------------------------------------------------------
/include/cuco/detail/hash_functions/utils.cuh:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cuda/std/cstddef>
20 | 
21 | namespace cuco::detail {
22 | 
23 | /**
24 |  * @brief Loads the `index`-th `T`-sized chunk from the bytes that `data` points to.
25 |  *
26 |  * The chunk is copied out with `memcpy` starting at byte offset `index * sizeof(T)`, so the
27 |  * source is never dereferenced as a `T`.
28 |  *
29 |  * @tparam T Type of the chunk to load
30 |  * @tparam U Element type of the input pointer
31 |  * @tparam Extent Type of the chunk index
32 |  *
33 |  * @param data Pointer to the start of the input bytes
34 |  * @param index Index of the chunk, in units of `sizeof(T)`
35 |  *
36 |  * @return The loaded chunk
37 |  */
38 | template <typename T, typename U, typename Extent>
39 | constexpr __host__ __device__ T load_chunk(U const* const data, Extent index) noexcept
40 | {
41 |   auto const bytes = reinterpret_cast<cuda::std::byte const*>(data);
42 |   T chunk;
43 |   memcpy(&chunk, bytes + index * sizeof(T), sizeof(T));
44 |   return chunk;
45 | }
46 | 
47 | /**
48 |  * @brief Rotates the bits of a 32-bit value to the left.
49 |  *
50 |  * Both shift counts are reduced modulo 32, so `r == 0` returns `x` unchanged instead of
51 |  * evaluating `x >> 32`, which is undefined behavior. For `r` in `[1, 31]` the expression is
52 |  * exactly `(x << r) | (x >> (32 - r))`.
53 |  *
54 |  * @param x Value to rotate
55 |  * @param r Number of bit positions to rotate by
56 |  *
57 |  * @return `x` rotated left by `r` bits
58 |  */
59 | constexpr __host__ __device__ std::uint32_t rotl32(std::uint32_t x, std::int8_t r) noexcept
60 | {
61 |   return (x << (r & 31)) | (x >> ((32 - r) & 31));
62 | }
63 | 
64 | /**
65 |  * @brief Rotates the bits of a 64-bit value to the left.
66 |  *
67 |  * Both shift counts are reduced modulo 64, so `r == 0` returns `x` unchanged instead of
68 |  * evaluating `x >> 64`, which is undefined behavior. For `r` in `[1, 63]` the expression is
69 |  * exactly `(x << r) | (x >> (64 - r))`.
70 |  *
71 |  * @param x Value to rotate
72 |  * @param r Number of bit positions to rotate by
73 |  *
74 |  * @return `x` rotated left by `r` bits
75 |  */
76 | constexpr __host__ __device__ std::uint64_t rotl64(std::uint64_t x, std::int8_t r) noexcept
77 | {
78 |   return (x << (r & 63)) | (x >> ((64 - r) & 63));
79 | }
80 | 
81 | }  // namespace cuco::detail
82 | 


--------------------------------------------------------------------------------
/overrides.cmake:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | #
 3 | # Copyright (C) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
 4 | #
 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | # of this software and associated documentation files (the "Software"), to deal
 7 | # in the Software without restriction, including without limitation the rights
 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 | 
23 | # NOTE(HIP/AMD): These overrides must be placed in a separate file according to https://discourse.cmake.org/t/extend-list-of-file-extensions-for-a-language-globally-for-the-whole-project/7307/6
24 | 
25 | set(CMAKE_HIP_SOURCE_FILE_EXTENSIONS hip;cu)
26 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | ci:
 2 |     autofix_commit_msg: |
 3 |       [pre-commit.ci] auto code formatting
 4 |     autofix_prs: true
 5 |     autoupdate_branch: ''
 6 |     autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate'
 7 |     autoupdate_schedule: quarterly
 8 |     skip: []
 9 |     submodules: false
10 | 
11 | repos:
12 |       - repo: https://github.com/pre-commit/mirrors-clang-format
13 |         rev: v18.1.8
14 |         hooks:
15 |               - id: clang-format
16 |                 types_or: [c, c++, cuda]
17 |                 args: ['-fallback-style=none', '-style=file', '-i']
18 |       - repo: local
19 |         hooks:
20 |               - id: check-doxygen
21 |                 name: check-doxygen
22 |                 entry: ./ci/pre-commit/doxygen.sh
23 |                 files: ^include/
24 |                 types_or: [file]
25 |                 language: system
26 |                 pass_filenames: false
27 |                 verbose: true
28 |               - id: check-example-links
29 |                 name: check-example-links
30 |                 entry: ./ci/pre-commit/example_links.py
31 |                 files: ^examples/
32 |                 types: [cuda]
33 |                 language: python
34 |                 pass_filenames: false
35 |                 verbose: true
36 |                 additional_dependencies:
37 |                   - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple
38 |                   - gitpython
39 | 
40 | default_language_version:
41 |       python: python3
42 | 


--------------------------------------------------------------------------------
/.github/actions/compute-matrix/action.yml:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | name: Compute Matrix
17 | description: "Compute the matrix for a given matrix type from the specified matrix file"
18 | 
19 | inputs:
20 |   matrix_query:
21 |     description: "The jq query used to specify the desired matrix. e.g., .pull_request.nvcc"
22 |     required: true
23 |   matrix_file:
24 |     description: 'The file containing the matrix'
25 |     required: true
26 | outputs:
27 |   matrix:
28 |     description: 'The requested matrix'
29 |     # Key spelled like the lowercase `matrix=` entry the step below writes to $GITHUB_OUTPUT
30 |     value: ${{ steps.compute-matrix.outputs.matrix }}
31 | 
32 | runs:
33 |   using: "composite"
34 |   steps:
35 |     - name: Compute matrix
36 |       id: compute-matrix
37 |       # Hand the inputs over as environment variables rather than expanding them into the
38 |       # script text, so their contents are treated as data and each stays a single argument.
39 |       env:
40 |         MATRIX_FILE: ${{ inputs.matrix_file }}
41 |         MATRIX_QUERY: ${{ inputs.matrix_query }}
42 |       run: |
43 |         MATRIX=$(./.github/actions/compute-matrix/compute-matrix.sh "$MATRIX_FILE" "$MATRIX_QUERY")
44 |         echo "matrix=$MATRIX" | tee -a "$GITHUB_OUTPUT"
45 |       shell: bash -euxo pipefail {0}


--------------------------------------------------------------------------------
/tests/static_set/rehash_test.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <cuco/static_set.cuh>
18 | 
19 | #include <thrust/device_vector.h>
20 | #include <thrust/sequence.h>
21 | 
22 | #include <catch2/catch_test_macros.hpp>
23 | 
24 | TEST_CASE("static_set rehash test", "")
25 | {
26 |   using key_type = int;
27 | 
28 |   constexpr std::size_t num_keys{400};
29 |   constexpr std::size_t num_erased_keys{100};
30 |   // Construct the set from num_keys, with -1 / -2 as the empty / erased key sentinels
31 |   cuco::static_set set{num_keys, cuco::empty_key<key_type>{-1}, cuco::erased_key<key_type>{-2}};
32 | 
33 |   thrust::device_vector<key_type> d_keys(num_keys);
34 |   // Fill d_keys with the sequence 0, 1, ..., num_keys - 1
35 |   thrust::sequence(d_keys.begin(), d_keys.end());
36 | 
37 |   set.insert(d_keys.begin(), d_keys.end());
38 |   // rehash() without an argument must keep every inserted key
39 |   set.rehash();
40 |   REQUIRE(set.size() == num_keys);
41 |   // rehash with an explicit argument of twice the key count; the size must not change
42 |   set.rehash(num_keys * 2);
43 |   REQUIRE(set.size() == num_keys);
44 |   // Erase the first num_erased_keys keys; the size after rehashing must reflect the erasure
45 |   set.erase(d_keys.begin(), d_keys.begin() + num_erased_keys);
46 |   set.rehash();
47 |   REQUIRE(set.size() == num_keys - num_erased_keys);
48 | }
49 | 


--------------------------------------------------------------------------------
/.github/actions/configure_cccl_sccache/action.yml:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | name: Set up AWS credentials and environment variables for sccache
17 | description: "Set up AWS credentials and environment variables for sccache"
18 | runs:
19 |   using: "composite"
20 |   steps:
21 |     - name: Get AWS credentials for sccache bucket
22 |       uses: aws-actions/configure-aws-credentials@v2
23 |       with:
24 |         role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA
25 |         aws-region: us-east-2
26 |         role-duration-seconds: 43200 # 12 hours
27 |     - name: Set environment variables
28 |       run: |
29 |         echo "SCCACHE_BUCKET=rapids-sccache-east" >> $GITHUB_ENV
30 |         echo "SCCACHE_REGION=us-east-2" >> $GITHUB_ENV
31 |         echo "SCCACHE_IDLE_TIMEOUT=32768" >> $GITHUB_ENV
32 |         echo "SCCACHE_S3_USE_SSL=true" >> $GITHUB_ENV
33 |         echo "SCCACHE_S3_NO_CREDENTIALS=false" >> $GITHUB_ENV
34 |       shell: bash


--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "shutdownAction": "stopContainer",
 3 |   "image": "rapidsai/devcontainers:24.12-cpp-gcc13-cuda12.6-ubuntu22.04",
 4 |   "hostRequirements": {
 5 |     "gpu": true
 6 |   },
 7 |   "initializeCommand": [
 8 |     "/bin/bash",
 9 |     "-c",
10 |     "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
11 |   ],
12 |   "containerEnv": {
13 |     "SCCACHE_REGION": "us-east-2",
14 |     "SCCACHE_BUCKET": "rapids-sccache-devs",
15 |     "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
16 |     "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
17 |     "DEVCONTAINER_NAME": "cuda12.6-gcc13",
18 |     "CUCO_CUDA_VERSION": "12.6",
19 |     "CUCO_HOST_COMPILER": "gcc",
20 |     "CUCO_HOST_COMPILER_VERSION": "13"
21 |   },
22 |   "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
23 |   "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
24 |   "mounts": [
25 |     "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
26 |     "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
27 |     "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
28 |   ],
29 |   "customizations": {
30 |     "vscode": {
31 |       "extensions": [
32 |         "llvm-vs-code-extensions.vscode-clangd"
33 |       ],
34 |       "settings": {
35 |         "clangd.arguments": [
36 |           "--compile-commands-dir=${workspaceFolder}/build/latest"
37 |         ]
38 |       }
39 |     }
40 |   },
41 |   "name": "cuda12.6-gcc13"
42 | }
43 | 


--------------------------------------------------------------------------------
/rocm-docs/conf.py:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | #
 3 | # Copyright (c) 2025 Advanced Micro Devices, Inc.
 4 | #
 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | # of this software and associated documentation files (the "Software"), to deal
 7 | # in the Software without restriction, including without limitation the rights
 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 | 
23 | from rocm_docs import ROCmDocs
24 | 
25 | 
26 | external_projects_remote_repository = ""
27 | external_projects_current_project = "hipCollections"
28 | 
29 | docs_core = ROCmDocs("hipCollections")
30 | docs_core.run_doxygen(doxygen_root="./doxygen/", doxygen_path=".")  # Only if Doxygen is required for this project
31 | docs_core.enable_api_reference()
32 | docs_core.setup()
33 | 
34 | for sphinx_var in ROCmDocs.SPHINX_VARS:  # re-export each listed attribute of docs_core as a module-level name
35 |     globals()[sphinx_var] = getattr(docs_core, sphinx_var)
36 | 


--------------------------------------------------------------------------------
/.devcontainer/cuda11.8-gcc11/devcontainer.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "shutdownAction": "stopContainer",
 3 |   "image": "rapidsai/devcontainers:24.12-cpp-gcc11-cuda11.8-ubuntu22.04",
 4 |   "hostRequirements": {
 5 |     "gpu": true
 6 |   },
 7 |   "initializeCommand": [
 8 |     "/bin/bash",
 9 |     "-c",
10 |     "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
11 |   ],
12 |   "containerEnv": {
13 |     "SCCACHE_REGION": "us-east-2",
14 |     "SCCACHE_BUCKET": "rapids-sccache-devs",
15 |     "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
16 |     "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
17 |     "DEVCONTAINER_NAME": "cuda11.8-gcc11",
18 |     "CUCO_CUDA_VERSION": "11.8",
19 |     "CUCO_HOST_COMPILER": "gcc",
20 |     "CUCO_HOST_COMPILER_VERSION": "11"
21 |   },
22 |   "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
23 |   "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
24 |   "mounts": [
25 |     "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
26 |     "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
27 |     "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
28 |   ],
29 |   "customizations": {
30 |     "vscode": {
31 |       "extensions": [
32 |         "llvm-vs-code-extensions.vscode-clangd"
33 |       ],
34 |       "settings": {
35 |         "clangd.arguments": [
36 |           "--compile-commands-dir=${workspaceFolder}/build/latest"
37 |         ]
38 |       }
39 |     }
40 |   },
41 |   "name": "cuda11.8-gcc11"
42 | }
43 | 


--------------------------------------------------------------------------------
/.devcontainer/cuda12.6-gcc12/devcontainer.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "shutdownAction": "stopContainer",
 3 |   "image": "rapidsai/devcontainers:24.12-cpp-gcc12-cuda12.6-ubuntu22.04",
 4 |   "hostRequirements": {
 5 |     "gpu": true
 6 |   },
 7 |   "initializeCommand": [
 8 |     "/bin/bash",
 9 |     "-c",
10 |     "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
11 |   ],
12 |   "containerEnv": {
13 |     "SCCACHE_REGION": "us-east-2",
14 |     "SCCACHE_BUCKET": "rapids-sccache-devs",
15 |     "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
16 |     "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
17 |     "DEVCONTAINER_NAME": "cuda12.6-gcc12",
18 |     "CUCO_CUDA_VERSION": "12.6",
19 |     "CUCO_HOST_COMPILER": "gcc",
20 |     "CUCO_HOST_COMPILER_VERSION": "12"
21 |   },
22 |   "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
23 |   "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
24 |   "mounts": [
25 |     "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
26 |     "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
27 |     "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
28 |   ],
29 |   "customizations": {
30 |     "vscode": {
31 |       "extensions": [
32 |         "llvm-vs-code-extensions.vscode-clangd"
33 |       ],
34 |       "settings": {
35 |         "clangd.arguments": [
36 |           "--compile-commands-dir=${workspaceFolder}/build/latest"
37 |         ]
38 |       }
39 |     }
40 |   },
41 |   "name": "cuda12.6-gcc12"
42 | }
43 | 


--------------------------------------------------------------------------------
/.devcontainer/cuda12.6-gcc13/devcontainer.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "shutdownAction": "stopContainer",
 3 |   "image": "rapidsai/devcontainers:24.12-cpp-gcc13-cuda12.6-ubuntu22.04",
 4 |   "hostRequirements": {
 5 |     "gpu": true
 6 |   },
 7 |   "initializeCommand": [
 8 |     "/bin/bash",
 9 |     "-c",
10 |     "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
11 |   ],
12 |   "containerEnv": {
13 |     "SCCACHE_REGION": "us-east-2",
14 |     "SCCACHE_BUCKET": "rapids-sccache-devs",
15 |     "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
16 |     "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
17 |     "DEVCONTAINER_NAME": "cuda12.6-gcc13",
18 |     "CUCO_CUDA_VERSION": "12.6",
19 |     "CUCO_HOST_COMPILER": "gcc",
20 |     "CUCO_HOST_COMPILER_VERSION": "13"
21 |   },
22 |   "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
23 |   "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
24 |   "mounts": [
25 |     "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
26 |     "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
27 |     "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
28 |   ],
29 |   "customizations": {
30 |     "vscode": {
31 |       "extensions": [
32 |         "llvm-vs-code-extensions.vscode-clangd"
33 |       ],
34 |       "settings": {
35 |         "clangd.arguments": [
36 |           "--compile-commands-dir=${workspaceFolder}/build/latest"
37 |         ]
38 |       }
39 |     }
40 |   },
41 |   "name": "cuda12.6-gcc13"
42 | }
43 | 


--------------------------------------------------------------------------------
/include/cuco/detail/storage/kernels.cuh:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2022-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #pragma once
17 | 
18 | #include <cuco/detail/utility/cuda.cuh>
19 | 
20 | #include <cstddef>
21 | 
22 | namespace cuco {
23 | namespace detail {
24 | 
25 | CUCO_SUPPRESS_KERNEL_WARNINGS
26 | 
27 | /**
28 |  * @brief Initializes every slot of every bucket in the storage to `value`.
29 |  *
30 |  * @tparam BucketT Bucket type; iterated slot by slot and must expose `value_type`
31 |  *
32 |  * @param buckets Pointer to flat storage for buckets
33 |  * @param n Number of buckets to initialize
34 |  * @param value Value assigned to each slot of each bucket
35 |  */
36 | template <typename BucketT>
37 | CUCO_KERNEL void initialize(BucketT* buckets,
38 |                             cuco::detail::index_type n,
39 |                             typename BucketT::value_type value)
40 | {
41 |   auto const loop_stride = cuco::detail::grid_stride();
42 |   auto idx               = cuco::detail::global_thread_id();
43 | 
44 |   while (idx < n) {  // this thread handles buckets idx, idx + loop_stride, ... below n
45 |     auto& bucket_slots = *(buckets + idx);
46 | #pragma unroll
47 |     for (auto& slot : bucket_slots) {
48 |       slot = value;
49 |     }
50 |     idx += loop_stride;
51 |   }
52 | }
53 | 
54 | }  // namespace detail
55 | }  // namespace cuco
56 | 


--------------------------------------------------------------------------------
/include/cuco/utility/cuda_thread_scope.cuh:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cuda/std/atomic>  // cuda::thread_scope
20 | 
21 | namespace cuco {
22 | 
23 | /**
24 |  * @brief Strongly-typed wrapper that carries a `cuda::thread_scope` value in its type.
25 |  *
26 |  * @tparam Scope `cuda::thread_scope` to be wrapped
27 |  */
28 | template <cuda::thread_scope Scope>
29 | struct cuda_thread_scope {
30 |   /**
31 |    * @brief Implicit conversion to `cuda::thread_scope`.
32 |    *
33 |    * @return The wrapped `cuda::thread_scope`, i.e., the `Scope` template argument
34 |    */
35 |   __host__ __device__ constexpr operator cuda::thread_scope() const noexcept { return Scope; }
36 | };
37 | 
38 | // Predefined `cuda_thread_scope` objects, one for each scope wrapped below
39 | inline constexpr auto thread_scope_system =
40 |   cuda_thread_scope<cuda::thread_scope_system>{};  ///< `cuco::thread_scope_system`
41 | inline constexpr auto thread_scope_device =
42 |   cuda_thread_scope<cuda::thread_scope_device>{};  ///< `cuco::thread_scope_device`
43 | inline constexpr auto thread_scope_block =
44 |   cuda_thread_scope<cuda::thread_scope_block>{};  ///< `cuco::thread_scope_block`
45 | inline constexpr auto thread_scope_thread =
46 |   cuda_thread_scope<cuda::thread_scope_thread>{};  ///< `cuco::thread_scope_thread`
47 | 
48 | }  // namespace cuco
49 | 


--------------------------------------------------------------------------------
/tests/dynamic_bitset/rank_test.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <test_utils.hpp>
18 | 
19 | #include <cuco/detail/trie/dynamic_bitset/dynamic_bitset.cuh>
20 | 
21 | #include <thrust/device_vector.h>
22 | #include <thrust/host_vector.h>
23 | #include <thrust/sequence.h>
24 | 
25 | #include <catch2/catch_test_macros.hpp>
26 | 
27 | extern bool modulo_bitgen(uint64_t i);  // Defined in get_test.cu
28 | 
29 | TEST_CASE("dynamic_bitset rank test", "")
30 | {
31 |   cuco::experimental::detail::dynamic_bitset bv;
32 | 
33 |   using size_type = std::size_t;
34 |   constexpr size_type num_elements{4000};
35 | 
36 |   for (size_type i = 0; i < num_elements; i++) {  // append one bit per index, as produced by modulo_bitgen
37 |     bv.push_back(modulo_bitgen(i));
38 |   }
39 | 
40 |   thrust::device_vector<size_type> keys(num_elements);
41 |   thrust::sequence(keys.begin(), keys.end(), 0);  // query every position 0 .. num_elements - 1
42 | 
43 |   thrust::device_vector<size_type> d_ranks(num_elements);
44 | 
45 |   bv.rank(keys.begin(), keys.end(), d_ranks.begin());
46 | 
47 |   thrust::host_vector<size_type> h_ranks = d_ranks;
48 | 
49 |   size_type cur_rank    = 0;  // number of set bits generated at positions [0, i)
50 |   size_type num_matches = 0;
51 |   for (size_type i = 0; i < num_elements; i++) {
52 |     num_matches += cur_rank == h_ranks[i];  // compared before bit i itself is counted
53 |     if (modulo_bitgen(i)) { cur_rank++; }
54 |   }
55 |   REQUIRE(num_matches == num_elements);  // every queried rank must match the host-side count
56 | }
57 | 


--------------------------------------------------------------------------------
/include/cuco/detail/operator.inl:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2022-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cuco/utility/traits.hpp>
20 | 
21 | #include <type_traits>
22 | 
23 | namespace cuco {
24 | namespace detail {
25 | 
26 | /**
27 |  * @brief CRTP mixin which augments a given `Reference` with an `Operator`.
28 |  *
29 |  * @note Compilation fails (`static_assert`) if no specialization exists for `Operator`, e.g., if it is not defined in `include/cuco/operator.hpp`
30 |  *
31 |  * @tparam Operator Operator type, i.e., `cuco::op::*_tag`
32 |  * @tparam Reference The reference type.
33 |  *
34 |  * @note This primary template should never be instantiated.
35 |  */
36 | template <typename Operator, typename Reference>
37 | class operator_impl {
38 |   static_assert(cuco::dependent_false<Operator, Reference>,
39 |                 "Operator type is not supported by reference type.");
40 | };
41 | 
42 | /**
43 |  * @brief Checks if the given `Operator` is contained in a list of `Operators`.
44 |  *
45 |  * @tparam Operator Operator type, i.e., `cuco::op::*_tag`
46 |  * @tparam Operators List of operators to search in
47 |  *
48 |  * @return `true` if `Operator` is contained in `Operators`, `false` otherwise.
49 |  */
50 | template <typename Operator, typename... Operators>
51 | static constexpr bool has_operator()
52 | {
53 |   return ((std::is_same_v<Operators, Operator>) || ...);  // fold over `||`; `false` for an empty pack
54 | }
55 | 
56 | }  // namespace detail
57 | }  // namespace cuco
58 | 


--------------------------------------------------------------------------------
/._upstream/.github/ISSUE_TEMPLATE/feature_request.yml:
--------------------------------------------------------------------------------
 1 | name: Enhancement
 2 | description: Suggest an idea to improve cuCollections
 3 | title: '[ENHANCEMENT]: '
 4 | labels: ['type: enhancement']
 5 | body:
 6 |   - type: textarea
 7 |     id: description
 8 |     attributes:
 9 |       label: Is your feature request related to a problem? Please describe.
10 |       description: A clear and concise description of what the problem is, e.g., "I would like to be able to..."
11 |       placeholder: I would like an overload of `cuco::static_map::insert` that returns the success of each insertion. 
12 |     validations:
13 |       required: true
14 |   - type: textarea
15 |     id: proposed-solution
16 |     attributes:
17 |       label: Describe the solution you'd like
18 |       description: A clear and concise description of what you want to happen.
19 |       placeholder: |
20 |         Add a new overload of `insert` that takes an output iterator range assignable from `bool` that indicates the success of each insert.
21 |         Example API: 
22 |         template <typename InputIt, typename OutputIt>
23 |         void insert(InputIt first_input, InputIt last_input, OutputIt first_output, cudaStream_t stream = 0);
24 |     validations:
25 |       required: true
26 |   - type: textarea
27 |     id: alternatives
28 |     attributes:
29 |       label: Describe alternatives you've considered
30 |       description:
31 |         If applicable, please add a clear and concise description of any alternative solutions or features you've
32 |         considered.
33 |       placeholder: You can implement this yourself with the device-side API, but it would be more convenient as a bulk function.
34 |     validations:
35 |       required: false
36 |   - type: textarea
37 |     id: additional-context
38 |     attributes:
39 |       label: Additional context
40 |       description: Add any other context about the request here.
41 |       placeholder: This would be useful for sparse embedding tables in DL usecases. 
42 |     validations:
43 |       required: false
44 | 


--------------------------------------------------------------------------------
/include/cuco/storage.cuh:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cuco/detail/storage/storage.cuh>
20 | 
21 | namespace cuco {
22 | 
23 | /**
24 |  * @brief Public storage class.
25 |  *
26 |  * @note This is a public interface used to control storage bucket size. A bucket consists of one
27 |  * or multiple contiguous slots. The bucket size defines the workload granularity for each CUDA
28 |  * thread, i.e., how many slots a thread would concurrently operate on when performing modify or
29 |  * lookup operations. cuCollections uses the array of bucket storage to supersede the raw flat slot
30 |  * storage due to its superior granularity control: When bucket size equals one, array of buckets
31 |  * performs the same as the flat storage. If the underlying operation is more memory bandwidth
32 |  * bound, e.g., high occupancy multimap operations, a larger bucket size can reduce the length of
33 |  * probing sequences and thus improve runtime performance.
34 |  *
35 |  * @tparam BucketSize Number of slots per bucket
36 |  */
37 | template <int32_t BucketSize>
38 | class storage {
39 |  public:
40 |   /// Number of slots per bucket storage
41 |   static constexpr int32_t bucket_size = BucketSize;
42 | 
43 |   /// Implementation type: `bucket_storage` instantiated with this class's `bucket_size`
44 |   template <class T, class Extent, class Allocator>
45 |   using impl = bucket_storage<T, bucket_size, Extent, Allocator>;
46 | };
48 | }  // namespace cuco
49 | 


--------------------------------------------------------------------------------
/tests/static_map/rehash_test.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <cuco/static_map.cuh>
18 | 
19 | #include <cuda/functional>
20 | #include <thrust/iterator/counting_iterator.h>
21 | #include <thrust/iterator/transform_iterator.h>
22 | 
23 | #include <catch2/catch_test_macros.hpp>
24 | 
25 | TEST_CASE("static_map rehash test", "")
26 | {
27 |   using key_type    = int;
28 |   using mapped_type = long;
29 | 
30 |   constexpr std::size_t num_keys{400};
31 |   constexpr std::size_t num_erased_keys{100};
32 | 
33 |   cuco::static_map map{num_keys,
34 |                        cuco::empty_key<key_type>{-1},
35 |                        cuco::empty_value<mapped_type>{-1},
36 |                        cuco::erased_key<key_type>{-2}};
37 | 
38 |   auto keys_begin = thrust::counting_iterator<key_type>(1);  // keys 1, 2, ...: distinct from sentinels -1 and -2
39 | 
40 |   auto pairs_begin = thrust::make_transform_iterator(
41 |     keys_begin,
42 |     cuda::proclaim_return_type<cuco::pair<key_type, mapped_type>>([] __device__(key_type const& x) {
43 |       return cuco::pair<key_type, mapped_type>(x, static_cast<mapped_type>(x));  // value mirrors the key
44 |     }));
45 | 
46 |   map.insert(pairs_begin, pairs_begin + num_keys);
47 | 
48 |   map.rehash();
49 |   REQUIRE(map.size() == num_keys);  // rehash() must keep every inserted entry
50 | 
51 |   map.rehash(num_keys * 2);
52 |   REQUIRE(map.size() == num_keys);  // rehash with an explicit argument must keep them as well
53 | 
54 |   map.erase(keys_begin, keys_begin + num_erased_keys);
55 |   map.rehash();
56 |   REQUIRE(map.size() == num_keys - num_erased_keys);  // erased keys must stay gone after rehash
57 | }
58 | 


--------------------------------------------------------------------------------
/.github/workflows/build-and-test.yml:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | name: build and test
17 | 
18 | defaults:
19 |   run:
20 |     shell: bash -eo pipefail {0}
21 | 
22 | on:
23 |   workflow_call:
24 |     inputs:
25 |       cpu: {type: string, required: true}
26 |       test_name: {type: string, required: false}
27 |       build_script: {type: string, required: false}
28 |       test_script: {type: string, required: false}
29 |       container_image: {type: string, required: false}
30 |       run_tests: {type: boolean, required: false, default: true}
31 | 
32 | jobs:
33 |   build:
34 |     name: Build ${{inputs.test_name}}
35 |     uses: ./.github/workflows/run-as-coder.yml
36 |     with:
37 |       name: Build ${{inputs.test_name}}
38 |       runner: linux-${{inputs.cpu}}-cpu16
39 |       image:  ${{ inputs.container_image }}
40 |       command: |
41 |         ${{ inputs.build_script }}
42 | 
43 |   test:
44 |     needs: build
45 |     if:  ${{ !cancelled() && ( needs.build.result == 'success' || needs.build.result == 'skipped' ) && inputs.run_tests}}
46 |     name: Test ${{inputs.test_name}}
47 |     uses: ./.github/workflows/run-as-coder.yml
48 |     with:
49 |       name: Test ${{inputs.test_name}}
50 |       runner: linux-${{inputs.cpu}}-gpu-v100-latest-1
51 |       image: ${{inputs.container_image}}
52 |       command: |
53 |         nvidia-smi
54 |         ${{ inputs.test_script }}


--------------------------------------------------------------------------------
/include/cuco/detail/hash_functions/identity_hash.cuh:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cuda/std/type_traits>
20 | #include <thrust/functional.h>
21 | 
22 | namespace cuco::detail {
23 | 
24 | /**
25 |  * @brief An Identity hash function to hash the given argument on host and device
26 |  *
27 |  * @note `identity_hash` is perfect if `hash_table_capacity >= |input set|`
28 |  *
29 |  * @note `identity_hash` is only intended to be used as a perfect hash function.
30 |  *
31 |  * @note Perfect hashes are deterministic, and thus do not need seeds.
32 |  *
33 |  * @tparam Key The type of the values to hash
34 |  */
35 | template <typename Key>
36 | struct identity_hash : private thrust::identity<Key> {
37 |   using argument_type = Key;  ///< The type of the values taken as argument
38 |   /// The type of the hash values produced: `uint32_t` if `sizeof(Key) <= 4`, `uint64_t` otherwise
39 |   using result_type = cuda::std::conditional_t<sizeof(Key) <= 4, uint32_t, uint64_t>;
40 | 
41 |   static_assert(cuda::std::is_convertible_v<Key, result_type>,
42 |                 "Key type must be convertible to result_type");
43 | 
44 |   /**
45 |    * @brief Returns a hash value for its argument, as a value of type `result_type`.
46 |    *
47 |    * @param x The input argument to hash
48 |    * @return The result of the identity functor applied to `x`, cast to `result_type`
49 |    */
50 |   __host__ __device__ result_type operator()(Key const& x) const
51 |   {
52 |     return static_cast<result_type>(thrust::identity<Key>::operator()(x));
53 |   }
54 | };  // identity_hash
55 | 
56 | }  //  namespace cuco::detail
57 | 


--------------------------------------------------------------------------------
/ci/sccache_hit_rate.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | set -euo pipefail
18 | 
19 | # Require exactly two arguments: the "before" and "after" stats files
20 | if [ $# -ne 2 ]; then
21 |   echo "Usage: $0 <before-file> <after-file>" >&2
22 |   exit 1
23 | fi
24 | 
25 | # Print the contents of the before file (diagnostics go to stderr)
26 | echo "=== Contents of $1 ===" >&2
27 | cat "$1" >&2
28 | echo "=== End of $1 ===" >&2
29 | 
30 | # Print the contents of the after file (diagnostics go to stderr)
31 | echo "=== Contents of $2 ===" >&2
32 | cat "$2" >&2
33 | echo "=== End of $2 ===" >&2
34 | 
35 | # Extract compile requests and cache hits (third field of the matching line) from both files
36 | requests_before=$(awk '/^[ \t]*Compile requests[ \t]+[0-9]+/ {print $3}' "$1")
37 | hits_before=$(awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}' "$1")
38 | requests_after=$(awk '/^[ \t]*Compile requests[ \t]+[0-9]+/ {print $3}' "$2")
39 | hits_after=$(awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}' "$2")
40 | 
41 | # Calculate the differences to find out how many new requests and hits
42 | requests_diff=$((requests_after - requests_before))
43 | hits_diff=$((hits_after - hits_before))
44 | 
45 | echo "New Compile Requests: $requests_diff" >&2
46 | echo "New Hits: $hits_diff" >&2
47 | 
48 | # Calculate the hit rate as a percentage with two decimals; the bare number is printed on stdout
49 | if [ "$requests_diff" -eq 0 ]; then
50 |     echo "No new compile requests, hit rate is not applicable"
51 | else
52 |     hit_rate=$(awk -v hits="$hits_diff" -v requests="$requests_diff" 'BEGIN {printf "%.2f", hits/requests * 100}')
53 |     echo "sccache hit rate: $hit_rate%" >&2
54 |     echo "$hit_rate"
55 | fi


--------------------------------------------------------------------------------
/include/cuco/utility/error.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2020-2023, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <stdexcept>
20 | #include <string>
21 | 
22 | namespace cuco {
23 | /**
24 |  * @brief Exception thrown when logical precondition is violated.
25 |  *
26 |  * This exception should not be thrown directly and is instead thrown by the
27 |  * CUCO_EXPECTS macro.
28 |  */
29 | struct logic_error : public std::logic_error {
30 |   /**
31 |    * @brief Constructs a `logic_error` from a C string message.
32 |    *
33 |    * @param message Message to be associated with the exception
34 |    */
35 |   logic_error(char const* const message) : std::logic_error(message) {}
36 | 
37 |   /**
38 |    * @brief Constructs a `logic_error` from a `std::string` message.
39 |    *
40 |    * @param message Message to be associated with the exception
41 |    */
42 |   logic_error(std::string const& message) : std::logic_error(message) {}
43 | };
44 | /**
45 |  * @brief Exception thrown when a CUDA error is encountered.
46 |  *
47 |  */
48 | struct cuda_error : public std::runtime_error {
49 |   /**
50 |    * @brief Constructs a `cuda_error` object with the given `message`.
51 |    *
52 |    * @param message The error char array used to construct `cuda_error`
53 |    */
54 |   cuda_error(char const* message) : std::runtime_error(message) {}
55 |   /**
56 |    * @brief Constructs a `cuda_error` object with the given `message` string.
57 |    *
58 |    * @param message The `std::string` forwarded to `std::runtime_error`, as `logic_error` does
59 |    */
60 |   cuda_error(std::string const& message) : std::runtime_error(message) {}
61 | };
62 | }  // namespace cuco
63 | 


--------------------------------------------------------------------------------
/.devcontainer/launch.sh:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env bash
 2 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | launch_devcontainer() {
18 |     # Usage: launch_devcontainer <CUDA version> <host compiler>; prints the devcontainer URL.
19 |     # Ensure we're in the repo root; bail out instead of continuing in the wrong directory
20 |     cd "$( cd "$( dirname "$(realpath -m "${BASH_SOURCE[0]}")" )" && pwd )/.." || return 1;
21 | 
22 |     if [[ -z $1 ]] || [[ -z $2 ]]; then
23 |         echo "Usage: $0 [CUDA version] [Host compiler]"
24 |         echo "Example: $0 12.1 gcc12"
25 |         return 1
26 |     fi
27 | 
28 |     local cuda_version="$1"
29 |     local host_compiler="$2"
30 |     local workspace="$(basename "$(pwd)")";
31 |     local tmpdir="$(mktemp -d)/${workspace}";
32 |     local path="$(pwd)/.devcontainer/cuda${cuda_version}-${host_compiler}";
33 | 
34 |     mkdir -p "${tmpdir}";
35 |     mkdir -p "${tmpdir}/.devcontainer";
36 |     cp -arL "$path/devcontainer.json" "${tmpdir}/.devcontainer";
37 |     sed -i "s@\${localWorkspaceFolder}@$(pwd)@g" "${tmpdir}/.devcontainer/devcontainer.json";
38 |     path="${tmpdir}";
39 | 
40 |     local hash="$(echo -n "${path}" | xxd -pu - | tr -d '[:space:]')";
41 |     local url="vscode://vscode-remote/dev-container+${hash}/home/coder/cuCollections";
42 | 
43 |     echo "devcontainer URL: ${url}";
44 | 
45 |     local launch="";
46 |     if type open >/dev/null 2>&1; then
47 |         launch="open";
48 |     elif type xdg-open >/dev/null 2>&1; then
49 |         launch="xdg-open";
50 |     fi
51 | 
52 |     if [ -n "${launch}" ]; then
53 |         code --new-window "${tmpdir}";
54 |         exec "${launch}" "${url}" >/dev/null 2>&1;
55 |     fi
56 | }
57 | 
58 | launch_devcontainer "$@";


--------------------------------------------------------------------------------
/include/cuco/detail/bloom_filter/default_filter_policy.inl:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cstdint>
20 | 
21 | namespace cuco {
22 | // Constructor: forwards `pattern_bits` and `hash` to the wrapped `impl_`.
23 | template <class Hash, class Word, std::uint32_t WordsPerBlock>
24 | __host__
25 |   __device__ constexpr default_filter_policy<Hash, Word, WordsPerBlock>::default_filter_policy(
26 |     std::uint32_t pattern_bits, Hash hash)
27 |   : impl_{pattern_bits, hash}
28 | {
29 | }
30 | // Delegates `hash(key)` to `impl_`.
31 | template <class Hash, class Word, std::uint32_t WordsPerBlock>
32 | __device__ constexpr typename default_filter_policy<Hash, Word, WordsPerBlock>::hash_result_type
33 | default_filter_policy<Hash, Word, WordsPerBlock>::hash(
34 |   typename default_filter_policy<Hash, Word, WordsPerBlock>::hash_argument_type const& key) const
35 | {
36 |   return impl_.hash(key);
37 | }
38 | // Delegates `block_index(hash, num_blocks)` to `impl_`.
39 | template <class Hash, class Word, std::uint32_t WordsPerBlock>
40 | template <class Extent>
41 | __device__ constexpr auto default_filter_policy<Hash, Word, WordsPerBlock>::block_index(
42 |   typename default_filter_policy<Hash, Word, WordsPerBlock>::hash_result_type hash,
43 |   Extent num_blocks) const
44 | {
45 |   return impl_.block_index(hash, num_blocks);
46 | }
47 | // Delegates `word_pattern(hash, word_index)` to `impl_`; `typename` is required before C++20.
48 | template <class Hash, class Word, std::uint32_t WordsPerBlock>
49 | __device__ constexpr typename default_filter_policy<Hash, Word, WordsPerBlock>::word_type
50 | default_filter_policy<Hash, Word, WordsPerBlock>::word_pattern(
51 |   typename default_filter_policy<Hash, Word, WordsPerBlock>::hash_result_type hash,
52 |   std::uint32_t word_index) const
53 | {
54 |   return impl_.word_pattern(hash, word_index);
55 | }
56 | 
57 | }  // namespace cuco


--------------------------------------------------------------------------------
/examples/hyperloglog/host_bulk_example.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #include <cuco/hyperloglog.cuh>
17 | 
18 | #include <thrust/device_vector.h>
19 | #include <thrust/sequence.h>
20 | 
21 | #include <cmath>
22 | #include <cstddef>
23 | #include <iostream>
24 | 
25 | /**
26 |  * @file host_bulk_example.cu
27 |  * @brief Demonstrates usage of `cuco::hyperloglog` "bulk" host APIs.
28 |  */
29 | int main(void)
30 | {
31 |   using value_type                = int;
32 |   constexpr std::size_t num_items = 1ull << 28;  // 1GB
33 | 
34 |   // Device buffer holding `num_items` distinct values, counting up from 0
35 |   thrust::device_vector<value_type> items(num_items);
36 |   thrust::sequence(items.begin(), items.end(), 0);
37 | 
38 |   // Desired standard deviation of the approximation error;
39 |   // 0.0122197 is the default value and corresponds to a 32KB sketch size
40 |   auto const target_sd = cuco::standard_deviation{0.0122197};
41 | 
42 |   // Set up the estimator with the requested accuracy
43 |   cuco::hyperloglog<value_type> estimator{target_sd};
44 | 
45 |   // Feed every item into the estimator
46 |   estimator.add(items.begin(), items.end());
47 | 
48 |   // Re-adding the first half of the items will not affect the result
49 |   auto const first_half_end = items.begin() + num_items / 2;
50 |   estimator.add(items.begin(), first_half_end);
51 | 
52 |   // Query the cardinality estimate
53 |   std::size_t const estimate = estimator.estimate();
54 | 
55 |   auto const relative_error =
56 |     std::abs(static_cast<double>(estimate) / static_cast<double>(num_items) - 1.0);
57 | 
58 |   std::cout << "True cardinality: " << num_items << "\nEstimated cardinality: " << estimate
59 |             << "\nError: " << relative_error << std::endl;
60 | 
61 |   return 0;
62 | }


--------------------------------------------------------------------------------
/tests/utility/fast_int_test.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <test_utils.hpp>
18 | 
19 | #include <cuco/utility/fast_int.cuh>
20 | 
21 | #include <catch2/catch_template_test_macros.hpp>
22 | #include <catch2/generators/catch_generators.hpp>
23 | 
24 | #include <cstdint>
25 | #include <limits>  // std::numeric_limits, used for `max_value` below
26 | #include <type_traits>
27 | 
28 | TEMPLATE_TEST_CASE(
29 |   "utility::fast_int tests", "", std::int32_t, std::uint32_t, std::int64_t, std::uint64_t)
30 | {
31 |   TestType value           = GENERATE(1, 2, 9, 32, 4123, 8192, 4312456);
32 |   TestType lhs             = GENERATE(1, 2, 9, 32, 4123, 8192, 4312456);
33 |   constexpr auto max_value = std::numeric_limits<TestType>::max();
34 |   cuco::utility::fast_int fast_value{value};  // divisor under test, compared against `value`
35 | 
36 |   SECTION("Should be explicitly convertible to the underlying integer type.")
37 |   {
38 |     REQUIRE(static_cast<TestType>(fast_value) == value);
39 |   }
40 | 
41 |   SECTION("Fast div/mod should produce correct result.")
42 |   {
43 |     INFO(lhs << " /% " << value);
44 |     REQUIRE(lhs / fast_value == lhs / value);
45 |     REQUIRE(lhs % fast_value == lhs % value);
46 |   }
47 | 
48 |   SECTION("Fast div/mod with maximum rhs value should produce correct result.")
49 |   {
50 |     INFO(lhs << " /% " << max_value);
51 |     cuco::utility::fast_int fast_max{max_value};
52 |     REQUIRE(lhs / fast_max == lhs / max_value);
53 |     REQUIRE(lhs % fast_max == lhs % max_value);
54 |   }
55 | 
56 |   SECTION("Fast div/mod with maximum lhs value should produce correct result.")
57 |   {
58 |     INFO(max_value << " /% " << value);
59 |     REQUIRE(max_value / fast_value == max_value / value);
60 |     REQUIRE(max_value % fast_value == max_value % value);
61 |   }
62 | }
63 | 


--------------------------------------------------------------------------------
/benchmarks/benchmark_defaults.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cuco/hash_functions.cuh>
20 | 
21 | #include <nvbench/nvbench.cuh>
22 | 
23 | #include <cstdint>
24 | #include <vector>
25 | 
26 | namespace cuco::benchmark::defaults {
27 | // Type lists for benchmark type axes (e.g. KEY_TYPE_RANGE is passed to NVBENCH_TYPE_AXES).
28 | using KEY_TYPE_RANGE   = nvbench::type_list<nvbench::int32_t, nvbench::int64_t>;
29 | using VALUE_TYPE_RANGE = nvbench::type_list<nvbench::int32_t, nvbench::int64_t>;
30 | using HASH_RANGE       = nvbench::type_list<cuco::identity_hash<char>,
31 |                                             cuco::xxhash_32<char>,
32 |                                             cuco::xxhash_64<char>,
33 |                                             cuco::murmurhash3_32<char>>;  //,
34 | // cuco::murmurhash3_x86_128<char>,
35 | // cuco::murmurhash3_x64_128<char>>; // TODO handle tuple-like hash value
36 | // Single default values for benchmark parameters.
37 | auto constexpr N             = 100'000'000;
38 | auto constexpr OCCUPANCY     = 0.5;
39 | auto constexpr MULTIPLICITY  = 1;
40 | auto constexpr MATCHING_RATE = 1.0;
41 | auto constexpr MAX_NOISE     = 3;
42 | auto constexpr SKEW          = 0.5;
43 | auto constexpr BATCH_SIZE    = 1'000'000;
44 | auto constexpr INITIAL_SIZE  = 50'000'000;
45 | // Value sets for benchmark axes that sweep over a parameter.
46 | auto const N_RANGE = nvbench::range(10'000'000, 100'000'000, 20'000'000);
47 | auto const N_RANGE_CACHE =
48 |   std::vector<nvbench::int64_t>{8'000, 80'000, 800'000, 8'000'000, 80'000'000};
49 | auto const OCCUPANCY_RANGE     = nvbench::range(0.1, 0.9, 0.1);
50 | auto const MULTIPLICITY_RANGE  = std::vector<nvbench::int64_t>{1, 2, 4, 8, 16};
51 | auto const MATCHING_RATE_RANGE = nvbench::range(0.1, 1., 0.1);
52 | auto const SKEW_RANGE          = nvbench::range(0.1, 1., 0.1);
53 | 
54 | }  // namespace cuco::benchmark::defaults
55 | 


--------------------------------------------------------------------------------
/._upstream/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | <!--
 2 | 
 3 | Thank you for contributing to cuCollections :)
 4 | 
 5 | Here are some guidelines to help the review process go smoothly.
 6 | 
 7 | 1. Please write a description in this text box of the changes that are being
 8 |    made.
 9 | 
10 | 2. Please ensure that you have written unit tests for the changes made/features
11 |    added.
12 | 
13 | 3. If you are closing an issue please use one of the automatic closing words as
14 |    noted here: https://help.github.com/articles/closing-issues-using-keywords/
15 | 
16 | 4. If your pull request is not ready for review but you want to make use of the
17 |    continuous integration testing facilities please label it with `[WIP]`.
18 | 
19 | 5. If your pull request is ready to be reviewed without requiring additional
20 |    work on top of it, then remove the `[WIP]` label (if present) and replace
21 |    it with `[REVIEW]`. If assistance is required to complete the functionality,
22 |    for example when the C/C++ code of a feature is complete but Python bindings
23 |    are still required, then add the label `[HELP-REQ]` so that others can triage
24 |    and assist. The additional changes then can be implemented on top of the
25 |    same PR. If the assistance is done by members of the rapidsAI team, then no
26 |    additional actions are required by the creator of the original PR for this,
27 |    otherwise the original author of the PR needs to give permission to the
28 |    person(s) assisting to commit to their personal fork of the project. If that
29 |    doesn't happen then a new PR based on the code of the original PR can be
30 |    opened by the person assisting, which then will be the PR that will be
31 |    merged.
32 | 
33 | 6. Once all work has been done and review has taken place please do not add
34 |    features or make changes out of the scope of those requested by the reviewer
35 |    (doing this just adds delays as already reviewed code ends up having to be
36 |    re-reviewed/it is hard to tell what is new etc!). Further, please do not
37 |    rebase your branch on master/force push/rewrite history, doing any of these
38 |    causes the context of any comments made by reviewers to be lost. If
39 |    conflicts occur against master they should be resolved by merging master
40 |    into the branch used for making the pull request.
41 | 
42 | Many thanks in advance for your cooperation!
43 | 
44 | -->
45 | 


--------------------------------------------------------------------------------
/include/cuco/detail/utility/strong_type.cuh:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2021-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #pragma once
17 | 
18 | namespace cuco::detail {
19 | 
20 | /**
21 |  * @brief A strong type wrapper holding a single value of type `T`
22 |  *
23 |  * @tparam T Type of the value
24 |  * @note Construction from `T` is explicit, while conversion back to `T` is implicit.
25 |  */
26 | template <class T>
27 | struct strong_type {
28 |   /**
29 |    * @brief Constructs a strong type
30 |    *
31 |    * @param v Value to be wrapped as a strong type
32 |    */
33 |   __host__ __device__ explicit constexpr strong_type(T v) : value{v} {}
34 | 
35 |   /**
36 |    * @brief Implicit conversion operator to the underlying value.
37 |    *
38 |    * @return Copy of the underlying value
39 |    */
40 |   __host__ __device__ constexpr operator T() const noexcept { return value; }
41 | 
42 |   T value;  ///< Underlying data value
43 | };
44 | 
45 | }  // namespace cuco::detail
46 | 
47 | /**
48 |  * @brief Defines struct `Name` deriving from `cuco::detail::strong_type<Type>` (explicit ctor)
49 |  */
50 | #define CUCO_DEFINE_STRONG_TYPE(Name, Type)                 \
51 |   struct Name : public cuco::detail::strong_type<Type> {    \
52 |     __host__ __device__ explicit constexpr Name(Type value) \
53 |       : cuco::detail::strong_type<Type>(value)              \
54 |     {                                                       \
55 |     }                                                       \
56 |   };
57 | 
58 | /**
59 |  * @brief Defines template `Name<T>` deriving from `cuco::detail::strong_type<T>` (explicit ctor)
60 |  */
61 | #define CUCO_DEFINE_TEMPLATE_STRONG_TYPE(Name)                                                    \
62 |   template <typename T>                                                                           \
63 |   struct Name : public cuco::detail::strong_type<T> {                                             \
64 |     __host__ __device__ explicit constexpr Name(T value) : cuco::detail::strong_type<T>(value) {} \
65 |   };
66 | 


--------------------------------------------------------------------------------
/benchmarks/benchmark_utils.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cuco/detail/error.hpp>
20 | #include <cuco/utility/key_generator.cuh>
21 | 
22 | #include <nvbench/nvbench.cuh>
23 | 
24 | #include <type_traits>  // std::is_same_v
25 | 
26 | namespace cuco::benchmark {
27 | // Builds a `Dist` object, reading the "Multiplicity" or "Skew" axis from `state` when needed.
28 | template <typename Dist>
29 | auto dist_from_state(nvbench::state const& state)
30 | {
31 |   if constexpr (std::is_same_v<Dist, cuco::utility::distribution::unique>) {
32 |     return Dist{};
33 |   } else if constexpr (std::is_same_v<Dist, cuco::utility::distribution::uniform>) {
34 |     auto const multiplicity = state.get_int64("Multiplicity");
35 |     return Dist{multiplicity};
36 |   } else if constexpr (std::is_same_v<Dist, cuco::utility::distribution::gaussian>) {
37 |     auto const skew = state.get_float64("Skew");
38 |     return Dist{skew};
39 |   } else {
40 |     CUCO_FAIL("Unexpected distribution type");
41 |   }
42 | }
43 | // Maps `Template<OldType>` to `Template<NewType>`; the primary template is left undefined.
44 | template <typename T, typename NewType>
45 | struct rebind_hasher;
46 | 
47 | template <template <typename> class Template, typename OldType, typename NewType>
48 | struct rebind_hasher<Template<OldType>, NewType> {
49 |   using type = Template<NewType>;
50 | };
51 | template <typename T, typename NewType>
52 | using rebind_hasher_t = typename rebind_hasher<T, NewType>::type;
53 | }  // namespace cuco::benchmark
54 | 
55 | NVBENCH_DECLARE_TYPE_STRINGS(cuco::utility::distribution::unique, "UNIQUE", "distribution::unique");
56 | NVBENCH_DECLARE_TYPE_STRINGS(cuco::utility::distribution::uniform,
57 |                              "UNIFORM",
58 |                              "distribution::uniform");
59 | NVBENCH_DECLARE_TYPE_STRINGS(cuco::utility::distribution::gaussian,
60 |                              "GAUSSIAN",
61 |                              "distribution::gaussian");
62 | 


--------------------------------------------------------------------------------
/.github/actions/compute-matrix/compute-matrix.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | set -euo pipefail
18 | 
19 | write_output() {
20 |   local key="$1"
21 |   local value="$2"
22 |   echo "$key=$value" | tee --append "${GITHUB_OUTPUT:-/dev/null}"
23 | }
24 | 
25 | explode_std_versions() {
26 |   jq -cr 'map(. as $o | {std: $o.std[]} + del($o.std))'
27 | }
28 | 
29 | extract_matrix() {  # Writes the outputs derived from matrix "$2" of file "$1" via write_output
30 |   local file="$1"  # path to the matrix file
31 |   local type="$2"  # key of the desired matrix inside the file
32 |   local matrix; matrix="$(yq -o=json "$file" | jq -cr ".$type")"  # split so failures are not masked by `local`
33 |   write_output "DEVCONTAINER_VERSION" "$(yq -o json "$file" | jq -cr '.devcontainer_version')"
34 |   local nvcc_full_matrix; nvcc_full_matrix="$(echo "$matrix" | jq -cr '.nvcc' | explode_std_versions )"
35 |   write_output "NVCC_FULL_MATRIX" "$nvcc_full_matrix"
36 |   write_output "CUDA_VERSIONS" "$(echo "$nvcc_full_matrix" | jq -cr '[.[] | .cuda] | unique')"
37 |   write_output "HOST_COMPILERS" "$(echo "$nvcc_full_matrix" | jq -cr '[.[] | .compiler.name] | unique')"
38 |   write_output "PER_CUDA_COMPILER_MATRIX" "$(echo "$nvcc_full_matrix" | jq -cr ' group_by(.cuda + .compiler.name) | map({(.[0].cuda + "-" + .[0].compiler.name): .}) | add')"
39 | }
40 | 
41 | main() {  # Entry point: handles the optional -v flag, validates arguments, emits the matrix
42 |   if [ "${1:-}" == "-v" ]; then  # default to empty so `set -u` does not abort when no args are given
43 |     set -x
44 |     shift
45 |   fi
46 | 
47 |   if [ $# -ne 2 ] || [ "$2" != "pull_request" ]; then
48 |     echo "Usage: $0 [-v] MATRIX_FILE MATRIX_TYPE"
49 |     echo "  -v            : Enable verbose output"
50 |     echo "  MATRIX_FILE   : The path to the matrix file."
51 |     echo "  MATRIX_TYPE   : The desired matrix. Supported values: 'pull_request'"
52 |     exit 1
53 |   fi
54 | 
55 |   echo "Input matrix file:" >&2
56 |   cat "$1" >&2
57 |   echo "Matrix Type: $2" >&2
58 | 
59 |   extract_matrix "$1" "$2"
60 | }
61 | 
62 | main "$@"


--------------------------------------------------------------------------------
/benchmarks/static_set/size_bench.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <benchmark_defaults.hpp>
18 | #include <benchmark_utils.hpp>
19 | 
20 | #include <cuco/static_set.cuh>
21 | #include <cuco/utility/key_generator.cuh>
22 | 
23 | #include <nvbench/nvbench.cuh>
24 | 
25 | #include <thrust/device_vector.h>
26 | 
27 | using namespace cuco::benchmark;  // defaults, dist_from_state
28 | using namespace cuco::utility;    // key_generator, distribution
29 | 
30 | /**
31 |  * @brief A benchmark evaluating `cuco::static_set::size` performance
32 |  *
33 |  * Reads the "NumInputs" and "Occupancy" axes, inserts the generated keys, then times `size()`.
34 |  */
35 | template <typename Key, typename Dist>
36 | void static_set_size(nvbench::state& state, nvbench::type_list<Key, Dist>)
37 | {
38 |   auto const num_keys  = state.get_int64("NumInputs");
39 |   auto const occupancy = state.get_float64("Occupancy");
40 |   auto const capacity  = static_cast<std::size_t>(num_keys / occupancy);  // explicit narrowing
41 | 
42 |   thrust::device_vector<Key> keys(num_keys);
43 |   key_generator gen;
44 |   gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
45 | 
46 |   state.add_element_count(num_keys);
47 | 
48 |   cuco::static_set<Key> set{capacity, cuco::empty_key<Key>{-1}};
49 |   set.insert(keys.begin(), keys.end());
50 | 
51 |   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
52 |     [[maybe_unused]] auto const num_stored = set.size({launch.get_stream()});  // result unused
53 |   });
54 | }
55 | 
56 | NVBENCH_BENCH_TYPES(static_set_size,
57 |                     NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
58 |                                       nvbench::type_list<distribution::unique>))
59 |   .set_name("static_set_size_unique_occupancy")
60 |   .set_type_axes_names({"Key", "Distribution"})
61 |   .set_max_noise(defaults::MAX_NOISE)
62 |   .add_int64_axis("NumInputs", {defaults::N})
63 |   .add_float64_axis("Occupancy", defaults::OCCUPANCY_RANGE);
64 | 


--------------------------------------------------------------------------------
/.github/workflows/dispatch-build-and-test.yml:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | name: Dispatch build and test
17 | 
18 | on:
19 |   workflow_call:
20 |     inputs:
21 |       per_cuda_compiler_matrix: {type: string, required: true}
22 |       devcontainer_version: {type: string, required: true}
23 | 
24 | jobs:
25 |   # Using a matrix to dispatch to the build-and-test reusable workflow for each build configuration
26 |   # ensures that the build/test steps can overlap across different configurations. For example,
27 |   # the build step for CUDA 12.1 + gcc 9.3 can run at the same time as the test step for CUDA 11.0 + clang 11.
28 |   build_and_test:
29 |     name: ${{matrix.cpu}}
30 |     uses: ./.github/workflows/build-and-test.yml
31 |     strategy:
32 |       fail-fast: false
33 |       matrix:
34 |         include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }}
35 |     with:
36 |       cpu: ${{ matrix.cpu }}
37 |       test_name: ${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}}
38 |       build_script: "./ci/build.sh --cxx ${{matrix.compiler.exe}} --std ${{matrix.std}} --arch ${{matrix.gpu_build_archs}} --infix ${{matrix.cpu}}-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}"
39 |       test_script:  "./ci/test.sh --tests --cxx ${{matrix.compiler.exe}} --std ${{matrix.std}} --arch ${{matrix.gpu_build_archs}} --infix ${{matrix.cpu}}-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}"
40 |       container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}}
41 |       run_tests: ${{ contains(matrix.jobs, 'test') && !contains(github.event.head_commit.message, 'skip-tests') }}
42 | 


--------------------------------------------------------------------------------
/include/cuco/detail/storage/storage.cuh:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2022-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cuco/bucket_storage.cuh>
20 | 
21 | namespace cuco {
22 | namespace detail {
23 | /**
24 |  * @brief Intermediate class internally used by data structures
25 |  *
26 |  * @tparam StorageImpl Storage implementation class providing the nested `impl` template
27 |  * @tparam T Storage element type
28 |  * @tparam Extent Type of extent denoting number of buckets
29 |  * @tparam Allocator Type of allocator used for device storage
30 |  */
31 | template <class StorageImpl, class T, class Extent, class Allocator>
32 | class storage : StorageImpl::template impl<T, Extent, Allocator> {
33 |  public:
34 |   /// Storage implementation type (also the private base class of `storage`)
35 |   using impl_type      = typename StorageImpl::template impl<T, Extent, Allocator>;
36 |   using ref_type       = typename impl_type::ref_type;        ///< Storage ref type
37 |   using value_type     = typename impl_type::value_type;      ///< Storage value type
38 |   using allocator_type = typename impl_type::allocator_type;  ///< Storage allocator type
39 | 
40 |   /// Number of elements per bucket
41 |   static constexpr int bucket_size = impl_type::bucket_size;
42 | 
43 |   using impl_type::allocator;
44 |   using impl_type::bucket_extent;
45 |   using impl_type::capacity;
46 |   using impl_type::data;
47 |   using impl_type::initialize;
48 |   using impl_type::initialize_async;
49 |   using impl_type::num_buckets;
50 |   using impl_type::ref;
51 | 
52 |   /**
53 |    * @brief Constructs storage by forwarding both arguments to the implementation base class.
54 |    *
55 |    * @param size Number of slots to (de)allocate
56 |    * @param allocator Allocator used for (de)allocating device storage
57 |    */
58 |   explicit constexpr storage(Extent size, Allocator const& allocator) : impl_type{size, allocator}
59 |   {
60 |   }
61 | };
62 | 
63 | }  // namespace detail
64 | }  // namespace cuco
65 | 


--------------------------------------------------------------------------------
/benchmarks/static_set/rehash_bench.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <benchmark_defaults.hpp>
18 | #include <benchmark_utils.hpp>
19 | 
20 | #include <cuco/static_set.cuh>
21 | #include <cuco/utility/key_generator.cuh>
22 | 
23 | #include <nvbench/nvbench.cuh>
24 | 
25 | #include <thrust/device_vector.h>
26 | 
27 | using namespace cuco::benchmark;  // defaults, dist_from_state
28 | using namespace cuco::utility;    // key_generator, distribution
29 | 
30 | /**
31 |  * @brief Benchmarks `cuco::static_set::rehash` on a set that was pre-filled to a given occupancy
32 |  */
33 | template <typename Key, typename Dist>
34 | void static_set_rehash(nvbench::state& state, nvbench::type_list<Key, Dist>)
35 | {
36 |   std::size_t const num_slots  = state.get_int64("Capacity");
37 |   auto const load              = state.get_float64("Occupancy");
38 |   std::size_t const num_inputs = num_slots * load;
39 | 
40 |   thrust::device_vector<Key> input_keys(num_inputs);
41 |   key_generator generator;
42 |   generator.generate(dist_from_state<Dist>(state), input_keys.begin(), input_keys.end());
43 | 
44 |   state.add_element_count(num_slots);  // slots per second
45 | 
46 |   // Populate the set up front; only the `rehash` call below is handed to `state.exec`
47 |   cuco::static_set<Key> hash_set{num_slots, cuco::empty_key<Key>{-1}};
48 |   hash_set.insert(input_keys.begin(), input_keys.end());
49 | 
50 |   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
51 |     hash_set.rehash({launch.get_stream()});
52 |   });
53 | }
54 | 
55 | // Registered for all default key types (unique keys) over the capacity and occupancy axes below
56 | NVBENCH_BENCH_TYPES(static_set_rehash,
57 |                     NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
58 |                                       nvbench::type_list<distribution::unique>))
59 |   .set_name("static_set_rehash_unique_occupancy")
60 |   .set_type_axes_names({"Key", "Distribution"})
61 |   .set_max_noise(defaults::MAX_NOISE)
62 |   .add_int64_axis("Capacity", {defaults::N})
63 |   .add_float64_axis("Occupancy", defaults::OCCUPANCY_RANGE);
64 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <!---
 2 | Modifications Copyright (c) 2024-2025 Advanced Micro Devices, Inc.
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | The above copyright notice and this permission notice shall be included in
10 | all copies or substantial portions of the Software.
11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
17 | THE SOFTWARE.
18 | -->
19 | 
20 | # hipCollections
21 | Header-only library of GPU-accelerated, concurrent data structures.
22 | This is a port of the original CUDA version at https://github.com/NVIDIA/cuCollections to HIP in order to enable support for AMD GPUs.
23 | 
24 | # Requirements
25 | - ROCm and HIP 7.0.2 (must include `rocthrust-dev` and `hipcub`)
26 | - CMake 3.23.1 or higher (for building the tests)
27 | - git (for getting `libhipcxx`)
28 | - AMD MI200, MI300 GPU
29 | - Linux distribution (tested presently with Ubuntu 22.04)
30 | 
31 | > [!NOTE]
32 | > If `rocthrust-dev` and `hipcub` are not part of your ROCm installation, you can
33 | > install them easily on Ubuntu via apt (e.g. `sudo apt-get install rocthrust-dev`).
34 | 
35 | # How to build the tests
36 | 
37 | To get started, please have a look at the build script we use for CI at `ci/gpu/build_hip.sh`.
38 | As hipCo is a header-only library, you will usually configure your build system to include the hipCo headers.
39 | In order to build some standalone tests, please run the following from the root directory (to build for AMD GPUs):
40 | 
41 | `mkdir build && cd build && cmake .. && cmake --build .`
42 | 
43 | # Current Limitations
44 | - No support for Windows.
45 | - No support for CUDA backend of HIP has been added yet.
46 | - `hyperloglog` and `experimental::dynamic_map` containers are not supported.
47 | 


--------------------------------------------------------------------------------
/benchmarks/static_set/retrieve_all_bench.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <benchmark_defaults.hpp>
18 | #include <benchmark_utils.hpp>
19 | 
20 | #include <cuco/static_set.cuh>
21 | #include <cuco/utility/key_generator.cuh>
22 | 
23 | #include <nvbench/nvbench.cuh>
24 | 
25 | #include <thrust/device_vector.h>
26 | 
27 | using namespace cuco::benchmark;  // defaults, dist_from_state
28 | using namespace cuco::utility;    // key_generator, distribution
29 | 
30 | /**
31 |  * @brief Benchmarks `cuco::static_set::retrieve_all` on a set filled with `NumInputs` keys
32 |  */
33 | template <typename Key, typename Dist>
34 | void static_set_retrieve_all(nvbench::state& state, nvbench::type_list<Key, Dist>)
35 | {
36 |   auto const num_inputs       = state.get_int64("NumInputs");
37 |   auto const load             = state.get_float64("Occupancy");
38 |   std::size_t const num_slots = num_inputs / load;
39 | 
40 |   // Input keys drawn from the distribution selected by the type axis
41 |   thrust::device_vector<Key> input_keys(num_inputs);
42 |   key_generator generator;
43 |   generator.generate(dist_from_state<Dist>(state), input_keys.begin(), input_keys.end());
44 | 
45 |   cuco::static_set<Key> hash_set{num_slots, cuco::empty_key<Key>{-1}};
46 |   hash_set.insert(input_keys.begin(), input_keys.end());
47 | 
48 |   // Output buffer with room for `num_inputs` retrieved keys
49 |   thrust::device_vector<Key> retrieved(num_inputs);
50 |   state.add_element_count(num_inputs);
51 |   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
52 |     auto out_end = hash_set.retrieve_all(retrieved.begin(), {launch.get_stream()});
53 |   });
54 | }
55 | 
56 | // Registered for all default key types (unique keys) over the input-size and occupancy axes below
57 | NVBENCH_BENCH_TYPES(static_set_retrieve_all,
58 |                     NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
59 |                                       nvbench::type_list<distribution::unique>))
60 |   .set_name("static_set_retrieve_all_unique_occupancy")
61 |   .set_type_axes_names({"Key", "Distribution"})
62 |   .set_max_noise(defaults::MAX_NOISE)
63 |   .add_int64_axis("NumInputs", {defaults::N})
64 |   .add_float64_axis("Occupancy", defaults::OCCUPANCY_RANGE);
65 | 


--------------------------------------------------------------------------------
/include/cuco/operator.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2022-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | namespace cuco {
20 | inline namespace op {
21 | // TODO enum class of int32_t instead of struct
22 | // https://github.com/NVIDIA/cuCollections/issues/239
23 | /**
24 |  * @brief `insert` operator tag
25 |  */
26 | struct insert_tag {
27 | } inline constexpr insert;  ///< `cuco::insert` operator
28 | 
29 | /**
30 |  * @brief `insert_and_find` operator tag
31 |  */
32 | struct insert_and_find_tag {
33 | } inline constexpr insert_and_find;  ///< `cuco::insert_and_find` operator
34 | 
35 | /**
36 |  * @brief `insert_or_assign` operator tag
37 |  */
38 | struct insert_or_assign_tag {
39 | } inline constexpr insert_or_assign;  ///< `cuco::insert_or_assign` operator
40 | 
41 | /**
42 |  * @brief `insert_or_apply` operator tag
43 |  */
44 | struct insert_or_apply_tag {
45 | } inline constexpr insert_or_apply;  ///< `cuco::insert_or_apply` operator
46 | 
47 | /**
48 |  * @brief `erase` operator tag
49 |  */
50 | struct erase_tag {
51 | } inline constexpr erase;  ///< `cuco::erase` operator
52 | 
53 | /**
54 |  * @brief `contains` operator tag
55 |  */
56 | struct contains_tag {
57 | } inline constexpr contains;  ///< `cuco::contains` operator
58 | 
59 | /**
60 |  * @brief `count` operator tag
61 |  */
62 | struct count_tag {
63 | } inline constexpr count;  ///< `cuco::count` operator
64 | 
65 | /**
66 |  * @brief `find` operator tag
67 |  */
68 | struct find_tag {
69 | } inline constexpr find;  ///< `cuco::find` operator
70 | 
71 | /**
72 |  * @brief `retrieve` operator tag
73 |  */
74 | struct retrieve_tag {
75 | } inline constexpr retrieve;  ///< `cuco::retrieve` operator
76 | 
77 | /**
78 |  * @brief `for_each` operator tag
79 |  */
80 | struct for_each_tag {
81 | } inline constexpr for_each;  ///< `cuco::for_each` operator
82 | 
83 | }  // namespace op
84 | }  // namespace cuco
85 | 
86 | #include <cuco/detail/operator.inl>
87 | 


--------------------------------------------------------------------------------
/ci/sccache_stats.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # This script prints the sccache hit rate between two calls to sccache --show-stats.
18 | # It should be sourced in your script before and after the operations you want to profile,
19 | # with the 'start' or 'end' argument respectively.
20 | 
21 | mode=$1
22 | 
23 | if [[ "$mode" != "start" && "$mode" != "end" ]]; then
24 |     echo "Invalid mode: $mode"
25 |     echo "Usage: ${BASH_SOURCE[0]} {start|end}"
26 |     exit 1
27 | fi
28 | 
29 | # awk program printing "<hits> <misses>" (totals) from ONE `sccache --show-stats` snapshot; 0 if absent
30 | stats_awk='/^[ \t]*Cache hits[ \t]+[0-9]+/ {h=$3} /^[ \t]*Cache misses[ \t]+[0-9]+/ {m=$3} END {print h+0, m+0}'
31 | case $mode in
32 |   start)
33 |     read -r SCCACHE_START_HITS SCCACHE_START_MISSES <<< "$(sccache --show-stats | awk "$stats_awk")"
34 |     export SCCACHE_START_HITS SCCACHE_START_MISSES
35 |     ;;
36 |   end)
37 |     if [[ -z ${SCCACHE_START_HITS+x} || -z ${SCCACHE_START_MISSES+x} ]]; then
38 |         echo "Error: start stats not collected. Did you call this script with 'start' before your operations?"
39 |         exit 1
40 |     fi
41 | 
42 |     read -r final_hits final_misses <<< "$(sccache --show-stats | awk "$stats_awk")"
43 |     hits=$((final_hits - SCCACHE_START_HITS))
44 |     misses=$((final_misses - SCCACHE_START_MISSES))
45 |     total=$((hits + misses))
46 | 
47 |     prefix=""
48 |     if [ "${GITHUB_ACTIONS:-false}" = "true" ]; then
49 |       prefix="::notice::"
50 |     fi
51 | 
52 |     if (( total > 0 )); then
53 |       hit_rate=$(awk -v hits="$hits" -v total="$total" 'BEGIN { printf "%.2f", (hits / total) * 100 }')
54 |       echo "${prefix}sccache hits: $hits | misses: $misses | hit rate: $hit_rate%"
55 |     else
56 |       echo "${prefix}sccache stats: N/A No new compilation requests"
57 |     fi
58 |     unset SCCACHE_START_HITS SCCACHE_START_MISSES
59 |     ;;
60 | esac


--------------------------------------------------------------------------------
/include/cuco/detail/utils.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2021-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and limitations under the License.
14 |  */
15 | 
16 | #pragma once
17 | 
18 | #include <cuco/detail/error.hpp>
19 | #include <cuco/detail/utility/cuda.hpp>
20 | 
21 | #include <cuda/std/iterator>
22 | #include <cuda/std/type_traits>
23 | 
24 | namespace cuco {
25 | namespace detail {
26 | 
27 | // Distance from `begin` to `end` as `index_type` (`int64_t` rather than arch-dependent `long int`)
28 | template <typename Iterator>
29 | __host__ __device__ constexpr inline index_type distance(Iterator begin, Iterator end)
30 | {
31 |   using tag = typename cuda::std::iterator_traits<Iterator>::iterator_category;
32 |   constexpr bool is_random = cuda::std::is_base_of_v<cuda::std::random_access_iterator_tag, tag>;
33 |   static_assert(is_random, "Input iterator should be a random access iterator.");
34 |   return static_cast<index_type>(cuda::std::distance(begin, end));
35 | }
36 | 
37 | /**
38 |  * @brief C++17 constexpr backport of `std::lower_bound`.
39 |  *
40 |  * Performs a binary search using `operator<` after converting each dereferenced element to `T`.
41 |  *
42 |  * @tparam ForwardIt Type of input iterator
43 |  * @tparam T Type of `value`
44 |  *
45 |  * @param first Iterator defining the start of the range to examine
46 |  * @param last Iterator defining the end of the range to examine
47 |  * @param value Value to compare the elements to
48 |  *
49 |  * @return Iterator pointing to the first element in the range [first, last) that does not satisfy
50 |  * element < value
51 |  */
52 | template <class ForwardIt, class T>
53 | constexpr ForwardIt lower_bound(ForwardIt first, ForwardIt last, const T& value)
54 | {
55 |   using diff_type = typename std::iterator_traits<ForwardIt>::difference_type;
56 | 
57 |   // Invariant: the result lies within the `remaining` elements starting at `first`
58 |   diff_type remaining = std::distance(first, last);
59 |   while (remaining > 0) {
60 |     diff_type const half = remaining / 2;
61 |     ForwardIt mid        = first;
62 |     std::advance(mid, half);
63 | 
64 |     if (static_cast<T>(*mid) < value) {
65 |       first = ++mid;
66 |       remaining -= half + 1;
67 |     } else {
68 |       remaining = half;
69 |     }
70 |   }
71 |   return first;
72 | }
73 | 
74 | }  // namespace detail
75 | }  // namespace cuco
76 | 


--------------------------------------------------------------------------------
/ci/matrix.yml:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | cuda_oldest: &cuda_oldest '11.8'
17 | cuda_newest: &cuda_newest '12.6'
18 | 
19 | # The GPUs to test on
20 | # Note: This assumes that the appropriate gpu_build_archs are set to include building for the GPUs listed here
21 | gpus:
22 |   - 'a100'
23 |   - 'v100'
24 | 
25 | # The version of the devcontainer images to use from https://hub.docker.com/r/rapidsai/devcontainers
26 | devcontainer_version: '24.12'
27 | 
28 | # Each environment below will generate a unique build/test job
29 | # See the "compute-matrix" job in the workflow for how this is parsed and used
30 | # cuda: The CUDA Toolkit version
31 | # os: The operating system used
32 | # cpu: The CPU architecture
33 | # compiler: The compiler to use
34 | #   name: The compiler name
35 | #   version: The compiler version
36 | #   exe: The unversioned compiler binary name
37 | #   To use the system's default compiler set "exe: 'c++'" or "name: 'cc'"
38 | # gpu_build_archs: The GPU architectures to build for (comma-separated list)
39 | # std: The C++ standards to build for
40 | #    This field is unique as it will generate an independent build/test job for each value
41 | 
42 | # Configurations that will run for every PR
43 | pull_request:
44 |   nvcc:
45 |     - {cuda: *cuda_oldest, os: 'ubuntu22.04', cpu: 'amd64', compiler: {name: 'gcc', version: '11', exe: 'g++'}, gpu_build_archs: '70', std: [17], jobs: ['build', 'test']}
46 |     - {cuda: *cuda_newest, os: 'ubuntu22.04', cpu: 'amd64', compiler: {name: 'gcc', version: '12', exe: 'g++'}, gpu_build_archs: '70', std: [17], jobs: ['build', 'test']}
47 |     - {cuda: *cuda_newest, os: 'ubuntu22.04', cpu: 'amd64', compiler: {name: 'gcc', version: '13', exe: 'g++'}, gpu_build_archs: '80', std: [17], jobs: ['build']}
48 |     - {cuda: *cuda_newest, os: 'ubuntu22.04', cpu: 'arm64', compiler: {name: 'gcc', version: '12', exe: 'g++'}, gpu_build_archs: '60,90', std: [17], jobs: ['build']}
49 | 


--------------------------------------------------------------------------------
/tests/dynamic_bitset/find_next_test.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <test_utils.hpp>
18 | 
19 | #include <cuco/detail/trie/dynamic_bitset/dynamic_bitset.cuh>
20 | 
21 | #include <thrust/device_vector.h>
22 | #include <thrust/execution_policy.h>
23 | #include <thrust/host_vector.h>
24 | 
25 | #include <catch2/catch_test_macros.hpp>
26 | 
27 | // Kernel writing `ref.find_next(i)` to `output[i]` for every `i` in `[0, num_elements)`; each thread
28 | // starts at its global thread index and advances by the total number of launched threads.
29 | template <class BitsetRef, typename size_type, typename OutputIt>
30 | __global__ void find_next_kernel(BitsetRef ref, size_type num_elements, OutputIt output)
31 | {
32 |   // Widen to `index_type` before multiplying so the 32-bit built-in index products cannot wrap
33 |   auto const stride = static_cast<cuco::detail::index_type>(gridDim.x) * blockDim.x;
34 |   auto index        = static_cast<cuco::detail::index_type>(blockIdx.x) * blockDim.x + threadIdx.x;
35 |   for (; index < num_elements; index += stride) { output[index] = ref.find_next(index); }
36 | }
37 | 
38 | extern bool modulo_bitgen(uint64_t i);  // Defined in get_test.cu
39 | 
40 | TEST_CASE("dynamic_bitset find next set test", "")
41 | {
42 |   cuco::experimental::detail::dynamic_bitset bv;
43 | 
44 |   using size_type = std::size_t;
45 |   constexpr size_type num_elements{400};
46 | 
47 |   for (size_type i = 0; i < num_elements; i++) {
48 |     bv.push_back(modulo_bitgen(i));
49 |   }
50 | 
51 |   // Query every position on the device through the bitset's ref object
52 |   thrust::device_vector<size_type> device_result(num_elements);
53 |   auto ref = bv.ref();
54 |   find_next_kernel<<<1, 1024>>>(ref, num_elements, device_result.data());
55 | 
56 |   thrust::host_vector<size_type> host_result = device_result;
57 | 
58 |   // Host reference: first set position at or after `pos`, or `num_elements` if there is none.
59 |   // Scanning forward from `pos` avoids seeding the search with `-1lu`, which only wraps to 0 on
60 |   // increment when `unsigned long` is as wide as `size_type`.
61 |   auto const next_set_from = [&](size_type pos) {
62 |     while (pos < num_elements and !modulo_bitgen(pos)) { pos++; }
63 |     return pos;
64 |   };
65 | 
66 |   size_type num_matches  = 0;
67 |   size_type next_set_pos = next_set_from(0);
68 |   for (size_type key = 0; key < num_elements; key++) {
69 |     num_matches += host_result[key] == next_set_pos;
70 |     if (key == next_set_pos) { next_set_pos = next_set_from(key + 1); }
71 |   }
72 |   REQUIRE(num_matches == num_elements);
73 | }
74 | 


--------------------------------------------------------------------------------
/examples/static_set/host_bulk_example.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2022-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <cuco/static_set.cuh>
18 | 
19 | #include <thrust/device_vector.h>
20 | #include <thrust/functional.h>
21 | #include <thrust/logical.h>
22 | #include <thrust/sequence.h>
23 | 
24 | #include <cmath>
25 | #include <iostream>
26 | #include <limits>
27 | /**
28 |  * @file host_bulk_example.cu
29 |  * @brief Demonstrates usage of the static_set "bulk" host APIs.
30 |  *
31 |  * The bulk APIs are only invocable from the host and are used for doing operations like `insert` or
32 |  * `contains` on a set of keys.
33 |  *
34 |  */
35 | int main(void)
36 | {
37 |   using Key = int;
38 | 
39 |   // A reserved "sentinel" key marks unoccupied slots, so it must be a value that can never show
40 |   // up among the keys you insert.
41 |   Key constexpr empty_key_sentinel = -1;
42 | 
43 |   // How many keys this example inserts
44 |   std::size_t constexpr num_keys = 50'000;
45 | 
46 |   // Size the set for a 50% load factor
47 |   auto constexpr load_factor = 0.5;
48 |   std::size_t const capacity = std::ceil(num_keys / load_factor);
49 | 
50 |   // Build a set holding at least `capacity` slots, with -1 denoting an empty slot
51 |   cuco::static_set<Key> key_set{capacity, cuco::empty_key{empty_key_sentinel}};
52 | 
53 |   // Input keys {0, 1, ..., num_keys - 1}
54 |   thrust::device_vector<Key> input_keys(num_keys);
55 |   thrust::sequence(input_keys.begin(), input_keys.end(), 0);
56 | 
57 |   // Bulk-insert every key
58 |   key_set.insert(input_keys.begin(), input_keys.end());
59 | 
60 |   // One flag per key, written by the bulk `contains` query
61 |   thrust::device_vector<bool> is_present(num_keys);
62 |   key_set.contains(input_keys.begin(), input_keys.end(), is_present.begin());
63 | 
64 |   // Every flag must be true for the lookup to count as a success
65 |   auto const flags_begin    = is_present.begin();
66 |   auto const flags_end      = is_present.end();
67 |   bool const all_keys_found = thrust::all_of(flags_begin, flags_end, thrust::identity<bool>());
68 | 
69 |   if (all_keys_found) { std::cout << "Success! Found all keys.\n"; }
70 | 
71 |   return 0;
72 | }
73 | 


--------------------------------------------------------------------------------
/tests/dynamic_bitset/get_test.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <test_utils.hpp>
18 | 
19 | #include <cuco/detail/trie/dynamic_bitset/dynamic_bitset.cuh>
20 | 
21 | #include <thrust/device_vector.h>
22 | #include <thrust/execution_policy.h>
23 | #include <thrust/sequence.h>
24 | 
25 | #include <catch2/catch_test_macros.hpp>
26 | 
27 | // Kernel writing `ref.test(i)` to `output[i]` for every `i` in `[0, num_elements)`; each thread
28 | // starts at its global thread index and advances by the total number of launched threads.
29 | template <class BitsetRef, typename size_type, typename OutputIt>
30 | __global__ void test_kernel(BitsetRef ref, size_type num_elements, OutputIt output)
31 | {
32 |   // Widen to `index_type` before multiplying so the 32-bit built-in index products cannot wrap
33 |   auto const stride = static_cast<cuco::detail::index_type>(gridDim.x) * blockDim.x;
34 |   auto index        = static_cast<cuco::detail::index_type>(blockIdx.x) * blockDim.x + threadIdx.x;
35 |   for (; index < num_elements; index += stride) { output[index] = ref.test(index); }
36 | }
37 | 
38 | bool modulo_bitgen(uint64_t i) { return i % 7 == 0; }  // Reference pattern: every 7th bit is set
39 | 
40 | TEST_CASE("dynamic_bitset get test", "")
41 | {
42 |   using size_type = std::size_t;
43 |   constexpr size_type num_elements{400};
44 | 
45 |   // Build the bitset on the host while counting the bits that are expected to be set
46 |   cuco::experimental::detail::dynamic_bitset bv;
47 |   size_type num_set_ref = 0;
48 |   for (size_type pos = 0; pos < num_elements; ++pos) {
49 |     bool const bit = modulo_bitgen(pos);
50 |     bv.push_back(bit);
51 |     num_set_ref += bit;
52 |   }
53 | 
54 |   // Host-bulk test
55 |   thrust::device_vector<size_type> keys(num_elements);
56 |   thrust::sequence(keys.begin(), keys.end(), 0);
57 | 
58 |   thrust::device_vector<size_type> test_result(num_elements);
59 |   thrust::fill(test_result.begin(), test_result.end(), 0);
60 |   bv.test(keys.begin(), keys.end(), test_result.begin());
61 | 
62 |   size_type num_set = thrust::reduce(thrust::device, test_result.begin(), test_result.end(), 0);
63 |   REQUIRE(num_set == num_set_ref);
64 | 
65 |   // Device-ref test
66 |   auto ref = bv.ref();
67 |   thrust::fill(test_result.begin(), test_result.end(), 0);
68 |   test_kernel<<<1, 1024>>>(ref, num_elements, test_result.data());
69 | 
70 |   num_set = thrust::reduce(thrust::device, test_result.begin(), test_result.end(), 0);
71 |   REQUIRE(num_set == num_set_ref);
72 | }
73 | 


--------------------------------------------------------------------------------
/tests/static_map/hash_test.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <test_utils.hpp>
18 | 
19 | #include <cuco/hash_functions.cuh>
20 | #include <cuco/static_map.cuh>
21 | 
22 | #include <thrust/device_vector.h>
23 | #include <thrust/functional.h>
24 | #include <thrust/iterator/counting_iterator.h>
25 | #include <thrust/iterator/transform_iterator.h>
26 | 
27 | #include <catch2/catch_template_test_macros.hpp>
28 | 
29 | using size_type = std::size_t;
30 | 
31 | template <typename Key, typename Hash>
32 | void test_hash_function()  // Inserts 400 keys into a static_map that probes with `Hash`, then looks them up
33 | {
34 |   using Value = int64_t;
35 |   // Number of key/value pairs; also passed as the first constructor argument of the map
36 |   constexpr size_type num_keys{400};
37 |   // Linear probing with `Hash`; -1 serves as the empty sentinel for both keys and values
38 |   auto map = cuco::static_map<Key,
39 |                               Value,
40 |                               cuco::extent<size_type>,
41 |                               cuda::thread_scope_device,
42 |                               thrust::equal_to<Key>,
43 |                               cuco::linear_probing<1, Hash>,
44 |                               cuco::cuda_allocator<cuda::std::byte>,
45 |                               cuco::storage<2>>{
46 |     num_keys, cuco::empty_key<Key>{-1}, cuco::empty_value<Value>{-1}};
47 |   // Keys are the consecutive integers starting at 1
48 |   auto keys_begin = thrust::counting_iterator<Key>(1);
49 |   // Each key `i` is mapped to the value `i`
50 |   auto pairs_begin = thrust::make_transform_iterator(
51 |     keys_begin, cuda::proclaim_return_type<cuco::pair<Key, Value>>([] __device__(auto i) {
52 |       return cuco::pair<Key, Value>(i, i);
53 |     }));
54 |   // One result flag per queried key
55 |   thrust::device_vector<bool> d_keys_exist(num_keys);
56 |   // Bulk-insert all `num_keys` pairs
57 |   map.insert(pairs_begin, pairs_begin + num_keys);
58 |   // The keys are distinct, so the reported size must equal `num_keys`
59 |   REQUIRE(map.size() == num_keys);
60 |   // Look up the same keys that were just inserted
61 |   map.contains(keys_begin, keys_begin + num_keys, d_keys_exist.begin());
62 |   // Every inserted key must be reported as present
63 |   REQUIRE(cuco::test::all_of(d_keys_exist.begin(), d_keys_exist.end(), thrust::identity{}));
64 | }
65 | 
66 | TEMPLATE_TEST_CASE_SIG("static_map hash tests", "", ((typename Key)), (int32_t), (int64_t))
67 | {  // Instantiated with Key = int32_t and Key = int64_t; each hash functor below is checked in turn
68 |   test_hash_function<Key, cuco::murmurhash3_32<Key>>();
69 |   test_hash_function<Key, cuco::murmurhash3_x64_128<Key>>();
70 |   test_hash_function<Key, cuco::xxhash_32<Key>>();
71 |   test_hash_function<Key, cuco::xxhash_64<Key>>();
72 | }


--------------------------------------------------------------------------------
/include/cuco/utility/allocator.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2020-2023, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cuco/detail/error.hpp>
20 | 
21 | namespace cuco {
22 | /**
23 |  * @brief A device allocator using `cudaMalloc`/`cudaFree` to satisfy (de)allocations.
24 |  *
25 |  * @tparam T The allocator's value type
26 |  */
27 | template <typename T>
28 | class cuda_allocator {
29 |  public:
30 |   using value_type = T;  ///< Allocator's value type
31 |   /// Default constructor; the class declares no data members
32 |   cuda_allocator() = default;
33 | 
34 |   /**
35 |    * @brief Converting constructor from an allocator of another value type `U`; copies nothing.
36 |    */
37 |   template <class U>
38 |   cuda_allocator(cuda_allocator<U> const&) noexcept
39 |   {
40 |   }
41 | 
42 |   /**
43 |    * @brief Allocates storage for `n` objects of type `T` using `cudaMalloc`.
44 |    * The result of `cudaMalloc` is checked via `CUCO_CUDA_TRY`.
45 |    * @param n The number of objects to allocate storage for
46 |    * @return Pointer to the allocated storage
47 |    */
48 |   value_type* allocate(std::size_t n)
49 |   {
50 |     value_type* p;
51 |     CUCO_CUDA_TRY(cudaMalloc(&p, sizeof(value_type) * n));
52 |     return p;
53 |   }
54 | 
55 |   /**
56 |    * @brief Deallocates storage pointed to by `p`.
57 |    * The result of `cudaFree` is checked via `CUCO_CUDA_TRY`; the size argument is ignored.
58 |    * @param p Pointer to memory to deallocate
59 |    */
60 |   void deallocate(value_type* p, std::size_t) { CUCO_CUDA_TRY(cudaFree(p)); }
61 | };
62 | 
63 | /**
64 |  * @brief Equality comparison operator.
65 |  * Both operands are ignored: any two `cuda_allocator` instances compare equal.
66 |  * @tparam T Value type of LHS object
67 |  * @tparam U Value type of RHS object
68 |  *
69 |  * @return Always `true`
70 |  */
71 | template <typename T, typename U>
72 | bool operator==(cuda_allocator<T> const&, cuda_allocator<U> const&) noexcept
73 | {
74 |   return true;
75 | }
76 | 
77 | /**
78 |  * @brief Inequality comparison operator, defined as the negation of `operator==`.
79 |  *
80 |  * @tparam T Value type of the left-hand side allocator
81 |  * @tparam U Value type of the right-hand side allocator
82 |  *
83 |  * @param lhs Allocator on the left-hand side of the comparison
84 |  * @param rhs Allocator on the right-hand side of the comparison
85 |  *
86 |  * @return `true` iff `lhs == rhs` evaluates to `false`
87 |  */
88 | template <typename T, typename U>
89 | bool operator!=(cuda_allocator<T> const& lhs, cuda_allocator<U> const& rhs) noexcept
90 | {
91 |   return !(lhs == rhs);
92 | }
93 | 
94 | }  // namespace cuco
95 | 


--------------------------------------------------------------------------------
/.devcontainer/verify_devcontainer.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | function usage {  # Print the help text shown for -h/--help
 4 |     printf '%s\n' "Usage: $0" \
 5 |         "" \
 6 |         "This script is intended to be run within one of CUCO's Dev Containers." \
 7 |         "It verifies that the expected environment variables and binary versions match what is expected."
 8 | }
 9 | 
10 | check_envvars() {  # Echo each named variable; exit 1 at the first one that is unset or empty
11 |     for var_name in "$@"; do
12 |         if [[ -n "${!var_name:-}" ]]; then
13 |             echo "$var_name=${!var_name}"
14 |         else
15 |             echo "::error:: ${var_name} variable is not set."
16 |             exit 1
17 |         fi
18 |     done
19 | }
20 | 
21 | check_host_compiler_version() {  # Check $CXX against CUCO_HOST_COMPILER and CUCO_HOST_COMPILER_VERSION
22 |     local version_output actual_version expected_compiler  # keep all three out of the global scope
23 |     version_output=$($CXX --version)  # assigned separately so a failing $CXX is not masked by `local`
24 |     if [[ "$CXX" == "g++" ]]; then
25 |         actual_version=$(head -n 1 <<<"$version_output" | cut -d ' ' -f 4 | cut -d '.' -f 1)
26 |         expected_compiler="gcc"
27 |     elif [[ "$CXX" == "clang++" ]]; then
28 |         if [[ $version_output =~ clang\ version\ ([0-9]+) ]]; then
29 |             actual_version=${BASH_REMATCH[1]}
30 |         else
31 |             echo "::error:: Unable to determine clang version."
32 |             exit 1
33 |         fi
34 |         expected_compiler="llvm"
35 |     else
36 |         echo "::error:: Unexpected CXX value ($CXX)."
37 |         exit 1
38 |     fi
39 |     # Both the compiler family and the major version must match the expected values
40 |     if [[ "$expected_compiler" != "${CUCO_HOST_COMPILER}" || "$actual_version" != "$CUCO_HOST_COMPILER_VERSION" ]]; then
41 |         echo "::error:: CXX ($CXX) version ($actual_version) does not match the expected compiler (${CUCO_HOST_COMPILER}) and version (${CUCO_HOST_COMPILER_VERSION})."
42 |         exit 1
43 |     else
44 |         echo "Detected host compiler: $CXX version $actual_version"
45 |     fi
46 | }
47 | 
48 | check_cuda_version() {  # Check the release reported by `nvcc --version` against CUCO_CUDA_VERSION
49 |     local cuda_version_output actual_cuda_version
50 |     cuda_version_output=$(nvcc --version)  # assigned separately so a failing nvcc is not masked by `local`
51 |     if [[ $cuda_version_output =~ release\ ([0-9]+\.[0-9]+) ]]; then
52 |         actual_cuda_version=${BASH_REMATCH[1]}
53 |     else
54 |         echo "::error:: Unable to determine CUDA version from nvcc."
55 |         exit 1
56 |     fi
57 |     if [[ "$actual_cuda_version" != "$CUCO_CUDA_VERSION" ]]; then
58 |         echo "::error:: CUDA version ($actual_cuda_version) does not match the expected CUDA version ($CUCO_CUDA_VERSION)."
59 |         exit 1
60 |     else
61 |         echo "Detected CUDA version: $actual_cuda_version"
62 |     fi
63 | }
64 | 
65 | main() {
66 |     if [[ "$1" == "-h" || "$1" == "--help" ]]; then
67 |         usage
68 |         exit 0
69 |     fi
70 | 
71 |     set -euo pipefail
72 | 
73 |     check_envvars DEVCONTAINER_NAME CXX CUCO_HOST_COMPILER CUCO_CUDA_VERSION CUCO_HOST_COMPILER_VERSION
74 | 
75 |     check_host_compiler_version
76 | 
77 |     check_cuda_version
78 | 
79 |     echo "Dev Container successfully verified!"
80 | }
81 | 
82 | main "$@"


--------------------------------------------------------------------------------
/.github/workflows/run-as-coder.yml:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | name: Run as coder user
17 | # Default shell for `run` steps that do not set their own `shell`
18 | defaults:
19 |   run:
20 |     shell: bash -exo pipefail {0}
21 | 
22 | # Reusable workflow: the only trigger is `workflow_call` from another workflow
23 | on:
24 |   workflow_call:
25 |     inputs:
26 |       name: {type: string, required: true}  # display name of the job
27 |       image: {type: string, required: true}  # container image the job runs in
28 |       runner: {type: string, required: true}  # value used for `runs-on`
29 |       command: {type: string, required: true}  # shell command evaluated as the `coder` user
30 |       env: { type: string, required: false, default: "" }  # NOTE(review): not referenced anywhere in this file — confirm it is still needed
31 | 
32 | jobs:
33 |   run-as-coder:
34 |     name: ${{inputs.name}}
35 |     runs-on: ${{inputs.runner}}
36 |     container:
37 |       options: -u root  # start as root; the command itself is run through `su coder` below
38 |       image: ${{inputs.image}}
39 |       env:
40 |         NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
41 |     permissions:
42 |       id-token: write  # presumably required by the sccache credential step (OIDC) — confirm
43 |     steps:
44 |       - name: Checkout repo
45 |         uses: actions/checkout@v3
46 |         with:
47 |           path: cuCollections
48 |           persist-credentials: false
49 |       - name: Move files to coder user home directory
50 |         run: |
51 |           cp -R cuCollections /home/coder/cuCollections
52 |           chown -R coder:coder /home/coder/
53 |       - name: Configure credentials and environment variables for sccache
54 |         uses: ./cuCollections/.github/actions/configure_cccl_sccache  # local action taken from the checked-out repo
55 |       - name: Run command
56 |         shell: su coder {0}  # overrides the default shell so the script runs as `coder`
57 |         run: |
58 |             set -exo pipefail
59 |             cd ~/cuCollections
60 |             eval "${{inputs.command}}" || exit_code=$?
61 |             if [ ! -z "$exit_code" ]; then
62 |                 echo "::error::Error! To checkout the corresponding code and reproduce locally, run the following commands:"
63 |                 echo "git clone --branch $GITHUB_REF_NAME --single-branch --recurse-submodules https://github.com/$GITHUB_REPOSITORY.git && cd $(echo $GITHUB_REPOSITORY | cut -d'/' -f2) && git checkout $GITHUB_SHA"
64 |                 echo "docker run --rm -it --gpus all --pull=always --volume \$PWD:/repo --workdir /repo ${{ inputs.image }} ${{inputs.command}}"
65 |                 exit $exit_code
66 |             fi
67 | 


--------------------------------------------------------------------------------
/include/cuco/types.cuh:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2022-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cuco/detail/utility/strong_type.cuh>
20 | 
21 | /**
22 |  * @brief Defines various strong type wrappers used across this library.
23 |  *
24 |  * @note Each strong type inherits from `cuco::detail::strong_type<T>`. `CUCO_DEFINE_STRONG_TYPE`
25 |  * and `CUCO_DEFINE_TEMPLATE_STRONG_TYPE` are convenience macros used to define a named type in a
26 |  * single line, e.g., `CUCO_DEFINE_STRONG_TYPE(foo, double)` defines `struct foo : public
27 |  * cuco::detail::strong_type<double> {...};`, where `cuco::foo{42.0}` is implicitly convertible to
28 |  * `double{42.0}`.
29 |  */
30 | 
31 | namespace cuco {
32 | /**
33 |  * @brief A strong type wrapper `cuco::empty_key<Key>` used to denote the empty key sentinel.
34 |  */
35 | CUCO_DEFINE_TEMPLATE_STRONG_TYPE(empty_key);
36 | 
37 | /**
38 |  * @brief A strong type wrapper `cuco::empty_value<T>` used to denote the empty value sentinel.
39 |  */
40 | CUCO_DEFINE_TEMPLATE_STRONG_TYPE(empty_value);
41 | 
42 | /**
43 |  * @brief A strong type wrapper `cuco::erased_key<Key>` used to denote the erased key sentinel.
44 |  */
45 | CUCO_DEFINE_TEMPLATE_STRONG_TYPE(erased_key);
46 | 
47 | /**
48 |  * @brief A strong type wrapper `cuco::sketch_size_kb` for specifying the upper-bound sketch size of
49 |  * `cuco::hyperloglog(_ref)` in KB.
50 |  *
51 |  * @note Values can also be specified as `_KB` literals, e.g., `64.3_KB` or `64_KB`.
52 |  */
53 | CUCO_DEFINE_STRONG_TYPE(sketch_size_kb, double);
54 | 
55 | /**
56 |  * @brief A strong type wrapper `cuco::standard_deviation` for specifying the desired standard
57 |  * deviation for the cardinality estimate of `cuco::hyperloglog(_ref)`.
58 |  */
59 | CUCO_DEFINE_STRONG_TYPE(standard_deviation, double);
60 | 
61 | }  // namespace cuco
62 | 
63 | // User-defined literal operators for `cuco::sketch_size_kb`; floating-point form, e.g. `64.3_KB`
64 | __host__ __device__ constexpr cuco::sketch_size_kb operator""_KB(long double value)
65 | {
66 |   return cuco::sketch_size_kb{static_cast<double>(value)};
67 | }
68 | // Integer-literal form, e.g. `64_KB`; the value is converted to `double` as well
69 | __host__ __device__ constexpr cuco::sketch_size_kb operator""_KB(unsigned long long int value)
70 | {
71 |   return cuco::sketch_size_kb{static_cast<double>(value)};
72 | }
73 | 


--------------------------------------------------------------------------------
/include/cuco/utility/traits.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2021-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cuda/std/type_traits>
20 | #include <thrust/device_reference.h>
21 | #include <thrust/tuple.h>
22 | 
23 | namespace cuco {
24 | 
25 | /**
26 |  * @brief Customization point that can be specialized to indicate that it is safe to perform bitwise
27 |  * equality comparisons on the object-representation of objects of type `T`.
28 |  *
29 |  * By default, only types where `cuda::std::has_unique_object_representations_v<T>` is true are safe
30 |  * for bitwise equality. However, this can be too restrictive for some types, e.g., floating point
31 |  * types.
32 |  *
33 |  * User-defined specializations of `is_bitwise_comparable` are allowed, but it is the user's
34 |  * responsibility to ensure values do not occur that would lead to unexpected behavior. For example,
35 |  * if a `NaN` bit pattern were used as the empty sentinel value, it may not compare bitwise equal to
36 |  * other `NaN` bit patterns.
37 |  *
38 |  */
39 | template <typename T, typename = void>
40 | struct is_bitwise_comparable : cuda::std::false_type {};
41 | 
42 | /// By default, only types with unique object representations are allowed
43 | template <typename T>
44 | struct is_bitwise_comparable<
45 |   T,
46 |   cuda::std::enable_if_t<cuda::std::has_unique_object_representations_v<T>>>
47 |   : cuda::std::true_type {};
48 | 
49 | template <typename T>
50 | inline constexpr bool is_bitwise_comparable_v =
51 |   is_bitwise_comparable<T>::value;  ///< Shortcut definition
52 | 
53 | /**
54 |  * @brief Declares that a type `Type` is bitwise comparable.
55 |  * @note Expands to a `namespace cuco { ... }` block, so it must be invoked at global namespace scope.
56 |  */
57 | #define CUCO_DECLARE_BITWISE_COMPARABLE(Type)                   \
58 |   namespace cuco {                                              \
59 |   template <>                                                   \
60 |   struct is_bitwise_comparable<Type> : cuda::std::true_type {}; \
61 |   }
62 | 
63 | template <bool value, typename... Args>
64 | inline constexpr bool dependent_bool_value = value;  ///< Unpacked dependent bool value
65 | 
66 | template <typename... Args>
67 | inline constexpr bool dependent_false =
68 |   dependent_bool_value<false, Args...>;  ///< Emits a `false` value which is dependent on the given
69 |                                          ///< argument types
70 | 
71 | }  // namespace cuco
72 | 


--------------------------------------------------------------------------------
/examples/bloom_filter/host_bulk_example.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <cuco/bloom_filter.cuh>
18 | 
19 | #include <thrust/count.h>
20 | #include <thrust/device_vector.h>
21 | #include <thrust/execution_policy.h>
22 | #include <thrust/sequence.h>
23 | 
24 | #include <iostream>
25 | 
26 | int main(void)
27 | {
28 |   int constexpr num_keys    = 10'000;             ///< Total number of generated keys
29 |   int constexpr num_tp      = num_keys * 0.5;     ///< First half of the keys: inserted
30 |   int constexpr num_tn      = num_keys - num_tp;  ///< Remaining keys: never inserted
31 |   int constexpr sub_filters = 200;                ///< Sub-filters per bloom filter
32 | 
33 |   // Keys stored in the filter are plain `int`s
34 |   using key_type = int;
35 | 
36 |   // Create a bloom filter with the default policy and `sub_filters` sub-filters.
37 |   cuco::bloom_filter<key_type> filter{sub_filters};
38 | 
39 |   std::cout << "Bulk insert into bloom filter with default fingerprint generation policy: "
40 |             << std::endl;
41 |   // Device-side keys 1, 2, ..., num_keys
42 |   thrust::device_vector<key_type> keys(num_keys);
43 |   thrust::sequence(keys.begin(), keys.end(), 1);
44 | 
45 |   auto const inserted_first = keys.begin();
46 |   auto const inserted_last  = inserted_first + num_tp;
47 |   auto const absent_first   = inserted_last;
48 |   auto const absent_last    = keys.end();
49 | 
50 |   // Add the first half of the keys to the filter.
51 |   filter.add(inserted_first, inserted_last);
52 | 
53 |   thrust::device_vector<bool> tp_result(num_tp, false);
54 |   thrust::device_vector<bool> tn_result(num_tn, false);
55 | 
56 |   // Look up the keys that were inserted above.
57 |   // Every one of them must be found, i.e. the true-positive rate is TPR=1.
58 |   filter.contains(inserted_first, inserted_last, tp_result.begin());
59 | 
60 |   // Look up the keys that were never inserted.
61 |   // Bloom filters are probabilistic data structures, so some of these lookups
62 |   // report a hit anyway; the resulting false-positive rate FPR>0 depends on the
63 |   // number of bits in the filter and the number of hashes used per key.
64 |   filter.contains(absent_first, absent_last, tn_result.begin());
65 | 
66 |   auto const tp_hits = thrust::count(thrust::device, tp_result.begin(), tp_result.end(), true);
67 |   auto const fp_hits = thrust::count(thrust::device, tn_result.begin(), tn_result.end(), true);
68 |   float const tp_rate = static_cast<float>(tp_hits) / static_cast<float>(num_tp);
69 |   float const fp_rate = static_cast<float>(fp_hits) / static_cast<float>(num_tn);
70 | 
71 |   std::cout << "TPR=" << tp_rate << " FPR=" << fp_rate << std::endl;
72 | 
73 |   return 0;
74 | }


--------------------------------------------------------------------------------
/benchmarks/bloom_filter/utils.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cuco/hash_functions.cuh>
20 | 
21 | #include <nvbench/nvbench.cuh>
22 | 
23 | #include <thrust/count.h>
24 | #include <thrust/device_vector.h>
25 | #include <thrust/execution_policy.h>
26 | #include <thrust/sequence.h>
27 | 
28 | #include <cstdint>
29 | // Type strings for the hash functors (declared for the `<char>` instantiations only — TODO confirm)
30 | NVBENCH_DECLARE_TYPE_STRINGS(cuco::detail::XXHash_64<char>, "xxhash_64", "cuco::xxhash_64");
31 | NVBENCH_DECLARE_TYPE_STRINGS(cuco::detail::XXHash_32<char>, "xxhash_32", "cuco::xxhash_32");
32 | NVBENCH_DECLARE_TYPE_STRINGS(cuco::detail::MurmurHash3_32<char>,
33 |                              "murmurhash3_32",
34 |                              "cuco::murmurhash3_32");
35 | NVBENCH_DECLARE_TYPE_STRINGS(cuco::detail::MurmurHash3_x86_128<char>,
36 |                              "murmurhash3_x86_128",
37 |                              "cuco::murmurhash3_x86_128");
38 | NVBENCH_DECLARE_TYPE_STRINGS(cuco::detail::MurmurHash3_x64_128<char>,
39 |                              "murmurhash3_x64_128",
40 |                              "cuco::murmurhash3_x64_128");
41 | NVBENCH_DECLARE_TYPE_STRINGS(cuco::detail::identity_hash<char>,
42 |                              "identity_hash",
43 |                              "cuco::identity_hash");
44 | 
45 | namespace cuco::benchmark {
46 | /// Adds a "FalsePositiveRate" summary to `state`; `filter` is cleared before and after measuring.
47 | template <typename FilterType>
48 | void add_fpr_summary(nvbench::state& state, FilterType& filter)
49 | {
50 |   filter.clear();
51 | 
52 |   auto const num_keys = state.get_int64("NumInputs");
53 |   // Keys 1..2*num_keys: the first half is inserted, the second half is only queried
54 |   thrust::device_vector<typename FilterType::key_type> keys(num_keys * 2);
55 |   thrust::sequence(thrust::device, keys.begin(), keys.end(), 1);
56 |   thrust::device_vector<bool> result(num_keys, false);
57 | 
58 |   auto tp_begin = keys.begin();
59 |   auto tp_end   = tp_begin + num_keys;
60 |   auto tn_begin = tp_end;
61 |   auto tn_end   = keys.end();
62 |   filter.add(tp_begin, tp_end);
63 |   filter.contains(tn_begin, tn_end, result.begin());
64 |   // Keep the count integral and divide in double: `float` cannot hold every count above 2^24
65 |   auto const fp = thrust::count(thrust::device, result.begin(), result.end(), true);
66 |   auto const fpr = num_keys > 0 ? static_cast<double>(fp) / static_cast<double>(num_keys) : 0.0;
67 |   auto& summ = state.add_summary("FalsePositiveRate");
68 |   summ.set_string("hint", "FPR");
69 |   summ.set_string("short_name", "FPR");
70 |   summ.set_string("description", "False-positive rate of the bloom filter.");
71 |   summ.set_float64("value", fpr);
72 | 
73 |   filter.clear();
74 | }
75 | 
76 | }  // namespace cuco::benchmark


--------------------------------------------------------------------------------
/include/cuco/probe_sequences.cuh:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2021-2023, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cuco/detail/probe_sequence_impl.cuh>
20 | 
21 | namespace cuco::legacy {
22 | 
23 | /**
24 |  * @brief Public linear probing scheme class.
25 |  *
26 |  * Linear probing is efficient when few collisions are present. Performance hints:
27 |  * - Use linear probing when collisions are rare, e.g., at low occupancy or low multiplicity.
28 |  * - `CGSize` = 1 or 2 when hash map is small (10'000'000 or less), 4 or 8 otherwise.
29 |  *
30 |  * `Hash` should be a callable object type.
31 |  *
32 |  * @tparam CGSize Size of CUDA Cooperative Groups
33 |  * @tparam Hash Unary callable type
34 |  */
35 | template <uint32_t CGSize, typename Hash>
36 | class linear_probing : public detail::probe_sequence_base<CGSize> {
37 |  public:
38 |   using probe_sequence_base_type =
39 |     detail::probe_sequence_base<CGSize>;  ///< The base probe scheme type
40 |   using probe_sequence_base_type::cg_size;
41 |   using probe_sequence_base_type::vector_width;
42 | 
43 |   /// Implementation type for a given key type, value type and thread scope
44 |   template <typename Key, typename Value, cuda::thread_scope Scope>
45 |   using impl = detail::linear_probing_impl<Key, Value, Scope, vector_width(), CGSize, Hash>;
46 | };
47 | 
48 | /**
49 |  * @brief Public double hashing scheme class.
50 |  *
51 |  * Default probe sequence for `cuco::static_multimap`. Double hashing shows superior
52 |  * performance when dealing with high multiplicity and/or high occupancy use cases. Performance
53 |  * hints:
54 |  * - `CGSize` = 1 or 2 when hash map is small (10'000'000 or less), 4 or 8 otherwise.
55 |  *
56 |  * `Hash1` and `Hash2` should be callable object types.
57 |  *
58 |  * `Hash2` defaults to `Hash1` when it is not specified.
59 |  * @tparam CGSize Size of CUDA Cooperative Groups
60 |  * @tparam Hash1 Unary callable type
61 |  * @tparam Hash2 Unary callable type
62 |  */
63 | template <uint32_t CGSize, typename Hash1, typename Hash2 = Hash1>
64 | class double_hashing : public detail::probe_sequence_base<CGSize> {
65 |  public:
66 |   using probe_sequence_base_type =
67 |     detail::probe_sequence_base<CGSize>;  ///< The base probe scheme type
68 |   using probe_sequence_base_type::cg_size;
69 |   using probe_sequence_base_type::vector_width;
70 | 
71 |   /// Implementation type for a given key type, value type and thread scope
72 |   template <typename Key, typename Value, cuda::thread_scope Scope>
73 |   using impl = detail::double_hashing_impl<Key, Value, Scope, vector_width(), CGSize, Hash1, Hash2>;
74 | };
75 | 
76 | }  // namespace cuco::legacy
77 | 


--------------------------------------------------------------------------------
/include/cuco/utility/reduction_functors.cuh:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #pragma once
17 | 
18 | #include <cuda/atomic>
19 | 
20 | namespace cuco::reduce {
21 | 
22 | /**
23 |  * @brief Sum reduction functor for `insert-or-apply`, callable from device code
24 |  */
25 | struct plus {
26 |   /**
27 |    * @brief Atomically adds `val` to the payload via `fetch_add` with relaxed memory ordering
28 |    *
29 |    * @tparam T Type of the payload
30 |    * @tparam Scope The `cuda::thread_scope` of the given `atomic_ref`
31 |    *
32 |    * @param payload_ref An `atomic_ref` referring to the payload of the slot
33 |    * @param val The value to accumulate into the payload; the previous payload value
34 |    * returned by `fetch_add` is discarded.
35 |    */
36 |   template <typename T, cuda::thread_scope Scope>
37 |   __device__ void operator()(cuda::atomic_ref<T, Scope> payload_ref, T const& val)
38 |   {
39 |     payload_ref.fetch_add(val, cuda::memory_order_relaxed);
40 |   }
41 | };
42 | 
43 | /**
44 |  * @brief Max reduction functor for `insert-or-apply`, callable from device code
45 |  */
46 | struct max {
47 |   /**
48 |    * @brief Atomically replaces the payload via `fetch_max` with relaxed memory ordering
49 |    *
50 |    * @tparam T Type of the payload
51 |    * @tparam Scope The `cuda::thread_scope` of the given `atomic_ref`
52 |    *
53 |    * @param payload_ref An `atomic_ref` referring to the payload of the slot
54 |    * @param val The candidate value passed to `fetch_max`; the previous payload value
55 |    * returned by `fetch_max` is discarded.
56 |    */
57 |   template <typename T, cuda::thread_scope Scope>
58 |   __device__ void operator()(cuda::atomic_ref<T, Scope> payload_ref, T const& val)
59 |   {
60 |     payload_ref.fetch_max(val, cuda::memory_order_relaxed);
61 |   }
62 | };
63 | 
64 | /**
65 |  * @brief Min reduction functor for `insert-or-apply`, callable from device code
66 |  */
67 | struct min {
68 |   /**
69 |    * @brief Atomically replaces the payload via `fetch_min` with relaxed memory ordering
70 |    *
71 |    * @tparam T Type of the payload
72 |    * @tparam Scope The `cuda::thread_scope` of the given `atomic_ref`
73 |    *
74 |    * @param payload_ref An `atomic_ref` referring to the payload of the slot
75 |    * @param val The candidate value passed to `fetch_min`; the previous payload value
76 |    * returned by `fetch_min` is discarded.
77 |    */
78 |   template <typename T, cuda::thread_scope Scope>
79 |   __device__ void operator()(cuda::atomic_ref<T, Scope> payload_ref, T const& val)
80 |   {
81 |     payload_ref.fetch_min(val, cuda::memory_order_relaxed);
82 |   }
83 | };
84 | 
85 | }  // namespace cuco::reduce


--------------------------------------------------------------------------------
/tests/hyperloglog/unique_sequence_test.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <test_utils.hpp>
18 | 
19 | #include <cuco/hash_functions.cuh>
20 | #include <cuco/hyperloglog.cuh>
21 | 
22 | #include <thrust/device_vector.h>
23 | #include <thrust/sequence.h>
24 | 
25 | #include <catch2/catch_template_test_macros.hpp>
26 | #include <catch2/generators/catch_generators.hpp>
27 | 
28 | #include <cmath>
29 | #include <cstddef>
30 | #include <cstdint>
31 | 
32 | TEMPLATE_TEST_CASE_SIG("hyperloglog: unique sequence",
33 |                        "",
34 |                        ((typename T, typename Hash), T, Hash),
35 |                        (int32_t, cuco::xxhash_64<int32_t>),
36 |                        (int64_t, cuco::xxhash_64<int64_t>),
37 |                        (__int128_t, cuco::xxhash_64<__int128_t>))
38 | {
39 |   auto num_items_pow2 = GENERATE(25, 26, 28);
40 |   auto hll_precision  = GENERATE(8, 10, 12, 13, 18, 20);
41 |   auto sketch_size_kb = 4 * (1ull << hll_precision) / 1024;  // i.e. 4 * 2^precision, divided by 1024
42 |   INFO("hll_precision=" << hll_precision);
43 |   INFO("sketch_size_kb=" << sketch_size_kb);
44 |   INFO("num_items=2^" << num_items_pow2);
45 |   auto num_items = 1ull << num_items_pow2;
46 | 
47 |   // This factor determines the error threshold for passing the test
48 |   double constexpr tolerance_factor = 2.5;
49 |   // RSD for a given precision is given by the following formula
50 |   double const relative_standard_deviation =
51 |     1.04 / std::sqrt(static_cast<double>(1ull << hll_precision));
52 |   // Device buffer holding the input items
53 |   thrust::device_vector<T> items(num_items);
54 | 
55 |   // Generate `num_items` distinct items
56 |   thrust::sequence(items.begin(), items.end(), 0);
57 | 
58 |   // Initialize the estimator
59 |   cuco::hyperloglog<T, cuda::thread_scope_device, Hash> estimator{
60 |     cuco::sketch_size_kb(sketch_size_kb)};
61 |   // Nothing has been added yet, so the estimate must be zero
62 |   REQUIRE(estimator.estimate() == 0);
63 | 
64 |   // Add all items to the estimator
65 |   estimator.add(items.begin(), items.end());
66 |   // Keep the estimate for the distinct items; it is checked against `num_items` at the end
67 |   auto const estimate = estimator.estimate();
68 | 
69 |   // Adding the same items again should not affect the result
70 |   estimator.add(items.begin(), items.begin() + num_items / 2);
71 |   REQUIRE(estimator.estimate() == estimate);
72 | 
73 |   // Clearing the estimator should reset the estimate
74 |   estimator.clear();
75 |   REQUIRE(estimator.estimate() == 0);
76 |   // Relative deviation of the saved estimate from the true number of distinct items
77 |   double const relative_error =
78 |     std::abs((static_cast<double>(estimate) / static_cast<double>(num_items)) - 1.0);
79 | 
80 |   // Check if the error is acceptable
81 |   REQUIRE(relative_error < tolerance_factor * relative_standard_deviation);
82 | }
83 | 


--------------------------------------------------------------------------------
/._upstream/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to cuCollections
 2 | 
 3 | If you are interested in contributing to cuCollections, your contributions will fall
 4 | into three categories:
 5 | 1. You want to report a bug, feature request, or documentation issue
 6 |     - File an [issue](https://github.com/NVIDIA/cuCollections/issues/new/choose)
 7 |     describing what you encountered or what you want to see changed.
 8 |     - The RAPIDS team will evaluate the issues and triage them, scheduling
 9 |     them for a release. If you believe the issue needs priority attention,
10 |     comment on the issue to notify the team.
11 | 2. You want to propose a new Feature and implement it
12 |     - Post about your intended feature, and we shall discuss the design and
13 |     implementation.
14 |     - Once we agree that the plan looks good, go ahead and implement it, using
15 |     the [code contributions](https://github.com/NVIDIA/cuCollections/blob/master/CONTRIBUTING.md#code-contributions) guide below.
16 | 3. You want to implement a feature or bug-fix for an outstanding issue
17 |     - Follow the [code contributions](https://github.com/NVIDIA/cuCollections/blob/master/CONTRIBUTING.md#code-contributions) guide below.
18 |     - If you need more context on a particular issue, please ask and we shall
19 |     provide.
20 | 
21 | ## Code contributions
22 | 
23 | ### Your first issue
24 | 
25 | 1. Read the project's [README.md](https://github.com/NVIDIA/cuCollections/blob/master/README.md)
26 |     to learn how to set up the development environment
27 | 2. Find an issue to work on. The best way is to look for the [good first issue](https://github.com/NVIDIA/cuCollections/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22)
28 |     or [help wanted](https://github.com/NVIDIA/cuCollections/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) labels
29 | 3. Comment on the issue saying you are going to work on it
30 | 4. Code! Make sure to update unit tests!
31 | 5. When done, [create your pull request](https://github.com/NVIDIA/cuCollections/compare)
32 | 6. Verify that CI passes all [status checks](https://help.github.com/articles/about-status-checks/). Fix if needed
33 | 7. Wait for other developers to review your code and update code as needed
34 | 8. Once reviewed and approved, a RAPIDS developer will merge your pull request
35 | 
36 | Remember, if you are unsure about anything, don't hesitate to comment on issues
37 | and ask for clarifications!
38 | 
39 | ### Seasoned developers
40 | 
41 | Once you have gotten your feet wet and are more comfortable with the code, you
42 | can look at the prioritized issues of our next release in our [project boards](https://github.com/NVIDIA/cuCollections/projects).
43 | 
44 | > **Pro Tip:** Always look at the release board with the highest number for
45 | issues to work on. This is where RAPIDS developers also focus their efforts.
46 | 
47 | Look at the unassigned issues, and find an issue you are comfortable with
48 | contributing to. Start with _Step 3_ from above, commenting on the issue to let
49 | others know you are working on it. If you have any questions related to the
50 | implementation of the issue, ask them in the issue instead of the PR.
51 | 
52 | ## Attribution
53 | Portions adopted from https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md
54 | 


--------------------------------------------------------------------------------
/tests/test_utils.cuh:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2021-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | // Modifications Copyright (c) 2024-2025 Advanced Micro Devices, Inc.
18 | // Permission is hereby granted, free of charge, to any person obtaining a copy
19 | // of this software and associated documentation files (the "Software"), to deal
20 | // in the Software without restriction, including without limitation the rights
21 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
22 | // copies of the Software, and to permit persons to whom the Software is
23 | // furnished to do so, subject to the following conditions:
24 | // The above copyright notice and this permission notice shall be included in
25 | // all copies or substantial portions of the Software.
26 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
31 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
32 | // THE SOFTWARE.
33 | 
34 | #include <cuco/cuda_runtime.h>
35 | 
36 | #pragma once
37 | 
38 | #include <cuda/atomic>
39 | 
40 | namespace cuco {
41 | namespace test {
42 | namespace detail {
43 | 
44 | template <typename Iterator, typename Predicate>  // Kernel: adds to `*count` the number of elements in [begin, end) for which `p` is true
45 | __global__ void count_if(Iterator begin,
46 |                          Iterator end,
47 |                          cuda::atomic<int, cuda::thread_scope_device>* count,
48 |                          Predicate p)
49 | {
50 |   auto tid = blockDim.x * blockIdx.x + threadIdx.x;  // global thread index along x
51 |   auto it  = begin + tid;
52 | 
53 |   while (it < end) {
54 |     count->fetch_add(static_cast<int>(p(*it)));  // adds 1 when the predicate holds, 0 otherwise
55 |     it += gridDim.x * blockDim.x;  // advance by the total number of launched threads along x
56 |   }
57 | }
58 | 
59 | template <typename Iterator1, typename Iterator2, typename Predicate>  // Kernel: adds to `*count` the number of positions i where p(begin1[i], begin2[i]) is true
60 | __global__ void count_if(Iterator1 begin1,
61 |                          Iterator1 end1,
62 |                          Iterator2 begin2,
63 |                          cuda::atomic<int, cuda::thread_scope_device>* count,
64 |                          Predicate p)
65 | {
66 |   auto const n = end1 - begin1;  // number of pairs; the second range is read at the same offsets
67 |   auto tid     = blockDim.x * blockIdx.x + threadIdx.x;
68 | 
69 |   while (tid < n) {
70 |     auto cmp = begin1 + tid;
71 |     auto ref = begin2 + tid;
72 |     count->fetch_add(static_cast<int>(p(*cmp, *ref)));  // adds 1 when the predicate holds, 0 otherwise
73 |     tid += gridDim.x * blockDim.x;  // advance by the total number of launched threads along x
74 |   }
75 | }
76 | 
77 | }  // namespace detail
78 | }  // namespace test
79 | }  // namespace cuco
80 | 


--------------------------------------------------------------------------------
/examples/static_multiset/host_bulk_example.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <cuco/static_multiset.cuh>
18 | 
19 | #include <thrust/device_vector.h>
20 | #include <thrust/functional.h>
21 | #include <thrust/logical.h>
22 | #include <thrust/sequence.h>
23 | 
24 | #include <iostream>
25 | #include <limits>
26 | 
27 | /**
28 |  * @file host_bulk_example.cu
29 |  * @brief Demonstrates usage of the static_multiset "bulk" host APIs.
30 |  *
31 |  * The bulk APIs are only invocable from the host and are used for doing operations like `insert` or
32 |  * `retrieve` on a multiset of keys.
33 |  *
34 |  */
35 | int main(void)  // Returns 0 when count and retrieve both report `num_keys` matches, 1 otherwise
36 | {
37 |   using key_type = int;
38 | 
39 |   // Empty slots are represented by reserved "sentinel" values. These values should be selected such
40 |   // that they never occur in your input data.
41 |   key_type constexpr empty_key_sentinel = -1;
42 | 
43 |   // Number of keys to be inserted
44 |   std::size_t constexpr num_keys = 50'000;
45 | 
46 |   // Compute capacity based on a 50% load factor
47 |   auto constexpr load_factor = 0.5;
48 |   std::size_t const capacity = std::ceil(num_keys / load_factor);
49 | 
50 |   // Constructs a multiset with at least `capacity` slots using -1 as the empty key sentinel.
51 |   cuco::static_multiset<key_type> multiset{capacity, cuco::empty_key{empty_key_sentinel}};
52 | 
53 |   // Create a sequence of keys {0, 1, 2, .., i}
54 |   // We're going to insert each key twice so we only need 'num_keys / 2' distinct keys.
55 |   thrust::device_vector<key_type> keys(num_keys / 2);
56 |   thrust::sequence(keys.begin(), keys.end(), 0);
57 | 
58 |   // Inserts all keys into the multiset
59 |   multiset.insert(keys.begin(), keys.end());
60 |   // Insert the same set of keys again, so each distinct key should occur twice in the multiset
61 |   multiset.insert(keys.begin(), keys.end());
62 | 
63 |   // Counts the occurrences of matching keys contained in the multiset.
64 |   std::size_t const counted_output_size = multiset.count(keys.begin(), keys.end());
65 | 
66 |   // Storage for result
67 |   thrust::device_vector<key_type> output_probes(counted_output_size);
68 |   thrust::device_vector<key_type> output_matches(counted_output_size);
69 | 
70 |   // Retrieve all matching keys
71 |   auto const [output_probes_end, _] =
72 |     multiset.retrieve(keys.begin(), keys.end(), output_probes.begin(), output_matches.begin());
73 |   std::size_t const retrieved_output_size = output_probes_end - output_probes.begin();
74 | 
75 |   if ((retrieved_output_size == counted_output_size) and (retrieved_output_size == num_keys)) {
76 |     std::cout << "Success! Found all keys.\n";
77 |     return 0;
78 |   }
79 | 
80 |   std::cout << "Fail! Something went wrong.\n";
81 |   return 1;  // non-zero exit status so scripts and CI can detect the failure
82 | }


--------------------------------------------------------------------------------
/include/cuco/detail/storage/storage_base.cuh:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2022-2024, NVIDIA CORPORATION.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | #pragma once
 18 | 
 19 | #include <cstddef>
 20 | 
 21 | namespace cuco {
 22 | namespace detail {
 23 | /**
 24 |  * @brief Custom deleter for unique pointer that deallocates values through an allocator.
 25 |  *
 26 |  * @tparam SizeType Type of device storage size
 27 |  * @tparam Allocator Type of allocator used for device storage
 28 |  */
 29 | template <typename SizeType, typename Allocator>
 30 | struct custom_deleter {
 31 |   using pointer = typename Allocator::value_type*;  ///< Value pointer type
 32 | 
 33 |   /**
 34 |    * @brief Constructor of custom deleter.
 35 |    *
 36 |    * @param size Number of values to deallocate
 37 |    * @param allocator Allocator used for deallocating device storage (stored by reference)
 38 |    */
 39 |   explicit constexpr custom_deleter(SizeType size, Allocator& allocator)
 40 |     : size_{size}, allocator_{allocator}
 41 |   {
 42 |   }
 43 | 
 44 |   /**
 45 |    * @brief Deallocates `size_` values starting at `ptr` via the stored allocator.
 46 |    *
 47 |    * @param ptr Pointer to the first value for deallocation
 48 |    */
 49 |   void operator()(pointer ptr) { allocator_.deallocate(ptr, size_); }
 50 | 
 51 |   SizeType size_;         ///< Number of values to delete
 52 |   Allocator& allocator_;  ///< Allocator used for deallocating values; a reference, so it must outlive this deleter
 53 | };
 54 | 
 55 | /**
 56 |  * @brief Base class of open addressing storage.
 57 |  *
 58 |  * This class should not be used directly. It only stores the extent and exposes it to callers.
 59 |  *
 60 |  * @tparam Extent Type of extent denoting storage capacity
 61 |  */
 62 | template <typename Extent>
 63 | class storage_base {
 64 |  public:
 65 |   using extent_type = Extent;                            ///< Storage extent type
 66 |   using size_type   = typename extent_type::value_type;  ///< Storage size type
 67 | 
 68 |   /**
 69 |    * @brief Constructor of base storage.
 70 |    *
 71 |    * @param size Extent denoting the number of elements in the storage
 72 |    */
 73 |   __host__ __device__ explicit constexpr storage_base(Extent size) : extent_{size} {}
 74 | 
 75 |   /**
 76 |    * @brief Gets the total number of elements in the current storage.
 77 |    *
 78 |    * @return The stored extent converted to `size_type`
 79 |    */
 80 |   [[nodiscard]] __host__ __device__ constexpr size_type capacity() const noexcept
 81 |   {
 82 |     return static_cast<size_type>(extent_);
 83 |   }
 84 | 
 85 |   /**
 86 |    * @brief Gets the extent of the current storage.
 87 |    *
 88 |    * @return A copy of the stored extent.
 89 |    */
 90 |   [[nodiscard]] __host__ __device__ constexpr extent_type extent() const noexcept
 91 |   {
 92 |     return extent_;
 93 |   }
 94 | 
 95 |  protected:
 96 |   extent_type extent_;  ///< Total number of elements
 97 | };
 98 | 
 99 | }  // namespace detail
100 | }  // namespace cuco
101 | 


--------------------------------------------------------------------------------
/tests/utility/extent_test.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | // Modifications Copyright (c) 2024-2025 Advanced Micro Devices, Inc.
18 | // Permission is hereby granted, free of charge, to any person obtaining a copy
19 | // of this software and associated documentation files (the "Software"), to deal
20 | // in the Software without restriction, including without limitation the rights
21 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
22 | // copies of the Software, and to permit persons to whom the Software is
23 | // furnished to do so, subject to the following conditions:
24 | // The above copyright notice and this permission notice shall be included in
25 | // all copies or substantial portions of the Software.
26 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
31 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
32 | // THE SOFTWARE.
33 | 
34 | #include <test_utils.hpp>
35 | 
36 | #include <cuco/extent.cuh>
37 | 
38 | #include <catch2/catch_template_test_macros.hpp>
39 | 
40 | TEMPLATE_TEST_CASE_SIG(
41 |   "utility extent tests", "", ((typename SizeType, int dummy), SizeType, dummy), (int32_t,1), (int64_t,1), (std::size_t,1)) // FIXME(HIP/AMD): dummy fixes ambiguous get_wrapper calls in catch2
42 | {
43 |   SizeType constexpr num            = 1234;  // requested number of elements
44 |   SizeType constexpr gold_reference = 314;  // 157 x 2; expected value of make_bucket_extent<cg_size, bucket_size>(num)
45 |   auto constexpr cg_size            = 2;
46 |   auto constexpr bucket_size        = 4;
47 | 
48 |   SECTION("Static extent must be evaluated at compile time.")
49 |   {
50 |     auto const size = cuco::extent<SizeType, num>{};
51 |     STATIC_REQUIRE(num == size);
52 |   }
53 | 
54 |   SECTION("Dynamic extent is evaluated at run time.")
55 |   {
56 |     auto const size = cuco::extent(num);  // extent type deduced from the argument
57 |     REQUIRE(size == num);
58 |   }
59 | 
60 |   SECTION("Compute static valid extent at compile time.")
61 |   {
62 |     auto constexpr size = cuco::extent<SizeType, num>{};
63 |     auto constexpr res  = cuco::make_bucket_extent<cg_size, bucket_size>(size);
64 |     STATIC_REQUIRE(gold_reference == res.value());
65 |   }
66 | 
67 |   SECTION("Compute dynamic valid extent at run time.")
68 |   {
69 |     auto const size = cuco::extent<SizeType>{num};
70 |     auto const res  = cuco::make_bucket_extent<cg_size, bucket_size>(size);
71 |     REQUIRE(gold_reference == res.value());  // same expected value as the compile-time variant
72 |   }
73 | }
74 | 


--------------------------------------------------------------------------------
/tests/dynamic_bitset/select_test.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <test_utils.hpp>
18 | 
19 | #include <cuco/detail/trie/dynamic_bitset/dynamic_bitset.cuh>
20 | 
21 | #include <thrust/device_vector.h>
22 | #include <thrust/host_vector.h>
23 | #include <thrust/sequence.h>
24 | 
25 | #include <catch2/catch_test_macros.hpp>
26 | 
27 | template <class BitsetRef, typename size_type, typename OutputIt>  // Kernel: output[i] = ref.select_false(i) for every i in [0, num_elements)
28 | __global__ void select_false_kernel(BitsetRef ref, size_type num_elements, OutputIt output)
29 | {
30 |   cuco::detail::index_type index  = static_cast<cuco::detail::index_type>(blockIdx.x) * blockDim.x + threadIdx.x;  // widen before multiplying so the product is not truncated to 32 bits
31 |   cuco::detail::index_type stride = static_cast<cuco::detail::index_type>(gridDim.x) * blockDim.x;  // total number of launched threads along x
32 |   while (index < num_elements) {
33 |     output[index] = ref.select_false(index);
34 |     index += stride;
35 |   }
36 | }
37 | 
38 | extern bool modulo_bitgen(uint64_t i);  // Defined in get_test.cu
39 | 
40 | TEST_CASE("dynamic_bitset select test", "")
41 | {
42 |   cuco::experimental::detail::dynamic_bitset bv;
43 | 
44 |   using size_type = std::size_t;
45 |   constexpr size_type num_elements{4000};
46 | 
47 |   size_type num_set = 0;  // number of positions for which modulo_bitgen returns true
48 |   for (size_type i = 0; i < num_elements; i++) {
49 |     bv.push_back(modulo_bitgen(i));
50 |     num_set += modulo_bitgen(i);
51 |   }
52 | 
53 |   // Check select: the i-th result must equal the position of the i-th bit generated as true
54 |   {
55 |     thrust::device_vector<size_type> keys(num_set);
56 |     thrust::sequence(keys.begin(), keys.end(), 0);
57 | 
58 |     thrust::device_vector<size_type> d_selects(num_set);
59 | 
60 |     bv.select(keys.begin(), keys.end(), d_selects.begin());
61 | 
62 |     thrust::host_vector<size_type> h_selects = d_selects;
63 | 
64 |     size_type num_matches = 0;
65 |     size_type cur_set_pos = -1lu;  // maximum size_type value: the first increment below wraps to position 0
66 |     for (size_type i = 0; i < num_set; i++) {
67 |       do {
68 |         cur_set_pos++;
69 |       } while (cur_set_pos < num_elements and !modulo_bitgen(cur_set_pos));  // skip positions generated as false
70 | 
71 |       num_matches += cur_set_pos == h_selects[i];
72 |     }
73 |     REQUIRE(num_matches == num_set);
74 |   }
75 | 
76 |   // Check select_false: same check against the positions generated as false
77 |   {
78 |     size_type num_not_set = num_elements - num_set;
79 | 
80 |     auto ref = bv.ref();
81 |     thrust::device_vector<size_type> device_result(num_not_set);
82 |     select_false_kernel<<<1, 1024>>>(ref, num_not_set, device_result.data());  // one block of 1024 threads
83 |     thrust::host_vector<size_type> host_result = device_result;
84 | 
85 |     size_type num_matches     = 0;
86 |     size_type cur_not_set_pos = -1lu;  // maximum size_type value: the first increment below wraps to position 0
87 |     for (size_type i = 0; i < num_not_set; i++) {
88 |       do {
89 |         cur_not_set_pos++;
90 |       } while (cur_not_set_pos < num_elements and modulo_bitgen(cur_not_set_pos));  // skip positions generated as true
91 | 
92 |       num_matches += cur_not_set_pos == host_result[i];
93 |     }
94 |     REQUIRE(num_matches == num_not_set);
95 |   }
96 | }
97 | 


--------------------------------------------------------------------------------
/examples/static_map/host_bulk_example.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2020-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <cuco/static_map.cuh>
18 | 
19 | #include <thrust/device_vector.h>
20 | #include <thrust/equal.h>
21 | #include <thrust/iterator/zip_iterator.h>
22 | #include <thrust/sequence.h>
23 | #include <thrust/transform.h>
24 | 
25 | #include <cmath>
26 | #include <cstddef>
27 | #include <iostream>
28 | #include <limits>
29 | 
30 | /**
31 |  * @file host_bulk_example.cu
32 |  * @brief Demonstrates usage of the static_map "bulk" host APIs.
33 |  *
34 |  * The bulk APIs are only invocable from the host and are used for doing operations like insert or
35 |  * find on a set of keys.
36 |  *
37 |  */
38 | 
39 | int main(void)  // Returns 0 when every found value matches the inserted value, 1 otherwise
40 | {
41 |   using Key   = int;
42 |   using Value = int;
43 | 
44 |   // Empty slots are represented by reserved "sentinel" values. These values should be selected such
45 |   // that they never occur in your input data.
46 |   Key constexpr empty_key_sentinel     = -1;
47 |   Value constexpr empty_value_sentinel = -1;
48 | 
49 |   // Number of key/value pairs to be inserted
50 |   std::size_t constexpr num_keys = 50'000;
51 | 
52 |   // Compute capacity based on a 50% load factor
53 |   auto constexpr load_factor = 0.5;
54 |   std::size_t const capacity = std::ceil(num_keys / load_factor);
55 | 
56 |   // Constructs a map with "capacity" slots using -1 and -1 as the empty key/value sentinels.
57 |   auto map = cuco::static_map{
58 |     capacity, cuco::empty_key{empty_key_sentinel}, cuco::empty_value{empty_value_sentinel}};
59 | 
60 |   // Create a sequence of keys and values {{0,0}, {1,1}, ... {i,i}}
61 |   thrust::device_vector<Key> insert_keys(num_keys);
62 |   thrust::sequence(insert_keys.begin(), insert_keys.end(), 0);
63 |   thrust::device_vector<Value> insert_values(num_keys);
64 |   thrust::sequence(insert_values.begin(), insert_values.end(), 0);
65 |   auto zipped =
66 |     thrust::make_zip_iterator(thrust::make_tuple(insert_keys.begin(), insert_values.begin()));
67 | 
68 |   // Inserts all pairs into the map
69 |   map.insert(zipped, zipped + insert_keys.size());
70 | 
71 |   // Storage for found values
72 |   thrust::device_vector<Value> found_values(num_keys);
73 | 
74 |   // Finds all keys {0, 1, 2, ...} and stores associated values into `found_values`
75 |   // If a key `insert_keys[i]` doesn't exist, `found_values[i] == empty_value_sentinel`
76 |   map.find(insert_keys.begin(), insert_keys.end(), found_values.begin());
77 | 
78 |   // Verify that all the found values match the inserted values
79 |   bool const all_values_match =
80 |     thrust::equal(found_values.begin(), found_values.end(), insert_values.begin());
81 | 
82 |   std::cout << (all_values_match ? "Success! Found all values.\n" : "Fail! Something went wrong.\n");
83 | 
84 |   return all_values_match ? 0 : 1;  // non-zero exit status so a mismatch is not silent
85 | }
86 | 


--------------------------------------------------------------------------------
/ci/pre-commit/doxygen.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # Modifications Copyright (c) 2024-2025 Advanced Micro Devices, Inc.
18 | # Permission is hereby granted, free of charge, to any person obtaining a copy
19 | # of this software and associated documentation files (the "Software"), to deal
20 | # in the Software without restriction, including without limitation the rights
21 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
22 | # copies of the Software, and to permit persons to whom the Software is
23 | # furnished to do so, subject to the following conditions:
24 | # The above copyright notice and this permission notice shall be included in
25 | # all copies or substantial portions of the Software.
26 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
31 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
32 | # THE SOFTWARE.
33 | 
34 | # hipCollections doxygen warnings check #
35 | ########################################
36 | 
37 | # skip if doxygen is not installed
38 | if ! [ -x "$(command -v doxygen)" ]; then
39 |   echo -e "warning: Doxygen is not installed"
40 |   exit 0
41 | fi
42 | 
43 | # Utility to return version as number for comparison (each dot-separated field is zero-padded)
44 | function version { echo "$@" | awk -F. '{ printf("%d%03d%03d%03d\n", $1,$2,$3,$4); }'; }
45 | 
46 | # Doxygen supported version 1.8.20 to 1.9.1; any other version skips the check with exit status 0
47 | DOXYGEN_VERSION=$(doxygen --version)
48 | if [ "$(version "$DOXYGEN_VERSION")" -lt "$(version "1.8.20")" ] ||  [ "$(version "$DOXYGEN_VERSION")" -gt "$(version "1.9.1")" ]; then
49 |   echo -e "Warning: Unsupported Doxygen version $DOXYGEN_VERSION"
50 |   echo -e "Expecting Doxygen version from 1.8.20 to 1.9.1"
51 |   exit 0
52 | fi
53 | 
54 | # Run doxygen, ignore missing tag files error
55 | TAG_ERROR1="error: Tag file '.*.tag' does not exist or is not a file. Skipping it..."
56 | TAG_ERROR2="error: cannot open tag file .*.tag for writing"
57 | DOXYGEN_STDERR=`cd doxygen && { cat Doxyfile ; echo QUIET = YES; echo GENERATE_HTML = NO; }  | doxygen - 2>&1 | sed "/\($TAG_ERROR1\|$TAG_ERROR2\)/d"`
58 | RETVAL=$?  # status of the last pipeline stage (sed), or of a failed `cd`; doxygen's own status is not captured
59 | 
60 | if [ "$RETVAL" != "0" ] || [ ! -z "$DOXYGEN_STDERR" ]; then
61 |   echo -e "\n>>>> FAILED: doxygen check; begin output\n"
62 |   printf '%s\n' "$DOXYGEN_STDERR"  # print verbatim: echo -e would interpret backslash sequences in the messages
63 |   echo -e "\n>>>> FAILED: doxygen check; end output\n"
64 |   RETVAL=1 #because return value is not generated by doxygen 1.8.17
65 | else
66 |   echo -e "\n>>>> PASSED: doxygen check\n"
67 | fi
68 | 
69 | exit "$RETVAL"
70 | 


--------------------------------------------------------------------------------
/tests/static_multiset/count_test.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <test_utils.hpp>
18 | 
19 | #include <cuco/static_multiset.cuh>
20 | 
21 | #include <cuda/functional>
22 | #include <thrust/device_vector.h>
23 | #include <thrust/execution_policy.h>
24 | #include <thrust/generate.h>
25 | #include <thrust/sequence.h>
26 | #include <thrust/shuffle.h>
27 | 
28 | #include <catch2/catch_template_test_macros.hpp>
29 | 
30 | using size_type = int32_t;
31 | 
32 | template <typename Set>  // Checks Set::count before insertion, after inserting a sequence of keys, and with repeated query keys
33 | void test_unique_sequence(Set& set, size_type num_keys)
34 | {
35 |   using Key = typename Set::key_type;
36 | 
37 |   thrust::device_vector<Key> d_keys(num_keys);
38 |   auto const keys_begin = d_keys.begin();
39 | 
40 |   SECTION("Count of empty set should be zero.")
41 |   {
42 |     auto const count = set.count(keys_begin, keys_begin + num_keys);  // nothing has been inserted at this point
43 |     REQUIRE(count == 0);
44 |   }
45 | 
46 |   thrust::sequence(keys_begin, keys_begin + num_keys);  // fill the keys with a sequence, then insert them all
47 |   set.insert(keys_begin, keys_begin + num_keys);
48 | 
49 |   SECTION("Count of n unique keys should be n.")
50 |   {
51 |     auto const count = set.count(keys_begin, keys_begin + num_keys);
52 |     REQUIRE(count == num_keys);
53 |   }
54 | 
55 |   auto constexpr multiplicity = 3;  // number of times each key is queried below
56 |   auto query_begin            = thrust::make_transform_iterator(
57 |     thrust::make_counting_iterator<size_type>(0),
58 |     cuda::proclaim_return_type<Key>([] __device__(auto i) { return Key{i / multiplicity}; }));  // query i maps to key i / multiplicity
59 | 
60 |   SECTION("Count of 3n unique keys should be 3n.")
61 |   {
62 |     auto const count = set.count(query_begin, query_begin + num_keys * multiplicity);  // the queries are repeated keys, not unique ones
63 |     REQUIRE(count == num_keys * multiplicity);
64 |   }
65 | }
66 | 
67 | TEMPLATE_TEST_CASE_SIG(
68 |   "static_multiset count tests",
69 |   "",
70 |   ((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize),
71 |   (int32_t, cuco::test::probe_sequence::double_hashing, 1),
72 |   (int32_t, cuco::test::probe_sequence::double_hashing, 2),
73 |   (int64_t, cuco::test::probe_sequence::double_hashing, 1),
74 |   (int64_t, cuco::test::probe_sequence::double_hashing, 2),
75 |   (int32_t, cuco::test::probe_sequence::linear_probing, 1),
76 |   (int32_t, cuco::test::probe_sequence::linear_probing, 2),
77 |   (int64_t, cuco::test::probe_sequence::linear_probing, 1),
78 |   (int64_t, cuco::test::probe_sequence::linear_probing, 2))
79 | {
80 |   constexpr size_type num_keys{555};
81 | 
82 |   using probe = std::conditional_t<Probe == cuco::test::probe_sequence::linear_probing,
83 |                                    cuco::linear_probing<CGSize, cuco::default_hash_function<Key>>,
84 |                                    cuco::double_hashing<CGSize, cuco::default_hash_function<Key>>>;
85 | 
86 |   auto set =
87 |     cuco::static_multiset{num_keys, cuco::empty_key<Key>{-1}, {}, probe{}, {}, cuco::storage<2>{}};
88 | 
89 |   test_unique_sequence(set, num_keys);
90 | }
91 | 


--------------------------------------------------------------------------------
/tests/static_set/retrieve_all_test.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <test_utils.hpp>
18 | 
19 | #include <cuco/static_set.cuh>
20 | 
21 | #include <thrust/device_vector.h>
22 | #include <thrust/distance.h>
23 | #include <thrust/functional.h>
24 | #include <thrust/iterator/counting_iterator.h>
25 | #include <thrust/sequence.h>
26 | #include <thrust/sort.h>
27 | 
28 | #include <catch2/catch_template_test_macros.hpp>
29 | 
30 | template <typename Set>
31 | void test_unique_sequence(Set& set, std::size_t num_keys)
32 | {
33 |   using Key = typename Set::key_type;
34 | 
35 |   thrust::device_vector<Key> d_keys(num_keys);
36 |   thrust::sequence(d_keys.begin(), d_keys.end());
37 |   auto keys_begin = d_keys.begin();
38 | 
39 |   SECTION("Non-inserted keys should not be contained.")
40 |   {
41 |     REQUIRE(set.size() == 0);
42 | 
43 |     auto keys_end = set.retrieve_all(keys_begin);
44 |     REQUIRE(std::distance(keys_begin, keys_end) == 0);
45 |   }
46 | 
47 |   set.insert(keys_begin, keys_begin + num_keys);
48 |   REQUIRE(set.size() == num_keys);
49 | 
50 |   SECTION("All inserted key/value pairs should be contained.")
51 |   {
52 |     thrust::device_vector<Key> d_res(num_keys);
53 |     auto d_res_end = set.retrieve_all(d_res.begin());
54 |     thrust::sort(d_res.begin(), d_res_end);
55 |     REQUIRE(cuco::test::equal(
56 |       d_res.begin(), d_res_end, thrust::counting_iterator<Key>(0), thrust::equal_to<Key>{}));
57 |   }
58 | }
59 | 
60 | TEMPLATE_TEST_CASE_SIG(
61 |   "static_set::retrieve_all tests",
62 |   "",
63 |   ((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize),
64 |   (int32_t, cuco::test::probe_sequence::double_hashing, 1),
65 |   (int32_t, cuco::test::probe_sequence::double_hashing, 2),
66 |   (int64_t, cuco::test::probe_sequence::double_hashing, 1),
67 |   (int64_t, cuco::test::probe_sequence::double_hashing, 2),
68 |   (int32_t, cuco::test::probe_sequence::linear_probing, 1),
69 |   (int32_t, cuco::test::probe_sequence::linear_probing, 2),
70 |   (int64_t, cuco::test::probe_sequence::linear_probing, 1),
71 |   (int64_t, cuco::test::probe_sequence::linear_probing, 2))
72 | {
73 |   constexpr std::size_t num_keys{400};
74 |   constexpr double desired_load_factor = 1.;
75 |   auto constexpr gold_capacity         = CGSize == 1 ? 409  // 409 x 1 x 1
76 |                                                      : 422  // 211 x 2 x 1
77 |     ;
78 | 
79 |   using probe = std::conditional_t<Probe == cuco::test::probe_sequence::linear_probing,
80 |                                    cuco::linear_probing<CGSize, cuco::default_hash_function<Key>>,
81 |                                    cuco::double_hashing<CGSize, cuco::default_hash_function<Key>>>;
82 | 
83 |   auto set = cuco::static_set{num_keys, desired_load_factor, cuco::empty_key<Key>{-1}, {}, probe{}};
84 | 
85 |   REQUIRE(set.capacity() == gold_capacity);
86 | 
87 |   test_unique_sequence(set, num_keys);
88 | }
89 | 


--------------------------------------------------------------------------------
/.github/workflows/verify-devcontainers.yml:
--------------------------------------------------------------------------------
 1 | name: Verify devcontainers
 2 | 
 3 | on:
 4 |   workflow_call:
 5 | 
 6 | defaults:
 7 |   run:
 8 |     shell: bash -euo pipefail {0}
 9 | 
10 | jobs:
11 |   verify-make-devcontainers:
12 |     name: Verify devcontainers
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |     - name: Checkout repository
16 |       uses: actions/checkout@v3
17 |     - name: Setup jq and yq
18 |       run: |
19 |         sudo apt-get update
20 |         sudo apt-get install jq -y
21 |         sudo wget -O /usr/local/bin/yq https://github.com/mikefarah/yq/releases/download/v4.34.2/yq_linux_amd64
22 |         sudo chmod +x /usr/local/bin/yq
23 |     - name: Run the script to generate devcontainer files
24 |       run: |
25 |         ./.devcontainer/make_devcontainers.sh --verbose
26 |     - name: Check for changes
27 |       run: |
28 |         if [[ $(git diff --stat) != '' ]]; then
29 |           git diff --minimal
30 |           echo "::error:: Dev Container files are out of date. Run the .devcontainer/make_devcontainers.sh script and commit the changes."
31 |           exit 1
32 |         else
33 |           echo "::notice::Dev Container files are up-to-date."
34 |         fi
35 |   get-devcontainer-list:
36 |     needs: verify-make-devcontainers
37 |     name: List devcontainers
38 |     runs-on: ubuntu-latest
39 |     outputs:
40 |       devcontainers: ${{ steps.get-list.outputs.devcontainers }}
41 |     steps:
42 |     - name: Check out the code
43 |       uses: actions/checkout@v3
44 |     - name: Get list of devcontainer.json paths and names
45 |       id: get-list
46 |       run: |
47 |         devcontainers=$(find .devcontainer/ -name 'devcontainer.json' | while read -r devcontainer; do
48 |           jq --arg path "$devcontainer" '{path: $path, name: .name}' "$devcontainer"
49 |           done | jq -s -c .)
50 |         echo "devcontainers=${devcontainers}" | tee --append "${GITHUB_OUTPUT}"
51 |   verify-devcontainers:
52 |     needs: get-devcontainer-list
53 |     name: ${{matrix.devcontainer.name}}
54 |     runs-on: ubuntu-latest
55 |     strategy:
56 |       fail-fast: false
57 |       matrix:
58 |         devcontainer: ${{fromJson(needs.get-devcontainer-list.outputs.devcontainers)}}
59 |     permissions:
60 |       id-token: write
61 |       contents: read
62 |     steps:
63 |     - name: Check out the code
64 |       uses: actions/checkout@v3
65 |       # devcontainers/ci doesn't support nested devcontainer.json files, so we need to copy the devcontainer.json
66 |       # file to the top-level .devcontainer/ directory
67 |     - name: Copy devcontainer.json to .devcontainer/
68 |       run: |
69 |         src="${{ matrix.devcontainer.path }}"
70 |         dst=".devcontainer/devcontainer.json"
71 |         if [[ "$src" != "$dst" ]]; then
72 |           cp "$src" "$dst"
73 |         fi
74 |       # We don't really need sccache configured, but we need the AWS credentials envvars to be set
75 |       # in order to avoid the devcontainer hanging waiting for GitHub authentication
76 |     - name: Configure credentials and environment variables for sccache
77 |       uses: ./.github/actions/configure_cccl_sccache
78 |     - name: Run in devcontainer
79 |       uses: devcontainers/ci@v0.3
80 |       with:
81 |         push: never
82 |         env: |
83 |           SCCACHE_REGION=${{ env.SCCACHE_REGION }}
84 |           AWS_ACCESS_KEY_ID=${{ env.AWS_ACCESS_KEY_ID }}
85 |           AWS_SESSION_TOKEN=${{ env.AWS_SESSION_TOKEN }}
86 |           AWS_SECRET_ACCESS_KEY=${{ env.AWS_SECRET_ACCESS_KEY }}
87 |         runCmd: |
88 |           .devcontainer/verify_devcontainer.sh


--------------------------------------------------------------------------------
/include/cuco/hash_functions.cuh:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | #pragma once
 18 | 
 19 | #include <cuco/detail/hash_functions/identity_hash.cuh>
 20 | #include <cuco/detail/hash_functions/murmurhash3.cuh>
 21 | #include <cuco/detail/hash_functions/xxhash.cuh>
 22 | 
 23 | #include <thrust/functional.h>
 24 | 
 25 | namespace cuco {
 26 | 
 27 | /**
 28 |  * @brief An Identity hash function to hash the given argument on host and device
 29 |  *
 30 |  * @throw A key must not be larger than uint64_t
 31 |  *
 32 |  * @tparam Key The type of the values to hash
 33 |  */
 34 | template <typename Key>
 35 | using identity_hash = detail::identity_hash<Key>;
 36 | 
 37 | /**
 38 |  * @brief The 32-bit integer finalizer function of `MurmurHash3` to hash the given argument on host
 39 |  * and device.
 40 |  *
 41 |  * @throw Key type must be 4 bytes in size
 42 |  *
 43 |  * @tparam Key The type of the values to hash
 44 |  */
 45 | template <typename Key>
 46 | using murmurhash3_fmix_32 = detail::MurmurHash3_fmix32<Key>;
 47 | 
 48 | /**
 49 |  * @brief The 64-bit integer finalizer function of `MurmurHash3` to hash the given argument on host
 50 |  * and device.
 51 |  *
 52 |  * @throw Key type must be 8 bytes in size
 53 |  *
 54 |  * @tparam Key The type of the values to hash
 55 |  */
 56 | template <typename Key>
 57 | using murmurhash3_fmix_64 = detail::MurmurHash3_fmix64<Key>;
 58 | 
 59 | /**
 60 |  * @brief A 32-bit `MurmurHash3` hash function to hash the given argument on host and device.
 61 |  *
 62 |  * @tparam Key The type of the values to hash
 63 |  */
 64 | template <typename Key>
 65 | using murmurhash3_32 = detail::MurmurHash3_32<Key>;
 66 | 
 67 | /**
 68 |  * @brief The x64 variant of the 128-bit `MurmurHash3` hash function, usable on host and device.
 69 |  *
 70 |  * @tparam Key The type of the values to hash
 71 |  */
 72 | template <typename Key>
 73 | using murmurhash3_x64_128 = detail::MurmurHash3_x64_128<Key>;
 74 | 
 75 | /**
 76 |  * @brief The x86 variant of the 128-bit `MurmurHash3` hash function, usable on host and device.
 77 |  *
 78 |  * @tparam Key The type of the values to hash
 79 |  */
 80 | template <typename Key>
 81 | using murmurhash3_x86_128 = detail::MurmurHash3_x86_128<Key>;
 82 | 
 83 | /**
 84 |  * @brief A 32-bit `XXH32` hash function to hash the given argument on host and device.
 85 |  *
 86 |  * @tparam Key The type of the values to hash
 87 |  */
 88 | template <typename Key>
 89 | using xxhash_32 = detail::XXHash_32<Key>;
 90 | 
 91 | /**
 92 |  * @brief A 64-bit `XXH64` hash function to hash the given argument on host and device.
 93 |  *
 94 |  * @tparam Key The type of the values to hash
 95 |  */
 96 | template <typename Key>
 97 | using xxhash_64 = detail::XXHash_64<Key>;
 98 | 
 99 | /**
100 |  * @brief Default hash function.
101 |  *
102 |  * @tparam Key The type of the values to hash
103 |  */
104 | template <typename Key>
105 | using default_hash_function = xxhash_32<Key>;
106 | 
107 | }  // namespace cuco
108 | 


--------------------------------------------------------------------------------
/tests/hyperloglog/device_ref_test.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <test_utils.hpp>
18 | 
19 | #include <cuco/hash_functions.cuh>
20 | #include <cuco/hyperloglog.cuh>
21 | 
22 | #include <cuda/std/cstddef>
23 | #include <thrust/device_vector.h>
24 | #include <thrust/sequence.h>
25 | 
26 | #include <catch2/catch_template_test_macros.hpp>
27 | #include <catch2/generators/catch_generators.hpp>
28 | 
29 | #include <cmath>
30 | #include <cstdint>
31 | 
32 | template <typename Ref, typename InputIt, typename OutputIt>
33 | __global__ void estimate_kernel(cuco::sketch_size_kb sketch_size_kb,
34 |                                 InputIt in,
35 |                                 size_t n,
36 |                                 OutputIt out)
37 | {
38 |   extern __shared__ cuda::std::byte local_sketch[];  // dynamic shared memory backing the sketch
39 | 
40 |   auto const block = cooperative_groups::this_thread_block();
41 | 
42 |   // only a single block computes the estimate
43 |   if (block.group_index().x == 0) {
44 |     Ref estimator(cuda::std::span(local_sketch, Ref::sketch_bytes(sketch_size_kb)));
45 | 
46 |     estimator.clear(block);
47 |     block.sync();
48 |     // size_t index matches `n`; an int index would overflow for inputs beyond INT_MAX
49 |     for (size_t i = block.thread_rank(); i < n; i += block.num_threads()) {
50 |       estimator.add(*(in + i));
51 |     }
52 |     block.sync();
53 |     auto const estimate = estimator.estimate(block);
54 |     if (block.thread_rank() == 0) { *out = estimate; }
55 |   }
56 | }
57 | 
58 | TEMPLATE_TEST_CASE_SIG("hyperloglog: device ref",
59 |                        "",
60 |                        ((typename T, typename Hash), T, Hash),
61 |                        (int32_t, cuco::xxhash_64<int32_t>),
62 |                        (int64_t, cuco::xxhash_64<int64_t>),
63 |                        (__int128_t, cuco::xxhash_64<__int128_t>))
64 | {
65 |   using estimator_type = cuco::hyperloglog<T, cuda::thread_scope_device, Hash>;
66 | 
67 |   auto num_items_pow2 = GENERATE(25, 26, 28);
68 |   auto hll_precision  = GENERATE(8, 10, 12, 13);
69 |   auto sketch_size_kb = 4 * (1ull << hll_precision) / 1024;
70 |   INFO("hll_precision=" << hll_precision);
71 |   INFO("sketch_size_kb=" << sketch_size_kb);
72 |   INFO("num_items=2^" << num_items_pow2);
73 |   auto num_items = 1ull << num_items_pow2;
74 | 
75 |   thrust::device_vector<T> items(num_items);
76 | 
77 |   // Generate `num_items` distinct items
78 |   thrust::sequence(items.begin(), items.end(), 0);
79 | 
80 |   // Initialize the estimator
81 |   estimator_type estimator{cuco::sketch_size_kb(sketch_size_kb)};
82 | 
83 |   // Add all items to the estimator
84 |   estimator.add(items.begin(), items.end());
85 | 
86 |   auto const host_estimate = estimator.estimate();
87 | 
88 |   thrust::device_vector<std::size_t> device_estimate(1);
89 |   estimate_kernel<typename estimator_type::ref_type<cuda::thread_scope_block>>
90 |     <<<1, 512, estimator.sketch_bytes()>>>(
91 |       cuco::sketch_size_kb(sketch_size_kb), items.begin(), num_items, device_estimate.begin());
92 | 
93 |   REQUIRE(device_estimate[0] == host_estimate);
94 | }
95 | 


--------------------------------------------------------------------------------
/include/cuco/detail/__config:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2022-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | // Modifications Copyright (c) 2025 Advanced Micro Devices, Inc.
18 | // Permission is hereby granted, free of charge, to any person obtaining a copy
19 | // of this software and associated documentation files (the "Software"), to deal
20 | // in the Software without restriction, including without limitation the rights
21 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
22 | // copies of the Software, and to permit persons to whom the Software is
23 | // furnished to do so, subject to the following conditions:
24 | // The above copyright notice and this permission notice shall be included in
25 | // all copies or substantial portions of the Software.
26 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
31 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
32 | // THE SOFTWARE.
33 | 
34 | #pragma once
35 | 
36 | #include <nv/target>
37 | #include <cuda/std/version>
38 | 
39 | //TODO(HIP/AMD): this is not supported/used presently
40 | // #if !defined(__CUDACC_VER_MAJOR__) || !defined(__CUDACC_VER_MINOR__)
41 | // #error "NVCC version not found"
42 | // #elif __CUDACC_VER_MAJOR__ < 11 || (__CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ < 5)
43 | // #error "NVCC version 11.5 or later is required"
44 | // #endif
45 | 
46 | // TODO(HIP/AMD): check for compatibility in the future.
47 | // #if !defined(__CUDACC_EXTENDED_LAMBDA__) 
48 | // #error "Support for extended device lambdas is required  (nvcc flag --expt-extended-lambda)"
49 | // #endif
50 | 
51 | // TODO(HIP/AMD): do we need current CCCL?
52 | #ifndef __HIP_PLATFORM_AMD__
53 | #if !defined(CCCL_VERSION) || (CCCL_VERSION < 2005000)
54 | #error "CCCL version 2.5.0 or later is required"
55 | #endif
56 | #endif
57 | 
58 | // TODO(HIP/AMD): still necessary?
59 | #ifdef __HIP_PLATFORM_AMD__
60 | #include <hipcub/hipcub.hpp>
61 | #endif
62 | 
63 | // WAR for libcudacxx/296
64 | #define CUCO_CUDA_MINIMUM_ARCH _NV_FIRST_ARG(__CUDA_ARCH_LIST__)
65 | 
66 | // NOTE(HIP/AMD): No IFP on AMD GPUs. -> No support for barrier (02-26-2025).
67 | #if defined(CUDART_VERSION) && (CUDART_VERSION >= 11000) && (CUCO_CUDA_MINIMUM_ARCH >= 700)
68 | #define CUCO_HAS_CUDA_BARRIER
69 | #endif
70 | 
71 | #if defined(CUDART_VERSION) && (CUDART_VERSION >= 12010)
72 | #define CUCO_HAS_CG_INVOKE_ONE
73 | #endif
74 | 
75 | #if (CUCO_CUDA_MINIMUM_ARCH >= 700)
76 | #define CUCO_HAS_INDEPENDENT_THREADS
77 | #endif
78 | 
79 | #if defined(__SIZEOF_INT128__)
80 | #define CUCO_HAS_INT128
81 | #endif
82 | 
83 | #if defined(CUDART_VERSION) && (CUDART_VERSION >= 12000)
84 | #define CUCO_HAS_CG_REDUCE_UPDATE_ASYNC
85 | #endif
86 | 


--------------------------------------------------------------------------------
/include/cuco/detail/storage/bucket_storage_base.cuh:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | #pragma once
 18 | 
 19 | #include <cuco/detail/storage/storage_base.cuh>
 20 | 
 21 | #include <cuda/std/array>
 22 | 
 23 | #include <cstddef>
 24 | #include <cstdint>
 25 | 
 26 | namespace cuco {
 27 | namespace detail {
 28 | /**
 29 |  * @brief Bucket data structure type
 30 |  *
 31 |  * @tparam T Bucket slot type
 32 |  * @tparam BucketSize Number of elements per bucket
 33 |  */
 34 | template <typename T, int32_t BucketSize>
 35 | struct bucket : public cuda::std::array<T, BucketSize> {
 36 |  public:
 37 |   static int32_t constexpr bucket_size = BucketSize;  ///< Number of slots per bucket
 38 | };
 39 | 
 40 | /**
 41 |  * @brief Base class of array of slot buckets open addressing storage.
 42 |  *
 43 |  * @note This should NOT be used directly.
 44 |  *
 45 |  * @tparam T Slot type
 46 |  * @tparam BucketSize Number of slots in each bucket
 47 |  * @tparam Extent Type of extent denoting the number of buckets
 48 |  */
 49 | template <typename T, int32_t BucketSize, typename Extent>
 50 | class bucket_storage_base : public storage_base<Extent> {
 51 |  public:
 52 |   /**
 53 |    * @brief The number of elements (slots) processed per bucket.
 54 |    */
 55 |   static constexpr int32_t bucket_size = BucketSize;
 56 | 
 57 |   using extent_type = typename storage_base<Extent>::extent_type;  ///< Storage extent type
 58 |   using size_type   = typename storage_base<Extent>::size_type;    ///< Storage size type
 59 | 
 60 |   using value_type  = T;                                ///< Slot type
 61 |   using bucket_type = bucket<value_type, bucket_size>;  ///< Slot bucket type
 62 | 
 63 |   /**
 64 |    * @brief Constructor of array of bucket base storage.
 65 |    *
 66 |    * @param size Number of buckets to store
 67 |    */
 68 |   __host__ __device__ explicit constexpr bucket_storage_base(Extent size)
 69 |     : storage_base<Extent>{size}
 70 |   {
 71 |   }
 72 | 
 73 |   /**
 74 |    * @brief Gets the total number of slot buckets in the current storage.
 75 |    *
 76 |    * @return The total number of slot buckets
 77 |    */
 78 |   [[nodiscard]] __host__ __device__ constexpr size_type num_buckets() const noexcept
 79 |   {
 80 |     return storage_base<Extent>::capacity();
 81 |   }
 82 | 
 83 |   /**
 84 |    * @brief Gets the total number of slots in the current storage.
 85 |    *
 86 |    * @return The total number of slots
 87 |    */
 88 |   [[nodiscard]] __host__ __device__ constexpr size_type capacity() const noexcept
 89 |   {
 90 |     return storage_base<Extent>::capacity() * bucket_size;
 91 |   }
 92 | 
 93 |   /**
 94 |    * @brief Gets the bucket extent of the current storage.
 95 |    *
 96 |    * @return The bucket extent.
 97 |    */
 98 |   [[nodiscard]] __host__ __device__ constexpr extent_type bucket_extent() const noexcept
 99 |   {
100 |     return storage_base<Extent>::extent();
101 |   }
102 | };
103 | 
104 | }  // namespace detail
105 | }  // namespace cuco
106 | 


--------------------------------------------------------------------------------
/include/cuco/detail/bitwise_compare.cuh:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | #pragma once
 18 | 
 19 | #include <cuco/utility/traits.hpp>
 20 | 
 21 | #include <cuda/functional>
 22 | #include <cuda/std/bit>
 23 | 
 24 | #include <cstdint>
 25 | #include <type_traits>
 26 | 
 27 | namespace cuco {
 28 | namespace detail {
 29 | __host__ __device__ inline int cuda_memcmp(void const* __lhs, void const* __rhs, size_t __count)
 30 | {
 31 |   // Byte-wise three-way comparison of two buffers: -1, 0 or 1 at the first differing byte.
 32 |   auto const* lhs_bytes = static_cast<unsigned char const*>(__lhs);
 33 |   auto const* rhs_bytes = static_cast<unsigned char const*>(__rhs);
 34 |   for (size_t idx = 0; idx < __count; ++idx) {
 35 |     auto const lhs_byte = lhs_bytes[idx];
 36 |     auto const rhs_byte = rhs_bytes[idx];
 37 |     if (lhs_byte != rhs_byte) { return lhs_byte < rhs_byte ? -1 : 1; }
 38 |   }
 39 |   return 0;
 40 | }
 41 | 
 42 | template <std::size_t TypeSize>
 43 | struct bitwise_compare_impl {
 44 |   __host__ __device__ static bool compare(char const* lhs, char const* rhs)
 45 |   {
 46 |     return cuda_memcmp(lhs, rhs, TypeSize) == 0;
 47 |   }
 48 | };
 49 | 
 50 | template <>
 51 | struct bitwise_compare_impl<4> {
 52 |   __host__ __device__ inline static bool compare(char const* lhs, char const* rhs)
 53 |   {
 54 |     return *reinterpret_cast<uint32_t const*>(lhs) == *reinterpret_cast<uint32_t const*>(rhs);
 55 |   }
 56 | };
 57 | 
 58 | template <>
 59 | struct bitwise_compare_impl<8> {
 60 |   __host__ __device__ inline static bool compare(char const* lhs, char const* rhs)
 61 |   {
 62 |     return *reinterpret_cast<uint64_t const*>(lhs) == *reinterpret_cast<uint64_t const*>(rhs);
 63 |   }
 64 | };
 65 | 
 66 | /**
 67 |  * @brief Computes the alignment to use for type `T`: `sizeof(T)` rounded up to the next power of
 68 |  * two, capped at 16 bytes.
 69 |  */
 70 | template <typename T>
 71 | __host__ __device__ constexpr std::size_t alignment()
 72 | {
 73 |   constexpr std::size_t alignment = cuda::std::bit_ceil(sizeof(T));
 74 |   return cuda::std::min(std::size_t{16}, alignment);
 75 | }
 76 | 
 77 | /**
 78 |  * @brief Performs a bitwise equality comparison between the two specified objects
 79 |  *
 80 |  * @tparam T Type with unique object representations
 81 |  * @param lhs The first object
 82 |  * @param rhs The second object
 83 |  * @return `true` if the object representations of lhs and rhs are bitwise identical.
 84 |  */
 85 | template <typename T>
 86 | __host__ __device__ constexpr bool bitwise_compare(T const& lhs, T const& rhs)
 87 | {
 88 |   static_assert(
 89 |     cuco::is_bitwise_comparable_v<T>,
 90 |     "Bitwise compared objects must have unique object representations or be explicitly declared as "
 91 |     "safe for bitwise comparison via specialization of cuco::is_bitwise_comparable_v.");
 92 | 
 93 |   alignas(detail::alignment<T>()) T __lhs{lhs};
 94 |   alignas(detail::alignment<T>()) T __rhs{rhs};
 95 |   return detail::bitwise_compare_impl<sizeof(T)>::compare(reinterpret_cast<char const*>(&__lhs),
 96 |                                                           reinterpret_cast<char const*>(&__rhs));
 97 | }
 98 | 
 99 | }  // namespace detail
100 | }  // namespace cuco
101 | 


--------------------------------------------------------------------------------
/benchmarks/static_set/contains_bench.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <benchmark_defaults.hpp>
18 | #include <benchmark_utils.hpp>
19 | 
20 | #include <cuco/static_set.cuh>
21 | #include <cuco/utility/key_generator.cuh>
22 | 
23 | #include <nvbench/nvbench.cuh>
24 | 
25 | #include <thrust/device_vector.h>
26 | 
27 | using namespace cuco::benchmark;  // defaults, dist_from_state
28 | using namespace cuco::utility;    // key_generator, distribution
29 | 
30 | /**
31 |  * @brief A benchmark evaluating `cuco::static_set::contains_async` performance
32 |  */
33 | template <typename Key, typename Dist>
34 | void static_set_contains(nvbench::state& state, nvbench::type_list<Key, Dist>)
35 | {
36 |   auto const num_keys      = state.get_int64("NumInputs");
37 |   auto const occupancy     = state.get_float64("Occupancy");
38 |   auto const matching_rate = state.get_float64("MatchingRate");
39 | 
40 |   std::size_t const size = num_keys / occupancy;
41 | 
42 |   thrust::device_vector<Key> keys(num_keys);
43 | 
44 |   key_generator gen;
45 |   gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
46 | 
47 |   cuco::static_set<Key> set{size, cuco::empty_key<Key>{-1}};
48 |   set.insert(keys.begin(), keys.end());
49 | 
50 |   gen.dropout(keys.begin(), keys.end(), matching_rate);
51 | 
52 |   thrust::device_vector<bool> result(num_keys);
53 | 
54 |   state.add_element_count(num_keys);
55 | 
56 |   state.exec([&](nvbench::launch& launch) {
57 |     set.contains_async(keys.begin(), keys.end(), result.begin(), {launch.get_stream()});
58 |   });
59 | }
60 | 
61 | NVBENCH_BENCH_TYPES(static_set_contains,
62 |                     NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
63 |                                       nvbench::type_list<distribution::unique>))
64 |   .set_name("static_set_contains_unique_capacity")
65 |   .set_type_axes_names({"Key", "Distribution"})
66 |   .add_int64_axis("NumInputs", defaults::N_RANGE_CACHE)
67 |   .add_float64_axis("Occupancy", {defaults::OCCUPANCY})
68 |   .add_float64_axis("MatchingRate", {defaults::MATCHING_RATE});
69 | 
70 | NVBENCH_BENCH_TYPES(static_set_contains,
71 |                     NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
72 |                                       nvbench::type_list<distribution::unique>))
73 |   .set_name("static_set_contains_unique_occupancy")  // sweeps Occupancy only
74 |   .set_type_axes_names({"Key", "Distribution"})
75 |   .set_max_noise(defaults::MAX_NOISE)
76 |   .add_int64_axis("NumInputs", {defaults::N})  // single fixed value
77 |   .add_float64_axis("Occupancy", defaults::OCCUPANCY_RANGE)  // swept axis
78 |   .add_float64_axis("MatchingRate", {defaults::MATCHING_RATE});  // single fixed value
79 | // Same benchmark body, registered again with MatchingRate as the swept axis.
80 | NVBENCH_BENCH_TYPES(static_set_contains,
81 |                     NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
82 |                                       nvbench::type_list<distribution::unique>))
83 |   .set_name("static_set_contains_unique_matching_rate")
84 |   .set_type_axes_names({"Key", "Distribution"})
85 |   .set_max_noise(defaults::MAX_NOISE)
86 |   .add_int64_axis("NumInputs", {defaults::N})  // single fixed value
87 |   .add_float64_axis("Occupancy", {defaults::OCCUPANCY})  // single fixed value
88 |   .add_float64_axis("MatchingRate", defaults::MATCHING_RATE_RANGE);  // swept axis
89 | 


--------------------------------------------------------------------------------
/include/cuco/detail/pair/pair.inl:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | // Modifications Copyright (c) 2024-2025 Advanced Micro Devices, Inc.
18 | // Permission is hereby granted, free of charge, to any person obtaining a copy
19 | // of this software and associated documentation files (the "Software"), to deal
20 | // in the Software without restriction, including without limitation the rights
21 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
22 | // copies of the Software, and to permit persons to whom the Software is
23 | // furnished to do so, subject to the following conditions:
24 | // The above copyright notice and this permission notice shall be included in
25 | // all copies or substantial portions of the Software.
26 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
31 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
32 | // THE SOFTWARE.
33 | 
34 | #pragma once
35 | 
36 | #include <type_traits>
37 | #include <utility>
38 | 
39 | // FIXME/WAR(HIP/AMD): We need to include libhipcxx __config here to make
40 | // the macro _LIBCUDACXX_BEGIN_NAMESPACE_STD_NOVERSION from libhipcxx available
41 | // in order to add the symbols from tuple_helpers.inl to the correct namespace.
42 | #include <cuda/std/detail/__config>
43 | 
44 | namespace cuco {
45 | // Value constructor: initializes `first` and `second` from the two arguments.
46 | template <class First, class Second>
47 | __host__ __device__ constexpr pair<First, Second>::pair(First const& lhs, Second const& rhs)
48 |   : first{lhs}, second{rhs}
49 | {
50 | }
51 | // Converting constructor: initializes each member from the matching member of `other`.
52 | template <class First, class Second>
53 | template <class F, class S>
54 | __host__ __device__ constexpr pair<First, Second>::pair(pair<F, S> const& other)
55 |   : first{other.first}, second{other.second}
56 | {
57 | }
58 | // Builds a pair of the decayed argument types from the two forwarded arguments.
59 | template <class F, class S>
60 | __host__ __device__ constexpr pair<std::decay_t<F>, std::decay_t<S>> make_pair(
61 |   F&& lhs, S&& rhs) noexcept
62 | {
63 |   return pair<std::decay_t<F>, std::decay_t<S>>(std::forward<F>(lhs), std::forward<S>(rhs));
64 | }
65 | // Two pairs compare equal when both `first` members and both `second` members compare equal.
66 | template <typename T1, typename T2, typename U1, typename U2>
67 | __host__ __device__ constexpr bool operator==(cuco::pair<T1, T2> const& a,
68 |                                               cuco::pair<U1, U2> const& b) noexcept
69 | {
70 |   return (a.first == b.first) && (a.second == b.second);
71 | }
72 | 
73 | }  // namespace cuco
74 | 
75 | // NOTE(HIP/AMD): Older libhipcxx versions name their top-level namespace "hip",
76 | // while more recent ones name it "cuda". The libhipcxx namespace macros below
77 | // are used so that the tuple_size / tuple_element / get definitions in
78 | // tuple_helpers.inl (textually included between the macros) end up in the
79 | // correct namespace for whichever libhipcxx version is in use.
80 | _LIBCUDACXX_BEGIN_NAMESPACE_STD_NOVERSION
81 | #include <cuco/detail/pair/tuple_helpers.inl>
82 | _LIBCUDACXX_END_NAMESPACE_STD_NOVERSION
83 | 


--------------------------------------------------------------------------------
/benchmarks/static_multiset/contains_bench.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <benchmark_defaults.hpp>
18 | #include <benchmark_utils.hpp>
19 | 
20 | #include <cuco/static_multiset.cuh>
21 | #include <cuco/utility/key_generator.cuh>
22 | 
23 | #include <nvbench/nvbench.cuh>
24 | 
25 | #include <thrust/device_vector.h>
26 | 
27 | using namespace cuco::benchmark;  // defaults, dist_from_state
28 | using namespace cuco::utility;    // key_generator, distribution
29 | 
30 | /**
31 |  * @brief Benchmark measuring `cuco::static_multiset::contains_async` over a range of query keys
32 |  */
33 | template <typename Key, typename Dist>
34 | void static_multiset_contains(nvbench::state& state, nvbench::type_list<Key, Dist>)
35 | {
36 |   // Parameters of the current configuration, read from the nvbench axes
37 |   auto const n          = state.get_int64("NumInputs");
38 |   auto const load       = state.get_float64("Occupancy");
39 |   auto const match_rate = state.get_float64("MatchingRate");
40 |   auto const capacity   = static_cast<std::size_t>(n / load);
41 | 
42 |   thrust::device_vector<Key> queries(n);
43 |   key_generator generator;
44 |   generator.generate(dist_from_state<Dist>(state), queries.begin(), queries.end());
45 | 
46 |   // Populate the multiset with every generated key before `dropout` touches the key range
47 |   cuco::static_multiset<Key> set{capacity, cuco::empty_key<Key>{-1}};
48 |   set.insert(queries.begin(), queries.end());
49 | 
50 |   // NOTE(review): presumably swaps some keys for non-matching ones - confirm in key_generator
51 |   generator.dropout(queries.begin(), queries.end(), match_rate);
52 | 
53 |   thrust::device_vector<bool> found(n);
54 |   state.add_element_count(n);
55 | 
56 |   state.exec([&](nvbench::launch& launch) {
57 |     set.contains_async(queries.begin(), queries.end(), found.begin(), {launch.get_stream()});
58 |   });
59 | }
60 | 
61 | NVBENCH_BENCH_TYPES(static_multiset_contains,
62 |                     NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
63 |                                       nvbench::type_list<distribution::unique>))
64 |   .set_name("static_multiset_contains_unique_capacity")  // spelled like the sibling benchmarks
65 |   .set_type_axes_names({"Key", "Distribution"})
66 |   .add_int64_axis("NumInputs", defaults::N_RANGE_CACHE)  // swept axis
67 |   .add_float64_axis("Occupancy", {defaults::OCCUPANCY})  // single fixed value
68 |   .add_float64_axis("MatchingRate", {defaults::MATCHING_RATE});  // single fixed value
69 | 
70 | NVBENCH_BENCH_TYPES(static_multiset_contains,
71 |                     NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
72 |                                       nvbench::type_list<distribution::unique>))
73 |   .set_name("static_multiset_contains_unique_occupancy")  // sweeps Occupancy only
74 |   .set_type_axes_names({"Key", "Distribution"})
75 |   .set_max_noise(defaults::MAX_NOISE)
76 |   .add_int64_axis("NumInputs", {defaults::N})  // single fixed value
77 |   .add_float64_axis("Occupancy", defaults::OCCUPANCY_RANGE)  // swept axis
78 |   .add_float64_axis("MatchingRate", {defaults::MATCHING_RATE});  // single fixed value
79 | // Same benchmark body, registered again with MatchingRate as the swept axis.
80 | NVBENCH_BENCH_TYPES(static_multiset_contains,
81 |                     NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
82 |                                       nvbench::type_list<distribution::unique>))
83 |   .set_name("static_multiset_contains_unique_matching_rate")
84 |   .set_type_axes_names({"Key", "Distribution"})
85 |   .set_max_noise(defaults::MAX_NOISE)
86 |   .add_int64_axis("NumInputs", {defaults::N})  // single fixed value
87 |   .add_float64_axis("Occupancy", {defaults::OCCUPANCY})  // single fixed value
88 |   .add_float64_axis("MatchingRate", defaults::MATCHING_RATE_RANGE);  // swept axis
89 | 


--------------------------------------------------------------------------------
/benchmarks/static_set/find_bench.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <benchmark_defaults.hpp>
18 | #include <benchmark_utils.hpp>
19 | 
20 | #include <cuco/static_set.cuh>
21 | #include <cuco/utility/key_generator.cuh>
22 | 
23 | #include <nvbench/nvbench.cuh>
24 | 
25 | #include <thrust/device_vector.h>
26 | #include <thrust/transform.h>
27 | 
28 | using namespace cuco::benchmark;  // defaults, dist_from_state
29 | using namespace cuco::utility;    // key_generator, distribution
30 | 
31 | /**
32 |  * @brief Benchmark measuring `cuco::static_set::find_async` over a range of query keys
33 |  */
34 | template <typename Key, typename Dist>
35 | void static_set_find(nvbench::state& state, nvbench::type_list<Key, Dist>)
36 | {
37 |   // Parameters of the current configuration, read from the nvbench axes
38 |   auto const n          = state.get_int64("NumInputs");
39 |   auto const load       = state.get_float64("Occupancy");
40 |   auto const match_rate = state.get_float64("MatchingRate");
41 |   auto const capacity   = static_cast<std::size_t>(n / load);
42 | 
43 |   thrust::device_vector<Key> queries(n);
44 |   key_generator generator;
45 |   generator.generate(dist_from_state<Dist>(state), queries.begin(), queries.end());
46 | 
47 |   // Populate the set with every generated key before `dropout` touches the key range
48 |   cuco::static_set<Key> set{capacity, cuco::empty_key<Key>{-1}};
49 |   set.insert(queries.begin(), queries.end());
50 | 
51 |   // TODO: the explicit `nullptr` argument is needed; omitting it crashes (cause unknown)
52 |   generator.dropout(queries.begin(), queries.end(), match_rate, nullptr);
53 | 
54 |   thrust::device_vector<Key> found(n);
55 |   state.add_element_count(n);
56 | 
57 |   // The callable handed to nvbench issues one `find_async` on the launch's stream
58 |   state.exec([&](nvbench::launch& launch) {
59 |     set.find_async(queries.begin(), queries.end(), found.begin(), {launch.get_stream()});
60 |   });
61 | }
62 | 
63 | NVBENCH_BENCH_TYPES(static_set_find,
64 |                     NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
65 |                                       nvbench::type_list<distribution::unique>))
66 |   .set_name("static_set_find_unique_capacity")  // sweeps NumInputs only
67 |   .set_type_axes_names({"Key", "Distribution"})
68 |   .add_int64_axis("NumInputs", defaults::N_RANGE_CACHE)  // swept axis
69 |   .add_float64_axis("Occupancy", {defaults::OCCUPANCY})  // single fixed value
70 |   .add_float64_axis("MatchingRate", {defaults::MATCHING_RATE});  // single fixed value
71 | // Same benchmark body, registered again with Occupancy as the swept axis.
72 | NVBENCH_BENCH_TYPES(static_set_find,
73 |                     NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
74 |                                       nvbench::type_list<distribution::unique>))
75 |   .set_name("static_set_find_unique_occupancy")
76 |   .set_type_axes_names({"Key", "Distribution"})
77 |   .set_max_noise(defaults::MAX_NOISE)
78 |   .add_int64_axis("NumInputs", {defaults::N})  // single fixed value
79 |   .add_float64_axis("Occupancy", defaults::OCCUPANCY_RANGE)  // swept axis
80 |   .add_float64_axis("MatchingRate", {defaults::MATCHING_RATE});  // single fixed value
81 | // Same benchmark body, registered again with MatchingRate as the swept axis.
82 | NVBENCH_BENCH_TYPES(static_set_find,
83 |                     NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
84 |                                       nvbench::type_list<distribution::unique>))
85 |   .set_name("static_set_find_unique_matching_rate")
86 |   .set_type_axes_names({"Key", "Distribution"})
87 |   .set_max_noise(defaults::MAX_NOISE)
88 |   .add_int64_axis("NumInputs", {defaults::N})  // single fixed value
89 |   .add_float64_axis("Occupancy", {defaults::OCCUPANCY})  // single fixed value
90 |   .add_float64_axis("MatchingRate", defaults::MATCHING_RATE_RANGE);  // swept axis
91 | 


--------------------------------------------------------------------------------
/benchmarks/static_multiset/find_bench.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <benchmark_defaults.hpp>
18 | #include <benchmark_utils.hpp>
19 | 
20 | #include <cuco/static_multiset.cuh>
21 | #include <cuco/utility/key_generator.cuh>
22 | 
23 | #include <nvbench/nvbench.cuh>
24 | 
25 | #include <thrust/device_vector.h>
26 | #include <thrust/transform.h>
27 | 
28 | using namespace cuco::benchmark;  // defaults, dist_from_state
29 | using namespace cuco::utility;    // key_generator, distribution
30 | 
31 | /**
32 |  * @brief Benchmark measuring `cuco::static_multiset::find_async` over a range of query keys
33 |  */
34 | template <typename Key, typename Dist>
35 | void static_multiset_find(nvbench::state& state, nvbench::type_list<Key, Dist>)
36 | {
37 |   // Parameters of the current configuration, read from the nvbench axes
38 |   auto const n          = state.get_int64("NumInputs");
39 |   auto const load       = state.get_float64("Occupancy");
40 |   auto const match_rate = state.get_float64("MatchingRate");
41 |   auto const capacity   = static_cast<std::size_t>(n / load);
42 | 
43 |   thrust::device_vector<Key> queries(n);
44 |   key_generator generator;
45 |   generator.generate(dist_from_state<Dist>(state), queries.begin(), queries.end());
46 | 
47 |   // Populate the multiset with every generated key before `dropout` touches the key range
48 |   cuco::static_multiset<Key> set{capacity, cuco::empty_key<Key>{-1}};
49 |   set.insert(queries.begin(), queries.end());
50 | 
51 |   // TODO: the explicit `nullptr` argument is needed; omitting it crashes (cause unknown)
52 |   generator.dropout(queries.begin(), queries.end(), match_rate, nullptr);
53 | 
54 |   thrust::device_vector<Key> found(n);
55 |   state.add_element_count(n);
56 | 
57 |   // The callable handed to nvbench issues one `find_async` on the launch's stream
58 |   state.exec([&](nvbench::launch& launch) {
59 |     set.find_async(queries.begin(), queries.end(), found.begin(), {launch.get_stream()});
60 |   });
61 | }
62 | 
63 | NVBENCH_BENCH_TYPES(static_multiset_find,
64 |                     NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
65 |                                       nvbench::type_list<distribution::unique>))
66 |   .set_name("static_multiset_find_unique_capacity")  // sweeps NumInputs only
67 |   .set_type_axes_names({"Key", "Distribution"})
68 |   .add_int64_axis("NumInputs", defaults::N_RANGE_CACHE)  // swept axis
69 |   .add_float64_axis("Occupancy", {defaults::OCCUPANCY})  // single fixed value
70 |   .add_float64_axis("MatchingRate", {defaults::MATCHING_RATE});  // single fixed value
71 | // Same benchmark body, registered again with Occupancy as the swept axis.
72 | NVBENCH_BENCH_TYPES(static_multiset_find,
73 |                     NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
74 |                                       nvbench::type_list<distribution::unique>))
75 |   .set_name("static_multiset_find_unique_occupancy")
76 |   .set_type_axes_names({"Key", "Distribution"})
77 |   .set_max_noise(defaults::MAX_NOISE)
78 |   .add_int64_axis("NumInputs", {defaults::N})  // single fixed value
79 |   .add_float64_axis("Occupancy", defaults::OCCUPANCY_RANGE)  // swept axis
80 |   .add_float64_axis("MatchingRate", {defaults::MATCHING_RATE});  // single fixed value
81 | // Same benchmark body, registered again with MatchingRate as the swept axis.
82 | NVBENCH_BENCH_TYPES(static_multiset_find,
83 |                     NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
84 |                                       nvbench::type_list<distribution::unique>))
85 |   .set_name("static_multiset_find_unique_matching_rate")
86 |   .set_type_axes_names({"Key", "Distribution"})
87 |   .set_max_noise(defaults::MAX_NOISE)
88 |   .add_int64_axis("NumInputs", {defaults::N})  // single fixed value
89 |   .add_float64_axis("Occupancy", {defaults::OCCUPANCY})  // single fixed value
90 |   .add_float64_axis("MatchingRate", defaults::MATCHING_RATE_RANGE);  // swept axis


--------------------------------------------------------------------------------
/include/cuco/detail/pair/tuple_helpers.inl:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2023, NVIDIA CORPORATION.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | template <typename T1, typename T2>
 18 | struct tuple_size<cuco::pair<T1, T2>> : integral_constant<size_t, 2> {};  // a pair has 2 elements
 19 | // The cv-qualified specializations below inherit the value from the unqualified pair.
 20 | template <typename T1, typename T2>
 21 | struct tuple_size<const cuco::pair<T1, T2>> : tuple_size<cuco::pair<T1, T2>> {};
 22 | 
 23 | template <typename T1, typename T2>
 24 | struct tuple_size<volatile cuco::pair<T1, T2>> : tuple_size<cuco::pair<T1, T2>> {};
 25 | 
 26 | template <typename T1, typename T2>
 27 | struct tuple_size<const volatile cuco::pair<T1, T2>> : tuple_size<cuco::pair<T1, T2>> {};
 28 | 
 29 | template <std::size_t I, typename T1, typename T2>
 30 | struct tuple_element<I, cuco::pair<T1, T2>> {
 31 |   using type = void;  // chosen for any index other than 0 or 1
 32 | };
 33 | 
 34 | template <typename T1, typename T2>
 35 | struct tuple_element<0, cuco::pair<T1, T2>> {
 36 |   using type = T1;  // index 0 maps to the pair's first template argument
 37 | };
 38 | 
 39 | template <typename T1, typename T2>
 40 | struct tuple_element<1, cuco::pair<T1, T2>> {
 41 |   using type = T2;  // index 1 maps to the pair's second template argument
 42 | };
 43 | // The cv-qualified specializations below inherit `type` from the unqualified pair.
 44 | template <typename T1, typename T2>
 45 | struct tuple_element<0, const cuco::pair<T1, T2>> : tuple_element<0, cuco::pair<T1, T2>> {};
 46 | 
 47 | template <typename T1, typename T2>
 48 | struct tuple_element<1, const cuco::pair<T1, T2>> : tuple_element<1, cuco::pair<T1, T2>> {};
 49 | 
 50 | template <typename T1, typename T2>
 51 | struct tuple_element<0, volatile cuco::pair<T1, T2>> : tuple_element<0, cuco::pair<T1, T2>> {};
 52 | 
 53 | template <typename T1, typename T2>
 54 | struct tuple_element<1, volatile cuco::pair<T1, T2>> : tuple_element<1, cuco::pair<T1, T2>> {};
 55 | 
 56 | template <typename T1, typename T2>
 57 | struct tuple_element<0, const volatile cuco::pair<T1, T2>> : tuple_element<0, cuco::pair<T1, T2>> {
 58 | };
 59 | 
 60 | template <typename T1, typename T2>
 61 | struct tuple_element<1, const volatile cuco::pair<T1, T2>> : tuple_element<1, cuco::pair<T1, T2>> {
 62 | };
 63 | 
 64 | template <std::size_t I, class T1, class T2>
 65 | __host__ __device__ constexpr auto get(cuco::pair<T1, T2>& value) ->
 66 |   typename tuple_element<I, cuco::pair<T1, T2>>::type&
 67 | {
 68 |   static_assert(I < 2);  // a pair only has elements 0 (`first`) and 1 (`second`)
 69 |   if constexpr (I != 0) {
 70 |     return value.second;
 71 |   } else {
 72 |     return value.first;
 73 |   }
 74 | }
 75 | 
 76 | template <std::size_t I, class T1, class T2>
 77 | __host__ __device__ constexpr auto get(cuco::pair<T1, T2>&& value) ->
 78 |   typename tuple_element<I, cuco::pair<T1, T2>>::type&&
 79 | {
 80 |   static_assert(I < 2);  // a pair only has elements 0 (`first`) and 1 (`second`)
 81 |   if constexpr (I != 0) {
 82 |     return std::move(value.second);
 83 |   } else {
 84 |     return std::move(value.first);
 85 |   }
 86 | }
 87 | 
 88 | template <std::size_t I, class T1, class T2>
 89 | __host__ __device__ constexpr auto get(cuco::pair<T1, T2> const& value) ->
 90 |   typename tuple_element<I, cuco::pair<T1, T2>>::type const&
 91 | {
 92 |   static_assert(I < 2);  // a pair only has elements 0 (`first`) and 1 (`second`)
 93 |   if constexpr (I != 0) {
 94 |     return value.second;
 95 |   } else {
 96 |     return value.first;
 97 |   }
 98 | }
 99 | 
100 | template <std::size_t I, class T1, class T2>
101 | __host__ __device__ constexpr auto get(cuco::pair<T1, T2> const&& value) ->
102 |   typename tuple_element<I, cuco::pair<T1, T2>>::type const&&
103 | {
104 |   static_assert(I < 2);  // a pair only has elements 0 (`first`) and 1 (`second`)
105 |   if constexpr (I != 0) {
106 |     return std::move(value.second);
107 |   } else {
108 |     return std::move(value.first);
109 |   }
110 | }


--------------------------------------------------------------------------------
/benchmarks/static_set/insert_bench.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <benchmark_defaults.hpp>
18 | #include <benchmark_utils.hpp>
19 | 
20 | #include <cuco/static_set.cuh>
21 | #include <cuco/utility/key_generator.cuh>
22 | 
23 | #include <nvbench/nvbench.cuh>
24 | 
25 | #include <thrust/device_vector.h>
26 | 
27 | using namespace cuco::benchmark;  // defaults, dist_from_state
28 | using namespace cuco::utility;    // key_generator, distribution
29 | 
30 | /**
31 |  * @brief Benchmark measuring `cuco::static_set::insert_async` on a range of input keys
32 |  */
33 | template <typename Key, typename Dist>
34 | void static_set_insert(nvbench::state& state, nvbench::type_list<Key, Dist>)
35 | {
36 |   // Parameters of the current configuration, read from the nvbench axes
37 |   auto const n        = state.get_int64("NumInputs");
38 |   auto const load     = state.get_float64("Occupancy");
39 |   auto const capacity = static_cast<std::size_t>(n / load);
40 | 
41 |   thrust::device_vector<Key> input(n);
42 |   key_generator generator;
43 |   generator.generate(dist_from_state<Dist>(state), input.begin(), input.end());
44 | 
45 |   state.add_element_count(n);
46 | 
47 |   cuco::static_set<Key> set{capacity, cuco::empty_key<Key>{-1}};
48 | 
49 |   // Manual timer: only the insert is bracketed; the clear that follows is outside start/stop
50 |   state.exec(nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) {
51 |     timer.start();
52 |     set.insert_async(input.begin(), input.end(), {launch.get_stream()});
53 |     timer.stop();
54 |     set.clear_async({launch.get_stream()});
55 |   });
56 | }
57 | 
58 | NVBENCH_BENCH_TYPES(static_set_insert,
59 |                     NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
60 |                                       nvbench::type_list<distribution::unique>))
61 |   .set_name("static_set_insert_unique_capacity")  // sweeps NumInputs only
62 |   .set_type_axes_names({"Key", "Distribution"})
63 |   .set_max_noise(defaults::MAX_NOISE)
64 |   .add_int64_axis("NumInputs", defaults::N_RANGE_CACHE)  // swept axis
65 |   .add_float64_axis("Occupancy", {defaults::OCCUPANCY});  // single fixed value
66 | // Same benchmark body, registered again with Occupancy as the swept axis.
67 | NVBENCH_BENCH_TYPES(static_set_insert,
68 |                     NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
69 |                                       nvbench::type_list<distribution::unique>))
70 |   .set_name("static_set_insert_unique_occupancy")
71 |   .set_type_axes_names({"Key", "Distribution"})
72 |   .set_max_noise(defaults::MAX_NOISE)
73 |   .add_int64_axis("NumInputs", {defaults::N})  // single fixed value
74 |   .add_float64_axis("Occupancy", defaults::OCCUPANCY_RANGE);  // swept axis
75 | // `distribution::uniform` variant with an additional swept "Multiplicity" axis.
76 | NVBENCH_BENCH_TYPES(static_set_insert,
77 |                     NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
78 |                                       nvbench::type_list<distribution::uniform>))
79 |   .set_name("static_set_insert_uniform_multiplicity")
80 |   .set_type_axes_names({"Key", "Distribution"})
81 |   .set_max_noise(defaults::MAX_NOISE)
82 |   .add_int64_axis("NumInputs", {defaults::N})  // single fixed value
83 |   .add_float64_axis("Occupancy", {defaults::OCCUPANCY})  // single fixed value
84 |   .add_int64_axis("Multiplicity", defaults::MULTIPLICITY_RANGE);  // swept axis
85 | // `distribution::gaussian` variant with an additional swept "Skew" axis.
86 | NVBENCH_BENCH_TYPES(static_set_insert,
87 |                     NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
88 |                                       nvbench::type_list<distribution::gaussian>))
89 |   .set_name("static_set_insert_gaussian_skew")
90 |   .set_type_axes_names({"Key", "Distribution"})
91 |   .set_max_noise(defaults::MAX_NOISE)
92 |   .add_int64_axis("NumInputs", {defaults::N})  // single fixed value
93 |   .add_float64_axis("Occupancy", {defaults::OCCUPANCY})  // single fixed value
94 |   .add_float64_axis("Skew", defaults::SKEW_RANGE);  // swept axis
95 | 


--------------------------------------------------------------------------------