├── .gitignore
├── doc
├── output.svg
├── input.svg
├── clusters.svg
├── underfill_cost.svg
├── valid_split_positions.svg
└── adjacency_sweep.svg
├── CHANGELOG.md
├── CONTRIBUTING.txt
├── test
├── src
│ ├── test_clusterizer.c
│ ├── test_perf.cpp
│ ├── tree_gen.hpp
│ └── test_util.hpp
└── CMakeLists.txt
├── .clang-format
├── src
├── connections.hpp
├── clusterizer.hpp
├── underfill_cost.hpp
├── connections.cpp
└── nvcluster.cpp
├── CMakeLists.txt
├── include
└── nvcluster
│ ├── nvcluster_storage.hpp
│ ├── util
│ ├── parallel_execution_libcxx.hpp
│ ├── parallel.hpp
│ └── objects.hpp
│ └── nvcluster.h
├── LICENSE
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | build
2 | .vscode
3 | .cache
4 | _install
--------------------------------------------------------------------------------
/doc/output.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 |
2 | # Version 2
3 |
4 | ## Features
5 |
6 | - Vertex limit, [`maxClusterVertices`](include/nvcluster/nvcluster.h)
7 | - Vertex underfill cost, [`costUnderfillVertices`](include/nvcluster/nvcluster.h)
8 | - Implicit connection computation with [`itemVertices`](include/nvcluster/nvcluster.h)
9 | - Shared library support in cmake, [`NVCLUSTER_BUILDER_SHARED`](CMakeLists.txt)
10 | - Dynamic `parallelize` switch in [`nvcluster_ContextCreateInfo`](include/nvcluster/nvcluster.h)
11 |
12 | ## Code Quality
13 |
14 | - Real C API, removing namespace, adding prefixes, symbol export
15 | - Flattened API structs, avoiding pointer chains
16 | - Removed macro based parallel for loops
17 | - Internal use of std::span instead of raw pointers
18 | - vec3f and AABB objects instead of inlined operations
19 | - Fallback for missing libc++ parallel execution
20 |
--------------------------------------------------------------------------------
/CONTRIBUTING.txt:
--------------------------------------------------------------------------------
1 | Developer Certificate of Origin
2 | Version 1.1
3 |
4 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
5 |
6 | Everyone is permitted to copy and distribute verbatim copies of this
7 | license document, but changing it is not allowed.
8 |
9 |
10 | Developer's Certificate of Origin 1.1
11 |
12 | By making a contribution to this project, I certify that:
13 |
14 | (a) The contribution was created in whole or in part by me and I
15 | have the right to submit it under the open source license
16 | indicated in the file; or
17 |
18 | (b) The contribution is based upon previous work that, to the best
19 | of my knowledge, is covered under an appropriate open source
20 | license and I have the right under that license to submit that
21 | work with modifications, whether created in whole or in part
22 | by me, under the same open source license (unless I am
23 | permitted to submit under a different license), as indicated
24 | in the file; or
25 |
26 | (c) The contribution was provided directly to me by some other
27 | person who certified (a), (b) or (c) and I have not modified
28 | it.
29 |
30 | (d) I understand and agree that this project and the contribution
31 | are public and that a record of the contribution (including all
32 | personal information I submit with it, including my sign-off) is
33 | maintained indefinitely and may be redistributed consistent with
34 | this project or the open source license(s) involved.
35 |
--------------------------------------------------------------------------------
/test/src/test_clusterizer.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *
16 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION
17 | * SPDX-License-Identifier: Apache-2.0
18 | */
19 |
20 | #ifdef __cplusplus
21 | #error This file verifies the API is C compatible
22 | #endif
23 |
24 | #include
25 | #include
26 |
27 | int runCTest(void)  // C-only smoke test: returns 1 if a context is created and then destroyed successfully, 0 otherwise
28 | {
29 | nvcluster_ContextCreateInfo createInfo = nvcluster_defaultContextCreateInfo();
30 | nvcluster_Context context = 0;  // passed by address to nvclusterCreateContext() below
31 | nvcluster_Result createResult = nvclusterCreateContext(&createInfo, &context);
32 | if(createResult != NVCLUSTER_SUCCESS)
33 | {
34 | printf("Create Context Result: %s\n", nvclusterResultString(createResult));
35 | return 0;
36 | }
37 | // Destroy the context again; a failure here also fails the test
38 | nvcluster_Result destroyResult = nvclusterDestroyContext(context);
39 | if(destroyResult != NVCLUSTER_SUCCESS)
40 | {
41 | printf("Destroy Context Result: %s\n", nvclusterResultString(destroyResult));
42 | return 0;
43 | }
44 | return 1;
45 | }
46 |
--------------------------------------------------------------------------------
/doc/input.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/.clang-format:
--------------------------------------------------------------------------------
1 | BasedOnStyle: LLVM
2 | AccessModifierOffset: '-2'
3 | AlignAfterOpenBracket: Align
4 | AlignConsecutiveAssignments: 'true'
5 | AlignConsecutiveDeclarations: 'true'
6 | AlignOperands: 'true'
7 | AlignTrailingComments: 'true'
8 | AllowAllParametersOfDeclarationOnNextLine: 'false'
9 | AllowShortBlocksOnASingleLine: 'false'
10 | AllowShortCaseLabelsOnASingleLine: 'false'
11 | AllowShortFunctionsOnASingleLine: Inline
12 | AllowShortIfStatementsOnASingleLine: 'false'
13 | AllowShortLoopsOnASingleLine: 'false'
14 | AlwaysBreakAfterReturnType: None
15 | AlwaysBreakBeforeMultilineStrings: 'true'
16 | AlwaysBreakTemplateDeclarations: 'true'
17 | BinPackArguments: 'true'
18 | BinPackParameters: 'false'
19 | ExperimentalAutoDetectBinPacking: 'false'
20 | BreakBeforeBinaryOperators: NonAssignment
21 | BreakBeforeBraces: Custom
22 | BreakBeforeTernaryOperators: 'false'
23 | BreakConstructorInitializersBeforeComma: 'true'
24 | ColumnLimit: '120'
25 | ConstructorInitializerAllOnOneLineOrOnePerLine: 'false'
26 | Cpp11BracedListStyle: 'true'
27 | IndentCaseLabels: 'true'
28 | IndentWidth: '2'
29 | KeepEmptyLinesAtTheStartOfBlocks: 'true'
30 | Language: Cpp
31 | MaxEmptyLinesToKeep: '2'
32 | NamespaceIndentation: None
33 | ObjCSpaceBeforeProtocolList: 'true'
34 | PointerAlignment: Left
35 | SpaceAfterCStyleCast: 'false'
36 | SpaceBeforeAssignmentOperators: 'true'
37 | SpaceBeforeParens: Never
38 | SpaceInEmptyParentheses: 'false'
39 | SpacesBeforeTrailingComments: '2'
40 | SpacesInAngles: 'false'
41 | SpacesInCStyleCastParentheses: 'false'
42 | SpacesInParentheses: 'false'
43 | SpacesInSquareBrackets: 'false'
44 | Standard: Cpp11
45 | TabWidth: '2'
46 | UseTab: Never
47 | SortIncludes: 'true'
48 | ReflowComments: 'false'
49 | BraceWrapping: {
50 | AfterClass: 'true',
51 | AfterControlStatement: 'true',
52 | AfterEnum: 'true',
53 | AfterFunction: 'true',
54 | AfterNamespace: 'false',
55 | AfterStruct: 'true',
56 | AfterUnion: 'true',
57 | BeforeCatch: 'true',
58 | BeforeElse: 'true',
59 | IndentBraces: 'false'
60 | }
61 | PenaltyExcessCharacter: 1
62 | PenaltyBreakBeforeFirstCallParameter: 40
63 | PenaltyBreakFirstLessLess: 1
64 | PenaltyBreakComment: 30
65 | PenaltyBreakString: 30
66 | PenaltyReturnTypeOnItsOwnLine: 9999
67 |
--------------------------------------------------------------------------------
/src/connections.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *
16 | * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION
17 | * SPDX-License-Identifier: Apache-2.0
18 | */
19 | #pragma once
20 |
21 | #include
22 | #include
23 | #include
24 | #include
25 | #include
26 | #include
27 |
28 | namespace nvcluster {
29 |
30 | // A 2D uint32_t array pointer, used to interpret nvcluster_Input::itemVertices.
31 | // Rename to UintSpan2D if used multiple times. Could replace with
32 | // mdspan.
33 | struct ItemVertices  // Non-owning 2D view: itemCount rows of itemVertexCount vertex indices each
34 | {
35 | public:
36 | ItemVertices(const uint32_t* itemVertices, uint32_t itemCount, uint32_t itemVertexCount)  // stores the pointer and extents; nothing is copied
37 | : m_itemVertices(itemVertices)
38 | , m_itemCount(itemCount)
39 | , m_itemVertexCount(itemVertexCount)
40 | {
41 | }
42 | uint32_t itemCount() const { return m_itemCount; } // mdspan::extent(0)
43 | uint32_t itemVertexCount() const { return m_itemVertexCount; } // mdspan::extent(1)
44 | std::span vertices(size_t itemIndex) const // ~submdspan: the itemVertexCount indices belonging to item itemIndex
45 | { // Widen to size_t before multiplying: a 32-bit itemCount * itemVertexCount wraps for very large inputs and would shorten the span
46 | return std::span(m_itemVertices, size_t(m_itemCount) * m_itemVertexCount).subspan(itemIndex * m_itemVertexCount, m_itemVertexCount);
47 | }
48 |
49 | private:
50 | const uint32_t* m_itemVertices;  // first element of the row-major index array (not owned)
51 | uint32_t m_itemCount;  // number of rows
52 | uint32_t m_itemVertexCount;  // number of indices per row
53 | };
54 |
55 | // Utility to generate item connections and vertex bits to use the vertex limit
56 | // feature.
57 | struct MeshConnections  // Return type of makeMeshConnections(); owns the three arrays below
58 | {
59 | std::vector connectionRanges;  // passed to nvcluster::Input as itemConnectionRanges
60 | std::vector connectionItems;  // passed to nvcluster::Input as connectionTargetItems
61 | std::vector connectionVertexBits;  // passed to nvcluster::Input as connectionVertexBits
62 | };
63 |
64 | NVCLUSTER_API MeshConnections makeMeshConnections(bool parallelize, ItemVertices itemVertices, uint32_t vertexCount);  // declaration only; the definition is not in this header
65 |
66 | } // namespace nvcluster
67 |
--------------------------------------------------------------------------------
/test/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: Apache-2.0
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | find_package(GTest QUIET)
17 | if(NOT GTest_FOUND)
18 | include(FetchContent)
19 | FetchContent_Declare(
20 | googletest
21 | GIT_REPOSITORY https://github.com/google/googletest.git
22 | GIT_TAG v1.14.0
23 | GIT_SHALLOW TRUE
24 | )
25 | FetchContent_MakeAvailable(googletest)
26 | endif()
27 |
28 | find_package(nanobench QUIET)
29 | if(NOT nanobench_FOUND)
30 | include(FetchContent)
31 | FetchContent_Declare(
32 | nanobench
33 | GIT_REPOSITORY https://github.com/martinus/nanobench.git
34 | GIT_TAG v4.3.11
35 | GIT_SHALLOW TRUE)
36 | FetchContent_MakeAvailable(nanobench)
37 | endif()
38 |
39 | add_executable(nv_cluster_builder_tests
40 | src/test_clusterizer.c
41 | src/test_clusterizer.cpp
42 | src/test_meshes.cpp
43 | src/test_perf.cpp
44 | )
45 |
46 | option(NVCLUSTER_TEST_MESHES "FetchContent cgltf to test meshes in current directory" OFF)
47 | if(NVCLUSTER_TEST_MESHES)
48 | if(NOT TARGET cgltf_static)
49 | set(CGLTF_INCLUDE "${CMAKE_BINARY_DIR}/cgltf")
50 | file(MAKE_DIRECTORY "${CGLTF_INCLUDE}")
51 | file(DOWNLOAD https://raw.githubusercontent.com/jkuhlmann/cgltf/refs/tags/v1.15/cgltf.h "${CGLTF_INCLUDE}/cgltf.h")
52 | file(WRITE "${CMAKE_BINARY_DIR}/cgltf.cpp" "#define CGLTF_IMPLEMENTATION\n#include \n")
53 | add_library(cgltf_static "${CMAKE_BINARY_DIR}/cgltf.cpp")
54 | target_include_directories(cgltf_static PUBLIC "${CGLTF_INCLUDE}")
55 | endif()
56 | target_compile_definitions(nv_cluster_builder_tests PRIVATE TEST_MESHES)
57 | target_link_libraries(nv_cluster_builder_tests PRIVATE cgltf_static)
58 | endif()
59 |
60 | target_include_directories(nv_cluster_builder_tests PRIVATE src ../src) # adds internal src directory to allow unit testing
61 | target_link_libraries(nv_cluster_builder_tests PRIVATE nv_cluster_builder gtest_main gmock_main nanobench)
62 |
63 | if(MSVC)
64 | target_compile_options(nv_cluster_builder_tests PRIVATE
65 | /W4
66 | /WX
67 | )
68 | target_compile_definitions(nv_cluster_builder_tests PRIVATE WIN32_LEAN_AND_MEAN=1 NOMINMAX)
69 | else()
70 | target_compile_options(nv_cluster_builder_tests PRIVATE
71 | -Wall
72 | -Wextra
73 | -Wpedantic
74 | -Wshadow
75 | -Wconversion
76 | -Werror
77 | )
78 | if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
79 | target_compile_definitions(nv_cluster_builder_tests PRIVATE
80 | $<$:_GLIBCXX_ASSERTIONS>
81 | )
82 | endif()
83 | endif()
84 |
85 | include(GoogleTest)
86 | gtest_discover_tests(nv_cluster_builder_tests)
87 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION
16 | # SPDX-License-Identifier: Apache-2.0
17 |
18 | cmake_minimum_required(VERSION 3.20)
19 | project(nv_cluster_builder VERSION 2.0)
20 |
21 | set(SOURCES
22 | src/clusterizer.cpp
23 | src/connections.cpp
24 | src/nvcluster.cpp
25 | )
26 | file(GLOB HEADERS_INTERNAL
27 | src/*.hpp
28 | )
29 | file(GLOB HEADERS_PUBLIC
30 | include/nvcluster/*.h
31 | include/nvcluster/*.hpp
32 | include/nvcluster/util/*.hpp
33 | )
34 |
35 | source_group("public_include" FILES ${HEADERS_PUBLIC})
36 | source_group("source" FILES ${SOURCES} ${HEADERS_INTERNAL})
37 |
38 | # Optionally build as a shared library
39 | include(CMakeDependentOption)
40 | cmake_dependent_option(
41 | NVCLUSTER_BUILDER_SHARED # option variable
42 | "Build shared library" # description
43 | OFF # default value if exposed; user can override
44 | "NOT BUILD_SHARED_LIBS" # condition to expose option
45 | ON # value if not exposed; user can't override
46 | )
47 |
48 | if (NVCLUSTER_BUILDER_SHARED)
49 | set(CMAKE_C_VISIBILITY_PRESET hidden)
50 | set(CMAKE_CXX_VISIBILITY_PRESET hidden)
51 | set(CMAKE_VISIBILITY_INLINES_HIDDEN 1)
52 | add_library(nv_cluster_builder SHARED ${SOURCES} ${HEADERS_INTERNAL} ${HEADERS_PUBLIC})
53 | target_compile_definitions(nv_cluster_builder PUBLIC NVCLUSTER_BUILDER_SHARED)
54 | else()
55 | add_library(nv_cluster_builder STATIC ${SOURCES} ${HEADERS_INTERNAL} ${HEADERS_PUBLIC})
56 | endif ()
57 | target_compile_features(nv_cluster_builder PUBLIC cxx_std_20)
58 | target_include_directories(nv_cluster_builder PUBLIC include)
59 | target_include_directories(nv_cluster_builder PRIVATE src)
60 | target_compile_definitions(nv_cluster_builder PRIVATE NVCLUSTER_BUILDER_COMPILING)
61 |
62 | # All the warnings. Branch on COMPILE_LANGUAGE to avoid passing unknowns to nvcc
63 | if(MSVC)
64 | target_compile_options(nv_cluster_builder PRIVATE
65 | $<$:/W4>
66 | $<$:/WX>
67 | $<$:/wd4127> # 'conditional expression is constant' unhelpful when mixing c.t. and dynamic
68 | )
69 | target_compile_definitions(nv_cluster_builder PRIVATE WIN32_LEAN_AND_MEAN=1 NOMINMAX)
70 | else()
71 | target_compile_options(nv_cluster_builder PRIVATE
72 | -fno-math-errno
73 | -fno-trapping-math
74 | #-funsafe-math-optimizations
75 | $<$:-Wall>
76 | $<$:-Wextra>
77 | $<$:-Wpedantic>
78 | $<$:-Wconversion>
79 | $<$:-Werror>
80 | )
81 | if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
82 | target_compile_definitions(nv_cluster_builder PRIVATE
83 | $<$:_GLIBCXX_ASSERTIONS>
84 | # Do not use ABI breaking _GLIBCXX_DEBUG or _GLIBCXX_DEBUG_BACKTRACE
85 | )
86 | endif()
87 | endif()
88 |
89 | option(NVCLUSTER_MULTITHREADED "Build with multithreaded cluster generation support" ON)
90 | if(NVCLUSTER_MULTITHREADED)
91 | target_compile_definitions(nv_cluster_builder PRIVATE NVCLUSTER_MULTITHREADED=1)
92 |
93 | # Optional TBB for std::execution on linux
94 | if(NOT MSVC)
95 | find_library(TBB_LIBRARIES NAMES tbb HINTS ${TBB_DIR})
96 | if(TBB_LIBRARIES)
97 | message(STATUS "TBB: ${TBB_LIBRARIES}")
98 | target_link_libraries(nv_cluster_builder PRIVATE ${TBB_LIBRARIES})
99 | else()
100 | message(STATUS "TBB not found for std::execution")
101 | endif()
102 | endif()
103 | else()
104 | target_compile_definitions(nv_cluster_builder PRIVATE NVCLUSTER_MULTITHREADED=0)
105 | endif()
106 |
107 | if(BUILD_TESTING)
108 | option(BUILD_NV_CLUSTER_BUILDER_TESTING "Build nv_cluster_builder tests" ON)
109 | if(BUILD_NV_CLUSTER_BUILDER_TESTING)
110 | enable_testing()
111 | add_subdirectory(test)
112 | endif()
113 | endif()
114 |
115 | install(TARGETS nv_cluster_builder)
116 |
--------------------------------------------------------------------------------
/include/nvcluster/nvcluster_storage.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *
16 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION
17 | * SPDX-License-Identifier: Apache-2.0
18 | */
19 |
20 | #pragma once
21 | #include
22 |
23 | #include
24 |
25 | namespace nvcluster {
26 |
27 | // Utility storage for clustering output
28 | // Construct with generateClusters()
29 | struct ClusterStorage
30 | {
31 | std::vector clusterItemRanges;  // generateClusters() sizes this to the upper bound, then trims it to the written cluster count
32 | std::vector items;  // generateClusters() sizes this to input.itemCount
33 |
34 | void shrink_to_fit()  // only clusterItemRanges is shrunk; items is left as is
35 | {
36 | // clusterItemRanges is conservatively sized for clustering output. If this
37 | // object is kept around, memory can be saved by reallocating.
38 | clusterItemRanges.shrink_to_fit();
39 | }
40 | };
41 |
42 | // Utility storage for segmented clustering output
43 | // Construct with generateSegmentedClusters()
44 | struct SegmentedClusterStorage
45 | {
46 | std::vector segmentClusterRanges;  // generateSegmentedClusters() sizes this to segments.segmentCount
47 | std::vector clusterItemRanges;  // sized to the upper bound, then trimmed to the written cluster count
48 | std::vector items;  // generateSegmentedClusters() sizes this to input.itemCount
49 |
50 | void shrink_to_fit()  // only clusterItemRanges is shrunk; the other vectors are left as is
51 | {
52 | // clusterItemRanges is conservatively sized for clustering output. If this
53 | // object is kept around, memory can be saved by reallocating.
54 | clusterItemRanges.shrink_to_fit();
55 | }
56 | };
57 |
58 | // ClusterStorage delayed init constructor: sizes clusterStorage, runs nvclusterBuild() into it and trims the result
59 | inline nvcluster_Result generateClusters(nvcluster_Context context,
60 | const nvcluster_Config& config,
61 | const nvcluster_Input& input,
62 | ClusterStorage& clusterStorage)  // output; previous contents are overwritten by the resizes below
63 | {
64 | // Query output upper limit
65 | nvcluster_Counts requiredCounts;
66 | nvcluster_Result result = nvclusterGetRequirements(context, &config, input.itemCount, &requiredCounts);
67 | if(result != nvcluster_Result::NVCLUSTER_SUCCESS)
68 | {
69 | return result;  // clusterStorage has not been touched on this path
70 | }
71 |
72 | // Resize to the upper limit
73 | clusterStorage.clusterItemRanges.resize(requiredCounts.clusterCount);
74 | clusterStorage.items.resize(input.itemCount);
75 |
76 | // Build clusters
77 | nvcluster_OutputClusters outputClusters{
78 | .clusterItemRanges = clusterStorage.clusterItemRanges.data(),
79 | .items = clusterStorage.items.data(),
80 | .clusterCount = uint32_t(clusterStorage.clusterItemRanges.size()),
81 | .itemCount = uint32_t(clusterStorage.items.size()),
82 | };
83 | result = nvclusterBuild(context, &config, &input, &outputClusters);
84 | if(result != nvcluster_Result::NVCLUSTER_SUCCESS)
85 | {
86 | return result;  // NOTE: clusterStorage stays sized to the upper limit on this path
87 | }
88 |
89 | // Resize down to what was written. Let the user call shrink_to_fit() if the
90 | // object is not temporary.
91 | clusterStorage.clusterItemRanges.resize(outputClusters.clusterCount);
92 | return result;
93 | }
94 | // SegmentedClusterStorage counterpart of generateClusters(): sizes the storage, runs nvclusterBuildSegmented() and trims the result
95 | inline nvcluster_Result generateSegmentedClusters(nvcluster_Context context,
96 | const nvcluster_Config& config,
97 | const nvcluster_Input& input,
98 | const nvcluster_Segments& segments,
99 | SegmentedClusterStorage& segmentedClusterStorage)  // output; previous contents are overwritten by the resizes below
100 | {
101 | // Query output upper limit
102 | nvcluster_Counts requiredCounts;
103 | nvcluster_Result result = nvclusterGetRequirementsSegmented(context, &config, input.itemCount, &segments, &requiredCounts);
104 | if(result != nvcluster_Result::NVCLUSTER_SUCCESS)
105 | {
106 | return result;  // segmentedClusterStorage has not been touched on this path
107 | }
108 |
109 | // Resize to the upper limit
110 | segmentedClusterStorage.segmentClusterRanges.resize(segments.segmentCount);
111 | segmentedClusterStorage.clusterItemRanges.resize(requiredCounts.clusterCount);
112 | segmentedClusterStorage.items.resize(input.itemCount);
113 |
114 | // Build clusters
115 | nvcluster_OutputClusters outputClusters{
116 | .clusterItemRanges = segmentedClusterStorage.clusterItemRanges.data(),
117 | .items = segmentedClusterStorage.items.data(),
118 | .clusterCount = uint32_t(segmentedClusterStorage.clusterItemRanges.size()),
119 | .itemCount = uint32_t(segmentedClusterStorage.items.size()),
120 | };
121 | result = nvclusterBuildSegmented(context, &config, &input, &segments, &outputClusters,
122 | segmentedClusterStorage.segmentClusterRanges.data());
123 | if(result != nvcluster_Result::NVCLUSTER_SUCCESS)
124 | {
125 | return result;  // NOTE: the storage stays sized to the upper limit on this path
126 | }
127 |
128 | // Resize down to what was written. Let the user call shrink_to_fit() if the
129 | // object is not temporary.
130 | segmentedClusterStorage.clusterItemRanges.resize(outputClusters.clusterCount);
131 | return result;
132 | }
133 |
134 | } // namespace nvcluster
135 |
--------------------------------------------------------------------------------
/src/clusterizer.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *
16 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION
17 | * SPDX-License-Identifier: Apache-2.0
18 | */
19 | #pragma once
20 |
21 | #include
22 | #include
23 | #include
24 | #include
25 |
26 | namespace nvcluster {
27 | // Convenience overload: wraps input.itemVertices in an ItemVertices view and forwards to the ItemVertices overload
28 | inline MeshConnections makeMeshConnections(bool parallelize, const nvcluster_Config& inputConfig, const nvcluster_Input& input)
29 | {
30 | return makeMeshConnections(parallelize, ItemVertices(input.itemVertices, input.itemCount, inputConfig.itemVertexCount),
31 | input.vertexCount);
32 | }
33 |
34 | struct Input  // Span-based view of the C API input structs; holds no copies of the arrays
35 | {
36 | Input(const nvcluster_Config& inputConfig, const nvcluster_Input& input, const nvcluster_Segments& inputSegments)  // connections, if any, come from the C input arrays
37 | : Input(inputConfig,
38 | std::span(reinterpret_cast(input.itemBoundingBoxes), input.itemCount),
39 | std::span(reinterpret_cast(input.itemCentroids), input.itemCount),
40 | std::span(reinterpret_cast(inputSegments.segmentItemRanges), inputSegments.segmentCount),
41 | maybeNull(reinterpret_cast(input.itemConnectionRanges), input.itemCount),
42 | maybeNull(input.connectionTargetItems, input.connectionCount),
43 | maybeNull(input.connectionWeights, input.connectionCount),
44 | maybeNull(input.connectionVertexBits, input.connectionCount))
45 | {
46 | }
47 | // Same as above, but connection data is taken from a MeshConnections object instead of the C input arrays
48 | Input(const nvcluster_Config& inputConfig, const nvcluster_Input& input, const nvcluster_Segments& inputSegments, const MeshConnections& meshConnections)
49 | : Input(inputConfig,
50 | std::span(reinterpret_cast(input.itemBoundingBoxes), input.itemCount),
51 | std::span(reinterpret_cast(input.itemCentroids), input.itemCount),
52 | std::span(reinterpret_cast(inputSegments.segmentItemRanges), inputSegments.segmentCount),
53 | meshConnections.connectionRanges,
54 | meshConnections.connectionItems,
55 | {}, // connectionWeights left empty: incompatible with auto-computed connections
56 | meshConnections.connectionVertexBits)
57 | {
58 | }
59 | // Target of the two delegating constructors above: stores the config reference and the spans exactly as given
60 | Input(const nvcluster_Config& config_,
61 | std::span boundingBoxes_,
62 | std::span centroids_,
63 | std::span segments_,
64 | std::span itemConnectionRanges_ = {},
65 | std::span connectionTargetItems_ = {},
66 | std::span connectionWeights_ = {},
67 | std::span connectionVertexBits_ = {})
68 | : config(config_)
69 | , boundingBoxes(boundingBoxes_)
70 | , centroids(centroids_)
71 | , segments(segments_)
72 | , itemConnectionRanges(itemConnectionRanges_)
73 | , connectionTargetItems(connectionTargetItems_)
74 | , connectionWeights(connectionWeights_)
75 | , connectionVertexBits(connectionVertexBits_)
76 | {
77 | // NOTE: validation is done by the C API and none here to avoid throwing
78 | // more exceptions than the standard library already does, e.g. bad_alloc
79 | }
80 |
81 | // Minimal spatial-only input
82 | const nvcluster_Config& config;  // stored by reference: the config passed to the constructor must outlive this object
83 | std::span boundingBoxes;
84 | std::span centroids;
85 |
86 | // Clusterize within each range of items
87 | std::span segments;
88 |
89 | // Optional connections (may be empty)
90 | std::span itemConnectionRanges;
91 | std::span connectionTargetItems;
92 | std::span connectionWeights;
93 | std::span connectionVertexBits;
94 |
95 | private:
96 | template
97 | std::span maybeNull(const T* ptr, uint32_t size)  // empty span when ptr is null, otherwise a span over size elements at ptr
98 | {
99 | return ptr ? std::span{ptr, size} : std::span{};
100 | }
101 | };
102 |
103 | struct OutputClusters  // Span-based view of the caller-provided output buffers, plus references to the output counts
104 | {
105 | OutputClusters(nvcluster_OutputClusters& output, nvcluster_Range* outputSegments, uint32_t outputSegmentCount)
106 | : clusterItemRanges(reinterpret_cast(output.clusterItemRanges), output.clusterCount)
107 | , items(reinterpret_cast(output.items), output.itemCount)
108 | , segments(reinterpret_cast(outputSegments), outputSegmentCount)
109 | , clusterCount(output.clusterCount)
110 | , itemCount(output.itemCount)
111 | {
112 | }
113 | std::span clusterItemRanges;  // output.clusterCount elements at output.clusterItemRanges
114 | std::span items;  // output.itemCount elements at output.items
115 | std::span segments;  // outputSegmentCount elements at outputSegments
116 | uint32_t& clusterCount; // output count reference, bound to output.clusterCount
117 | uint32_t& itemCount; // output count reference, bound to output.itemCount
118 | };
119 |
120 | NVCLUSTER_API [[nodiscard]] nvcluster_Result clusterize(bool parallelize, const Input& input, const OutputClusters& clusters);  // declaration only; the definition is not in this header
121 |
122 | } // namespace nvcluster
123 |
--------------------------------------------------------------------------------
/src/underfill_cost.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *
16 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION
17 | * SPDX-License-Identifier: Apache-2.0
18 | */
19 | /// @file Heuristics to compute item/triangle and vertex "underfill costs",
20 | /// which encourage the clusterizer to form bigger clusters, still within the
21 | /// maximums. Separated from the source file for unit testing.
22 | #pragma once
23 |
24 | #include
25 | #include
26 |
27 | namespace nvcluster {
28 |
29 | // Switch to compute a connectedness metric that indicates how many vertices
30 | // will be duplicated after cutting the node, rather than assume the square root
31 | // of vertices will be cut. Takes a bit longer but can help with long skinny
32 | // geometry like triangle strips.
33 | static constexpr bool COMPUTE_AVERAGE_CUT_VERTICES = true;  // when true, generalUnderfillCount() calls the overloads that take averageCutVertices
34 |
35 | struct Underfill  // Return type of generalUnderfillCount()
36 | {
37 | uint32_t underfillCount = 0;  // shortfall count; its unit is selected by vertexLimited below
38 |
39 | // underfillCount is unique vertices if true, otherwise count is items
40 | bool vertexLimited = false;
41 | };
42 | // Evaluates the closed form below for s, with n = currentVertices and v = targetVertices. The result is fractional.
43 | inline float guessRequiredClustersForVertexLimit(float currentVertices, float targetVertices)
44 | {
45 | // s=\frac{nv-2\sqrt{n}v+2\sqrt{\left(\sqrt{n}-1\right)^{4}v}+n-2\sqrt{n}+v+1}{\left(v-1\right)^{2}}
46 | float sqrtN = sqrtf(currentVertices);
47 | float sqrtNMinus1_4 = powf(sqrtN - 1.0f, 4.0f);  // (sqrt(n) - 1)^4, the factor under the formula's inner square root
48 | float numerator = currentVertices * targetVertices //
49 | - 2.0f * sqrtN * targetVertices //
50 | + 2.0f * sqrtf(sqrtNMinus1_4 * targetVertices) //
51 | + currentVertices //
52 | - 2.0f * sqrtN //
53 | + targetVertices //
54 | + 1.0f;
55 | float denominator = (targetVertices - 1.0f) * (targetVertices - 1.0f);  // NOTE(review): zero when targetVertices == 1, so the division below is by zero
56 | return numerator / denominator;
57 | }
58 | // Overload taking an explicit averageCutVertices (a) in addition to n = currentVertices and v = targetVertices
59 | inline float guessRequiredClustersForVertexLimit(float currentVertices, float averageCutVertices, float targetVertices)
60 | {
61 | // (2 sqrt((a - 1)^2 (a^2 - 2 a v + n (v - 1) + v)) + 2 a^2 - 2 a (v + 1) + n (v - 1) + v + 1)/(v - 1)^2
62 | float a = averageCutVertices;
63 | float v = targetVertices;
64 | float n = currentVertices;
65 | float t1 = a * a - 2.0f * a * v + n * (v - 1.0f) + v;  // second factor under the square root in the formula above
66 | if(t1 < 0.0f) // candidate split with less than the average cut vertices (e.g. first or last few)
67 | return 1.0f;
68 | float t2 = 2.0f * (a - 1.0f) * sqrtf(t1) + 2.0f * a * a - 2.0f * a * (v + 1.0f) + n * (v - 1.0f) + v + 1.0f;  // numerator; (a - 1) is taken outside the root without an absolute value
69 | return t2 / ((v - 1.0f) * (v - 1.0f));  // NOTE(review): divides by zero when targetVertices == 1
70 | }
71 |
72 | // Inverse of guessRequiredClustersForVertexLimit()
73 | inline float guessVerticesAfterClustering(float currentVertices, float clusters)  // evaluates v(n, s) below with n = currentVertices, s = clusters
74 | {
75 | // v\left(n,s\right)=\frac{\left(\sqrt{n}+\sqrt{s}-1\right)^{2}}{s}
76 | float t = sqrtf(currentVertices) + sqrtf(clusters) - 1.0f;
77 | return (t * t) / clusters;  // NOTE(review): clusters == 0 divides by zero; the caller visible here passes a ceilf() result
78 | }
79 |
80 | // Inverse of guessRequiredClustersForVertexLimit() with averageCutVertices
81 | inline float guessVerticesAfterClustering(float currentVertices, float averageCutVertices, float clusters)
82 | {
83 | // v(n, s) = (2 (a - 1) sqrt(s) - 2 a + n + s + 1)/s
84 | float t = 2.0f * (averageCutVertices - 1.0f) * sqrtf(clusters) - 2.0f * averageCutVertices + currentVertices + clusters + 1.0f;
85 | return t / clusters;
86 | }
87 |
// Returns the number of items remaining to fill the last bucket when
// itemCount items are packed into buckets of bucketSize items. The result is
// zero when itemCount is a multiple of bucketSize, including itemCount == 0.
//
// The value is derived from the remainder rather than by rounding itemCount up
// to the next multiple of bucketSize, because that rounded value can exceed
// the uint32_t range and wrap around for large item counts.
//
// bucketSize must be non-zero.
inline uint32_t underfillCount(uint32_t bucketSize, uint32_t itemCount)
{
  const uint32_t itemsInLastBucket = itemCount % bucketSize;
  return itemsInLastBucket == 0u ? 0u : bucketSize - itemsInLastBucket;
}
93 |
94 | // Computes the expected number of vertices less than the maximum in the
95 | // remaining cluster. This is entirely modelled off connections from shared
96 | // vertices between a rectangular grid of triangles.
97 | // TODO: remove AABB
98 | inline Underfill generalUnderfillCount(const Input& input, uint32_t itemCount, uint32_t vertexCount, float averageCutVertices)
99 | {
100 | float requiredClustersItems = float(itemCount) / float(input.config.maxClusterSize);
101 | float requiredClustersVertices =
102 | COMPUTE_AVERAGE_CUT_VERTICES ?
103 | guessRequiredClustersForVertexLimit(float(vertexCount), averageCutVertices, float(input.config.maxClusterVertices)) :
104 | guessRequiredClustersForVertexLimit(float(vertexCount), float(input.config.maxClusterVertices));
105 |
106 | if(requiredClustersItems > requiredClustersVertices)
107 | {
108 | // Item limited
109 | return {underfillCount(input.config.maxClusterSize, itemCount), false};
110 | }
111 | else
112 | {
113 | // Vertex limited
114 | float clusterCount = ceilf(requiredClustersVertices - 1e-6f);
115 | float verticesPerCluster = COMPUTE_AVERAGE_CUT_VERTICES ?
116 | guessVerticesAfterClustering(float(vertexCount), averageCutVertices, clusterCount) :
117 | guessVerticesAfterClustering(float(vertexCount), clusterCount);
118 | float availableVertices = clusterCount * float(input.config.maxClusterVertices);
119 | float underfill = availableVertices - verticesPerCluster * clusterCount + 0.5f;
120 | assert(verticesPerCluster > 1.0f);
121 | assert(underfill >= 0.0f);
122 | return {uint32_t(underfill), true};
123 | }
124 | }
125 |
126 | } // namespace nvcluster
127 |
--------------------------------------------------------------------------------
/include/nvcluster/util/parallel_execution_libcxx.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *
16 | * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION
17 | * SPDX-License-Identifier: Apache-2.0
18 | */
19 | /// @file Shim for missing libc++ features. libc++ is the LLVM implementation of
20 | /// the standard library. This project was developed with libstdc++ (the GNU
21 | /// implementation) and MSVC STL. The contents of this file provides workarounds
22 | /// for missing features, and disables parallel execution in the process.
23 | /// See: https://github.com/nvpro-samples/nv_cluster_lod_builder/issues/1
24 | /// TODO: parallel execution with e.g. https://github.com/mikekazakov/pstld
25 | #pragma once
26 |
27 | #include
28 | #include
29 |
30 | // TODO: add a numerical comparison for _LIBCPP_VERSION if std::execution
31 | // support is added
32 | #if defined(_LIBCPP_VERSION)
33 |
// Disable parallel execution as it is not supported by libc++ or this shim.
// Any value provided by the build is overridden; #undef is harmless when the
// macro was not defined in the first place.
#undef NVCLUSTER_MULTITHREADED
#define NVCLUSTER_MULTITHREADED 0
41 |
42 | namespace std {
43 |
// If you see duplicate definitions here, filter out the current _LIBCPP_VERSION
namespace execution {

// Empty tag types, one per execution policy name
class sequenced_policy {};
class parallel_policy {};
class parallel_unsequenced_policy {};
class unsequenced_policy {};

// Policy objects passed as the first argument of the algorithm overloads
inline constexpr sequenced_policy            seq{};
inline constexpr parallel_policy             par{};
inline constexpr parallel_unsequenced_policy par_unseq{};
inline constexpr unsequenced_policy          unseq{};

}  // namespace execution
63 |
64 | template
65 | void for_each(ExecutionPolicy&&,
66 |
67 | ForwardIt first,
68 | ForwardIt last,
69 | UnaryFunc f)
70 | {
71 | for_each(first, last, f);
72 | }
73 |
74 | template
75 | requires std::same_as, execution::sequenced_policy>
76 | ForwardIt2 inclusive_scan(ExecutionPolicy&&, ForwardIt1 first, ForwardIt1 last, ForwardIt2 d_first)
77 | {
78 | return inclusive_scan(first, last, d_first);
79 | }
80 |
81 | template
82 | requires std::same_as, execution::sequenced_policy>
83 | || std::same_as, execution::parallel_policy>
84 | || std::same_as, execution::parallel_unsequenced_policy>
85 | || std::same_as, execution::unsequenced_policy>
86 | ForwardIt2 exclusive_scan(ExecutionPolicy&&, ForwardIt1 first, ForwardIt1 last, ForwardIt2 d_first, T init)
87 | {
88 | static_assert(std::same_as, execution::sequenced_policy>); // SFINAE delayed error
89 | return exclusive_scan(first, last, d_first, init);
90 | }
91 |
92 | template
93 | requires std::same_as, execution::sequenced_policy>
94 | ForwardIt2 exclusive_scan(ExecutionPolicy&&, ForwardIt1 first, ForwardIt1 last, ForwardIt2 d_first, T init, BinaryOp op)
95 | {
96 | return exclusive_scan(first, last, d_first, init, op);
97 | }
98 |
99 | template
100 | requires std::same_as, execution::sequenced_policy>
101 | ForwardIt2 transform_exclusive_scan(ExecutionPolicy&&, ForwardIt1 first, ForwardIt1 last, ForwardIt2 d_first, T init, BinaryOp binary_op, UnaryOp unary_op)
102 | {
103 | return transform_exclusive_scan(first, last, d_first, init, binary_op, unary_op);
104 | }
105 |
106 | template
107 | requires std::same_as, execution::sequenced_policy>
108 | ForwardIt2 transform_inclusive_scan(ExecutionPolicy&&,
109 | ForwardIt1 first,
110 | ForwardIt1 last,
111 | ForwardIt2 d_first,
112 |
113 | BinaryOp binary_op,
114 | UnaryOp unary_op)
115 | {
116 | #if 1
117 | auto transformed_view = std::ranges::subrange(first, last) | std::views::transform(unary_op);
118 | return std::inclusive_scan(transformed_view.begin(), transformed_view.end(), d_first, binary_op);
119 | #else
120 | // possible bug in libc++: typename iterator_traits<_InputIterator>::value_type __init = __u(*__first);
121 | return transform_inclusive_scan(first, last, d_first, binary_op, unary_op);
122 | #endif
123 | }
124 |
125 | template
126 | requires std::same_as, execution::sequenced_policy>
127 | BidirIt stable_partition(ExecutionPolicy&&,
128 |
129 | BidirIt first,
130 | BidirIt last,
131 | UnaryPred p)
132 | {
133 | return stable_partition(first, last, p);
134 | }
135 |
136 | template
137 | requires std::same_as, execution::sequenced_policy>
138 | void sort(ExecutionPolicy&&, RandomIt first, RandomIt last)
139 | {
140 | return sort(first, last);
141 | }
142 |
143 | template
144 | requires std::same_as, execution::sequenced_policy>
145 | void sort(ExecutionPolicy&&, RandomIt first, RandomIt last, Compare comp)
146 | {
147 | return sort(first, last, comp);
148 | }
149 |
150 | template
151 | requires std::same_as, execution::sequenced_policy>
152 | T transform_reduce(ExecutionPolicy&&, ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, T init)
153 | {
154 | return transform_reduce(first1, last1, first2, init);
155 | }
156 |
157 | template
158 | requires std::same_as, execution::sequenced_policy>
159 | T transform_reduce(ExecutionPolicy&&, ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, T init, BinaryOp1 reduce, BinaryOp2 transform)
160 | {
161 | return transform_reduce(first1, last1, first2, init, reduce, transform);
162 | }
163 |
164 | // Workaround for missing atomic_ref in libc++
165 | #if _LIBCPP_VERSION < 190000
166 | struct atomic_ref
167 | {
168 | atomic_ref(uint32_t& v)
169 | : value(&v)
170 | {
171 | }
172 | uint32_t operator++() { return reinterpret_cast&>(*value).operator++(); }
173 | uint32_t operator++(int) { return reinterpret_cast&>(*value).operator++(0); }
174 | uint32_t* value;
175 | };
176 | #endif
177 |
178 | } // namespace std
179 | #endif
180 |
--------------------------------------------------------------------------------
/include/nvcluster/util/parallel.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *
16 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION
17 | * SPDX-License-Identifier: Apache-2.0
18 | */
19 | #pragma once
20 |
21 | #include
22 |
23 | // Workaround for libc++ std::execution
24 | #include
25 |
26 | // Shortcut to select the parallel execution model depending on a bool, using
27 | // variable template specialization
28 | template
29 | inline constexpr auto exec = std::execution::seq;
30 | template <>
31 | inline constexpr auto exec = std::execution::par_unseq;
32 |
33 | // This is an iterator that counts upwards from an initial value.
34 | // std::views::iota would almost work for this, but iota on MSVC 2019 doesn't
35 | // support random access, which is necessary for parallelism.
36 | template
37 | struct iota_iterator
38 | {
39 | using value_type = T;
40 | // [iterator.traits] in the C++ standard requires this to be a signed type.
41 | // We choose int64_t here, because it's conceivable someone could use
42 | // T == uint32_t and then iterate over more than 2^31 - 1 elements.
43 | using difference_type = int64_t;
44 | using pointer = T*;
45 | using reference = T&;
46 | using iterator_category = std::random_access_iterator_tag;
47 | iota_iterator() = default;
48 | iota_iterator(const iota_iterator& other) noexcept = default;
49 | iota_iterator(iota_iterator&& other) noexcept = default;
50 | iota_iterator& operator=(const iota_iterator& other) noexcept = default;
51 | iota_iterator& operator=(iota_iterator&& other) noexcept = default;
52 | iota_iterator(T i_)
53 | : i(i_)
54 | {
55 | }
56 | value_type operator*() const { return i; }
57 | iota_iterator& operator++()
58 | {
59 | ++i;
60 | return *this;
61 | }
62 | iota_iterator operator++(int)
63 | {
64 | iota_iterator t(*this);
65 | ++*this;
66 | return t;
67 | }
68 | iota_iterator& operator--()
69 | {
70 | --i;
71 | return *this;
72 | }
73 | iota_iterator operator--(int)
74 | {
75 | iota_iterator t(*this);
76 | --*this;
77 | return t;
78 | }
79 | iota_iterator operator+(difference_type d) const { return {static_cast(static_cast(i) + d)}; }
80 | iota_iterator operator-(difference_type d) const { return {static_cast(static_cast(i) - d)}; }
81 | iota_iterator& operator+=(difference_type d)
82 | {
83 | i = static_cast(static_cast(i) + d);
84 | return *this;
85 | }
86 | iota_iterator& operator-=(difference_type d)
87 | {
88 | i = static_cast(static_cast(i) - d);
89 | return *this;
90 | }
91 | bool operator==(const iota_iterator& other) const { return i == other.i; }
92 | bool operator!=(const iota_iterator& other) const { return i != other.i; }
93 | bool operator<(const iota_iterator& other) const { return i < other.i; }
94 | bool operator<=(const iota_iterator& other) const { return i <= other.i; }
95 | bool operator>(const iota_iterator& other) const { return i > other.i; }
96 | bool operator>=(const iota_iterator& other) const { return i >= other.i; }
97 | difference_type operator-(const iota_iterator& other) const
98 | {
99 | return static_cast(i) - static_cast(other.i);
100 | }
101 | friend iota_iterator operator+(difference_type n, const iota_iterator& it) { return it + n; }
102 | T operator[](difference_type d) const { return static_cast(static_cast(i) + d); }
103 |
104 | private:
105 | T i = 0;
106 | };
107 |
108 | // Expresses the range from m_begin to m_end - 1.
109 | template
110 | struct iota_view
111 | {
112 | using iterator = iota_iterator;
113 | iota_view(T begin, T end)
114 | : m_begin(begin)
115 | , m_end(end)
116 | {
117 | }
118 | iterator begin() const { return {m_begin}; };
119 | iterator end() const { return {m_end}; };
120 |
121 | private:
122 | T m_begin, m_end;
123 | };
124 |
125 | // Runs a function in parallel for each index from 0 to numItems - 1. Uses
126 | // batches of size BatchSize for reduced overhead and better autovectorization.
127 | //
128 | // BatchSize will also be used as the threshold for when to switch from
129 | // single-threaded to multi-threaded execution. For this reason, it should be set
130 | // to a power of 2 around where multi-threaded is faster than single-threaded for
131 | // the given function. Some examples are:
132 | // * 8192 for trivial workloads (a * x + y)
133 | // * 2048 for animation workloads (multiplication by a single matrix)
134 | // * 512 for more computationally heavy workloads (run XTEA)
135 | // * 1 for full parallelization (load an image)
136 | //
137 | // This is a simpler version of nvh::parallel_batches, which you can find in
138 | // nvpro_core.
139 | template
140 | inline void parallel_batches(size_t numItems, F&& fn)
141 | {
142 | if constexpr(!Parallelize)
143 | {
144 | // Explicit constexpr case to avoid linking to the parallel implementation
145 | // if it's not used (and can't partially specialize the function).
146 | for(size_t i = 0; i < numItems; i++)
147 | {
148 | fn(i);
149 | }
150 | }
151 | else
152 | {
153 | // For small item counts, it's fastest to use a single thread and avoid the
154 | // overhead from invoking a parallel executor.
155 | if(numItems <= BatchSize)
156 | {
157 | for(size_t i = 0; i < numItems; i++)
158 | {
159 | fn(i);
160 | }
161 | }
162 | else
163 | {
164 | // Unroll the loop into batches of size BATCHSIZE or less. This worker
165 | // function will be run in parallel using
166 | // std::for_each(std::execution::par_unseq).
167 | const size_t numBatches = (numItems + BatchSize - 1) / BatchSize;
168 | auto worker = [&numItems, &fn](const size_t batchIndex) {
169 | const size_t start = BatchSize * batchIndex;
170 | const size_t itemsRemaining = numItems - start;
171 | // This split is necessary to make MSVC try to auto-vectorize the first
172 | // loop, which will be the most common case when numItems is large.
173 | if(itemsRemaining >= BatchSize)
174 | {
175 | // Exactly BATCHSIZE items to process
176 | for(size_t i = start; i < start + BatchSize; i++)
177 | {
178 | fn(i);
179 | }
180 | }
181 | else
182 | {
183 | // Variable-length loop
184 | for(size_t i = start; i < numItems; i++)
185 | {
186 | fn(i);
187 | }
188 | }
189 | };
190 |
191 | // This runs the worker above for each batch from 0 to numBatches-1.
192 | iota_view batches(0, numBatches);
193 | std::for_each(std::execution::par_unseq, batches.begin(), batches.end(), worker);
194 | }
195 | }
196 | }
197 |
--------------------------------------------------------------------------------
/test/src/test_perf.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *
16 | * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION
17 | * SPDX-License-Identifier: Apache-2.0
18 | */
19 |
20 | #include
21 | #include
22 | #include
23 | #include
24 |
25 | namespace nb = ankerl::nanobench;
26 |
27 | struct SpatialDesc
28 | {
29 | SpatialDesc(const GeometryMesh& mesh)
30 | {
31 | boundingBoxes.resize(mesh.triangles.size());
32 | std::ranges::transform(mesh.triangles, boundingBoxes.begin(), [&](vec3u tri) { return aabb(tri, mesh.positions); });
33 | centroids.resize(boundingBoxes.size());
34 | std::ranges::transform(boundingBoxes, centroids.begin(), [](AABB b) { return b.center(); });
35 | }
36 | std::vector boundingBoxes;
37 | std::vector centroids;
38 | nvcluster_Input clusterInput(const GeometryMesh* mesh = nullptr) const
39 | {
40 | return nvcluster_Input{
41 | .itemBoundingBoxes = reinterpret_cast(boundingBoxes.data()),
42 | .itemCentroids = reinterpret_cast(centroids.data()),
43 | .itemCount = uint32_t(boundingBoxes.size()),
44 | .itemConnectionRanges = nullptr,
45 | .connectionTargetItems = nullptr,
46 | .connectionWeights = nullptr,
47 | .connectionVertexBits = nullptr,
48 | .connectionCount = 0,
49 | .itemVertices = mesh ? reinterpret_cast(mesh->triangles.data()) : nullptr,
50 | .vertexCount = mesh ? uint32_t(mesh->triangles.size()) : 0,
51 | };
52 | }
53 | };
54 |
55 | TEST(Perf, All)
56 | {
57 | #if !defined(NDEBUG)
58 | GTEST_SKIP() << "Skipping performance tests in debug mode";
59 | #else
60 | GeometryMesh sphere = makeIcosphere(4);
61 | SpatialDesc sphereDesc(sphere);
62 | GeometryMesh tree = generateTree(3);
63 | SpatialDesc treeDesc(tree);
64 | auto sphereSingleTri = sphere;
65 | sphereSingleTri.triangles.resize(1);
66 | nb::Bench()
67 | .minEpochTime(std::chrono::milliseconds(500))
68 | .minEpochIterations(10)
69 | .warmup(1)
70 | .run("makeMeshConnections", [&] { nb::doNotOptimizeAway(makeMeshConnections(false, sphere)); })
71 | .run("makeMeshConnections parallel", [&] { nb::doNotOptimizeAway(makeMeshConnections(true, sphere)); })
72 | .run("makeMeshConnections parallel single tri",
73 | [&] { nb::doNotOptimizeAway(makeMeshConnections(true, sphereSingleTri)); })
74 | .run("cluster sphere limit t=[28,32]",
75 | [&] {
76 | nb::doNotOptimizeAway(ClusterStorage(
77 | nvcluster_Config{
78 | .minClusterSize = 28,
79 | .maxClusterSize = 32,
80 | .maxClusterVertices = ~0u,
81 | .costUnderfill = 0.0f,
82 | .costOverlap = 0.0f,
83 | .costUnderfillVertices = 0.0f,
84 | .itemVertexCount = 3,
85 | .preSplitThreshold = 0,
86 | },
87 | sphereDesc.clusterInput()));
88 | })
89 | .run("cluster sphere limit t=[28,32], v=32*3",
90 | [&] {
91 | nb::doNotOptimizeAway(ClusterStorage(
92 | nvcluster_Config{
93 | .minClusterSize = 28,
94 | .maxClusterSize = 32,
95 | .maxClusterVertices = 32 * 3,
96 | .costUnderfill = 0.0f,
97 | .costOverlap = 0.0f,
98 | .costUnderfillVertices = 0.0f,
99 | .itemVertexCount = 3,
100 | .preSplitThreshold = 0,
101 | },
102 | sphereDesc.clusterInput(&sphere)));
103 | })
104 | .run("cluster sphere limit t=[28,32], v=16",
105 | [&] {
106 | nb::doNotOptimizeAway(ClusterStorage(
107 | nvcluster_Config{
108 | .minClusterSize = 28,
109 | .maxClusterSize = 32,
110 | .maxClusterVertices = 16,
111 | .costUnderfill = 0.0f,
112 | .costOverlap = 0.0f,
113 | .costUnderfillVertices = 0.0f,
114 | .itemVertexCount = 3,
115 | .preSplitThreshold = 0,
116 | },
117 | sphereDesc.clusterInput(&sphere)));
118 | })
119 | .run("cluster sphere limit t=[28,32], v=16, costs",
120 | [&] {
121 | nb::doNotOptimizeAway(ClusterStorage(
122 | nvcluster_Config{
123 | .minClusterSize = 28,
124 | .maxClusterSize = 32,
125 | .maxClusterVertices = 16,
126 | .costUnderfill = 0.1f,
127 | .costOverlap = 0.1f,
128 | .costUnderfillVertices = 0.1f,
129 | .itemVertexCount = 3,
130 | .preSplitThreshold = 0,
131 | },
132 | sphereDesc.clusterInput(&sphere)));
133 | })
134 | .run("cluster tree limit t=[28,32]",
135 | [&] {
136 | nb::doNotOptimizeAway(ClusterStorage(
137 | nvcluster_Config{
138 | .minClusterSize = 28,
139 | .maxClusterSize = 32,
140 | .maxClusterVertices = ~0u,
141 | .costUnderfill = 0.0f,
142 | .costOverlap = 0.0f,
143 | .costUnderfillVertices = 0.0f,
144 | .itemVertexCount = 3,
145 | .preSplitThreshold = 0,
146 | },
147 | treeDesc.clusterInput()));
148 | })
149 | .run("cluster tree limit t=[28,32], v=16",
150 | [&] {
151 | nb::doNotOptimizeAway(ClusterStorage(
152 | nvcluster_Config{
153 | .minClusterSize = 28,
154 | .maxClusterSize = 32,
155 | .maxClusterVertices = 16,
156 | .costUnderfill = 0.0f,
157 | .costOverlap = 0.0f,
158 | .costUnderfillVertices = 0.0f,
159 | .itemVertexCount = 3,
160 | .preSplitThreshold = 0,
161 | },
162 | treeDesc.clusterInput(&tree)));
163 | })
164 | .run("cluster tree limit t=[28,32], v=16, costs", [&] {
165 | nb::doNotOptimizeAway(ClusterStorage(
166 | nvcluster_Config{
167 | .minClusterSize = 28,
168 | .maxClusterSize = 32,
169 | .maxClusterVertices = 16,
170 | .costUnderfill = 0.1f,
171 | .costOverlap = 0.1f,
172 | .costUnderfillVertices = 0.1f,
173 | .itemVertexCount = 3,
174 | .preSplitThreshold = 0,
175 | },
176 | treeDesc.clusterInput(&tree)));
177 | });
178 | #endif
179 | }
180 |
--------------------------------------------------------------------------------
/test/src/tree_gen.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *
16 | * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION
17 | * SPDX-License-Identifier: Apache-2.0
18 | */
19 | #pragma once
20 |
21 | // AI generated...
22 |
23 | #include
24 | #include
25 | #include
26 | #include
27 | #include
28 |
29 | static constexpr float g_twoPi = 6.28318530718f;
30 |
31 | inline vec3f evaluateBezier(const vec3f& p0, const vec3f& p1, const vec3f& p2, float t)
32 | {
33 | float u = 1 - t;
34 | float tt = t * t;
35 | float uu = u * u;
36 |
37 | vec3f point = uu * p0; // Quadratic term
38 | point += 2 * u * t * p1; // Linear term
39 | point += tt * p2; // Constant term
40 |
41 | return point;
42 | }
43 |
44 | inline GeometryMesh makeTriangleStrip(std::function path, uint32_t segments, float width)
45 | {
46 | GeometryMesh mesh;
47 |
48 | float e = 0.001f;
49 | for(uint32_t i = 0; i <= segments; ++i)
50 | {
51 | float t = float(i) / float(segments);
52 |
53 | vec3f position = path(t);
54 | vec3f dp1 = path(t + e) - path(t); // First derivative (tangent vector)
55 | vec3f dp2 = path(t + 2 * e) - 2 * path(t + e) + path(t); // Second derivative
56 |
57 | vec3f normal = cross(dp1, dp2);
58 | normal = normalize(normal) * width; // Scale normal to desired strip width
59 |
60 | vec3f leftPoint = position - normal * 0.5f;
61 | vec3f rightPoint = position + normal * 0.5f;
62 |
63 | mesh.positions.push_back(leftPoint);
64 | mesh.positions.push_back(rightPoint);
65 |
66 | if(i > 0) // Add triangle indices after the first segment
67 | {
68 | size_t idx = mesh.positions.size();
69 | mesh.triangles.push_back({idx - 2, idx - 3, idx - 1});
70 | mesh.triangles.push_back({idx - 2, idx - 4, idx - 3});
71 | }
72 | }
73 |
74 | return mesh;
75 | }
76 |
77 | inline GeometryMesh makeBranch(std::function path, uint32_t segments, uint32_t segmentsCircular, float radius)
78 | {
79 | GeometryMesh mesh;
80 |
81 | float e = 0.001f;
82 | for(uint32_t j = 0; j <= segments; ++j)
83 | {
84 | float t = float(j) / float(segments);
85 | vec3f position = path(t);
86 | vec3f tangent = normalize(path(t + e) - path(t)); // Compute tangent vector
87 | vec3f normal = normalize(vec3f(-tangent[1], tangent[0], tangent[2])); // Arbitrary normal perpendicular to tangent
88 | vec3f binormal = normalize(cross(tangent, normal)); // Compute binormal for perpendicularity
89 |
90 | size_t baseIndex = mesh.positions.size();
91 |
92 | float segmentRadius = powf(1.0f - t, 0.1f) * radius;
93 | // Generate vertices for a ring around the path position
94 | for(uint32_t i = 0; i < segmentsCircular; ++i)
95 | {
96 | float angle = (g_twoPi * float(i)) / float(segmentsCircular); // Corrected calculation of angle using segmentsCircular
97 | vec3f offset = segmentRadius * (cosf(angle) * normal + sinf(angle) * binormal);
98 | vec3f vertex = position + offset;
99 |
100 | mesh.positions.push_back(vertex);
101 | }
102 |
103 | // Add triangle indices for the cylinder body
104 | if(j > 0)
105 | {
106 | for(uint32_t i = 0; i < segmentsCircular; ++i)
107 | {
108 | uint32_t next = (i + 1) % segmentsCircular; // Corrected modulo operation to properly handle adjacency
109 |
110 | mesh.triangles.push_back({baseIndex + i, baseIndex + next, baseIndex + i - segmentsCircular});
111 | mesh.triangles.push_back({baseIndex + next, baseIndex + next - segmentsCircular, baseIndex + i - segmentsCircular});
112 | }
113 | }
114 | }
115 |
116 | return mesh;
117 | }
118 |
119 | inline GeometryMesh makeCone(std::function path, float t, float radius, uint32_t segments)
120 | {
121 | GeometryMesh mesh;
122 |
123 | vec3f position = path(t);
124 | vec3f tangent = normalize(path(t + 0.01f) - path(t)); // Compute tangent vector
125 | vec3f normal = normalize(vec3f(-tangent[1], tangent[0], tangent[2])); // Arbitrary normal perpendicular to tangent
126 | vec3f binormal = cross(tangent, normal); // Compute binormal for perpendicularity
127 |
128 | // Generate vertices for the base ring
129 | size_t baseIndex = mesh.positions.size();
130 | for(uint32_t j = 0; j <= segments; ++j)
131 | {
132 | float angle = (g_twoPi * float(j)) / float(segments);
133 | vec3f offset = radius * (cosf(angle) * normal + sinf(angle) * binormal);
134 | vec3f vertex = position + offset + tangent * radius;
135 | mesh.positions.push_back(vertex);
136 | }
137 |
138 | // Add the tip of the cone
139 | vec3f tip = position; // Position for cone tip
140 | uint32_t tipIndex = uint32_t(mesh.positions.size());
141 | mesh.positions.push_back(tip);
142 |
143 | // Add triangle indices for the cone
144 | for(uint32_t i = 0; i < segments; ++i)
145 | {
146 | uint32_t next = (i + 1) % segments;
147 | mesh.triangles.push_back({baseIndex + i, baseIndex + next, tipIndex});
148 | }
149 |
150 | return mesh;
151 | }
152 |
153 | inline GeometryMesh mergeMeshes(const GeometryMesh& mesh1, const GeometryMesh& mesh2)
154 | {
155 | GeometryMesh mergedMesh;
156 |
157 | // Combine positions
158 | mergedMesh.positions = mesh1.positions;
159 | mergedMesh.positions.insert(mergedMesh.positions.end(), mesh2.positions.begin(), mesh2.positions.end());
160 |
161 | // Combine triangles, adjusting the indices of the second mesh
162 | mergedMesh.triangles = mesh1.triangles;
163 | size_t offset = mesh1.positions.size(); // Offset for indices of mesh2
164 | mergedMesh.triangles.reserve(mesh1.triangles.size() + mesh2.triangles.size());
165 | for(const auto& triangle : mesh2.triangles)
166 | {
167 | mergedMesh.triangles.push_back({triangle[0] + offset, triangle[1] + offset, triangle[2] + offset});
168 | }
169 |
170 | return mergedMesh;
171 | }
172 |
173 | inline float unitRand()
174 | {
175 | static std::mt19937 gen(0);
176 | static std::uniform_real_distribution dis(0.0f, 1.0f);
177 | return dis(gen);
178 | }
179 |
180 | inline uint32_t intRand(uint32_t min, uint32_t max)
181 | {
182 | static std::mt19937 gen(0);
183 | std::uniform_int_distribution dis(min, max);
184 | return dis(gen);
185 | }
186 |
187 | inline std::function branchPath(vec3f base, float sideScale, float height)
188 | {
189 | float angle = unitRand() * g_twoPi;
190 | vec2f side = vec2f{cosf(angle), sinf(angle)} * sideScale;
191 | return [base, side, height](float t) {
192 | return evaluateBezier(base, base + vec3f{side[0], height * 0.3f, side[1]}, base + vec3f{side[0], height, side[1]}, t);
193 | };
194 | }
195 |
196 | inline void generateTree(GeometryMesh& treeMesh, vec3f base, float side, float height, uint32_t depth)
197 | {
198 | // Generate a branch path
199 | auto path = branchPath(base, side * (depth > 3 ? 0.3f : 1.0f), height);
200 |
201 | // Create geometry for the branch
202 | if(depth == 0)
203 | {
204 | treeMesh = mergeMeshes(treeMesh, makeTriangleStrip(path, 3u + uint32_t(height * 3.0f), height * 0.3f));
205 |
206 | // Add cones along the triangle strip
207 | uint32_t cones = intRand(2, 5);
208 | for(uint32_t i = 1; i < cones; ++i)
209 | {
210 | treeMesh = mergeMeshes(treeMesh, makeCone(path, (float(i) - unitRand() * 0.5f) / float(cones + 1), height * 0.2f, 5));
211 | }
212 | }
213 | else
214 | {
215 | treeMesh = mergeMeshes(treeMesh, makeBranch(path, depth * 2 + 4, depth + 4, height * 0.05f));
216 |
217 | uint32_t branches = depth == 2 ? intRand(4, 16) : intRand(2, 5);
218 | for(uint32_t i = 1; i <= branches; ++i)
219 | {
220 | generateTree(treeMesh, path((float(i) + unitRand() * 0.5f) / float(branches + 1)), side * (0.5f + 0.4f * unitRand()),
221 | height * (0.4f + 0.5f * unitRand()), depth - intRand(1, std::max(1u, depth / 2u)));
222 | }
223 | }
224 | }
225 |
226 | inline GeometryMesh generateTree(uint32_t levels = 4)
227 | {
228 | GeometryMesh treeMesh;
229 | generateTree(treeMesh, {0.0f, 0.0f, 0.0f}, 4.0f, 6.0f, levels);
230 | treeMesh.name = "tree_l" + std::to_string(levels);
231 | return treeMesh;
232 | }
233 |
--------------------------------------------------------------------------------
/doc/clusters.svg:
--------------------------------------------------------------------------------
1 |
2 |
13 |
--------------------------------------------------------------------------------
/doc/underfill_cost.svg:
--------------------------------------------------------------------------------
1 |
2 |
78 |
--------------------------------------------------------------------------------
/include/nvcluster/util/objects.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *
16 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION
17 | * SPDX-License-Identifier: Apache-2.0
18 | */
19 | #pragma once
20 |
21 | #include
22 | #include
23 | #include
24 | #include
25 | #include
26 | #include
27 | #include
28 | #include
29 | #include
30 | #include
31 | #include
32 |
33 | #ifdef min
34 | #error "Preprocessor min defined. Add NOMINMAX to the build system"
35 | #endif
36 |
37 | #ifdef max
38 | #undef "Preprocessor max defined. Add NOMINMAX to the build system"
39 | #endif
40 |
41 | namespace nvcluster {
42 |
43 | // Returns the ceiling of the integer division a / b. Assumes positive values.
44 | template
45 | T div_ceil(const T& a, const T& b)
46 | {
47 | return (a + b - 1) / b; // adding b - 1 first turns the truncating division into a round-up
48 | }
49 |
50 | // A tiny and general fixed-size arithmetic vector built on std::array, like glm
51 | // clang-format off
52 | template
53 | requires std::is_arithmetic_v
54 | struct vec : std::array {
55 | using std::array::array;
56 | using std::array::operator[];
57 | using std::array::begin;
58 | using std::array::end;
59 |
60 | [[nodiscard]] constexpr vec() noexcept : std::array{} {} // zero initialize
61 | [[nodiscard]] constexpr vec(T all) noexcept { std::ranges::fill(*this, all); } // broadcast: every component set to 'all'
62 |
63 | // Workaround for aggregate std::array initialization,
64 | // https://stackoverflow.com/questions/8192185
65 | // TODO: remove unsafe static_cast! Not sure what to do to mimic brace initialization
66 | template
67 | requires (sizeof...(U) == N) && (std::is_convertible_v && ...)
68 | [[nodiscard]] constexpr vec(const U&... init) noexcept : std::array{ {static_cast(init)...} } {} // exactly N values, one per component
69 |
70 | // Component-wise compound assignment. Creating an apply(..., std::plus()) could work too
71 | constexpr vec& operator+=(const vec& v) { for (std::size_t i = 0; i < N; ++i) (*this)[i] += v[i]; return *this; }
72 | constexpr vec& operator-=(const vec& v) { for (std::size_t i = 0; i < N; ++i) (*this)[i] -= v[i]; return *this; }
73 | constexpr vec& operator*=(T s) { for (std::size_t i = 0; i < N; ++i) (*this)[i] *= s; return *this; }
74 | constexpr vec& operator/=(T s) { for (std::size_t i = 0; i < N; ++i) (*this)[i] /= s; return *this; }
75 |
76 | [[nodiscard]] constexpr vec operator-() const { vec r; for (std::size_t i = 0; i < N; ++i) r[i] = -(*this)[i]; return r; }
77 |
78 | // "Hidden friends" for faster compilation; each binary operator copies its vector operand and applies the compound form
79 | [[nodiscard]] friend constexpr vec operator+(const vec& a, const vec& b) { return vec(a) += b; }
80 | [[nodiscard]] friend constexpr vec operator-(const vec& a, const vec& b) { return vec(a) -= b; }
81 | [[nodiscard]] friend constexpr vec operator*(const vec& v, T s) { return vec(v) *= s; }
82 | [[nodiscard]] friend constexpr vec operator*(T s, const vec& v) { return v * s; }
83 | [[nodiscard]] friend constexpr vec operator/(const vec& v, T s) { return vec(v) /= s; }
84 | [[nodiscard]] friend constexpr bool operator==(const vec& a, const vec& b) { return std::ranges::equal(a, b); }
85 | [[nodiscard]] friend constexpr bool operator!=(const vec& a, const vec& b) { return !(a == b); }
86 |
87 | operator nvcluster_Vec3f() const requires (N == 3 && std::same_as) { return {(*this)[0], (*this)[1], (*this)[2]}; } // C API conversion, 3-component vectors only
88 | };
89 | template [[nodiscard]] constexpr vec min(const vec& a, const vec& b) { vec r; for (std::size_t i = 0; i < N; ++i) r[i] = std::min(a[i], b[i]); return r; } // component-wise minimum
90 | template [[nodiscard]] constexpr vec max(const vec& a, const vec& b) { vec r; for (std::size_t i = 0; i < N; ++i) r[i] = std::max(a[i], b[i]); return r; } // component-wise maximum
91 | template [[nodiscard]] constexpr vec clamp(const vec& v, const vec& min_v, const vec& max_v) { vec r; for (std::size_t i = 0; i < N; ++i) r[i] = std::clamp(v[i], min_v[i], max_v[i]); return r; } // component-wise clamp
92 | template [[nodiscard]] constexpr T dot(const vec& a, const vec& b) { T r{}; for (std::size_t i = 0; i < N; ++i) r += a[i] * b[i]; return r; } // sum of component products
93 | template [[nodiscard]] constexpr T length_squared(const vec& v) { return dot(v, v); }
94 | template [[nodiscard]] T length(const vec& v) requires std::floating_point { return std::sqrt(length_squared(v)); }
95 | template [[nodiscard]] vec normalize(const vec& v) requires std::floating_point { return v * (T{1} / length(v)); } // NOTE(review): no guard for a zero-length v (divides by zero)
96 | template [[nodiscard]] constexpr vec cross(const vec& a, const vec& b) requires (N == 3) && std::is_signed_v { // 3-component cross product a x b
97 | return {a[1]*b[2] - a[2]*b[1], a[2]*b[0] - a[0]*b[2], a[0]*b[1] - a[1]*b[0]};
98 | }
99 | // clang-format on
100 |
101 | using vec2f = vec;
102 | using vec3f = vec;
103 | using vec4f = vec;
104 | using vec2u = vec;
105 | using vec3u = vec;
106 | using vec4u = vec;
107 | using vec2i = vec;
108 | using vec3i = vec;
109 | using vec4i = vec;
110 | static_assert(sizeof(nvcluster_Vec3f) == sizeof(vec3f)); // the C API struct and vec3f must have the same size
111 |
112 | // Axis aligned bounding box, stored as its minimum and maximum corners
113 | struct AABB
114 | {
115 | vec3f min, max;
116 |
117 | // Plus returns the union of bounding boxes.
118 | // [[nodiscard]] allows the compiler to warn if the return value is ignored,
119 | // which would be a bug. E.g. a + b; but should be a += b;
120 | [[nodiscard]] constexpr AABB operator+(const AABB& other) const
121 | {
122 | return {nvcluster::min(min, other.min), nvcluster::max(max, other.max)};
123 | }
124 | constexpr AABB& operator+=(const AABB& other) { return *this = *this + other; }; // grows this box to also enclose 'other'
125 |
126 | [[nodiscard]] constexpr vec3f size() const { return max - min; } // may have negative components if max < min
127 | [[nodiscard]] constexpr vec3f center() const { return (min + max) * 0.5f; }
128 | [[nodiscard]] constexpr vec3f positive_size() const { return nvcluster::max(vec3f(0.0f), size()); } // size() with negative components clamped to zero
129 | [[nodiscard]] constexpr AABB positive() const { return {min, min + positive_size()}; } // same min corner, max adjusted so no extent is negative
130 | [[nodiscard]] constexpr float half_area() const
131 | {
132 | auto s = size();
133 | return s[0] * (s[1] + s[2]) + s[1] * s[2]; // xy + xz + yz, i.e. half the surface area of the box
134 | }
135 | [[nodiscard]] constexpr AABB intersect(const AABB& other) const
136 | {
137 | return AABB{nvcluster::max(min, other.min), nvcluster::min(max, other.max)}.positive(); // overlap region; extents clamped to be non-negative
138 | }
139 | [[nodiscard]] constexpr static AABB empty()
140 | {
141 | return {vec3f{std::numeric_limits::max()}, vec3f{std::numeric_limits::lowest()}}; // inverted box: min at the largest value, max at the lowest, so a union with any box yields that box
142 | }
143 | operator nvcluster_AABB() const { return {{min[0], min[1], min[2]}, {max[0], max[1], max[2]}}; } // conversion to the C API struct
144 | };
145 | static_assert(sizeof(nvcluster_AABB) == sizeof(AABB));
146 |
147 | // An index/cursor based subrange: `count` consecutive indices starting at `offset`
148 | struct Range
149 | {
150 |   uint32_t offset = {};  // index of the first element
151 |   uint32_t count  = {};  // number of elements
152 |
153 |   // Use iota() to make the range iterable
154 |   // E.g.: for(uint32_t i : range.indices()) ...
155 |   // std::views::iota() is similar to python's range()
156 |   [[nodiscard]] auto indices() const { return std::views::iota(offset, offset + count); }
157 |   [[nodiscard]] constexpr uint32_t end() const { return offset + count; }  // one past the last index
158 |   operator nvcluster_Range() const { return {offset, count}; }  // const like the vec/AABB C API conversions, so const Ranges convert too
159 | };
160 | static_assert(sizeof(nvcluster_Range) == sizeof(Range));
161 |
162 | } // namespace nvcluster
163 |
164 | // hashing functions from https://stackoverflow.com/questions/35985960/c-why-is-boosthash-combine-the-best-way-to-combine-hash-values
165 | namespace {
166 |
167 | template
168 | constexpr T xorshift(const T& n, int i)
169 | {
170 | return n ^ (n >> i); // XOR n with a copy of itself shifted right by i bits
171 | }
172 |
173 | inline constexpr uint32_t hash(const uint32_t& n)  // 32-bit integer mixer: xorshift, multiply, xorshift, multiply
174 | {
175 |   constexpr uint32_t kAlternatingBits = 0x55555555ul;  // pattern of alternating 0 and 1
176 |   constexpr uint32_t kOddMultiplier   = 3423571495ul;  // random odd integer constant
177 |   return kOddMultiplier * xorshift(kAlternatingBits * xorshift(n, 16), 16);  // products wrap modulo 2^32
178 | }
179 |
180 | inline constexpr uint64_t hash(const uint64_t& n)  // 64-bit integer mixer: xorshift, multiply, xorshift, multiply
181 | {
182 |   constexpr uint64_t kAlternatingBits = 0x5555555555555555ull;   // pattern of alternating 0 and 1
183 |   constexpr uint64_t kOddMultiplier   = 17316035218449499591ull;  // random odd integer constant
184 |   return kOddMultiplier * xorshift(kAlternatingBits * xorshift(n, 32), 32);  // products wrap modulo 2^64
185 | }
186 |
187 | // Mixes the hash of `v` into `seed` in place and returns the updated seed.
188 | // Call it once per value; the seed after the last call is the final hash.
189 | template
190 | constexpr size_t hash_combine(std::size_t& seed, const T& v)
191 | {
192 | return seed = std::rotl(seed, std::numeric_limits::digits / 3) ^ hash(std::hash{}(v)); // rotate the old seed, then XOR in the re-mixed std::hash of v
193 | }
194 |
195 | // Hashes two or more values into one size_t. From: https://blog.infotraining.pl/how-to-hash-objects-without-repetition
196 | template
197 | requires(sizeof...(TValues) > 1)
198 | constexpr size_t combined_hash(const TValues&... values)
199 | {
200 | size_t seed{};
201 | (..., hash_combine(seed, values)); // comma fold: combines each value into seed in argument order
202 | return seed;
203 | }
204 |
205 | // Adapter for std::array: expands the elements into a single combined_hash() call
206 | template
207 | constexpr size_t array_hash(const std::array& arr)
208 | {
209 | return [&arr](std::index_sequence) { return combined_hash(arr[I]...); }(std::make_index_sequence{}); // immediately-invoked lambda supplies the index pack I
210 | }
211 |
212 | } // anonymous namespace
213 |
214 | // Define a hash so nvcluster::vec (e.g. vec3) can be used as a key in e.g. std::unordered_map
215 | template
216 | struct std::hash>
217 | {
218 | std::size_t operator()(const nvcluster::vec& v) const noexcept { return array_hash(v); } // hashes all components via array_hash
219 | };
220 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
--------------------------------------------------------------------------------
/test/src/test_util.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *
16 | * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION
17 | * SPDX-License-Identifier: Apache-2.0
18 | */
19 | #pragma once
20 |
21 | #include
22 | #include // internal, for unit testing
23 | #include
24 | #include
25 | #include
26 | #include
27 | #include
28 | #include // for vec3 etc.
29 | #include
30 | #include
31 | #include
32 | #include
33 |
34 | using nvcluster::AABB;
35 | using nvcluster::Range;
36 | using nvcluster::vec2f;
37 | using nvcluster::vec3f;
38 | using nvcluster::vec3u;
39 |
40 | // Returns the smallest axis-aligned box enclosing the three corners of `triangle`, looked up in `positions`.
41 | inline AABB aabb(vec3u triangle, std::span positions)
42 | {
43 |   const auto corner = [&](std::size_t k) -> const auto& { return positions[triangle[k]]; };
44 |   return {nvcluster::min(nvcluster::min(corner(0), corner(1)), corner(2)),
45 |           nvcluster::max(nvcluster::max(corner(0), corner(1)), corner(2))};
46 | }
47 |
48 | template
49 | bool allUnique(const Range& range)
50 | {
51 | std::unordered_set unique(std::begin(range), std::end(range)); // duplicates collapse when inserted into the set
52 | return unique.size() == std::ranges::size(range); // equal sizes: no element occurred twice
53 | }
54 |
55 | template
56 | bool contains(const Range& range, std::ranges::range_value_t value)
57 | {
58 | return std::ranges::find(range, value) != std::end(range); // true if a linear search finds `value`
59 | }
60 |
61 | // Shortcut for passing Range offset and count to std::span::subspan(), which
62 | // returns a span pointing to a possibly smaller range of values.
63 | template
64 | constexpr auto subspan(Items& items, nvcluster_Range range)
65 | {
66 | assert(range.count == 0 || range.offset < std::ranges::size(items)); // empty ranges are exempt from the bounds checks
67 | assert(range.count == 0 || range.offset + range.count <= std::ranges::size(items)); // NOTE(review): count == 0 with offset > size passes here but is outside subspan()'s precondition
68 | return std::span(items).subspan(range.offset, range.count);
69 | }
70 |
71 | // Simple mesh struct. Triangle indices and vertex positions. Plus a name for
72 | // better context when tests fail.
73 | struct GeometryMesh
74 | {
75 | std::string name;
76 | std::vector triangles;
77 | std::vector positions;
78 |
79 | // Dump the mesh in .obj text form to a stream, for testing
80 | void write(std::ostream& os) const
81 | {
82 | os << "g mesh\n";
83 | for(auto& p : positions)
84 | os << "v " << p[0] << " " << p[1] << " " << p[2] << "\n"; // one "v x y z" line per position
85 | for(auto& t : triangles)
86 | os << "f " << t[0] + 1 << " " << t[1] + 1 << " " << t[2] + 1 << "\n"; // indices written with +1: .obj face indices are 1-based
87 | };
88 | void write(const std::filesystem::path& path) const // same output, written to the file at `path`
89 | {
90 | std::ofstream ofile(path); // NOTE(review): no check that the file actually opened
91 | write(ofile);
92 | }
93 | };
94 |
95 | inline nvcluster::MeshConnections makeMeshConnections(bool parallelize, const GeometryMesh& mesh) // test shortcut: forwards a GeometryMesh to nvcluster::makeMeshConnections
96 | {
97 | return nvcluster::makeMeshConnections(parallelize,
98 | nvcluster::ItemVertices(reinterpret_cast(mesh.triangles.data()), // triangle array reinterpreted for ItemVertices; presumably flat vertex indices - confirm
99 | uint32_t(mesh.triangles.size()), 3u), // one item per triangle, 3 vertices each
100 | uint32_t(mesh.positions.size())); // vertex count
101 | }
102 |
103 | inline void check(nvcluster_Result result)  // throws std::runtime_error unless `result` is NVCLUSTER_SUCCESS
104 | {
105 |   const bool failed = result != nvcluster_Result::NVCLUSTER_SUCCESS;
106 |   if(failed) throw std::runtime_error(nvclusterResultString(result));  // message text comes from the library
107 | }
108 |
109 | // Owns an nvcluster_Context for the duration of a scope: created in the
110 | // constructor, destroyed in the destructor, so tests that return early don't leak it.
111 | struct ScopedContext
112 | {
113 |   nvcluster_Context context = nullptr;  // the wrapped handle
114 |
115 |   ScopedContext(const nvcluster_ContextCreateInfo& createInfo = {}) { check(nvclusterCreateContext(&createInfo, &context)); }
116 |   ScopedContext(const ScopedContext&)            = delete;  // single owner: copying is disabled
117 |   ScopedContext& operator=(const ScopedContext&) = delete;
118 |   ~ScopedContext() { std::ignore = nvclusterDestroyContext(context); }  // result deliberately discarded
119 |
120 |   // Lets the wrapper be passed wherever the raw handle is expected.
121 |   operator nvcluster_Context() const { return context; }
122 | };
123 |
124 | // Shortcut to build clusters from various forms of inputs. Both constructors throw std::runtime_error (via check()) on failure.
125 | struct ClusterStorage : nvcluster::ClusterStorage
126 | {
127 | // External API
128 | ClusterStorage(const nvcluster_Config& config, const nvcluster_Input& input)
129 | {
130 | check(generateClusters(ScopedContext(), config, input, *this)); // the temporary context lives only for this call
131 | }
132 |
133 | // Internal interface, for unit testing
134 | ClusterStorage(const nvcluster::Input& input)
135 | {
136 | if(input.segments.size() != 1)
137 | throw std::runtime_error("segmented clustering not implemented in this test");
138 | nvcluster_Counts requiredCounts; // presumably filled in by nvclusterGetRequirements below
139 | check(nvclusterGetRequirements(ScopedContext(), &input.config, uint32_t(input.boundingBoxes.size()), &requiredCounts));
140 | clusterItemRanges.resize(requiredCounts.clusterCount); // size the output to the reported cluster count
141 | items.resize(input.boundingBoxes.size()); // one output item slot per input bounding box
142 | nvcluster_OutputClusters output{.clusterItemRanges = clusterItemRanges.data(),
143 | .items = items.data(),
144 | .clusterCount = uint32_t(clusterItemRanges.size()),
145 | .itemCount = uint32_t(items.size())};
146 | nvcluster_Range outputSegment{};
147 | check(clusterize(true, input, nvcluster::OutputClusters(output, &outputSegment, 1))); // NOTE(review): first argument is presumably 'parallelize' - confirm
148 | if(outputSegment.offset != 0 || size_t(outputSegment.count) != output.clusterCount)
149 | throw std::runtime_error("expected one segment with everything");
150 | clusterItemRanges.resize(output.clusterCount); // presumably trims to the number of clusters actually produced - confirm
151 | }
152 | };
153 |
154 | // Returns the number of unique vertices per cluster to verify the vertex limit
155 | // feature. The result has one entry per cluster, in clusterItemRanges order.
156 | inline std::vector countClusterVertices(const nvcluster::ClusterStorage& clustering, const GeometryMesh& mesh)
157 | {
158 | std::vector result;
159 | result.reserve(clustering.clusterItemRanges.size());
160 | for(nvcluster_Range r : clustering.clusterItemRanges)
161 | {
162 | std::span cluster = subspan(clustering.items, r); // the items belonging to this cluster
163 | std::unordered_set uniqueVertices;
164 | for(auto i : cluster) // each item i indexes mesh.triangles
165 | {
166 | uniqueVertices.insert(mesh.triangles[i][0]);
167 | uniqueVertices.insert(mesh.triangles[i][1]);
168 | uniqueVertices.insert(mesh.triangles[i][2]);
169 | }
170 | result.push_back(uint32_t(uniqueVertices.size())); // the set keeps each shared vertex once
171 | }
172 | return result;
173 | }
174 |
175 | // Icosahedron data: 12 vertex positions and 20 triangles given as index triples into `positions`.
176 | namespace icosahedron {
177 | constexpr float X = .525731112119133606f; // X*X + Z*Z is approximately 1, so every vertex below has unit length
178 | constexpr float Z = .850650808352039932f;
179 | static std::array positions = {{{-X, 0.0, Z},
180 | {X, 0.0, Z},
181 | {-X, 0.0, -Z},
182 | {X, 0.0, -Z},
183 | {0.0, Z, X},
184 | {0.0, Z, -X},
185 | {0.0, -Z, X},
186 | {0.0, -Z, -X},
187 | {Z, X, 0.0},
188 | {-Z, X, 0.0},
189 | {Z, -X, 0.0},
190 | {-Z, -X, 0.0}}};
191 | static std::array triangles = {{{0, 4, 1}, {0, 9, 4}, {9, 5, 4}, {4, 5, 8}, {4, 8, 1},
192 | {8, 10, 1}, {8, 3, 10}, {5, 3, 8}, {5, 2, 3}, {2, 7, 3},
193 | {7, 10, 3}, {7, 6, 10}, {7, 11, 6}, {11, 0, 6}, {0, 1, 6},
194 | {6, 1, 10}, {9, 0, 11}, {9, 11, 2}, {9, 2, 5}, {7, 2, 11}}};
195 | } // namespace icosahedron
196 |
197 | // Type of a function to call when creating a triangle. Takes 3 positions as
198 | // inputs.
199 | using triangle_callback = std::function;
200 |
201 | // Splits a triangle on a sphere into four at its edge midpoints, recursing
202 | // `depth` levels, and invokes the callback once for every resulting leaf triangle.
203 | inline void subdivide(vec3f v0, vec3f v1, vec3f v2, int depth, triangle_callback& callback)
204 | {
205 |   if(depth == 0)
206 |   {
207 |     callback(v0, v1, v2);  // leaf: emit the triangle as-is
208 |     return;
209 |   }
210 |   // Edge midpoints, normalized to unit length.
211 |   const vec3f mid01 = normalize(v0 + v1);
212 |   const vec3f mid12 = normalize(v1 + v2);
213 |   const vec3f mid20 = normalize(v2 + v0);
214 |   const int   remaining = depth - 1;
215 |   subdivide(v0, mid01, mid20, remaining, callback);     // corner at v0
216 |   subdivide(v1, mid12, mid01, remaining, callback);     // corner at v1
217 |   subdivide(v2, mid20, mid12, remaining, callback);     // corner at v2
218 |   subdivide(mid01, mid12, mid20, remaining, callback);  // centre triangle
219 | }
220 |
221 | // Emits an icosphere through the callback: every icosahedron face is subdivided `depth` times, giving 20 * (4^depth) triangles.
222 | inline void makeIcosphere(int depth, triangle_callback& callback)
223 | {
224 |   for(const auto& face : icosahedron::triangles)
225 |   {
226 |     const vec3f corner0 = icosahedron::positions[face[0]];
227 |     const vec3f corner1 = icosahedron::positions[face[1]];
228 |     const vec3f corner2 = icosahedron::positions[face[2]];
229 |     subdivide(corner0, corner1, corner2, depth, callback);
230 |   }
231 | }
232 |
233 | inline GeometryMesh makeIcosphere(int subdivision)
234 | {
235 | std::unordered_map vertexCache;
236 | std::vector triangles;
237 | // Our triangle callback function tries to place each of the vertices in the
238 | // vertex cache; each of the `it` iterators point to the existing value if
239 | // the vertex was already in the cache, or to a new value at the end of the
240 | // cache if it's a new vertex.
241 | triangle_callback callback = [&vertexCache, &triangles](vec3f v0, vec3f v1, vec3f v2) {
242 | auto [it0, new0] = vertexCache.try_emplace(v0, static_cast(vertexCache.size()));
243 | auto [it1, new1] = vertexCache.try_emplace(v1, static_cast(vertexCache.size()));
244 | auto [it2, new2] = vertexCache.try_emplace(v2, static_cast(vertexCache.size()));
245 | triangles.push_back({it0->second, it1->second, it2->second});
246 | };
247 | makeIcosphere(subdivision, callback);
248 | std::vector