├── .DS_Store
├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── README.md
├── Standalone
├── CMakeLists.txt
└── main.cpp
├── core
├── geometryProcessing.cpp
├── geometryProcessing.h
├── idxBufCleaner.cpp
├── meshletCompresser.cpp
├── meshletConverter.cpp
├── meshletGenerators.cpp
├── meshletGenerators.h
├── meshletMaker.h
├── meshletMeshDescriptor.cpp
├── meshletTaskDescriptor.cpp
├── meshlet_builder.hpp
├── meshlet_util.hpp
├── mm_meshlet_builder.h
├── mm_structures.h
└── settings.h
└── images
├── bounding.png
├── greedy.png
├── kmedoids.png
├── tipsynvidia.png
└── zeux.png
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Senbyo/meshletmaker/ef4082930e2b7c317adbe3bdf7a8ad94871e4313/.DS_Store
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 | ##
4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
5 |
6 | # User-specific files
7 | *.rsuser
8 | *.suo
9 | *.user
10 | *.userosscache
11 | *.sln.docstates
12 |
13 | # User-specific files (MonoDevelop/Xamarin Studio)
14 | *.userprefs
15 |
16 | # Mono auto generated files
17 | mono_crash.*
18 |
19 | # Build results
20 | [Dd]ebug/
21 | [Dd]ebugPublic/
22 | [Rr]elease/
23 | [Rr]eleases/
24 | x64/
25 | x86/
26 | [Aa][Rr][Mm]/
27 | [Aa][Rr][Mm]64/
28 | bld/
29 | [Bb]in/
30 | [Oo]bj/
31 | [Ll]og/
32 | [Ll]ogs/
33 |
34 | # Visual Studio 2015/2017 cache/options directory
35 | .vs/
36 | # Uncomment if you have tasks that create the project's static files in wwwroot
37 | #wwwroot/
38 |
39 | # Visual Studio 2017 auto generated files
40 | Generated\ Files/
41 |
42 | # MSTest test Results
43 | [Tt]est[Rr]esult*/
44 | [Bb]uild[Ll]og.*
45 |
46 | # NUnit
47 | *.VisualState.xml
48 | TestResult.xml
49 | nunit-*.xml
50 |
51 | # Build Results of an ATL Project
52 | [Dd]ebugPS/
53 | [Rr]eleasePS/
54 | dlldata.c
55 |
56 | # Benchmark Results
57 | BenchmarkDotNet.Artifacts/
58 |
59 | # .NET Core
60 | project.lock.json
61 | project.fragment.lock.json
62 | artifacts/
63 |
64 | # StyleCop
65 | StyleCopReport.xml
66 |
67 | # Files built by Visual Studio
68 | *_i.c
69 | *_p.c
70 | *_h.h
71 | *.ilk
72 | *.meta
73 | *.obj
74 | *.iobj
75 | *.pch
76 | *.pdb
77 | *.ipdb
78 | *.pgc
79 | *.pgd
80 | *.rsp
81 | *.sbr
82 | *.tlb
83 | *.tli
84 | *.tlh
85 | *.tmp
86 | *.tmp_proj
87 | *_wpftmp.csproj
88 | *.log
89 | *.vspscc
90 | *.vssscc
91 | .builds
92 | *.pidb
93 | *.svclog
94 | *.scc
95 |
96 | # Chutzpah Test files
97 | _Chutzpah*
98 |
99 | # Visual C++ cache files
100 | ipch/
101 | *.aps
102 | *.ncb
103 | *.opendb
104 | *.opensdf
105 | *.sdf
106 | *.cachefile
107 | *.VC.db
108 | *.VC.VC.opendb
109 |
110 | # Visual Studio profiler
111 | *.psess
112 | *.vsp
113 | *.vspx
114 | *.sap
115 |
116 | # Visual Studio Trace Files
117 | *.e2e
118 |
119 | # TFS 2012 Local Workspace
120 | $tf/
121 |
122 | # Guidance Automation Toolkit
123 | *.gpState
124 |
125 | # ReSharper is a .NET coding add-in
126 | _ReSharper*/
127 | *.[Rr]e[Ss]harper
128 | *.DotSettings.user
129 |
130 | # TeamCity is a build add-in
131 | _TeamCity*
132 |
133 | # DotCover is a Code Coverage Tool
134 | *.dotCover
135 |
136 | # AxoCover is a Code Coverage Tool
137 | .axoCover/*
138 | !.axoCover/settings.json
139 |
140 | # Visual Studio code coverage results
141 | *.coverage
142 | *.coveragexml
143 |
144 | # NCrunch
145 | _NCrunch_*
146 | .*crunch*.local.xml
147 | nCrunchTemp_*
148 |
149 | # MightyMoose
150 | *.mm.*
151 | AutoTest.Net/
152 |
153 | # Web workbench (sass)
154 | .sass-cache/
155 |
156 | # Installshield output folder
157 | [Ee]xpress/
158 |
159 | # DocProject is a documentation generator add-in
160 | DocProject/buildhelp/
161 | DocProject/Help/*.HxT
162 | DocProject/Help/*.HxC
163 | DocProject/Help/*.hhc
164 | DocProject/Help/*.hhk
165 | DocProject/Help/*.hhp
166 | DocProject/Help/Html2
167 | DocProject/Help/html
168 |
169 | # Click-Once directory
170 | publish/
171 |
172 | # Publish Web Output
173 | *.[Pp]ublish.xml
174 | *.azurePubxml
175 | # Note: Comment the next line if you want to checkin your web deploy settings,
176 | # but database connection strings (with potential passwords) will be unencrypted
177 | *.pubxml
178 | *.publishproj
179 |
180 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
181 | # checkin your Azure Web App publish settings, but sensitive information contained
182 | # in these scripts will be unencrypted
183 | PublishScripts/
184 |
185 | # NuGet Packages
186 | *.nupkg
187 | # NuGet Symbol Packages
188 | *.snupkg
189 | # The packages folder can be ignored because of Package Restore
190 | **/[Pp]ackages/*
191 | # except build/, which is used as an MSBuild target.
192 | !**/[Pp]ackages/build/
193 | # Uncomment if necessary however generally it will be regenerated when needed
194 | #!**/[Pp]ackages/repositories.config
195 | # NuGet v3's project.json files produces more ignorable files
196 | *.nuget.props
197 | *.nuget.targets
198 |
199 | # Microsoft Azure Build Output
200 | csx/
201 | *.build.csdef
202 |
203 | # Microsoft Azure Emulator
204 | ecf/
205 | rcf/
206 |
207 | # Windows Store app package directories and files
208 | AppPackages/
209 | BundleArtifacts/
210 | Package.StoreAssociation.xml
211 | _pkginfo.txt
212 | *.appx
213 | *.appxbundle
214 | *.appxupload
215 |
216 | # Visual Studio cache files
217 | # files ending in .cache can be ignored
218 | *.[Cc]ache
219 | # but keep track of directories ending in .cache
220 | !?*.[Cc]ache/
221 |
222 | # Others
223 | ClientBin/
224 | ~$*
225 | *~
226 | *.dbmdl
227 | *.dbproj.schemaview
228 | *.jfm
229 | *.pfx
230 | *.publishsettings
231 | orleans.codegen.cs
232 |
233 | # Including strong name files can present a security risk
234 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
235 | #*.snk
236 |
237 | # Since there are multiple workflows, uncomment next line to ignore bower_components
238 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
239 | #bower_components/
240 |
241 | # RIA/Silverlight projects
242 | Generated_Code/
243 |
244 | # Backup & report files from converting an old project file
245 | # to a newer Visual Studio version. Backup files are not needed,
246 | # because we have git ;-)
247 | _UpgradeReport_Files/
248 | Backup*/
249 | UpgradeLog*.XML
250 | UpgradeLog*.htm
251 | ServiceFabricBackup/
252 | *.rptproj.bak
253 |
254 | # SQL Server files
255 | *.mdf
256 | *.ldf
257 | *.ndf
258 |
259 | # Business Intelligence projects
260 | *.rdl.data
261 | *.bim.layout
262 | *.bim_*.settings
263 | *.rptproj.rsuser
264 | *- [Bb]ackup.rdl
265 | *- [Bb]ackup ([0-9]).rdl
266 | *- [Bb]ackup ([0-9][0-9]).rdl
267 |
268 | # Microsoft Fakes
269 | FakesAssemblies/
270 |
271 | # GhostDoc plugin setting file
272 | *.GhostDoc.xml
273 |
274 | # Node.js Tools for Visual Studio
275 | .ntvs_analysis.dat
276 | node_modules/
277 |
278 | # Visual Studio 6 build log
279 | *.plg
280 |
281 | # Visual Studio 6 workspace options file
282 | *.opt
283 |
284 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
285 | *.vbw
286 |
287 | # Visual Studio LightSwitch build output
288 | **/*.HTMLClient/GeneratedArtifacts
289 | **/*.DesktopClient/GeneratedArtifacts
290 | **/*.DesktopClient/ModelManifest.xml
291 | **/*.Server/GeneratedArtifacts
292 | **/*.Server/ModelManifest.xml
293 | _Pvt_Extensions
294 |
295 | # Paket dependency manager
296 | .paket/paket.exe
297 | paket-files/
298 |
299 | # FAKE - F# Make
300 | .fake/
301 |
302 | # CodeRush personal settings
303 | .cr/personal
304 |
305 | # Python Tools for Visual Studio (PTVS)
306 | __pycache__/
307 | *.pyc
308 |
309 | # Cake - Uncomment if you are using it
310 | # tools/**
311 | # !tools/packages.config
312 |
313 | # Tabs Studio
314 | *.tss
315 |
316 | # Telerik's JustMock configuration file
317 | *.jmconfig
318 |
319 | # BizTalk build output
320 | *.btp.cs
321 | *.btm.cs
322 | *.odx.cs
323 | *.xsd.cs
324 |
325 | # OpenCover UI analysis results
326 | OpenCover/
327 |
328 | # Azure Stream Analytics local run output
329 | ASALocalRun/
330 |
331 | # MSBuild Binary and Structured Log
332 | *.binlog
333 |
334 | # NVidia Nsight GPU debugger configuration file
335 | *.nvuser
336 |
337 | # MFractors (Xamarin productivity tool) working folder
338 | .mfractor/
339 |
340 | # Local History for Visual Studio
341 | .localhistory/
342 |
343 | # BeatPulse healthcheck temp database
344 | healthchecksdb
345 |
346 | # Backup folder for Package Reference Convert tool in Visual Studio 2017
347 | MigrationBackup/
348 |
349 | # Ionide (cross platform F# VS Code tools) working folder
350 | .ionide/
351 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
# Top-level build script: builds the meshletmaker static library and, when
# BUILD_STANDALONE is enabled, the executable defined in Standalone/.
cmake_minimum_required(VERSION 3.19)

# CMP0177 only exists in CMake >= 3.31. Setting a policy the running CMake
# does not know is an error, so opt in only when the policy is available.
if(POLICY CMP0177)
  cmake_policy(SET CMP0177 NEW)
endif()

project(meshletmaker VERSION 1.0)

# Provides CMAKE_INSTALL_INCLUDEDIR / CMAKE_INSTALL_BINDIR for install() rules.
include(GNUInstallDirs)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_SUPPRESS_REGENERATION true)

option(BUILD_STANDALONE "Build standalone" FALSE)

set(SOURCES
  core/geometryProcessing.cpp
  core/idxBufCleaner.cpp
  core/meshletConverter.cpp
  core/meshletCompresser.cpp
  core/meshletMeshDescriptor.cpp
  core/meshletTaskDescriptor.cpp
)

set(HEADERS
  core/geometryProcessing.h
  core/mm_meshlet_builder.h
  core/meshlet_builder.hpp
  core/meshlet_util.hpp
  core/mm_structures.h
  core/meshletMaker.h
  core/settings.h
)

add_library(meshletmaker STATIC ${SOURCES} ${HEADERS})

# Third-party headers are expected under libs/ in the source tree.
target_include_directories(meshletmaker
  PRIVATE
    ${PROJECT_SOURCE_DIR}/libs/tinyobjloader/
    ${PROJECT_SOURCE_DIR}/libs/glm/
)

# Headers copied by the install step. The spelling must match the file on disk
# (core/meshletMaker.h), otherwise installation fails on case-sensitive
# file systems.
set(INCLUDES
  core/meshletMaker.h
  core/settings.h
)

# A relative DESTINATION is resolved against CMAKE_INSTALL_PREFIX at install
# time, which keeps the install tree relocatable.
install(FILES ${INCLUDES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

if(BUILD_STANDALONE)
  add_subdirectory(Standalone)
endif()
49 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Mark Bo Jensen, PhD
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Performance Comparison of Meshlet Generation Strategies
2 |
3 | 



4 |
5 |
6 | This repo is greatly inspired by the meshoptimizer library by Arseny Kapoulkine and uses code from NVIDIA's meshlet example created by Christoph Kubisch.
7 |
8 | ## Abstract
9 | Mesh shaders were recently introduced for faster rendering of triangle meshes. Instead of
10 | pushing each individual triangle through the rasterization pipeline, we can create triangle
11 | clusters called meshlets and perform per-cluster culling operations. This is a great opportunity
12 | to efficiently render very large meshes. However, the performance of mesh shaders depends
13 | on how we create the meshlets. We test rendering performance, on NVIDIA hardware, after
14 | the use of different methods for organizing triangle meshes into meshlets. To measure the
15 | performance of a method, we render meshes of different complexity from many randomly
16 | selected views and measure the render time per triangle. Based on our findings, we suggest
17 | guidelines for creation of meshlets. Using our guidelines we propose two simple methods for
18 | generating meshlets that result in good rendering performance, when combined with hardware
19 | manufacturers' best practices. Our objective is to make it easier for the graphics practitioner to
20 | organize a triangle mesh into high performance meshlets.
21 |
22 | ## Paper
23 | Please find the paper describing the details of the different meshlet generation strategies here: https://jcgt.org/published/0012/02/01/
24 |
25 | If you use the work then please cite us:
26 | Mark Bo Jensen, Jeppe Revall Frisvad, and J. Andreas Bærentzen, Performance Comparison of Meshlet Generation Strategies, Journal of Computer Graphics Techniques (JCGT), vol. 12, no. 2, 1-27, 2023
27 |
--------------------------------------------------------------------------------
/Standalone/CMakeLists.txt:
--------------------------------------------------------------------------------
# Builds the `standalone` executable, which links against the meshletmaker
# library defined by the parent project.
cmake_minimum_required(VERSION 3.19)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_SUPPRESS_REGENERATION true)

# CMAKE_INSTALL_BINDIR is only defined once GNUInstallDirs has been included;
# without it the RUNTIME DESTINATION below expands to nothing.
include(GNUInstallDirs)

set(SOURCES
  main.cpp
)

set(HEADERS
)

set(TARGETS standalone)

add_executable(standalone ${SOURCES} ${HEADERS})

# Scope the include paths to this target instead of the whole directory.
target_include_directories(standalone
  PRIVATE
    ${PROJECT_SOURCE_DIR}/libs/tinyobjloader/
    ${PROJECT_SOURCE_DIR}/core
    ${PROJECT_SOURCE_DIR}
)

target_link_libraries(standalone
  meshletmaker
  ${SYSTEM_LIBS})

install(TARGETS ${TARGETS} EXPORT meshletMakerTargets
  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
)
--------------------------------------------------------------------------------
/Standalone/main.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Senbyo/meshletmaker/ef4082930e2b7c317adbe3bdf7a8ad94871e4313/Standalone/main.cpp
--------------------------------------------------------------------------------
/core/geometryProcessing.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #ifndef HEADER_GUARD_GEOMETRYPROCESSING
3 | #define HEADER_GUARD_GEOMETRYPROCESSING
4 |
5 | #include
6 | #include
7 |
8 | #include "settings.h"
9 |
10 | namespace mm {
11 | void calculateObjectBoundingBox(const std::vector& vertices, float* objectBboxMin, float* objectBboxMax);
12 | void calculateObjectBoundingBox(std::vector* vertices, float* objectBboxMin, float* objectBboxMax);
13 | }
14 | #endif // HEADER_GUARD_GEOMETRYPROCESSING
--------------------------------------------------------------------------------
/core/idxBufCleaner.cpp:
--------------------------------------------------------------------------------
1 | #include "meshletMaker.h"
2 | #include
3 |
4 |
5 |
6 |
namespace mm {

// Writes the fixed line "linking worked" to standard output and flushes it.
// The function takes no input and touches no mesh data.
void cleanIndexBuffer() {
    static const char* const kMessage = "linking worked";
    std::cout << kMessage << std::endl;
}

} // namespace mm
--------------------------------------------------------------------------------
/core/meshletCompresser.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Senbyo/meshletmaker/ef4082930e2b7c317adbe3bdf7a8ad94871e4313/core/meshletCompresser.cpp
--------------------------------------------------------------------------------
/core/meshletConverter.cpp:
--------------------------------------------------------------------------------
1 | #include "meshletMaker.h"
2 |
3 | namespace mm {
4 | void convertToMeshlets() {
5 |
6 | }
7 | }
8 |
--------------------------------------------------------------------------------
/core/meshletGenerators.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Senbyo/meshletmaker/ef4082930e2b7c317adbe3bdf7a8ad94871e4313/core/meshletGenerators.cpp
--------------------------------------------------------------------------------
/core/meshletGenerators.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Senbyo/meshletmaker/ef4082930e2b7c317adbe3bdf7a8ad94871e4313/core/meshletGenerators.h
--------------------------------------------------------------------------------
/core/meshletMaker.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #ifndef HEADER_GUARD_MESHLETMAKER
3 | #define HEADER_GUARD_MESHLETMAKER
4 |
5 | #define GLFW_INCLUDE_VULKAN
6 | #define GLM_FORCE_RADIANS
7 | #define GLM_FORCE_DEPTH_ZERO_TO_ONE
8 |
9 | #include
10 | #include
11 | #include
12 | #include
13 |
14 | #include "settings.h"
15 |
16 |
17 | namespace mm {
18 |
19 | void calculateCentroids(std::vector triangles, const Vertex* vertexBuffer);
20 |
21 | void loadTinyModel(const std::string& path, std::vector* vertices, std::vector* indices);
22 |
23 | template
24 | void makeMesh(std::unordered_map* indexVertexMap, std::vector* triangles, const uint32_t numIndices, const VertexIndexType* indices);
25 |
26 | template void makeMesh(std::unordered_map* indexVertexMap, std::vector* triangles, const uint32_t numIndices, const uint32_t* indices);
27 | template void makeMesh(std::unordered_map* indexVertexMap, std::vector* triangles, const uint32_t numIndices, const uint16_t* indices);
28 |
29 | template
30 | void generateMeshlets(std::unordered_map& indexVertexMap, std::vector& triangles, std::vector>& mehslets, const Vertex* vertices, int strat = -1, uint32_t primitiveLimit = 125, uint32_t vertexLimit = 64);
31 |
32 | void tipsifyIndexBuffer(const uint32_t* indicies, const uint32_t numIndices, const uint32_t numVerts, const int cacheSize, std::vector& optimizedIdxBuffer);
33 |
34 | template void generateMeshlets(std::unordered_map& indexVertexMap, std::vector& triangles, std::vector>& mehslets, const Vertex* vertices, int strat, uint32_t primitiveLimit, uint32_t vertexLimit);
35 | template void generateMeshlets(std::unordered_map& indexVertexMap, std::vector& triangles, std::vector>& mehslets, const Vertex* vertices, int strat, uint32_t primitiveLimit, uint32_t vertexLimit);
36 |
37 | template
38 | void generateMeshlets(const VertexIndexType* indices, uint32_t numIndices, std::vector>& mehslets, const Vertex* vertices, int strat = -1, uint32_t primitiveLimit = 125, uint32_t vertexLimit = 64);
39 |
40 | template void generateMeshlets(const uint32_t* indices, uint32_t numIndices, std::vector>& mehslets, const Vertex* vertices, int strat, uint32_t primitiveLimit, uint32_t vertexLimit);
41 | template void generateMeshlets(const uint16_t* indices, uint32_t numIndices, std::vector>& mehslets, const Vertex* vertices, int strat, uint32_t primitiveLimit, uint32_t vertexLimit);
42 |
43 |
44 | template
45 | std::vector packPackMeshlets(const std::vector>& mehslets);
46 |
47 | template std::vector packPackMeshlets(const std::vector>& mehslets);
48 | template std::vector packPackMeshlets(const std::vector>& mehslets);
49 |
50 | template
51 | std::vector packNVMeshlets(const std::vector>& mehslets);
52 |
53 | template std::vector packNVMeshlets(const std::vector>& mehslets);
54 |
55 | template
56 | std::vector packNVMeshlets16(const std::vector>& mehslets);
57 | template std::vector packNVMeshlets16(const std::vector>& mehslets);
58 |
59 | template
60 | std::vector packMMMeshlets(const std::vector>& mehslets);
61 | template std::vector packMMMeshlets(const std::vector>& mehslets);
62 |
63 | template
64 | std::vector packVertMeshlets(const std::vector>& mehslets);
65 |
66 | template std::vector packVertMeshlets(const std::vector>& mehslets);
67 | template std::vector packVertMeshlets(const std::vector>& mehslets);
68 |
69 | void collectStats(const NVMeshlet::MeshletGeometryPack& geometry, std::vector& stats);
70 | void generateEarlyCulling(NVMeshlet::MeshletGeometryPack& geometry, const std::vector& vertices, std::vector& objectData);
71 |
72 | void collectStats(const NVMeshlet::MeshletGeometry& geometry, std::vector& stats);
73 | void generateEarlyCulling(NVMeshlet::MeshletGeometry& geometry, const std::vector& vertices, std::vector& objectData);
74 |
75 | void collectStats(const NVMeshlet::MeshletGeometry16& geometry, std::vector& stats);
76 | void generateEarlyCulling(NVMeshlet::MeshletGeometry16& geometry, const std::vector& vertices, std::vector& objectData);
77 |
78 |
79 | void collectStats(const mm::MeshletGeometry& geometry, std::vector& stats);
80 | void generateEarlyCulling(mm::MeshletGeometry& geometry, const std::vector& vertices, std::vector& objectData);
81 | void generateEarlyCullingVert(mm::MeshletGeometry& geometry, const std::vector& vertices, std::vector& objectData);
82 |
83 | void cleanIndexBuffer();
84 |
85 | //void convertToMeshlets();
86 |
87 | //void compressMeshlets();
88 |
89 | void createMeshletPackDescriptors(const std::string& modelPath, std::vector* meshletGeometry, std::vector* vertCount, std::vector* vertices, std::vector* objectData, std::vector* stats, const int strat);
90 |
91 | void createMeshletMeshDescriptors(const std::string& modelPath, std::vector * meshletGeometry, std::vector* vertCount, std::vector* vertices, std::vector* objectData, std::vector* stats, const int strat);
92 |
93 | void loadObjAsMeshlet(const std::string& modelPath, std::vector * meshletGeometry, std::vector* vertCount, std::vector* vertices, std::vector* objectData, std::vector* stats);
94 | }
95 |
96 |
97 | #endif // HEADER_GUARD_MESHLETMAKER
--------------------------------------------------------------------------------
/core/meshletMeshDescriptor.cpp:
--------------------------------------------------------------------------------
1 | #define GLM_ENABLE_EXPERIMENTAL
2 |
3 | #include "meshletMaker.h"
4 | #include "geometryProcessing.h"
5 | #include "mm_meshlet_builder.h"
6 | #include "meshlet_builder.hpp"
7 | #include "meshlet_util.hpp"
8 |
9 |
10 | #include
11 | #include
12 |
13 |
14 | namespace mm {
15 |
16 | std::vector AreaWeightedTriangleList(const std::vector& triangles, const Vertex* vertexBuffer) {
17 | double minArea = DBL_MAX;
18 | std::vector triangleAreas;
19 | triangleAreas.resize(triangles.size());
20 | for (const auto& t : triangles) {
21 | // area of triangle is half the magnitude of the crossproduct
22 | glm::vec3 firstVec = vertexBuffer[t->vertices[2]->index] - vertexBuffer[t->vertices[0]->index];
23 | glm::vec3 secondVec = vertexBuffer[t->vertices[2]->index] - vertexBuffer[t->vertices[1]->index];
24 | double area = glm::length(glm::cross(firstVec, secondVec)) * 0.5f;
25 | if (area < minArea && area != 0.0) {
26 | minArea = area;
27 | }
28 | triangleAreas[t->id] = area;
29 | }
30 |
31 | std::vector weightedAreas;
32 | // create list of indices weighted based on triangle area
33 | for (int i = 0; i < triangleAreas.size(); ++i) {
34 | double area = triangleAreas[i];
35 | int weightedRoundedArea = std::ceilf(area / minArea);
36 | for (int j = 0; j < weightedRoundedArea; ++j) {
37 | weightedAreas.push_back(i);
38 | }
39 | }
40 |
41 | auto rng = std::default_random_engine{};
42 | std::shuffle(std::begin(weightedAreas), std::end(weightedAreas), rng);
43 |
44 | return weightedAreas;
45 |
46 | }
47 |
/// Draws distinct ids at random from `list`.
///
/// Elements are picked uniformly by position, so an id that occurs several
/// times in `list` is proportionally more likely to be drawn. Each id appears
/// at most once in the result.
///
/// @param list        Pool of ids to draw from (may contain duplicates).
/// @param sampleSize  Number of distinct ids requested.
/// @return Up to `sampleSize` distinct ids in draw order. Fewer are returned
///         when `list` holds fewer distinct ids; the result is empty when
///         `list` is empty or `sampleSize` is not positive.
std::vector<uint32_t> SampleList(const std::vector<uint32_t> list, const int sampleSize) {
    std::vector<uint32_t> samples;
    if (list.empty() || sampleSize <= 0) {
        return samples;
    }

    // Only as many distinct ids as the pool contains can ever be drawn;
    // without this cap the rejection loop below would never terminate.
    const std::unordered_set<uint32_t> distinctIds(list.begin(), list.end());
    const std::size_t requested = static_cast<std::size_t>(sampleSize);
    const std::size_t target = requested < distinctIds.size() ? requested : distinctIds.size();

    std::unordered_set<uint32_t> usedTriangleIds;
    samples.reserve(target);

    std::srand(static_cast<unsigned>(std::time(nullptr)));

    while (samples.size() < target) {
        // rand() % size() already lies in [0, size()). The former "+ 1"
        // skipped index 0 and could read one element past the end.
        const uint32_t triangleId = list[std::rand() % list.size()];
        if (usedTriangleIds.insert(triangleId).second) {
            samples.push_back(triangleId);
        }
    }

    return samples;
}
68 |
69 | bool CompareTriangles(const Triangle* t1,const Triangle* t2,const int idx) {
70 | return (t1->centroid[idx] < t2->centroid[idx]);
71 | }
72 |
73 | bool compareVerts(const Vert* v1,const Vert* v2, const Vertex* vertexBuffer, const int idx) {
74 | return (vertexBuffer[v1->index].pos[idx] < vertexBuffer[v2->index].pos[idx]);
75 | }
76 |
// Placeholder: performs no work and always returns 0.
int sortLists() {
    const int result = 0;
    return result;
}
81 |
82 | template
83 | void generateMeshlets(const VertexIndexType* indices, uint32_t numIndices, std::vector>& meshlets, const Vertex* vertices, int strat, uint32_t primitiveLimit, uint32_t vertexLimit) {
84 | assert(primitiveLimit <= MAX_PRIMITIVE_COUNT_LIMIT);
85 | assert(vertexLimit <= MAX_VERTEX_COUNT_LIMIT);
86 |
87 | MeshletCache cache;
88 | cache.reset();
89 |
90 | switch (strat) {
91 |
92 | default:
93 |
94 | for (VertexIndexType i = 0; i < numIndices / 3; i++)
95 | {
96 |
97 | if (cache.cannotInsert(indices + i * 3, vertexLimit, primitiveLimit))
98 | {
99 | // finish old and reset
100 | meshlets.push_back(cache);
101 | cache.reset();
102 | }
103 | cache.insert(indices + i * 3, vertices);
104 | }
105 | if (!cache.empty())
106 | {
107 | meshlets.push_back(cache);
108 | }
109 | }
110 | }
111 |
112 | template
113 | void generateMeshlets(std::unordered_map& indexVertexMap, std::vector& triangles, std::vector>& meshlets, const Vertex* vertexBuffer, int strat, uint32_t primitiveLimit, uint32_t vertexLimit) {
114 | assert(primitiveLimit <= MAX_PRIMITIVE_COUNT_LIMIT);
115 | assert(vertexLimit <= MAX_VERTEX_COUNT_LIMIT);
116 |
117 | std::vector vertsVector;
118 | if (strat != 4) {
119 | glm::vec3 min{ FLT_MAX };
120 | glm::vec3 max{ FLT_MIN };
121 | for (Triangle* tri : triangles) {
122 | //glm::vec3 v1 = vertexBuffer[tri->vertices[0]->index].pos;
123 | //glm::vec3 v2 = vertexBuffer[tri->vertices[1]->index].pos;
124 | //glm::vec3 v3 = vertexBuffer[tri->vertices[2]->index].pos;
125 |
126 | min = glm::min(min, vertexBuffer[tri->vertices[0]->index].pos);
127 | min = glm::min(min, vertexBuffer[tri->vertices[1]->index].pos);
128 | min = glm::min(min, vertexBuffer[tri->vertices[2]->index].pos);
129 | max = glm::max(max, vertexBuffer[tri->vertices[0]->index].pos);
130 | max = glm::max(max, vertexBuffer[tri->vertices[1]->index].pos);
131 | max = glm::max(max, vertexBuffer[tri->vertices[2]->index].pos);
132 |
133 | //min = glm::min(min, v1);
134 | //min = glm::min(min, v2);
135 | //min = glm::min(min, v3);
136 | //max = glm::max(max, v1);
137 | //max = glm::max(max, v2);
138 | //max = glm::max(max, v3);
139 |
140 | glm::vec3 centroid = (vertexBuffer[tri->vertices[0]->index].pos + vertexBuffer[tri->vertices[1]->index].pos + vertexBuffer[tri->vertices[2]->index].pos) / 3.0f;
141 | //glm::vec3 centroid = (v1 + v2 + v3) / 3.0f;
142 | tri->centroid[0] = centroid.x;
143 | tri->centroid[1] = centroid.y;
144 | tri->centroid[2] = centroid.z;
145 | }
146 |
147 | // use the same axis info to sort vertices
148 | glm::vec3 axis = glm::abs(max - min);
149 |
150 |
151 | vertsVector.reserve(indexVertexMap.size());
152 | for (int i = 0; i < indexVertexMap.size(); ++i) {
153 | vertsVector.push_back(indexVertexMap[i]);
154 | }
155 |
156 | if (axis.x > axis.y && axis.x > axis.z) {
157 | std::sort(vertsVector.begin(), vertsVector.end(), std::bind(compareVerts, std::placeholders::_1, std::placeholders::_2, vertexBuffer, 0));
158 | std::sort(triangles.begin(), triangles.end(), std::bind(CompareTriangles, std::placeholders::_1, std::placeholders::_2, 0));
159 | std::cout << "x sorted" << std::endl;
160 | }
161 | else if (axis.y > axis.z && axis.y > axis.x) {
162 | std::sort(vertsVector.begin(), vertsVector.end(), std::bind(compareVerts, std::placeholders::_1, std::placeholders::_2, vertexBuffer, 1));
163 | std::sort(triangles.begin(), triangles.end(), std::bind(CompareTriangles, std::placeholders::_1, std::placeholders::_2, 1));
164 | std::cout << "y sorted" << std::endl;
165 | }
166 | else {
167 | std::sort(vertsVector.begin(), vertsVector.end(), std::bind(compareVerts, std::placeholders::_1, std::placeholders::_2, vertexBuffer, 2));
168 | std::sort(triangles.begin(), triangles.end(), std::bind(CompareTriangles, std::placeholders::_1, std::placeholders::_2, 2));
169 | std::cout << "z sorted" << std::endl;
170 | }
171 | }
172 |
173 | std::unordered_map used;
174 | MeshletCache cache;
175 | cache.reset();
176 | switch (strat) {
177 | case 21:
178 | {
179 | std::queue priorityQueue;
180 | std::unordered_map visitedTriangleIds;
181 |
182 | // let us sort the triangles
183 | //calculateCentroids(triangles, vertexBuffer);
184 | //std::sort(triangles.begin(), triangles.end(), CompareTriangles);
185 |
186 |
187 | // add triangles to cache untill full.
188 | for (int i = 0; i < triangles.size(); ++i) {
189 | // for (Triangle* triangle : triangles) {
190 | // if triangle is not used generate meshlet
191 | Triangle* triangle = triangles[i];
192 |
193 | if (triangle->flag == 1) continue;
194 |
195 | //reset
196 | priorityQueue.push(triangle);
197 |
198 | // add triangles to cache untill it is full.
199 | while (!priorityQueue.empty()) {
200 | // pop current triangle
201 | Triangle* tri = priorityQueue.front();
202 | visitedTriangleIds[tri->id] = tri->id;
203 |
204 |
205 | // get all vertices of current triangle
206 | VertexIndexType candidateIndices[3];
207 | for (uint32_t j = 0; j < 3; ++j) {
208 | candidateIndices[j] = tri->vertices[j]->index;
209 | }
210 | // break if cache is full
211 | if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) {
212 | // we run out of verts but could push prims more so we do a pass of prims here to see if we can maximize
213 | // so we run through all triangles to see if the meshlet already has the required verts
214 | // we try to do this in a dum way to test if it is worth it
215 | for (int v = 0; v < cache.numVertices; ++v) {
216 | for (Triangle* tri : indexVertexMap[cache.vertices[v]]->neighbours) {
217 | if (tri->flag == 1) continue;
218 |
219 | VertexIndexType candidateIndices[3];
220 | for (uint32_t j = 0; j < 3; ++j) {
221 | uint32_t idx = tri->vertices[j]->index;
222 | candidateIndices[j] = idx;
223 | }
224 |
225 | if (!cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) {
226 | cache.insert(candidateIndices, vertexBuffer);
227 | tri->flag = 1;
228 | }
229 | }
230 | }
231 | meshlets.push_back(cache);
232 |
233 | //reset cache and empty priorityQueue
234 | priorityQueue = {};
235 | priorityQueue.push(tri);
236 | cache.reset();
237 | visitedTriangleIds.clear();
238 |
239 | //reset cache and empty priorityQueue
240 |
241 | continue;
242 | // start over again but from the fringe of the current cluster
243 | }
244 | // get alle neighbours of current triangle
245 | for (Triangle* t : tri->neighbours) {
246 | if ((t->flag != 1) && (visitedTriangleIds.find(t->id) == visitedTriangleIds.end())) {
247 | priorityQueue.push(t);
248 | visitedTriangleIds[t->id] = t->id;
249 | }
250 | }
251 |
252 |
253 | cache.insert(candidateIndices, vertexBuffer);
254 | // if triangle is inserted set flag to used.
255 | priorityQueue.pop();
256 | tri->flag = 1;
257 |
258 |
259 | };
260 | }
261 | // add remaining triangles to a meshlet
262 | if (!cache.empty()) {
263 | meshlets.push_back(cache);
264 | cache.reset();
265 | }
266 | break;
267 | }
268 |
269 | // Case 20: greedy triangle flood fill that also records, per emitted meshlet, the ids of its triangles ("clustering").
270 | case 20:
271 | {
272 | std::vector> clusters; // triangle-id list of every meshlet emitted so far
273 | std::vector cluster; // triangle ids of the meshlet currently being filled
274 | std::queue priorityQueue; // FIFO frontier of triangles waiting to be inserted
275 | std::unordered_map visitedTriangleIds; // ids queued since the cache was last flushed (stored as id -> id)
276 | // NOTE(review): within this case `clusters` is only read by the commented-out refinement pass further down.
277 | //std::vector weightedAreaTriangleList = AreaWeightedTriangleList(triangles, vertexBuffer);
278 | //std::vector clusterCenters = SampleList(weightedAreaTriangleList, 92);
279 |
280 | // Seed a new flood fill from every triangle that has not been assigned to a meshlet yet.
281 | for (int i = 0; i < triangles.size(); ++i) {
282 | // for (Triangle* triangle : triangles) {
283 | // triangles whose flag is 1 already belong to a meshlet and are skipped
284 | Triangle* triangle = triangles[i];
285 |
286 | if (triangle->flag == 1) continue;
287 |
288 | // start the frontier from this unassigned triangle
289 | priorityQueue.push(triangle);
290 |
291 | // keep inserting frontier triangles until the frontier is exhausted
292 | while (!priorityQueue.empty()) {
293 | // peek at the front triangle; it is only popped after it has been inserted
294 | Triangle* tri = priorityQueue.front();
295 | visitedTriangleIds[tri->id] = tri->id;
296 |
297 |
298 | // gather the three vertex indices of the current triangle
299 | VertexIndexType candidateIndices[3];
300 | for (uint32_t j = 0; j < 3; ++j) {
301 | candidateIndices[j] = tri->vertices[j]->index;
302 | }
303 | // cache is full: emit it and restart from the triangle that did not fit
304 | if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) {
305 | meshlets.push_back(cache);
306 |
307 | // drop the old frontier and re-seed it with the triangle that did not fit
308 | priorityQueue = {};
309 | priorityQueue.push(tri);
310 | clusters.push_back(cluster);
311 | cluster.clear();
312 | cache.reset();
313 | visitedTriangleIds.clear();
314 |
315 | // cache, cluster list, frontier and visited set are all reset at this point
316 |
317 | continue;
318 | // the next meshlet therefore starts at the fringe of the one just emitted
319 | }
320 | // queue every neighbour that is neither assigned nor already queued
321 | for (Triangle* t : tri->neighbours) {
322 | if ((t->flag != 1) && (visitedTriangleIds.find(t->id) == visitedTriangleIds.end())) {
323 | priorityQueue.push(t);
324 | visitedTriangleIds[t->id] = t->id;
325 | }
326 | }
327 |
328 |
329 | cache.insert(candidateIndices, vertexBuffer);
330 | cluster.push_back(tri->id);
331 | // the triangle is now part of the meshlet: pop it and flag it as used
332 | priorityQueue.pop();
333 | tri->flag = 1;
334 |
335 |
336 | };
337 | }
338 | // emit whatever is left in the cache as the final meshlet (and its cluster)
339 | if (!cache.empty()) {
340 | meshlets.push_back(cache);
341 | cache.reset();
342 | clusters.push_back(cluster);
343 | cluster.clear();
344 | }
345 | // Disabled experiment below: it picked a centre triangle per cluster, re-assigned triangles to centres and re-packed the caches.
346 | //for (int k = 0; k < 10; ++k) {
347 | // // find initial clustercenters
348 | // std::vector candidates;
349 | // uint32_t maxDistance;
350 | // uint32_t minDistance;
351 | // uint32_t dist;
352 | // int count;
353 | // int maxCount;
354 | // bool CENTER_IS_SET = false;
355 | // // putting cluster centers into a vector for later use
356 | // std::vector> centers;
357 | // std::vector clusterCenters;
358 | // clusterCenters.resize(clusters.size());
359 | // centers.resize(clusters.size());
360 | // for (uint32_t i = 0; i < clusters.size(); ++i) {
361 | // minDistance = -1;
362 | // maxCount = -1;
363 | // candidates.clear();
364 | // //build subgraph here ?
365 | // uint32_t difference = 0;
366 | // for (unsigned int j = 0; j < clusters[i].size(); ++j) {
367 | // count = 0;
368 | // Triangle* t = triangles[clusters[i][j]];
369 | // t->dist = 0;
370 |
371 | // std::queue priorityQueue;
372 | // priorityQueue.push(t);
373 |
374 | // // for each triangle in frontier
375 | // dist = 0;
376 | // visitedTriangleIds.clear();
377 | // visitedTriangleIds[t->id] = t->id;
378 |
379 | // while (!priorityQueue.empty()) {
380 | // // add neighbours to queue
381 | // Triangle* cur_t = priorityQueue.front();
382 | // priorityQueue.pop();
383 |
384 | // // update distance
385 | // dist = cur_t->dist + 1;
386 |
387 | // for (Triangle* neighbour : cur_t->neighbours) {
388 | // if (std::find(clusters[i].begin(), clusters[i].end(), neighbour->id) != clusters[i].end() && (visitedTriangleIds.find(neighbour->id) == visitedTriangleIds.end())) {
389 | // neighbour->dist = dist;
390 | // neighbour->flag = cur_t->flag;
391 | // visitedTriangleIds[neighbour->id] = neighbour->id;
392 | // //if (priorityQueue.size() <= clusters[i].size())
393 | // priorityQueue.push(neighbour);
394 | // ++count;
395 | // } //continue;
396 |
397 | // }
398 | // }
399 | // //distance = dist;
400 | // //if (distance > maxDistance) maxDistance = distance;
401 | // maxDistance = dist;
402 |
403 |
404 | // if (visitedTriangleIds.size() != clusters[i].size()) maxDistance = -1; // Does not consider every element of cluster a possibility
405 | //
406 | // // center is set means that we can have more than one triangle in the center
407 | // if (maxDistance == minDistance && CENTER_IS_SET) { // We might not have convergence guarantees for accurate graph centers
408 | // candidates.push_back(clusters[i][j]);
409 | // }
410 | // else if (maxDistance < minDistance) {
411 | // candidates.clear();
412 | // candidates.push_back(clusters[i][j]);
413 | // //std::cout << "Cluster " << i << " has candidate " << clusters[i][j] << " with eccentricity " << maxDistance << " compared to previous " << minDistance << std::endl;
414 | // minDistance = maxDistance;
415 | // }
416 | // else if (maxDistance == -1 && candidates.size() == 0) {
417 | // if (count > maxCount) {
418 | // candidates.clear();
419 | // candidates.push_back(clusters[i][j]);
420 | // maxCount = count;
421 | // if (k > 0) {
422 | // std::cout << "Error no candidates for cluster " << i << std::endl;
423 | // }
424 | //
425 | // }
426 | // }
427 | // }
428 |
429 | // if (candidates.size() == 0) {
430 | // std::cout << "Error no candidates for cluster " << i << std::endl;
431 | // }
432 | // centers[i] = candidates;
433 | // clusterCenters[i] = candidates[0];
434 | // }
435 |
436 | // // redestribute triangles
437 |
438 | // // reset clusters
439 | // clusters.clear();
440 | // clusters.resize(clusterCenters.size());
441 | // visitedTriangleIds.clear();
442 | // std::queue triangleQueue;
443 | // for (int i = 0; i < triangles.size(); ++i) {
444 | // Triangle* tri = triangles[i];
445 |
446 | // visitedTriangleIds.clear();
447 | // visitedTriangleIds[tri->id] = tri->id;
448 |
449 | // triangleQueue.push(tri);
450 | // while (!triangleQueue.empty())
451 | // {
452 | // Triangle* curTri = triangleQueue.front();
453 | // triangleQueue.pop();
454 |
455 |
456 | // // if curTri is a cluster center asign tri to that cluster
457 | // std::vector::iterator clusterItr = std::find(clusterCenters.begin(), clusterCenters.end(), curTri->id);
458 | // if (clusterItr != clusterCenters.end()) {
459 | // int idx = std::distance(clusterCenters.begin(), clusterItr);
460 | // clusters[idx].push_back(tri->id);
461 | // triangleQueue = {};
462 | // break;
463 | // }
464 |
465 | // for (Triangle* neighbour : curTri->neighbours) {
466 | // if (visitedTriangleIds.find(neighbour->id) != visitedTriangleIds.end()) continue;
467 | // triangleQueue.push(neighbour);
468 | // visitedTriangleIds[neighbour->id] = neighbour->id;
469 |
470 |
471 | // }
472 | // }
473 | // }
474 | //}
475 |
476 | ////pack into caches
477 | //for (std::vector c : clusters) {
478 | // for (uint32_t triIdx : c) {
479 | // VertexIndexType candidateIndices[3];
480 | // for (uint32_t j = 0; j < 3; ++j) {
481 | // candidateIndices[j] = triangles[triIdx]->vertices[j]->index;
482 | // }
483 |
484 | // cache.insert(candidateIndices, vertexBuffer);
485 | // }
486 |
487 | // meshlets.push_back(cache);
488 | // cache.reset();
489 | //}
490 | break;
491 | }
492 | case 23: // greedy growth that prefers the frontier triangle adding the least to a running bounding-sphere estimate
493 | {
494 | std::unordered_set currentVerts; // vertex indices already in the meshlet being built
495 | std::vector trianglesInCluster; // triangles inserted into the meshlet being built
496 | std::deque priorityQueue; // candidate triangles bordering the meshlet being built
497 | std::unordered_map visitedTriangleIds; // ids of inserted triangles (only written inside this case)
498 | glm::vec3 center = glm::vec3(0.0f);
499 | float radius = 0;
500 | float bestNewRadius = FLT_MAX; // FLT_MAX, not DBL_MAX: DBL_MAX is out of range for float (undefined conversion)
501 | float newRadius = FLT_MAX;
502 | bool updateSphere = false;
503 | // `center` / `radius` hold the running bounding-sphere estimate of the meshlet being built.
504 | ////let us sort the triangles
505 | //calculateCentroids(triangles, vertexBuffer);
506 | //std::sort(triangles.begin(), triangles.end(), CompareTriangles);
507 |
508 | // `t` only advances past triangles that are already used, so an unused seed is retried until it is placed.
509 | // grow meshlets until every triangle has been assigned.
510 | //for (Triangle* triangle : triangles) {
511 | for (int t = 0; t < triangles.size();) {
512 |
513 | Triangle* triangle = triangles[t];
514 | // triangles whose flag is 1 already belong to a meshlet
515 | if (triangle->flag == 1) {
516 | ++t;
517 | continue;
518 | }
519 | // seed the candidate list with the first unused triangle
520 | priorityQueue.push_back(triangle);
521 |
522 |
523 | while (!priorityQueue.empty()) {
524 | // scan the frontier for the best candidate of this round
525 | int bestTriIdx = 0;
526 | int triIdx = 0;
527 | bestNewRadius = FLT_MAX;
528 | for (Triangle* possible_tri : priorityQueue) {
529 |
530 | // prioritize triangles who have no "live" neighbours
531 | // also prioritize triangles who already have all verts in the cluster
532 | int newVert{};
533 | int vertsInMeshlet = 0;
534 | int used = 0;
535 | for (int i = 0; i < 3; ++i) {
536 | if (currentVerts.find(possible_tri->vertices[i]->index) == currentVerts.end()) {
537 | newVert = i;
538 | }
539 | else {
540 | ++vertsInMeshlet;
541 | }
542 | }
543 | // count the neighbours that are already assigned to a meshlet
544 | for (auto neighbour_tri : possible_tri->neighbours) {
545 | if (neighbour_tri->flag == 1) ++used;
546 | }
547 | // every neighbour is used: normalise the count to 3 so the dangling test below fires
548 | if (possible_tri->neighbours.size() == used) used = 3;
549 |
550 | // all three vertices are already in the meshlet: take it at once, the sphere is unchanged
551 | if (vertsInMeshlet == 3) {
552 | bestTriIdx = triIdx;
553 | updateSphere = false;
554 | break;
555 | }
556 |
557 | // if dangling triangle add it
558 | if (used == 3) {
559 | bestTriIdx = triIdx;
560 | if (vertsInMeshlet == 2) {
561 | // afterwards check the added radius by adding triangle to the cluster
562 | const mm::Vertex p = vertexBuffer[possible_tri->vertices[newVert]->index];
563 | bestNewRadius = 0.5 * (radius + glm::length(center - p.pos));
564 | updateSphere = true;
565 | }
566 | else {
567 | updateSphere = false;
568 | }
569 | break;
570 | }
571 |
572 |
573 | // else if no verts are in meshlet ie starting a new meshlet
574 | if (vertsInMeshlet == 0) {
575 | center = (vertexBuffer[possible_tri->vertices[0]->index].pos + vertexBuffer[possible_tri->vertices[1]->index].pos + vertexBuffer[possible_tri->vertices[2]->index].pos) / 3.0f;
576 | radius = glm::max(glm::length(center - vertexBuffer[possible_tri->vertices[0]->index].pos), glm::max(glm::length(center - vertexBuffer[possible_tri->vertices[1]->index].pos), glm::length(center - vertexBuffer[possible_tri->vertices[2]->index].pos)));
577 | updateSphere = false;
578 | //radius = 0.5 * (radius +(glm::max(glm::length(center - vertexBuffer[possible_tri->vertices[0]->index].pos), glm::max(glm::length(center - vertexBuffer[possible_tri->vertices[1]->index].pos), glm::length(center - vertexBuffer[possible_tri->vertices[2]->index].pos)))));
579 | break;
580 | }
581 | else if (vertsInMeshlet == 2) {
582 | // afterwards check the added radius by adding triangle to the cluster
583 | const mm::Vertex p = vertexBuffer[possible_tri->vertices[newVert]->index];
584 | newRadius = 0.5 * (radius + glm::length(center - p.pos));
585 | updateSphere = true;
586 | }
587 | // NOTE(review): newRadius/updateSphere are not reset per candidate, so a candidate sharing one vertex is compared with the previous candidate's values - confirm intended.
588 | if (newRadius <= bestNewRadius) {
589 | bestNewRadius = newRadius;
590 | bestTriIdx = triIdx;
591 |
592 | }
593 | triIdx++;
594 | }
595 | // move best tri to front of queue
596 | std::swap(priorityQueue.front(), priorityQueue[bestTriIdx]);
597 | Triangle* tri = priorityQueue.front();
598 | // collect its vertex indices and remember the last corner that is new to the meshlet
599 | int newVert{};
600 | VertexIndexType candidateIndices[3];
601 | for (VertexIndexType i = 0; i < 3; ++i) {
602 | candidateIndices[i] = tri->vertices[i]->index;
603 | if (currentVerts.find(tri->vertices[i]->index) == currentVerts.end()) newVert = i;
604 | }
605 |
606 | if (updateSphere) {
607 | // adopt the chosen radius and move the centre so it lies `radius` away from the new vertex p, towards the old centre
608 | const mm::Vertex p = vertexBuffer[tri->vertices[newVert]->index];
609 | radius = bestNewRadius;
610 | center = p.pos + (radius / (FLT_EPSILON + glm::length(center - p.pos))) * (center - p.pos);
611 | }
612 |
613 | // cache is full: top it up, emit it and start a new meshlet
614 | if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) {
615 | // we run out of verts but could push prims more so we do a pass of prims here to see if we can maximize
616 | // so we run through all triangles to see if the meshlet already has the required verts
617 | // brute-force pass over the triangles around every cached vertex, to test whether it is worth it
618 | for (int v = 0; v < cache.numVertices; ++v) {
619 | for (Triangle* tri : indexVertexMap[cache.vertices[v]]->neighbours) {
620 | if (tri->flag == 1) continue;
621 |
622 | VertexIndexType candidateIndices[3];
623 | for (uint32_t j = 0; j < 3; ++j) {
624 | uint32_t idx = tri->vertices[j]->index;
625 | candidateIndices[j] = idx;
626 | }
627 |
628 | if (!cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) {
629 | cache.insert(candidateIndices, vertexBuffer);
630 | tri->flag = 1;
631 | }
632 | }
633 | }
634 | meshlets.push_back(cache);
635 | //addMeshlet(geometry, cache);
636 |
637 | //if (meshlets.size() == 4) return;
638 | // reset all per-meshlet state; the triangle that did not fit stays unflagged and is picked up again by the outer loop
639 | //priorityQueue = { tri };
640 | priorityQueue.clear();
641 | trianglesInCluster.clear();
642 | currentVerts.clear();
643 | cache.reset();
644 | center = glm::vec3(0.0f);
645 | radius = 0.0f;
646 | break;
647 |
648 | }
649 |
650 | cache.insert(candidateIndices, vertexBuffer);
651 |
652 | // the triangle is inserted: remove it from the frontier and flag it as used
653 | priorityQueue.pop_front();
654 | tri->flag = 1;
655 | visitedTriangleIds[tri->id] = tri->id;
656 |
657 |
658 | // add the used vertices to the current cluster
659 | currentVerts.insert(tri->vertices[0]->index);
660 | currentVerts.insert(tri->vertices[1]->index);
661 | currentVerts.insert(tri->vertices[2]->index);
662 | trianglesInCluster.push_back(tri);
663 |
664 | // rebuild the frontier from scratch: every unused neighbour of every triangle in the meshlet (duplicates possible)
665 | priorityQueue.clear();
666 | for (Triangle* tr : trianglesInCluster) {
667 | for (Triangle* t : tr->neighbours) {
668 | if (t->flag != 1) priorityQueue.push_back(t);
669 | }
670 | }
671 | };
672 | // frontier ran dry with triangles still cached: emit them as a meshlet and reset the per-meshlet state
673 | if (!cache.empty()) {
674 | meshlets.push_back(cache);
675 | priorityQueue.clear();
676 | trianglesInCluster.clear();
677 | currentVerts.clear();
678 | cache.reset();
679 | center = glm::vec3(0.0f);
680 | radius = 0.0f;
681 | }
682 | }
683 | // add remaining triangles to a meshlet
684 | if (!cache.empty()) {
685 | meshlets.push_back(cache);
686 | cache.reset();
687 | }
688 |
689 | break;
690 | }
691 | // Case 24: bounding sphere based on vertex fanning - candidates are the unused triangles around the vertices already cached.
692 | case 24:
693 | {
694 | std::unordered_map usedVerts; // per vertex index: how many inserted triangles reference it
695 | std::unordered_set currentVerts; // vertex indices of the meshlet being built
696 | float radius = .0f;
697 | glm::vec3 center = glm::vec3(.0f);
698 | // `center` / `radius` hold the running bounding-sphere estimate of the meshlet being built.
699 |
700 |
701 | //std::sort(vertsVector.begin(), vertsVector.end(), std::bind(compareVerts, std::placeholders::_1, std::placeholders::_2, vertexBuffer));
702 | // `i` only advances once the current seed vertex has no unused triangle left (see `++i` below).
703 | for (int i = 0; i < vertsVector.size();) {
704 |
705 |
706 | Vert* vert = vertsVector[i];
707 | Triangle* bestTri = nullptr;
708 | float newRadius = FLT_MAX;
709 | float bestNewRadius = FLT_MAX - 1.0f;
710 | int bestVertsInMeshlet = 0;
711 | // look for the best unused triangle around any vertex that is already in the cache
712 | for (uint32_t j = 0; j < cache.numVertices; ++j) {
713 | uint32_t vertId = cache.vertices[j];
714 |
715 | for (Triangle* tri : indexVertexMap[vertId]->neighbours) {
716 | if (tri->flag == 1) continue;
717 |
718 | // count the corners already in the meshlet and remember the last corner that is new
719 | int newVert{};
720 | int vertsInMeshlet = 0;
721 | int used = 0;
722 | for (int i = 0; i < 3; ++i) {
723 | if (currentVerts.find(tri->vertices[i]->index) == currentVerts.end()) {
724 | newVert = i;
725 | }
726 | else {
727 | ++vertsInMeshlet;
728 | }
729 | }
730 | // count the neighbours that are already assigned to a meshlet
731 | for (auto neighbour_tri : tri->neighbours) {
732 | if (neighbour_tri->flag == 1) ++used;
733 | }
734 | // every neighbour is used: normalise the count to 3 so the dangling test below fires
735 | if (tri->neighbours.size() == used) used = 3;
736 |
737 |
738 | // a dangling triangle gets a one-point bonus so it is preferred
739 | if (used == 3) {
740 | ++vertsInMeshlet;
741 | }
742 |
743 | // score 3 or more (4 is possible with the dangling bonus): adding the triangle keeps the current radius
744 | if (vertsInMeshlet >= 3) {
745 | newRadius = radius;
746 | }
747 | else if (vertsInMeshlet == 1){
748 | continue;
749 | }
750 | else {
751 | //TODO TURN THIS IN TO ONE THINK THAT ALWAYS RUNS
752 | // LIKE MAKE SURE THAT THE VERTEX furtherst away from center is used for new radius
753 | // or calculate three new radius and use the biggest one
754 | // estimated radius after adding the new corner; assigns the OUTER newRadius (a local redeclaration here used to discard the value)
755 | newRadius = 0.5 * (radius + glm::length(center - vertexBuffer[tri->vertices[newVert]->index].pos));
756 |
757 | }
758 | // keep the candidate that shares more vertices or yields a smaller radius than the best so far
759 | if (vertsInMeshlet > bestVertsInMeshlet || newRadius < bestNewRadius ) {
760 | bestVertsInMeshlet = vertsInMeshlet;
761 | bestNewRadius = newRadius;
762 | bestTri = tri;
763 | }
764 | }
765 | }
766 |
767 | if (bestTri == nullptr) {
768 | // no candidate around the cache: start from the first unused triangle of the seed vertex and initialise the sphere from it
769 | for (Triangle* tri : vert->neighbours) {
770 | // skip used triangles
771 | if (tri->flag != 1) {
772 | bestTri = tri;
773 |
774 | center = (vertexBuffer[bestTri->vertices[0]->index].pos + vertexBuffer[bestTri->vertices[1]->index].pos + vertexBuffer[bestTri->vertices[2]->index].pos) / 3.0f;
775 | bestNewRadius = glm::max(glm::length(center - vertexBuffer[bestTri->vertices[0]->index].pos), glm::max(glm::length(center - vertexBuffer[bestTri->vertices[1]->index].pos), glm::length(center - vertexBuffer[bestTri->vertices[2]->index].pos)));
776 | break;
777 | }
778 | }
779 | // the seed vertex has no unused triangle left: move on to the next seed vertex
780 | if (bestTri == nullptr) {
781 | ++i;
782 | // here we finalize current meshlet when we need to enforce locality
783 | //if (cache.numPrims != 0) {
784 | // meshlets.push_back(cache);
785 | // currentVerts.clear();
786 | // cache.reset();
787 | //}
788 |
789 | continue;
790 | }
791 | }
792 | // collect the indices of the chosen triangle and count how many corners are new to the meshlet
793 | int newVert{};
794 | int numNewVerts = 0;
795 | VertexIndexType candidateIndices[3];
796 | for (VertexIndexType i = 0; i < 3; ++i) {
797 | candidateIndices[i] = bestTri->vertices[i]->index;
798 | if (currentVerts.find(bestTri->vertices[i]->index) == currentVerts.end()) {
799 | newVert = i;
800 | ++numNewVerts;
801 | }
802 | }
803 | // move the centre only when exactly one corner is new (this was an assignment `= 1`, which always ran and clobbered the count)
804 | radius = bestNewRadius;
805 | if (numNewVerts == 1) {
806 | // place the centre `radius` away from the new vertex p, towards the old centre
807 | const mm::Vertex p = vertexBuffer[bestTri->vertices[newVert]->index];
808 | center = p.pos + (radius / (FLT_EPSILON + glm::length(center - p.pos))) * (center - p.pos);
809 | }
810 |
811 | // if the cache is full: top it up, emit it and restart with an empty cache
812 | //add triangle to cache
813 | if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) {
814 | // we run out of verts but could push prims more so we do a pass of prims here to see if we can maximize
815 | // so we run through all triangles to see if the meshlet already has the required verts
816 | // brute-force pass over the triangles around every cached vertex, to test whether it is worth it
817 | for (int v = 0; v < cache.numVertices; ++v) {
818 | for (Triangle* tri : indexVertexMap[cache.vertices[v]]->neighbours) {
819 | if (tri->flag == 1) continue;
820 |
821 | VertexIndexType candidateIndices[3];
822 | for (uint32_t j = 0; j < 3; ++j) {
823 | uint32_t idx = tri->vertices[j]->index;
824 | candidateIndices[j] = idx;
825 | }
826 |
827 | if (!cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) {
828 | cache.insert(candidateIndices, vertexBuffer);
829 | tri->flag = 1;
830 | }
831 | }
832 | }
833 | meshlets.push_back(cache);
834 | currentVerts.clear();
835 | cache.reset();
836 | continue;
837 | //break;
838 | // bestTri was not inserted and stays unflagged; the next iteration starts a new meshlet from the same seed vertex
839 |
840 | }
841 |
842 | // insert triangle and mark used
843 | cache.insert(candidateIndices, vertexBuffer);
844 | bestTri->flag = 1;
845 | currentVerts.insert(candidateIndices[0]);
846 | currentVerts.insert(candidateIndices[1]);
847 | currentVerts.insert(candidateIndices[2]);
848 | ++usedVerts[candidateIndices[0]];
849 | ++usedVerts[candidateIndices[1]];
850 | ++usedVerts[candidateIndices[2]];
851 |
852 | //if (indexVertexMap[i]->neighbours.size() == usedVerts[indexVertexMap[i]->index]) ++i;
853 | }
854 |
855 | // add remaining triangles to a meshlet
856 | if (!cache.empty()) {
857 | meshlets.push_back(cache);
858 | cache.reset();
859 | }
860 |
861 | break;
862 | }
863 | case 12: // vertex-driven greedy growth: walks a FIFO of vertices and inserts every unused triangle around each one
864 | {
865 | std::queue priorityQueue; // FIFO of vertices whose surrounding triangles still have to be processed
866 | //std::vector> clusters;
867 | //std::vector cluster;
868 | //std::vector triangleCentroids;
869 | //triangleCentroids.resize(triangles.size());
870 | //std::vector clusterCentroids;
871 |
872 | //// pick best triangle to add
873 | //std::vector vertsVector;
874 | //vertsVector.reserve(indexVertexMap.size());
875 | //for (int i = 0; i < indexVertexMap.size(); ++i) {
876 | // vertsVector.push_back(indexVertexMap[i]);
877 | //}
878 |
879 | //std::sort(vertsVector.begin(), vertsVector.end(), std::bind(compareVerts, std::placeholders::_1, std::placeholders::_2, vertexBuffer));
880 |
881 | // `used` (declared outside this case) is keyed by vertex index; an entry marks a vertex whose triangles were all visited.
882 | //glm::vec3 clusterCenter = glm::vec3(0.0f);
883 | // Seed the vertex queue from every vertex that has not been fully processed yet.
884 | for (int i = 0; i < vertsVector.size(); ++i) {
885 | // for (Triangle* triangle : triangles) {
886 | // vertices already recorded in `used` are skipped
887 | Vert* vert = vertsVector[i];
888 | if (used.find(vert->index) != used.end()) continue;
889 |
890 | // start the queue from this vertex
891 | priorityQueue.push(vert);
892 |
893 | // process queued vertices until the queue is exhausted
894 | while (!priorityQueue.empty()) {
895 | // peek at the front vertex (shadows the outer `vert`); it is popped after all its triangles were visited
896 | Vert* vert = priorityQueue.front();
897 |
898 | for (Triangle* tri : vert->neighbours) {
899 | if (tri->flag == 1) continue;
900 | //glm::vec3 centroid = glm::vec3(0.0f);
901 |
902 | // calculate centroid
903 | //centroid = vertexBuffer[tri->vertices[0]->index].pos + vertexBuffer[tri->vertices[1]->index].pos + vertexBuffer[tri->vertices[2]->index].pos;
904 | //triangleCentroids[tri->id] = centroid / 3.0f;
905 |
906 |
907 | // gather the triangle's vertex indices and queue every corner not yet in `used` (the same vertex may be queued repeatedly)
908 | VertexIndexType candidateIndices[3];
909 | for (uint32_t j = 0; j < 3; ++j) {
910 | uint32_t idx = tri->vertices[j]->index;
911 | candidateIndices[j] = idx;
912 | if (used.find(idx) == used.end()) priorityQueue.push(tri->vertices[j]);
913 | }
914 | // cache is full: top it up, emit it and start a new meshlet
915 | if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) {
916 | // we run out of verts but could push prims more so we do a pass of prims here to see if we can maximize
917 | // so we run through all triangles to see if the meshlet already has the required verts
918 | // brute-force pass over the triangles around every cached vertex, to test whether it is worth it
919 | for (int v = 0; v < cache.numVertices; ++v) {
920 | for (Triangle* tri : indexVertexMap[cache.vertices[v]]->neighbours) {
921 | if (tri->flag == 1) continue;
922 |
923 | VertexIndexType candidateIndices[3];
924 | for (uint32_t j = 0; j < 3; ++j) {
925 | uint32_t idx = tri->vertices[j]->index;
926 | candidateIndices[j] = idx;
927 | if (used.find(idx) == used.end()) priorityQueue.push(tri->vertices[j]);
928 | }
929 |
930 | if (!cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) {
931 | cache.insert(candidateIndices, vertexBuffer);
932 | //cluster.push_back(tri->id);
933 | //clusterCenter += triangleCentroids[tri->id];
934 | tri->flag = 1;
935 | }
936 | }
937 | }
938 | //clusters.push_back(cluster);
939 | //clusterCenter = clusterCenter / float(cluster.size());
940 | //cluster.clear();
941 | //clusterCentroids.push_back(clusterCenter);
942 | meshlets.push_back(cache);
943 | //clusterCenter = glm::vec3(0.0f);
944 |
945 | // drop the queued vertices, keep only the current vertex, and start an empty cache
946 | priorityQueue = {};
947 | priorityQueue.push(vert);
948 | cache.reset();
949 | continue;
950 | // NOTE(review): this `continue` advances the enclosing triangle loop, so the triangle that did not fit is not inserted here and stays unflagged - confirm it is always reached again through another vertex.
951 | }
952 |
953 | cache.insert(candidateIndices, vertexBuffer);
954 | //cluster.push_back(tri->id);
955 | //clusterCenter += triangleCentroids[tri->id];
956 |
957 | // if triangle is inserted set flag to used.
958 | tri->flag = 1;
959 | }
960 |
961 | // pop vertex if we make it through all its neighbours
962 | priorityQueue.pop();
963 | used[vert->index] = 1;
964 |
965 |
966 |
967 |
968 |
969 | };
970 | }
971 | // add remaining triangles to a meshlet
972 | if (!cache.empty()) {
973 | meshlets.push_back(cache);
974 | cache.reset();
975 | //clusters.push_back(cluster);
976 | //clusterCenter = clusterCenter / float(cluster.size());
977 | //clusterCentroids.push_back(clusterCenter);
978 | //cluster.clear();
979 |
980 | }
981 | // Disabled experiment below: centroid-based re-centering of clusters followed by re-packing into caches.
982 | //for (int i = 0; i < 2; ++i) {
983 | // // find cluster centers
984 | // std::vector clusterCenters;
985 | // clusterCenters.resize(clusters.size());
986 | // uint32_t clusterid = 0;
987 | // for (std::vector c : clusters) {
988 | // double minDist = DBL_MAX;
989 | // for (uint32_t tid : c) {
990 | // glm::vec3 clusterCentroid = clusterCentroids[clusterid];
991 | // glm::vec3 triangleCentroid = triangleCentroids[tid];
992 | // // distance to clusterCenter
993 | // double distance = glm::distance(clusterCentroid, triangleCentroid);
994 | // // if distance is shortest cur triangle is center
995 | // if (distance < minDist) {
996 | // minDist = distance;
997 | // clusterCenters[clusterid] = tid;
998 | // }
999 |
1000 |
1001 | // }
1002 | // ++clusterid;
1003 |
1004 | // }
1005 |
1006 | // //redestribute triangles
1007 |
1008 | // // reset clusters
1009 | // clusters.clear();
1010 | // clusters.resize(clusterCenters.size());
1011 | // clusterCentroids.resize(clusterCenters.size());
1012 | // std::queue triangleQueue;
1013 | // for (int i = 0; i < triangles.size(); ++i) {
1014 | // Triangle* tri = triangles[i];
1015 | // if (tri->flag == i) continue;
1016 | // tri->flag = i;
1017 |
1018 | // triangleQueue.push(tri);
1019 | // while (!triangleQueue.empty())
1020 | // {
1021 | // Triangle* curTri = triangleQueue.front();
1022 | // triangleQueue.pop();
1023 |
1024 |
1025 | // // if curTri is a cluster center asign tri to that cluster
1026 | // std::vector::iterator clusterItr = std::find(clusterCenters.begin(), clusterCenters.end(), curTri->id);
1027 | // if (clusterItr != clusterCenters.end()) {
1028 | // int idx = std::distance(clusterCenters.begin(), clusterItr);
1029 | // clusters[idx].push_back(tri->id);
1030 | // triangleQueue = {};
1031 | // break;
1032 | // }
1033 |
1034 | // for (Triangle* neighbour : curTri->neighbours) {
1035 | // if (neighbour->flag == i) continue;
1036 | // triangleQueue.push(neighbour);
1037 | // neighbour->flag = i;
1038 |
1039 |
1040 | // }
1041 | // }
1042 | // }
1043 | // // recalculate centroids
1044 | // for (int i = 0; i < clusters.size(); ++i) {
1045 | // std::vector c = clusters[i];
1046 | // glm::vec3 clusterCentroid = glm::vec3(0.0f);
1047 | // for (uint32_t triIdx : c) {
1048 | // Triangle* tri = triangles[triIdx];
1049 | // clusterCentroid += vertexBuffer[tri->vertices[0]->index].pos + vertexBuffer[tri->vertices[1]->index].pos + vertexBuffer[tri->vertices[2]->index].pos;
1050 | // }
1051 | // clusterCentroid = clusterCentroid / float(c.size());
1052 | // clusterCentroids[i] = clusterCentroid;
1053 | // }
1054 | //}
1055 |
1056 | ////pack into caches
1057 | //for (std::vector c : clusters) {
1058 | // for (uint32_t triIdx : c) {
1059 | // VertexIndexType candidateIndices[3];
1060 | // for (uint32_t j = 0; j < 3; ++j) {
1061 | // candidateIndices[j] = triangles[triIdx]->vertices[j]->index;
1062 | // }
1063 |
1064 | // cache.insert(candidateIndices, vertexBuffer);
1065 | // }
1066 |
1067 | // meshlets.push_back(cache);
1068 | // cache.reset();
1069 | //}
1070 | break;
1071 | }
1072 | case 11: // unfinished experiment: counts triangles/vertices per cluster; never writes to `cache` or `meshlets`
1073 | {
1074 | // NOTE(review): `clusters`, `cluster`, `tris` and `triangleQue` are filled or reset below but never read in this case.
1075 | std::vector> clusters;
1076 | unsigned char tris[126]; // ideally we could use mem equal to the entire mesh
1077 | unsigned char verts[64]; // ideally we could use mem equal to the entire mesh
1078 | // the challenge is to not end up with small islands of triangles that will become their own clusters
1079 | memset(tris, 0xff, sizeof(tris)); // fill the whole array; sizing by primitiveLimit could overrun the 126 bytes
1080 | memset(verts, 0xff, sizeof(verts)); // fill the whole array; sizing by vertexLimit could overrun the 64 bytes
1081 |
1082 | // we want to go through our mesh here and mark cluster centers and their radii.
1083 | // that way we can essentially do discrete Poisson sampling of the mesh to find cluster centers.
1084 |
1085 |
1086 | // should I go round the vertex instead ?
1087 | // pick said triangle fan all verts in it, and then subsequently add triangles from the ring ?
1088 | std::vector cluster;
1089 | std::queue triangleQue;
1090 | for (Triangle* triangle : triangles) {
1091 | if (triangle->flag == 1) continue;
1092 | // both counters start at zero (`vertices` used to be left uninitialised); the local `triangles` shadows the outer container
1093 | size_t vertices = 0, triangles = 0;
1094 | while (triangles + 1 <= primitiveLimit || vertices <= vertexLimit) {
1095 | // NOTE(review): the counters only advance when a limit would be EXCEEDED and `triangle` never changes in this loop,
1096 | // so for ordinary limits the loop does not appear to terminate - looks unfinished; confirm before enabling case 11.
1097 | // add neighbours to queue
1098 | for (int i = 0; i < 3; ++i)
1099 | {
1100 | if (triangle->neighbours[i]->flag == 1) continue; // NOTE(review): assumes at least 3 neighbours - verify
1101 | triangleQue.push(triangle->neighbours[i]);
1102 | }
1103 |
1104 | // try to add triangle to current cluster
1105 | // skip degenerate
1106 |
1107 | if (triangle->vertices[0] == triangle->vertices[1] || triangle->vertices[0] == triangle->vertices[2] || triangle->vertices[1] == triangle->vertices[2])
1108 | {
1109 | triangle->flag = 1;
1110 | continue;
1111 | }
1112 |
1113 | uint32_t found = 0;
1114 | // check if any of the incoming three indices are already in cluster
1115 | for (uint32_t v = 0; v < vertices; ++v)
1116 | {
1117 | found += (verts[v] == triangle->vertices[0]->index) + (verts[v] == triangle->vertices[1]->index) + (verts[v] == triangle->vertices[2]->index);
1118 | }
1119 |
1120 | // add triangle and verts
1121 | if ((vertices + 3 - found) > vertexLimit || (triangles + 1) > primitiveLimit) {
1122 | vertices += 3 - found;
1123 | triangles++;
1124 | }
1125 | // NOTE(review): the condition above reads inverted relative to its "add triangle and verts" comment - confirm intent.
1126 |
1127 | // // potential speed up is keeping track of cluster center
1128 | // // might be required for the next part.
1129 | }
1130 | //
1131 | ////reset cluster
1132 | memset(tris, 0xff, sizeof(tris));
1133 | memset(verts, 0xff, sizeof(verts));
1134 | }
1135 |
1136 | // grow out while we have less than vertexlimit and primitivelimit verts and triangles.
1137 | // grab new triangle center and repeat
1138 |
1139 |
1140 |
1141 | // run a pass or two of k-medoids clustering to balance out clusters before backing into caches
1142 | break;
1143 | }
1144 | // our advanced stat
1145 | case 3:
1146 | {
1147 |
1148 | std::unordered_set currentVerts;
1149 | std::vector trianglesInCluster;
1150 | std::deque priorityQueue;
1151 | double boarderLength = 0.0;
1152 | // add triangles to cache untill full.
1153 | for (Triangle* triangle : triangles) {
1154 | // if triangle is not used generate meshlet
1155 | if (triangle->flag == 1) continue;
1156 |
1157 | //reset
1158 | boarderLength = 0.0;
1159 | priorityQueue.push_back(triangle);
1160 | currentVerts.clear();
1161 | trianglesInCluster.clear();
1162 |
1163 | // add triangles to cache untill it is full.
1164 | while (!priorityQueue.empty()) {
1165 | // pop current triangle that expands boarder the least
1166 |
1167 |
1168 | float boarderIncrease = DBL_MAX;
1169 | int bestTriIdx = 0;
1170 | int triIdx = 0;
1171 | for (Triangle* possible_tri : priorityQueue) {
1172 | //Triangle* tri = priorityQueue.front();
1173 | // find out how many verts are already in cluster
1174 | int numVerts = 0;
1175 | bool newVerts[3];
1176 | int idx = 0;
1177 | for (Vert* v : possible_tri->vertices)
1178 | {
1179 | int count = currentVerts.count(v->index);
1180 | newVerts[idx++] = count;
1181 | numVerts += count;
1182 | //if (numVerts >= 3) {
1183 | // std::cout << "we have 3 verts" << std::endl;
1184 | //}
1185 | }
1186 |
1187 | float newBoarder = 0.0f;
1188 | float oldBoarder = 0.0f;
1189 | float newBoarderIncrease = 0.0f;
1190 | switch (numVerts) {
1191 | case 3:
1192 | {
1193 | for (Triangle* nb : possible_tri->neighbours) {
1194 | // find common verts
1195 | std::vector common_verts;
1196 | for (Vert* v : possible_tri->vertices)
1197 | {
1198 | if (v->index == nb->vertices[0]->index)
1199 | {
1200 | common_verts.push_back(nb->vertices[0]->index);
1201 | }
1202 | else if (v->index == nb->vertices[1]->index)
1203 | {
1204 | common_verts.push_back(nb->vertices[1]->index);
1205 | }
1206 | else if (v->index == nb->vertices[2]->index)
1207 | {
1208 | common_verts.push_back(nb->vertices[2]->index);
1209 | }
1210 | }
1211 | if (std::find(trianglesInCluster.begin(), trianglesInCluster.end(), nb) != trianglesInCluster.end()) //nb->flag == 1)
1212 | {
1213 | //add to old boarder
1214 | oldBoarder += vertexBuffer[common_verts[0]].euclideanDistance(vertexBuffer[common_verts[1]]);
1215 | }
1216 | else
1217 | {
1218 | //add to new boarder
1219 | newBoarder = vertexBuffer[common_verts[0]].euclideanDistance(vertexBuffer[common_verts[1]]);
1220 | }
1221 | }
1222 | newBoarderIncrease = newBoarder - oldBoarder;
1223 | break;
1224 | }
1225 | case 2:
1226 | {
1227 | // figure out which vertex is not in cluster
1228 | if (newVerts[0] == 1 && newVerts[1] == 1)
1229 | {
1230 | newBoarderIncrease = vertexBuffer[possible_tri->vertices[0]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[2]->index])
1231 | + vertexBuffer[possible_tri->vertices[1]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[2]->index])
1232 | - vertexBuffer[possible_tri->vertices[0]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[1]->index]);
1233 |
1234 |
1235 | }
1236 | else if (newVerts[2] == 1 && newVerts[1] == 1)
1237 | {
1238 | newBoarderIncrease = vertexBuffer[possible_tri->vertices[1]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[0]->index])
1239 | + vertexBuffer[possible_tri->vertices[2]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[0]->index])
1240 | - vertexBuffer[possible_tri->vertices[2]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[1]->index]);
1241 | }
1242 | else if (newVerts[0] == 1 && newVerts[2] == 1)
1243 | {
1244 | newBoarderIncrease = vertexBuffer[possible_tri->vertices[2]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[1]->index])
1245 | + vertexBuffer[possible_tri->vertices[0]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[1]->index])
1246 | - vertexBuffer[possible_tri->vertices[0]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[2]->index]);
1247 | }
1248 | break;
1249 | }
1250 | // 1 shared vert and none result in entire triangle boarder being added
1251 | default:
1252 | {
1253 | // based on that we calculate new boarder
1254 | newBoarderIncrease = vertexBuffer[possible_tri->vertices[0]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[1]->index])
1255 | + vertexBuffer[possible_tri->vertices[0]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[2]->index])
1256 | + vertexBuffer[possible_tri->vertices[1]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[2]->index]);
1257 | break;
1258 | }
1259 | };
1260 |
1261 | if (newBoarderIncrease <= boarderIncrease) {
1262 | boarderIncrease = newBoarderIncrease;
1263 | bestTriIdx = triIdx;
1264 |
1265 | }
1266 |
1267 |
1268 | triIdx++;
1269 | }
1270 | // move best tri to front of queue
1271 | std::swap(priorityQueue.front(), priorityQueue[bestTriIdx]);
1272 | Triangle* tri = priorityQueue.front();
1273 |
1274 | // get all vertices of current triangle
1275 | VertexIndexType candidateIndices[3];
1276 | for (VertexIndexType i = 0; i < 3; ++i) {
1277 | candidateIndices[i] = tri->vertices[i]->index;
1278 | }
1279 | // break if cache is full
1280 | if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) {
1281 | meshlets.push_back(cache);
1282 | //addMeshlet(geometry, cache);
1283 |
1284 | //reset cache and empty priorityQueue
1285 | priorityQueue = {tri};
1286 | trianglesInCluster.clear();
1287 | currentVerts.clear();
1288 | cache.reset();
1289 | continue;
1290 | }
1291 |
1292 | cache.insert(candidateIndices, vertexBuffer);
1293 |
1294 | // if triangle is inserted set flag to used.
1295 | priorityQueue.pop_front();
1296 | tri->flag = 1;
1297 |
1298 | //insert triangle and calculate added boarder
1299 | boarderLength += boarderIncrease;
1300 |
1301 | // add the used vertices to the current cluster
1302 | currentVerts.insert(tri->vertices[0]->index);
1303 | currentVerts.insert(tri->vertices[1]->index);
1304 | currentVerts.insert(tri->vertices[2]->index);
1305 | trianglesInCluster.push_back(tri);
1306 |
1307 | // get alle neighbours of triangles currently in meshlet
1308 | priorityQueue.clear();
1309 | for (Triangle* tr : trianglesInCluster) {
1310 | for (Triangle* t : tr->neighbours) {
1311 | if (t->flag != 1) priorityQueue.push_back(t);
1312 | }
1313 | }
1314 | //for (Triangle* t : tri->neighbours) {
1315 | // if (t->flag != 1) priorityQueue.push_back(t);
1316 | //}
1317 |
1318 | };
1319 | }
1320 | // add remaining triangles to a meshlet
1321 | if (!cache.empty()) {
1322 | meshlets.push_back(cache);
1323 | cache.reset();
1324 | }
1325 |
1326 | //// add triangles to cache untill full.
1327 | //for (Triangle* triangle : triangles) {
1328 | // // if triangle is not used generate meshlet
1329 | // if (triangle->flag != 1) {
1330 | // //get indicies
1331 | // VertexIndexType candidateIndices[3];
1332 | // for (VertexIndexType i = 0; i < 3; ++i) {
1333 | // candidateIndices[i] = triangle->vertices[i]->index;
1334 | // }
1335 |
1336 | // // check if we can add to current meshlet if not we finish it.
1337 | // if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) {
1338 | // meshlets.push_back(cache);
1339 | // cache.reset();
1340 | // }
1341 |
1342 | // // insert current triangle
1343 | // cache.insert(candidateIndices, vertexBuffer);
1344 | // triangle->flag = 1;
1345 | // }
1346 | //}
1347 |
1348 | //// add remaining triangles to a meshlet
1349 | //if (!cache.empty()) {
1350 | // meshlets.push_back(cache);
1351 | // cache.reset();
1352 | //}
1353 |
1354 |
1355 | // return numIndicies for now - maybe change return type
1356 | break;
1357 | }
1358 | // graphicslab cluster without building sparse matrix
1359 | case 4:
1360 | {
1361 | int generated = 0;
1362 |
1363 | //// cluster center indices
1364 | std::unordered_set c_indices;
1365 | c_indices.reserve(glm::ceil(triangles.size() / 100)); // indexVertexMap.size() / 3 / primitiveLimit); // triangles.size() / primitiveLimit);//
1366 |
1367 | //// find random centers
1368 | std::default_random_engine generator;
1369 | std::uniform_int_distribution distribution(0, triangles.size() - 1);
1370 |
1371 | //// this loop here is made to make sure that different cluster centers are chosen
1372 | while (c_indices.size() < glm::ceil(triangles.size() / 100)) { //indexVertexMap.size() / 3 / primitiveLimit) { //triangles.size() / primitiveLimit) { // Consider dropping std::rand - fails to generate sufficiently random numbers
1373 | c_indices.insert(distribution(generator)); // Can loop forever if random produces few distinct random values
1374 | }
1375 |
1376 | //std::vector weightedAreaTriangleList = AreaWeightedTriangleList(triangles, vertexBuffer);
1377 | //std::vector c_indices = SampleList(weightedAreaTriangleList, 1000);
1378 |
1379 | std::cout << c_indices.size() << " centers generated" << std::endl;
1380 |
1381 | // putting cluster centers into a vector for later use
1382 | std::vector> centers;
1383 | for (uint32_t i : c_indices) {
1384 | centers.push_back(std::vector{i});
1385 | }
1386 | c_indices.clear();
1387 |
1388 |
1389 |
1390 |
1391 | std::cout << "Starting Kmeans" << std::endl;
1392 |
1393 | // create the new clusters
1394 | std::vector> clusters(centers.size(), std::vector());
1395 |
1396 | uint32_t distance;
1397 | uint32_t minDistance;
1398 |
1399 | uint32_t iter = 0;
1400 |
1401 | // settings and structures from on Graphics Lab
1402 | bool CENTER_IS_SET = false;
1403 | unsigned int ITER_LIM = -1;
1404 | bool SMOOTH_CLUSTERS = NVMeshlet::GenStrategy::KMEANSU == NVMeshlet::KMEANSE || NVMeshlet::GenStrategy::KMEANSU == NVMeshlet::KMEANSEO || NVMeshlet::GenStrategy::KMEANSU == NVMeshlet::KMEANSA || NVMeshlet::GenStrategy::KMEANSU == NVMeshlet::KMEANSU;
1405 | bool MULTI_SPLIT = NVMeshlet::GenStrategy::KMEANSU == NVMeshlet::KMEANSO || NVMeshlet::GenStrategy::KMEANSU == NVMeshlet::KMEANSEO || NVMeshlet::GenStrategy::KMEANSU == NVMeshlet::KMEANSA || NVMeshlet::GenStrategy::KMEANSU == NVMeshlet::KMEANSU;
1406 | bool AGGRESSIVE_BALANCING = NVMeshlet::GenStrategy::KMEANSU == NVMeshlet::KMEANSA;
1407 |
1408 | double convergenceDist;
1409 | //double CONVERGENCE_LIM = 3 * (NVMeshlet::GenStrategy::KMEANSU == NVMeshlet::KMEANSU);//(*vertices).size()/1000000; // Seems to perform well?
1410 | double CONVERGENCE_LIM = 1.0f; // 1.5f
1411 | bool done = false;
1412 |
1413 | std::vector> dirtyVerts;
1414 | std::vector flippableVerts;
1415 |
1416 | mm::MeshletCache cache;
1417 | VertexIndexType* candidateIndices = new VertexIndexType[3];
1418 |
1419 | std::unordered_map center_map;
1420 |
1421 | std::vector> prevCenters;
1422 |
1423 | for (uint32_t i = 0; i < centers.size(); ++i) {
1424 | for (uint32_t j = 0; j < centers[i].size(); ++j) {
1425 | center_map[centers[i][j]] = i;
1426 | }
1427 | }
1428 |
1429 | // while clusters do not fit into meshlets
1430 | while (!done) {
1431 | iter = 0;
1432 | convergenceDist = CONVERGENCE_LIM + 1;
1433 | // should resample list with new clusters - to reconverge
1434 | //std::vector newCenters = SampleList(weightedAreaTriangleList, center_map.size());
1435 |
1436 | //centers.clear();
1437 | //for (uint32_t i : newCenters) {
1438 | // centers.push_back(std::vector{i});
1439 | //}
1440 |
1441 | //center_map.clear();
1442 | //for (uint32_t i = 0; i < newCenters.size(); ++i) {
1443 | // center_map[newCenters[i]] = i;
1444 | //}
1445 |
1446 | // while clusters have not yet converged
1447 | while (convergenceDist > CONVERGENCE_LIM) {
1448 | prevCenters = centers;
1449 |
1450 | iter++;
1451 |
1452 | // clear clusters
1453 | for (uint32_t i = 0; i < clusters.size(); ++i) {
1454 | clusters[i].reserve(10000);
1455 | clusters[i].clear();
1456 | }
1457 |
1458 | // reserve to make sure that the vectors are threadsafe
1459 | dirtyVerts.reserve(centers.size());
1460 | flippableVerts.reserve(centers.size());
1461 |
1462 | dirtyVerts.clear();
1463 | flippableVerts.clear();
1464 |
1465 | bool dirty;
1466 | uint32_t dist;
1467 | uint32_t finalDistance;
1468 | uint32_t distlim = 125;
1469 | uint32_t count;
1470 | double differenceBetweenCenters = 0.0;
1471 | #pragma omp parallel shared(triangles, dirtyVerts, flippableVerts, distlim, AGGRESSIVE_BALANCING) private(count, finalDistance, dirty, minDistance, dist, distance) firstprivate(center_map)
1472 | {
1473 |
1474 | //for (Triangle* v : triangles) {
1475 | #pragma omp for collapse(2) //shared(triangles, dirtyVerts, flippableVerts, distlim, AGGRESSIVE_BALANCING) private(count, finalDistance, dirty, minDistance, dist, distance) firstprivate(center_map)
1476 | for (int t = 0; t < triangles.size(); ++t) {
1477 | Triangle* v = triangles[t];
1478 | minDistance = -1;
1479 |
1480 | Triangle* c;
1481 | v->flag = -1;
1482 | //uint32_t clustercenter = -1;
1483 | finalDistance = -1;
1484 | count = 1;
1485 |
1486 | dirty = false;
1487 | std::vector dirtyCandidates{};
1488 |
1489 | // BFS on structure based on the current triangle
1490 | // Ideally we would want to check all clusters but it should be ok
1491 | // to break after finding the first cluster because all other clusters
1492 | // must be further away (I THINK)
1493 | std::queue priorityQueue;
1494 | std::queue distanceQueue;
1495 | priorityQueue.push(v);
1496 | distanceQueue.push(1);
1497 |
1498 | // actually we might not even need to keep a distance since we are going to
1499 | // grab the first cluster center we meet
1500 |
1501 | // for each triangle in frontier
1502 |
1503 | //reset finalDistance between each triangle.
1504 | // essentially finaldistance is
1505 | std::unordered_map visitedTriangleIds{};
1506 | visitedTriangleIds[v->id] = v->id;
1507 | bool centerFound = false;
1508 | while (!priorityQueue.empty()) {
1509 |
1510 | // add neighbours to queue
1511 | Triangle* cur_t = priorityQueue.front();
1512 | priorityQueue.pop();
1513 | // update distance
1514 | //dist = cur_t->dist + 1;
1515 | uint32_t cur_dist = distanceQueue.front();
1516 | dist = cur_dist + 1;
1517 | distanceQueue.pop();
1518 |
1519 |
1520 | // check current triangles id against clusters
1521 | if (center_map.find(cur_t->id) != center_map.end()) {
1522 |
1523 | // if current tri is a cluster center break
1524 | //distance = cur_t->dist;
1525 | distance = cur_dist;
1526 |
1527 | if (distance < minDistance) {
1528 | if (AGGRESSIVE_BALANCING && distance == minDistance - 1) {
1529 | dirty = true;
1530 | }
1531 | else {
1532 | dirty = false;
1533 | }
1534 | c = cur_t;
1535 | minDistance = distance;
1536 | v->flag = center_map[c->id];
1537 | //clustercenter = center_map[c->id];
1538 | dirtyCandidates = { v->flag };
1539 | //dirtyCandidates = { clustercenter };
1540 | //v->dist = 0;
1541 | centerFound = true;
1542 | }
1543 | else if (distance != -1) {
1544 | if (AGGRESSIVE_BALANCING && distance == minDistance + 1) {
1545 | dirty = true;
1546 | }
1547 | else if (distance == minDistance) {
1548 | dirtyCandidates.push_back(center_map[c->id]);
1549 | //v->dist = 1;
1550 | if (SMOOTH_CLUSTERS) v->flag = -1;
1551 | //if (SMOOTH_CLUSTERS) clustercenter = -1;
1552 | dirty = false;
1553 | }
1554 | }
1555 | //finalDistance = distance;
1556 | }
1557 |
1558 | for (Triangle* neighbour : cur_t->neighbours) {
1559 | //Triangle localNeighbour = *neighbour;
1560 | if (visitedTriangleIds.find(neighbour->id) != visitedTriangleIds.end()) continue;
1561 | //neighbour->dist = dist;
1562 | visitedTriangleIds[neighbour->id] = neighbour->id;
1563 | //neighbour->flag = v->flag;
1564 |
1565 | // no need to explore more than the 125 surrounding triangles
1566 | // since if a cluster is further away we actually need a new cluster
1567 | //if (dist <= 15) priorityQueue.push(neighbour);
1568 | if (!centerFound) {
1569 | priorityQueue.push(neighbour);
1570 | distanceQueue.push(dist);
1571 | }
1572 | count++;
1573 | }
1574 | //if (count >= distlim) finalDistance = dist;
1575 |
1576 | }
1577 |
1578 | //if (c == nullptr) {
1579 | // v->flag = -1;
1580 | //}
1581 |
1582 | #pragma omp critical
1583 | {
1584 | if (dirtyCandidates.size() > 1) {
1585 | dirtyCandidates.push_back(v->id);
1586 | dirtyVerts.push_back(dirtyCandidates);
1587 | //continue;
1588 | }
1589 | else {
1590 | if (dirty) {
1591 | flippableVerts.push_back(v->id);
1592 | }
1593 | if (v->flag < centers.size()) {
1594 | //if (clustercenter < centers.size()) {
1595 |
1596 | clusters[v->flag].push_back(v->id);
1597 | //clusters[clustercenter].push_back(v->id);
1598 | // setting the flag changes total number of clusters, who knows why
1599 | //v->flag = clustercenter;
1600 | }
1601 | else {
1602 | v->flag = centers.size();
1603 | //clustercenter = centers.size();
1604 | clusters.push_back(std::vector{v->id});
1605 | centers.push_back(std::vector{v->id});
1606 | center_map[v->id] = v->flag;
1607 | //center_map[v->id] = clustercenter;
1608 | std::cout << "damn" << std::endl;
1609 | }
1610 | }
1611 | }
1612 | }
1613 |
1614 | //if (iter > ITER_LIM) break;
1615 |
1616 | // Update centers
1617 |
1618 | uint32_t maxDistance;
1619 | #pragma omp for reduction (+:differenceBetweenCenters) collapse(2) //shared(triangles, centers, clusters, CENTER_IS_SET) private(maxDistance, dist, minDistance)
1620 | for (int i = 0; i < clusters.size(); ++i) {
1621 | std::vector cluster = clusters[i];
1622 | std::vector center = centers[i];
1623 | minDistance = -1;
1624 | std::vector candidates{};
1625 | //build subgraph here ?
1626 | uint32_t difference = 0;
1627 | for (int j = 0; j < cluster.size(); ++j) {
1628 | count = 0;
1629 | Triangle* t = triangles[cluster[j]];
1630 | //Triangle t = *triangles[cluster[j]];
1631 |
1632 | //t->dist = 0;
1633 | //t.dist = 0;
1634 | //
1635 | //t->flag = -1;
1636 | //for (Triangle* v : clusters[i]) {
1637 | // if (v->id == cur_t->id) continue;
1638 |
1639 |
1640 |
1641 |
1642 | //}
1643 | // BFS on structure based on the current triangle
1644 | // Ideally we would want to check all clusters but it should be ok
1645 | // to break after finding the first cluster because all other clusters
1646 | // must be further away (I THINK)
1647 | std::queue priorityQueue;
1648 | //std::queue priorityQueue;
1649 | priorityQueue.push(t);
1650 | std::queue distanceQueue;
1651 | distanceQueue.push(0);
1652 |
1653 | // actually we might not even need to keep a distance since we are going to
1654 | // grab the first cluster center we meet
1655 |
1656 | // for each triangle in frontier
1657 | dist = 0;
1658 | uint32_t distanceToClusterCenter = 0;
1659 | std::unordered_map visitedIds{};
1660 | visitedIds[t->id] = t->id;
1661 | while (!priorityQueue.empty()) {
1662 | // add neighbours to queue
1663 | Triangle* cur_t = priorityQueue.front();
1664 | priorityQueue.pop();
1665 | // update distance
1666 | uint32_t cur_dist = distanceQueue.front();
1667 | distanceQueue.pop();
1668 | dist = cur_dist + 1;
1669 |
1670 |
1671 | if (std::find(center.begin(), center.end(), cur_t->id) != center.end()) {
1672 | distanceToClusterCenter = cur_dist;
1673 | }
1674 |
1675 |
1676 |
1677 |
1678 | for (Triangle* neighbour : cur_t->neighbours) {
1679 | //Triangle localNeighbour = *neighbour;
1680 | if (visitedIds.find(neighbour->id) != visitedIds.end() || std::find(cluster.begin(), cluster.end(), neighbour->id) == cluster.end()) continue;
1681 | //localNeighbour.dist = dist;
1682 | //neighbour->flag = cur_t->flag;
1683 | visitedIds[neighbour->id] = neighbour->id;
1684 | //if (priorityQueue.size() <= clusters[i].size())
1685 | distanceQueue.push(dist);
1686 | priorityQueue.push(neighbour);
1687 | ++count;
1688 | }
1689 |
1690 | }
1691 |
1692 |
1693 | maxDistance = dist;
1694 |
1695 | if (visitedIds.size() != cluster.size()) {
1696 | maxDistance = -1; // Does not consider every element of cluster a possibility
1697 | // center is set means that we can have more than one triangle in the center
1698 | }if (maxDistance == minDistance && CENTER_IS_SET) { // We might not have convergence guarantees for accurate graph centers
1699 | candidates.push_back(cluster[j]);
1700 | }
1701 | else if (maxDistance < minDistance) {
1702 | candidates.clear();
1703 | candidates.push_back(cluster[j]);
1704 | difference = distanceToClusterCenter;
1705 | //std::cout << "Cluster " << i << " has candidate " << clusters[i][j] << " with eccentricity " << maxDistance << " compared to previous " << minDistance << std::endl;
1706 | minDistance = maxDistance;
1707 | }
1708 | }
1709 |
1710 | if (candidates.size() == 0) {
1711 | std::cout << "Error no candidates for cluster " << i << std::endl;
1712 | }
1713 | centers[i] = candidates;
1714 | differenceBetweenCenters += difference;
1715 | }
1716 | }
1717 | center_map.clear();
1718 | convergenceDist = 0;
1719 | for (int i = 0; i < centers.size(); ++i) {
1720 | //std::cout << "Center " << i << " size " << centers[i].size() << std::endl;
1721 | for (int j = 0; j < centers[i].size(); ++j) {
1722 | center_map[centers[i][j]] = i;
1723 | }
1724 | // TODO: Adapt to center-sets
1725 | // this loop looks at difference between the distance of all triangles in cluster to
1726 | // the old cluster center and the new cluster center
1727 | //if (i < prevCenters.size()) {
1728 | // //distance = distanceMatrix->get(centers[i][0], prevCenters[i][0]) - 1;
1729 | // if (distance > convergenceDist) convergenceDist = distance;
1730 | //}
1731 | //else {
1732 | // convergenceDist = -1;
1733 | //}
1734 | }
1735 |
1736 | convergenceDist = differenceBetweenCenters / centers.size();
1737 | //std::cout << "Convergence distance " << convergenceDist << std::endl;
1738 | //std::cout << "Number of clusters " << center_map.size() << std::endl;
1739 |
1740 | }
1741 |
1742 | //std::cout << "Centers converged" << std::endl;
1743 | //Assign "dirty" vertices
1744 | if (AGGRESSIVE_BALANCING) {
1745 | for (uint32_t vert_id : flippableVerts) {
1746 | Triangle* vertex = triangles[vert_id]; // bamboozle is actually triangle!
1747 | uint32_t old_flag = vertex->flag;
1748 | for (uint32_t i = 0; i < vertex->neighbours.size(); ++i) {
1749 | if (vertex->neighbours[i]->flag == vertex->neighbours[(i + 1) % vertex->neighbours.size()]->flag) {
1750 | if (vertex->neighbours[i]->flag != -1) vertex->flag = vertex->neighbours[i]->flag;
1751 | break;
1752 | }
1753 | }
1754 | if (vertex->flag != old_flag) {
1755 | for (uint32_t i = 0; i < clusters[old_flag].size(); ++i) {
1756 | if (triangles[clusters[old_flag][i]]->id == vertex->id) {
1757 | std::swap(clusters[old_flag][i], clusters[old_flag][clusters[old_flag].size() - 1]);
1758 | clusters[old_flag].pop_back();
1759 | }
1760 | }
1761 | clusters[vertex->flag].push_back(vertex->id);
1762 | }
1763 | }
1764 | }
1765 | for (auto dirtyList : dirtyVerts) {
1766 | uint32_t vert_id = dirtyList.back();
1767 |
1768 | Triangle* vertex = triangles[vert_id];
1769 | vertex->dist = 0;
1770 |
1771 | // Check neighbours
1772 | if (SMOOTH_CLUSTERS || AGGRESSIVE_BALANCING) {
1773 | for (uint32_t i = 0; i < vertex->neighbours.size(); ++i) {
1774 | if (vertex->neighbours[i]->flag == vertex->neighbours[(i + 1) % vertex->neighbours.size()]->flag) {
1775 | vertex->flag = vertex->neighbours[i]->flag;
1776 | break;
1777 | }
1778 | }
1779 | if (vertex->flag == -1) {
1780 | uint32_t min_size = -1;
1781 | uint32_t curr_candidate = -1;
1782 | for (uint32_t i = 0; i < dirtyList.size() - 1; ++i) {
1783 | if (clusters[dirtyList[i]].size() < min_size) {
1784 | min_size = clusters[dirtyList[i]].size();
1785 | curr_candidate = dirtyList[i];
1786 | }
1787 | }
1788 | vertex->flag = curr_candidate;
1789 | }
1790 | }
1791 | clusters[vertex->flag].push_back(vertex->id);
1792 | }
1793 |
1794 | //Check if the partitioning fits
1795 | done = true;
1796 | [&] {
1797 | for (uint32_t c = 0; c < clusters.size(); ++c) {
1798 | if (!done) break;
1799 | cache.reset();
1800 | for (uint32_t v_id : clusters[c]) {
1801 | for (uint32_t i = 0; i < 3; ++i) {
1802 | candidateIndices[i] = triangles[v_id]->vertices[i]->index;
1803 | }
1804 | if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) {
1805 | // Create initial centers and recurse
1806 | if (centers[c].size() > 1) {
1807 | centers.push_back(std::vector{centers[c].back()});
1808 | centers[c].pop_back();
1809 | //std::cout << "Splitting center " << centers[c].back() << "," << centers.back()[0] << std::endl;
1810 | }
1811 | else{
1812 | uint32_t candidate_center = clusters[c][std::rand() % clusters[c].size()];
1813 | while (center_map.count(candidate_center) != 0) {
1814 | candidate_center = clusters[c][std::rand() % clusters[c].size()];
1815 | }
1816 | centers.push_back(std::vector{candidate_center});
1817 | center_map[candidate_center] = centers.size() - 1;
1818 | //std::cout << "Adding neighbour center " << centers.back()[0] << std::endl;
1819 | }
1820 | //std::cout << "Cluster size conflict, recursing " << clusters[c].size() << std::endl;
1821 | clusters.push_back(std::vector());
1822 | done = false;
1823 |
1824 | if (!MULTI_SPLIT) c = clusters.size();
1825 | //std::cout << "Number of clusters: " << clusters.size() << std::endl;
1826 | return;
1827 | }
1828 | cache.insert(candidateIndices, vertexBuffer);
1829 | }
1830 | }
1831 | }();
1832 | }
1833 | delete[] candidateIndices;
1834 |
1835 |
1836 |
1837 |
1838 | //std::cout << "Kmeans done building meshlets" << std::endl;
1839 |
1840 |
1841 | for (std::vector c : clusters) {
1842 | cache.reset();
1843 | for (uint32_t index : c) {
1844 | for (uint32_t i = 0; i < 3; ++i) {
1845 | candidateIndices[i] = triangles[index]->vertices[i]->index;
1846 | }
1847 | if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) return; // U done goofed
1848 | cache.insert(candidateIndices, vertexBuffer);
1849 | }
1850 | generated++;
1851 | meshlets.push_back(cache);
1852 | }
1853 | //std::cout << "Meshlets generated " << generated << std::endl;
1854 |
1855 | break;
1856 |
1857 |
1858 | }
1859 | //graphicslab cluster commented out because of eigen dependency
1860 | // zoutmans version
1861 | case 0:
1862 | {
1863 |
1864 | std::vector used(triangles.size(), false);
1865 |
1866 | std::unordered_set currentVerts;
1867 |
1868 | std::vector frontier;
1869 |
1870 | VertexIndexType* candidateIndices = new VertexIndexType[3];
1871 |
1872 | uint32_t score;
1873 | uint32_t maxScore;
1874 |
1875 | Triangle* candidate;
1876 | Triangle* current;
1877 | uint32_t candidateIndex;
1878 |
1879 | for (uint32_t used_count = 0; used_count < triangles.size(); ++used_count) {
1880 | if (used[used_count]) continue;
1881 |
1882 | // Empty frontier
1883 | frontier = { triangles[used_count] };
1884 | currentVerts.clear();
1885 |
1886 | while (frontier.size() > 0) {
1887 | maxScore = 0;
1888 |
1889 | for (uint32_t i = 0; i < frontier.size(); ++i) {
1890 | current = frontier[i];
1891 | score = 0;
1892 | for (Vert* v : current->vertices) score += currentVerts.count(v->index);
1893 |
1894 | if (score >= maxScore) {
1895 | maxScore = score;
1896 | candidate = current;
1897 | candidateIndex = i;
1898 | }
1899 | }
1900 |
1901 | for (uint32_t i = 0; i < 3; ++i) {
1902 | candidateIndices[i] = candidate->vertices[i]->index;
1903 | }
1904 | if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) {
1905 | meshlets.push_back(cache);
1906 | cache.reset();
1907 | break;
1908 | }
1909 | cache.insert(candidateIndices, vertexBuffer);
1910 | std::swap(frontier[candidateIndex], frontier[frontier.size() - 1]);
1911 | frontier.pop_back();
1912 | for (Vert* v : candidate->vertices) currentVerts.insert(v->index);
1913 | for (Triangle* t : candidate->neighbours) {
1914 | if (!used[t->id]) frontier.push_back(t);
1915 | }
1916 |
1917 | used[candidate->id] = true;
1918 | }
1919 |
1920 |
1921 | // Find best scoring triangle in frontier
1922 | // Attempt to add to meshlet
1923 | // If fail
1924 | // Add meshlet to geometry
1925 | // Reset cache
1926 | // Continue loop
1927 | // If success
1928 | // Add triangle to meshlet
1929 | // If frontier empty continue loop
1930 | }
1931 |
1932 | if (!cache.empty())
1933 | {
1934 | meshlets.push_back(cache);
1935 | }
1936 |
1937 | break;
1938 | }
1939 | // Our Greedy version
1940 | default:
1941 | {
1942 |
1943 | std::queue priorityQueue;
1944 |
1945 | // add triangles to cache untill full.
1946 | for (int i = 0; i < triangles.size(); ++i) {
1947 | // for (Triangle* triangle : triangles) {
1948 | // if triangle is not used generate meshlet
1949 | Triangle* triangle = triangles[i];
1950 |
1951 | if (triangle->flag == 1) continue;
1952 |
1953 | //reset
1954 | priorityQueue.push(triangle);
1955 |
1956 |
1957 |
1958 |
1959 | // add triangles to cache untill it is full.
1960 | while (!priorityQueue.empty()) {
1961 | // pop current triangle
1962 | Triangle* tri = priorityQueue.front();
1963 |
1964 | // get all vertices of current triangle
1965 | VertexIndexType candidateIndices[3];
1966 | for (uint32_t j = 0; j < 3; ++j) {
1967 | candidateIndices[j] = tri->vertices[j]->index;
1968 | }
1969 | // break if cache is full
1970 | if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) {
1971 | meshlets.push_back(cache);
1972 |
1973 | //reset cache and empty priorityQueue
1974 | priorityQueue = {};
1975 | priorityQueue.push(tri);
1976 | cache.reset();
1977 | break;
1978 | // start over again but from the fringe of the current cluster
1979 | }
1980 | // get alle neighbours of current triangle
1981 | for (Triangle* t : tri->neighbours) {
1982 | if (t->flag != 1) priorityQueue.push(t);
1983 | }
1984 |
1985 |
1986 | cache.insert(candidateIndices, vertexBuffer);
1987 | // if triangle is inserted set flag to used.
1988 | priorityQueue.pop();
1989 | tri->flag = 1;
1990 |
1991 |
1992 | };
1993 | }
1994 | // add remaining triangles to a meshlet
1995 | if (!cache.empty()) {
1996 | meshlets.push_back(cache);
1997 | cache.reset();
1998 | }
1999 |
2000 | //// add triangles to cache untill full.
2001 | //for (Triangle* triangle : triangles) {
2002 | // // if triangle is not used generate meshlet
2003 | // if (triangle->flag != 1) {
2004 | // //get indicies
2005 | // VertexIndexType candidateIndices[3];
2006 | // for (VertexIndexType i = 0; i < 3; ++i) {
2007 | // candidateIndices[i] = triangle->vertices[i]->index;
2008 | // }
2009 |
2010 | // // check if we can add to current meshlet if not we finish it.
2011 | // if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) {
2012 | // meshlets.push_back(cache);
2013 | // cache.reset();
2014 | // }
2015 |
2016 | // // insert current triangle
2017 | // cache.insert(candidateIndices, vertexBuffer);
2018 | // triangle->flag = 1;
2019 | // }
2020 | //}
2021 |
2022 | //// add remaining triangles to a meshlet
2023 | //if (!cache.empty()) {
2024 | // meshlets.push_back(cache);
2025 | // cache.reset();
2026 | //}
2027 | }
2028 | }
2029 | }
2030 |
2031 | }
--------------------------------------------------------------------------------
/core/meshletTaskDescriptor.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Senbyo/meshletmaker/ef4082930e2b7c317adbe3bdf7a8ad94871e4313/core/meshletTaskDescriptor.cpp
--------------------------------------------------------------------------------
/core/meshlet_util.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
#include <cstdint>
#include <queue>
#include <utility>
#include <vector>
7 |
namespace NVMeshlet {
    struct Vertex;
    struct Triangle;

    // NOTE(review): the container element types in this namespace were missing
    // from the reviewed text and have been restored from how the members are
    // used below - confirm against the callers.

    /// Vertex node of the mesh connectivity graph.
    struct Vertex {
        std::vector<Triangle*> neighbours; // presumably the triangles touching this vertex - confirm
        unsigned int index;
        unsigned int degree;               // compared by findMaxVertex()
    };

    /// Triangle node of the mesh connectivity graph.
    struct Triangle {
        std::vector<Vertex*> vertices;
        std::vector<Triangle*> neighbours;          // adjacent triangles, walked by the BFS in SymMatrix
        uint32_t id;                                // used as row/column index by SymMatrix
        uint32_t flag = static_cast<uint32_t>(-1);  // all bits set by default; overwritten by SymMatrix
        uint32_t dist;
    };

    /// Abstract pairwise distance table addressed by two indices.
    class DistMatrix {
    public:
        // Polymorphic base: a virtual destructor makes deletion through a
        // DistMatrix* well defined.
        virtual ~DistMatrix() = default;

        virtual void set(uint32_t i, uint32_t j, uint32_t val) = 0;
        virtual uint32_t get(uint32_t i, uint32_t j) = 0;
    };

    /// Symmetric distance matrix with an implicit zero diagonal.
    ///
    /// Only the strict triangle (i < j) is stored, packed into a flat array of
    /// n * (n - 1) / 2 entries. Entries never written hold uint32_t(-1).
    class SymMatrix : public DistMatrix {
    private:
        unsigned int m_n;
        std::vector<uint32_t> m_data;

        /// Maps an unordered pair (i, j), i != j, to its slot in m_data.
        uint64_t translate(uint32_t i, uint32_t j) {
            // Consider assert/error return on i=j or exceeding size
            if (i > j) std::swap(i, j);
            return (static_cast<uint64_t>(j) * j - j) / 2 + i;
        }

    public:
        /// Fills the matrix with breadth-first hop counts between all triangles.
        ///
        /// @param vertices  Triangles to index (despite the name). Every id,
        ///                  including the ids of all reachable neighbours, must
        ///                  be smaller than vertices->size().
        /// @param distlim   Accepted for interface compatibility; the search is
        ///                  not bounded by it.
        ///
        /// Side effect: Triangle::flag of every visited triangle is overwritten
        /// with the index of the last search that reached it.
        SymMatrix(std::vector<Triangle*>* vertices, uint32_t distlim) {
            (void)distlim;

            const uint32_t unreached = static_cast<uint32_t>(-1);
            m_n = static_cast<unsigned int>(vertices->size());
            const uint64_t pairCount = (static_cast<uint64_t>(m_n) * m_n - m_n) / 2;
            m_data.assign(pairCount, unreached);

            // Visit stamps are kept locally, indexed by triangle id. Relying on
            // Triangle::flag alone breaks when a flag left over from earlier
            // processing happens to equal the current search index: that
            // triangle would be treated as already visited and never measured.
            std::vector<uint32_t> reachedBy(m_n, unreached);
            std::queue<Triangle*> frontier;

            for (uint32_t i = 0; i < m_n; ++i) {
                Triangle* source = (*vertices)[i];
                source->flag = i;
                reachedBy[source->id] = i;
                frontier.push(source);

                while (!frontier.empty()) {
                    Triangle* current = frontier.front();
                    frontier.pop();

                    // Distance of `current` was stored when it was discovered
                    // (0 for the source itself, via the implicit diagonal).
                    const uint32_t dist = get(source->id, current->id);

                    for (Triangle* neighbour : current->neighbours) {
                        if (reachedBy[neighbour->id] == i) continue;
                        reachedBy[neighbour->id] = i;
                        neighbour->flag = i; // kept so the flag side effect stays observable
                        set(source->id, neighbour->id, dist + 1);
                        frontier.push(neighbour);
                    }
                }
            }
        }

        /// Stores val for the pair (i, j); writes to the diagonal are ignored.
        void set(uint32_t i, uint32_t j, uint32_t val) override {
            if (i == j) return;
            m_data[translate(i, j)] = val;
        }

        /// Returns the stored value for (i, j), or 0 when i == j.
        uint32_t get(uint32_t i, uint32_t j) override {
            if (i == j) return 0;
            return m_data[translate(i, j)];
        }
    };

    /// Returns the first vertex with the strictly greatest degree. Falls back
    /// to the first element when no degree exceeds 0, and returns nullptr when
    /// vec is null or empty.
    inline Vertex* findMaxVertex(std::vector<Vertex*>* vec) {
        if (vec == nullptr || vec->empty()) return nullptr;

        Vertex* best = vec->front();
        unsigned int bestDegree = 0;
        for (Vertex* candidate : *vec) {
            if (candidate->degree > bestDegree) {
                bestDegree = candidate->degree;
                best = candidate;
            }
        }
        return best;
    }

}
107 |
108 | namespace mm {
109 |
110 | class DistMatrix {
111 | public:
112 | virtual void set(uint32_t i, uint32_t j, uint32_t val) = 0;
113 | virtual uint32_t get(uint32_t i, uint32_t j) = 0;
114 | };
115 |
116 | class SymMatrix : public DistMatrix {
117 | private:
118 | unsigned int m_n;
119 | std::vector m_data;
120 |
121 | uint64_t translate(uint32_t i, uint32_t j) {
122 | // Consider assert/error return on i=j or exceeding size
123 | // Trust in the math
124 | if (i > j) std::swap(i, j);
125 | return ((uint64_t)j * j - j) / 2 + i;
126 | };
127 |
128 | public:
129 | SymMatrix(std::vector* vertices, uint32_t distlim) {
130 | m_n = vertices->size();
131 | uint64_t temp = ((uint64_t)m_n * m_n - m_n) / 2;
132 | m_data = std::vector(temp, -1);
133 |
134 | // BFS
135 | Triangle* current;
136 | std::queue frontier;
137 | uint32_t dist;
138 | for (uint32_t i = 0; i < m_n; ++i) {
139 | current = (*vertices)[i];
140 | current->flag = i;
141 |
142 | //if(i%100 == 0) std::cout << i << "/" << m_n <<"\n";
143 |
144 | dist = 0;
145 | frontier.push(current);
146 |
147 | while (!frontier.empty()) {
148 | current = frontier.front();
149 | frontier.pop(); // Y u do this stdlib
150 |
151 | dist = get(i, current->id);
152 |
153 | for (uint32_t t = 0; t < current->neighbours.size(); ++t) {
154 | if (current->neighbours[t]->flag == i) continue;
155 | current->neighbours[t]->flag = i;
156 | set(i, current->neighbours[t]->id, dist + 1);
157 | frontier.push(current->neighbours[t]);
158 | }
159 | }
160 | }
161 | };
162 |
163 | void set(uint32_t i, uint32_t j, uint32_t val) {
164 | if (i == j) return;
165 | if (i > j) std::swap(i, j);
166 | m_data[translate(i, j)] = val;
167 | };
168 |
169 | uint32_t get(uint32_t i, uint32_t j) {
170 | if (i == j) return 0;
171 | if (i > j) std::swap(i, j);
172 | return m_data[translate(i, j)];
173 | };
174 | };
175 |
176 | inline Vert* findMaxVertex(std::vector* vec) {
177 | unsigned int max = 0;
178 | Vert* res = vec->front();
179 | for (const auto& v : *vec) {
180 | //std::cout << v->degree;
181 | if (v->degree > max) {
182 | max = v->degree;
183 | res = v;
184 | }
185 | }
186 | //std::cout << "\nMax " << max << std::endl;
187 | return res;
188 | };
189 |
190 | } // namespace MeshletGen
--------------------------------------------------------------------------------
/core/mm_structures.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #ifndef VK_STRUCTURES_H
3 | #define VK_STRUCTURES_H
4 |
5 | #define GLM_ENABLE_EXPERIMENTAL
6 |
7 | #include
8 | #include
9 | #include
10 |
11 |
12 |
13 |
14 |
15 |
16 |
// Empty struct: no members are declared.
struct MeshletDescMesh {};
21 |
22 |
// Descriptor stored as four 32-bit words (fieldX..fieldW), i.e. one
// "uvec4"/"uint4" value, so that shading languages can fetch it with a single
// 128-bit load. The words hold a bounding box, the vertex/primitive counts,
// the offsets into the index arrays and an angled cone used for culling.
struct MeshletDescTask
{
    // Word layout, fields listed in the order they are packed:
    //
    //   word   | field       | bits | meaning
    //   -------|-------------|:----:|---------------------------------------------------
    //   fieldX | bboxMinX    |  8   | bounding box min, relative to object bbox, UNORM8
    //          | bboxMinY    |  8   |
    //          | bboxMinZ    |  8   |
    //          | vertexMax   |  8   | number of vertex indices - 1 in the meshlet
    //   -------|-------------|:----:|---------------------------------------------------
    //   fieldY | bboxMaxX    |  8   | bounding box max, relative to object bbox, UNORM8
    //          | bboxMaxY    |  8   |
    //          | bboxMaxZ    |  8   |
    //          | primMax     |  8   | number of primitives - 1 in the meshlet
    //   -------|-------------|:----:|---------------------------------------------------
    //   fieldZ | vertexBegin | 20   | offset to the first vertex index, times alignment
    //          | coneOctX    |  8   | octant coordinate for cone normal, SNORM8
    //          | coneAngleLo |  4   | lower 4 bits of -sin(cone.angle), SNORM8
    //   -------|-------------|:----:|---------------------------------------------------
    //   fieldW | primBegin   | 20   | offset to the first primitive index, times alignment
    //          | coneOctY    |  8   | octant coordinate for cone normal, SNORM8
    //          | coneAngleHi |  4   | higher 4 bits of -sin(cone.angle), SNORM8
    //
    // The bit-fields are deliberately not part of the regular struct: GPU and
    // CPU compilers differ in how they pack bit-fields, and endianness differs.
    // The packing is a basic implementation that may be customised, ideally
    // staying within 128 bits.

    union
    {
#if defined(_MSC_VER) && !defined(NDEBUG)
        // Debug-only view of the same bits (MSVC, non-NDEBUG builds). Not portable.
        struct
        {
            unsigned int bboxMinX : 8;
            unsigned int bboxMinY : 8;
            unsigned int bboxMinZ : 8;
            unsigned int vertexMax : 8;

            unsigned int bboxMaxX : 8;
            unsigned int bboxMaxY : 8;
            unsigned int bboxMaxZ : 8;
            unsigned int primMax : 8;

            unsigned int vertexBegin : 20;
            signed int coneOctX : 8;
            unsigned int coneAngleLo : 4;

            unsigned int primBegin : 20;
            signed int coneOctY : 8;
            unsigned int coneAngleHi : 4;
        } _debug;
#endif
        // The raw words.
        struct
        {
            uint32_t fieldX;
            uint32_t fieldY;
            uint32_t fieldZ;
            uint32_t fieldW;
        };
    };
};
111 |
112 | #endif // VK_STRUCTURES_H
113 |
114 |
115 |
116 |
117 |
--------------------------------------------------------------------------------
/core/settings.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 | #include
4 | #include
5 |
// Upper bounds asserted by the descriptor setters for the number of vertices
// and primitives in a single meshlet.
static constexpr int MAX_VERTEX_COUNT_LIMIT = 256;
static constexpr int MAX_PRIMITIVE_COUNT_LIMIT = 256;

static constexpr uint32_t PACKBASIC_ALIGN = 16;
// Number of primitive indices fetched per thread: 8 or 4.
static constexpr uint32_t PACKBASIC_PRIMITIVE_INDICES_PER_FETCH = 8;

using PackBasicType = uint32_t;

// Must not change: has to be able to store [0, MAX_VERTEX_COUNT_LIMIT - 1].
using PrimitiveIndexType = uint8_t;
17 |
18 |
19 | namespace NVMeshlet {
20 |
21 | // Enum for meshlet generation strategies.
// Selectable meshlet generation strategies. The enumerators are numbered
// consecutively from 0 in declaration order.
enum GenStrategy {
    NAIVE = 0,
    GREEDY = 1,
    KMEANSD = 2,
    KMEANSS = 3,
    KMEANSE = 4,
    KMEANSO = 5,
    KMEANSEO = 6,
    KMEANSA = 7,
    KMEANSU = 8
};
33 |
34 |
35 | struct Stats
36 | {
37 | size_t meshletsTotal = 0;
38 | // slightly more due to task-shader alignment
39 | size_t meshletsStored = 0;
40 |
41 | // number of meshlets that can be backface cluster culled at all
42 | // due to similar normals
43 | size_t backfaceTotal = 0;
44 |
45 | size_t primIndices = 0;
46 | size_t primTotal = 0;
47 |
48 | size_t vertexIndices = 0;
49 | size_t vertexTotal = 0;
50 |
51 |
52 |
53 | // used when we sum multiple stats into a single to
54 | // compute averages of the averages/variances below.
55 |
56 | // Special data points.
57 | size_t triangleCountHist[MAX_PRIMITIVE_COUNT_LIMIT] = { 0 };
58 | size_t vertexCountHist[MAX_VERTEX_COUNT_LIMIT] = { 0 };
59 | size_t reusageMeasure = 0;
60 |
61 | size_t appended = 0;
62 |
63 | double primloadAvg = 0.f;
64 | double primloadVar = 0.f;
65 | double vertexloadAvg = 0.f;
66 | double vertexloadVar = 0.f;
67 |
68 | void append(const Stats& other)
69 | {
70 | meshletsTotal += other.meshletsTotal;
71 | meshletsStored += other.meshletsStored;
72 | backfaceTotal += other.backfaceTotal;
73 |
74 | primIndices += other.primIndices;
75 | vertexIndices += other.vertexIndices;
76 | vertexTotal += other.vertexTotal;
77 | primTotal += other.primTotal;
78 |
79 | appended += other.appended;
80 | primloadAvg += other.primloadAvg;
81 | primloadVar += other.primloadVar;
82 | vertexloadAvg += other.vertexloadAvg;
83 | vertexloadVar += other.vertexloadVar;
84 | }
85 |
86 | void fprint(FILE* log) const
87 | {
88 | if (!appended || !meshletsTotal)
89 | return;
90 |
91 | double fprimloadAvg = primloadAvg / double(appended);
92 | double fprimloadVar = primloadVar / double(appended);
93 | double fvertexloadAvg = vertexloadAvg / double(appended);
94 | double fvertexloadVar = vertexloadVar / double(appended);
95 |
96 | double statsNum = double(meshletsTotal);
97 | double backfaceAvg = double(backfaceTotal) / statsNum;
98 |
99 | double primWaste = double(primIndices) / double(primTotal * 3) - 1.0;
100 | double vertexWaste = double(vertexIndices) / double(vertexTotal) - 1.0;
101 | double meshletWaste = double(meshletsStored) / double(meshletsTotal) - 1.0;
102 |
103 | fprintf(log,
104 | "meshlets; %7zd; prim; %9zd; %.2f; vertex; %9zd; %.2f; backface; %.2f; waste; v; %.2f; p; %.2f; m; %.2f\n", meshletsTotal,
105 | primTotal, fprimloadAvg, vertexTotal, fvertexloadAvg, backfaceAvg, vertexWaste, primWaste, meshletWaste);
106 | }
107 | };
108 |
109 | // use getTaskPaddedElements
// Divisor used by computeTasksCount() to turn a meshlet count into a task count.
static constexpr uint32_t MESHLETS_PER_TASK = 32;
111 |
112 |
113 | // We allow two different type of primitive index packings.
114 | // The first is preferred, but yields slightly greater code complexity.
// Available layouts for the primitive index data.
enum PrimitiveIndexPacking
{
    // Dense array of multiple uint8s, 3 uint8s per primitive.
    // Least waste; can partially use the 32-bit storage intrinsic when
    // writing to gl_PrimitiveIndices.
    PRIMITIVE_PACKING_TIGHT_UINT8 = 0,

    // Same storage as above, but possibly with fewer triangles to simplify
    // the loader logic: all indices are guaranteed to be safely writable to
    // gl_PrimitiveIndices with the 32-bit write intrinsic in the shader.
    PRIMITIVE_PACKING_FITTED_UINT8 = 1,

    // 4 uint8s per primitive with the indices in the first three bytes.
    // Decoding a single triangle is easy, at the cost of bandwidth/storage.
    NVMESHLET_PACKING_TRIANGLE_UINT32 = 2,
};
130 |
131 | // The default shown here packs uint8 tightly, and makes them accessible as 64-bit load.
132 | // Keep in sync with shader configuration!
133 |
134 | static const PrimitiveIndexPacking PRIMITIVE_PACKING = PRIMITIVE_PACKING_FITTED_UINT8;
135 | // how many indices are fetched per thread, 8 or 4
136 | static const uint32_t PRIMITIVE_INDICES_PER_FETCH = 8;
137 |
138 | // Higher values mean slightly more wasted memory, but allow to use greater offsets within
139 | // the few bits we have, resulting in a higher total amount of triangles and vertices.
140 | static const uint32_t PRIMITIVE_PACKING_ALIGNMENT = 32; // must be multiple of PRIMITIVE_BITS_PER_FETCH
141 | static const uint32_t VERTEX_PACKING_ALIGNMENT = 16;
142 |
143 | struct MeshletPackBasicDesc
144 | {
145 | //
146 | // Bitfield layout :
147 | //
148 | // Field.X | Bits | Content
149 | // ------------|:----:|----------------------------------------------
150 | // bboxMinX | 8 | bounding box coord relative to object bbox
151 | // bboxMinY | 8 | UNORM8
152 | // bboxMinZ | 8 |
153 | // vertexMax | 8 | number of vertex indices - 1 in the meshlet
154 | // ------------|:----:|----------------------------------------------
155 | // Field.Y | |
156 | // ------------|:----:|----------------------------------------------
157 | // bboxMaxX | 8 | bounding box coord relative to object bbox
158 | // bboxMaxY | 8 | UNORM8
159 | // bboxMaxZ | 8 |
160 | // primMax | 8 | number of primitives - 1 in the meshlet
161 | // ------------|:----:|----------------------------------------------
162 | // Field.Z | |
163 | // ------------|:----:|----------------------------------------------
164 | // coneOctX | 8 | octant coordinate for cone normal, SNORM8
165 | // coneOctY | 8 | octant coordinate for cone normal, SNORM8
166 | // coneAngle | 8 | -sin(cone.angle), SNORM8
167 | // vertexPack | 8 | vertex indices per 32 bits (1 or 2)
168 | // ------------|:----:|----------------------------------------------
169 | // Field.W | |
170 | // ------------|:----:|----------------------------------------------
171 | // packOffset | 32 | index buffer value of the first vertex
172 |
173 | //
174 | // Note : the bitfield is not expanded in the struct due to differences in how
175 | // GPU & CPU compilers pack bit-fields and endian-ness.
176 |
177 | union
178 | {
179 | #if !defined(NDEBUG) && defined(_MSC_VER)
180 | struct
181 | {
182 | // warning, not portable
183 | unsigned bboxMinX : 8;
184 | unsigned bboxMinY : 8;
185 | unsigned bboxMinZ : 8;
186 | unsigned vertexMax : 8;
187 |
188 | unsigned bboxMaxX : 8;
189 | unsigned bboxMaxY : 8;
190 | unsigned bboxMaxZ : 8;
191 | unsigned primMax : 8;
192 |
193 | signed coneOctX : 8;
194 | signed coneOctY : 8;
195 | signed coneAngle : 8;
196 | unsigned vertexPack : 8;
197 |
198 | unsigned packOffset : 32;
199 | } _debug;
200 | #endif
201 | struct
202 | {
203 | uint32_t fieldX;
204 | uint32_t fieldY;
205 | uint32_t fieldZ;
206 | uint32_t fieldW;
207 | };
208 | };
209 |
210 | uint32_t getNumVertices() const { return unpack(fieldX, 8, 24) + 1; }
211 | void setNumVertices(uint32_t num)
212 | {
213 | assert(num <= MAX_VERTEX_COUNT_LIMIT);
214 | fieldX |= pack(num - 1, 8, 24);
215 | }
216 |
217 | uint32_t getNumPrims() const { return unpack(fieldY, 8, 24) + 1; }
218 | void setNumPrims(uint32_t num)
219 | {
220 | assert(num <= MAX_PRIMITIVE_COUNT_LIMIT);
221 | fieldY |= pack(num - 1, 8, 24);
222 | }
223 |
224 | uint32_t getNumVertexPack() const { return unpack(fieldZ, 8, 24); }
225 | void setNumVertexPack(uint32_t num) { fieldZ |= pack(num, 8, 24); }
226 |
227 | uint32_t getPackOffset() const { return fieldW; }
228 | void setPackOffset(uint32_t index) { fieldW = index; }
229 |
230 | uint32_t getVertexStart() const { return 0; }
231 | uint32_t getVertexSize() const
232 | {
233 | uint32_t vertexDiv = getNumVertexPack();
234 | uint32_t vertexElems = ((getNumVertices() + vertexDiv - 1) / vertexDiv);
235 |
236 | return vertexElems;
237 | }
238 |
239 | uint32_t getPrimStart() const { return (getVertexStart() + getVertexSize() + 1) & (~1u); }
240 | uint32_t getPrimSize() const
241 | {
242 | uint32_t primDiv = 4;
243 | uint32_t primElems = ((getNumPrims() * 3 + PACKBASIC_PRIMITIVE_INDICES_PER_FETCH - 1) / primDiv);
244 |
245 | return primElems;
246 | }
247 |
248 | // positions are relative to object's bbox treated as UNORM
249 | void setBBox(uint8_t const bboxMin[3], uint8_t const bboxMax[3])
250 | {
251 | fieldX |= pack(bboxMin[0], 8, 0) | pack(bboxMin[1], 8, 8) | pack(bboxMin[2], 8, 16);
252 | fieldY |= pack(bboxMax[0], 8, 0) | pack(bboxMax[1], 8, 8) | pack(bboxMax[2], 8, 16);
253 | }
254 |
255 | void getBBox(uint8_t bboxMin[3], uint8_t bboxMax[3]) const
256 | {
257 | bboxMin[0] = unpack(fieldX, 8, 0);
258 | bboxMin[0] = unpack(fieldX, 8, 8);
259 | bboxMin[0] = unpack(fieldX, 8, 16);
260 |
261 | bboxMax[0] = unpack(fieldY, 8, 0);
262 | bboxMax[0] = unpack(fieldY, 8, 8);
263 | bboxMax[0] = unpack(fieldY, 8, 16);
264 | }
265 |
266 | // uses octant encoding for cone Normal
267 | // positive angle means the cluster cannot be backface-culled
268 | // numbers are treated as SNORM
269 | void setCone(int8_t coneOctX, int8_t coneOctY, int8_t minusSinAngle)
270 | {
271 | uint8_t anglebits = minusSinAngle;
272 | fieldZ |= pack(coneOctX, 8, 0);
273 | fieldZ |= pack(coneOctY, 8, 8);
274 | fieldZ |= pack(minusSinAngle, 8, 16);
275 | }
276 |
277 | void getCone(int8_t& coneOctX, int8_t& coneOctY, int8_t& minusSinAngle) const
278 | {
279 | coneOctX = unpack(fieldZ, 8, 0);
280 | coneOctY = unpack(fieldZ, 8, 8);
281 | minusSinAngle = unpack(fieldZ, 8, 16);
282 | }
283 |
284 | MeshletPackBasicDesc()
285 | {
286 | fieldX = 0;
287 | fieldY = 0;
288 | fieldZ = 0;
289 | fieldW = 0;
290 | }
291 |
292 | static uint32_t pack(uint32_t value, int width, int offset)
293 | {
294 | return (uint32_t)((value & ((1 << width) - 1)) << offset);
295 | }
296 | static uint32_t unpack(uint32_t value, int width, int offset)
297 | {
298 | return (uint32_t)((value >> offset) & ((1 << width) - 1));
299 | }
300 | };
301 |
// Header view over a variable-size packed meshlet.
//
// The data is aligned to PACKBASIC_ALIGN bytes and consists of
//  - a first sequence with one 16-bit or 32-bit index per vertex
//    (vertexPack of 2 or 1 respectively), followed by
//  - a second sequence, aligned to 8 bytes, of 8-bit primitive indices:
//
//    { u32[numVertices/vertexPack ...], padding..., u8[(numPrimitives) * 3 ...] }
//
// The arrays are declared with one element only; the accessors index past
// that into the storage the object is placed on.
struct MeshletPackBasic
{
    union
    {
        uint32_t data32[1];
        uint16_t data16[1];
        uint8_t data8[1];
    };

    // Writes the index of vertex slot `vertex`: as a 32-bit element when
    // vertexPack is 1, otherwise as a 16-bit element (the value is truncated).
    inline void setVertexIndex(uint32_t PACKED_SIZE, uint32_t vertex, uint32_t vertexPack, uint32_t indexValue)
    {
#if 1
        if (vertexPack != 1) {
            data16[vertex] = indexValue;
            return;
        }
        data32[vertex] = indexValue;
#else
        uint32_t idx = vertex / vertexPack;
        uint32_t shift = vertex % vertexPack;
        assert(idx < PACKED_SIZE);
        data32[idx] |= indexValue << (shift * 16);
#endif
    }

    // Reads back the index of vertex slot `vertex`, using the same element
    // width rule as setVertexIndex().
    inline uint32_t getVertexIndex(uint32_t vertex, uint32_t vertexPack) const
    {
#if 1
        if (vertexPack == 1) {
            return data32[vertex];
        }
        return data16[vertex];
#else
        uint32_t idx = vertex / vertexPack;
        uint32_t shift = vertex & (vertexPack - 1);
        uint32_t bits = vertexPack == 2 ? 16 : 0;
        uint32_t indexValue = data32[idx];
        indexValue <<= ((1 - shift) * bits);
        indexValue >>= (bits);
        return indexValue;
#endif
    }

    // Stores the three 8-bit indices of primitive `prim`. primStart counts
    // 32-bit elements, so the first byte is primStart * 4 + prim * 3.
    inline void setPrimIndices(uint32_t PACKED_SIZE, uint32_t prim, uint32_t primStart, const uint8_t indices[3])
    {
        const uint32_t firstByte = primStart * 4 + prim * 3;

        assert(firstByte < PACKED_SIZE * 4);

        for (uint32_t k = 0; k < 3; ++k) {
            data8[firstByte + k] = indices[k];
        }
    }

    // Loads the three 8-bit indices of primitive `prim` into `indices`.
    inline void getPrimIndices(uint32_t prim, uint32_t primStart, uint8_t indices[3]) const
    {
        const uint32_t firstByte = primStart * 4 + prim * 3;

        for (uint32_t k = 0; k < 3; ++k) {
            indices[k] = data8[firstByte + k];
        }
    }
};
374 |
375 | struct MeshletGeometryPack // Groups the packed meshlet data with the descriptors that refer to it.
376 | {
377 | std::vector meshletPacks; // NOTE(review): element type is missing in this listing — presumably PackBasicType; confirm
378 | std::vector meshletDescriptors; // NOTE(review): element type is missing in this listing — presumably MeshletPackBasicDesc; confirm
379 | //std::vector meshletBboxes;
380 | };
381 |
382 | struct MeshletDesc
383 | {
384 | // A Meshlet contains a set of unique vertices
385 | // and a group of primitives that are defined by
386 | // indices into this local set of vertices.
387 | //
388 | // The information here is used by a single
389 | // mesh shader's workgroup to execute vertex
390 | // and primitive shading.
391 | // It is packed into single "uvec4"/"uint4" value
392 | // so the hardware can leverage 128-bit loads in the
393 | // shading languages.
394 | // The offsets used here are for the appropriate
395 | // indices arrays.
396 | //
397 | // A bounding box as well as an angled cone is stored to allow
398 | // quick culling in the task shader.
399 | // The current packing is just a basic implementation, that
400 | // may be customized, but ideally fits within 128 bit.
401 |
402 | //
403 | // Bitfield layout :
404 | //
405 | // Field.X | Bits | Content
406 | // ------------|:----:|----------------------------------------------
407 | // bboxMinX | 8 | bounding box coord relative to object bbox
408 | // bboxMinY | 8 | UNORM8
409 | // bboxMinZ | 8 |
410 | // vertexMax | 8 | number of vertex indices - 1 in the meshlet
411 | // ------------|:----:|----------------------------------------------
412 | // Field.Y | |
413 | // ------------|:----:|----------------------------------------------
414 | // bboxMaxX | 8 | bounding box coord relative to object bbox
415 | // bboxMaxY | 8 | UNORM8
416 | // bboxMaxZ | 8 |
417 | // primMax | 8 | number of primitives - 1 in the meshlet
418 | // ------------|:----:|----------------------------------------------
419 | // Field.Z | |
420 | // ------------|:----:|----------------------------------------------
421 | // vertexBegin | 20 | offset to the first vertex index, times alignment
422 | // coneOctX | 8 | octant coordinate for cone normal, SNORM8
423 | // coneAngleLo | 4 | lower 4 bits of -sin(cone.angle), SNORM8
424 | // ------------|:----:|----------------------------------------------
425 | // Field.W | |
426 | // ------------|:----:|----------------------------------------------
427 | // primBegin | 20 | offset to the first primitive index, times alignment
428 | // coneOctY | 8 | octant coordinate for cone normal, SNORM8
429 | // coneAngleHi | 4 | higher 4 bits of -sin(cone.angle), SNORM8
430 | //
431 | // Note : the bitfield is not expanded in the struct due to differences in how
432 | // GPU & CPU compilers pack bit-fields and endian-ness.
433 |
434 | union
435 | {
436 | #if !defined(NDEBUG) && defined(_MSC_VER)
437 | struct
438 | {
439 | // warning, not portable
440 | unsigned bboxMinX : 8;
441 | unsigned bboxMinY : 8;
442 | unsigned bboxMinZ : 8;
443 | unsigned vertexMax : 8;
444 |
445 | unsigned bboxMaxX : 8;
446 | unsigned bboxMaxY : 8;
447 | unsigned bboxMaxZ : 8;
448 | unsigned primMax : 8;
449 |
450 | unsigned vertexBegin : 20;
451 | signed coneOctX : 8;
452 | unsigned coneAngleLo : 4;
453 |
454 | unsigned primBegin : 20;
455 | signed coneOctY : 8;
456 | unsigned coneAngleHi : 4;
457 | } _debug;
458 | #endif
459 | struct
460 | {
461 | uint32_t fieldX;
462 | uint32_t fieldY;
463 | uint32_t fieldZ;
464 | uint32_t fieldW;
465 | };
466 | };
467 |
468 | uint32_t getNumVertices() const { return unpack(fieldX, 8, 24) + 1; }
469 | void setNumVertices(uint32_t num)
470 | {
471 | assert(num <= MAX_VERTEX_COUNT_LIMIT);
472 | fieldX |= pack(num - 1, 8, 24);
473 | }
474 |
475 | uint32_t getNumPrims() const { return unpack(fieldY, 8, 24) + 1; }
476 | void setNumPrims(uint32_t num)
477 | {
478 | assert(num <= MAX_PRIMITIVE_COUNT_LIMIT);
479 | fieldY |= pack(num - 1, 8, 24);
480 | }
481 |
482 | uint32_t getVertexBegin() const { return unpack(fieldZ, 20, 0) * VERTEX_PACKING_ALIGNMENT; }
483 | void setVertexBegin(uint32_t begin)
484 | {
485 | assert(begin % VERTEX_PACKING_ALIGNMENT == 0);
486 | assert(begin / VERTEX_PACKING_ALIGNMENT < ((1 << 20) - 1));
487 | fieldZ |= pack(begin / VERTEX_PACKING_ALIGNMENT, 20, 0);
488 | }
489 |
490 | uint32_t getPrimBegin() const { return unpack(fieldW, 20, 0) * PRIMITIVE_PACKING_ALIGNMENT; }
491 | void setPrimBegin(uint32_t begin)
492 | {
493 | assert(begin % PRIMITIVE_PACKING_ALIGNMENT == 0);
494 | assert(begin / PRIMITIVE_PACKING_ALIGNMENT < ((1 << 20) - 1));
495 | fieldW |= pack(begin / PRIMITIVE_PACKING_ALIGNMENT, 20, 0);
496 | }
497 |
498 | // positions are relative to object's bbox treated as UNORM
499 | void setBBox(uint8_t const bboxMin[3], uint8_t const bboxMax[3])
500 | {
501 | fieldX |= pack(bboxMin[0], 8, 0) | pack(bboxMin[1], 8, 8) | pack(bboxMin[2], 8, 16);
502 |
503 | fieldY |= pack(bboxMax[0], 8, 0) | pack(bboxMax[1], 8, 8) | pack(bboxMax[2], 8, 16);
504 | }
505 |
506 | void getBBox(uint8_t bboxMin[3], uint8_t bboxMax[3]) const
507 | {
508 | bboxMin[0] = unpack(fieldX, 8, 0);
509 | bboxMin[1] = unpack(fieldX, 8, 8);
510 | bboxMin[2] = unpack(fieldX, 8, 16);
511 |
512 | bboxMax[0] = unpack(fieldY, 8, 0);
513 | bboxMax[1] = unpack(fieldY, 8, 8);
514 | bboxMax[2] = unpack(fieldY, 8, 16);
515 | }
516 |
517 | // uses octant encoding for cone Normal
518 | // positive angle means the cluster cannot be backface-culled
519 | // numbers are treated as SNORM
520 | void setCone(int8_t coneOctX, int8_t coneOctY, int8_t minusSinAngle)
521 | {
522 | uint8_t anglebits = minusSinAngle;
523 | fieldZ |= pack(coneOctX, 8, 20) | pack((anglebits >> 0) & 0xF, 4, 28);
524 | fieldW |= pack(coneOctY, 8, 20) | pack((anglebits >> 4) & 0xF, 4, 28);
525 | }
526 |
527 | void getCone(int8_t& coneOctX, int8_t& coneOctY, int8_t& minusSinAngle) const
528 | {
529 | coneOctX = unpack(fieldZ, 8, 20);
530 | coneOctY = unpack(fieldW, 8, 20);
531 | minusSinAngle = unpack(fieldZ, 4, 28) | (unpack(fieldW, 4, 28) << 4);
532 | }
533 |
534 | MeshletDesc() { memset(this, 0, sizeof(MeshletDesc)); }
535 |
536 | static uint32_t pack(uint32_t value, int width, int offset)
537 | {
538 | return (uint32_t)((value & ((1 << width) - 1)) << offset);
539 | }
540 | static uint32_t unpack(uint32_t value, int width, int offset)
541 | {
542 | return (uint32_t)((value >> offset) & ((1 << width) - 1));
543 | }
544 |
545 | static bool isPrimBeginLegal(uint32_t begin) { return begin / PRIMITIVE_PACKING_ALIGNMENT < ((1 << 20) - 1); }
546 |
547 | static bool isVertexBeginLegal(uint32_t begin) { return begin / VERTEX_PACKING_ALIGNMENT < ((1 << 20) - 1); }
548 | };
549 |
550 |
551 |
552 | struct MeshletGeometry // Index arrays shared by all meshlets plus one descriptor per meshlet.
553 | {
554 | // The vertex indices are similar to the provided
555 | // triangle index buffer. Instead of each triangle using 3 vertex indices,
556 | // each meshlet holds a unique set of variable vertex indices.
557 | std::vector vertexIndices; // NOTE(review): element type is missing in this listing — presumably uint32_t; confirm
558 |
559 | // Each triangle is using 3 primitive indices, these indices
560 | // are local to the meshlet's unique set of vertices.
561 | // Due to alignment the number of primitiveIndices != input triangle indices.
562 | std::vector primitiveIndices; // NOTE(review): element type is missing in this listing — presumably PrimitiveIndexType; confirm
563 |
564 | // Each meshlet contains offsets into the above arrays.
565 | std::vector meshletDescriptors; // NOTE(review): element type is missing in this listing — presumably MeshletDesc; confirm
566 | };
567 |
568 | struct MeshletGeometry16 // Same members as MeshletGeometry; the "16" presumably refers to 16-bit vertex indices — confirm.
569 | {
570 | // The vertex indices are similar to the provided
571 | // triangle index buffer. Instead of each triangle using 3 vertex indices,
572 | // each meshlet holds a unique set of variable vertex indices.
573 | std::vector vertexIndices; // NOTE(review): element type is missing in this listing — presumably uint16_t; confirm
574 |
575 | // Each triangle is using 3 primitive indices, these indices
576 | // are local to the meshlet's unique set of vertices.
577 | // Due to alignment the number of primitiveIndices != input triangle indices.
578 | std::vector primitiveIndices; // NOTE(review): element type is missing in this listing — presumably PrimitiveIndexType; confirm
579 |
580 | // Each meshlet contains offsets into the above arrays.
581 | std::vector meshletDescriptors; // NOTE(review): element type is missing in this listing — presumably MeshletDesc; confirm
582 | };
583 |
584 | inline uint32_t computeTasksCount(uint32_t numMeshlets)
585 | {
586 | return (numMeshlets + MESHLETS_PER_TASK - 1) / MESHLETS_PER_TASK;
587 | }
588 |
589 | inline uint32_t computePackedPrimitiveCount(uint32_t numTris)
590 | {
591 | if (PRIMITIVE_PACKING != PRIMITIVE_PACKING_FITTED_UINT8)
592 | return numTris;
593 |
594 | uint32_t indices = numTris * 3;
595 | // align to PRIMITIVE_INDICES_PER_FETCH
596 | uint32_t indicesFit = (indices / PRIMITIVE_INDICES_PER_FETCH) * PRIMITIVE_INDICES_PER_FETCH;
597 | uint32_t numTrisFit = indicesFit / 3;
598 | ;
599 | assert(numTrisFit > 0);
600 | return numTrisFit;
601 | }
602 |
603 | inline uint64_t computeCommonAlignedSize(uint64_t size)
604 | {
605 | // To be able to store different data of the meshlet (desc, prim & vertex indices) in the same buffer,
606 | // we need to have a common alignment that keeps all the data natural aligned.
607 |
608 | static const uint64_t align = std::max(std::max(sizeof(MeshletDesc), sizeof(uint8_t) * PRIMITIVE_PACKING_ALIGNMENT),
609 | sizeof(uint32_t) * VERTEX_PACKING_ALIGNMENT);
610 | static_assert(align % sizeof(MeshletDesc) == 0, "nvmeshlet failed common align");
611 | static_assert(align % sizeof(uint8_t) * PRIMITIVE_PACKING_ALIGNMENT == 0, "nvmeshlet failed common align");
612 | static_assert(align % sizeof(uint32_t) * VERTEX_PACKING_ALIGNMENT == 0, "nvmeshlet failed common align");
613 |
614 | return ((size + align - 1) / align) * align;
615 | }
616 |
617 | inline uint64_t computeIndicesAlignedSize(uint64_t size)
618 | {
619 | // To be able to store different data of the meshlet (prim & vertex indices) in the same buffer,
620 | // we need to have a common alignment that keeps all the data natural aligned.
621 |
622 | static const uint64_t align = std::max(sizeof(uint8_t) * PRIMITIVE_PACKING_ALIGNMENT, sizeof(uint32_t) * VERTEX_PACKING_ALIGNMENT);
623 | static_assert(align % sizeof(uint8_t) * PRIMITIVE_PACKING_ALIGNMENT == 0, "nvmeshlet failed common align");
624 | static_assert(align % sizeof(uint32_t) * VERTEX_PACKING_ALIGNMENT == 0, "nvmeshlet failed common align");
625 |
626 | return ((size + align - 1) / align) * align;
627 | }
628 |
629 | } // end namespace NVMeshlet
630 |
631 |
632 | namespace mm {
633 |
634 |
635 |
636 | // must match cadscene!
637 | struct ObjectData { // Per-object block of matrices, bounding box, winding and colour; member order is layout-relevant (see the "must match" note above).
638 | glm::mat4 worldMatrix;
639 | glm::mat4 worldMatrixIT; // presumably the inverse transpose of worldMatrix — confirm
640 | glm::mat4 objectMatrix;
641 | glm::vec4 bboxMin;
642 | glm::vec4 bboxMax;
643 | glm::vec3 _pad0; // presumably padding so that _pad0 and winding together fill one vec4-sized slot — confirm
644 | float winding;
645 | glm::vec4 color;
646 | };
647 |
648 | struct Vertex {
649 | glm::vec3 pos;
650 | glm::vec3 color;
651 | glm::vec2 texCoord;
652 |
653 | bool operator==(const Vertex& other) const {
654 | return pos == other.pos && color == other.color && texCoord == other.texCoord;
655 | }
656 |
657 | glm::vec3 operator-(const Vertex& other) const {
658 | return glm::vec3(pos.x - other.pos.x, pos.y - other.pos.y, pos.z - other.pos.z);
659 | }
660 |
661 | float euclideanDistance(const Vertex& other) const {
662 | return std::sqrt(std::pow(other.pos.x - pos.x,2) + std::pow(other.pos.y - pos.y, 2) + std::pow(other.pos.z - pos.z, 2));
663 | }
664 | };
665 |
666 | struct Vert;
667 |
668 | struct Triangle { // Node of the triangle adjacency graph traversed by SymMatrix's breadth-first search.
669 | std::vector vertices; // NOTE(review): element type is missing in this listing — presumably Vert*; confirm
670 | std::vector neighbours; // element type is missing in this listing; SymMatrix uses the elements as Triangle*
671 | float centroid[3]{}; // value-initialised to zeros
672 | uint32_t id; // used by SymMatrix as this triangle's row/column index; not initialised here
673 | uint32_t flag = -1; // visit marker written by SymMatrix's search; starts as 0xFFFFFFFF
674 | uint32_t dist; // not initialised here
675 | };
676 |
677 | struct Vert { // Vertex record with adjacency information.
678 | std::vector neighbours; // NOTE(review): element type is missing in this listing — presumably Triangle*; confirm
679 | unsigned int index; // not initialised here
680 | unsigned int degree; // compared by findMaxVertex to pick the element with the largest value; not initialised here
681 | };
682 |
683 | template
684 | struct MeshletCache {
685 | PrimitiveIndexType primitives[MAX_PRIMITIVE_COUNT_LIMIT][3];
686 | uint32_t vertices[MAX_VERTEX_COUNT_LIMIT]; // this is the actual index buffer
687 | uint32_t numPrims;
688 | uint32_t numVertices;
689 | Vertex actualVertices[MAX_VERTEX_COUNT_LIMIT];
690 |
691 | // funky version!
692 | uint32_t numVertexDeltaBits;
693 | uint32_t numVertexAllBits;
694 |
695 | uint32_t primitiveBits = 1;
696 | uint32_t maxBlockBits = ~0;
697 |
698 | bool empty() const { return numVertices == 0; }
699 |
700 | void reset() {
701 | numPrims = 0;
702 | numVertices = 0;
703 | numVertexDeltaBits = 0;
704 | numVertexAllBits = 0;
705 |
706 | memset(vertices, 0xFFFFFFFF, sizeof(vertices));
707 | memset(actualVertices, 0x00000000, sizeof(actualVertices));
708 | }
709 |
710 | bool fitsBlock() const
711 | {
712 | uint32_t primBits = (numPrims - 1) * 3 * primitiveBits;
713 | uint32_t vertBits = (numVertices - 1) * numVertexDeltaBits;
714 | bool state = (primBits + vertBits) <= maxBlockBits;
715 |
716 | return state;
717 | }
718 |
719 | // check if cache can hold one more triangle
720 | bool cannotInsert(const VertexIndexType* indices, uint32_t maxVertexSize, uint32_t maxPrimitiveSize) const
721 | {
722 | // skip degenerate
723 | if (indices[0] == indices[1] || indices[0] == indices[2] || indices[1] == indices[2])
724 | {
725 | return false;
726 | }
727 |
728 | uint32_t found = 0;
729 |
730 | // check if any of the incoming three indices are already in cache
731 | for (uint32_t v = 0; v < numVertices; ++v) {
732 | for (int i = 0; i < 3; ++i) {
733 | uint32_t idx = indices[i];
734 | if (vertices[v] == idx) {
735 | found++;
736 | }
737 | }
738 | }
739 | // out of bounds
740 | return (numVertices + 3 - found) > maxVertexSize || (numPrims + 1) > maxPrimitiveSize;
741 | }
742 |
743 | bool cannotInsertBlock(const VertexIndexType* indices, uint32_t maxVertexSize, uint32_t maxPrimitiveSize) const
744 | {
745 | // skip degenerate
746 | if (indices[0] == indices[1] || indices[0] == indices[2] || indices[1] == indices[2])
747 | {
748 | return false;
749 | }
750 |
751 | uint32_t found = 0;
752 |
753 | // check if any of the incoming three indices are already in cache
754 | for (uint32_t v = 0; v < numVertices; ++v) {
755 | for (int i = 0; i < 3; ++i) {
756 | uint32_t idx = indices[i];
757 | if (vertices[v] == idx) {
758 | found++;
759 | }
760 | }
761 | }
762 |
763 | uint32_t firstVertex = numVertices ? vertices[0] : indices[0];
764 | uint32_t cmpBits = std::max(findMSB((firstVertex ^ indices[0]) | 1),
765 | std::max(findMSB((firstVertex ^ indices[1]) | 1), findMSB((firstVertex ^ indices[2]) | 1)))
766 | + 1;
767 |
768 | uint32_t deltaBits = std::max(cmpBits, numVertexDeltaBits);
769 |
770 | uint32_t newVertices = numVertices + 3 - found;
771 | uint32_t newPrims = numPrims + 1;
772 |
773 | uint32_t newBits;
774 |
775 | {
776 | uint32_t newVertBits = (newVertices - 1) * deltaBits;
777 | uint32_t newPrimBits = (newPrims - 1) * 3 * primitiveBits;
778 | newBits = newVertBits + newPrimBits;
779 | }
780 |
781 |
782 | // out of bounds
783 | return (numVertices + 3 - found) > maxVertexSize || (numPrims + 1) > maxPrimitiveSize;
784 | }
785 |
786 | // insert new triangle
787 | void insert(const VertexIndexType* indices, const Vertex* verts)
788 | {
789 | uint32_t triangle[3];
790 |
791 | // skip degenerate
792 | if (indices[0] == indices[1] || indices[0] == indices[2] || indices[1] == indices[2])
793 | {
794 | return;
795 | }
796 |
797 | for (int i = 0; i < 3; ++i) {
798 | // take out an index
799 | uint32_t idx = indices[i];
800 | bool found = false;
801 |
802 | // check if idx is already in cache
803 | for (uint32_t v = 0; v < numVertices; ++v)
804 | {
805 | if (idx == vertices[v])
806 | {
807 | triangle[i] = v;
808 | found = true;
809 | break;
810 | }
811 | }
812 | // if idx is not in cache add it
813 | if (!found)
814 | {
815 | vertices[numVertices] = idx;
816 | actualVertices[numVertices] = verts[idx];
817 | triangle[i] = numVertices;
818 |
819 | if (numVertices)
820 | {
821 | numVertexDeltaBits = std::max(findMSB((idx ^ vertices[0]) | 1) + 1, numVertexDeltaBits);
822 | }
823 | numVertexAllBits = std::max(numVertexAllBits, findMSB(idx) + 1);
824 |
825 | numVertices++;
826 | }
827 | }
828 |
829 | primitives[numPrims][0] = triangle[0];
830 | primitives[numPrims][1] = triangle[1];
831 | primitives[numPrims][2] = triangle[2];
832 | numPrims++;
833 |
834 | assert(fitsBlock());
835 | }
836 | };
837 |
838 | struct MeshletMeshDesc
839 | {
840 | // A Meshlet contains a set of unique vertices
841 | // and a group of primitives that are defined by
842 | // indices into this local set of vertices.
843 | //
844 | // The information here is used by a single
845 | // mesh shader's workgroup to execute vertex
846 | // and primitive shading.
847 | // It is packed into single "uvec4"/"uint4" value
848 | // so the hardware can leverage 128-bit loads in the
849 | // shading languages.
850 | // The offsets used here are for the appropriate
851 | // indices arrays.
852 | //
853 | // A bounding box as well as an angled cone is stored to allow
854 | // quick culling in the task shader.
855 | // The current packing is just a basic implementation, that
856 | // may be customized, but ideally fits within 128 bit.
857 |
858 | //
859 | // Bitfield layout :
860 | //
861 | // Field.X | Bits | Content
862 | // ------------|:----:|----------------------------------------------
863 | // bboxMinX | 8 | bounding box coord relative to object bbox
864 | // bboxMinY | 8 | UNORM8
865 | // bboxMinZ | 8 |
866 | // vertexMax | 8 | number of vertex indices - 1 in the meshlet
867 | // ------------|:----:|----------------------------------------------
868 | // Field.Y | |
869 | // ------------|:----:|----------------------------------------------
870 | // bboxMaxX | 8 | bounding box coord relative to object bbox
871 | // bboxMaxY | 8 | UNORM8
872 | // bboxMaxZ | 8 |
873 | // primMax | 8 | number of primitives - 1 in the meshlet
874 | // ------------|:----:|----------------------------------------------
875 | // Field.Z | |
876 | // ------------|:----:|----------------------------------------------
877 | // vertexBegin | 32 | offset to the first vertex index, times alignment
878 | // ------------|:----:|----------------------------------------------
879 | // Field.W | |
880 | // ------------|:----:|----------------------------------------------
881 | // primBegin | 32 | offset to the first primitive index, times alignment
882 |
883 | union
884 | {
885 | #if !defined(NDEBUG) && defined(_MSC_VER)
886 | struct
887 | {
888 | // warning, not portable
889 | unsigned bboxMinX : 8;
890 | unsigned bboxMinY : 8;
891 | unsigned bboxMinZ : 8;
892 | unsigned vertexMax : 8;
893 |
894 | unsigned bboxMaxX : 8;
895 | unsigned bboxMaxY : 8;
896 | unsigned bboxMaxZ : 8;
897 | unsigned primMax : 8;
898 |
899 | unsigned vertexBegin : 20;
900 | signed coneOctX : 8;
901 | unsigned coneAngleLo : 4;
902 |
903 | unsigned primBegin : 20;
904 | signed coneOctY : 8;
905 | unsigned coneAngleHi : 4;
906 | } _debug;
907 | #endif
908 | struct
909 | {
910 | uint32_t fieldX;
911 | uint32_t fieldY;
912 | uint32_t fieldZ;
913 | uint32_t fieldW;
914 | };
915 | };
916 | uint32_t getNumVertices() const { return unpack(fieldX, 8, 24) + 1; }
917 | void setNumVertices(uint32_t num)
918 | {
919 | assert(num <= MAX_VERTEX_COUNT_LIMIT);
920 | fieldX |= pack(num - 1, 8, 24);
921 | }
922 |
923 | uint32_t getNumPrims() const { return unpack(fieldY, 8, 24) + 1; }
924 | void setNumPrims(uint32_t num)
925 | {
926 | assert(num <= MAX_PRIMITIVE_COUNT_LIMIT);
927 | fieldY |= pack(num - 1, 8, 24);
928 | }
929 |
930 | uint32_t getVertexBegin() const { return fieldZ;/*unpack(fieldZ, 20, 0) * NVMeshlet::VERTEX_PACKING_ALIGNMENT;*/ }
931 | void setVertexBegin(uint32_t begin)
932 | {
933 | //assert(begin % NVMeshlet::VERTEX_PACKING_ALIGNMENT == 0);
934 | //assert(begin / NVMeshlet::VERTEX_PACKING_ALIGNMENT < ((1 << 20) - 1));
935 | //fieldZ |= pack(begin / NVMeshlet::VERTEX_PACKING_ALIGNMENT, 20, 0);
936 | fieldZ = begin;
937 | }
938 |
939 | uint32_t getPrimBegin() const { return fieldW;/*unpack(fieldW, 20, 0) * NVMeshlet::PRIMITIVE_PACKING_ALIGNMENT;*/ }
940 | void setPrimBegin(uint32_t begin)
941 | {
942 | //assert(begin % NVMeshlet::PRIMITIVE_PACKING_ALIGNMENT == 0);
943 | //assert(begin / NVMeshlet::PRIMITIVE_PACKING_ALIGNMENT < ((1 << 20) - 1));
944 | //fieldW |= pack(begin / NVMeshlet::PRIMITIVE_PACKING_ALIGNMENT, 20, 0);
945 | fieldW = begin;
946 | }
947 |
948 | // positions are relative to object's bbox treated as UNORM
949 | void setBBox(uint8_t const bboxMin[3], uint8_t const bboxMax[3])
950 | {
951 | fieldX |= pack(bboxMin[0], 8, 0) | pack(bboxMin[1], 8, 8) | pack(bboxMin[2], 8, 16);
952 |
953 | fieldY |= pack(bboxMax[0], 8, 0) | pack(bboxMax[1], 8, 8) | pack(bboxMax[2], 8, 16);
954 | }
955 |
956 | void getBBox(uint8_t bboxMin[3], uint8_t bboxMax[3]) const
957 | {
958 | bboxMin[0] = unpack(fieldX, 8, 0);
959 | bboxMin[0] = unpack(fieldX, 8, 8);
960 | bboxMin[0] = unpack(fieldX, 8, 16);
961 |
962 | bboxMax[0] = unpack(fieldY, 8, 0);
963 | bboxMax[0] = unpack(fieldY, 8, 8);
964 | bboxMax[0] = unpack(fieldY, 8, 16);
965 | }
966 |
967 | // uses octant encoding for cone Normal
968 | // positive angle means the cluster cannot be backface-culled
969 | // numbers are treated as SNORM
970 | void setCone(int8_t coneOctX, int8_t coneOctY, int8_t minusSinAngle)
971 | {
972 | uint8_t anglebits = minusSinAngle;
973 | fieldZ |= pack(coneOctX, 8, 20) | pack((anglebits >> 0) & 0xF, 4, 28);
974 | fieldW |= pack(coneOctY, 8, 20) | pack((anglebits >> 4) & 0xF, 4, 28);
975 | }
976 |
977 | void getCone(int8_t& coneOctX, int8_t& coneOctY, int8_t& minusSinAngle) const
978 | {
979 | coneOctX = unpack(fieldZ, 8, 20);
980 | coneOctY = unpack(fieldW, 8, 20);
981 | minusSinAngle = unpack(fieldZ, 4, 28) | (unpack(fieldW, 4, 28) << 4);
982 | }
983 |
984 | MeshletMeshDesc() { memset(this, 0, sizeof(MeshletMeshDesc)); }
985 |
986 | static uint32_t pack(uint32_t value, int width, int offset)
987 | {
988 | return (uint32_t)((value & ((1 << width) - 1)) << offset);
989 | }
990 | static uint32_t unpack(uint32_t value, int width, int offset)
991 | {
992 | return (uint32_t)((value >> offset) & ((1 << width) - 1));
993 | }
994 |
995 | static bool isPrimBeginLegal(uint32_t begin) { return begin / NVMeshlet::PRIMITIVE_PACKING_ALIGNMENT < ((1 << 32) - 1); }
996 |
997 | static bool isVertexBeginLegal(uint32_t begin) { return begin / NVMeshlet::VERTEX_PACKING_ALIGNMENT < ((1 << 32) - 1); }
998 | };
999 |
struct MeshletTaskDesc
{
  // Raw 128-bit meshlet descriptor: four 32-bit words with no accessors.
  //
  // A meshlet is a set of unique vertices plus a group of primitives that
  // index into that local vertex set. The descriptor is sized as a single
  // "uvec4"/"uint4" so it can be read with one 128-bit load in the shading
  // languages. It carries a bounding box and an angled cone for quick
  // culling in the task shader, together with the offsets into the
  // corresponding index arrays.
  //
  // Intended bit layout:
  //
  //   Word   | Field       | Bits | Content
  //   -------|-------------|:----:|------------------------------------------
  //   fieldX | bboxMinX    |  8   | bounding box min relative to the object
  //          | bboxMinY    |  8   | bbox, UNORM8
  //          | bboxMinZ    |  8   |
  //          | vertexMax   |  8   | number of vertex indices - 1
  //   -------|-------------|:----:|------------------------------------------
  //   fieldY | bboxMaxX    |  8   | bounding box max relative to the object
  //          | bboxMaxY    |  8   | bbox, UNORM8
  //          | bboxMaxZ    |  8   |
  //          | primMax     |  8   | number of primitives - 1
  //   -------|-------------|:----:|------------------------------------------
  //   fieldZ | vertexBegin |  20  | offset to first vertex index, times alignment
  //          | coneOctX    |  8   | octant coordinate of cone normal, SNORM8
  //          | coneAngleLo |  4   | lower 4 bits of -sin(cone.angle), SNORM8
  //   -------|-------------|:----:|------------------------------------------
  //   fieldW | primBegin   |  20  | offset to first primitive index, times alignment
  //          | coneOctY    |  8   | octant coordinate of cone normal, SNORM8
  //          | coneAngleHi |  4   | higher 4 bits of -sin(cone.angle), SNORM8
  //
  // The bit-fields are deliberately not part of the portable struct because
  // GPU and CPU compilers differ in bit-field packing and endianness. The
  // `_debug` view exists only in MSVC debug builds.
  //
  // There is no constructor, so the words are not initialised automatically.

  union
  {
#if !defined(NDEBUG) && defined(_MSC_VER)
    // Debugger-friendly view of the same 16 bytes (not portable).
    struct
    {
      // word X
      unsigned bboxMinX : 8;
      unsigned bboxMinY : 8;
      unsigned bboxMinZ : 8;
      unsigned vertexMax : 8;

      // word Y
      unsigned bboxMaxX : 8;
      unsigned bboxMaxY : 8;
      unsigned bboxMaxZ : 8;
      unsigned primMax : 8;

      // word Z
      unsigned vertexBegin : 20;
      signed   coneOctX : 8;
      unsigned coneAngleLo : 4;

      // word W
      unsigned primBegin : 20;
      signed   coneOctY : 8;
      unsigned coneAngleHi : 4;
    } _debug;
#endif
    // Portable view: the four raw words.
    struct
    {
      uint32_t fieldX;
      uint32_t fieldY;
      uint32_t fieldZ;
      uint32_t fieldW;
    };
  };
};
1086 |
1087 | struct MeshletGeometry
1088 | {
1089 | // The vertex indices are similar to the provided
1090 | // triangle index buffer. Instead of each triangle using 3 vertex indices,
1091 | // each meshlet holds a unique set of variable vertex indices.
1092 | std::vector vertexIndices; // NOTE(review): no element type is visible on this and the following vectors (template argument apparently lost) - confirm against the original header
1093 |
1094 | // Each triangle is using 3 primitive indices, these indices
1095 | // are local to the meshlet's unique set of vertices.
1096 | // Due to alignment the number of primitiveIndices != input triangle indices.
1097 | std::vector primitiveIndices;
1098 | std::vector vertices; // presumably the vertex data referenced by the meshlets - TODO confirm
1099 |
1100 | // Each meshlet contains offsets into the above arrays.
1101 | std::vector meshletMeshDescriptors;
1102 | std::vector meshletTaskDescriptors;
1103 | //std::vector meshletTaskDescriptors;
1104 | };
1105 | }
1106 |
1107 |
--------------------------------------------------------------------------------
/images/bounding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Senbyo/meshletmaker/ef4082930e2b7c317adbe3bdf7a8ad94871e4313/images/bounding.png
--------------------------------------------------------------------------------
/images/greedy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Senbyo/meshletmaker/ef4082930e2b7c317adbe3bdf7a8ad94871e4313/images/greedy.png
--------------------------------------------------------------------------------
/images/kmedoids.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Senbyo/meshletmaker/ef4082930e2b7c317adbe3bdf7a8ad94871e4313/images/kmedoids.png
--------------------------------------------------------------------------------
/images/tipsynvidia.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Senbyo/meshletmaker/ef4082930e2b7c317adbe3bdf7a8ad94871e4313/images/tipsynvidia.png
--------------------------------------------------------------------------------
/images/zeux.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Senbyo/meshletmaker/ef4082930e2b7c317adbe3bdf7a8ad94871e4313/images/zeux.png
--------------------------------------------------------------------------------