├── .DS_Store ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── Standalone ├── CMakeLists.txt └── main.cpp ├── core ├── geometryProcessing.cpp ├── geometryProcessing.h ├── idxBufCleaner.cpp ├── meshletCompresser.cpp ├── meshletConverter.cpp ├── meshletGenerators.cpp ├── meshletGenerators.h ├── meshletMaker.h ├── meshletMeshDescriptor.cpp ├── meshletTaskDescriptor.cpp ├── meshlet_builder.hpp ├── meshlet_util.hpp ├── mm_meshlet_builder.h ├── mm_structures.h └── settings.h └── images ├── bounding.png ├── greedy.png ├── kmedoids.png ├── tipsynvidia.png └── zeux.png /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Senbyo/meshletmaker/ef4082930e2b7c317adbe3bdf7a8ad94871e4313/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Aa][Rr][Mm]/ 27 | [Aa][Rr][Mm]64/ 28 | bld/ 29 | [Bb]in/ 30 | [Oo]bj/ 31 | [Ll]og/ 32 | [Ll]ogs/ 33 | 34 | # Visual Studio 2015/2017 cache/options directory 35 | .vs/ 36 | # Uncomment if you have tasks that create the project's static files in wwwroot 37 | #wwwroot/ 38 | 39 | # Visual Studio 2017 auto generated files 40 | Generated\ Files/ 41 | 42 | # MSTest test Results 43 | [Tt]est[Rr]esult*/ 44 | [Bb]uild[Ll]og.* 45 | 46 | # NUnit 47 | *.VisualState.xml 48 | TestResult.xml 49 | nunit-*.xml 50 | 51 | # Build Results of an ATL Project 52 | [Dd]ebugPS/ 53 | [Rr]eleasePS/ 54 | dlldata.c 55 | 56 | # Benchmark Results 57 | BenchmarkDotNet.Artifacts/ 58 | 59 | # .NET Core 60 | project.lock.json 61 | project.fragment.lock.json 62 | artifacts/ 63 | 64 | # StyleCop 65 | StyleCopReport.xml 66 | 67 | # Files built by Visual Studio 68 | *_i.c 69 | *_p.c 70 | *_h.h 71 | *.ilk 72 | *.meta 73 | *.obj 74 | *.iobj 75 | *.pch 76 | *.pdb 77 | *.ipdb 78 | *.pgc 79 | *.pgd 80 | *.rsp 81 | *.sbr 82 | *.tlb 83 | *.tli 84 | *.tlh 85 | *.tmp 86 | *.tmp_proj 87 | *_wpftmp.csproj 88 | *.log 89 | *.vspscc 90 | *.vssscc 91 | .builds 92 | *.pidb 93 | *.svclog 94 | *.scc 95 | 96 | # Chutzpah Test files 97 | _Chutzpah* 98 | 99 | # Visual C++ cache files 100 | ipch/ 101 | *.aps 102 | *.ncb 103 | *.opendb 104 | *.opensdf 105 | *.sdf 106 | *.cachefile 107 | *.VC.db 108 | *.VC.VC.opendb 109 | 110 | # Visual Studio profiler 111 | *.psess 112 | *.vsp 113 | *.vspx 114 | *.sap 115 | 116 | # Visual Studio Trace Files 117 | *.e2e 118 | 119 | # TFS 
2012 Local Workspace 120 | $tf/ 121 | 122 | # Guidance Automation Toolkit 123 | *.gpState 124 | 125 | # ReSharper is a .NET coding add-in 126 | _ReSharper*/ 127 | *.[Rr]e[Ss]harper 128 | *.DotSettings.user 129 | 130 | # TeamCity is a build add-in 131 | _TeamCity* 132 | 133 | # DotCover is a Code Coverage Tool 134 | *.dotCover 135 | 136 | # AxoCover is a Code Coverage Tool 137 | .axoCover/* 138 | !.axoCover/settings.json 139 | 140 | # Visual Studio code coverage results 141 | *.coverage 142 | *.coveragexml 143 | 144 | # NCrunch 145 | _NCrunch_* 146 | .*crunch*.local.xml 147 | nCrunchTemp_* 148 | 149 | # MightyMoose 150 | *.mm.* 151 | AutoTest.Net/ 152 | 153 | # Web workbench (sass) 154 | .sass-cache/ 155 | 156 | # Installshield output folder 157 | [Ee]xpress/ 158 | 159 | # DocProject is a documentation generator add-in 160 | DocProject/buildhelp/ 161 | DocProject/Help/*.HxT 162 | DocProject/Help/*.HxC 163 | DocProject/Help/*.hhc 164 | DocProject/Help/*.hhk 165 | DocProject/Help/*.hhp 166 | DocProject/Help/Html2 167 | DocProject/Help/html 168 | 169 | # Click-Once directory 170 | publish/ 171 | 172 | # Publish Web Output 173 | *.[Pp]ublish.xml 174 | *.azurePubxml 175 | # Note: Comment the next line if you want to checkin your web deploy settings, 176 | # but database connection strings (with potential passwords) will be unencrypted 177 | *.pubxml 178 | *.publishproj 179 | 180 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 181 | # checkin your Azure Web App publish settings, but sensitive information contained 182 | # in these scripts will be unencrypted 183 | PublishScripts/ 184 | 185 | # NuGet Packages 186 | *.nupkg 187 | # NuGet Symbol Packages 188 | *.snupkg 189 | # The packages folder can be ignored because of Package Restore 190 | **/[Pp]ackages/* 191 | # except build/, which is used as an MSBuild target. 
192 | !**/[Pp]ackages/build/ 193 | # Uncomment if necessary however generally it will be regenerated when needed 194 | #!**/[Pp]ackages/repositories.config 195 | # NuGet v3's project.json files produces more ignorable files 196 | *.nuget.props 197 | *.nuget.targets 198 | 199 | # Microsoft Azure Build Output 200 | csx/ 201 | *.build.csdef 202 | 203 | # Microsoft Azure Emulator 204 | ecf/ 205 | rcf/ 206 | 207 | # Windows Store app package directories and files 208 | AppPackages/ 209 | BundleArtifacts/ 210 | Package.StoreAssociation.xml 211 | _pkginfo.txt 212 | *.appx 213 | *.appxbundle 214 | *.appxupload 215 | 216 | # Visual Studio cache files 217 | # files ending in .cache can be ignored 218 | *.[Cc]ache 219 | # but keep track of directories ending in .cache 220 | !?*.[Cc]ache/ 221 | 222 | # Others 223 | ClientBin/ 224 | ~$* 225 | *~ 226 | *.dbmdl 227 | *.dbproj.schemaview 228 | *.jfm 229 | *.pfx 230 | *.publishsettings 231 | orleans.codegen.cs 232 | 233 | # Including strong name files can present a security risk 234 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 235 | #*.snk 236 | 237 | # Since there are multiple workflows, uncomment next line to ignore bower_components 238 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 239 | #bower_components/ 240 | 241 | # RIA/Silverlight projects 242 | Generated_Code/ 243 | 244 | # Backup & report files from converting an old project file 245 | # to a newer Visual Studio version. 
Backup files are not needed, 246 | # because we have git ;-) 247 | _UpgradeReport_Files/ 248 | Backup*/ 249 | UpgradeLog*.XML 250 | UpgradeLog*.htm 251 | ServiceFabricBackup/ 252 | *.rptproj.bak 253 | 254 | # SQL Server files 255 | *.mdf 256 | *.ldf 257 | *.ndf 258 | 259 | # Business Intelligence projects 260 | *.rdl.data 261 | *.bim.layout 262 | *.bim_*.settings 263 | *.rptproj.rsuser 264 | *- [Bb]ackup.rdl 265 | *- [Bb]ackup ([0-9]).rdl 266 | *- [Bb]ackup ([0-9][0-9]).rdl 267 | 268 | # Microsoft Fakes 269 | FakesAssemblies/ 270 | 271 | # GhostDoc plugin setting file 272 | *.GhostDoc.xml 273 | 274 | # Node.js Tools for Visual Studio 275 | .ntvs_analysis.dat 276 | node_modules/ 277 | 278 | # Visual Studio 6 build log 279 | *.plg 280 | 281 | # Visual Studio 6 workspace options file 282 | *.opt 283 | 284 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 285 | *.vbw 286 | 287 | # Visual Studio LightSwitch build output 288 | **/*.HTMLClient/GeneratedArtifacts 289 | **/*.DesktopClient/GeneratedArtifacts 290 | **/*.DesktopClient/ModelManifest.xml 291 | **/*.Server/GeneratedArtifacts 292 | **/*.Server/ModelManifest.xml 293 | _Pvt_Extensions 294 | 295 | # Paket dependency manager 296 | .paket/paket.exe 297 | paket-files/ 298 | 299 | # FAKE - F# Make 300 | .fake/ 301 | 302 | # CodeRush personal settings 303 | .cr/personal 304 | 305 | # Python Tools for Visual Studio (PTVS) 306 | __pycache__/ 307 | *.pyc 308 | 309 | # Cake - Uncomment if you are using it 310 | # tools/** 311 | # !tools/packages.config 312 | 313 | # Tabs Studio 314 | *.tss 315 | 316 | # Telerik's JustMock configuration file 317 | *.jmconfig 318 | 319 | # BizTalk build output 320 | *.btp.cs 321 | *.btm.cs 322 | *.odx.cs 323 | *.xsd.cs 324 | 325 | # OpenCover UI analysis results 326 | OpenCover/ 327 | 328 | # Azure Stream Analytics local run output 329 | ASALocalRun/ 330 | 331 | # MSBuild Binary and Structured Log 332 | *.binlog 333 | 334 | # NVidia Nsight GPU debugger 
configuration file 335 | *.nvuser 336 | 337 | # MFractors (Xamarin productivity tool) working folder 338 | .mfractor/ 339 | 340 | # Local History for Visual Studio 341 | .localhistory/ 342 | 343 | # BeatPulse healthcheck temp database 344 | healthchecksdb 345 | 346 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 347 | MigrationBackup/ 348 | 349 | # Ionide (cross platform F# VS Code tools) working folder 350 | .ionide/ 351 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.19) 2 | cmake_policy(SET CMP0177 NEW) 3 | 4 | set(CMAKE_CXX_STANDARD 17) 5 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 6 | set(CMAKE_SUPPRESS_REGENERATION true) 7 | 8 | option(BUILD_STANDALONE "Build standalone" FALSE) 9 | project(meshletmaker VERSION 1.0) 10 | 11 | 12 | set(SOURCES 13 | core/geometryProcessing.cpp 14 | core/idxBufCleaner.cpp 15 | core/meshletConverter.cpp 16 | core/meshletCompresser.cpp 17 | core/meshletMeshDescriptor.cpp 18 | core/meshletTaskDescriptor.cpp 19 | ) 20 | 21 | set(HEADERS 22 | core/geometryProcessing.h 23 | core/mm_meshlet_builder.h 24 | core/meshlet_builder.hpp 25 | core/meshlet_util.hpp 26 | core/mm_structures.h 27 | core/meshletMaker.h 28 | core/settings.h 29 | ) 30 | 31 | add_library(meshletmaker STATIC ${SOURCES} ${HEADERS}) 32 | 33 | target_include_directories(meshletmaker 34 | PRIVATE 35 | ${PROJECT_SOURCE_DIR}/libs/tinyobjloader/ 36 | ${PROJECT_SOURCE_DIR}/libs/glm/ 37 | ) 38 | 39 | set(INCLUDES 40 | core/MeshletMaker.h 41 | core/settings.h 42 | ) 43 | 44 | install(FILES ${INCLUDES} DESTINATION ${CMAKE_INSTALL_PREFIX}/include) 45 | 46 | if (BUILD_STANDALONE) 47 | add_subdirectory(Standalone) 48 | endif() 49 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Mark Bo Jensen, PhD 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Performance Comparison of Meshlet Generation Strategies 2 | 3 | 4 | 5 | 6 | This repo is greatly inspired by the meshoptimizer library by Arseny Kapoulkine and uses code from NVIDIA's meshlet example created by Christoph Kubisch. 7 | 8 | ## Abstract 9 | Mesh shaders were recently introduced for faster rendering of triangle meshes. Instead of 10 | pushing each individual triangle through the rasterization pipeline, we can create triangle 11 | clusters called meshlets and perform per-cluster culling operations. 
This is a great opportunity 12 | to efficiently render very large meshes. However, the performance of mesh shaders depends 13 | on how we create the meshlets. We test rendering performance, on NVIDIA hardware, after 14 | the use of different methods for organizing triangle meshes into meshlets. To measure the 15 | performance of a method, we render meshes of different complexity from many randomly 16 | selected views and measure the render time per triangle. Based on our findings, we suggest 17 | guidelines for creation of meshlets. Using our guidelines we propose two simple methods for 18 | generating meshlets that result in good rendering performance, when combined with hardware 19 | manufacturers' best practices. Our objective is to make it easier for the graphics practitioner to 20 | organize a triangle mesh into high performance meshlets. 21 | 22 | ## Paper 23 | Please find the paper describing the details of the different Meshlet generation strategies here: https://jcgt.org/published/0012/02/01/ 24 | 25 | If you use the work then please cite us: 26 | Mark Bo Jensen, Jeppe Revall Frisvad, and J. Andreas Bærentzen, Performance Comparison of Meshlet Generation Strategies, Journal of Computer Graphics Techniques (JCGT), vol. 12, no. 
2, 1-27, 2023 27 | -------------------------------------------------------------------------------- /Standalone/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.19) 2 | 3 | set(CMAKE_CXX_STANDARD 17) 4 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 5 | set(CMAKE_SUPPRESS_REGENERATION true) 6 | 7 | include_directories( 8 | ${PROJECT_SOURCE_DIR}/libs/tinyobjloader/ 9 | ${PROJECT_SOURCE_DIR}/core 10 | ${PROJECT_SOURCE_DIR} 11 | ) 12 | 13 | set(SOURCES 14 | main.cpp 15 | ) 16 | 17 | set(HEADERS 18 | ) 19 | 20 | set(TARGETS standalone) 21 | 22 | add_executable(standalone ${SOURCES} ${HEADERS}) 23 | 24 | 25 | target_link_libraries(standalone 26 | meshletmaker 27 | ${SYSTEM_LIBS}) 28 | 29 | install(TARGETS ${TARGETS} EXPORT meshletMakerTargets 30 | RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} 31 | ) -------------------------------------------------------------------------------- /Standalone/main.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Senbyo/meshletmaker/ef4082930e2b7c317adbe3bdf7a8ad94871e4313/Standalone/main.cpp -------------------------------------------------------------------------------- /core/geometryProcessing.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef HEADER_GUARD_GEOMETRYPROCESSING 3 | #define HEADER_GUARD_GEOMETRYPROCESSING 4 | 5 | #include 6 | #include 7 | 8 | #include "settings.h" 9 | 10 | namespace mm { 11 | void calculateObjectBoundingBox(const std::vector& vertices, float* objectBboxMin, float* objectBboxMax); 12 | void calculateObjectBoundingBox(std::vector* vertices, float* objectBboxMin, float* objectBboxMax); 13 | } 14 | #endif // HEADER_GUARD_GEOMETRYPROCESSING -------------------------------------------------------------------------------- /core/idxBufCleaner.cpp: 
-------------------------------------------------------------------------------- 1 | #include "meshletMaker.h" 2 | #include 3 | 4 | 5 | 6 | 7 | namespace mm { 8 | void cleanIndexBuffer() { 9 | std::cout << "linking worked" << std::endl; 10 | } 11 | } -------------------------------------------------------------------------------- /core/meshletCompresser.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Senbyo/meshletmaker/ef4082930e2b7c317adbe3bdf7a8ad94871e4313/core/meshletCompresser.cpp -------------------------------------------------------------------------------- /core/meshletConverter.cpp: -------------------------------------------------------------------------------- 1 | #include "meshletMaker.h" 2 | 3 | namespace mm { 4 | void convertToMeshlets() { 5 | 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /core/meshletGenerators.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Senbyo/meshletmaker/ef4082930e2b7c317adbe3bdf7a8ad94871e4313/core/meshletGenerators.cpp -------------------------------------------------------------------------------- /core/meshletGenerators.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Senbyo/meshletmaker/ef4082930e2b7c317adbe3bdf7a8ad94871e4313/core/meshletGenerators.h -------------------------------------------------------------------------------- /core/meshletMaker.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef HEADER_GUARD_MESHLETMAKER 3 | #define HEADER_GUARD_MESHLETMAKER 4 | 5 | #define GLFW_INCLUDE_VULKAN 6 | #define GLM_FORCE_RADIANS 7 | #define GLM_FORCE_DEPTH_ZERO_TO_ONE 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "settings.h" 15 | 16 | 17 | namespace mm { 18 | 19 | void 
calculateCentroids(std::vector triangles, const Vertex* vertexBuffer); 20 | 21 | void loadTinyModel(const std::string& path, std::vector* vertices, std::vector* indices); 22 | 23 | template 24 | void makeMesh(std::unordered_map* indexVertexMap, std::vector* triangles, const uint32_t numIndices, const VertexIndexType* indices); 25 | 26 | template void makeMesh(std::unordered_map* indexVertexMap, std::vector* triangles, const uint32_t numIndices, const uint32_t* indices); 27 | template void makeMesh(std::unordered_map* indexVertexMap, std::vector* triangles, const uint32_t numIndices, const uint16_t* indices); 28 | 29 | template 30 | void generateMeshlets(std::unordered_map& indexVertexMap, std::vector& triangles, std::vector>& mehslets, const Vertex* vertices, int strat = -1, uint32_t primitiveLimit = 125, uint32_t vertexLimit = 64); 31 | 32 | void tipsifyIndexBuffer(const uint32_t* indicies, const uint32_t numIndices, const uint32_t numVerts, const int cacheSize, std::vector& optimizedIdxBuffer); 33 | 34 | template void generateMeshlets(std::unordered_map& indexVertexMap, std::vector& triangles, std::vector>& mehslets, const Vertex* vertices, int strat, uint32_t primitiveLimit, uint32_t vertexLimit); 35 | template void generateMeshlets(std::unordered_map& indexVertexMap, std::vector& triangles, std::vector>& mehslets, const Vertex* vertices, int strat, uint32_t primitiveLimit, uint32_t vertexLimit); 36 | 37 | template 38 | void generateMeshlets(const VertexIndexType* indices, uint32_t numIndices, std::vector>& mehslets, const Vertex* vertices, int strat = -1, uint32_t primitiveLimit = 125, uint32_t vertexLimit = 64); 39 | 40 | template void generateMeshlets(const uint32_t* indices, uint32_t numIndices, std::vector>& mehslets, const Vertex* vertices, int strat, uint32_t primitiveLimit, uint32_t vertexLimit); 41 | template void generateMeshlets(const uint16_t* indices, uint32_t numIndices, std::vector>& mehslets, const Vertex* vertices, int strat, uint32_t 
primitiveLimit, uint32_t vertexLimit); 42 | 43 | 44 | template 45 | std::vector packPackMeshlets(const std::vector>& mehslets); 46 | 47 | template std::vector packPackMeshlets(const std::vector>& mehslets); 48 | template std::vector packPackMeshlets(const std::vector>& mehslets); 49 | 50 | template 51 | std::vector packNVMeshlets(const std::vector>& mehslets); 52 | 53 | template std::vector packNVMeshlets(const std::vector>& mehslets); 54 | 55 | template 56 | std::vector packNVMeshlets16(const std::vector>& mehslets); 57 | template std::vector packNVMeshlets16(const std::vector>& mehslets); 58 | 59 | template 60 | std::vector packMMMeshlets(const std::vector>& mehslets); 61 | template std::vector packMMMeshlets(const std::vector>& mehslets); 62 | 63 | template 64 | std::vector packVertMeshlets(const std::vector>& mehslets); 65 | 66 | template std::vector packVertMeshlets(const std::vector>& mehslets); 67 | template std::vector packVertMeshlets(const std::vector>& mehslets); 68 | 69 | void collectStats(const NVMeshlet::MeshletGeometryPack& geometry, std::vector& stats); 70 | void generateEarlyCulling(NVMeshlet::MeshletGeometryPack& geometry, const std::vector& vertices, std::vector& objectData); 71 | 72 | void collectStats(const NVMeshlet::MeshletGeometry& geometry, std::vector& stats); 73 | void generateEarlyCulling(NVMeshlet::MeshletGeometry& geometry, const std::vector& vertices, std::vector& objectData); 74 | 75 | void collectStats(const NVMeshlet::MeshletGeometry16& geometry, std::vector& stats); 76 | void generateEarlyCulling(NVMeshlet::MeshletGeometry16& geometry, const std::vector& vertices, std::vector& objectData); 77 | 78 | 79 | void collectStats(const mm::MeshletGeometry& geometry, std::vector& stats); 80 | void generateEarlyCulling(mm::MeshletGeometry& geometry, const std::vector& vertices, std::vector& objectData); 81 | void generateEarlyCullingVert(mm::MeshletGeometry& geometry, const std::vector& vertices, std::vector& objectData); 82 | 83 | void 
cleanIndexBuffer(); 84 | 85 | //void convertToMeshlets(); 86 | 87 | //void compressMeshlets(); 88 | 89 | void createMeshletPackDescriptors(const std::string& modelPath, std::vector* meshletGeometry, std::vector* vertCount, std::vector* vertices, std::vector* objectData, std::vector* stats, const int strat); 90 | 91 | void createMeshletMeshDescriptors(const std::string& modelPath, std::vector * meshletGeometry, std::vector* vertCount, std::vector* vertices, std::vector* objectData, std::vector* stats, const int strat); 92 | 93 | void loadObjAsMeshlet(const std::string& modelPath, std::vector * meshletGeometry, std::vector* vertCount, std::vector* vertices, std::vector* objectData, std::vector* stats); 94 | } 95 | 96 | 97 | #endif // HEADER_GUARD_MESHLETMAKER -------------------------------------------------------------------------------- /core/meshletMeshDescriptor.cpp: -------------------------------------------------------------------------------- 1 | #define GLM_ENABLE_EXPERIMENTAL 2 | 3 | #include "meshletMaker.h" 4 | #include "geometryProcessing.h" 5 | #include "mm_meshlet_builder.h" 6 | #include "meshlet_builder.hpp" 7 | #include "meshlet_util.hpp" 8 | 9 | 10 | #include 11 | #include 12 | 13 | 14 | namespace mm { 15 | 16 | std::vector AreaWeightedTriangleList(const std::vector& triangles, const Vertex* vertexBuffer) { 17 | double minArea = DBL_MAX; 18 | std::vector triangleAreas; 19 | triangleAreas.resize(triangles.size()); 20 | for (const auto& t : triangles) { 21 | // area of triangle is half the magnitude of the crossproduct 22 | glm::vec3 firstVec = vertexBuffer[t->vertices[2]->index] - vertexBuffer[t->vertices[0]->index]; 23 | glm::vec3 secondVec = vertexBuffer[t->vertices[2]->index] - vertexBuffer[t->vertices[1]->index]; 24 | double area = glm::length(glm::cross(firstVec, secondVec)) * 0.5f; 25 | if (area < minArea && area != 0.0) { 26 | minArea = area; 27 | } 28 | triangleAreas[t->id] = area; 29 | } 30 | 31 | std::vector weightedAreas; 32 | // create 
// Draws up to `sampleSize` distinct values from `list` by rejection sampling:
// a random entry of `list` is picked, and it is kept only if its value has
// not been returned already. Values that occur more often in `list` are
// therefore more likely to be drawn early.
//
// list        Candidate values (duplicates allowed). The element type is
//             uint32_t, matching the `uint32_t triangleId` local the
//             original code reads entries into.
// sampleSize  Number of distinct values requested.
//
// Returns the drawn values in the order they were picked. The result is empty
// when `list` is empty or `sampleSize` is not positive, and it is capped at
// the number of distinct values in `list` (asking for more used to loop
// forever).
//
// Note: every call reseeds the global std::rand() generator from the clock.
std::vector<uint32_t> SampleList(const std::vector<uint32_t> list, const int sampleSize) {
    std::vector<uint32_t> samples;

    // Nothing to draw from / nothing requested. Also avoids `% 0` below and a
    // negative size being converted to a huge reserve().
    if (list.empty() || sampleSize <= 0) {
        return samples;
    }

    // The rejection loop can only terminate if enough distinct values exist,
    // so clamp the request to what the list can actually provide.
    const std::unordered_set<uint32_t> distinctValues(list.begin(), list.end());
    std::size_t remaining = static_cast<std::size_t>(sampleSize);
    if (remaining > distinctValues.size()) {
        remaining = distinctValues.size();
    }

    samples.reserve(remaining);
    std::unordered_set<uint32_t> usedTriangleIds;
    usedTriangleIds.reserve(remaining);

    std::srand(static_cast<unsigned int>(std::time(nullptr)));

    while (remaining > 0) {
        // `rand() % size` is already a valid index in [0, size - 1]. The
        // previous `+ 1` skipped element 0 and could read one past the end.
        const std::size_t pick = static_cast<std::size_t>(std::rand()) % list.size();
        const uint32_t triangleId = list[pick];

        // insert().second is true only for values not drawn before.
        if (usedTriangleIds.insert(triangleId).second) {
            samples.push_back(triangleId);
            --remaining;
        }
    }

    return samples;
}
| { 99 | // finish old and reset 100 | meshlets.push_back(cache); 101 | cache.reset(); 102 | } 103 | cache.insert(indices + i * 3, vertices); 104 | } 105 | if (!cache.empty()) 106 | { 107 | meshlets.push_back(cache); 108 | } 109 | } 110 | } 111 | 112 | template 113 | void generateMeshlets(std::unordered_map& indexVertexMap, std::vector& triangles, std::vector>& meshlets, const Vertex* vertexBuffer, int strat, uint32_t primitiveLimit, uint32_t vertexLimit) { 114 | assert(primitiveLimit <= MAX_PRIMITIVE_COUNT_LIMIT); 115 | assert(vertexLimit <= MAX_VERTEX_COUNT_LIMIT); 116 | 117 | std::vector vertsVector; 118 | if (strat != 4) { 119 | glm::vec3 min{ FLT_MAX }; 120 | glm::vec3 max{ FLT_MIN }; 121 | for (Triangle* tri : triangles) { 122 | //glm::vec3 v1 = vertexBuffer[tri->vertices[0]->index].pos; 123 | //glm::vec3 v2 = vertexBuffer[tri->vertices[1]->index].pos; 124 | //glm::vec3 v3 = vertexBuffer[tri->vertices[2]->index].pos; 125 | 126 | min = glm::min(min, vertexBuffer[tri->vertices[0]->index].pos); 127 | min = glm::min(min, vertexBuffer[tri->vertices[1]->index].pos); 128 | min = glm::min(min, vertexBuffer[tri->vertices[2]->index].pos); 129 | max = glm::max(max, vertexBuffer[tri->vertices[0]->index].pos); 130 | max = glm::max(max, vertexBuffer[tri->vertices[1]->index].pos); 131 | max = glm::max(max, vertexBuffer[tri->vertices[2]->index].pos); 132 | 133 | //min = glm::min(min, v1); 134 | //min = glm::min(min, v2); 135 | //min = glm::min(min, v3); 136 | //max = glm::max(max, v1); 137 | //max = glm::max(max, v2); 138 | //max = glm::max(max, v3); 139 | 140 | glm::vec3 centroid = (vertexBuffer[tri->vertices[0]->index].pos + vertexBuffer[tri->vertices[1]->index].pos + vertexBuffer[tri->vertices[2]->index].pos) / 3.0f; 141 | //glm::vec3 centroid = (v1 + v2 + v3) / 3.0f; 142 | tri->centroid[0] = centroid.x; 143 | tri->centroid[1] = centroid.y; 144 | tri->centroid[2] = centroid.z; 145 | } 146 | 147 | // use the same axis info to sort vertices 148 | glm::vec3 axis = 
glm::abs(max - min); 149 | 150 | 151 | vertsVector.reserve(indexVertexMap.size()); 152 | for (int i = 0; i < indexVertexMap.size(); ++i) { 153 | vertsVector.push_back(indexVertexMap[i]); 154 | } 155 | 156 | if (axis.x > axis.y && axis.x > axis.z) { 157 | std::sort(vertsVector.begin(), vertsVector.end(), std::bind(compareVerts, std::placeholders::_1, std::placeholders::_2, vertexBuffer, 0)); 158 | std::sort(triangles.begin(), triangles.end(), std::bind(CompareTriangles, std::placeholders::_1, std::placeholders::_2, 0)); 159 | std::cout << "x sorted" << std::endl; 160 | } 161 | else if (axis.y > axis.z && axis.y > axis.x) { 162 | std::sort(vertsVector.begin(), vertsVector.end(), std::bind(compareVerts, std::placeholders::_1, std::placeholders::_2, vertexBuffer, 1)); 163 | std::sort(triangles.begin(), triangles.end(), std::bind(CompareTriangles, std::placeholders::_1, std::placeholders::_2, 1)); 164 | std::cout << "y sorted" << std::endl; 165 | } 166 | else { 167 | std::sort(vertsVector.begin(), vertsVector.end(), std::bind(compareVerts, std::placeholders::_1, std::placeholders::_2, vertexBuffer, 2)); 168 | std::sort(triangles.begin(), triangles.end(), std::bind(CompareTriangles, std::placeholders::_1, std::placeholders::_2, 2)); 169 | std::cout << "z sorted" << std::endl; 170 | } 171 | } 172 | 173 | std::unordered_map used; 174 | MeshletCache cache; 175 | cache.reset(); 176 | switch (strat) { 177 | case 21: 178 | { 179 | std::queue priorityQueue; 180 | std::unordered_map visitedTriangleIds; 181 | 182 | // let us sort the triangles 183 | //calculateCentroids(triangles, vertexBuffer); 184 | //std::sort(triangles.begin(), triangles.end(), CompareTriangles); 185 | 186 | 187 | // add triangles to cache untill full. 
188 | for (int i = 0; i < triangles.size(); ++i) { 189 | // for (Triangle* triangle : triangles) { 190 | // if triangle is not used generate meshlet 191 | Triangle* triangle = triangles[i]; 192 | 193 | if (triangle->flag == 1) continue; 194 | 195 | //reset 196 | priorityQueue.push(triangle); 197 | 198 | // add triangles to cache untill it is full. 199 | while (!priorityQueue.empty()) { 200 | // pop current triangle 201 | Triangle* tri = priorityQueue.front(); 202 | visitedTriangleIds[tri->id] = tri->id; 203 | 204 | 205 | // get all vertices of current triangle 206 | VertexIndexType candidateIndices[3]; 207 | for (uint32_t j = 0; j < 3; ++j) { 208 | candidateIndices[j] = tri->vertices[j]->index; 209 | } 210 | // break if cache is full 211 | if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) { 212 | // we run out of verts but could push prims more so we do a pass of prims here to see if we can maximize 213 | // so we run through all triangles to see if the meshlet already has the required verts 214 | // we try to do this in a dum way to test if it is worth it 215 | for (int v = 0; v < cache.numVertices; ++v) { 216 | for (Triangle* tri : indexVertexMap[cache.vertices[v]]->neighbours) { 217 | if (tri->flag == 1) continue; 218 | 219 | VertexIndexType candidateIndices[3]; 220 | for (uint32_t j = 0; j < 3; ++j) { 221 | uint32_t idx = tri->vertices[j]->index; 222 | candidateIndices[j] = idx; 223 | } 224 | 225 | if (!cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) { 226 | cache.insert(candidateIndices, vertexBuffer); 227 | tri->flag = 1; 228 | } 229 | } 230 | } 231 | meshlets.push_back(cache); 232 | 233 | //reset cache and empty priorityQueue 234 | priorityQueue = {}; 235 | priorityQueue.push(tri); 236 | cache.reset(); 237 | visitedTriangleIds.clear(); 238 | 239 | //reset cache and empty priorityQueue 240 | 241 | continue; 242 | // start over again but from the fringe of the current cluster 243 | } 244 | // get alle neighbours of 
current triangle 245 | for (Triangle* t : tri->neighbours) { 246 | if ((t->flag != 1) && (visitedTriangleIds.find(t->id) == visitedTriangleIds.end())) { 247 | priorityQueue.push(t); 248 | visitedTriangleIds[t->id] = t->id; 249 | } 250 | } 251 | 252 | 253 | cache.insert(candidateIndices, vertexBuffer); 254 | // if triangle is inserted set flag to used. 255 | priorityQueue.pop(); 256 | tri->flag = 1; 257 | 258 | 259 | }; 260 | } 261 | // add remaining triangles to a meshlet 262 | if (!cache.empty()) { 263 | meshlets.push_back(cache); 264 | cache.reset(); 265 | } 266 | break; 267 | } 268 | 269 | // greedy triangle + clustering 270 | case 20: 271 | { 272 | std::vector> clusters; 273 | std::vector cluster; 274 | std::queue priorityQueue; 275 | std::unordered_map visitedTriangleIds; 276 | 277 | //std::vector weightedAreaTriangleList = AreaWeightedTriangleList(triangles, vertexBuffer); 278 | //std::vector clusterCenters = SampleList(weightedAreaTriangleList, 92); 279 | 280 | // add triangles to cache untill full. 281 | for (int i = 0; i < triangles.size(); ++i) { 282 | // for (Triangle* triangle : triangles) { 283 | // if triangle is not used generate meshlet 284 | Triangle* triangle = triangles[i]; 285 | 286 | if (triangle->flag == 1) continue; 287 | 288 | //reset 289 | priorityQueue.push(triangle); 290 | 291 | // add triangles to cache untill it is full. 
292 | while (!priorityQueue.empty()) { 293 | // pop current triangle 294 | Triangle* tri = priorityQueue.front(); 295 | visitedTriangleIds[tri->id] = tri->id; 296 | 297 | 298 | // get all vertices of current triangle 299 | VertexIndexType candidateIndices[3]; 300 | for (uint32_t j = 0; j < 3; ++j) { 301 | candidateIndices[j] = tri->vertices[j]->index; 302 | } 303 | // break if cache is full 304 | if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) { 305 | meshlets.push_back(cache); 306 | 307 | //reset cache and empty priorityQueue 308 | priorityQueue = {}; 309 | priorityQueue.push(tri); 310 | clusters.push_back(cluster); 311 | cluster.clear(); 312 | cache.reset(); 313 | visitedTriangleIds.clear(); 314 | 315 | //reset cache and empty priorityQueue 316 | 317 | continue; 318 | // start over again but from the fringe of the current cluster 319 | } 320 | // get alle neighbours of current triangle 321 | for (Triangle* t : tri->neighbours) { 322 | if ((t->flag != 1) && (visitedTriangleIds.find(t->id) == visitedTriangleIds.end())) { 323 | priorityQueue.push(t); 324 | visitedTriangleIds[t->id] = t->id; 325 | } 326 | } 327 | 328 | 329 | cache.insert(candidateIndices, vertexBuffer); 330 | cluster.push_back(tri->id); 331 | // if triangle is inserted set flag to used. 
332 | priorityQueue.pop(); 333 | tri->flag = 1; 334 | 335 | 336 | }; 337 | } 338 | // add remaining triangles to a meshlet 339 | if (!cache.empty()) { 340 | meshlets.push_back(cache); 341 | cache.reset(); 342 | clusters.push_back(cluster); 343 | cluster.clear(); 344 | } 345 | 346 | //for (int k = 0; k < 10; ++k) { 347 | // // find initial clustercenters 348 | // std::vector candidates; 349 | // uint32_t maxDistance; 350 | // uint32_t minDistance; 351 | // uint32_t dist; 352 | // int count; 353 | // int maxCount; 354 | // bool CENTER_IS_SET = false; 355 | // // putting cluster centers into a vector for later use 356 | // std::vector> centers; 357 | // std::vector clusterCenters; 358 | // clusterCenters.resize(clusters.size()); 359 | // centers.resize(clusters.size()); 360 | // for (uint32_t i = 0; i < clusters.size(); ++i) { 361 | // minDistance = -1; 362 | // maxCount = -1; 363 | // candidates.clear(); 364 | // //build subgraph here ? 365 | // uint32_t difference = 0; 366 | // for (unsigned int j = 0; j < clusters[i].size(); ++j) { 367 | // count = 0; 368 | // Triangle* t = triangles[clusters[i][j]]; 369 | // t->dist = 0; 370 | 371 | // std::queue priorityQueue; 372 | // priorityQueue.push(t); 373 | 374 | // // for each triangle in frontier 375 | // dist = 0; 376 | // visitedTriangleIds.clear(); 377 | // visitedTriangleIds[t->id] = t->id; 378 | 379 | // while (!priorityQueue.empty()) { 380 | // // add neighbours to queue 381 | // Triangle* cur_t = priorityQueue.front(); 382 | // priorityQueue.pop(); 383 | 384 | // // update distance 385 | // dist = cur_t->dist + 1; 386 | 387 | // for (Triangle* neighbour : cur_t->neighbours) { 388 | // if (std::find(clusters[i].begin(), clusters[i].end(), neighbour->id) != clusters[i].end() && (visitedTriangleIds.find(neighbour->id) == visitedTriangleIds.end())) { 389 | // neighbour->dist = dist; 390 | // neighbour->flag = cur_t->flag; 391 | // visitedTriangleIds[neighbour->id] = neighbour->id; 392 | // //if (priorityQueue.size() 
<= clusters[i].size()) 393 | // priorityQueue.push(neighbour); 394 | // ++count; 395 | // } //continue; 396 | 397 | // } 398 | // } 399 | // //distance = dist; 400 | // //if (distance > maxDistance) maxDistance = distance; 401 | // maxDistance = dist; 402 | 403 | 404 | // if (visitedTriangleIds.size() != clusters[i].size()) maxDistance = -1; // Does not consider every element of cluster a possibility 405 | // 406 | // // center is set means that we can have more than one triangle in the center 407 | // if (maxDistance == minDistance && CENTER_IS_SET) { // We might not have convergence guarantees for accurate graph centers 408 | // candidates.push_back(clusters[i][j]); 409 | // } 410 | // else if (maxDistance < minDistance) { 411 | // candidates.clear(); 412 | // candidates.push_back(clusters[i][j]); 413 | // //std::cout << "Cluster " << i << " has candidate " << clusters[i][j] << " with eccentricity " << maxDistance << " compared to previous " << minDistance << std::endl; 414 | // minDistance = maxDistance; 415 | // } 416 | // else if (maxDistance == -1 && candidates.size() == 0) { 417 | // if (count > maxCount) { 418 | // candidates.clear(); 419 | // candidates.push_back(clusters[i][j]); 420 | // maxCount = count; 421 | // if (k > 0) { 422 | // std::cout << "Error no candidates for cluster " << i << std::endl; 423 | // } 424 | // 425 | // } 426 | // } 427 | // } 428 | 429 | // if (candidates.size() == 0) { 430 | // std::cout << "Error no candidates for cluster " << i << std::endl; 431 | // } 432 | // centers[i] = candidates; 433 | // clusterCenters[i] = candidates[0]; 434 | // } 435 | 436 | // // redestribute triangles 437 | 438 | // // reset clusters 439 | // clusters.clear(); 440 | // clusters.resize(clusterCenters.size()); 441 | // visitedTriangleIds.clear(); 442 | // std::queue triangleQueue; 443 | // for (int i = 0; i < triangles.size(); ++i) { 444 | // Triangle* tri = triangles[i]; 445 | 446 | // visitedTriangleIds.clear(); 447 | // 
visitedTriangleIds[tri->id] = tri->id; 448 | 449 | // triangleQueue.push(tri); 450 | // while (!triangleQueue.empty()) 451 | // { 452 | // Triangle* curTri = triangleQueue.front(); 453 | // triangleQueue.pop(); 454 | 455 | 456 | // // if curTri is a cluster center asign tri to that cluster 457 | // std::vector::iterator clusterItr = std::find(clusterCenters.begin(), clusterCenters.end(), curTri->id); 458 | // if (clusterItr != clusterCenters.end()) { 459 | // int idx = std::distance(clusterCenters.begin(), clusterItr); 460 | // clusters[idx].push_back(tri->id); 461 | // triangleQueue = {}; 462 | // break; 463 | // } 464 | 465 | // for (Triangle* neighbour : curTri->neighbours) { 466 | // if (visitedTriangleIds.find(neighbour->id) != visitedTriangleIds.end()) continue; 467 | // triangleQueue.push(neighbour); 468 | // visitedTriangleIds[neighbour->id] = neighbour->id; 469 | 470 | 471 | // } 472 | // } 473 | // } 474 | //} 475 | 476 | ////pack into caches 477 | //for (std::vector c : clusters) { 478 | // for (uint32_t triIdx : c) { 479 | // VertexIndexType candidateIndices[3]; 480 | // for (uint32_t j = 0; j < 3; ++j) { 481 | // candidateIndices[j] = triangles[triIdx]->vertices[j]->index; 482 | // } 483 | 484 | // cache.insert(candidateIndices, vertexBuffer); 485 | // } 486 | 487 | // meshlets.push_back(cache); 488 | // cache.reset(); 489 | //} 490 | break; 491 | } 492 | case 23: 493 | { 494 | std::unordered_set currentVerts; 495 | std::vector trianglesInCluster; 496 | std::deque priorityQueue; 497 | std::unordered_map visitedTriangleIds; 498 | glm::vec3 center = glm::vec3(0.0f); 499 | float radius = 0; 500 | float bestNewRadius = DBL_MAX; 501 | float newRadius = DBL_MAX; 502 | bool updateSphere = false; 503 | 504 | ////let us sort the triangles 505 | //calculateCentroids(triangles, vertexBuffer); 506 | //std::sort(triangles.begin(), triangles.end(), CompareTriangles); 507 | 508 | 509 | // add triangles to cache untill full. 
510 | //for (Triangle* triangle : triangles) { 511 | for (int t = 0; t < triangles.size();) { 512 | 513 | Triangle* triangle = triangles[t]; 514 | // if triangle is not used generate meshlet 515 | if (triangle->flag == 1) { 516 | ++t; 517 | continue; 518 | } 519 | 520 | priorityQueue.push_back(triangle); 521 | 522 | 523 | while (!priorityQueue.empty()) { 524 | 525 | int bestTriIdx = 0; 526 | int triIdx = 0; 527 | bestNewRadius = DBL_MAX; 528 | for (Triangle* possible_tri : priorityQueue) { 529 | 530 | // prioritize triangles who have no "live" neighbours 531 | // also prioritize triangles who already have all verts in the cluster 532 | int newVert{}; 533 | int vertsInMeshlet = 0; 534 | int used = 0; 535 | for (int i = 0; i < 3; ++i) { 536 | if (currentVerts.find(possible_tri->vertices[i]->index) == currentVerts.end()) { 537 | newVert = i; 538 | } 539 | else { 540 | ++vertsInMeshlet; 541 | } 542 | } 543 | 544 | for (auto neighbour_tri : possible_tri->neighbours) { 545 | if (neighbour_tri->flag == 1) ++used; 546 | } 547 | 548 | if (possible_tri->neighbours.size() == used) used = 3; 549 | 550 | //if all verts are allready in meshlet 551 | if (vertsInMeshlet == 3) { 552 | bestTriIdx = triIdx; 553 | updateSphere = false; 554 | break; 555 | } 556 | 557 | // if dangling triangle add it 558 | if (used == 3) { 559 | bestTriIdx = triIdx; 560 | if (vertsInMeshlet == 2) { 561 | // afterwards check the added radius by adding triangle to the cluster 562 | const mm::Vertex p = vertexBuffer[possible_tri->vertices[newVert]->index]; 563 | bestNewRadius = 0.5 * (radius + glm::length(center - p.pos)); 564 | updateSphere = true; 565 | } 566 | else { 567 | updateSphere = false; 568 | } 569 | break; 570 | } 571 | 572 | 573 | // else if no verts are in meshlet ie starting a new meshlet 574 | if (vertsInMeshlet == 0) { 575 | center = (vertexBuffer[possible_tri->vertices[0]->index].pos + vertexBuffer[possible_tri->vertices[1]->index].pos + vertexBuffer[possible_tri->vertices[2]->index].pos) 
/ 3.0f; 576 | radius = glm::max(glm::length(center - vertexBuffer[possible_tri->vertices[0]->index].pos), glm::max(glm::length(center - vertexBuffer[possible_tri->vertices[1]->index].pos), glm::length(center - vertexBuffer[possible_tri->vertices[2]->index].pos))); 577 | updateSphere = false; 578 | //radius = 0.5 * (radius +(glm::max(glm::length(center - vertexBuffer[possible_tri->vertices[0]->index].pos), glm::max(glm::length(center - vertexBuffer[possible_tri->vertices[1]->index].pos), glm::length(center - vertexBuffer[possible_tri->vertices[2]->index].pos))))); 579 | break; 580 | } 581 | else if (vertsInMeshlet == 2) { 582 | // afterwards check the added radius by adding triangle to the cluster 583 | const mm::Vertex p = vertexBuffer[possible_tri->vertices[newVert]->index]; 584 | newRadius = 0.5 * (radius + glm::length(center - p.pos)); 585 | updateSphere = true; 586 | } 587 | 588 | if (newRadius <= bestNewRadius) { 589 | bestNewRadius = newRadius; 590 | bestTriIdx = triIdx; 591 | 592 | } 593 | triIdx++; 594 | } 595 | // move best tri to front of queue 596 | std::swap(priorityQueue.front(), priorityQueue[bestTriIdx]); 597 | Triangle* tri = priorityQueue.front(); 598 | 599 | int newVert{}; 600 | VertexIndexType candidateIndices[3]; 601 | for (VertexIndexType i = 0; i < 3; ++i) { 602 | candidateIndices[i] = tri->vertices[i]->index; 603 | if (currentVerts.find(tri->vertices[i]->index) == currentVerts.end()) newVert = i; 604 | } 605 | 606 | if (updateSphere) { 607 | // get all vertices of current triangle 608 | const mm::Vertex p = vertexBuffer[tri->vertices[newVert]->index]; 609 | radius = bestNewRadius; 610 | center = p.pos + (radius / (FLT_EPSILON + glm::length(center - p.pos))) * (center - p.pos); 611 | } 612 | 613 | // break if cache is full 614 | if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) { 615 | // we run out of verts but could push prims more so we do a pass of prims here to see if we can maximize 616 | // so we run through all 
triangles to see if the meshlet already has the required verts 617 | // we try to do this in a dum way to test if it is worth it 618 | for (int v = 0; v < cache.numVertices; ++v) { 619 | for (Triangle* tri : indexVertexMap[cache.vertices[v]]->neighbours) { 620 | if (tri->flag == 1) continue; 621 | 622 | VertexIndexType candidateIndices[3]; 623 | for (uint32_t j = 0; j < 3; ++j) { 624 | uint32_t idx = tri->vertices[j]->index; 625 | candidateIndices[j] = idx; 626 | } 627 | 628 | if (!cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) { 629 | cache.insert(candidateIndices, vertexBuffer); 630 | tri->flag = 1; 631 | } 632 | } 633 | } 634 | meshlets.push_back(cache); 635 | //addMeshlet(geometry, cache); 636 | 637 | //if (meshlets.size() == 4) return; 638 | //reset cache and empty priorityQueue 639 | //priorityQueue = { tri }; 640 | priorityQueue.clear(); 641 | trianglesInCluster.clear(); 642 | currentVerts.clear(); 643 | cache.reset(); 644 | center = glm::vec3(0.0f); 645 | radius = 0.0f; 646 | break; 647 | 648 | } 649 | 650 | cache.insert(candidateIndices, vertexBuffer); 651 | 652 | // if triangle is inserted set flag to used. 
653 | priorityQueue.pop_front(); 654 | tri->flag = 1; 655 | visitedTriangleIds[tri->id] = tri->id; 656 | 657 | 658 | // add the used vertices to the current cluster 659 | currentVerts.insert(tri->vertices[0]->index); 660 | currentVerts.insert(tri->vertices[1]->index); 661 | currentVerts.insert(tri->vertices[2]->index); 662 | trianglesInCluster.push_back(tri); 663 | 664 | // get alle neighbours of triangles currently in meshlet 665 | priorityQueue.clear(); 666 | for (Triangle* tr : trianglesInCluster) { 667 | for (Triangle* t : tr->neighbours) { 668 | if (t->flag != 1) priorityQueue.push_back(t); 669 | } 670 | } 671 | }; 672 | 673 | if (!cache.empty()) { 674 | meshlets.push_back(cache); 675 | priorityQueue.clear(); 676 | trianglesInCluster.clear(); 677 | currentVerts.clear(); 678 | cache.reset(); 679 | center = glm::vec3(0.0f); 680 | radius = 0.0f; 681 | } 682 | } 683 | // add remaining triangles to a meshlet 684 | if (!cache.empty()) { 685 | meshlets.push_back(cache); 686 | cache.reset(); 687 | } 688 | 689 | break; 690 | } 691 | // bounding sphere based on vertex fanning 692 | case 24: 693 | { 694 | std::unordered_map usedVerts; 695 | std::unordered_set currentVerts; 696 | float radius = .0f; 697 | glm::vec3 center = glm::vec3(.0f); 698 | 699 | 700 | 701 | //std::sort(vertsVector.begin(), vertsVector.end(), std::bind(compareVerts, std::placeholders::_1, std::placeholders::_2, vertexBuffer)); 702 | 703 | for (int i = 0; i < vertsVector.size();) { 704 | 705 | 706 | Vert* vert = vertsVector[i]; 707 | Triangle* bestTri = nullptr; 708 | float newRadius = FLT_MAX; 709 | float bestNewRadius = FLT_MAX - 1.0f; 710 | int bestVertsInMeshlet = 0; 711 | 712 | for (uint32_t j = 0; j < cache.numVertices; ++j) { 713 | uint32_t vertId = cache.vertices[j]; 714 | 715 | for (Triangle* tri : indexVertexMap[vertId]->neighbours) { 716 | if (tri->flag == 1) continue; 717 | 718 | // get info about tri 719 | int newVert{}; 720 | int vertsInMeshlet = 0; 721 | int used = 0; 722 | for (int i = 
0; i < 3; ++i) { 723 | if (currentVerts.find(tri->vertices[i]->index) == currentVerts.end()) { 724 | newVert = i; 725 | } 726 | else { 727 | ++vertsInMeshlet; 728 | } 729 | } 730 | 731 | for (auto neighbour_tri : tri->neighbours) { 732 | if (neighbour_tri->flag == 1) ++used; 733 | } 734 | 735 | if (tri->neighbours.size() == used) used = 3; 736 | 737 | 738 | // if dangling triangle add it 739 | if (used == 3) { 740 | ++vertsInMeshlet; 741 | } 742 | 743 | //if all verts are allready in meshlet 744 | if (vertsInMeshlet == 3) { 745 | newRadius = radius; 746 | } 747 | else if (vertsInMeshlet == 1){ 748 | continue; 749 | } 750 | else { 751 | //TODO TURN THIS IN TO ONE THINK THAT ALWAYS RUNS 752 | // LIKE MAKE SURE THAT THE VERTEX furtherst away from center is used for new radius 753 | // or calculate three new radius and use the biggest one 754 | // afterwards check the added radius by adding triangle to the cluster 755 | float newRadius = 0.5 * (radius + glm::length(center - vertexBuffer[tri->vertices[newVert]->index].pos)); 756 | 757 | } 758 | 759 | if (vertsInMeshlet > bestVertsInMeshlet || newRadius < bestNewRadius ) { 760 | bestVertsInMeshlet = vertsInMeshlet; 761 | bestNewRadius = newRadius; 762 | bestTri = tri; 763 | } 764 | } 765 | } 766 | 767 | if (bestTri == nullptr) { 768 | // create radius and center for the first triangle in the meshlet 769 | for (Triangle* tri : vert->neighbours) { 770 | // skip used triangles 771 | if (tri->flag != 1) { 772 | bestTri = tri; 773 | 774 | center = (vertexBuffer[bestTri->vertices[0]->index].pos + vertexBuffer[bestTri->vertices[1]->index].pos + vertexBuffer[bestTri->vertices[2]->index].pos) / 3.0f; 775 | bestNewRadius = glm::max(glm::length(center - vertexBuffer[bestTri->vertices[0]->index].pos), glm::max(glm::length(center - vertexBuffer[bestTri->vertices[1]->index].pos), glm::length(center - vertexBuffer[bestTri->vertices[2]->index].pos))); 776 | break; 777 | } 778 | } 779 | 780 | if (bestTri == nullptr) { 781 | ++i; 782 | // 
here we finalize current meshlet when we need to enforce locality 783 | //if (cache.numPrims != 0) { 784 | // meshlets.push_back(cache); 785 | // currentVerts.clear(); 786 | // cache.reset(); 787 | //} 788 | 789 | continue; 790 | } 791 | } 792 | 793 | int newVert{}; 794 | int numNewVerts = 0; 795 | VertexIndexType candidateIndices[3]; 796 | for (VertexIndexType i = 0; i < 3; ++i) { 797 | candidateIndices[i] = bestTri->vertices[i]->index; 798 | if (currentVerts.find(bestTri->vertices[i]->index) == currentVerts.end()) { 799 | newVert = i; 800 | ++numNewVerts; 801 | } 802 | } 803 | 804 | radius = bestNewRadius; 805 | if (numNewVerts = 1) { 806 | // get all vertices of current triangle 807 | const mm::Vertex p = vertexBuffer[bestTri->vertices[newVert]->index]; 808 | center = p.pos + (radius / (FLT_EPSILON + glm::length(center - p.pos))) * (center - p.pos); 809 | } 810 | 811 | // If full pack and restart restart 812 | //add triangle to cache 813 | if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) { 814 | // we run out of verts but could push prims more so we do a pass of prims here to see if we can maximize 815 | // so we run through all triangles to see if the meshlet already has the required verts 816 | // we try to do this in a dum way to test if it is worth it 817 | for (int v = 0; v < cache.numVertices; ++v) { 818 | for (Triangle* tri : indexVertexMap[cache.vertices[v]]->neighbours) { 819 | if (tri->flag == 1) continue; 820 | 821 | VertexIndexType candidateIndices[3]; 822 | for (uint32_t j = 0; j < 3; ++j) { 823 | uint32_t idx = tri->vertices[j]->index; 824 | candidateIndices[j] = idx; 825 | } 826 | 827 | if (!cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) { 828 | cache.insert(candidateIndices, vertexBuffer); 829 | tri->flag = 1; 830 | } 831 | } 832 | } 833 | meshlets.push_back(cache); 834 | currentVerts.clear(); 835 | cache.reset(); 836 | continue; 837 | //break; 838 | 839 | 840 | } 841 | 842 | // insert triangle and mark 
used 843 | cache.insert(candidateIndices, vertexBuffer); 844 | bestTri->flag = 1; 845 | currentVerts.insert(candidateIndices[0]); 846 | currentVerts.insert(candidateIndices[1]); 847 | currentVerts.insert(candidateIndices[2]); 848 | ++usedVerts[candidateIndices[0]]; 849 | ++usedVerts[candidateIndices[1]]; 850 | ++usedVerts[candidateIndices[2]]; 851 | 852 | //if (indexVertexMap[i]->neighbours.size() == usedVerts[indexVertexMap[i]->index]) ++i; 853 | } 854 | 855 | // add remaining triangles to a meshlet 856 | if (!cache.empty()) { 857 | meshlets.push_back(cache); 858 | cache.reset(); 859 | } 860 | 861 | break; 862 | } 863 | case 12: 864 | { 865 | std::queue priorityQueue; 866 | //std::vector> clusters; 867 | //std::vector cluster; 868 | //std::vector triangleCentroids; 869 | //triangleCentroids.resize(triangles.size()); 870 | //std::vector clusterCentroids; 871 | 872 | //// pick best triangle to add 873 | //std::vector vertsVector; 874 | //vertsVector.reserve(indexVertexMap.size()); 875 | //for (int i = 0; i < indexVertexMap.size(); ++i) { 876 | // vertsVector.push_back(indexVertexMap[i]); 877 | //} 878 | 879 | //std::sort(vertsVector.begin(), vertsVector.end(), std::bind(compareVerts, std::placeholders::_1, std::placeholders::_2, vertexBuffer)); 880 | 881 | 882 | //glm::vec3 clusterCenter = glm::vec3(0.0f); 883 | // add triangles to cache untill full. 884 | for (int i = 0; i < vertsVector.size(); ++i) { 885 | // for (Triangle* triangle : triangles) { 886 | // if triangle is not used generate meshlet 887 | Vert* vert = vertsVector[i]; 888 | if (used.find(vert->index) != used.end()) continue; 889 | 890 | //reset 891 | priorityQueue.push(vert); 892 | 893 | // add triangles to cache untill it is full. 
894 | while (!priorityQueue.empty()) { 895 | // pop current triangle 896 | Vert* vert = priorityQueue.front(); 897 | 898 | for (Triangle* tri : vert->neighbours) { 899 | if (tri->flag == 1) continue; 900 | //glm::vec3 centroid = glm::vec3(0.0f); 901 | 902 | // calculate centroid 903 | //centroid = vertexBuffer[tri->vertices[0]->index].pos + vertexBuffer[tri->vertices[1]->index].pos + vertexBuffer[tri->vertices[2]->index].pos; 904 | //triangleCentroids[tri->id] = centroid / 3.0f; 905 | 906 | 907 | // get all vertices of current triangle 908 | VertexIndexType candidateIndices[3]; 909 | for (uint32_t j = 0; j < 3; ++j) { 910 | uint32_t idx = tri->vertices[j]->index; 911 | candidateIndices[j] = idx; 912 | if (used.find(idx) == used.end()) priorityQueue.push(tri->vertices[j]); 913 | } 914 | // break if cache is full 915 | if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) { 916 | // we run out of verts but could push prims more so we do a pass of prims here to see if we can maximize 917 | // so we run through all triangles to see if the meshlet already has the required verts 918 | // we try to do this in a dum way to test if it is worth it 919 | for (int v = 0; v < cache.numVertices; ++v) { 920 | for (Triangle* tri : indexVertexMap[cache.vertices[v]]->neighbours) { 921 | if (tri->flag == 1) continue; 922 | 923 | VertexIndexType candidateIndices[3]; 924 | for (uint32_t j = 0; j < 3; ++j) { 925 | uint32_t idx = tri->vertices[j]->index; 926 | candidateIndices[j] = idx; 927 | if (used.find(idx) == used.end()) priorityQueue.push(tri->vertices[j]); 928 | } 929 | 930 | if (!cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) { 931 | cache.insert(candidateIndices, vertexBuffer); 932 | //cluster.push_back(tri->id); 933 | //clusterCenter += triangleCentroids[tri->id]; 934 | tri->flag = 1; 935 | } 936 | } 937 | } 938 | //clusters.push_back(cluster); 939 | //clusterCenter = clusterCenter / float(cluster.size()); 940 | //cluster.clear(); 941 | 
//clusterCentroids.push_back(clusterCenter); 942 | meshlets.push_back(cache); 943 | //clusterCenter = glm::vec3(0.0f); 944 | 945 | //reset cache and empty priorityQueue 946 | priorityQueue = {}; 947 | priorityQueue.push(vert); 948 | cache.reset(); 949 | continue; 950 | // start over again but from the fringe of the current cluster 951 | } 952 | 953 | cache.insert(candidateIndices, vertexBuffer); 954 | //cluster.push_back(tri->id); 955 | //clusterCenter += triangleCentroids[tri->id]; 956 | 957 | // if triangle is inserted set flag to used. 958 | tri->flag = 1; 959 | } 960 | 961 | // pop vertex if we make it through all its neighbours 962 | priorityQueue.pop(); 963 | used[vert->index] = 1; 964 | 965 | 966 | 967 | 968 | 969 | }; 970 | } 971 | // add remaining triangles to a meshlet 972 | if (!cache.empty()) { 973 | meshlets.push_back(cache); 974 | cache.reset(); 975 | //clusters.push_back(cluster); 976 | //clusterCenter = clusterCenter / float(cluster.size()); 977 | //clusterCentroids.push_back(clusterCenter); 978 | //cluster.clear(); 979 | 980 | } 981 | 982 | //for (int i = 0; i < 2; ++i) { 983 | // // find cluster centers 984 | // std::vector clusterCenters; 985 | // clusterCenters.resize(clusters.size()); 986 | // uint32_t clusterid = 0; 987 | // for (std::vector c : clusters) { 988 | // double minDist = DBL_MAX; 989 | // for (uint32_t tid : c) { 990 | // glm::vec3 clusterCentroid = clusterCentroids[clusterid]; 991 | // glm::vec3 triangleCentroid = triangleCentroids[tid]; 992 | // // distance to clusterCenter 993 | // double distance = glm::distance(clusterCentroid, triangleCentroid); 994 | // // if distance is shortest cur triangle is center 995 | // if (distance < minDist) { 996 | // minDist = distance; 997 | // clusterCenters[clusterid] = tid; 998 | // } 999 | 1000 | 1001 | // } 1002 | // ++clusterid; 1003 | 1004 | // } 1005 | 1006 | // //redestribute triangles 1007 | 1008 | // // reset clusters 1009 | // clusters.clear(); 1010 | // 
clusters.resize(clusterCenters.size()); 1011 | // clusterCentroids.resize(clusterCenters.size()); 1012 | // std::queue triangleQueue; 1013 | // for (int i = 0; i < triangles.size(); ++i) { 1014 | // Triangle* tri = triangles[i]; 1015 | // if (tri->flag == i) continue; 1016 | // tri->flag = i; 1017 | 1018 | // triangleQueue.push(tri); 1019 | // while (!triangleQueue.empty()) 1020 | // { 1021 | // Triangle* curTri = triangleQueue.front(); 1022 | // triangleQueue.pop(); 1023 | 1024 | 1025 | // // if curTri is a cluster center asign tri to that cluster 1026 | // std::vector::iterator clusterItr = std::find(clusterCenters.begin(), clusterCenters.end(), curTri->id); 1027 | // if (clusterItr != clusterCenters.end()) { 1028 | // int idx = std::distance(clusterCenters.begin(), clusterItr); 1029 | // clusters[idx].push_back(tri->id); 1030 | // triangleQueue = {}; 1031 | // break; 1032 | // } 1033 | 1034 | // for (Triangle* neighbour : curTri->neighbours) { 1035 | // if (neighbour->flag == i) continue; 1036 | // triangleQueue.push(neighbour); 1037 | // neighbour->flag = i; 1038 | 1039 | 1040 | // } 1041 | // } 1042 | // } 1043 | // // recalculate centroids 1044 | // for (int i = 0; i < clusters.size(); ++i) { 1045 | // std::vector c = clusters[i]; 1046 | // glm::vec3 clusterCentroid = glm::vec3(0.0f); 1047 | // for (uint32_t triIdx : c) { 1048 | // Triangle* tri = triangles[triIdx]; 1049 | // clusterCentroid += vertexBuffer[tri->vertices[0]->index].pos + vertexBuffer[tri->vertices[1]->index].pos + vertexBuffer[tri->vertices[2]->index].pos; 1050 | // } 1051 | // clusterCentroid = clusterCentroid / float(c.size()); 1052 | // clusterCentroids[i] = clusterCentroid; 1053 | // } 1054 | //} 1055 | 1056 | ////pack into caches 1057 | //for (std::vector c : clusters) { 1058 | // for (uint32_t triIdx : c) { 1059 | // VertexIndexType candidateIndices[3]; 1060 | // for (uint32_t j = 0; j < 3; ++j) { 1061 | // candidateIndices[j] = triangles[triIdx]->vertices[j]->index; 1062 | // } 1063 | 
1064 | // cache.insert(candidateIndices, vertexBuffer); 1065 | // } 1066 | 1067 | // meshlets.push_back(cache); 1068 | // cache.reset(); 1069 | //} 1070 | break; 1071 | } 1072 | case 11: 1073 | { 1074 | 1075 | std::vector> clusters; 1076 | unsigned char tris[126]; // ideally we could use mem equal to the entire mesh 1077 | unsigned char verts[64]; // ideally we could use mem equal to the entire mesh 1078 | // the challenge is to not end up with small islands of triangles that will become their own clusters 1079 | memset(tris, 0xff, primitiveLimit); 1080 | memset(verts, 0xff, vertexLimit); 1081 | 1082 | // we want to go through our mesh here and mark cluster centers and their radii. 1083 | // that way we can essentially do discreet poison sampling of the mesh to find cluster centers. 1084 | 1085 | 1086 | // should I go round the vertex instead ? 1087 | // pick said triangle fan all verts in it, and then subsequently add triangles from the ring ? 1088 | std::vector cluster; 1089 | std::queue triangleQue; 1090 | for (Triangle* triangle : triangles) { 1091 | if (triangle->flag == 1) continue; 1092 | 1093 | size_t vertices, triangles = 0; 1094 | while (triangles + 1 <= primitiveLimit || vertices <= vertexLimit) { 1095 | 1096 | 1097 | // add neighbours to queue 1098 | for (int i = 0; i < 3; ++i) 1099 | { 1100 | if (triangle->neighbours[i]->flag == 1) continue; 1101 | triangleQue.push(triangle->neighbours[i]); 1102 | } 1103 | 1104 | // try to add triangle to current cluster 1105 | // skip degenerate 1106 | 1107 | if (triangle->vertices[0] == triangle->vertices[1] || triangle->vertices[0] == triangle->vertices[2] || triangle->vertices[1] == triangle->vertices[2]) 1108 | { 1109 | triangle->flag = 1; 1110 | continue; 1111 | } 1112 | 1113 | uint32_t found = 0; 1114 | // check if any of the incoming three indices are already in cluster 1115 | for (uint32_t v = 0; v < vertices; ++v) 1116 | { 1117 | found += (verts[v] == triangle->vertices[0]->index) + (verts[v] == 
triangle->vertices[1]->index) + (verts[v] == triangle->vertices[2]->index); 1118 | } 1119 | 1120 | // add triangle and verts 1121 | if ((vertices + 3 - found) > vertexLimit || (triangles + 1) > primitiveLimit) { 1122 | vertices += 3 - found; 1123 | triangles++; 1124 | } 1125 | 1126 | 1127 | // // potential speed up is keeping track of cluster center 1128 | // // might be required for the next part. 1129 | } 1130 | // 1131 | ////reset cluster 1132 | memset(tris, 0xff, primitiveLimit); 1133 | memset(verts, 0xff, vertexLimit); 1134 | } 1135 | 1136 | // grow out while we have less than vertexlimit and primitivelimit verts and triangles. 1137 | // grab new triangle center and repeat 1138 | 1139 | 1140 | 1141 | // run a pass or two of k-medoids clustering to balance out clusters before backing into caches 1142 | break; 1143 | } 1144 | // our advanced stat 1145 | case 3: 1146 | { 1147 | 1148 | std::unordered_set currentVerts; 1149 | std::vector trianglesInCluster; 1150 | std::deque priorityQueue; 1151 | double boarderLength = 0.0; 1152 | // add triangles to cache untill full. 1153 | for (Triangle* triangle : triangles) { 1154 | // if triangle is not used generate meshlet 1155 | if (triangle->flag == 1) continue; 1156 | 1157 | //reset 1158 | boarderLength = 0.0; 1159 | priorityQueue.push_back(triangle); 1160 | currentVerts.clear(); 1161 | trianglesInCluster.clear(); 1162 | 1163 | // add triangles to cache untill it is full. 
1164 | while (!priorityQueue.empty()) { 1165 | // pop current triangle that expands boarder the least 1166 | 1167 | 1168 | float boarderIncrease = DBL_MAX; 1169 | int bestTriIdx = 0; 1170 | int triIdx = 0; 1171 | for (Triangle* possible_tri : priorityQueue) { 1172 | //Triangle* tri = priorityQueue.front(); 1173 | // find out how many verts are already in cluster 1174 | int numVerts = 0; 1175 | bool newVerts[3]; 1176 | int idx = 0; 1177 | for (Vert* v : possible_tri->vertices) 1178 | { 1179 | int count = currentVerts.count(v->index); 1180 | newVerts[idx++] = count; 1181 | numVerts += count; 1182 | //if (numVerts >= 3) { 1183 | // std::cout << "we have 3 verts" << std::endl; 1184 | //} 1185 | } 1186 | 1187 | float newBoarder = 0.0f; 1188 | float oldBoarder = 0.0f; 1189 | float newBoarderIncrease = 0.0f; 1190 | switch (numVerts) { 1191 | case 3: 1192 | { 1193 | for (Triangle* nb : possible_tri->neighbours) { 1194 | // find common verts 1195 | std::vector common_verts; 1196 | for (Vert* v : possible_tri->vertices) 1197 | { 1198 | if (v->index == nb->vertices[0]->index) 1199 | { 1200 | common_verts.push_back(nb->vertices[0]->index); 1201 | } 1202 | else if (v->index == nb->vertices[1]->index) 1203 | { 1204 | common_verts.push_back(nb->vertices[1]->index); 1205 | } 1206 | else if (v->index == nb->vertices[2]->index) 1207 | { 1208 | common_verts.push_back(nb->vertices[2]->index); 1209 | } 1210 | } 1211 | if (std::find(trianglesInCluster.begin(), trianglesInCluster.end(), nb) != trianglesInCluster.end()) //nb->flag == 1) 1212 | { 1213 | //add to old boarder 1214 | oldBoarder += vertexBuffer[common_verts[0]].euclideanDistance(vertexBuffer[common_verts[1]]); 1215 | } 1216 | else 1217 | { 1218 | //add to new boarder 1219 | newBoarder = vertexBuffer[common_verts[0]].euclideanDistance(vertexBuffer[common_verts[1]]); 1220 | } 1221 | } 1222 | newBoarderIncrease = newBoarder - oldBoarder; 1223 | break; 1224 | } 1225 | case 2: 1226 | { 1227 | // figure out which vertex is not in 
cluster 1228 | if (newVerts[0] == 1 && newVerts[1] == 1) 1229 | { 1230 | newBoarderIncrease = vertexBuffer[possible_tri->vertices[0]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[2]->index]) 1231 | + vertexBuffer[possible_tri->vertices[1]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[2]->index]) 1232 | - vertexBuffer[possible_tri->vertices[0]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[1]->index]); 1233 | 1234 | 1235 | } 1236 | else if (newVerts[2] == 1 && newVerts[1] == 1) 1237 | { 1238 | newBoarderIncrease = vertexBuffer[possible_tri->vertices[1]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[0]->index]) 1239 | + vertexBuffer[possible_tri->vertices[2]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[0]->index]) 1240 | - vertexBuffer[possible_tri->vertices[2]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[1]->index]); 1241 | } 1242 | else if (newVerts[0] == 1 && newVerts[2] == 1) 1243 | { 1244 | newBoarderIncrease = vertexBuffer[possible_tri->vertices[2]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[1]->index]) 1245 | + vertexBuffer[possible_tri->vertices[0]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[1]->index]) 1246 | - vertexBuffer[possible_tri->vertices[0]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[2]->index]); 1247 | } 1248 | break; 1249 | } 1250 | // 1 shared vert and none result in entire triangle boarder being added 1251 | default: 1252 | { 1253 | // based on that we calculate new boarder 1254 | newBoarderIncrease = vertexBuffer[possible_tri->vertices[0]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[1]->index]) 1255 | + vertexBuffer[possible_tri->vertices[0]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[2]->index]) 1256 | + vertexBuffer[possible_tri->vertices[1]->index].euclideanDistance(vertexBuffer[possible_tri->vertices[2]->index]); 1257 | break; 1258 | } 1259 | }; 1260 | 1261 | 
if (newBoarderIncrease <= boarderIncrease) { 1262 | boarderIncrease = newBoarderIncrease; 1263 | bestTriIdx = triIdx; 1264 | 1265 | } 1266 | 1267 | 1268 | triIdx++; 1269 | } 1270 | // move best tri to front of queue 1271 | std::swap(priorityQueue.front(), priorityQueue[bestTriIdx]); 1272 | Triangle* tri = priorityQueue.front(); 1273 | 1274 | // get all vertices of current triangle 1275 | VertexIndexType candidateIndices[3]; 1276 | for (VertexIndexType i = 0; i < 3; ++i) { 1277 | candidateIndices[i] = tri->vertices[i]->index; 1278 | } 1279 | // break if cache is full 1280 | if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) { 1281 | meshlets.push_back(cache); 1282 | //addMeshlet(geometry, cache); 1283 | 1284 | //reset cache and empty priorityQueue 1285 | priorityQueue = {tri}; 1286 | trianglesInCluster.clear(); 1287 | currentVerts.clear(); 1288 | cache.reset(); 1289 | continue; 1290 | } 1291 | 1292 | cache.insert(candidateIndices, vertexBuffer); 1293 | 1294 | // if triangle is inserted set flag to used. 
1295 | priorityQueue.pop_front(); 1296 | tri->flag = 1; 1297 | 1298 | //insert triangle and calculate added boarder 1299 | boarderLength += boarderIncrease; 1300 | 1301 | // add the used vertices to the current cluster 1302 | currentVerts.insert(tri->vertices[0]->index); 1303 | currentVerts.insert(tri->vertices[1]->index); 1304 | currentVerts.insert(tri->vertices[2]->index); 1305 | trianglesInCluster.push_back(tri); 1306 | 1307 | // get alle neighbours of triangles currently in meshlet 1308 | priorityQueue.clear(); 1309 | for (Triangle* tr : trianglesInCluster) { 1310 | for (Triangle* t : tr->neighbours) { 1311 | if (t->flag != 1) priorityQueue.push_back(t); 1312 | } 1313 | } 1314 | //for (Triangle* t : tri->neighbours) { 1315 | // if (t->flag != 1) priorityQueue.push_back(t); 1316 | //} 1317 | 1318 | }; 1319 | } 1320 | // add remaining triangles to a meshlet 1321 | if (!cache.empty()) { 1322 | meshlets.push_back(cache); 1323 | cache.reset(); 1324 | } 1325 | 1326 | //// add triangles to cache untill full. 1327 | //for (Triangle* triangle : triangles) { 1328 | // // if triangle is not used generate meshlet 1329 | // if (triangle->flag != 1) { 1330 | // //get indicies 1331 | // VertexIndexType candidateIndices[3]; 1332 | // for (VertexIndexType i = 0; i < 3; ++i) { 1333 | // candidateIndices[i] = triangle->vertices[i]->index; 1334 | // } 1335 | 1336 | // // check if we can add to current meshlet if not we finish it. 
1337 | // if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) { 1338 | // meshlets.push_back(cache); 1339 | // cache.reset(); 1340 | // } 1341 | 1342 | // // insert current triangle 1343 | // cache.insert(candidateIndices, vertexBuffer); 1344 | // triangle->flag = 1; 1345 | // } 1346 | //} 1347 | 1348 | //// add remaining triangles to a meshlet 1349 | //if (!cache.empty()) { 1350 | // meshlets.push_back(cache); 1351 | // cache.reset(); 1352 | //} 1353 | 1354 | 1355 | // return numIndicies for now - maybe change return type 1356 | break; 1357 | } 1358 | // graphicslab cluster without building sparse matrix 1359 | case 4: 1360 | { 1361 | int generated = 0; 1362 | 1363 | //// cluster center indices 1364 | std::unordered_set c_indices; 1365 | c_indices.reserve(glm::ceil(triangles.size() / 100)); // indexVertexMap.size() / 3 / primitiveLimit); // triangles.size() / primitiveLimit);// 1366 | 1367 | //// find random centers 1368 | std::default_random_engine generator; 1369 | std::uniform_int_distribution distribution(0, triangles.size() - 1); 1370 | 1371 | //// this loop here is made to make sure that different cluster centers are chosen 1372 | while (c_indices.size() < glm::ceil(triangles.size() / 100)) { //indexVertexMap.size() / 3 / primitiveLimit) { //triangles.size() / primitiveLimit) { // Consider dropping std::rand - fails to generate sufficiently random numbers 1373 | c_indices.insert(distribution(generator)); // Can loop forever if random produces few distinct random values 1374 | } 1375 | 1376 | //std::vector weightedAreaTriangleList = AreaWeightedTriangleList(triangles, vertexBuffer); 1377 | //std::vector c_indices = SampleList(weightedAreaTriangleList, 1000); 1378 | 1379 | std::cout << c_indices.size() << " centers generated" << std::endl; 1380 | 1381 | // putting cluster centers into a vector for later use 1382 | std::vector> centers; 1383 | for (uint32_t i : c_indices) { 1384 | centers.push_back(std::vector{i}); 1385 | } 1386 | 
c_indices.clear(); 1387 | 1388 | 1389 | 1390 | 1391 | std::cout << "Starting Kmeans" << std::endl; 1392 | 1393 | // create the new clusters 1394 | std::vector> clusters(centers.size(), std::vector()); 1395 | 1396 | uint32_t distance; 1397 | uint32_t minDistance; 1398 | 1399 | uint32_t iter = 0; 1400 | 1401 | // settings and structures from on Graphics Lab 1402 | bool CENTER_IS_SET = false; 1403 | unsigned int ITER_LIM = -1; 1404 | bool SMOOTH_CLUSTERS = NVMeshlet::GenStrategy::KMEANSU == NVMeshlet::KMEANSE || NVMeshlet::GenStrategy::KMEANSU == NVMeshlet::KMEANSEO || NVMeshlet::GenStrategy::KMEANSU == NVMeshlet::KMEANSA || NVMeshlet::GenStrategy::KMEANSU == NVMeshlet::KMEANSU; 1405 | bool MULTI_SPLIT = NVMeshlet::GenStrategy::KMEANSU == NVMeshlet::KMEANSO || NVMeshlet::GenStrategy::KMEANSU == NVMeshlet::KMEANSEO || NVMeshlet::GenStrategy::KMEANSU == NVMeshlet::KMEANSA || NVMeshlet::GenStrategy::KMEANSU == NVMeshlet::KMEANSU; 1406 | bool AGGRESSIVE_BALANCING = NVMeshlet::GenStrategy::KMEANSU == NVMeshlet::KMEANSA; 1407 | 1408 | double convergenceDist; 1409 | //double CONVERGENCE_LIM = 3 * (NVMeshlet::GenStrategy::KMEANSU == NVMeshlet::KMEANSU);//(*vertices).size()/1000000; // Seems to perform well? 
1410 | double CONVERGENCE_LIM = 1.0f; // 1.5f 1411 | bool done = false; 1412 | 1413 | std::vector> dirtyVerts; 1414 | std::vector flippableVerts; 1415 | 1416 | mm::MeshletCache cache; 1417 | VertexIndexType* candidateIndices = new VertexIndexType[3]; 1418 | 1419 | std::unordered_map center_map; 1420 | 1421 | std::vector> prevCenters; 1422 | 1423 | for (uint32_t i = 0; i < centers.size(); ++i) { 1424 | for (uint32_t j = 0; j < centers[i].size(); ++j) { 1425 | center_map[centers[i][j]] = i; 1426 | } 1427 | } 1428 | 1429 | // while clusters do not fit into meshlets 1430 | while (!done) { 1431 | iter = 0; 1432 | convergenceDist = CONVERGENCE_LIM + 1; 1433 | // should resample list with new clusters - to reconverge 1434 | //std::vector newCenters = SampleList(weightedAreaTriangleList, center_map.size()); 1435 | 1436 | //centers.clear(); 1437 | //for (uint32_t i : newCenters) { 1438 | // centers.push_back(std::vector{i}); 1439 | //} 1440 | 1441 | //center_map.clear(); 1442 | //for (uint32_t i = 0; i < newCenters.size(); ++i) { 1443 | // center_map[newCenters[i]] = i; 1444 | //} 1445 | 1446 | // while clusters have not yet converged 1447 | while (convergenceDist > CONVERGENCE_LIM) { 1448 | prevCenters = centers; 1449 | 1450 | iter++; 1451 | 1452 | // clear clusters 1453 | for (uint32_t i = 0; i < clusters.size(); ++i) { 1454 | clusters[i].reserve(10000); 1455 | clusters[i].clear(); 1456 | } 1457 | 1458 | // reserve to make sure that the vectors are threadsafe 1459 | dirtyVerts.reserve(centers.size()); 1460 | flippableVerts.reserve(centers.size()); 1461 | 1462 | dirtyVerts.clear(); 1463 | flippableVerts.clear(); 1464 | 1465 | bool dirty; 1466 | uint32_t dist; 1467 | uint32_t finalDistance; 1468 | uint32_t distlim = 125; 1469 | uint32_t count; 1470 | double differenceBetweenCenters = 0.0; 1471 | #pragma omp parallel shared(triangles, dirtyVerts, flippableVerts, distlim, AGGRESSIVE_BALANCING) private(count, finalDistance, dirty, minDistance, dist, distance) 
firstprivate(center_map) 1472 | { 1473 | 1474 | //for (Triangle* v : triangles) { 1475 | #pragma omp for collapse(2) //shared(triangles, dirtyVerts, flippableVerts, distlim, AGGRESSIVE_BALANCING) private(count, finalDistance, dirty, minDistance, dist, distance) firstprivate(center_map) 1476 | for (int t = 0; t < triangles.size(); ++t) { 1477 | Triangle* v = triangles[t]; 1478 | minDistance = -1; 1479 | 1480 | Triangle* c; 1481 | v->flag = -1; 1482 | //uint32_t clustercenter = -1; 1483 | finalDistance = -1; 1484 | count = 1; 1485 | 1486 | dirty = false; 1487 | std::vector dirtyCandidates{}; 1488 | 1489 | // BFS on structure based on the current triangle 1490 | // Ideally we would want to check all clusters but it should be ok 1491 | // to break after finding the first cluster because all other clusters 1492 | // must be further away (I THINK) 1493 | std::queue priorityQueue; 1494 | std::queue distanceQueue; 1495 | priorityQueue.push(v); 1496 | distanceQueue.push(1); 1497 | 1498 | // actually we might not even need to keep a distance since we are going to 1499 | // grab the first cluster center we meet 1500 | 1501 | // for each triangle in frontier 1502 | 1503 | //reset finalDistance between each triangle. 
1504 | // essentially finaldistance is 1505 | std::unordered_map visitedTriangleIds{}; 1506 | visitedTriangleIds[v->id] = v->id; 1507 | bool centerFound = false; 1508 | while (!priorityQueue.empty()) { 1509 | 1510 | // add neighbours to queue 1511 | Triangle* cur_t = priorityQueue.front(); 1512 | priorityQueue.pop(); 1513 | // update distance 1514 | //dist = cur_t->dist + 1; 1515 | uint32_t cur_dist = distanceQueue.front(); 1516 | dist = cur_dist + 1; 1517 | distanceQueue.pop(); 1518 | 1519 | 1520 | // check current triangles id against clusters 1521 | if (center_map.find(cur_t->id) != center_map.end()) { 1522 | 1523 | // if current tri is a cluster center break 1524 | //distance = cur_t->dist; 1525 | distance = cur_dist; 1526 | 1527 | if (distance < minDistance) { 1528 | if (AGGRESSIVE_BALANCING && distance == minDistance - 1) { 1529 | dirty = true; 1530 | } 1531 | else { 1532 | dirty = false; 1533 | } 1534 | c = cur_t; 1535 | minDistance = distance; 1536 | v->flag = center_map[c->id]; 1537 | //clustercenter = center_map[c->id]; 1538 | dirtyCandidates = { v->flag }; 1539 | //dirtyCandidates = { clustercenter }; 1540 | //v->dist = 0; 1541 | centerFound = true; 1542 | } 1543 | else if (distance != -1) { 1544 | if (AGGRESSIVE_BALANCING && distance == minDistance + 1) { 1545 | dirty = true; 1546 | } 1547 | else if (distance == minDistance) { 1548 | dirtyCandidates.push_back(center_map[c->id]); 1549 | //v->dist = 1; 1550 | if (SMOOTH_CLUSTERS) v->flag = -1; 1551 | //if (SMOOTH_CLUSTERS) clustercenter = -1; 1552 | dirty = false; 1553 | } 1554 | } 1555 | //finalDistance = distance; 1556 | } 1557 | 1558 | for (Triangle* neighbour : cur_t->neighbours) { 1559 | //Triangle localNeighbour = *neighbour; 1560 | if (visitedTriangleIds.find(neighbour->id) != visitedTriangleIds.end()) continue; 1561 | //neighbour->dist = dist; 1562 | visitedTriangleIds[neighbour->id] = neighbour->id; 1563 | //neighbour->flag = v->flag; 1564 | 1565 | // no need to explore more than the 125 
surrounding triangles 1566 | // since if a cluster is further away we actually need a new cluster 1567 | //if (dist <= 15) priorityQueue.push(neighbour); 1568 | if (!centerFound) { 1569 | priorityQueue.push(neighbour); 1570 | distanceQueue.push(dist); 1571 | } 1572 | count++; 1573 | } 1574 | //if (count >= distlim) finalDistance = dist; 1575 | 1576 | } 1577 | 1578 | //if (c == nullptr) { 1579 | // v->flag = -1; 1580 | //} 1581 | 1582 | #pragma omp critical 1583 | { 1584 | if (dirtyCandidates.size() > 1) { 1585 | dirtyCandidates.push_back(v->id); 1586 | dirtyVerts.push_back(dirtyCandidates); 1587 | //continue; 1588 | } 1589 | else { 1590 | if (dirty) { 1591 | flippableVerts.push_back(v->id); 1592 | } 1593 | if (v->flag < centers.size()) { 1594 | //if (clustercenter < centers.size()) { 1595 | 1596 | clusters[v->flag].push_back(v->id); 1597 | //clusters[clustercenter].push_back(v->id); 1598 | // setting the flag changes total number of clusters, who knows why 1599 | //v->flag = clustercenter; 1600 | } 1601 | else { 1602 | v->flag = centers.size(); 1603 | //clustercenter = centers.size(); 1604 | clusters.push_back(std::vector{v->id}); 1605 | centers.push_back(std::vector{v->id}); 1606 | center_map[v->id] = v->flag; 1607 | //center_map[v->id] = clustercenter; 1608 | std::cout << "damn" << std::endl; 1609 | } 1610 | } 1611 | } 1612 | } 1613 | 1614 | //if (iter > ITER_LIM) break; 1615 | 1616 | // Update centers 1617 | 1618 | uint32_t maxDistance; 1619 | #pragma omp for reduction (+:differenceBetweenCenters) collapse(2) //shared(triangles, centers, clusters, CENTER_IS_SET) private(maxDistance, dist, minDistance) 1620 | for (int i = 0; i < clusters.size(); ++i) { 1621 | std::vector cluster = clusters[i]; 1622 | std::vector center = centers[i]; 1623 | minDistance = -1; 1624 | std::vector candidates{}; 1625 | //build subgraph here ? 
1626 | uint32_t difference = 0; 1627 | for (int j = 0; j < cluster.size(); ++j) { 1628 | count = 0; 1629 | Triangle* t = triangles[cluster[j]]; 1630 | //Triangle t = *triangles[cluster[j]]; 1631 | 1632 | //t->dist = 0; 1633 | //t.dist = 0; 1634 | // 1635 | //t->flag = -1; 1636 | //for (Triangle* v : clusters[i]) { 1637 | // if (v->id == cur_t->id) continue; 1638 | 1639 | 1640 | 1641 | 1642 | //} 1643 | // BFS on structure based on the current triangle 1644 | // Ideally we would want to check all clusters but it should be ok 1645 | // to break after finding the first cluster because all other clusters 1646 | // must be further away (I THINK) 1647 | std::queue priorityQueue; 1648 | //std::queue priorityQueue; 1649 | priorityQueue.push(t); 1650 | std::queue distanceQueue; 1651 | distanceQueue.push(0); 1652 | 1653 | // actually we might not even need to keep a distance since we are going to 1654 | // grab the first cluster center we meet 1655 | 1656 | // for each triangle in frontier 1657 | dist = 0; 1658 | uint32_t distanceToClusterCenter = 0; 1659 | std::unordered_map visitedIds{}; 1660 | visitedIds[t->id] = t->id; 1661 | while (!priorityQueue.empty()) { 1662 | // add neighbours to queue 1663 | Triangle* cur_t = priorityQueue.front(); 1664 | priorityQueue.pop(); 1665 | // update distance 1666 | uint32_t cur_dist = distanceQueue.front(); 1667 | distanceQueue.pop(); 1668 | dist = cur_dist + 1; 1669 | 1670 | 1671 | if (std::find(center.begin(), center.end(), cur_t->id) != center.end()) { 1672 | distanceToClusterCenter = cur_dist; 1673 | } 1674 | 1675 | 1676 | 1677 | 1678 | for (Triangle* neighbour : cur_t->neighbours) { 1679 | //Triangle localNeighbour = *neighbour; 1680 | if (visitedIds.find(neighbour->id) != visitedIds.end() || std::find(cluster.begin(), cluster.end(), neighbour->id) == cluster.end()) continue; 1681 | //localNeighbour.dist = dist; 1682 | //neighbour->flag = cur_t->flag; 1683 | visitedIds[neighbour->id] = neighbour->id; 1684 | //if 
(priorityQueue.size() <= clusters[i].size()) 1685 | distanceQueue.push(dist); 1686 | priorityQueue.push(neighbour); 1687 | ++count; 1688 | } 1689 | 1690 | } 1691 | 1692 | 1693 | maxDistance = dist; 1694 | 1695 | if (visitedIds.size() != cluster.size()) { 1696 | maxDistance = -1; // Does not consider every element of cluster a possibility 1697 | // center is set means that we can have more than one triangle in the center 1698 | }if (maxDistance == minDistance && CENTER_IS_SET) { // We might not have convergence guarantees for accurate graph centers 1699 | candidates.push_back(cluster[j]); 1700 | } 1701 | else if (maxDistance < minDistance) { 1702 | candidates.clear(); 1703 | candidates.push_back(cluster[j]); 1704 | difference = distanceToClusterCenter; 1705 | //std::cout << "Cluster " << i << " has candidate " << clusters[i][j] << " with eccentricity " << maxDistance << " compared to previous " << minDistance << std::endl; 1706 | minDistance = maxDistance; 1707 | } 1708 | } 1709 | 1710 | if (candidates.size() == 0) { 1711 | std::cout << "Error no candidates for cluster " << i << std::endl; 1712 | } 1713 | centers[i] = candidates; 1714 | differenceBetweenCenters += difference; 1715 | } 1716 | } 1717 | center_map.clear(); 1718 | convergenceDist = 0; 1719 | for (int i = 0; i < centers.size(); ++i) { 1720 | //std::cout << "Center " << i << " size " << centers[i].size() << std::endl; 1721 | for (int j = 0; j < centers[i].size(); ++j) { 1722 | center_map[centers[i][j]] = i; 1723 | } 1724 | // TODO: Adapt to center-sets 1725 | // this loop looks at difference between the distance of all triangles in cluster to 1726 | // the old cluster center and the new cluster center 1727 | //if (i < prevCenters.size()) { 1728 | // //distance = distanceMatrix->get(centers[i][0], prevCenters[i][0]) - 1; 1729 | // if (distance > convergenceDist) convergenceDist = distance; 1730 | //} 1731 | //else { 1732 | // convergenceDist = -1; 1733 | //} 1734 | } 1735 | 1736 | convergenceDist = 
differenceBetweenCenters / centers.size(); 1737 | //std::cout << "Convergence distance " << convergenceDist << std::endl; 1738 | //std::cout << "Number of clusters " << center_map.size() << std::endl; 1739 | 1740 | } 1741 | 1742 | //std::cout << "Centers converged" << std::endl; 1743 | //Assign "dirty" vertices 1744 | if (AGGRESSIVE_BALANCING) { 1745 | for (uint32_t vert_id : flippableVerts) { 1746 | Triangle* vertex = triangles[vert_id]; // bamboozle is actually triangle! 1747 | uint32_t old_flag = vertex->flag; 1748 | for (uint32_t i = 0; i < vertex->neighbours.size(); ++i) { 1749 | if (vertex->neighbours[i]->flag == vertex->neighbours[(i + 1) % vertex->neighbours.size()]->flag) { 1750 | if (vertex->neighbours[i]->flag != -1) vertex->flag = vertex->neighbours[i]->flag; 1751 | break; 1752 | } 1753 | } 1754 | if (vertex->flag != old_flag) { 1755 | for (uint32_t i = 0; i < clusters[old_flag].size(); ++i) { 1756 | if (triangles[clusters[old_flag][i]]->id == vertex->id) { 1757 | std::swap(clusters[old_flag][i], clusters[old_flag][clusters[old_flag].size() - 1]); 1758 | clusters[old_flag].pop_back(); 1759 | } 1760 | } 1761 | clusters[vertex->flag].push_back(vertex->id); 1762 | } 1763 | } 1764 | } 1765 | for (auto dirtyList : dirtyVerts) { 1766 | uint32_t vert_id = dirtyList.back(); 1767 | 1768 | Triangle* vertex = triangles[vert_id]; 1769 | vertex->dist = 0; 1770 | 1771 | // Check neighbours 1772 | if (SMOOTH_CLUSTERS || AGGRESSIVE_BALANCING) { 1773 | for (uint32_t i = 0; i < vertex->neighbours.size(); ++i) { 1774 | if (vertex->neighbours[i]->flag == vertex->neighbours[(i + 1) % vertex->neighbours.size()]->flag) { 1775 | vertex->flag = vertex->neighbours[i]->flag; 1776 | break; 1777 | } 1778 | } 1779 | if (vertex->flag == -1) { 1780 | uint32_t min_size = -1; 1781 | uint32_t curr_candidate = -1; 1782 | for (uint32_t i = 0; i < dirtyList.size() - 1; ++i) { 1783 | if (clusters[dirtyList[i]].size() < min_size) { 1784 | min_size = clusters[dirtyList[i]].size(); 1785 | 
curr_candidate = dirtyList[i]; 1786 | } 1787 | } 1788 | vertex->flag = curr_candidate; 1789 | } 1790 | } 1791 | clusters[vertex->flag].push_back(vertex->id); 1792 | } 1793 | 1794 | //Check if the partitioning fits 1795 | done = true; 1796 | [&] { 1797 | for (uint32_t c = 0; c < clusters.size(); ++c) { 1798 | if (!done) break; 1799 | cache.reset(); 1800 | for (uint32_t v_id : clusters[c]) { 1801 | for (uint32_t i = 0; i < 3; ++i) { 1802 | candidateIndices[i] = triangles[v_id]->vertices[i]->index; 1803 | } 1804 | if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) { 1805 | // Create initial centers and recurse 1806 | if (centers[c].size() > 1) { 1807 | centers.push_back(std::vector{centers[c].back()}); 1808 | centers[c].pop_back(); 1809 | //std::cout << "Splitting center " << centers[c].back() << "," << centers.back()[0] << std::endl; 1810 | } 1811 | else{ 1812 | uint32_t candidate_center = clusters[c][std::rand() % clusters[c].size()]; 1813 | while (center_map.count(candidate_center) != 0) { 1814 | candidate_center = clusters[c][std::rand() % clusters[c].size()]; 1815 | } 1816 | centers.push_back(std::vector{candidate_center}); 1817 | center_map[candidate_center] = centers.size() - 1; 1818 | //std::cout << "Adding neighbour center " << centers.back()[0] << std::endl; 1819 | } 1820 | //std::cout << "Cluster size conflict, recursing " << clusters[c].size() << std::endl; 1821 | clusters.push_back(std::vector()); 1822 | done = false; 1823 | 1824 | if (!MULTI_SPLIT) c = clusters.size(); 1825 | //std::cout << "Number of clusters: " << clusters.size() << std::endl; 1826 | return; 1827 | } 1828 | cache.insert(candidateIndices, vertexBuffer); 1829 | } 1830 | } 1831 | }(); 1832 | } 1833 | delete[] candidateIndices; 1834 | 1835 | 1836 | 1837 | 1838 | //std::cout << "Kmeans done building meshlets" << std::endl; 1839 | 1840 | 1841 | for (std::vector c : clusters) { 1842 | cache.reset(); 1843 | for (uint32_t index : c) { 1844 | for (uint32_t i = 0; i < 3; ++i) 
{ 1845 | candidateIndices[i] = triangles[index]->vertices[i]->index; 1846 | } 1847 | if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) return; // U done goofed 1848 | cache.insert(candidateIndices, vertexBuffer); 1849 | } 1850 | generated++; 1851 | meshlets.push_back(cache); 1852 | } 1853 | //std::cout << "Meshlets generated " << generated << std::endl; 1854 | 1855 | break; 1856 | 1857 | 1858 | } 1859 | //graphicslab cluster commented out because of eigen dependency 1860 | // zoutmans version 1861 | case 0: 1862 | { 1863 | 1864 | std::vector used(triangles.size(), false); 1865 | 1866 | std::unordered_set currentVerts; 1867 | 1868 | std::vector frontier; 1869 | 1870 | VertexIndexType* candidateIndices = new VertexIndexType[3]; 1871 | 1872 | uint32_t score; 1873 | uint32_t maxScore; 1874 | 1875 | Triangle* candidate; 1876 | Triangle* current; 1877 | uint32_t candidateIndex; 1878 | 1879 | for (uint32_t used_count = 0; used_count < triangles.size(); ++used_count) { 1880 | if (used[used_count]) continue; 1881 | 1882 | // Empty frontier 1883 | frontier = { triangles[used_count] }; 1884 | currentVerts.clear(); 1885 | 1886 | while (frontier.size() > 0) { 1887 | maxScore = 0; 1888 | 1889 | for (uint32_t i = 0; i < frontier.size(); ++i) { 1890 | current = frontier[i]; 1891 | score = 0; 1892 | for (Vert* v : current->vertices) score += currentVerts.count(v->index); 1893 | 1894 | if (score >= maxScore) { 1895 | maxScore = score; 1896 | candidate = current; 1897 | candidateIndex = i; 1898 | } 1899 | } 1900 | 1901 | for (uint32_t i = 0; i < 3; ++i) { 1902 | candidateIndices[i] = candidate->vertices[i]->index; 1903 | } 1904 | if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) { 1905 | meshlets.push_back(cache); 1906 | cache.reset(); 1907 | break; 1908 | } 1909 | cache.insert(candidateIndices, vertexBuffer); 1910 | std::swap(frontier[candidateIndex], frontier[frontier.size() - 1]); 1911 | frontier.pop_back(); 1912 | for (Vert* v : 
candidate->vertices) currentVerts.insert(v->index); 1913 | for (Triangle* t : candidate->neighbours) { 1914 | if (!used[t->id]) frontier.push_back(t); 1915 | } 1916 | 1917 | used[candidate->id] = true; 1918 | } 1919 | 1920 | 1921 | // Find best scoring triangle in frontier 1922 | // Attempt to add to meshlet 1923 | // If fail 1924 | // Add meshlet to geometry 1925 | // Reset cache 1926 | // Continue loop 1927 | // If success 1928 | // Add triangle to meshlet 1929 | // If frontier empty continue loop 1930 | } 1931 | 1932 | if (!cache.empty()) 1933 | { 1934 | meshlets.push_back(cache); 1935 | } 1936 | 1937 | break; 1938 | } 1939 | // Our Greedy version 1940 | default: 1941 | { 1942 | 1943 | std::queue priorityQueue; 1944 | 1945 | // add triangles to cache untill full. 1946 | for (int i = 0; i < triangles.size(); ++i) { 1947 | // for (Triangle* triangle : triangles) { 1948 | // if triangle is not used generate meshlet 1949 | Triangle* triangle = triangles[i]; 1950 | 1951 | if (triangle->flag == 1) continue; 1952 | 1953 | //reset 1954 | priorityQueue.push(triangle); 1955 | 1956 | 1957 | 1958 | 1959 | // add triangles to cache untill it is full. 
1960 | while (!priorityQueue.empty()) { 1961 | // pop current triangle 1962 | Triangle* tri = priorityQueue.front(); 1963 | 1964 | // get all vertices of current triangle 1965 | VertexIndexType candidateIndices[3]; 1966 | for (uint32_t j = 0; j < 3; ++j) { 1967 | candidateIndices[j] = tri->vertices[j]->index; 1968 | } 1969 | // break if cache is full 1970 | if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) { 1971 | meshlets.push_back(cache); 1972 | 1973 | //reset cache and empty priorityQueue 1974 | priorityQueue = {}; 1975 | priorityQueue.push(tri); 1976 | cache.reset(); 1977 | break; 1978 | // start over again but from the fringe of the current cluster 1979 | } 1980 | // get alle neighbours of current triangle 1981 | for (Triangle* t : tri->neighbours) { 1982 | if (t->flag != 1) priorityQueue.push(t); 1983 | } 1984 | 1985 | 1986 | cache.insert(candidateIndices, vertexBuffer); 1987 | // if triangle is inserted set flag to used. 1988 | priorityQueue.pop(); 1989 | tri->flag = 1; 1990 | 1991 | 1992 | }; 1993 | } 1994 | // add remaining triangles to a meshlet 1995 | if (!cache.empty()) { 1996 | meshlets.push_back(cache); 1997 | cache.reset(); 1998 | } 1999 | 2000 | //// add triangles to cache untill full. 2001 | //for (Triangle* triangle : triangles) { 2002 | // // if triangle is not used generate meshlet 2003 | // if (triangle->flag != 1) { 2004 | // //get indicies 2005 | // VertexIndexType candidateIndices[3]; 2006 | // for (VertexIndexType i = 0; i < 3; ++i) { 2007 | // candidateIndices[i] = triangle->vertices[i]->index; 2008 | // } 2009 | 2010 | // // check if we can add to current meshlet if not we finish it. 
// if (cache.cannotInsert(candidateIndices, vertexLimit, primitiveLimit)) { 2012 | // meshlets.push_back(cache); 2013 | // cache.reset(); 2014 | // } 2015 | 2016 | // // insert current triangle 2017 | // cache.insert(candidateIndices, vertexBuffer); 2018 | // triangle->flag = 1; 2019 | // } 2020 | //} 2021 | 2022 | //// add remaining triangles to a meshlet 2023 | //if (!cache.empty()) { 2024 | // meshlets.push_back(cache); 2025 | // cache.reset(); 2026 | //} 2027 | } 2028 | } 2029 | } 2030 | 2031 | } -------------------------------------------------------------------------------- /core/meshletTaskDescriptor.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Senbyo/meshletmaker/ef4082930e2b7c317adbe3bdf7a8ad94871e4313/core/meshletTaskDescriptor.cpp -------------------------------------------------------------------------------- /core/meshlet_util.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | /* NVMeshlet: graph records (Vertex, Triangle) plus a packed pairwise hop-distance matrix and a max-degree lookup helper. */ 8 | namespace NVMeshlet { 9 | struct Vertex; 10 | struct Triangle; 11 | /* Vertex record. The element type of neighbours is not recoverable from this listing (template arguments were lost). NOTE(review): index and degree look like a vertex-buffer index and an adjacency count; confirm against the code that fills them. */ 12 | struct Vertex { 13 | std::vector neighbours; 14 | unsigned int index; 15 | unsigned int degree; 16 | }; 17 | /* Triangle record. flag is initialised from -1, which as uint32_t is 0xFFFFFFFF; the SymMatrix constructor below overwrites flag as its BFS visited mark. dist has no initialiser. */ 18 | struct Triangle { 19 | std::vector vertices; 20 | std::vector neighbours; 21 | uint32_t id; 22 | uint32_t flag = -1; 23 | uint32_t dist; 24 | }; 25 | /* Abstract pairwise-distance store: set and get are pure virtual. No virtual destructor is declared, so deleting a derived object through a DistMatrix pointer would be undefined behaviour. */ 26 | class DistMatrix { 27 | public: 28 | virtual void set(uint32_t i, uint32_t j, uint32_t val) = 0; 29 | virtual uint32_t get(uint32_t i, uint32_t j) = 0; 30 | }; 31 | /* Symmetric distance matrix stored packed: only pairs with i != j are kept, m_n*(m_n-1)/2 entries in m_data; the diagonal is implicit (get returns 0, set ignores it). */ 32 | class SymMatrix : public DistMatrix { 33 | private: 34 | unsigned int m_n; 35 | std::vector m_data; 36 | /* Maps an unordered pair to its packed slot: after ordering so that i <= j, slot = j*(j-1)/2 + i, computed in 64 bits. No bounds check and no i == j check is done here. */ 37 | uint64_t translate(uint32_t i, uint32_t j) { 38 | // Consider assert/error return on i=j or exceeding size 39 | // Trust in the math 40 | if (i > j) std::swap(i, j); 41 | return ((uint64_t)j * j - j) / 2 + i; 42 | }; 43 | /* Fills the matrix by running a BFS over neighbours from every element of *vertices, storing dist + 1 for each newly reached neighbour. Every entry is first initialised from -1; pairs that are never reached keep that value. Side effect: flag of every visited element is overwritten with the BFS source index. distlim is not used. NOTE(review): the source is addressed by position i but targets by ->id, so this presumably relies on (*vertices)[i]->id == i; confirm. */ 44 | public: 45 | SymMatrix(std::vector* vertices, 
uint32_t distlim) { 46 | m_n = vertices->size(); 47 | uint64_t temp = ((uint64_t)m_n * m_n - m_n) / 2; 48 | m_data = std::vector(temp, -1); 49 | 50 | // BFS 51 | Triangle* current; 52 | std::queue frontier; 53 | uint32_t dist; 54 | for (uint32_t i = 0; i < m_n; ++i) { 55 | current = (*vertices)[i]; 56 | current->flag = i; 57 | 58 | //if(i%100 == 0) std::cout << i << "/" << m_n <<"\n"; 59 | 60 | dist = 0; 61 | frontier.push(current); 62 | 63 | while (!frontier.empty()) { 64 | current = frontier.front(); 65 | frontier.pop(); // std::queue::pop() returns void, so front() is read first 66 | 67 | dist = get(i, current->id); 68 | 69 | for (uint32_t t = 0; t < current->neighbours.size(); ++t) { 70 | if (current->neighbours[t]->flag == i) continue; 71 | current->neighbours[t]->flag = i; 72 | set(i, current->neighbours[t]->id, dist + 1); 73 | frontier.push(current->neighbours[t]); 74 | } 75 | } 76 | } 77 | }; 78 | /* Stores val for the unordered pair (i, j); a call with i == j is ignored. */ 79 | void set(uint32_t i, uint32_t j, uint32_t val) { 80 | if (i == j) return; 81 | if (i > j) std::swap(i, j); 82 | m_data[translate(i, j)] = val; 83 | }; 84 | /* Returns the stored value for the unordered pair (i, j); returns 0 when i == j. */ 85 | uint32_t get(uint32_t i, uint32_t j) { 86 | if (i == j) return 0; 87 | if (i > j) std::swap(i, j); 88 | return m_data[translate(i, j)]; 89 | }; 90 | }; 91 | /* Returns the element of *vec with the largest degree; the comparison is strict, so the earliest such element wins ties, and the front element is returned when every degree is 0. vec->front() is called unconditionally, so *vec must not be empty. */ 92 | inline Vertex* findMaxVertex(std::vector* vec) { 93 | unsigned int max = 0; 94 | Vertex* res = vec->front(); 95 | for (const auto& v : *vec) { 96 | //std::cout << v->degree; 97 | if (v->degree > max) { 98 | max = v->degree; 99 | res = v; 100 | } 101 | } 102 | //std::cout << "\nMax " << max << std::endl; 103 | return res; 104 | }; 105 | 106 | } 107 | /* mm: the same DistMatrix / SymMatrix / findMaxVertex code again, written against Triangle and Vert. NOTE(review): neither type is declared in this header's visible text; presumably they come from another header (mm_structures.h?); confirm. */ 108 | namespace mm { 109 | /* Abstract pairwise-distance store: set and get are pure virtual. No virtual destructor is declared, so deleting a derived object through a DistMatrix pointer would be undefined behaviour. */ 110 | class DistMatrix { 111 | public: 112 | virtual void set(uint32_t i, uint32_t j, uint32_t val) = 0; 113 | virtual uint32_t get(uint32_t i, uint32_t j) = 0; 114 | }; 115 | /* Symmetric distance matrix stored packed: only pairs with i != j are kept, m_n*(m_n-1)/2 entries in m_data; the diagonal is implicit (get returns 0, set ignores it). */ 116 | class SymMatrix : public DistMatrix { 117 | private: 118 | unsigned int m_n; 119 | std::vector m_data; 120 | /* Maps an unordered pair to its packed slot: after ordering so that i <= j, slot = j*(j-1)/2 + i, computed in 64 bits. No bounds check and no i == j check is done here. */ 121 | uint64_t translate(uint32_t i, uint32_t j) { 122 | // Consider assert/error return on i=j or exceeding size 123 | 
// Trust in the math 124 | if (i > j) std::swap(i, j); 125 | return ((uint64_t)j * j - j) / 2 + i; 126 | }; 127 | /* Fills the matrix by running a BFS over neighbours from every element of *vertices, storing dist + 1 for each newly reached neighbour. Every entry is first initialised from -1; pairs that are never reached keep that value. Side effect: flag of every visited element is overwritten with the BFS source index. distlim is not used. NOTE(review): the source is addressed by position i but targets by ->id, so this presumably relies on (*vertices)[i]->id == i; confirm. */ 128 | public: 129 | SymMatrix(std::vector* vertices, uint32_t distlim) { 130 | m_n = vertices->size(); 131 | uint64_t temp = ((uint64_t)m_n * m_n - m_n) / 2; 132 | m_data = std::vector(temp, -1); 133 | 134 | // BFS 135 | Triangle* current; 136 | std::queue frontier; 137 | uint32_t dist; 138 | for (uint32_t i = 0; i < m_n; ++i) { 139 | current = (*vertices)[i]; 140 | current->flag = i; 141 | 142 | //if(i%100 == 0) std::cout << i << "/" << m_n <<"\n"; 143 | 144 | dist = 0; 145 | frontier.push(current); 146 | 147 | while (!frontier.empty()) { 148 | current = frontier.front(); 149 | frontier.pop(); // std::queue::pop() returns void, so front() is read first 150 | 151 | dist = get(i, current->id); 152 | 153 | for (uint32_t t = 0; t < current->neighbours.size(); ++t) { 154 | if (current->neighbours[t]->flag == i) continue; 155 | current->neighbours[t]->flag = i; 156 | set(i, current->neighbours[t]->id, dist + 1); 157 | frontier.push(current->neighbours[t]); 158 | } 159 | } 160 | } 161 | }; 162 | /* Stores val for the unordered pair (i, j); a call with i == j is ignored. */ 163 | void set(uint32_t i, uint32_t j, uint32_t val) { 164 | if (i == j) return; 165 | if (i > j) std::swap(i, j); 166 | m_data[translate(i, j)] = val; 167 | }; 168 | /* Returns the stored value for the unordered pair (i, j); returns 0 when i == j. */ 169 | uint32_t get(uint32_t i, uint32_t j) { 170 | if (i == j) return 0; 171 | if (i > j) std::swap(i, j); 172 | return m_data[translate(i, j)]; 173 | }; 174 | }; 175 | /* Returns the element of *vec with the largest degree; the comparison is strict, so the earliest such element wins ties, and the front element is returned when every degree is 0. vec->front() is called unconditionally, so *vec must not be empty. */ 176 | inline Vert* findMaxVertex(std::vector* vec) { 177 | unsigned int max = 0; 178 | Vert* res = vec->front(); 179 | for (const auto& v : *vec) { 180 | //std::cout << v->degree; 181 | if (v->degree > max) { 182 | max = v->degree; 183 | res = v; 184 | } 185 | } 186 | //std::cout << "\nMax " << max << std::endl; 187 | return res; 188 | }; 189 | 190 | } // namespace mm -------------------------------------------------------------------------------- /core/mm_structures.h: -------------------------------------------------------------------------------- 1 | #pragma once 2
// Mesh-stage descriptor. It currently declares no members.
struct MeshletDescMesh
{

};


// Meshlet descriptor stored as four 32-bit words (fieldX..fieldW).
// This struct only declares the storage; it has no accessors of its own.
struct MeshletDescTask
{
  // A Meshlet contains a set of unique vertices
  // and a group of primitives that are defined by
  // indices into this local set of vertices.
  //
  // The information here is used by a single
  // mesh shader's workgroup to execute vertex
  // and primitive shading.
  // It is packed into a single "uvec4"/"uint4" value
  // so the hardware can leverage 128-bit loads in the
  // shading languages.
  // The offsets used here are for the appropriate
  // indices arrays.
  //
  // A bounding box as well as an angled cone is stored to allow
  // quick culling in the task shader.
  // The current packing is just a basic implementation that
  // may be customized, but ideally fits within 128 bits.

  //
  // Bitfield layout :
  //
  //   Field.X    | Bits | Content
  //  ------------|:----:|----------------------------------------------
  //  bboxMinX    | 8    | bounding box coord relative to object bbox
  //  bboxMinY    | 8    | UNORM8
  //  bboxMinZ    | 8    |
  //  vertexMax   | 8    | number of vertex indices - 1 in the meshlet
  //  ------------|:----:|----------------------------------------------
  //   Field.Y    |      |
  //  ------------|:----:|----------------------------------------------
  //  bboxMaxX    | 8    | bounding box coord relative to object bbox
  //  bboxMaxY    | 8    | UNORM8
  //  bboxMaxZ    | 8    |
  //  primMax     | 8    | number of primitives - 1 in the meshlet
  //  ------------|:----:|----------------------------------------------
  //   Field.Z    |      |
  //  ------------|:----:|----------------------------------------------
  //  vertexBegin | 20   | offset to the first vertex index, times alignment
  //  coneOctX    | 8    | octant coordinate for cone normal, SNORM8
  //  coneAngleLo | 4    | lower 4 bits of -sin(cone.angle), SNORM8
  //  ------------|:----:|----------------------------------------------
  //   Field.W    |      |
  //  ------------|:----:|----------------------------------------------
  //  primBegin   | 20   | offset to the first primitive index, times alignment
  //  coneOctY    | 8    | octant coordinate for cone normal, SNORM8
  //  coneAngleHi | 4    | higher 4 bits of -sin(cone.angle), SNORM8
  //
  // Note : the bitfield is not expanded in the struct due to differences in how
  // GPU & CPU compilers pack bit-fields and endian-ness.

  union
  {
    // Named bit-field view of the same storage. It is only compiled when NDEBUG
    // is not defined and the compiler defines _MSC_VER.
#if !defined(NDEBUG) && defined(_MSC_VER)
    struct
    {
      // warning, not portable
      unsigned bboxMinX : 8;
      unsigned bboxMinY : 8;
      unsigned bboxMinZ : 8;
      unsigned vertexMax : 8;

      unsigned bboxMaxX : 8;
      unsigned bboxMaxY : 8;
      unsigned bboxMaxZ : 8;
      unsigned primMax : 8;

      unsigned vertexBegin : 20;
      signed   coneOctX : 8;
      unsigned coneAngleLo : 4;

      unsigned primBegin : 20;
      signed   coneOctY : 8;
      unsigned coneAngleHi : 4;
    } _debug;
#endif
    // Raw words, one per row group of the layout table above.
    struct
    {
      uint32_t fieldX;
      uint32_t fieldY;
      uint32_t fieldZ;
      uint32_t fieldW;
    };
  };


};
22 | enum GenStrategy { 23 | NAIVE, 24 | GREEDY, 25 | KMEANSD, 26 | KMEANSS, 27 | KMEANSE, 28 | KMEANSO, 29 | KMEANSEO, 30 | KMEANSA, 31 | KMEANSU 32 | }; 33 | 34 | 35 | struct Stats 36 | { 37 | size_t meshletsTotal = 0; 38 | // slightly more due to task-shader alignment 39 | size_t meshletsStored = 0; 40 | 41 | // number of meshlets that can be backface cluster culled at all 42 | // due to similar normals 43 | size_t backfaceTotal = 0; 44 | 45 | size_t primIndices = 0; 46 | size_t primTotal = 0; 47 | 48 | size_t vertexIndices = 0; 49 | size_t vertexTotal = 0; 50 | 51 | 52 | 53 | // used when we sum multiple stats into a single to 54 | // compute averages of the averages/variances below. 55 | 56 | // Special data points. 57 | size_t triangleCountHist[MAX_PRIMITIVE_COUNT_LIMIT] = { 0 }; 58 | size_t vertexCountHist[MAX_VERTEX_COUNT_LIMIT] = { 0 }; 59 | size_t reusageMeasure = 0; 60 | 61 | size_t appended = 0; 62 | 63 | double primloadAvg = 0.f; 64 | double primloadVar = 0.f; 65 | double vertexloadAvg = 0.f; 66 | double vertexloadVar = 0.f; 67 | 68 | void append(const Stats& other) 69 | { 70 | meshletsTotal += other.meshletsTotal; 71 | meshletsStored += other.meshletsStored; 72 | backfaceTotal += other.backfaceTotal; 73 | 74 | primIndices += other.primIndices; 75 | vertexIndices += other.vertexIndices; 76 | vertexTotal += other.vertexTotal; 77 | primTotal += other.primTotal; 78 | 79 | appended += other.appended; 80 | primloadAvg += other.primloadAvg; 81 | primloadVar += other.primloadVar; 82 | vertexloadAvg += other.vertexloadAvg; 83 | vertexloadVar += other.vertexloadVar; 84 | } 85 | 86 | void fprint(FILE* log) const 87 | { 88 | if (!appended || !meshletsTotal) 89 | return; 90 | 91 | double fprimloadAvg = primloadAvg / double(appended); 92 | double fprimloadVar = primloadVar / double(appended); 93 | double fvertexloadAvg = vertexloadAvg / double(appended); 94 | double fvertexloadVar = vertexloadVar / double(appended); 95 | 96 | double statsNum = 
double(meshletsTotal); 97 | double backfaceAvg = double(backfaceTotal) / statsNum; 98 | 99 | double primWaste = double(primIndices) / double(primTotal * 3) - 1.0; 100 | double vertexWaste = double(vertexIndices) / double(vertexTotal) - 1.0; 101 | double meshletWaste = double(meshletsStored) / double(meshletsTotal) - 1.0; 102 | 103 | fprintf(log, 104 | "meshlets; %7zd; prim; %9zd; %.2f; vertex; %9zd; %.2f; backface; %.2f; waste; v; %.2f; p; %.2f; m; %.2f\n", meshletsTotal, 105 | primTotal, fprimloadAvg, vertexTotal, fvertexloadAvg, backfaceAvg, vertexWaste, primWaste, meshletWaste); 106 | } 107 | }; 108 | 109 | // use getTaskPaddedElements 110 | static const uint32_t MESHLETS_PER_TASK = 32; 111 | 112 | 113 | // We allow two different type of primitive index packings. 114 | // The first is preferred, but yields slightly greater code complexity. 115 | enum PrimitiveIndexPacking 116 | { 117 | // Dense array of multiple uint8s, 3 uint8s per primitive. 118 | // Least waste, can partially use 32-bit storage intrinsic for writing to gl_PrimitiveIndices 119 | PRIMITIVE_PACKING_TIGHT_UINT8, 120 | 121 | // Same as above but we may use less triangles to simplify loader logic. 122 | // We guarantee that all indices can be safely written to the gl_PrimitiveIndices array 123 | // using the 32-bit write intrinsic in the shader. 124 | PRIMITIVE_PACKING_FITTED_UINT8, 125 | 126 | // 4 uint8s per primitive, indices in first three 8-bit 127 | // makes decoding an individual triangle easy, but sacrifices bandwidth/storage 128 | NVMESHLET_PACKING_TRIANGLE_UINT32, 129 | }; 130 | 131 | // The default shown here packs uint8 tightly, and makes them accessible as 64-bit load. 132 | // Keep in sync with shader configuration! 
133 | 134 | static const PrimitiveIndexPacking PRIMITIVE_PACKING = PRIMITIVE_PACKING_FITTED_UINT8; 135 | // how many indices are fetched per thread, 8 or 4 136 | static const uint32_t PRIMITIVE_INDICES_PER_FETCH = 8; 137 | 138 | // Higher values mean slightly more wasted memory, but allow to use greater offsets within 139 | // the few bits we have, resulting in a higher total amount of triangles and vertices. 140 | static const uint32_t PRIMITIVE_PACKING_ALIGNMENT = 32; // must be multiple of PRIMITIVE_BITS_PER_FETCH 141 | static const uint32_t VERTEX_PACKING_ALIGNMENT = 16; 142 | 143 | struct MeshletPackBasicDesc 144 | { 145 | // 146 | // Bitfield layout : 147 | // 148 | // Field.X | Bits | Content 149 | // ------------|:----:|---------------------------------------------- 150 | // bboxMinX | 8 | bounding box coord relative to object bbox 151 | // bboxMinY | 8 | UNORM8 152 | // bboxMinZ | 8 | 153 | // vertexMax | 8 | number of vertex indices - 1 in the meshlet 154 | // ------------|:----:|---------------------------------------------- 155 | // Field.Y | | 156 | // ------------|:----:|---------------------------------------------- 157 | // bboxMaxX | 8 | bounding box coord relative to object bbox 158 | // bboxMaxY | 8 | UNORM8 159 | // bboxMaxZ | 8 | 160 | // primMax | 8 | number of primitives - 1 in the meshlet 161 | // ------------|:----:|---------------------------------------------- 162 | // Field.Z | | 163 | // ------------|:----:|---------------------------------------------- 164 | // coneOctX | 8 | octant coordinate for cone normal, SNORM8 165 | // coneOctY | 8 | octant coordinate for cone normal, SNORM8 166 | // coneAngle | 8 | -sin(cone.angle), SNORM8 167 | // vertexPack | 8 | vertex indices per 32 bits (1 or 2) 168 | // ------------|:----:|---------------------------------------------- 169 | // Field.W | | 170 | // ------------|:----:|---------------------------------------------- 171 | // packOffset | 32 | index buffer value of the first vertex 172 | 173 
| // 174 | // Note : the bitfield is not expanded in the struct due to differences in how 175 | // GPU & CPU compilers pack bit-fields and endian-ness. 176 | 177 | union 178 | { 179 | #if !defined(NDEBUG) && defined(_MSC_VER) 180 | struct 181 | { 182 | // warning, not portable 183 | unsigned bboxMinX : 8; 184 | unsigned bboxMinY : 8; 185 | unsigned bboxMinZ : 8; 186 | unsigned vertexMax : 8; 187 | 188 | unsigned bboxMaxX : 8; 189 | unsigned bboxMaxY : 8; 190 | unsigned bboxMaxZ : 8; 191 | unsigned primMax : 8; 192 | 193 | signed coneOctX : 8; 194 | signed coneOctY : 8; 195 | signed coneAngle : 8; 196 | unsigned vertexPack : 8; 197 | 198 | unsigned packOffset : 32; 199 | } _debug; 200 | #endif 201 | struct 202 | { 203 | uint32_t fieldX; 204 | uint32_t fieldY; 205 | uint32_t fieldZ; 206 | uint32_t fieldW; 207 | }; 208 | }; 209 | 210 | uint32_t getNumVertices() const { return unpack(fieldX, 8, 24) + 1; } 211 | void setNumVertices(uint32_t num) 212 | { 213 | assert(num <= MAX_VERTEX_COUNT_LIMIT); 214 | fieldX |= pack(num - 1, 8, 24); 215 | } 216 | 217 | uint32_t getNumPrims() const { return unpack(fieldY, 8, 24) + 1; } 218 | void setNumPrims(uint32_t num) 219 | { 220 | assert(num <= MAX_PRIMITIVE_COUNT_LIMIT); 221 | fieldY |= pack(num - 1, 8, 24); 222 | } 223 | 224 | uint32_t getNumVertexPack() const { return unpack(fieldZ, 8, 24); } 225 | void setNumVertexPack(uint32_t num) { fieldZ |= pack(num, 8, 24); } 226 | 227 | uint32_t getPackOffset() const { return fieldW; } 228 | void setPackOffset(uint32_t index) { fieldW = index; } 229 | 230 | uint32_t getVertexStart() const { return 0; } 231 | uint32_t getVertexSize() const 232 | { 233 | uint32_t vertexDiv = getNumVertexPack(); 234 | uint32_t vertexElems = ((getNumVertices() + vertexDiv - 1) / vertexDiv); 235 | 236 | return vertexElems; 237 | } 238 | 239 | uint32_t getPrimStart() const { return (getVertexStart() + getVertexSize() + 1) & (~1u); } 240 | uint32_t getPrimSize() const 241 | { 242 | uint32_t primDiv = 4; 243 | 
uint32_t primElems = ((getNumPrims() * 3 + PACKBASIC_PRIMITIVE_INDICES_PER_FETCH - 1) / primDiv); 244 | 245 | return primElems; 246 | } 247 | 248 | // positions are relative to object's bbox treated as UNORM 249 | void setBBox(uint8_t const bboxMin[3], uint8_t const bboxMax[3]) 250 | { 251 | fieldX |= pack(bboxMin[0], 8, 0) | pack(bboxMin[1], 8, 8) | pack(bboxMin[2], 8, 16); 252 | fieldY |= pack(bboxMax[0], 8, 0) | pack(bboxMax[1], 8, 8) | pack(bboxMax[2], 8, 16); 253 | } 254 | 255 | void getBBox(uint8_t bboxMin[3], uint8_t bboxMax[3]) const 256 | { 257 | bboxMin[0] = unpack(fieldX, 8, 0); 258 | bboxMin[0] = unpack(fieldX, 8, 8); 259 | bboxMin[0] = unpack(fieldX, 8, 16); 260 | 261 | bboxMax[0] = unpack(fieldY, 8, 0); 262 | bboxMax[0] = unpack(fieldY, 8, 8); 263 | bboxMax[0] = unpack(fieldY, 8, 16); 264 | } 265 | 266 | // uses octant encoding for cone Normal 267 | // positive angle means the cluster cannot be backface-culled 268 | // numbers are treated as SNORM 269 | void setCone(int8_t coneOctX, int8_t coneOctY, int8_t minusSinAngle) 270 | { 271 | uint8_t anglebits = minusSinAngle; 272 | fieldZ |= pack(coneOctX, 8, 0); 273 | fieldZ |= pack(coneOctY, 8, 8); 274 | fieldZ |= pack(minusSinAngle, 8, 16); 275 | } 276 | 277 | void getCone(int8_t& coneOctX, int8_t& coneOctY, int8_t& minusSinAngle) const 278 | { 279 | coneOctX = unpack(fieldZ, 8, 0); 280 | coneOctY = unpack(fieldZ, 8, 8); 281 | minusSinAngle = unpack(fieldZ, 8, 16); 282 | } 283 | 284 | MeshletPackBasicDesc() 285 | { 286 | fieldX = 0; 287 | fieldY = 0; 288 | fieldZ = 0; 289 | fieldW = 0; 290 | } 291 | 292 | static uint32_t pack(uint32_t value, int width, int offset) 293 | { 294 | return (uint32_t)((value & ((1 << width) - 1)) << offset); 295 | } 296 | static uint32_t unpack(uint32_t value, int width, int offset) 297 | { 298 | return (uint32_t)((value >> offset) & ((1 << width) - 1)); 299 | } 300 | }; 301 | 302 | struct MeshletPackBasic 303 | { 304 | 305 | // variable size 306 | // 307 | // aligned to 
// Variable-size view over the packed data of one meshlet.
struct MeshletPackBasic
{

  // variable size
  //
  // aligned to PACKBASIC_ALIGN bytes
  // - first sequence is either 16 or 32 bit indices per vertex
  //   (vertexPack is 2 or 1) respectively
  // - second sequence aligned to 8 bytes, primitive many 8 bit values
  //
  //
  // { u32[numVertices/vertexPack ...], padding..., u8[(numPrimitives) * 3 ...] }

  union
  {
    uint32_t data32[1];
    uint16_t data16[1];
    uint8_t  data8[1];
  };

  // Writes vertex slot `vertex`: as a 32-bit value when `vertexPack` is 1,
  // otherwise as a 16-bit value (upper bits of `indexValue` are dropped).
  // `PACKED_SIZE` is not consulted by this variant; upstream also carried a
  // disabled shift-packing variant that asserted against it.
  inline void setVertexIndex(uint32_t PACKED_SIZE, uint32_t vertex, uint32_t vertexPack, uint32_t indexValue)
  {
    (void)PACKED_SIZE;

    if (vertexPack != 1)
    {
      data16[vertex] = static_cast<uint16_t>(indexValue);
      return;
    }
    data32[vertex] = indexValue;
  }

  // Reads vertex slot `vertex` using the same width rule as setVertexIndex().
  inline uint32_t getVertexIndex(uint32_t vertex, uint32_t vertexPack) const
  {
    if (vertexPack == 1)
    {
      return data32[vertex];
    }
    return data16[vertex];
  }

  // Stores the three local indices of primitive `prim`.
  // `primStart` is given in 32-bit words, `PACKED_SIZE` likewise.
  inline void setPrimIndices(uint32_t PACKED_SIZE, uint32_t prim, uint32_t primStart, const uint8_t indices[3])
  {
    const uint32_t firstByte = primStart * 4 + prim * 3;

    assert(firstByte < PACKED_SIZE * 4);

    for (uint32_t k = 0; k < 3; ++k)
    {
      data8[firstByte + k] = indices[k];
    }
  }

  // Loads the three local indices of primitive `prim` into `indices`.
  inline void getPrimIndices(uint32_t prim, uint32_t primStart, uint8_t indices[3]) const
  {
    const uint32_t firstByte = primStart * 4 + prim * 3;

    for (uint32_t k = 0; k < 3; ++k)
    {
      indices[k] = data8[firstByte + k];
    }
  }
};
401 | 402 | // 403 | // Bitfield layout : 404 | // 405 | // Field.X | Bits | Content 406 | // ------------|:----:|---------------------------------------------- 407 | // bboxMinX | 8 | bounding box coord relative to object bbox 408 | // bboxMinY | 8 | UNORM8 409 | // bboxMinZ | 8 | 410 | // vertexMax | 8 | number of vertex indices - 1 in the meshlet 411 | // ------------|:----:|---------------------------------------------- 412 | // Field.Y | | 413 | // ------------|:----:|---------------------------------------------- 414 | // bboxMaxX | 8 | bounding box coord relative to object bbox 415 | // bboxMaxY | 8 | UNORM8 416 | // bboxMaxZ | 8 | 417 | // primMax | 8 | number of primitives - 1 in the meshlet 418 | // ------------|:----:|---------------------------------------------- 419 | // Field.Z | | 420 | // ------------|:----:|---------------------------------------------- 421 | // vertexBegin | 20 | offset to the first vertex index, times alignment 422 | // coneOctX | 8 | octant coordinate for cone normal, SNORM8 423 | // coneAngleLo | 4 | lower 4 bits of -sin(cone.angle), SNORM8 424 | // ------------|:----:|---------------------------------------------- 425 | // Field.W | | 426 | // ------------|:----:|---------------------------------------------- 427 | // primBegin | 20 | offset to the first primitive index, times alignment 428 | // coneOctY | 8 | octant coordinate for cone normal, SNORM8 429 | // coneAngleHi | 4 | higher 4 bits of -sin(cone.angle), SNORM8 430 | // 431 | // Note : the bitfield is not expanded in the struct due to differences in how 432 | // GPU & CPU compilers pack bit-fields and endian-ness. 
433 | 434 | union 435 | { 436 | #if !defined(NDEBUG) && defined(_MSC_VER) 437 | struct 438 | { 439 | // warning, not portable 440 | unsigned bboxMinX : 8; 441 | unsigned bboxMinY : 8; 442 | unsigned bboxMinZ : 8; 443 | unsigned vertexMax : 8; 444 | 445 | unsigned bboxMaxX : 8; 446 | unsigned bboxMaxY : 8; 447 | unsigned bboxMaxZ : 8; 448 | unsigned primMax : 8; 449 | 450 | unsigned vertexBegin : 20; 451 | signed coneOctX : 8; 452 | unsigned coneAngleLo : 4; 453 | 454 | unsigned primBegin : 20; 455 | signed coneOctY : 8; 456 | unsigned coneAngleHi : 4; 457 | } _debug; 458 | #endif 459 | struct 460 | { 461 | uint32_t fieldX; 462 | uint32_t fieldY; 463 | uint32_t fieldZ; 464 | uint32_t fieldW; 465 | }; 466 | }; 467 | 468 | uint32_t getNumVertices() const { return unpack(fieldX, 8, 24) + 1; } 469 | void setNumVertices(uint32_t num) 470 | { 471 | assert(num <= MAX_VERTEX_COUNT_LIMIT); 472 | fieldX |= pack(num - 1, 8, 24); 473 | } 474 | 475 | uint32_t getNumPrims() const { return unpack(fieldY, 8, 24) + 1; } 476 | void setNumPrims(uint32_t num) 477 | { 478 | assert(num <= MAX_PRIMITIVE_COUNT_LIMIT); 479 | fieldY |= pack(num - 1, 8, 24); 480 | } 481 | 482 | uint32_t getVertexBegin() const { return unpack(fieldZ, 20, 0) * VERTEX_PACKING_ALIGNMENT; } 483 | void setVertexBegin(uint32_t begin) 484 | { 485 | assert(begin % VERTEX_PACKING_ALIGNMENT == 0); 486 | assert(begin / VERTEX_PACKING_ALIGNMENT < ((1 << 20) - 1)); 487 | fieldZ |= pack(begin / VERTEX_PACKING_ALIGNMENT, 20, 0); 488 | } 489 | 490 | uint32_t getPrimBegin() const { return unpack(fieldW, 20, 0) * PRIMITIVE_PACKING_ALIGNMENT; } 491 | void setPrimBegin(uint32_t begin) 492 | { 493 | assert(begin % PRIMITIVE_PACKING_ALIGNMENT == 0); 494 | assert(begin / PRIMITIVE_PACKING_ALIGNMENT < ((1 << 20) - 1)); 495 | fieldW |= pack(begin / PRIMITIVE_PACKING_ALIGNMENT, 20, 0); 496 | } 497 | 498 | // positions are relative to object's bbox treated as UNORM 499 | void setBBox(uint8_t const bboxMin[3], uint8_t const bboxMax[3]) 
500 | { 501 | fieldX |= pack(bboxMin[0], 8, 0) | pack(bboxMin[1], 8, 8) | pack(bboxMin[2], 8, 16); 502 | 503 | fieldY |= pack(bboxMax[0], 8, 0) | pack(bboxMax[1], 8, 8) | pack(bboxMax[2], 8, 16); 504 | } 505 | 506 | void getBBox(uint8_t bboxMin[3], uint8_t bboxMax[3]) const 507 | { 508 | bboxMin[0] = unpack(fieldX, 8, 0); 509 | bboxMin[1] = unpack(fieldX, 8, 8); 510 | bboxMin[2] = unpack(fieldX, 8, 16); 511 | 512 | bboxMax[0] = unpack(fieldY, 8, 0); 513 | bboxMax[1] = unpack(fieldY, 8, 8); 514 | bboxMax[2] = unpack(fieldY, 8, 16); 515 | } 516 | 517 | // uses octant encoding for cone Normal 518 | // positive angle means the cluster cannot be backface-culled 519 | // numbers are treated as SNORM 520 | void setCone(int8_t coneOctX, int8_t coneOctY, int8_t minusSinAngle) 521 | { 522 | uint8_t anglebits = minusSinAngle; 523 | fieldZ |= pack(coneOctX, 8, 20) | pack((anglebits >> 0) & 0xF, 4, 28); 524 | fieldW |= pack(coneOctY, 8, 20) | pack((anglebits >> 4) & 0xF, 4, 28); 525 | } 526 | 527 | void getCone(int8_t& coneOctX, int8_t& coneOctY, int8_t& minusSinAngle) const 528 | { 529 | coneOctX = unpack(fieldZ, 8, 20); 530 | coneOctY = unpack(fieldW, 8, 20); 531 | minusSinAngle = unpack(fieldZ, 4, 28) | (unpack(fieldW, 4, 28) << 4); 532 | } 533 | 534 | MeshletDesc() { memset(this, 0, sizeof(MeshletDesc)); } 535 | 536 | static uint32_t pack(uint32_t value, int width, int offset) 537 | { 538 | return (uint32_t)((value & ((1 << width) - 1)) << offset); 539 | } 540 | static uint32_t unpack(uint32_t value, int width, int offset) 541 | { 542 | return (uint32_t)((value >> offset) & ((1 << width) - 1)); 543 | } 544 | 545 | static bool isPrimBeginLegal(uint32_t begin) { return begin / PRIMITIVE_PACKING_ALIGNMENT < ((1 << 20) - 1); } 546 | 547 | static bool isVertexBeginLegal(uint32_t begin) { return begin / VERTEX_PACKING_ALIGNMENT < ((1 << 20) - 1); } 548 | }; 549 | 550 | 551 | 552 | struct MeshletGeometry 553 | { 554 | // The vertex indices are similar to provided to the provided 
555 | // triangle index buffer. Instead of each triangle using 3 vertex indices, 556 | // each meshlet holds a unique set of variable vertex indices. 557 | std::vector vertexIndices; 558 | 559 | // Each triangle is using 3 primitive indices, these indices 560 | // are local to the meshlet's unique set of vertices. 561 | // Due to alignment the number of primitiveIndices != input triangle indices. 562 | std::vector primitiveIndices; 563 | 564 | // Each meshlet contains offsets into the above arrays. 565 | std::vector meshletDescriptors; 566 | }; 567 | 568 | struct MeshletGeometry16 569 | { 570 | // The vertex indices are similar to provided to the provided 571 | // triangle index buffer. Instead of each triangle using 3 vertex indices, 572 | // each meshlet holds a unique set of variable vertex indices. 573 | std::vector vertexIndices; 574 | 575 | // Each triangle is using 3 primitive indices, these indices 576 | // are local to the meshlet's unique set of vertices. 577 | // Due to alignment the number of primitiveIndices != input triangle indices. 578 | std::vector primitiveIndices; 579 | 580 | // Each meshlet contains offsets into the above arrays. 
581 | std::vector meshletDescriptors; 582 | }; 583 | 584 | inline uint32_t computeTasksCount(uint32_t numMeshlets) 585 | { 586 | return (numMeshlets + MESHLETS_PER_TASK - 1) / MESHLETS_PER_TASK; 587 | } 588 | 589 | inline uint32_t computePackedPrimitiveCount(uint32_t numTris) 590 | { 591 | if (PRIMITIVE_PACKING != PRIMITIVE_PACKING_FITTED_UINT8) 592 | return numTris; 593 | 594 | uint32_t indices = numTris * 3; 595 | // align to PRIMITIVE_INDICES_PER_FETCH 596 | uint32_t indicesFit = (indices / PRIMITIVE_INDICES_PER_FETCH) * PRIMITIVE_INDICES_PER_FETCH; 597 | uint32_t numTrisFit = indicesFit / 3; 598 | ; 599 | assert(numTrisFit > 0); 600 | return numTrisFit; 601 | } 602 | 603 | inline uint64_t computeCommonAlignedSize(uint64_t size) 604 | { 605 | // To be able to store different data of the meshlet (desc, prim & vertex indices) in the same buffer, 606 | // we need to have a common alignment that keeps all the data natural aligned. 607 | 608 | static const uint64_t align = std::max(std::max(sizeof(MeshletDesc), sizeof(uint8_t) * PRIMITIVE_PACKING_ALIGNMENT), 609 | sizeof(uint32_t) * VERTEX_PACKING_ALIGNMENT); 610 | static_assert(align % sizeof(MeshletDesc) == 0, "nvmeshlet failed common align"); 611 | static_assert(align % sizeof(uint8_t) * PRIMITIVE_PACKING_ALIGNMENT == 0, "nvmeshlet failed common align"); 612 | static_assert(align % sizeof(uint32_t) * VERTEX_PACKING_ALIGNMENT == 0, "nvmeshlet failed common align"); 613 | 614 | return ((size + align - 1) / align) * align; 615 | } 616 | 617 | inline uint64_t computeIndicesAlignedSize(uint64_t size) 618 | { 619 | // To be able to store different data of the meshlet (prim & vertex indices) in the same buffer, 620 | // we need to have a common alignment that keeps all the data natural aligned. 
621 | 622 | static const uint64_t align = std::max(sizeof(uint8_t) * PRIMITIVE_PACKING_ALIGNMENT, sizeof(uint32_t) * VERTEX_PACKING_ALIGNMENT); 623 | static_assert(align % sizeof(uint8_t) * PRIMITIVE_PACKING_ALIGNMENT == 0, "nvmeshlet failed common align"); 624 | static_assert(align % sizeof(uint32_t) * VERTEX_PACKING_ALIGNMENT == 0, "nvmeshlet failed common align"); 625 | 626 | return ((size + align - 1) / align) * align; 627 | } 628 | 629 | } // end namespace NVMeshlet 630 | 631 | 632 | namespace mm { 633 | 634 | 635 | 636 | // must match cadscene! 637 | struct ObjectData { 638 | glm::mat4 worldMatrix; 639 | glm::mat4 worldMatrixIT; 640 | glm::mat4 objectMatrix; 641 | glm::vec4 bboxMin; 642 | glm::vec4 bboxMax; 643 | glm::vec3 _pad0; 644 | float winding; 645 | glm::vec4 color; 646 | }; 647 | 648 | struct Vertex { 649 | glm::vec3 pos; 650 | glm::vec3 color; 651 | glm::vec2 texCoord; 652 | 653 | bool operator==(const Vertex& other) const { 654 | return pos == other.pos && color == other.color && texCoord == other.texCoord; 655 | } 656 | 657 | glm::vec3 operator-(const Vertex& other) const { 658 | return glm::vec3(pos.x - other.pos.x, pos.y - other.pos.y, pos.z - other.pos.z); 659 | } 660 | 661 | float euclideanDistance(const Vertex& other) const { 662 | return std::sqrt(std::pow(other.pos.x - pos.x,2) + std::pow(other.pos.y - pos.y, 2) + std::pow(other.pos.z - pos.z, 2)); 663 | } 664 | }; 665 | 666 | struct Vert; 667 | 668 | struct Triangle { 669 | std::vector vertices; 670 | std::vector neighbours; 671 | float centroid[3]{}; 672 | uint32_t id; 673 | uint32_t flag = -1; 674 | uint32_t dist; 675 | }; 676 | 677 | struct Vert { 678 | std::vector neighbours; 679 | unsigned int index; 680 | unsigned int degree; 681 | }; 682 | 683 | template 684 | struct MeshletCache { 685 | PrimitiveIndexType primitives[MAX_PRIMITIVE_COUNT_LIMIT][3]; 686 | uint32_t vertices[MAX_VERTEX_COUNT_LIMIT]; // this is the actual index buffer 687 | uint32_t numPrims; 688 | uint32_t numVertices; 
689 | Vertex actualVertices[MAX_VERTEX_COUNT_LIMIT]; 690 | 691 | // funky version! 692 | uint32_t numVertexDeltaBits; 693 | uint32_t numVertexAllBits; 694 | 695 | uint32_t primitiveBits = 1; 696 | uint32_t maxBlockBits = ~0; 697 | 698 | bool empty() const { return numVertices == 0; } 699 | 700 | void reset() { 701 | numPrims = 0; 702 | numVertices = 0; 703 | numVertexDeltaBits = 0; 704 | numVertexAllBits = 0; 705 | 706 | memset(vertices, 0xFFFFFFFF, sizeof(vertices)); 707 | memset(actualVertices, 0x00000000, sizeof(actualVertices)); 708 | } 709 | 710 | bool fitsBlock() const 711 | { 712 | uint32_t primBits = (numPrims - 1) * 3 * primitiveBits; 713 | uint32_t vertBits = (numVertices - 1) * numVertexDeltaBits; 714 | bool state = (primBits + vertBits) <= maxBlockBits; 715 | 716 | return state; 717 | } 718 | 719 | // check if cache can hold one more triangle 720 | bool cannotInsert(const VertexIndexType* indices, uint32_t maxVertexSize, uint32_t maxPrimitiveSize) const 721 | { 722 | // skip degenerate 723 | if (indices[0] == indices[1] || indices[0] == indices[2] || indices[1] == indices[2]) 724 | { 725 | return false; 726 | } 727 | 728 | uint32_t found = 0; 729 | 730 | // check if any of the incoming three indices are already in cache 731 | for (uint32_t v = 0; v < numVertices; ++v) { 732 | for (int i = 0; i < 3; ++i) { 733 | uint32_t idx = indices[i]; 734 | if (vertices[v] == idx) { 735 | found++; 736 | } 737 | } 738 | } 739 | // out of bounds 740 | return (numVertices + 3 - found) > maxVertexSize || (numPrims + 1) > maxPrimitiveSize; 741 | } 742 | 743 | bool cannotInsertBlock(const VertexIndexType* indices, uint32_t maxVertexSize, uint32_t maxPrimitiveSize) const 744 | { 745 | // skip degenerate 746 | if (indices[0] == indices[1] || indices[0] == indices[2] || indices[1] == indices[2]) 747 | { 748 | return false; 749 | } 750 | 751 | uint32_t found = 0; 752 | 753 | // check if any of the incoming three indices are already in cache 754 | for (uint32_t v = 0; v < 
numVertices; ++v) { 755 | for (int i = 0; i < 3; ++i) { 756 | uint32_t idx = indices[i]; 757 | if (vertices[v] == idx) { 758 | found++; 759 | } 760 | } 761 | } 762 | 763 | uint32_t firstVertex = numVertices ? vertices[0] : indices[0]; 764 | uint32_t cmpBits = std::max(findMSB((firstVertex ^ indices[0]) | 1), 765 | std::max(findMSB((firstVertex ^ indices[1]) | 1), findMSB((firstVertex ^ indices[2]) | 1))) 766 | + 1; 767 | 768 | uint32_t deltaBits = std::max(cmpBits, numVertexDeltaBits); 769 | 770 | uint32_t newVertices = numVertices + 3 - found; 771 | uint32_t newPrims = numPrims + 1; 772 | 773 | uint32_t newBits; 774 | 775 | { 776 | uint32_t newVertBits = (newVertices - 1) * deltaBits; 777 | uint32_t newPrimBits = (newPrims - 1) * 3 * primitiveBits; 778 | newBits = newVertBits + newPrimBits; 779 | } 780 | 781 | 782 | // out of bounds 783 | return (numVertices + 3 - found) > maxVertexSize || (numPrims + 1) > maxPrimitiveSize; 784 | } 785 | 786 | // insert new triangle 787 | void insert(const VertexIndexType* indices, const Vertex* verts) 788 | { 789 | uint32_t triangle[3]; 790 | 791 | // skip degenerate 792 | if (indices[0] == indices[1] || indices[0] == indices[2] || indices[1] == indices[2]) 793 | { 794 | return; 795 | } 796 | 797 | for (int i = 0; i < 3; ++i) { 798 | // take out an index 799 | uint32_t idx = indices[i]; 800 | bool found = false; 801 | 802 | // check if idx is already in cache 803 | for (uint32_t v = 0; v < numVertices; ++v) 804 | { 805 | if (idx == vertices[v]) 806 | { 807 | triangle[i] = v; 808 | found = true; 809 | break; 810 | } 811 | } 812 | // if idx is not in cache add it 813 | if (!found) 814 | { 815 | vertices[numVertices] = idx; 816 | actualVertices[numVertices] = verts[idx]; 817 | triangle[i] = numVertices; 818 | 819 | if (numVertices) 820 | { 821 | numVertexDeltaBits = std::max(findMSB((idx ^ vertices[0]) | 1) + 1, numVertexDeltaBits); 822 | } 823 | numVertexAllBits = std::max(numVertexAllBits, findMSB(idx) + 1); 824 | 825 | 
numVertices++; 826 | } 827 | } 828 | 829 | primitives[numPrims][0] = triangle[0]; 830 | primitives[numPrims][1] = triangle[1]; 831 | primitives[numPrims][2] = triangle[2]; 832 | numPrims++; 833 | 834 | assert(fitsBlock()); 835 | } 836 | }; 837 | 838 | struct MeshletMeshDesc 839 | { 840 | // A Meshlet contains a set of unique vertices 841 | // and a group of primitives that are defined by 842 | // indices into this local set of vertices. 843 | // 844 | // The information here is used by a single 845 | // mesh shader's workgroup to execute vertex 846 | // and primitive shading. 847 | // It is packed into single "uvec4"/"uint4" value 848 | // so the hardware can leverage 128-bit loads in the 849 | // shading languages. 850 | // The offsets used here are for the appropriate 851 | // indices arrays. 852 | // 853 | // A bounding box as well as an angled cone is stored to allow 854 | // quick culling in the task shader. 855 | // The current packing is just a basic implementation, that 856 | // may be customized, but ideally fits within 128 bit. 
857 | 858 | // 859 | // Bitfield layout : 860 | // 861 | // Field.X | Bits | Content 862 | // ------------|:----:|---------------------------------------------- 863 | // bboxMinX | 8 | bounding box coord relative to object bbox 864 | // bboxMinY | 8 | UNORM8 865 | // bboxMinZ | 8 | 866 | // vertexMax | 8 | number of vertex indices - 1 in the meshlet 867 | // ------------|:----:|---------------------------------------------- 868 | // Field.Y | | 869 | // ------------|:----:|---------------------------------------------- 870 | // bboxMaxX | 8 | bounding box coord relative to object bbox 871 | // bboxMaxY | 8 | UNORM8 872 | // bboxMaxZ | 8 | 873 | // primMax | 8 | number of primitives - 1 in the meshlet 874 | // ------------|:----:|---------------------------------------------- 875 | // Field.Z | | 876 | // ------------|:----:|---------------------------------------------- 877 | // vertexBegin | 32 | offset to the first vertex index, times alignment 878 | // ------------|:----:|---------------------------------------------- 879 | // Field.W | | 880 | // ------------|:----:|---------------------------------------------- 881 | // primBegin | 32 | offset to the first primitive index, times alignment 882 | 883 | union 884 | { 885 | #if !defined(NDEBUG) && defined(_MSC_VER) 886 | struct 887 | { 888 | // warning, not portable 889 | unsigned bboxMinX : 8; 890 | unsigned bboxMinY : 8; 891 | unsigned bboxMinZ : 8; 892 | unsigned vertexMax : 8; 893 | 894 | unsigned bboxMaxX : 8; 895 | unsigned bboxMaxY : 8; 896 | unsigned bboxMaxZ : 8; 897 | unsigned primMax : 8; 898 | 899 | unsigned vertexBegin : 20; 900 | signed coneOctX : 8; 901 | unsigned coneAngleLo : 4; 902 | 903 | unsigned primBegin : 20; 904 | signed coneOctY : 8; 905 | unsigned coneAngleHi : 4; 906 | } _debug; 907 | #endif 908 | struct 909 | { 910 | uint32_t fieldX; 911 | uint32_t fieldY; 912 | uint32_t fieldZ; 913 | uint32_t fieldW; 914 | }; 915 | }; 916 | uint32_t getNumVertices() const { return unpack(fieldX, 8, 24) 
+ 1; } 917 | void setNumVertices(uint32_t num) 918 | { 919 | assert(num <= MAX_VERTEX_COUNT_LIMIT); 920 | fieldX |= pack(num - 1, 8, 24); 921 | } 922 | 923 | uint32_t getNumPrims() const { return unpack(fieldY, 8, 24) + 1; } 924 | void setNumPrims(uint32_t num) 925 | { 926 | assert(num <= MAX_PRIMITIVE_COUNT_LIMIT); 927 | fieldY |= pack(num - 1, 8, 24); 928 | } 929 | 930 | uint32_t getVertexBegin() const { return fieldZ;/*unpack(fieldZ, 20, 0) * NVMeshlet::VERTEX_PACKING_ALIGNMENT;*/ } 931 | void setVertexBegin(uint32_t begin) 932 | { 933 | //assert(begin % NVMeshlet::VERTEX_PACKING_ALIGNMENT == 0); 934 | //assert(begin / NVMeshlet::VERTEX_PACKING_ALIGNMENT < ((1 << 20) - 1)); 935 | //fieldZ |= pack(begin / NVMeshlet::VERTEX_PACKING_ALIGNMENT, 20, 0); 936 | fieldZ = begin; 937 | } 938 | 939 | uint32_t getPrimBegin() const { return fieldW;/*unpack(fieldW, 20, 0) * NVMeshlet::PRIMITIVE_PACKING_ALIGNMENT;*/ } 940 | void setPrimBegin(uint32_t begin) 941 | { 942 | //assert(begin % NVMeshlet::PRIMITIVE_PACKING_ALIGNMENT == 0); 943 | //assert(begin / NVMeshlet::PRIMITIVE_PACKING_ALIGNMENT < ((1 << 20) - 1)); 944 | //fieldW |= pack(begin / NVMeshlet::PRIMITIVE_PACKING_ALIGNMENT, 20, 0); 945 | fieldW = begin; 946 | } 947 | 948 | // positions are relative to object's bbox treated as UNORM 949 | void setBBox(uint8_t const bboxMin[3], uint8_t const bboxMax[3]) 950 | { 951 | fieldX |= pack(bboxMin[0], 8, 0) | pack(bboxMin[1], 8, 8) | pack(bboxMin[2], 8, 16); 952 | 953 | fieldY |= pack(bboxMax[0], 8, 0) | pack(bboxMax[1], 8, 8) | pack(bboxMax[2], 8, 16); 954 | } 955 | 956 | void getBBox(uint8_t bboxMin[3], uint8_t bboxMax[3]) const 957 | { 958 | bboxMin[0] = unpack(fieldX, 8, 0); 959 | bboxMin[0] = unpack(fieldX, 8, 8); 960 | bboxMin[0] = unpack(fieldX, 8, 16); 961 | 962 | bboxMax[0] = unpack(fieldY, 8, 0); 963 | bboxMax[0] = unpack(fieldY, 8, 8); 964 | bboxMax[0] = unpack(fieldY, 8, 16); 965 | } 966 | 967 | // uses octant encoding for cone Normal 968 | // positive angle means 
the cluster cannot be backface-culled 969 | // numbers are treated as SNORM 970 | void setCone(int8_t coneOctX, int8_t coneOctY, int8_t minusSinAngle) 971 | { 972 | uint8_t anglebits = minusSinAngle; 973 | fieldZ |= pack(coneOctX, 8, 20) | pack((anglebits >> 0) & 0xF, 4, 28); 974 | fieldW |= pack(coneOctY, 8, 20) | pack((anglebits >> 4) & 0xF, 4, 28); 975 | } 976 | 977 | void getCone(int8_t& coneOctX, int8_t& coneOctY, int8_t& minusSinAngle) const 978 | { 979 | coneOctX = unpack(fieldZ, 8, 20); 980 | coneOctY = unpack(fieldW, 8, 20); 981 | minusSinAngle = unpack(fieldZ, 4, 28) | (unpack(fieldW, 4, 28) << 4); 982 | } 983 | 984 | MeshletMeshDesc() { memset(this, 0, sizeof(MeshletMeshDesc)); } 985 | 986 | static uint32_t pack(uint32_t value, int width, int offset) 987 | { 988 | return (uint32_t)((value & ((1 << width) - 1)) << offset); 989 | } 990 | static uint32_t unpack(uint32_t value, int width, int offset) 991 | { 992 | return (uint32_t)((value >> offset) & ((1 << width) - 1)); 993 | } 994 | 995 | static bool isPrimBeginLegal(uint32_t begin) { return begin / NVMeshlet::PRIMITIVE_PACKING_ALIGNMENT < ((1 << 32) - 1); } 996 | 997 | static bool isVertexBeginLegal(uint32_t begin) { return begin / NVMeshlet::VERTEX_PACKING_ALIGNMENT < ((1 << 32) - 1); } 998 | }; 999 | 1000 | struct MeshletTaskDesc 1001 | { 1002 | // A Meshlet contains a set of unique vertices 1003 | // and a group of primitives that are defined by 1004 | // indices into this local set of vertices. 1005 | // 1006 | // The information here is used by a single 1007 | // mesh shader's workgroup to execute vertex 1008 | // and primitive shading. 1009 | // It is packed into single "uvec4"/"uint4" value 1010 | // so the hardware can leverage 128-bit loads in the 1011 | // shading languages. 1012 | // The offsets used here are for the appropriate 1013 | // indices arrays. 1014 | // 1015 | // A bounding box as well as an angled cone is stored to allow 1016 | // quick culling in the task shader. 
1017 | // The current packing is just a basic implementation, that 1018 | // may be customized, but ideally fits within 128 bit. 1019 | 1020 | // 1021 | // Bitfield layout : 1022 | // 1023 | // Field.X | Bits | Content 1024 | // ------------|:----:|---------------------------------------------- 1025 | // bboxMinX | 8 | bounding box coord relative to object bbox 1026 | // bboxMinY | 8 | UNORM8 1027 | // bboxMinZ | 8 | 1028 | // vertexMax | 8 | number of vertex indices - 1 in the meshlet 1029 | // ------------|:----:|---------------------------------------------- 1030 | // Field.Y | | 1031 | // ------------|:----:|---------------------------------------------- 1032 | // bboxMaxX | 8 | bounding box coord relative to object bbox 1033 | // bboxMaxY | 8 | UNORM8 1034 | // bboxMaxZ | 8 | 1035 | // primMax | 8 | number of primitives - 1 in the meshlet 1036 | // ------------|:----:|---------------------------------------------- 1037 | // Field.Z | | 1038 | // ------------|:----:|---------------------------------------------- 1039 | // vertexBegin | 20 | offset to the first vertex index, times alignment 1040 | // coneOctX | 8 | octant coordinate for cone normal, SNORM8 1041 | // coneAngleLo | 4 | lower 4 bits of -sin(cone.angle), SNORM8 1042 | // ------------|:----:|---------------------------------------------- 1043 | // Field.W | | 1044 | // ------------|:----:|---------------------------------------------- 1045 | // primBegin | 20 | offset to the first primitive index, times alignment 1046 | // coneOctY | 8 | octant coordinate for cone normal, SNORM8 1047 | // coneAngleHi | 4 | higher 4 bits of -sin(cone.angle), SNORM8 1048 | // 1049 | // Note : the bitfield is not expanded in the struct due to differences in how 1050 | // GPU & CPU compilers pack bit-fields and endian-ness. 
1051 | 1052 | union 1053 | { 1054 | #if !defined(NDEBUG) && defined(_MSC_VER) 1055 | struct 1056 | { 1057 | // warning, not portable 1058 | unsigned bboxMinX : 8; 1059 | unsigned bboxMinY : 8; 1060 | unsigned bboxMinZ : 8; 1061 | unsigned vertexMax : 8; 1062 | 1063 | unsigned bboxMaxX : 8; 1064 | unsigned bboxMaxY : 8; 1065 | unsigned bboxMaxZ : 8; 1066 | unsigned primMax : 8; 1067 | 1068 | unsigned vertexBegin : 20; 1069 | signed coneOctX : 8; 1070 | unsigned coneAngleLo : 4; 1071 | 1072 | unsigned primBegin : 20; 1073 | signed coneOctY : 8; 1074 | unsigned coneAngleHi : 4; 1075 | } _debug; 1076 | #endif 1077 | struct 1078 | { 1079 | uint32_t fieldX; 1080 | uint32_t fieldY; 1081 | uint32_t fieldZ; 1082 | uint32_t fieldW; 1083 | }; 1084 | }; 1085 | }; 1086 | 1087 | struct MeshletGeometry 1088 | { 1089 | // The vertex indices are similar to provided to the provided 1090 | // triangle index buffer. Instead of each triangle using 3 vertex indices, 1091 | // each meshlet holds a unique set of variable vertex indices. 1092 | std::vector vertexIndices; 1093 | 1094 | // Each triangle is using 3 primitive indices, these indices 1095 | // are local to the meshlet's unique set of vertices. 1096 | // Due to alignment the number of primitiveIndices != input triangle indices. 1097 | std::vector primitiveIndices; 1098 | std::vector vertices; 1099 | 1100 | // Each meshlet contains offsets into the above arrays. 
1101 | std::vector meshletMeshDescriptors; 1102 | std::vector meshletTaskDescriptors; 1103 | //std::vector meshletTaskDescriptors; 1104 | }; 1105 | } 1106 | 1107 | -------------------------------------------------------------------------------- /images/bounding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Senbyo/meshletmaker/ef4082930e2b7c317adbe3bdf7a8ad94871e4313/images/bounding.png -------------------------------------------------------------------------------- /images/greedy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Senbyo/meshletmaker/ef4082930e2b7c317adbe3bdf7a8ad94871e4313/images/greedy.png -------------------------------------------------------------------------------- /images/kmedoids.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Senbyo/meshletmaker/ef4082930e2b7c317adbe3bdf7a8ad94871e4313/images/kmedoids.png -------------------------------------------------------------------------------- /images/tipsynvidia.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Senbyo/meshletmaker/ef4082930e2b7c317adbe3bdf7a8ad94871e4313/images/tipsynvidia.png -------------------------------------------------------------------------------- /images/zeux.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Senbyo/meshletmaker/ef4082930e2b7c317adbe3bdf7a8ad94871e4313/images/zeux.png --------------------------------------------------------------------------------