├── External ├── tiny_obj_loader │ ├── tiny_obj_loader.cc │ └── CMakeLists.txt ├── stb │ └── CMakeLists.txt ├── CMakeLists.txt └── meshoptimizer │ ├── CMakeLists.txt │ ├── LICENSE.md │ ├── src │ ├── vfetchanalyzer.cpp │ ├── vfetchoptimizer.cpp │ ├── vcacheanalyzer.cpp │ ├── indexgenerator.cpp │ ├── stripifier.cpp │ ├── overdrawanalyzer.cpp │ ├── simplifier.cpp │ ├── overdrawoptimizer.cpp │ ├── vcacheoptimizer.cpp │ ├── indexcodec.cpp │ ├── meshoptimizer.h │ └── vertexcodec.cpp │ └── README.md ├── Scripts ├── cmake-vs2015-vk.cmd └── cmake-vs2017-vk.cmd ├── .gitmodules ├── .gitignore ├── CMakeLists.txt ├── Source ├── Shaders │ ├── ModelIndexed.frag │ ├── ModelIndexed.vert │ ├── ModelManual.vert │ ├── ModelNativeAMD.vert │ ├── ModelPassthrough.vert │ ├── ModelPassthrough.frag │ ├── Model.frag │ ├── Model.vert │ ├── ModelPassthroughTextured.frag │ ├── ModelBarycentrics.geom │ ├── ModelManual.frag │ ├── ModelNativeAMD.frag │ ├── ModelNativeAMDTextured.frag │ ├── ModelPassthrough.geom │ ├── ModelPassthroughTextured.geom │ └── Common.glsl ├── BaseApplication.h ├── DemoUtils.h ├── CMakeLists.txt ├── DemoUtils.cpp ├── Barycentrics.h ├── BaseApplication.cpp └── Barycentrics.cpp ├── LICENSE └── README.md /External/tiny_obj_loader/tiny_obj_loader.cc: -------------------------------------------------------------------------------- 1 | #define TINYOBJLOADER_IMPLEMENTATION 2 | #include "tiny_obj_loader.h" 3 | -------------------------------------------------------------------------------- /Scripts/cmake-vs2015-vk.cmd: -------------------------------------------------------------------------------- 1 | del ..\Build\CMakeCache.txt 2 | cmake -G "Visual Studio 14 2015 Win64" -B..\Build -H.. 3 | -------------------------------------------------------------------------------- /Scripts/cmake-vs2017-vk.cmd: -------------------------------------------------------------------------------- 1 | del ..\Build\CMakeCache.txt 2 | cmake -G "Visual Studio 15 2017 Win64" -B..\Build -H.. 
3 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "External/librush"] 2 | path = External/librush 3 | url = https://github.com/kayru/librush.git 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | Build 3 | build 4 | *.spv 5 | *.sublime-workspace 6 | /.vscode/* 7 | *.user 8 | .vs 9 | x64 10 | -------------------------------------------------------------------------------- /External/stb/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(stb INTERFACE) 2 | target_include_directories(stb INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) 3 | -------------------------------------------------------------------------------- /External/tiny_obj_loader/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(tiny_obj_loader STATIC tiny_obj_loader.cc) 2 | target_include_directories(tiny_obj_loader INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) 3 | -------------------------------------------------------------------------------- /External/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory("librush") 2 | 3 | if (MSVC) 4 | add_compile_options(-W0) 5 | else() 6 | add_compile_options(-w) 7 | endif() 8 | 9 | add_subdirectory("meshoptimizer") 10 | add_subdirectory("stb") 11 | add_subdirectory("tiny_obj_loader") 12 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.7) 2 | set_property(GLOBAL PROPERTY USE_FOLDERS ON) 3 | 4 | project(Barycentrics) 5 | 6 | set(RUSH_RENDER_API 
"VK" CACHE STRING "Force Vulkan renderer") 7 | 8 | find_program(GLSLC NAMES glslc PATHS 9 | $ENV{VULKAN_SDK}/Bin 10 | $ENV{VK_SDK_PATH}/Bin 11 | $ENV{PATH} 12 | "~/bin" 13 | ) 14 | 15 | add_subdirectory("External") 16 | add_subdirectory("Source") 17 | -------------------------------------------------------------------------------- /Source/Shaders/ModelIndexed.frag: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "Common.glsl" 4 | 5 | layout (location = 0) in vec2 v_tex0; 6 | layout (location = 2) in vec3 v_viewVector; 7 | 8 | layout (location = 0) out vec4 fragColor0; 9 | 10 | void main() 11 | { 12 | if (g_useTexture) 13 | { 14 | fragColor0.rgb = texture(albedoSampler, v_tex0).rgb; 15 | } 16 | else 17 | { 18 | fragColor0.rgb = vec3(v_tex0, 0.0); 19 | } 20 | 21 | fragColor0.a = 1; 22 | } 23 | -------------------------------------------------------------------------------- /Source/Shaders/ModelIndexed.vert: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "Common.glsl" 4 | 5 | layout (location = 0) in vec3 a_pos0; 6 | layout (location = 1) in vec2 a_tex0; 7 | 8 | layout (location = 0) out vec2 v_tex0; 9 | layout (location = 2) out vec3 v_viewVector; 10 | 11 | void main() 12 | { 13 | vec3 worldPos = (vec4(a_pos0, 1) * g_matWorld).xyz; 14 | gl_Position = vec4(worldPos, 1) * g_matViewProj; 15 | v_tex0 = a_tex0; 16 | v_viewVector = worldPos - g_cameraPos.xyz; 17 | } 18 | -------------------------------------------------------------------------------- /Source/Shaders/ModelManual.vert: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "Common.glsl" 4 | 5 | layout (location = 0) in vec3 a_pos0; 6 | layout (location = 1) in vec2 a_tex0; 7 | 8 | layout (location = 0) out vec3 v_worldPos; 9 | layout (location = 1) out vec3 v_viewVector; 10 | 11 | void main() 12 | { 13 | vec3 
worldPos = (vec4(a_pos0, 1) * g_matWorld).xyz; 14 | gl_Position = vec4(worldPos, 1) * g_matViewProj; 15 | v_worldPos = worldPos; 16 | v_viewVector = worldPos - g_cameraPos.xyz; 17 | } 18 | -------------------------------------------------------------------------------- /Source/Shaders/ModelNativeAMD.vert: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "Common.glsl" 4 | 5 | layout (location = 0) in vec3 a_pos0; 6 | layout (location = 1) in vec2 a_tex0; 7 | 8 | layout (location = 0) out float v_IdFlat; 9 | layout (location = 1) out float v_Id; 10 | 11 | void main() 12 | { 13 | vec3 worldPos = (vec4(a_pos0, 1) * g_matWorld).xyz; 14 | gl_Position = vec4(worldPos, 1) * g_matViewProj; 15 | 16 | float id = intBitsToFloat(gl_VertexIndex); 17 | v_IdFlat = id; 18 | v_Id = id; 19 | } 20 | -------------------------------------------------------------------------------- /Source/Shaders/ModelPassthrough.vert: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "Common.glsl" 4 | 5 | layout (location = 0) in vec3 a_pos0; 6 | layout (location = 1) in vec2 a_tex0; 7 | 8 | layout (location = 0) out vec2 v_tex0; 9 | layout (location = 1) out vec3 v_viewVector; 10 | layout (location = 2) out vec3 v_worldPos; 11 | 12 | void main() 13 | { 14 | vec3 worldPos = (vec4(a_pos0, 1) * g_matWorld).xyz; 15 | gl_Position = vec4(worldPos, 1) * g_matViewProj; 16 | v_tex0 = a_tex0; 17 | v_viewVector = worldPos - g_cameraPos.xyz; 18 | v_worldPos = worldPos; 19 | } 20 | -------------------------------------------------------------------------------- /Source/Shaders/ModelPassthrough.frag: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "Common.glsl" 4 | 5 | layout (location = 0) in vec2 v_tex0; 6 | layout (location = 1) in vec3 v_viewVector; 7 | layout (location = 2) in flat vec3 v_worldPos0; 8 | layout 
(location = 3) in flat vec3 v_worldPos1; 9 | layout (location = 4) in flat vec3 v_worldPos2; 10 | 11 | layout (location = 0) out vec4 fragColor0; 12 | 13 | void main() 14 | { 15 | vec3 barycentrics = intersectRayTri(g_cameraPos.xyz, 16 | normalize(v_viewVector), 17 | v_worldPos0, 18 | v_worldPos1, 19 | v_worldPos2); 20 | 21 | fragColor0.rgb = barycentrics; 22 | 23 | fragColor0.a = 1.0; 24 | } 25 | -------------------------------------------------------------------------------- /Source/Shaders/Model.frag: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "Common.glsl" 4 | 5 | layout (location = 0) in vec2 v_barycentrics; 6 | layout (location = 1) in flat uint v_primId; 7 | layout (location = 2) in vec3 v_viewVector; 8 | 9 | layout (location = 0) out vec4 fragColor0; 10 | 11 | void main() 12 | { 13 | vec3 barycentrics = vec3(v_barycentrics.x, v_barycentrics.y, 1.0 - v_barycentrics.x - v_barycentrics.y); 14 | 15 | if (g_useTexture) 16 | { 17 | vec2 texcoords = interpolateTexCoords(v_primId, barycentrics); 18 | fragColor0.rgb = texture(albedoSampler, texcoords).rgb; 19 | } 20 | else 21 | { 22 | fragColor0.rgb = barycentrics; 23 | } 24 | 25 | fragColor0.a = 1; 26 | } 27 | -------------------------------------------------------------------------------- /Source/Shaders/Model.vert: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "Common.glsl" 4 | 5 | layout (location = 0) out vec2 v_barycentrics; 6 | layout (location = 1) out uint v_primId; 7 | layout (location = 2) out vec3 v_viewVector; 8 | 9 | void main() 10 | { 11 | uint index = g_indices[gl_VertexIndex]; 12 | Vertex vertex = getVertex(index); 13 | vec3 worldPos = (vec4(vertex.position, 1) * g_matWorld).xyz; 14 | 15 | gl_Position = vec4(worldPos, 1) * g_matViewProj; 16 | 17 | uint id = gl_VertexIndex%3; 18 | switch(id) 19 | { 20 | case 0: v_barycentrics = vec2(1,0); break; 21 | case 1: 
v_barycentrics = vec2(0,1); break; 22 | case 2: v_barycentrics = vec2(0,0); break; 23 | } 24 | 25 | v_primId = gl_VertexIndex / 3; 26 | v_viewVector = worldPos - g_cameraPos.xyz; 27 | } 28 | -------------------------------------------------------------------------------- /Source/Shaders/ModelPassthroughTextured.frag: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "Common.glsl" 4 | 5 | layout (location = 0) in vec2 v_tex0; 6 | layout (location = 1) in vec3 v_viewVector; 7 | layout (location = 2) in flat vec3 v_worldPos0; 8 | layout (location = 3) in flat vec3 v_worldPos1; 9 | layout (location = 4) in flat vec3 v_worldPos2; 10 | layout (location = 5) in flat uint v_primId; 11 | 12 | layout (location = 0) out vec4 fragColor0; 13 | 14 | void main() 15 | { 16 | vec3 barycentrics = intersectRayTri(g_cameraPos.xyz, 17 | normalize(v_viewVector), 18 | v_worldPos0, 19 | v_worldPos1, 20 | v_worldPos2); 21 | 22 | vec2 texcoords = interpolateTexCoords(v_primId, barycentrics); 23 | fragColor0.rgb = texture(albedoSampler, texcoords).rgb; 24 | fragColor0.a = 1.0; 25 | } 26 | -------------------------------------------------------------------------------- /Source/Shaders/ModelBarycentrics.geom: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | layout(triangles) in; 4 | layout(triangle_strip, max_vertices=3) out; 5 | 6 | layout (location = 0) in vec2 v_tex0[]; // unused 7 | layout (location = 2) in vec3 v_viewVectorIn[]; 8 | 9 | layout (location = 0) out vec2 v_barycentrics; 10 | layout (location = 1) out uint v_primId; 11 | layout (location = 2) out vec3 v_viewVector; 12 | 13 | void main() 14 | { 15 | gl_Position = gl_in[0].gl_Position; 16 | v_primId = gl_PrimitiveIDIn; 17 | v_viewVector = v_viewVectorIn[0]; 18 | v_barycentrics = vec2(1,0); 19 | EmitVertex(); 20 | 21 | gl_Position = gl_in[1].gl_Position; 22 | v_primId = gl_PrimitiveIDIn; 23 | v_viewVector = 
v_viewVectorIn[1]; 24 | v_barycentrics = vec2(0,1); 25 | EmitVertex(); 26 | 27 | gl_Position = gl_in[2].gl_Position; 28 | v_primId = gl_PrimitiveIDIn; 29 | v_viewVector = v_viewVectorIn[2]; 30 | v_barycentrics = vec2(0,0); 31 | EmitVertex(); 32 | } 33 | -------------------------------------------------------------------------------- /External/meshoptimizer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(meshoptimizer) 2 | cmake_minimum_required(VERSION 3.0) 3 | 4 | option(BUILD_DEMO "Build demo" OFF) 5 | 6 | if(MSVC) 7 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /WX") 8 | endif(MSVC) 9 | 10 | set(SOURCES 11 | src/meshoptimizer.h 12 | src/indexcodec.cpp 13 | src/indexgenerator.cpp 14 | src/overdrawanalyzer.cpp 15 | src/overdrawoptimizer.cpp 16 | src/simplifier.cpp 17 | src/stripifier.cpp 18 | src/vcacheanalyzer.cpp 19 | src/vcacheoptimizer.cpp 20 | src/vertexcodec.cpp 21 | src/vfetchanalyzer.cpp 22 | src/vfetchoptimizer.cpp 23 | ) 24 | 25 | add_library(meshoptimizer STATIC ${SOURCES}) 26 | 27 | if(BUILD_DEMO) 28 | add_executable(demo demo/main.cpp demo/miniz.cpp demo/objparser.cpp) 29 | target_link_libraries(demo meshoptimizer) 30 | endif() 31 | 32 | target_include_directories(meshoptimizer INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/src") 33 | -------------------------------------------------------------------------------- /Source/Shaders/ModelManual.frag: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "Common.glsl" 4 | 5 | layout (location = 0) in flat vec3 v_worldPos; 6 | layout (location = 1) in vec3 v_viewVector; 7 | 8 | layout (location = 0) out vec4 fragColor0; 9 | 10 | void main() 11 | { 12 | uint index1 = g_indices[gl_PrimitiveID*3+1]; 13 | uint index2 = g_indices[gl_PrimitiveID*3+2]; 14 | 15 | Vertex vertex1 = getVertex(index1); 16 | Vertex vertex2 = getVertex(index2); 17 | 18 | vec3 barycentrics = intersectRayTri(g_cameraPos.xyz, 
19 | normalize(v_viewVector), 20 | v_worldPos, 21 | (vec4(vertex1.position, 1) * g_matWorld).xyz, 22 | (vec4(vertex2.position, 1) * g_matWorld).xyz); 23 | 24 | if (g_useTexture) 25 | { 26 | vec2 texcoords = interpolateTexCoords(gl_PrimitiveID, barycentrics); 27 | fragColor0.rgb = texture(albedoSampler, texcoords).rgb; 28 | } 29 | else 30 | { 31 | fragColor0.rgb = barycentrics; 32 | } 33 | 34 | fragColor0.a = 1; 35 | } 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Yuriy O'Donnell 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /External/meshoptimizer/LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016-2018 Arseny Kapoulkine 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Source/BaseApplication.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | class ShaderCompiler; 8 | 9 | namespace Rush 10 | { 11 | class PrimitiveBatch; 12 | class BitmapFontRenderer; 13 | } 14 | 15 | class BaseApplication : public Application 16 | { 17 | RUSH_DISALLOW_COPY_AND_ASSIGN(BaseApplication); 18 | 19 | public: 20 | BaseApplication(); 21 | ~BaseApplication(); 22 | 23 | protected: 24 | struct DepthStencilStates 25 | { 26 | GfxDepthStencilStateRef testLessEqual; 27 | GfxDepthStencilStateRef writeLessEqual; 28 | GfxDepthStencilStateRef writeAlways; 29 | GfxDepthStencilStateRef disable; 30 | } m_depthStencilStates; 31 | 32 | struct SamplerStates 33 | { 34 | GfxSamplerRef pointClamp; 35 | GfxSamplerRef linearClamp; 36 | GfxSamplerRef linearWrap; 37 | GfxSamplerRef anisotropicWrap; 38 | } m_samplerStates; 39 | 40 | struct BlendStates 41 | { 42 | GfxBlendStateRef lerp; 43 | GfxBlendStateRef opaque; 44 | GfxBlendStateRef additive; 45 | } m_blendStates; 46 | 47 | GfxDevice* m_dev; 48 | GfxContext* m_ctx; 49 | Window* m_window; 50 | PrimitiveBatch* m_prim; 51 | BitmapFontRenderer* m_font; 52 | 53 | GfxTexture m_defaultWhiteTexture; 54 | GfxTexture m_checkerboardTexture; 55 | }; 56 | -------------------------------------------------------------------------------- /Source/Shaders/ModelNativeAMD.frag: -------------------------------------------------------------------------------- 1 | #version 450 2 | #extension GL_AMD_shader_explicit_vertex_parameter : require 3 | 4 | #include "Common.glsl" 5 | 6 | layout (location = 0) in flat float v_IdFlat; 7 | layout (location = 1) in __explicitInterpAMD float v_Id; 8 | 9 | layout (location = 0) out vec4 fragColor0; 10 | 11 | void main() 12 | { 13 | int idRef = floatBitsToInt(v_IdFlat); 14 | int id0 = floatBitsToInt(interpolateAtVertexAMD(v_Id, 
0)); 15 | int id1 = floatBitsToInt(interpolateAtVertexAMD(v_Id, 1)); 16 | int id2 = floatBitsToInt(interpolateAtVertexAMD(v_Id, 2)); 17 | 18 | vec3 barycentrics; 19 | if (idRef == id0) 20 | { 21 | barycentrics.y = gl_BaryCoordSmoothAMD.x; 22 | barycentrics.z = gl_BaryCoordSmoothAMD.y; 23 | barycentrics.x = 1.0 - barycentrics.z - barycentrics.y; 24 | } 25 | else if (idRef == id1) 26 | { 27 | barycentrics.x = gl_BaryCoordSmoothAMD.x; 28 | barycentrics.y = gl_BaryCoordSmoothAMD.y; 29 | barycentrics.z = 1.0 - barycentrics.x - barycentrics.y; 30 | } 31 | else if (idRef == id2) 32 | { 33 | barycentrics.z = gl_BaryCoordSmoothAMD.x; 34 | barycentrics.x = gl_BaryCoordSmoothAMD.y; 35 | barycentrics.y = 1.0 - barycentrics.x - barycentrics.z; 36 | } 37 | else 38 | { 39 | barycentrics = vec3(1.0); 40 | } 41 | 42 | fragColor0.rgb = barycentrics; 43 | 44 | fragColor0.a = 1.0; 45 | } 46 | -------------------------------------------------------------------------------- /Source/Shaders/ModelNativeAMDTextured.frag: -------------------------------------------------------------------------------- 1 | #version 450 2 | #extension GL_AMD_shader_explicit_vertex_parameter : require 3 | 4 | #include "Common.glsl" 5 | 6 | layout (location = 0) in flat float v_IdFlat; 7 | layout (location = 1) in __explicitInterpAMD float v_Id; 8 | 9 | layout (location = 0) out vec4 fragColor0; 10 | 11 | void main() 12 | { 13 | int idRef = floatBitsToInt(v_IdFlat); 14 | int id0 = floatBitsToInt(interpolateAtVertexAMD(v_Id, 0)); 15 | int id1 = floatBitsToInt(interpolateAtVertexAMD(v_Id, 1)); 16 | int id2 = floatBitsToInt(interpolateAtVertexAMD(v_Id, 2)); 17 | 18 | vec3 barycentrics; 19 | if (idRef == id0) 20 | { 21 | barycentrics.y = gl_BaryCoordSmoothAMD.x; 22 | barycentrics.z = gl_BaryCoordSmoothAMD.y; 23 | barycentrics.x = 1.0 - barycentrics.z - barycentrics.y; 24 | } 25 | else if (idRef == id1) 26 | { 27 | barycentrics.x = gl_BaryCoordSmoothAMD.x; 28 | barycentrics.y = gl_BaryCoordSmoothAMD.y; 29 | 
barycentrics.z = 1.0 - barycentrics.x - barycentrics.y; 30 | } 31 | else if (idRef == id2) 32 | { 33 | barycentrics.z = gl_BaryCoordSmoothAMD.x; 34 | barycentrics.x = gl_BaryCoordSmoothAMD.y; 35 | barycentrics.y = 1.0 - barycentrics.x - barycentrics.z; 36 | } 37 | else 38 | { 39 | barycentrics = vec3(1.0); 40 | } 41 | 42 | vec2 texcoords = interpolateTexCoords(gl_PrimitiveID, barycentrics); 43 | fragColor0.rgb = texture(albedoSampler, texcoords).rgb; 44 | 45 | fragColor0.a = 1.0; 46 | } 47 | -------------------------------------------------------------------------------- /Source/DemoUtils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | template 9 | struct MovingAverage 10 | { 11 | MovingAverage() { reset(); } 12 | void reset() { idx = 0; sum = 0; for(T& it : buf) it=0; } 13 | T get() const { return sum / SIZE; } 14 | void add(T v) 15 | { 16 | sum += v; 17 | sum -= buf[idx]; 18 | buf[idx] = v; 19 | idx = (idx + 1) % SIZE; 20 | } 21 | size_t idx; 22 | T sum; 23 | T buf[SIZE]; 24 | }; 25 | 26 | template 27 | struct TimingScope 28 | { 29 | 30 | TimingScope(MovingAverage& output) 31 | : m_output(output) 32 | {} 33 | 34 | ~TimingScope() 35 | { 36 | m_output.add(m_timer.time()); 37 | } 38 | 39 | MovingAverage& m_output; 40 | Timer m_timer; 41 | }; 42 | 43 | inline u64 hashFnv1a64(const void* message, size_t length, u64 state = 0xcbf29ce484222325) 44 | { 45 | const u8* bytes = (const u8*)message; 46 | for (size_t i = 0; i < length; ++i) 47 | { 48 | state ^= bytes[i]; 49 | state *= 0x100000001b3; 50 | } 51 | return state; 52 | } 53 | 54 | std::string directoryFromFilename(const std::string& filename); 55 | GfxShaderSource shaderFromFile(const char* filename, const char* shaderDirectory = Platform_GetExecutableDirectory()); 56 | GfxTexture textureFromFile(const char* filename); 57 | GfxTexture generateMipsRGBA8(u8* pixels, int w, int h); 58 | 
-------------------------------------------------------------------------------- /Source/Shaders/ModelPassthrough.geom: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #if 0 // Reference mode 4 | 5 | layout(triangles) in; 6 | layout(triangle_strip, max_vertices=3) out; 7 | 8 | layout (location = 0) in vec2 v_tex0[]; 9 | layout (location = 1) in vec3 v_viewVector[]; 10 | layout (location = 2) in vec3 v_worldPos[]; 11 | 12 | layout (location = 0) out vec2 out_tex0; 13 | layout (location = 1) out vec3 out_viewVector; 14 | layout (location = 2) out vec3 out_worldPos0; 15 | layout (location = 3) out vec3 out_worldPos1; 16 | layout (location = 4) out vec3 out_worldPos2; 17 | layout (location = 5) out uint out_primId; 18 | 19 | void main() 20 | { 21 | for (int i=0; i<3; ++i) 22 | { 23 | gl_Position = gl_in[i].gl_Position; 24 | out_tex0 = v_tex0[i]; 25 | out_viewVector = v_viewVector[i]; 26 | out_worldPos0 = v_worldPos[0]; 27 | out_worldPos1 = v_worldPos[1]; 28 | out_worldPos2 = v_worldPos[2]; 29 | EmitVertex(); 30 | } 31 | } 32 | 33 | #else 34 | 35 | #extension GL_NV_geometry_shader_passthrough : require 36 | 37 | layout(triangles) in; 38 | 39 | layout(passthrough) in gl_PerVertex 40 | { 41 | vec4 gl_Position; 42 | }; 43 | 44 | layout (location = 0, passthrough) in vec2 in_tex0; 45 | layout (location = 1, passthrough) in vec3 in_viewVector; 46 | layout (location = 2) in vec3 in_worldPos[]; 47 | 48 | layout (location = 2) out vec3 v_worldPos0; 49 | layout (location = 3) out vec3 v_worldPos1; 50 | layout (location = 4) out vec3 v_worldPos2; 51 | 52 | void main() 53 | { 54 | v_worldPos0 = in_worldPos[0]; 55 | v_worldPos1 = in_worldPos[1]; 56 | v_worldPos2 = in_worldPos[2]; 57 | } 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /External/meshoptimizer/src/vfetchanalyzer.cpp: -------------------------------------------------------------------------------- 1 | // 
This file is part of meshoptimizer library; see meshoptimizer.h for version/license details 2 | #include "meshoptimizer.h" 3 | 4 | #include 5 | 6 | meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size) 7 | { 8 | assert(index_count % 3 == 0); 9 | assert(vertex_size > 0 && vertex_size <= 256); 10 | 11 | meshopt_VertexFetchStatistics result = {}; 12 | 13 | const size_t kCacheLine = 64; 14 | const size_t kCacheSize = 128 * 1024; 15 | 16 | // simple direct mapped cache; on typical mesh data this is close to 4-way cache, and this model is a gross approximation anyway 17 | size_t cache[kCacheSize / kCacheLine] = {}; 18 | 19 | for (size_t i = 0; i < index_count; ++i) 20 | { 21 | unsigned int index = indices[i]; 22 | assert(index < vertex_count); 23 | 24 | size_t start_address = index * vertex_size; 25 | size_t end_address = start_address + vertex_size; 26 | 27 | size_t start_tag = start_address / kCacheLine; 28 | size_t end_tag = (end_address + kCacheLine - 1) / kCacheLine; 29 | 30 | assert(start_tag < end_tag); 31 | 32 | for (size_t tag = start_tag; tag < end_tag; ++tag) 33 | { 34 | size_t line = tag % (sizeof(cache) / sizeof(cache[0])); 35 | 36 | // we store +1 since cache is filled with 0 by default 37 | result.bytes_fetched += (cache[line] != tag + 1) * kCacheLine; 38 | cache[line] = tag + 1; 39 | } 40 | } 41 | 42 | result.overfetch = vertex_count == 0 ? 
0 : float(result.bytes_fetched) / float(vertex_count * vertex_size); 43 | 44 | return result; 45 | } 46 | -------------------------------------------------------------------------------- /External/meshoptimizer/src/vfetchoptimizer.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details 2 | #include "meshoptimizer.h" 3 | 4 | #include 5 | #include 6 | 7 | size_t meshopt_optimizeVertexFetch(void* destination, unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) 8 | { 9 | assert(index_count % 3 == 0); 10 | assert(vertex_size > 0 && vertex_size <= 256); 11 | 12 | // support in-place optimization 13 | meshopt_Buffer vertices_copy; 14 | 15 | if (destination == vertices) 16 | { 17 | vertices_copy.data = new char[vertex_count * vertex_size]; 18 | memcpy(vertices_copy.data, vertices, vertex_count * vertex_size); 19 | vertices = vertices_copy.data; 20 | } 21 | 22 | // build vertex remap table 23 | meshopt_Buffer vertex_remap(vertex_count); 24 | memset(vertex_remap.data, -1, vertex_remap.size * sizeof(unsigned int)); 25 | 26 | unsigned int next_vertex = 0; 27 | 28 | for (size_t i = 0; i < index_count; ++i) 29 | { 30 | unsigned int index = indices[i]; 31 | assert(index < vertex_count); 32 | 33 | unsigned int& remap = vertex_remap[index]; 34 | 35 | if (remap == ~0u) // vertex was not added to destination VB 36 | { 37 | // add vertex 38 | memcpy(static_cast(destination) + next_vertex * vertex_size, static_cast(vertices) + index * vertex_size, vertex_size); 39 | 40 | remap = next_vertex++; 41 | } 42 | 43 | // modify indices in place 44 | indices[i] = remap; 45 | } 46 | 47 | assert(next_vertex <= vertex_count); 48 | 49 | return next_vertex; 50 | } 51 | -------------------------------------------------------------------------------- /Source/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | set(shaderDependencies 2 | # Add explicit dependencies here 3 | Shaders/Common.glsl 4 | ) 5 | 6 | set(shaders 7 | Shaders/Model.frag 8 | Shaders/Model.vert 9 | Shaders/ModelBarycentrics.geom 10 | Shaders/ModelIndexed.frag 11 | Shaders/ModelIndexed.vert 12 | Shaders/ModelManual.frag 13 | Shaders/ModelManual.vert 14 | Shaders/ModelNativeAMD.frag 15 | Shaders/ModelNativeAMD.vert 16 | Shaders/ModelNativeAMDTextured.frag 17 | Shaders/ModelPassthrough.frag 18 | Shaders/ModelPassthrough.geom 19 | Shaders/ModelPassthrough.vert 20 | Shaders/ModelPassthroughTextured.frag 21 | Shaders/ModelPassthroughTextured.geom 22 | ) 23 | 24 | set(src 25 | ${shaders} 26 | BaseApplication.cpp 27 | BaseApplication.h 28 | Barycentrics.cpp 29 | Barycentrics.h 30 | DemoUtils.cpp 31 | DemoUtils.h 32 | ) 33 | 34 | set(app Barycentrics) 35 | 36 | add_executable(${app} 37 | ${src} 38 | ) 39 | 40 | source_group("Shaders" FILES ${shaders} ${shaderDependencies}) 41 | 42 | function(shader_compile_rule shaderName dependencies) 43 | add_custom_command( 44 | OUTPUT ${CMAKE_CFG_INTDIR}/${shaderName}.spv 45 | COMMAND ${GLSLC} -o ${CMAKE_CFG_INTDIR}/${shaderName}.spv ${CMAKE_CURRENT_SOURCE_DIR}/${shaderName} 46 | MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/${shaderName} 47 | DEPENDS ${dependencies} 48 | ) 49 | endfunction(shader_compile_rule) 50 | 51 | foreach(shader ${shaders}) 52 | shader_compile_rule(${shader} "${shaderDependencies}") 53 | endforeach() 54 | 55 | target_compile_definitions(${app} PRIVATE 56 | RUSH_USING_NAMESPACE # Automatically use Rush namespace 57 | ) 58 | 59 | target_link_libraries(${app} 60 | meshoptimizer 61 | Rush 62 | stb 63 | tiny_obj_loader 64 | ) 65 | -------------------------------------------------------------------------------- /Source/Shaders/ModelPassthroughTextured.geom: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #if 0 // 
Reference mode 4 | 5 | layout(triangles) in; 6 | layout(triangle_strip, max_vertices=3) out; 7 | 8 | layout (location = 0) in vec2 v_tex0[]; 9 | layout (location = 1) in vec3 v_viewVector[]; 10 | layout (location = 2) in vec3 v_worldPos[]; 11 | 12 | layout (location = 0) out vec2 out_tex0; 13 | layout (location = 1) out vec3 out_viewVector; 14 | layout (location = 2) out vec3 out_worldPos0; 15 | layout (location = 3) out vec3 out_worldPos1; 16 | layout (location = 4) out vec3 out_worldPos2; 17 | layout (location = 5) out uint out_primId; 18 | 19 | void main() 20 | { 21 | for (int i=0; i<3; ++i) 22 | { 23 | gl_Position = gl_in[i].gl_Position; 24 | out_tex0 = v_tex0[i]; 25 | out_viewVector = v_viewVector[i]; 26 | out_worldPos0 = v_worldPos[0]; 27 | out_worldPos1 = v_worldPos[1]; 28 | out_worldPos2 = v_worldPos[2]; 29 | out_primId = gl_PrimitiveIDIn; 30 | EmitVertex(); 31 | } 32 | } 33 | 34 | #else 35 | 36 | #extension GL_NV_geometry_shader_passthrough : require 37 | 38 | layout(triangles) in; 39 | 40 | layout(passthrough) in gl_PerVertex 41 | { 42 | vec4 gl_Position; 43 | }; 44 | 45 | layout (location = 0, passthrough) in vec2 in_tex0; 46 | layout (location = 1, passthrough) in vec3 in_viewVector; 47 | layout (location = 2) in vec3 in_worldPos[]; 48 | 49 | layout (location = 2) out vec3 v_worldPos0; 50 | layout (location = 3) out vec3 v_worldPos1; 51 | layout (location = 4) out vec3 v_worldPos2; 52 | layout (location = 5) out uint v_primId; 53 | 54 | void main() 55 | { 56 | v_worldPos0 = in_worldPos[0]; 57 | v_worldPos1 = in_worldPos[1]; 58 | v_worldPos2 = in_worldPos[2]; 59 | v_primId = gl_PrimitiveIDIn; 60 | } 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /Source/Shaders/Common.glsl: -------------------------------------------------------------------------------- 1 | layout(constant_id = 0) const bool g_useTexture = false; 2 | 3 | layout (binding = 0) uniform Global 4 | { 5 | mat4 g_matView; 6 | mat4 
g_matProj; 7 | mat4 g_matViewProj; 8 | mat4 g_matWorld; 9 | vec4 g_cameraPos; 10 | }; 11 | 12 | layout (binding = 1) uniform Material 13 | { 14 | vec4 g_baseColor; 15 | }; 16 | 17 | layout (binding = 2) uniform sampler2D albedoSampler; 18 | 19 | struct VertexPacked 20 | { 21 | float pX, pY, pZ; 22 | float tX, tY; 23 | }; 24 | 25 | layout (std430, binding = 3) readonly buffer VertexBuffer 26 | { 27 | VertexPacked g_vertices[]; 28 | }; 29 | 30 | layout (std430, binding = 4) readonly buffer IndexBuffer 31 | { 32 | uint g_indices[]; 33 | }; 34 | 35 | struct Vertex 36 | { 37 | vec3 position; 38 | vec2 texcoord; // TODO: de-interleave vertex streams 39 | }; 40 | 41 | Vertex getVertex(uint i) 42 | { 43 | VertexPacked v = g_vertices[i]; 44 | 45 | Vertex r; 46 | r.position = vec3(v.pX, v.pY, v.pZ); 47 | r.texcoord = vec2(v.tX, v.tY); 48 | 49 | return r; 50 | } 51 | 52 | vec3 intersectRayTri(vec3 rayOrigin, vec3 rayDirection, vec3 v0, vec3 v1, vec3 v2) 53 | { 54 | vec3 e0 = v1 - v0; 55 | vec3 e1 = v2 - v0; 56 | vec3 s1 = cross(rayDirection, e1); 57 | float invd = 1.0 / (dot(s1, e0)); 58 | vec3 d = rayOrigin - v0; 59 | float b1 = dot(d, s1) * invd; 60 | vec3 s2 = cross(d, e0); 61 | float b2 = dot(rayDirection, s2) * invd; 62 | float temp = dot(e1, s2) * invd; 63 | 64 | return vec3(1.0 - b1 - b2, b1, b2); 65 | } 66 | 67 | vec2 interpolateTexCoords(vec2 t0, vec2 t1, vec2 t2, vec3 barycentrics) 68 | { 69 | vec2 texcoord = 70 | t0 * barycentrics.x + 71 | t1 * barycentrics.y + 72 | t2 * barycentrics.z; 73 | 74 | return texcoord; 75 | } 76 | 77 | vec2 interpolateTexCoords(uint primId, vec3 barycentrics) 78 | { 79 | uint index0 = g_indices[3*primId+0]; 80 | uint index1 = g_indices[3*primId+1]; 81 | uint index2 = g_indices[3*primId+2]; 82 | 83 | Vertex vertex0 = getVertex(index0); 84 | Vertex vertex1 = getVertex(index1); 85 | Vertex vertex2 = getVertex(index2); 86 | 87 | return interpolateTexCoords( 88 | vertex0.texcoord, 89 | vertex1.texcoord, 90 | vertex2.texcoord, 91 | 
barycentrics); 92 | } 93 | -------------------------------------------------------------------------------- /External/meshoptimizer/src/vcacheanalyzer.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details 2 | #include "meshoptimizer.h" 3 | 4 | #include 5 | #include 6 | 7 | meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size) 8 | { 9 | assert(index_count % 3 == 0); 10 | assert(cache_size >= 3); 11 | assert(warp_size == 0 || warp_size >= 3); 12 | 13 | meshopt_VertexCacheStatistics result = {}; 14 | 15 | unsigned int warp_offset = 0; 16 | unsigned int primgroup_offset = 0; 17 | 18 | meshopt_Buffer cache_timestamps(vertex_count); 19 | memset(cache_timestamps.data, 0, vertex_count * sizeof(unsigned int)); 20 | 21 | unsigned int timestamp = cache_size + 1; 22 | 23 | for (size_t i = 0; i < index_count; i += 3) 24 | { 25 | unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2]; 26 | assert(a < vertex_count && b < vertex_count && c < vertex_count); 27 | 28 | bool ac = (timestamp - cache_timestamps[a]) > cache_size; 29 | bool bc = (timestamp - cache_timestamps[b]) > cache_size; 30 | bool cc = (timestamp - cache_timestamps[c]) > cache_size; 31 | 32 | // flush cache if triangle doesn't fit into warp or into the primitive buffer 33 | if ((primgroup_size && primgroup_offset == primgroup_size) || (warp_size && warp_offset + ac + bc + cc > warp_size)) 34 | { 35 | result.warps_executed += warp_offset > 0; 36 | 37 | warp_offset = 0; 38 | primgroup_offset = 0; 39 | 40 | // reset cache 41 | timestamp += cache_size + 1; 42 | } 43 | 44 | // update cache and add vertices to warp 45 | for (int j = 0; j < 3; ++j) 46 | { 47 | unsigned int index = indices[i + j]; 48 | 49 | if (timestamp - 
cache_timestamps[index] > cache_size) 50 | { 51 | cache_timestamps[index] = timestamp++; 52 | result.vertices_transformed++; 53 | warp_offset++; 54 | } 55 | } 56 | 57 | primgroup_offset++; 58 | } 59 | 60 | result.warps_executed += warp_offset > 0; 61 | 62 | result.acmr = index_count == 0 ? 0 : float(result.vertices_transformed) / float(index_count / 3); 63 | result.atvr = vertex_count == 0 ? 0 : float(result.vertices_transformed) / float(vertex_count); 64 | 65 | return result; 66 | } 67 | -------------------------------------------------------------------------------- /Source/DemoUtils.cpp: -------------------------------------------------------------------------------- 1 | #include "DemoUtils.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #pragma warning(push) 8 | #pragma warning(disable: 4996) 9 | #define STB_IMAGE_IMPLEMENTATION 10 | #include 11 | #define STB_IMAGE_RESIZE_IMPLEMENTATION 12 | #include 13 | #pragma warning(pop) 14 | 15 | #include 16 | #include 17 | 18 | std::string directoryFromFilename(const std::string& filename) 19 | { 20 | size_t pos = filename.find_last_of("/\\"); 21 | if (pos != std::string::npos) 22 | { 23 | return filename.substr(0, pos + 1); 24 | } 25 | else 26 | { 27 | return std::string(); 28 | } 29 | } 30 | 31 | GfxShaderSource shaderFromFile(const char* filename, const char* shaderDirectory) 32 | { 33 | std::string fullFilename = std::string(shaderDirectory) + "/" + std::string(filename); 34 | Log::message("Loading shader '%s'", filename); 35 | 36 | GfxShaderSource source; 37 | source.type = GfxShaderSourceType_SPV; 38 | 39 | FileIn f(fullFilename.c_str()); 40 | if (f.valid()) 41 | { 42 | u32 fileSize = f.length(); 43 | source.resize(fileSize); 44 | f.read(&source[0], fileSize); 45 | } 46 | 47 | if (source.empty()) 48 | { 49 | Log::error("Failed to load shader '%s'", filename); 50 | } 51 | 52 | return source; 53 | }; 54 | 55 | GfxTexture textureFromFile(const char* filename) 56 | { 57 | int w, h, comp; 58 | 
stbi_set_flip_vertically_on_load(true); 59 | u8* pixels = stbi_load(filename, &w, &h, &comp, 4); 60 | 61 | GfxTexture result; 62 | 63 | if (pixels) 64 | { 65 | result = generateMipsRGBA8(pixels, w, h); 66 | stbi_image_free(pixels); 67 | } 68 | else 69 | { 70 | Log::warning("Failed to load texture '%s'", filename); 71 | } 72 | 73 | return result; 74 | } 75 | 76 | GfxTexture generateMipsRGBA8(u8* pixels, int w, int h) 77 | { 78 | GfxTexture result; 79 | 80 | std::vector> mips; 81 | mips.reserve(16); 82 | 83 | std::vector textureData; 84 | textureData.reserve(16); 85 | textureData.push_back(GfxTextureData(pixels)); 86 | 87 | u32 mipWidth = w; 88 | u32 mipHeight = h; 89 | 90 | while (mipWidth != 1 && mipHeight != 1) 91 | { 92 | u32 nextMipWidth = max(1, mipWidth / 2); 93 | u32 nextMipHeight = max(1, mipHeight / 2); 94 | 95 | u8* nextMip = new u8[nextMipWidth * nextMipHeight * 4]; 96 | mips.push_back(std::unique_ptr(nextMip)); 97 | 98 | const u32 mipPitch = mipWidth * 4; 99 | const u32 nextMipPitch = nextMipWidth * 4; 100 | int resizeResult = stbir_resize_uint8( 101 | (const u8*)textureData.back().pixels, mipWidth, mipHeight, mipPitch, 102 | nextMip, nextMipWidth, nextMipHeight, nextMipPitch, 4); 103 | RUSH_ASSERT(resizeResult); 104 | 105 | textureData.push_back(GfxTextureData(nextMip, (u32)textureData.size())); 106 | 107 | mipWidth = nextMipWidth; 108 | mipHeight = nextMipHeight; 109 | } 110 | 111 | GfxTextureDesc desc = GfxTextureDesc::make2D(w, h); 112 | desc.mips = (u32)textureData.size(); 113 | 114 | result = Gfx_CreateTexture(desc, textureData.data(), (u32)textureData.size()); 115 | return result; 116 | } 117 | 118 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Barycentrics 2 | 3 | This demo several approaches for computing barycentric coordinates in the pixel shader. 
4 | 5 | ## Mode 1: Non-indexed geometry 6 | 7 | Geometry is rendered using non-indexed draws. Vertex shader explicitly loads indices and vertices from index and vertex buffers and writes out barycentric coordinates. This approach is similar to using a geometry shader that outputs per-vertex barycentrics. 8 | 9 | This approach results in geometry throughput ~2x slower than regular indexed rendering. 10 | 11 | ## Mode 2: Geometry shader 12 | 13 | Geometry shader is used to output new triangles with explicit per-vertex barycentric coordinates. This approach does not require custom vertex fetching (unlike mode 1). 14 | 15 | Performance is slightly worse than mode 1 on AMD Fury X, but better on NVIDIA 1080. 16 | In general, we are still looking at ~2x slower rendering in geometry-bound scenes. 17 | 18 | ## Mode 3: Manual ray-triangle intersection in pixel shader 19 | 20 | Primitive indices and vertices are loaded in the pixel shader based on primitive ID. 21 | Positions are transformed into world space and resulting triangle is intersected with the eye ray to calculate barycentrics. 22 | 23 | Despite doing quite a lot of work per pixel, this mode is much faster than modes 1 and 2 in geometry-heavy scenes. 24 | 25 | On NVIDIA 1080 this runs ~25% slower than baseline "speed-of-light" shader that simply outpts texture coordinates in a geometry-bound scene. Interestingly, even with no geometry visible on screen (camera facing away, but still shading all vertices) performance is ~13% slower than "speed-of-light". It appears that simply using gl_PrimitiveID incurs an overhead. 26 | 27 | AMD Fury X is ~10% slower than "speed-of-light" on average and ~7% slower in pure geometry-bound case, again suggesting an overhead from using gl_PrimitiveID. 28 | 29 | ## Mode 4: Passthrough geometry shader (NVIDIA) 30 | 31 | This mode uses `VK_NV_geometry_shader_passthrough` extension. 
Fast / passthrough geometry shader is used to output world positions of triangles to the pixel shader, which then performs a ray-triangle intersection similar to mode 3. 32 | 33 | Performance is slightly better than mode 3, averaging ~15% slowdown compared to "speed-of-light". With no geometry in view, performance matches the baseline (no primitive ID overhead, unlike mode 3). 34 | 35 | ## Mode 5: Native barycentrics (AMD) 36 | 37 | This mode uses `VK_AMD_shader_explicit_vertex_parameter` extension. This approach is described in [GPUOpen blog post](https://gpuopen.com/stable-barycentric-coordinates). 38 | 39 | Vertex shader writes gl_VertexIndex into 2 separate outputs. Pixel shader accesses those parameters through `flat` and `__explicitInterpAMD` interpolators to establish the order of native barycentrics available through `gl_BaryCoordSmoothAMD`. 40 | 41 | Performance matches the "speed-of-light". There is no measurable overhead from accessing barycentrics with this method. 42 | 43 | # Notes 44 | 45 | Geometry-heavy scene used for testing is San Miguel 2.0 from http://casual-effects.com/data. 46 | 47 | In more balanced scenes, performance delta between different methods can be much less dramatic. 48 | In any case, Mode 4 seems to be the most preferable one on NVIDIA and Mode 5 is obviously hard to compete with on AMD. 49 | 50 | Mode 3 may be the best cross-platform mechanism at this point, though it would be interesting to implement a way to avoid gl_PrimitiveID overhead. 
51 | -------------------------------------------------------------------------------- /Source/Barycentrics.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "BaseApplication.h" 15 | #include "DemoUtils.h" 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | class BarycentricsApp : public BaseApplication 23 | { 24 | public: 25 | 26 | BarycentricsApp(); 27 | ~BarycentricsApp(); 28 | 29 | void update() override; 30 | 31 | private: 32 | 33 | void render(); 34 | 35 | bool loadModel(const char* filename); 36 | bool loadTunnelTestModel(); 37 | 38 | Camera m_camera; 39 | Camera m_interpolatedCamera; 40 | 41 | CameraManipulator* m_cameraMan; 42 | 43 | GfxTechniqueRef m_techniqueNonIndexed[2]; 44 | GfxTechniqueRef m_techniqueGeometryShader[2]; 45 | GfxTechniqueRef m_techniqueIndexed[2]; 46 | GfxTechniqueRef m_techniqueManual[2]; 47 | GfxTechniqueRef m_techniquePassthroughGS[2]; 48 | GfxTechniqueRef m_techniqueNativeAMD[2]; 49 | 50 | GfxBuffer m_vertexBuffer; 51 | GfxBuffer m_indexBuffer; 52 | GfxBuffer m_constantBuffer; 53 | 54 | u32 m_indexCount = 0; 55 | u32 m_vertexCount = 0; 56 | 57 | struct Constants 58 | { 59 | Mat4 matView = Mat4::identity(); 60 | Mat4 matProj = Mat4::identity(); 61 | Mat4 matViewProj = Mat4::identity(); 62 | Mat4 matWorld = Mat4::identity(); 63 | Vec4 cameraPos = Vec4(0.0f); 64 | }; 65 | 66 | Mat4 m_worldTransform = Mat4::identity(); 67 | 68 | Box3 m_boundingBox; 69 | 70 | struct Vertex // TODO: make a packed version of this for GPU 71 | { 72 | Vec3 position; 73 | Vec2 texcoord; // TODO: de-interleave vertex streams 74 | }; 75 | 76 | std::string m_statusString; 77 | bool m_valid = false; 78 | 79 | struct MaterialConstants 80 | { 81 | Vec4 baseColor; 82 | }; 83 | 84 | struct Material 85 | { 86 | GfxTextureRef albedoTexture; 87 | 
GfxBufferRef constantBuffer; 88 | }; 89 | 90 | Material m_defaultMaterial; 91 | 92 | WindowEventListener m_windowEvents; 93 | 94 | float m_cameraScale = 1.0f; 95 | 96 | Timer m_timer; 97 | 98 | 99 | enum Timestamp 100 | { 101 | Timestamp_World, 102 | Timestamp_UI, 103 | }; 104 | 105 | struct Stats 106 | { 107 | MovingAverage gpuTotal; 108 | MovingAverage gpuWorld; 109 | MovingAverage gpuUI; 110 | MovingAverage cpuTotal; 111 | MovingAverage cpuWorld; 112 | MovingAverage cpuUI; 113 | } m_stats; 114 | 115 | enum class Mode 116 | { 117 | Indexed, 118 | NonIndexed, 119 | GeometryShader, 120 | Manual, 121 | PassthroughGS, 122 | NativeAMD, 123 | } m_mode = Mode::NonIndexed; 124 | 125 | const char* toString(Mode m) 126 | { 127 | switch (m) 128 | { 129 | default: return "Unknown"; 130 | case Mode::Indexed: return "Indexed"; 131 | case Mode::NonIndexed: return "NonIndexed"; 132 | case Mode::GeometryShader: return "GeometryShader"; 133 | case Mode::Manual: return "Manual"; 134 | case Mode::PassthroughGS: return "PassthroughGS"; 135 | case Mode::NativeAMD: return "NativeAMD"; 136 | } 137 | } 138 | 139 | bool m_useTexture = false; 140 | bool m_showUI = true; 141 | }; 142 | 143 | -------------------------------------------------------------------------------- /Source/BaseApplication.cpp: -------------------------------------------------------------------------------- 1 | #include "BaseApplication.h" 2 | #include "DemoUtils.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | BaseApplication::BaseApplication() 9 | : m_dev(Platform_GetGfxDevice()), m_ctx(Platform_GetGfxContext()), m_window(Platform_GetWindow()) 10 | { 11 | m_window->retain(); 12 | Gfx_Retain(m_dev); 13 | Gfx_Retain(m_ctx); 14 | 15 | m_prim = new PrimitiveBatch(); 16 | m_font = new BitmapFontRenderer(BitmapFontRenderer::createEmbeddedFont(true, 0, 1)); 17 | 18 | // Depth stencil states 19 | 20 | { 21 | GfxDepthStencilDesc desc; 22 | desc.enable = false; 23 | desc.writeEnable = false; 24 | desc.compareFunc = 
GfxCompareFunc::Always; 25 | m_depthStencilStates.disable.takeover(Gfx_CreateDepthStencilState(desc)); 26 | } 27 | 28 | { 29 | GfxDepthStencilDesc desc; 30 | desc.enable = true; 31 | desc.writeEnable = true; 32 | desc.compareFunc = GfxCompareFunc::LessEqual; 33 | m_depthStencilStates.writeLessEqual.takeover(Gfx_CreateDepthStencilState(desc)); 34 | } 35 | 36 | { 37 | GfxDepthStencilDesc desc; 38 | desc.enable = true; 39 | desc.writeEnable = true; 40 | desc.compareFunc = GfxCompareFunc::Always; 41 | m_depthStencilStates.writeAlways.takeover(Gfx_CreateDepthStencilState(desc)); 42 | } 43 | 44 | { 45 | GfxDepthStencilDesc desc; 46 | desc.enable = true; 47 | desc.writeEnable = false; 48 | desc.compareFunc = GfxCompareFunc::LessEqual; 49 | m_depthStencilStates.testLessEqual.takeover(Gfx_CreateDepthStencilState(desc)); 50 | } 51 | 52 | // Blend states 53 | 54 | { 55 | GfxBlendStateDesc desc = GfxBlendStateDesc::makeOpaque(); 56 | m_blendStates.opaque.takeover(Gfx_CreateBlendState(desc)); 57 | } 58 | 59 | { 60 | GfxBlendStateDesc desc = GfxBlendStateDesc::makeLerp(); 61 | m_blendStates.lerp.takeover(Gfx_CreateBlendState(desc)); 62 | } 63 | 64 | { 65 | GfxBlendStateDesc desc = GfxBlendStateDesc::makeAdditive(); 66 | m_blendStates.additive.takeover(Gfx_CreateBlendState(desc)); 67 | } 68 | 69 | // Sampler states 70 | 71 | { 72 | GfxSamplerDesc desc = GfxSamplerDesc::makePoint(); 73 | desc.wrapU = GfxTextureWrap::Clamp; 74 | desc.wrapV = GfxTextureWrap::Clamp; 75 | desc.wrapW = GfxTextureWrap::Clamp; 76 | m_samplerStates.pointClamp.takeover(Gfx_CreateSamplerState(desc)); 77 | } 78 | 79 | { 80 | GfxSamplerDesc desc = GfxSamplerDesc::makeLinear(); 81 | desc.wrapU = GfxTextureWrap::Clamp; 82 | desc.wrapV = GfxTextureWrap::Clamp; 83 | desc.wrapW = GfxTextureWrap::Clamp; 84 | m_samplerStates.linearClamp.takeover(Gfx_CreateSamplerState(desc)); 85 | } 86 | 87 | { 88 | GfxSamplerDesc desc = GfxSamplerDesc::makeLinear(); 89 | desc.wrapU = GfxTextureWrap::Wrap; 90 | desc.wrapV = 
GfxTextureWrap::Wrap; 91 | desc.wrapW = GfxTextureWrap::Wrap; 92 | m_samplerStates.linearWrap.takeover(Gfx_CreateSamplerState(desc)); 93 | } 94 | 95 | { 96 | GfxSamplerDesc desc = GfxSamplerDesc::makeLinear(); 97 | desc.wrapU = GfxTextureWrap::Wrap; 98 | desc.wrapV = GfxTextureWrap::Wrap; 99 | desc.wrapW = GfxTextureWrap::Wrap; 100 | desc.anisotropy = 16.0f; 101 | m_samplerStates.anisotropicWrap.takeover(Gfx_CreateSamplerState(desc)); 102 | } 103 | 104 | // Resources 105 | 106 | { 107 | const u32 whiteTexturePixels[4] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; 108 | GfxTextureDesc textureDescr = GfxTextureDesc::make2D(2, 2); 109 | m_defaultWhiteTexture = Gfx_CreateTexture(textureDescr, whiteTexturePixels); 110 | } 111 | 112 | { 113 | const u32 dimension = 256; 114 | const u32 square = dimension / 2; 115 | 116 | std::vector pixels(dimension * dimension, 0x00000000); 117 | for (u32 y = 0; y < square; ++y) 118 | { 119 | for (u32 x = 0; x < square; ++x) 120 | { 121 | pixels[y * dimension + x] = 0xFFFFFFFF; 122 | pixels[(y + square) * dimension + (x + square)] = 0xFFFFFFFF; 123 | } 124 | } 125 | 126 | m_checkerboardTexture = generateMipsRGBA8(reinterpret_cast(pixels.data()), dimension, dimension); 127 | } 128 | } 129 | 130 | BaseApplication::~BaseApplication() 131 | { 132 | delete m_font; 133 | delete m_prim; 134 | 135 | Gfx_Release(m_defaultWhiteTexture); 136 | Gfx_Release(m_ctx); 137 | Gfx_Release(m_dev); 138 | m_window->release(); 139 | } 140 | -------------------------------------------------------------------------------- /External/meshoptimizer/src/indexgenerator.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details 2 | #include "meshoptimizer.h" 3 | 4 | #include 5 | #include 6 | 7 | namespace meshopt 8 | { 9 | 10 | static unsigned int murmurHash(const char* key, size_t len, unsigned int h) 11 | { 12 | const unsigned int m = 
0x5bd1e995; 13 | const int r = 24; 14 | 15 | while (len >= 4) 16 | { 17 | unsigned int k = *reinterpret_cast(key); 18 | 19 | k *= m; 20 | k ^= k >> r; 21 | k *= m; 22 | 23 | h *= m; 24 | h ^= k; 25 | 26 | key += 4; 27 | len -= 4; 28 | } 29 | 30 | return h; 31 | } 32 | 33 | struct VertexHasher 34 | { 35 | const char* vertices; 36 | size_t vertex_size; 37 | 38 | unsigned int empty() const 39 | { 40 | return ~0u; 41 | } 42 | 43 | size_t operator()(unsigned int index) const 44 | { 45 | return murmurHash(vertices + index * vertex_size, vertex_size, 0); 46 | } 47 | 48 | size_t operator()(unsigned int lhs, unsigned int rhs) const 49 | { 50 | return memcmp(vertices + lhs * vertex_size, vertices + rhs * vertex_size, vertex_size) == 0; 51 | } 52 | }; 53 | 54 | struct VertexHashEntry 55 | { 56 | unsigned int key; 57 | unsigned int value; 58 | }; 59 | 60 | static size_t hashBuckets(size_t count) 61 | { 62 | size_t buckets = 1; 63 | while (buckets < count) 64 | buckets *= 2; 65 | 66 | return buckets; 67 | } 68 | 69 | template 70 | static T* hashLookup(T* table, size_t buckets, const Hash& hash, const Key& key, const Key& empty) 71 | { 72 | assert(buckets > 0); 73 | assert((buckets & (buckets - 1)) == 0); 74 | 75 | size_t hashmod = buckets - 1; 76 | size_t bucket = hash(key) & hashmod; 77 | 78 | for (size_t probe = 0; probe <= hashmod; ++probe) 79 | { 80 | T& item = table[bucket]; 81 | 82 | if (item.key == empty) 83 | return &item; 84 | 85 | if (hash(item.key, key)) 86 | return &item; 87 | 88 | // hash collision, quadratic probing 89 | bucket = (bucket + probe + 1) & hashmod; 90 | } 91 | 92 | assert(false && "Hash table is full"); 93 | return 0; 94 | } 95 | 96 | } // namespace meshopt 97 | 98 | size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) 99 | { 100 | using namespace meshopt; 101 | 102 | assert(indices || index_count == vertex_count); 103 | 
assert(index_count % 3 == 0); 104 | assert(vertex_size > 0 && vertex_size <= 256); 105 | 106 | for (size_t i = 0; i < vertex_count; ++i) 107 | { 108 | destination[i] = ~0u; 109 | } 110 | 111 | VertexHasher hasher = {static_cast(vertices), vertex_size}; 112 | 113 | meshopt_Buffer table(hashBuckets(vertex_count)); 114 | memset(table.data, -1, table.size * sizeof(VertexHashEntry)); 115 | 116 | unsigned int next_vertex = 0; 117 | 118 | for (size_t i = 0; i < index_count; ++i) 119 | { 120 | unsigned int index = indices ? indices[i] : unsigned(i); 121 | assert(index < vertex_count); 122 | 123 | if (destination[index] == ~0u) 124 | { 125 | VertexHashEntry* entry = hashLookup(table.data, table.size, hasher, index, ~0u); 126 | 127 | if (entry->key == ~0u) 128 | { 129 | entry->key = index; 130 | entry->value = next_vertex++; 131 | } 132 | 133 | destination[index] = entry->value; 134 | } 135 | } 136 | 137 | assert(next_vertex <= vertex_count); 138 | 139 | return next_vertex; 140 | } 141 | 142 | void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap) 143 | { 144 | assert(destination != vertices); 145 | assert(vertex_size > 0 && vertex_size <= 256); 146 | 147 | for (size_t i = 0; i < vertex_count; ++i) 148 | { 149 | if (remap[i] != ~0u) 150 | { 151 | assert(remap[i] < vertex_count); 152 | 153 | memcpy(static_cast(destination) + remap[i] * vertex_size, static_cast(vertices) + i * vertex_size, vertex_size); 154 | } 155 | } 156 | } 157 | 158 | void meshopt_remapIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const unsigned int* remap) 159 | { 160 | assert(index_count % 3 == 0); 161 | 162 | for (size_t i = 0; i < index_count; ++i) 163 | { 164 | unsigned int index = indices ? 
indices[i] : unsigned(i); 165 | assert(remap[index] != ~0u); 166 | 167 | destination[i] = remap[index]; 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /External/meshoptimizer/src/stripifier.cpp: -------------------------------------------------------------------------------- 1 | #include "meshoptimizer.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace meshopt 8 | { 9 | 10 | static unsigned int findStripFirst(const unsigned int buffer[][3], unsigned int buffer_size, const unsigned int* valence) 11 | { 12 | unsigned int index = 0; 13 | unsigned int iv = ~0u; 14 | 15 | for (unsigned int i = 0; i < buffer_size; ++i) 16 | { 17 | unsigned int va = valence[buffer[i][0]], vb = valence[buffer[i][1]], vc = valence[buffer[i][2]]; 18 | unsigned int v = (va < vb && va < vc) ? va : (vb < vc) ? vb : vc; 19 | 20 | if (v < iv) 21 | { 22 | index = i; 23 | iv = v; 24 | } 25 | } 26 | 27 | return index; 28 | } 29 | 30 | static int findStripNext(const unsigned int buffer[][3], unsigned int buffer_size, unsigned int e0, unsigned int e1) 31 | { 32 | for (unsigned int i = 0; i < buffer_size; ++i) 33 | { 34 | unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2]; 35 | 36 | if (e0 == a && e1 == b) 37 | return (i << 2) | 2; 38 | else if (e0 == b && e1 == c) 39 | return (i << 2) | 0; 40 | else if (e0 == c && e1 == a) 41 | return (i << 2) | 1; 42 | } 43 | 44 | return -1; 45 | } 46 | 47 | } // namespace meshopt 48 | 49 | size_t meshopt_stripify(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count) 50 | { 51 | assert(destination != indices); 52 | assert(index_count % 3 == 0); 53 | 54 | using namespace meshopt; 55 | 56 | const size_t buffer_capacity = 8; 57 | 58 | unsigned int buffer[buffer_capacity][3] = {}; 59 | unsigned int buffer_size = 0; 60 | 61 | size_t index_offset = 0; 62 | 63 | unsigned int strip[2] = {}; 64 | unsigned int parity = 0; 65 | 66 | size_t strip_size = 
0; 67 | 68 | // compute vertex valence; this is used to prioritize starting triangle for strips 69 | meshopt_Buffer valence(vertex_count); 70 | memset(valence.data, 0, vertex_count * sizeof(unsigned int)); 71 | 72 | for (size_t i = 0; i < index_count; ++i) 73 | { 74 | unsigned int index = indices[i]; 75 | assert(index < vertex_count); 76 | 77 | valence[index]++; 78 | } 79 | 80 | int next = -1; 81 | 82 | while (buffer_size > 0 || index_offset < index_count) 83 | { 84 | assert(next < 0 || (size_t(next >> 2) < buffer_size && (next & 3) < 3)); 85 | 86 | // fill triangle buffer 87 | while (buffer_size < buffer_capacity && index_offset < index_count) 88 | { 89 | buffer[buffer_size][0] = indices[index_offset + 0]; 90 | buffer[buffer_size][1] = indices[index_offset + 1]; 91 | buffer[buffer_size][2] = indices[index_offset + 2]; 92 | 93 | buffer_size++; 94 | index_offset += 3; 95 | } 96 | 97 | assert(buffer_size > 0); 98 | 99 | if (next >= 0) 100 | { 101 | unsigned int i = next >> 2; 102 | unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2]; 103 | unsigned int v = buffer[i][next & 3]; 104 | 105 | // ordered removal from the buffer 106 | memmove(buffer[i], buffer[i + 1], (buffer_size - i - 1) * sizeof(buffer[0])); 107 | buffer_size--; 108 | 109 | // update vertex valences for strip start heuristic 110 | valence[a]--; 111 | valence[b]--; 112 | valence[c]--; 113 | 114 | // find next triangle (note that edge order flips on every iteration) 115 | // in some cases we need to perform a swap to pick a different outgoing triangle edge 116 | // for [a b c], the default strip edge is [b c], but we might want to use [a c] 117 | int cont = findStripNext(buffer, buffer_size, parity ? strip[1] : v, parity ? v : strip[1]); 118 | int swap = cont < 0 ? findStripNext(buffer, buffer_size, parity ? v : strip[0], parity ? 
strip[0] : v) : -1; 119 | 120 | if (cont < 0 && swap >= 0) 121 | { 122 | // [a b c] => [a b a c] 123 | destination[strip_size++] = strip[0]; 124 | destination[strip_size++] = v; 125 | 126 | // next strip has same winding 127 | // ? a b => b a v 128 | strip[1] = v; 129 | 130 | next = swap; 131 | } 132 | else 133 | { 134 | // emit the next vertex in the strip 135 | destination[strip_size++] = v; 136 | 137 | // next strip has flipped winding 138 | strip[0] = strip[1]; 139 | strip[1] = v; 140 | parity ^= 1; 141 | 142 | next = cont; 143 | } 144 | } 145 | else 146 | { 147 | // if we didn't find anything, we need to find the next new triangle 148 | // we use a heuristic to maximize the strip length 149 | unsigned int i = findStripFirst(buffer, buffer_size, &valence[0]); 150 | unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2]; 151 | 152 | // ordered removal from the buffer 153 | memmove(buffer[i], buffer[i + 1], (buffer_size - i - 1) * sizeof(buffer[0])); 154 | buffer_size--; 155 | 156 | // update vertex valences for strip start heuristic 157 | valence[a]--; 158 | valence[b]--; 159 | valence[c]--; 160 | 161 | // we need to pre-rotate the triangle so that we will find a match in the existing buffer on the next iteration 162 | int ea = findStripNext(buffer, buffer_size, c, b); 163 | int eb = findStripNext(buffer, buffer_size, a, c); 164 | int ec = findStripNext(buffer, buffer_size, b, a); 165 | 166 | // in some cases we can have several matching edges; since we can pick any edge, we pick the one with the smallest 167 | // triangle index in the buffer. this reduces the effect of stripification on ACMR and additionally - for unclear 168 | // reasons - slightly improves the stripification efficiency 169 | int mine = INT_MAX; 170 | mine = (ea >= 0 && mine > ea) ? ea : mine; 171 | mine = (eb >= 0 && mine > eb) ? eb : mine; 172 | mine = (ec >= 0 && mine > ec) ? 
ec : mine; 173 | 174 | if (ea == mine) 175 | { 176 | // keep abc 177 | next = ea; 178 | } 179 | else if (eb == mine) 180 | { 181 | // abc -> bca 182 | unsigned int t = a; 183 | a = b, b = c, c = t; 184 | 185 | next = eb; 186 | } 187 | else if (ec == mine) 188 | { 189 | // abc -> cab 190 | unsigned int t = c; 191 | c = b, b = a, a = t; 192 | 193 | next = ec; 194 | } 195 | 196 | // emit the new strip; we use restart indices 197 | if (strip_size) 198 | destination[strip_size++] = ~0u; 199 | 200 | destination[strip_size++] = a; 201 | destination[strip_size++] = b; 202 | destination[strip_size++] = c; 203 | 204 | // new strip always starts with the same edge winding 205 | strip[0] = b; 206 | strip[1] = c; 207 | parity = 1; 208 | } 209 | } 210 | 211 | return strip_size; 212 | } 213 | 214 | size_t meshopt_unstripify(unsigned int* destination, const unsigned int* indices, size_t index_count) 215 | { 216 | assert(destination != indices); 217 | 218 | size_t offset = 0; 219 | size_t start = 0; 220 | 221 | for (size_t i = 0; i < index_count; ++i) 222 | { 223 | if (indices[i] == ~0u) 224 | { 225 | start = i + 1; 226 | } 227 | else if (i - start >= 2) 228 | { 229 | unsigned int a = indices[i - 2], b = indices[i - 1], c = indices[i]; 230 | 231 | if ((i - start) & 1) 232 | { 233 | unsigned int t = a; 234 | a = b, b = t; 235 | } 236 | 237 | if (a != b && a != c && b != c) 238 | { 239 | destination[offset + 0] = a; 240 | destination[offset + 1] = b; 241 | destination[offset + 2] = c; 242 | offset += 3; 243 | } 244 | } 245 | } 246 | 247 | return offset; 248 | } 249 | -------------------------------------------------------------------------------- /External/meshoptimizer/src/overdrawanalyzer.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details 2 | #include "meshoptimizer.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | // This work is based on: 9 | 
// Nicolas Capens. Advanced Rasterization. 2004 10 | namespace meshopt 11 | { 12 | 13 | const int kViewport = 256; 14 | 15 | struct OverdrawBuffer 16 | { 17 | float z[kViewport][kViewport][2]; 18 | unsigned int overdraw[kViewport][kViewport][2]; 19 | }; 20 | 21 | template 22 | static T min(T a, T b) 23 | { 24 | return a < b ? a : b; 25 | } 26 | 27 | template 28 | static T max(T a, T b) 29 | { 30 | return a > b ? a : b; 31 | } 32 | 33 | static float det2x2(float a, float b, float c, float d) 34 | { 35 | // (a b) 36 | // (c d) 37 | return a * d - b * c; 38 | } 39 | 40 | static float computeDepthGradients(float& dzdx, float& dzdy, float x1, float y1, float z1, float x2, float y2, float z2, float x3, float y3, float z3) 41 | { 42 | // z2 = z1 + dzdx * (x2 - x1) + dzdy * (y2 - y1) 43 | // z3 = z1 + dzdx * (x3 - x1) + dzdy * (y3 - y1) 44 | // (x2-x1 y2-y1)(dzdx) = (z2-z1) 45 | // (x3-x1 y3-y1)(dzdy) (z3-z1) 46 | // we'll solve it with Cramer's rule 47 | float det = det2x2(x2 - x1, y2 - y1, x3 - x1, y3 - y1); 48 | float invdet = (det == 0) ? 
0 : 1 / det; 49 | 50 | dzdx = det2x2(z2 - z1, y2 - y1, z3 - z1, y3 - y1) * invdet; 51 | dzdy = det2x2(x2 - x1, z2 - z1, x3 - x1, z3 - z1) * invdet; 52 | 53 | return det; 54 | } 55 | 56 | // half-space fixed point triangle rasterizer 57 | static void rasterize(OverdrawBuffer* buffer, float v1x, float v1y, float v1z, float v2x, float v2y, float v2z, float v3x, float v3y, float v3z) 58 | { 59 | // compute depth gradients 60 | float DZx, DZy; 61 | float det = computeDepthGradients(DZx, DZy, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z); 62 | int sign = det > 0; 63 | 64 | // flip backfacing triangles to simplify rasterization logic 65 | if (sign) 66 | { 67 | // flipping v2 & v3 preserves depth gradients since they're based on v1 68 | float t; 69 | t = v2x, v2x = v3x, v3x = t; 70 | t = v2y, v2y = v3y, v3y = t; 71 | t = v2z, v2z = v3z, v3z = t; 72 | 73 | // flip depth since we rasterize backfacing triangles to second buffer with reverse Z; only v1z is used below 74 | v1z = kViewport - v1z; 75 | DZx = -DZx; 76 | DZy = -DZy; 77 | } 78 | 79 | // coordinates, 28.4 fixed point 80 | int X1 = int(16.0f * v1x + 0.5f); 81 | int X2 = int(16.0f * v2x + 0.5f); 82 | int X3 = int(16.0f * v3x + 0.5f); 83 | 84 | int Y1 = int(16.0f * v1y + 0.5f); 85 | int Y2 = int(16.0f * v2y + 0.5f); 86 | int Y3 = int(16.0f * v3y + 0.5f); 87 | 88 | // bounding rectangle, clipped against viewport 89 | // since we rasterize pixels with covered centers, min >0.5 should round up 90 | // as for max, due to top-left filling convention we will never rasterize right/bottom edges 91 | // so max >= 0.5 should round down 92 | int minx = max((min(X1, min(X2, X3)) + 7) >> 4, 0); 93 | int maxx = min((max(X1, max(X2, X3)) + 7) >> 4, kViewport); 94 | int miny = max((min(Y1, min(Y2, Y3)) + 7) >> 4, 0); 95 | int maxy = min((max(Y1, max(Y2, Y3)) + 7) >> 4, kViewport); 96 | 97 | // deltas, 28.4 fixed point 98 | int DX12 = X1 - X2; 99 | int DX23 = X2 - X3; 100 | int DX31 = X3 - X1; 101 | 102 | int DY12 = Y1 - Y2; 103 | int 
DY23 = Y2 - Y3; 104 | int DY31 = Y3 - Y1; 105 | 106 | // fill convention correction 107 | int TL1 = DY12 < 0 || (DY12 == 0 && DX12 > 0); 108 | int TL2 = DY23 < 0 || (DY23 == 0 && DX23 > 0); 109 | int TL3 = DY31 < 0 || (DY31 == 0 && DX31 > 0); 110 | 111 | // half edge equations, 24.8 fixed point 112 | // note that we offset minx/miny by half pixel since we want to rasterize pixels with covered centers 113 | int FX = (minx << 4) + 8; 114 | int FY = (miny << 4) + 8; 115 | int CY1 = DX12 * (FY - Y1) - DY12 * (FX - X1) + TL1 - 1; 116 | int CY2 = DX23 * (FY - Y2) - DY23 * (FX - X2) + TL2 - 1; 117 | int CY3 = DX31 * (FY - Y3) - DY31 * (FX - X3) + TL3 - 1; 118 | float ZY = v1z + (DZx * float(FX - X1) + DZy * float(FY - Y1)) * (1 / 16.f); 119 | 120 | for (int y = miny; y < maxy; y++) 121 | { 122 | int CX1 = CY1; 123 | int CX2 = CY2; 124 | int CX3 = CY3; 125 | float ZX = ZY; 126 | 127 | for (int x = minx; x < maxx; x++) 128 | { 129 | // check if all CXn are non-negative 130 | if ((CX1 | CX2 | CX3) >= 0) 131 | { 132 | if (ZX >= buffer->z[y][x][sign]) 133 | { 134 | buffer->z[y][x][sign] = ZX; 135 | buffer->overdraw[y][x][sign]++; 136 | } 137 | } 138 | 139 | CX1 -= DY12 << 4; 140 | CX2 -= DY23 << 4; 141 | CX3 -= DY31 << 4; 142 | ZX += DZx; 143 | } 144 | 145 | CY1 += DX12 << 4; 146 | CY2 += DX23 << 4; 147 | CY3 += DX31 << 4; 148 | ZY += DZy; 149 | } 150 | } 151 | 152 | } // namespace meshopt 153 | 154 | meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) 155 | { 156 | using namespace meshopt; 157 | 158 | assert(index_count % 3 == 0); 159 | assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); 160 | assert(vertex_positions_stride % sizeof(float) == 0); 161 | 162 | size_t vertex_stride_float = vertex_positions_stride / sizeof(float); 163 | 164 | meshopt_OverdrawStatistics result = {}; 165 | 166 | float minv[3] = {FLT_MAX, FLT_MAX, 
FLT_MAX}; 167 | float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX}; 168 | 169 | for (size_t i = 0; i < vertex_count; ++i) 170 | { 171 | const float* v = vertex_positions + i * vertex_stride_float; 172 | 173 | for (int j = 0; j < 3; ++j) 174 | { 175 | minv[j] = min(minv[j], v[j]); 176 | maxv[j] = max(maxv[j], v[j]); 177 | } 178 | } 179 | 180 | float extent = max(maxv[0] - minv[0], max(maxv[1] - minv[1], maxv[2] - minv[2])); 181 | float scale = kViewport / extent; 182 | 183 | meshopt_Buffer triangles(index_count * 3); 184 | 185 | for (size_t i = 0; i < index_count; ++i) 186 | { 187 | unsigned int index = indices[i]; 188 | assert(index < vertex_count); 189 | 190 | const float* v = vertex_positions + index * vertex_stride_float; 191 | 192 | triangles[i * 3 + 0] = (v[0] - minv[0]) * scale; 193 | triangles[i * 3 + 1] = (v[1] - minv[1]) * scale; 194 | triangles[i * 3 + 2] = (v[2] - minv[2]) * scale; 195 | } 196 | 197 | meshopt_Buffer buffer_storage(1); 198 | OverdrawBuffer* buffer = buffer_storage.data; 199 | 200 | for (int axis = 0; axis < 3; ++axis) 201 | { 202 | memset(buffer, 0, sizeof(OverdrawBuffer)); 203 | 204 | for (size_t i = 0; i < index_count; i += 3) 205 | { 206 | const float* vn0 = &triangles[3 * (i + 0)]; 207 | const float* vn1 = &triangles[3 * (i + 1)]; 208 | const float* vn2 = &triangles[3 * (i + 2)]; 209 | 210 | switch (axis) 211 | { 212 | case 0: 213 | rasterize(buffer, vn0[2], vn0[1], vn0[0], vn1[2], vn1[1], vn1[0], vn2[2], vn2[1], vn2[0]); 214 | break; 215 | case 1: 216 | rasterize(buffer, vn0[0], vn0[2], vn0[1], vn1[0], vn1[2], vn1[1], vn2[0], vn2[2], vn2[1]); 217 | break; 218 | case 2: 219 | rasterize(buffer, vn0[1], vn0[0], vn0[2], vn1[1], vn1[0], vn1[2], vn2[1], vn2[0], vn2[2]); 220 | break; 221 | } 222 | } 223 | 224 | for (int y = 0; y < kViewport; ++y) 225 | for (int x = 0; x < kViewport; ++x) 226 | for (int s = 0; s < 2; ++s) 227 | { 228 | unsigned int overdraw = buffer->overdraw[y][x][s]; 229 | 230 | result.pixels_covered += overdraw > 0; 231 | 
result.pixels_shaded += overdraw; 232 | } 233 | } 234 | 235 | result.overdraw = result.pixels_covered ? float(result.pixels_shaded) / float(result.pixels_covered) : 0.f; 236 | 237 | return result; 238 | } 239 | -------------------------------------------------------------------------------- /External/meshoptimizer/README.md: -------------------------------------------------------------------------------- 1 | # meshoptimizer [![Build Status](https://travis-ci.org/zeux/meshoptimizer.svg?branch=master)](https://travis-ci.org/zeux/meshoptimizer) [![Build status](https://ci.appveyor.com/api/projects/status/ptx6p8wmqchivawq?svg=true)](https://ci.appveyor.com/project/zeux/meshoptimizer) [![codecov.io](http://codecov.io/github/zeux/meshoptimizer/coverage.svg?branch=master)](http://codecov.io/github/zeux/meshoptimizer?branch=master) ![MIT](https://img.shields.io/badge/license-MIT-blue.svg) 2 | 3 | ## Purpose 4 | 5 | When GPU renders triangle meshes, various stages of the GPU pipeline have to process vertex and index data. The efficiency of these stages depends on the data you feed to them; this library provides algorithms to help optimize meshes for these stages, as well as algorithms to reduce the mesh complexity and storage overhead. 6 | 7 | The library provides a C and C++ interface for all algorithms; you can use it from C/C++ or from other languages via FFI (such as P/Invoke). 8 | 9 | ## Building 10 | 11 | meshoptimizer is distributed as a set of C++ source files. To include it into your project, you can use one of the two options: 12 | 13 | * Use CMake to build the library (either as a standalone project or as part of your project) 14 | * Add source files to your project's build system 15 | 16 | The source files are organized in such a way that you don't need to change your build-system settings, and you only need to add the files for the algorithms you use. 
17 | 18 | ## Pipeline 19 | 20 | When optimizing a mesh, you should typically feed it through a set of optimizations (the order is important!): 21 | 22 | 1. Indexing 23 | 2. Vertex cache optimization 24 | 3. Overdraw optimization 25 | 4. Vertex fetch optimization 26 | 5. Vertex quantization 27 | 28 | ## Indexing 29 | 30 | Most algorithms in this library assume that a mesh has a vertex buffer and an index buffer. For algorithms to work well and also for GPU to render your mesh efficiently, the vertex buffer has to have no redundant vertices; you can generate an index buffer from an unindexed vertex buffer or reindex an existing (potentially redundant) index buffer as follows: 31 | 32 | First, generate a remap table from your existing vertex (and, optionally, index) data: 33 | 34 | ```c++ 35 | size_t index_count = face_count * 3; 36 | std::vector remap(index_count); // allocate temporary memory for the remap table 37 | size_t vertex_count = meshopt_generateVertexRemap(&remap[0], NULL, index_count, &unindexed_vertices[0], index_count, sizeof(Vertex)); 38 | ``` 39 | 40 | Note that in this case we only have an unindexed vertex buffer; the remap table is generated based on binary equivalence of the input vertices, so the resulting mesh will render the same way. 41 | 42 | After generating the remap table, you can allocate space for the target vertex buffer (`vertex_count` elements) and index buffer (`index_count` elements) and generate them: 43 | 44 | ```c++ 45 | meshopt_remapIndexBuffer(indices, NULL, index_count, &remap[0]); 46 | meshopt_remapVertexBuffer(vertices, &unindexed_vertices[0], index_count, sizeof(Vertex), &remap[0]); 47 | ``` 48 | 49 | You can then further optimize the resulting buffers by calling the other functions on them in-place. 
50 | 51 | ## Vertex cache optimization 52 | 53 | When the GPU renders the mesh, it has to run the vertex shader for each vertex; usually GPUs have a built-in fixed size cache that stores the transformed vertices (the result of running the vertex shader), and uses this cache to reduce the number of vertex shader invocations. This cache is usually small, 16-32 vertices, and can have different replacement policies; to use this cache efficiently, you have to reorder your triangles to maximize the locality of reused vertex references like so: 54 | 55 | ```c++ 56 | meshopt_optimizeVertexCache(indices, indices, index_count, vertex_count); 57 | ``` 58 | 59 | ## Overdraw optimization 60 | 61 | After transforming the vertices, GPU sends the triangles for rasterization which results in generating pixels that are usually first ran through the depth test, and pixels that pass it get the pixel shader executed to generate the final color. As pixel shaders get more expensive, it becomes more and more important to reduce overdraw. While in general improving overdraw requires view-dependent operations, this library provides an algorithm to reorder triangles to minimize the overdraw from all directions, which you should run after vertex cache optimization like this: 62 | 63 | ```c++ 64 | meshopt_optimizeOverdraw(indices, indices, index_count, &vertices[0].x, vertex_count, sizeof(Vertex), 1.05f); 65 | ``` 66 | 67 | The overdraw optimizer needs to read vertex positions as a float3 from the vertex; the code snippet above assumes that the vertex stores position as `float x, y, z`. 68 | 69 | When performing the overdraw optimization you have to specify a floating-point threshold parameter. The algorithm tries to maintain a balance between vertex cache efficiency and overdraw; the threshold determines how much the algorithm can compromise the vertex cache hit ratio, with 1.05 meaning that the resulting ratio should be at most 5% worse than before the optimization. 
70 | 71 | ## Vertex fetch optimization 72 | 73 | After the final triangle order has been established, we still can optimize the vertex buffer for memory efficiency. Before running the vertex shader GPU has to fetch the vertex attributes from the vertex buffer; the fetch is usually backed by a memory cache, and as such optimizing the data for the locality of memory access is important. You can do this by running this code: 74 | 75 | To optimize the index/vertex buffers for vertex fetch efficiency, call: 76 | 77 | ```c++ 78 | meshopt_optimizeVertexFetch(vertices, indices, index_count, vertices, vertex_count, sizeof(Vertex)); 79 | ``` 80 | 81 | This will reorder the vertices in the vertex buffer to try to improve the locality of reference, and rewrite the indices in place to match. This optimization has to be performed on the final index buffer since the optimal vertex order depends on the triangle order. 82 | 83 | Note that the algorithm does not try to model cache replacement precisely and instead just orders vertices in the order of use, which generally produces results that are close to optimal. 84 | 85 | ## Vertex quantization 86 | 87 | To optimize memory bandwidth when fetching the vertex data even further, and to reduce the amount of memory required to store the mesh, it is often beneficial to quantize the vertex attributes to smaller types. While this optimization can technically run at any part of the pipeline (and sometimes doing quantization as the first step can improve indexing by merging almost identical vertices), it generally is easier to run this after all other optimizations since some of them require access to float3 positions. 88 | 89 | Quantization is usually domain specific; it's common to quantize normals using 3 8-bit integers but you can use higher-precision quantization (for example using 10 bits per component in a 10_10_10_2 format), or a different encoding to use just 2 components. 
For positions and texture coordinate data the two most common storage formats are half precision floats, and 16-bit normalized integers that encode the position relative to the AABB of the mesh or the UV bounding rectangle. 90 | 91 | The number of possible combinations here is very large but this library does provide the building blocks, specifically functions to quantize floating point values to normalized integers, as well as half-precision floats. For example, here's how you can quantize a normal: 92 | 93 | ```c++ 94 | unsigned int normal = 95 | (meshopt_quantizeUnorm(v.nx, 10) << 20) | 96 | (meshopt_quantizeUnorm(v.ny, 10) << 10) | 97 | meshopt_quantizeUnorm(v.nz, 10); 98 | ``` 99 | 100 | and here's how you can quantize a position: 101 | 102 | ```c++ 103 | unsigned short px = meshopt_quantizeHalf(v.x); 104 | unsigned short py = meshopt_quantizeHalf(v.y); 105 | unsigned short pz = meshopt_quantizeHalf(v.z); 106 | ``` 107 | 108 | ## Efficiency analyzers 109 | 110 | While the only way to get precise performance data is to measure performance on the target GPU, it can be valuable to measure the impact of these optimization in a GPU-independent manner. To this end, the library provides analyzers for all three major optimization routines. For each optimization there is a corresponding analyze function, like `meshopt_analyzeOverdraw`, that returns a struct with statistics. 111 | 112 | `meshopt_analyzeVertexCache` returns vertex cache statistics. The common metric to use is ACMR - average cache miss ratio, which is the ratio of the total number of vertex invocations to the triangle count. The worst-case ACMR is 3 (GPU has to process 3 vertices for each triangle); on regular grids the optimal ACMR approaches 0.5. On real meshes it usually is in [0.5..1.5] ratio depending on the amount of vertex splits. 
One other useful metric is ATVR - average transformed vertex ratio - which represents the ratio of vertex shader invocations to the total vertices, and has the best case of 1.0 regardless of mesh topology (each vertex is transformed once). 113 | 114 | `meshopt_analyzeVertexFetch` returns vertex fetch statistics. The main metric it uses is overfetch - the ratio between the number of bytes read from the vertex buffer to the total number of bytes in the vertex buffer. Assuming non-redundant vertex buffers, the best case is 1.0 - each byte is fetched once. 115 | 116 | `meshopt_analyzeOverdraw` returns overdraw statistics. The main metric it uses is overdraw - the ratio between the number of pixel shader invocations to the total number of covered pixels, as measured from several different orthographic cameras. The best case for overdraw is 1.0 - each pixel is shaded once. 117 | 118 | Note that all analyzers use approximate models for the relevant GPU units, so the numbers you will get as the result are only a rough approximation of the actual performance. 119 | 120 | ## License 121 | 122 | This library is available to anybody free of charge, under the terms of MIT License (see LICENSE.md). 123 | -------------------------------------------------------------------------------- /External/meshoptimizer/src/simplifier.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details 2 | #include "meshoptimizer.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | // This work is based on: 11 | // Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 
1997 12 | namespace meshopt 13 | { 14 | 15 | static size_t hash(unsigned long long key) 16 | { 17 | key = (~key) + (key << 18); 18 | key = key ^ (key >> 31); 19 | key = key * 21; 20 | key = key ^ (key >> 11); 21 | key = key + (key << 6); 22 | key = key ^ (key >> 22); 23 | return size_t(key); 24 | } 25 | 26 | static size_t hashBuckets(size_t count) 27 | { 28 | size_t buckets = 1; 29 | while (buckets < count) 30 | buckets *= 2; 31 | 32 | return buckets; 33 | } 34 | 35 | template 36 | static T* hashLookup(T* table, size_t buckets, const T& key, const T& empty) 37 | { 38 | assert(buckets > 0); 39 | assert((buckets & (buckets - 1)) == 0); 40 | 41 | size_t hashmod = buckets - 1; 42 | size_t bucket = hash(key) & hashmod; 43 | 44 | for (size_t probe = 0; probe <= hashmod; ++probe) 45 | { 46 | T& item = table[bucket]; 47 | 48 | if (item == empty || item == key) 49 | return &item; 50 | 51 | // hash collision, quadratic probing 52 | bucket = (bucket + probe + 1) & hashmod; 53 | } 54 | 55 | assert(false && "Hash table is full"); 56 | return 0; 57 | } 58 | 59 | struct Vector3 60 | { 61 | float x, y, z; 62 | }; 63 | 64 | struct Quadric 65 | { 66 | float a00; 67 | float a10, a11; 68 | float a20, a21, a22; 69 | float b0, b1, b2, c; 70 | }; 71 | 72 | struct Collapse 73 | { 74 | size_t v0; 75 | size_t v1; 76 | float error; 77 | 78 | bool operator<(const Collapse& other) const 79 | { 80 | return error < other.error; 81 | } 82 | }; 83 | 84 | static float normalize(Vector3& v) 85 | { 86 | float length = sqrtf(v.x * v.x + v.y * v.y + v.z * v.z); 87 | 88 | if (length > 0) 89 | { 90 | v.x /= length; 91 | v.y /= length; 92 | v.z /= length; 93 | } 94 | 95 | return length; 96 | } 97 | 98 | static void quadricAdd(Quadric& Q, const Quadric& R) 99 | { 100 | Q.a00 += R.a00; 101 | Q.a10 += R.a10; 102 | Q.a11 += R.a11; 103 | Q.a20 += R.a20; 104 | Q.a21 += R.a21; 105 | Q.a22 += R.a22; 106 | Q.b0 += R.b0; 107 | Q.b1 += R.b1; 108 | Q.b2 += R.b2; 109 | Q.c += R.c; 110 | } 111 | 112 | static void 
quadricMul(Quadric& Q, float s) 113 | { 114 | Q.a00 *= s; 115 | Q.a10 *= s; 116 | Q.a11 *= s; 117 | Q.a20 *= s; 118 | Q.a21 *= s; 119 | Q.a22 *= s; 120 | Q.b0 *= s; 121 | Q.b1 *= s; 122 | Q.b2 *= s; 123 | Q.c *= s; 124 | } 125 | 126 | static float quadricError(Quadric& Q, const Vector3& v) 127 | { 128 | float xx = v.x * v.x; 129 | float xy = v.x * v.y; 130 | float xz = v.x * v.z; 131 | float yy = v.y * v.y; 132 | float yz = v.y * v.z; 133 | float zz = v.z * v.z; 134 | 135 | float vTQv = Q.a00 * xx + Q.a10 * xy * 2 + Q.a11 * yy + Q.a20 * xz * 2 + Q.a21 * yz * 2 + Q.a22 * zz + Q.b0 * v.x * 2 + Q.b1 * v.y * 2 + Q.b2 * v.z * 2 + Q.c; 136 | 137 | return fabsf(vTQv); 138 | } 139 | 140 | static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d) 141 | { 142 | Q.a00 = a * a; 143 | Q.a10 = b * a; 144 | Q.a11 = b * b; 145 | Q.a20 = c * a; 146 | Q.a21 = c * b; 147 | Q.a22 = c * c; 148 | Q.b0 = d * a; 149 | Q.b1 = d * b; 150 | Q.b2 = d * c; 151 | Q.c = d * d; 152 | } 153 | 154 | static void quadricFromTriangle(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2) 155 | { 156 | Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z}; 157 | Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z}; 158 | 159 | Vector3 normal = {p10.y * p20.z - p10.z * p20.y, p10.z * p20.x - p10.x * p20.z, p10.x * p20.y - p10.y * p20.x}; 160 | float area = normalize(normal); 161 | 162 | float distance = normal.x * p0.x + normal.y * p0.y + normal.z * p0.z; 163 | 164 | quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance); 165 | 166 | // Three classical weighting methods include weight=1, weight=area and weight=area^2 167 | // We use weight=area for now 168 | quadricMul(Q, area); 169 | } 170 | 171 | static void quadricFromTriangleEdge(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2) 172 | { 173 | Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z}; 174 | float length = normalize(p10); 175 | 176 | Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - 
p0.z}; 177 | float p20p = p20.x * p10.x + p20.y * p10.y + p20.z * p10.z; 178 | 179 | Vector3 normal = {p20.x - p10.x * p20p, p20.y - p10.y * p20p, p20.z - p10.z * p20p}; 180 | normalize(normal); 181 | 182 | float distance = normal.x * p0.x + normal.y * p0.y + normal.z * p0.z; 183 | 184 | quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance); 185 | 186 | quadricMul(Q, length * 1000); 187 | } 188 | 189 | static unsigned long long edgeId(unsigned int a, unsigned int b) 190 | { 191 | return (static_cast(a) << 32) | b; 192 | } 193 | 194 | static size_t simplifyEdgeCollapse(unsigned int* result, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_positions_stride, size_t vertex_count, size_t target_index_count) 195 | { 196 | size_t vertex_stride_float = vertex_positions_stride / sizeof(float); 197 | 198 | meshopt_Buffer vertex_positions(vertex_count); 199 | 200 | for (size_t i = 0; i < vertex_count; ++i) 201 | { 202 | const float* v = vertex_positions_data + i * vertex_stride_float; 203 | 204 | vertex_positions[i].x = v[0]; 205 | vertex_positions[i].y = v[1]; 206 | vertex_positions[i].z = v[2]; 207 | } 208 | 209 | meshopt_Buffer vertex_quadrics(vertex_count); 210 | memset(vertex_quadrics.data, 0, vertex_count * sizeof(Quadric)); 211 | 212 | // face quadrics 213 | for (size_t i = 0; i < index_count; i += 3) 214 | { 215 | Quadric Q; 216 | quadricFromTriangle(Q, vertex_positions[indices[i + 0]], vertex_positions[indices[i + 1]], vertex_positions[indices[i + 2]]); 217 | 218 | quadricAdd(vertex_quadrics[indices[i + 0]], Q); 219 | quadricAdd(vertex_quadrics[indices[i + 1]], Q); 220 | quadricAdd(vertex_quadrics[indices[i + 2]], Q); 221 | } 222 | 223 | // edge quadrics for boundary edges 224 | meshopt_Buffer edges(hashBuckets(index_count)); 225 | memset(edges.data, 0, edges.size * sizeof(unsigned long long)); 226 | 227 | for (size_t i = 0; i < index_count; i += 3) 228 | { 229 | static const int next[3] = {1, 2, 0}; 230 | 231 
| for (int e = 0; e < 3; ++e) 232 | { 233 | unsigned int i0 = indices[i + e]; 234 | unsigned int i1 = indices[i + next[e]]; 235 | 236 | unsigned long long edge = edgeId(i0, i1); 237 | 238 | *hashLookup(edges.data, edges.size, edge, 0ull) = edge; 239 | } 240 | } 241 | 242 | for (size_t i = 0; i < index_count; i += 3) 243 | { 244 | static const int next[3] = {1, 2, 0}; 245 | 246 | for (int e = 0; e < 3; ++e) 247 | { 248 | unsigned int i0 = indices[i + e]; 249 | unsigned int i1 = indices[i + next[e]]; 250 | 251 | unsigned long long edge = edgeId(i1, i0); 252 | 253 | if (*hashLookup(edges.data, edges.size, edge, 0ull) != edge) 254 | { 255 | unsigned int i2 = indices[i + next[next[e]]]; 256 | 257 | Quadric Q; 258 | quadricFromTriangleEdge(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2]); 259 | 260 | quadricAdd(vertex_quadrics[i0], Q); 261 | quadricAdd(vertex_quadrics[i1], Q); 262 | } 263 | } 264 | } 265 | 266 | if (result != indices) 267 | { 268 | for (size_t i = 0; i < index_count; ++i) 269 | { 270 | result[i] = indices[i]; 271 | } 272 | } 273 | 274 | size_t pass_count = 0; 275 | float worst_error = 0; 276 | 277 | while (index_count > target_index_count) 278 | { 279 | meshopt_Buffer edge_collapses(index_count); 280 | size_t edge_collapse_count = 0; 281 | 282 | for (size_t i = 0; i < index_count; i += 3) 283 | { 284 | static const int next[3] = {1, 2, 0}; 285 | 286 | for (int e = 0; e < 3; ++e) 287 | { 288 | unsigned int i0 = result[i + e]; 289 | unsigned int i1 = result[i + next[e]]; 290 | 291 | Collapse c01 = {i0, i1, quadricError(vertex_quadrics[i0], vertex_positions[i1])}; 292 | Collapse c10 = {i1, i0, quadricError(vertex_quadrics[i1], vertex_positions[i0])}; 293 | Collapse c = c01.error <= c10.error ? 
c01 : c10; 294 | 295 | edge_collapses[edge_collapse_count++] = c; 296 | } 297 | } 298 | 299 | std::sort(edge_collapses.data, edge_collapses.data + edge_collapse_count); 300 | 301 | meshopt_Buffer vertex_remap(vertex_count); 302 | 303 | for (size_t i = 0; i < vertex_count; ++i) 304 | { 305 | vertex_remap[i] = unsigned(i); 306 | } 307 | 308 | meshopt_Buffer vertex_locked(vertex_count); 309 | memset(vertex_locked.data, 0, vertex_count); 310 | 311 | // each collapse removes 2 triangles 312 | size_t edge_collapse_goal = (index_count - target_index_count) / 6 + 1; 313 | 314 | size_t collapses = 0; 315 | float pass_error = 0; 316 | 317 | float error_goal = edge_collapses[edge_collapse_goal < edge_collapse_count ? edge_collapse_goal : edge_collapse_count - 1].error; 318 | float error_limit = error_goal * 1.5f; 319 | 320 | for (size_t i = 0; i < edge_collapse_count; ++i) 321 | { 322 | const Collapse& c = edge_collapses[i]; 323 | 324 | if (vertex_locked[c.v0] || vertex_locked[c.v1]) 325 | continue; 326 | 327 | if (c.error > error_limit) 328 | break; 329 | 330 | assert(vertex_remap[c.v0] == c.v0); 331 | assert(vertex_remap[c.v1] == c.v1); 332 | 333 | quadricAdd(vertex_quadrics[c.v1], vertex_quadrics[c.v0]); 334 | 335 | vertex_remap[c.v0] = unsigned(c.v1); 336 | 337 | vertex_locked[c.v0] = 1; 338 | vertex_locked[c.v1] = 1; 339 | 340 | collapses++; 341 | pass_error = c.error; 342 | 343 | if (collapses >= edge_collapse_goal) 344 | break; 345 | } 346 | 347 | // printf("pass %d: collapses: %d/%d, error: %e\n", int(pass_count), int(collapses), int(edge_collapse_count), pass_error); 348 | 349 | pass_count++; 350 | worst_error = (worst_error < pass_error) ? 
pass_error : worst_error; 351 | 352 | // no edges can be collapsed any more => bail out 353 | if (collapses == 0) 354 | break; 355 | 356 | size_t write = 0; 357 | 358 | for (size_t i = 0; i < index_count; i += 3) 359 | { 360 | unsigned int v0 = vertex_remap[result[i + 0]]; 361 | unsigned int v1 = vertex_remap[result[i + 1]]; 362 | unsigned int v2 = vertex_remap[result[i + 2]]; 363 | 364 | assert(vertex_remap[v0] == v0); 365 | assert(vertex_remap[v1] == v1); 366 | assert(vertex_remap[v2] == v2); 367 | 368 | if (v0 != v1 && v0 != v2 && v1 != v2) 369 | { 370 | result[write + 0] = v0; 371 | result[write + 1] = v1; 372 | result[write + 2] = v2; 373 | write += 3; 374 | } 375 | } 376 | 377 | index_count = write; 378 | } 379 | 380 | // printf("passes: %d, worst error: %e\n", int(pass_count), worst_error); 381 | 382 | return index_count; 383 | } 384 | 385 | } // namespace meshopt 386 | 387 | size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count) 388 | { 389 | using namespace meshopt; 390 | 391 | assert(index_count % 3 == 0); 392 | assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); 393 | assert(vertex_positions_stride % sizeof(float) == 0); 394 | assert(target_index_count <= index_count); 395 | 396 | return simplifyEdgeCollapse(destination, indices, index_count, vertex_positions, vertex_positions_stride, vertex_count, target_index_count); 397 | } 398 | -------------------------------------------------------------------------------- /External/meshoptimizer/src/overdrawoptimizer.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details 2 | #include "meshoptimizer.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | // This work is based on: 11 | // Pedro 
// Sorting key for one cluster: clusters whose average normal points away from
// the mesh centroid (high dot product) are likely occluders and should render early.
struct ClusterSortData
{
	unsigned int cluster;
	float dot_product;

	bool operator<(const ClusterSortData& other) const
	{
		// high product = possible occluder, render early
		return dot_product > other.dot_product;
	}
};

// Fills sort_data[0..cluster_count) with, per cluster, the dot product between
// the direction from the mesh centroid to the cluster centroid and the cluster's
// normalized area-weighted average normal. clusters[] holds each cluster's first
// triangle index; the final cluster extends to index_count.
static void calculateSortData(ClusterSortData* sort_data, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_positions_stride, const unsigned int* clusters, size_t cluster_count)
{
	size_t vertex_stride_float = vertex_positions_stride / sizeof(float);

	// centroid of the whole mesh, averaged over index references
	float mesh_centroid[3] = {};

	for (size_t i = 0; i < index_count; ++i)
	{
		const float* v = vertex_positions + vertex_stride_float * indices[i];

		mesh_centroid[0] += v[0];
		mesh_centroid[1] += v[1];
		mesh_centroid[2] += v[2];
	}

	mesh_centroid[0] /= index_count;
	mesh_centroid[1] /= index_count;
	mesh_centroid[2] /= index_count;

	for (size_t cluster_index = 0; cluster_index < cluster_count; ++cluster_index)
	{
		size_t cluster_begin = clusters[cluster_index] * 3;
		size_t cluster_end = (cluster_count > cluster_index + 1) ? clusters[cluster_index + 1] * 3 : index_count;
		assert(cluster_begin < cluster_end);

		float cluster_area = 0;
		float cluster_centroid[3] = {};
		float cluster_normal[3] = {};

		// accumulate area-weighted centroid and (unnormalized) normal per triangle
		for (size_t i = cluster_begin; i < cluster_end; i += 3)
		{
			const float* v0 = vertex_positions + vertex_stride_float * indices[i + 0];
			const float* v1 = vertex_positions + vertex_stride_float * indices[i + 1];
			const float* v2 = vertex_positions + vertex_stride_float * indices[i + 2];

			float e10[3] = {v1[0] - v0[0], v1[1] - v0[1], v1[2] - v0[2]};
			float e20[3] = {v2[0] - v0[0], v2[1] - v0[1], v2[2] - v0[2]};

			// cross product e10 x e20; its length is twice the triangle area
			float normalx = e10[1] * e20[2] - e10[2] * e20[1];
			float normaly = e10[2] * e20[0] - e10[0] * e20[2];
			float normalz = e10[0] * e20[1] - e10[1] * e20[0];

			float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz);

			cluster_centroid[0] += (v0[0] + v1[0] + v2[0]) * (area / 3);
			cluster_centroid[1] += (v0[1] + v1[1] + v2[1]) * (area / 3);
			cluster_centroid[2] += (v0[2] + v1[2] + v2[2]) * (area / 3);

			cluster_normal[0] += normalx;
			cluster_normal[1] += normaly;
			cluster_normal[2] += normalz;

			cluster_area += area;
		}

		float inv_cluster_area = cluster_area == 0 ? 0 : 1 / cluster_area;

		cluster_centroid[0] *= inv_cluster_area;
		cluster_centroid[1] *= inv_cluster_area;
		cluster_centroid[2] *= inv_cluster_area;

		float cluster_normal_length = sqrtf(cluster_normal[0] * cluster_normal[0] + cluster_normal[1] * cluster_normal[1] + cluster_normal[2] * cluster_normal[2]);
		float inv_cluster_normal_length = cluster_normal_length == 0 ? 0 : 1 / cluster_normal_length;

		cluster_normal[0] *= inv_cluster_normal_length;
		cluster_normal[1] *= inv_cluster_normal_length;
		cluster_normal[2] *= inv_cluster_normal_length;

		float centroid_vector[3] = {cluster_centroid[0] - mesh_centroid[0], cluster_centroid[1] - mesh_centroid[1], cluster_centroid[2] - mesh_centroid[2]};

		sort_data[cluster_index].cluster = unsigned(cluster_index);
		sort_data[cluster_index].dot_product = centroid_vector[0] * cluster_normal[0] + centroid_vector[1] * cluster_normal[1] + centroid_vector[2] * cluster_normal[2];
	}
}

// Simulates a FIFO vertex cache via timestamps: each of a/b/c not seen within the
// last cache_size insertions is (re)inserted and counted as a miss. Returns the
// number of misses (0..3); timestamp advances once per inserted vertex.
static unsigned int updateCache(unsigned int a, unsigned int b, unsigned int c, unsigned int cache_size, unsigned int* cache_timestamps, unsigned int& timestamp)
{
	unsigned int triangle[3] = {a, b, c};
	unsigned int cache_misses = 0;

	for (int i = 0; i < 3; ++i)
	{
		unsigned int index = triangle[i];

		// a vertex is still cached iff it was inserted within the last cache_size slots
		if (timestamp - cache_timestamps[index] > cache_size)
		{
			cache_timestamps[index] = timestamp++;
			cache_misses++;
		}
	}

	return cache_misses;
}
vertices are not in the cache it's usually relatively safe to assume that this is a new patch in the mesh 142 | // that is disjoint from previous vertices; sometimes it might come back to reference existing vertices but that frequently 143 | // suggests an inefficiency in the vertex cache optimization algorithm 144 | // usually the first triangle has 3 misses unless it's degenerate - thus we make sure the first cluster always starts with 0 145 | if (i == 0 || m == 3) 146 | { 147 | destination[result++] = unsigned(i); 148 | } 149 | } 150 | 151 | assert(result <= index_count / 3); 152 | 153 | return result; 154 | } 155 | 156 | static size_t generateSoftBoundaries(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const unsigned int* clusters, size_t cluster_count, unsigned int cache_size, float threshold) 157 | { 158 | meshopt_Buffer cache_timestamps(vertex_count); 159 | memset(cache_timestamps.data, 0, vertex_count * sizeof(unsigned int)); 160 | 161 | unsigned int timestamp = 0; 162 | 163 | size_t result = 0; 164 | 165 | for (size_t it = 0; it < cluster_count; ++it) 166 | { 167 | size_t start = clusters[it]; 168 | size_t end = (it + 1 < cluster_count) ? 
clusters[it + 1] : index_count / 3; 169 | assert(start < end); 170 | 171 | // reset cache 172 | timestamp += cache_size + 1; 173 | 174 | // measure cluster ACMR 175 | unsigned int cluster_misses = 0; 176 | 177 | for (size_t i = start; i < end; ++i) 178 | { 179 | unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp); 180 | 181 | cluster_misses += m; 182 | } 183 | 184 | float cluster_threshold = threshold * (float(cluster_misses) / float(end - start)); 185 | 186 | // first cluster always starts from the hard cluster boundary 187 | destination[result++] = unsigned(start); 188 | 189 | // reset cache 190 | timestamp += cache_size + 1; 191 | 192 | unsigned int running_misses = 0; 193 | unsigned int running_faces = 0; 194 | 195 | for (size_t i = start; i < end; ++i) 196 | { 197 | unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp); 198 | 199 | running_misses += m; 200 | running_faces += 1; 201 | 202 | if (float(running_misses) / float(running_faces) <= cluster_threshold) 203 | { 204 | // we have reached the target ACMR with the current triangle so we need to start a new cluster on the next one 205 | // note that this may mean that we add 'end` to destination for the last triangle, which will imply that the last 206 | // cluster is empty; however, the 'pop_back' after the loop will clean it up 207 | destination[result++] = unsigned(i + 1); 208 | 209 | // reset cache 210 | timestamp += cache_size + 1; 211 | 212 | running_misses = 0; 213 | running_faces = 0; 214 | } 215 | } 216 | 217 | // each time we reach the target ACMR we flush the cluster 218 | // this means that the last cluster is by definition not very good - there are frequent cases where we are left with a few triangles 219 | // in the last cluster, producing a very bad ACMR and significantly penalizing the overall results 220 | // thus we remove the last 
cluster boundary, merging the last complete cluster with the last incomplete one 221 | // there are sometimes cases when the last cluster is actually good enough - in which case the code above would have added 'end' 222 | // to the cluster boundary array which we need to remove anyway - this code will do that automatically 223 | if (destination[result - 1] != start) 224 | { 225 | result--; 226 | } 227 | } 228 | 229 | assert(result >= cluster_count); 230 | assert(result <= index_count / 3); 231 | 232 | return result; 233 | } 234 | 235 | } // namespace 236 | 237 | void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold) 238 | { 239 | using namespace meshopt; 240 | 241 | assert(index_count % 3 == 0); 242 | assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); 243 | assert(vertex_positions_stride % sizeof(float) == 0); 244 | 245 | // guard for empty meshes 246 | if (index_count == 0 || vertex_count == 0) 247 | return; 248 | 249 | // support in-place optimization 250 | meshopt_Buffer indices_copy; 251 | 252 | if (destination == indices) 253 | { 254 | indices_copy.data = new unsigned int[index_count]; 255 | memcpy(indices_copy.data, indices, index_count * sizeof(unsigned int)); 256 | indices = indices_copy.data; 257 | } 258 | 259 | unsigned int cache_size = 16; 260 | 261 | // generate hard boundaries from full-triangle cache misses 262 | meshopt_Buffer hard_clusters(index_count / 3); 263 | size_t hard_cluster_count = generateHardBoundaries(&hard_clusters[0], indices, index_count, vertex_count, cache_size); 264 | 265 | // generate soft boundaries 266 | meshopt_Buffer soft_clusters(index_count / 3 + 1); 267 | size_t soft_cluster_count = generateSoftBoundaries(&soft_clusters[0], indices, index_count, vertex_count, &hard_clusters[0], hard_cluster_count, cache_size, threshold); 268 | 269 | const unsigned int* 
clusters = &soft_clusters[0]; 270 | size_t cluster_count = soft_cluster_count; 271 | 272 | // fill sort data 273 | meshopt_Buffer sort_data(cluster_count); 274 | calculateSortData(&sort_data[0], indices, index_count, vertex_positions, vertex_positions_stride, clusters, cluster_count); 275 | 276 | // high product = possible occluder, render early 277 | std::sort(sort_data.data, sort_data.data + cluster_count); 278 | 279 | // fill output buffer 280 | size_t offset = 0; 281 | 282 | for (size_t it = 0; it < cluster_count; ++it) 283 | { 284 | unsigned int cluster = sort_data[it].cluster; 285 | assert(cluster < cluster_count); 286 | 287 | size_t start = clusters[cluster]; 288 | size_t end = (cluster + 1 < cluster_count) ? clusters[cluster + 1] : index_count / 3; 289 | assert(start < end); 290 | 291 | for (size_t i = start; i < end; ++i) 292 | { 293 | destination[offset++] = indices[3 * i + 0]; 294 | destination[offset++] = indices[3 * i + 1]; 295 | destination[offset++] = indices[3 * i + 2]; 296 | } 297 | } 298 | 299 | assert(offset == index_count); 300 | } 301 | -------------------------------------------------------------------------------- /External/meshoptimizer/src/vcacheoptimizer.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details 2 | #include "meshoptimizer.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | // This work is based on: 9 | // Tom Forsyth. Linear-Speed Vertex Cache Optimisation. 2006 10 | // Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 
2007 11 | namespace meshopt 12 | { 13 | 14 | const size_t max_cache_size = 16; 15 | const size_t max_valence = 8; 16 | 17 | static const float vertex_score_table_cache[1 + max_cache_size] = { 18 | 0.f, 19 | 0.792f, 0.767f, 0.764f, 0.956f, 0.827f, 0.751f, 0.820f, 0.864f, 0.738f, 0.788f, 0.642f, 0.646f, 0.165f, 0.654f, 0.545f, 0.284f 20 | }; 21 | 22 | static const float vertex_score_table_live[1 + max_valence] = { 23 | 0.f, 24 | 0.994f, 0.721f, 0.479f, 0.423f, 0.174f, 0.080f, 0.249f, 0.056f 25 | }; 26 | 27 | struct Adjacency 28 | { 29 | meshopt_Buffer triangle_counts; 30 | meshopt_Buffer offsets; 31 | meshopt_Buffer data; 32 | 33 | Adjacency(size_t index_count, size_t vertex_count) 34 | : triangle_counts(vertex_count) 35 | , offsets(vertex_count) 36 | , data(index_count) 37 | { 38 | } 39 | }; 40 | 41 | static void buildAdjacency(Adjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count) 42 | { 43 | size_t face_count = index_count / 3; 44 | 45 | // fill triangle counts 46 | for (size_t i = 0; i < vertex_count; ++i) 47 | { 48 | adjacency.triangle_counts[i] = 0; 49 | } 50 | 51 | for (size_t i = 0; i < index_count; ++i) 52 | { 53 | assert(indices[i] < vertex_count); 54 | 55 | adjacency.triangle_counts[indices[i]]++; 56 | } 57 | 58 | // fill offset table 59 | unsigned int offset = 0; 60 | 61 | for (size_t i = 0; i < vertex_count; ++i) 62 | { 63 | adjacency.offsets[i] = offset; 64 | offset += adjacency.triangle_counts[i]; 65 | } 66 | 67 | assert(offset == index_count); 68 | 69 | // fill triangle data 70 | for (size_t i = 0; i < face_count; ++i) 71 | { 72 | unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; 73 | 74 | adjacency.data[adjacency.offsets[a]++] = unsigned(i); 75 | adjacency.data[adjacency.offsets[b]++] = unsigned(i); 76 | adjacency.data[adjacency.offsets[c]++] = unsigned(i); 77 | } 78 | 79 | // fix offsets that have been disturbed by the previous pass 80 | for (size_t i = 0; i < vertex_count; ++i) 
81 | { 82 | assert(adjacency.offsets[i] >= adjacency.triangle_counts[i]); 83 | 84 | adjacency.offsets[i] -= adjacency.triangle_counts[i]; 85 | } 86 | } 87 | 88 | static unsigned int getNextVertexDeadEnd(const unsigned int* dead_end, unsigned int& dead_end_top, unsigned int& input_cursor, const unsigned int* live_triangles, size_t vertex_count) 89 | { 90 | // check dead-end stack 91 | while (dead_end_top) 92 | { 93 | unsigned int vertex = dead_end[--dead_end_top]; 94 | 95 | if (live_triangles[vertex] > 0) 96 | return vertex; 97 | } 98 | 99 | // input order 100 | while (input_cursor < vertex_count) 101 | { 102 | if (live_triangles[input_cursor] > 0) 103 | return input_cursor; 104 | 105 | ++input_cursor; 106 | } 107 | 108 | return ~0u; 109 | } 110 | 111 | static unsigned int getNextVertexNeighbour(const unsigned int* next_candidates_begin, const unsigned int* next_candidates_end, const unsigned int* live_triangles, const unsigned int* cache_timestamps, unsigned int timestamp, unsigned int cache_size) 112 | { 113 | unsigned int best_candidate = ~0u; 114 | int best_priority = -1; 115 | 116 | for (const unsigned int* next_candidate = next_candidates_begin; next_candidate != next_candidates_end; ++next_candidate) 117 | { 118 | unsigned int vertex = *next_candidate; 119 | 120 | // otherwise we don't need to process it 121 | if (live_triangles[vertex] > 0) 122 | { 123 | int priority = 0; 124 | 125 | // will it be in cache after fanning? 
126 | if (2 * live_triangles[vertex] + timestamp - cache_timestamps[vertex] <= cache_size) 127 | { 128 | priority = timestamp - cache_timestamps[vertex]; // position in cache 129 | } 130 | 131 | if (priority > best_priority) 132 | { 133 | best_candidate = vertex; 134 | best_priority = priority; 135 | } 136 | } 137 | } 138 | 139 | return best_candidate; 140 | } 141 | 142 | static float vertexScore(int cache_position, unsigned int live_triangles) 143 | { 144 | assert(cache_position >= -1 && cache_position < int(max_cache_size)); 145 | 146 | unsigned int live_triangles_clamped = live_triangles < max_valence ? live_triangles : max_valence; 147 | 148 | return vertex_score_table_cache[1 + cache_position] + vertex_score_table_live[live_triangles_clamped]; 149 | } 150 | 151 | static unsigned int getNextTriangleDeadEnd(unsigned int& input_cursor, const char* emitted_flags, size_t face_count) 152 | { 153 | // input order 154 | while (input_cursor < face_count) 155 | { 156 | if (!emitted_flags[input_cursor]) 157 | return input_cursor; 158 | 159 | ++input_cursor; 160 | } 161 | 162 | return ~0u; 163 | } 164 | 165 | } // namespace meshopt 166 | 167 | void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count) 168 | { 169 | using namespace meshopt; 170 | 171 | assert(index_count % 3 == 0); 172 | 173 | // guard for empty meshes 174 | if (index_count == 0 || vertex_count == 0) 175 | return; 176 | 177 | // support in-place optimization 178 | meshopt_Buffer indices_copy; 179 | 180 | if (destination == indices) 181 | { 182 | indices_copy.data = new unsigned int[index_count]; 183 | memcpy(indices_copy.data, indices, index_count * sizeof(unsigned int)); 184 | indices = indices_copy.data; 185 | } 186 | 187 | unsigned int cache_size = 16; 188 | assert(cache_size <= max_cache_size); 189 | 190 | size_t face_count = index_count / 3; 191 | 192 | // build adjacency information 193 | Adjacency adjacency(index_count, 
vertex_count); 194 | buildAdjacency(adjacency, indices, index_count, vertex_count); 195 | 196 | // live triangle counts 197 | meshopt_Buffer live_triangles(vertex_count); 198 | memcpy(live_triangles.data, adjacency.triangle_counts.data, vertex_count * sizeof(unsigned int)); 199 | 200 | // emitted flags 201 | meshopt_Buffer emitted_flags(face_count); 202 | memset(emitted_flags.data, 0, face_count); 203 | 204 | // compute initial vertex scores 205 | meshopt_Buffer vertex_scores(vertex_count); 206 | 207 | for (size_t i = 0; i < vertex_count; ++i) 208 | { 209 | vertex_scores[i] = vertexScore(-1, live_triangles[i]); 210 | } 211 | 212 | // compute triangle scores 213 | meshopt_Buffer triangle_scores(face_count); 214 | 215 | for (size_t i = 0; i < face_count; ++i) 216 | { 217 | unsigned int a = indices[i * 3 + 0]; 218 | unsigned int b = indices[i * 3 + 1]; 219 | unsigned int c = indices[i * 3 + 2]; 220 | 221 | triangle_scores[i] = vertex_scores[a] + vertex_scores[b] + vertex_scores[c]; 222 | } 223 | 224 | unsigned int cache_holder[2 * (max_cache_size + 3)]; 225 | unsigned int* cache = cache_holder; 226 | unsigned int* cache_new = cache_holder + max_cache_size + 3; 227 | size_t cache_count = 0; 228 | 229 | unsigned int current_triangle = 0; 230 | unsigned int input_cursor = 1; 231 | 232 | unsigned int output_triangle = 0; 233 | 234 | while (current_triangle != ~0u) 235 | { 236 | assert(output_triangle < face_count); 237 | 238 | unsigned int a = indices[current_triangle * 3 + 0]; 239 | unsigned int b = indices[current_triangle * 3 + 1]; 240 | unsigned int c = indices[current_triangle * 3 + 2]; 241 | 242 | // output indices 243 | destination[output_triangle * 3 + 0] = a; 244 | destination[output_triangle * 3 + 1] = b; 245 | destination[output_triangle * 3 + 2] = c; 246 | output_triangle++; 247 | 248 | // update emitted flags 249 | emitted_flags[current_triangle] = true; 250 | triangle_scores[current_triangle] = 0; 251 | 252 | // new triangle 253 | size_t cache_write = 0; 254 
| cache_new[cache_write++] = a; 255 | cache_new[cache_write++] = b; 256 | cache_new[cache_write++] = c; 257 | 258 | // old triangles 259 | for (size_t i = 0; i < cache_count; ++i) 260 | { 261 | unsigned int index = cache[i]; 262 | 263 | if (index != a && index != b && index != c) 264 | { 265 | cache_new[cache_write++] = index; 266 | } 267 | } 268 | 269 | unsigned int* cache_temp = cache; 270 | cache = cache_new, cache_new = cache_temp; 271 | cache_count = cache_write > cache_size ? cache_size : cache_write; 272 | 273 | // update live triangle counts 274 | live_triangles[a]--; 275 | live_triangles[b]--; 276 | live_triangles[c]--; 277 | 278 | // remove emitted triangle from adjacency data 279 | // this makes sure that we spend less time traversing these lists on subsequent iterations 280 | for (size_t k = 0; k < 3; ++k) 281 | { 282 | unsigned int index = indices[current_triangle * 3 + k]; 283 | 284 | unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index]; 285 | size_t neighbours_size = adjacency.triangle_counts[index]; 286 | 287 | for (size_t i = 0; i < neighbours_size; ++i) 288 | { 289 | unsigned int tri = neighbours[i]; 290 | 291 | if (tri == current_triangle) 292 | { 293 | neighbours[i] = neighbours[neighbours_size - 1]; 294 | adjacency.triangle_counts[index]--; 295 | break; 296 | } 297 | } 298 | } 299 | 300 | unsigned int best_triangle = ~0u; 301 | float best_score = 0; 302 | 303 | // update cache positions, vertex scores and triangle scores, and find next best triangle 304 | for (size_t i = 0; i < cache_write; ++i) 305 | { 306 | unsigned int index = cache[i]; 307 | 308 | int cache_position = i >= cache_size ? 
-1 : int(i); 309 | 310 | // update vertex score 311 | float score = vertexScore(cache_position, live_triangles[index]); 312 | float score_diff = score - vertex_scores[index]; 313 | 314 | vertex_scores[index] = score; 315 | 316 | // update scores of vertex triangles 317 | const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[index]; 318 | const unsigned int* neighbours_end = neighbours_begin + adjacency.triangle_counts[index]; 319 | 320 | for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it) 321 | { 322 | unsigned int tri = *it; 323 | assert(!emitted_flags[tri]); 324 | 325 | float tri_score = triangle_scores[tri] + score_diff; 326 | assert(tri_score > 0); 327 | 328 | if (best_score < tri_score) 329 | { 330 | best_triangle = tri; 331 | best_score = tri_score; 332 | } 333 | 334 | triangle_scores[tri] = tri_score; 335 | } 336 | } 337 | 338 | // step through input triangles in order if we hit a dead-end 339 | current_triangle = best_triangle; 340 | 341 | if (current_triangle == ~0u) 342 | { 343 | current_triangle = getNextTriangleDeadEnd(input_cursor, &emitted_flags[0], face_count); 344 | } 345 | } 346 | 347 | assert(input_cursor == face_count); 348 | assert(output_triangle == face_count); 349 | } 350 | 351 | void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size) 352 | { 353 | using namespace meshopt; 354 | 355 | assert(index_count % 3 == 0); 356 | assert(cache_size >= 3); 357 | 358 | // guard for empty meshes 359 | if (index_count == 0 || vertex_count == 0) 360 | return; 361 | 362 | // support in-place optimization 363 | meshopt_Buffer indices_copy; 364 | 365 | if (destination == indices) 366 | { 367 | indices_copy.data = new unsigned int[index_count]; 368 | memcpy(indices_copy.data, indices, index_count * sizeof(unsigned int)); 369 | indices = indices_copy.data; 370 | } 371 | 372 | size_t face_count = index_count / 3; 
373 | 374 | // build adjacency information 375 | Adjacency adjacency(index_count, vertex_count); 376 | buildAdjacency(adjacency, indices, index_count, vertex_count); 377 | 378 | // live triangle counts 379 | meshopt_Buffer live_triangles(vertex_count); 380 | memcpy(live_triangles.data, adjacency.triangle_counts.data, vertex_count * sizeof(unsigned int)); 381 | 382 | // cache time stamps 383 | meshopt_Buffer cache_timestamps(vertex_count); 384 | memset(cache_timestamps.data, 0, vertex_count * sizeof(unsigned int)); 385 | 386 | // dead-end stack 387 | meshopt_Buffer dead_end(index_count); 388 | unsigned int dead_end_top = 0; 389 | 390 | // emitted flags 391 | meshopt_Buffer emitted_flags(face_count); 392 | memset(emitted_flags.data, 0, face_count); 393 | 394 | unsigned int current_vertex = 0; 395 | 396 | unsigned int timestamp = cache_size + 1; 397 | unsigned int input_cursor = 1; // vertex to restart from in case of dead-end 398 | 399 | unsigned int output_triangle = 0; 400 | 401 | while (current_vertex != ~0u) 402 | { 403 | const unsigned int* next_candidates_begin = &dead_end[0] + dead_end_top; 404 | 405 | // emit all vertex neighbours 406 | const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[current_vertex]; 407 | const unsigned int* neighbours_end = neighbours_begin + adjacency.triangle_counts[current_vertex]; 408 | 409 | for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it) 410 | { 411 | unsigned int triangle = *it; 412 | 413 | if (!emitted_flags[triangle]) 414 | { 415 | unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2]; 416 | 417 | // output indices 418 | destination[output_triangle * 3 + 0] = a; 419 | destination[output_triangle * 3 + 1] = b; 420 | destination[output_triangle * 3 + 2] = c; 421 | output_triangle++; 422 | 423 | // update dead-end stack 424 | dead_end[dead_end_top + 0] = a; 425 | dead_end[dead_end_top + 1] = b; 426 | dead_end[dead_end_top + 2] 
= c; 427 | dead_end_top += 3; 428 | 429 | // update live triangle counts 430 | live_triangles[a]--; 431 | live_triangles[b]--; 432 | live_triangles[c]--; 433 | 434 | // update cache info 435 | // if vertex is not in cache, put it in cache 436 | if (timestamp - cache_timestamps[a] > cache_size) 437 | cache_timestamps[a] = timestamp++; 438 | 439 | if (timestamp - cache_timestamps[b] > cache_size) 440 | cache_timestamps[b] = timestamp++; 441 | 442 | if (timestamp - cache_timestamps[c] > cache_size) 443 | cache_timestamps[c] = timestamp++; 444 | 445 | // update emitted flags 446 | emitted_flags[triangle] = true; 447 | } 448 | } 449 | 450 | // next candidates are the ones we pushed to dead-end stack just now 451 | const unsigned int* next_candidates_end = &dead_end[0] + dead_end_top; 452 | 453 | // get next vertex 454 | current_vertex = getNextVertexNeighbour(next_candidates_begin, next_candidates_end, &live_triangles[0], &cache_timestamps[0], timestamp, cache_size); 455 | 456 | if (current_vertex == ~0u) 457 | { 458 | current_vertex = getNextVertexDeadEnd(&dead_end[0], dead_end_top, input_cursor, &live_triangles[0], vertex_count); 459 | } 460 | } 461 | 462 | assert(output_triangle == face_count); 463 | } 464 | -------------------------------------------------------------------------------- /External/meshoptimizer/src/indexcodec.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details 2 | #include "meshoptimizer.h" 3 | 4 | #include 5 | #include 6 | 7 | // This work is based on: 8 | // Fabian Giesen. Simple lossless index buffer compression & follow-up. 2013 9 | // Conor Stokes. Vertex Cache Optimised Index Buffer Compression. 
2014 10 | namespace meshopt 11 | { 12 | 13 | typedef unsigned int VertexFifo[16]; 14 | typedef unsigned int EdgeFifo[16][2]; 15 | 16 | static const unsigned int kTriangleIndexOrder[3][3] = { 17 | {0, 1, 2}, 18 | {1, 2, 0}, 19 | {2, 0, 1}, 20 | }; 21 | 22 | static const unsigned char kCodeAuxEncodingTable[16] = { 23 | 0x00, 0x76, 0x87, 0x56, 0x67, 0x78, 0xa9, 0x86, 0x65, 0x89, 0x68, 0x98, 0x01, 0x69, 0, 0, 24 | }; 25 | 26 | static int rotateTriangle(unsigned int a, unsigned int b, unsigned int c, unsigned int next) 27 | { 28 | (void)a; 29 | 30 | return (b == next) ? 1 : (c == next) ? 2 : 0; 31 | } 32 | 33 | static int getEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, unsigned int c, size_t offset) 34 | { 35 | for (int i = 0; i < 16; ++i) 36 | { 37 | unsigned int index = (offset - 1 - i) & 15; 38 | unsigned int e0 = fifo[index][0]; 39 | unsigned int e1 = fifo[index][1]; 40 | 41 | if (e0 == a && e1 == b) 42 | return (i << 2) | 0; 43 | if (e0 == b && e1 == c) 44 | return (i << 2) | 1; 45 | if (e0 == c && e1 == a) 46 | return (i << 2) | 2; 47 | } 48 | 49 | return -1; 50 | } 51 | 52 | static void pushEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, size_t& offset) 53 | { 54 | fifo[offset][0] = a; 55 | fifo[offset][1] = b; 56 | offset = (offset + 1) & 15; 57 | } 58 | 59 | static int getVertexFifo(VertexFifo fifo, unsigned int v, size_t offset) 60 | { 61 | for (int i = 0; i < 16; ++i) 62 | { 63 | unsigned int index = (offset - 1 - i) & 15; 64 | 65 | if (fifo[index] == v) 66 | return i; 67 | } 68 | 69 | return -1; 70 | } 71 | 72 | static void pushVertexFifo(VertexFifo fifo, unsigned int v, size_t& offset, int cond = 1) 73 | { 74 | fifo[offset] = v; 75 | offset = (offset + cond) & 15; 76 | } 77 | 78 | static void encodeVByte(unsigned char*& data, unsigned int v) 79 | { 80 | // encode 32-bit value in up to 5 7-bit groups 81 | do 82 | { 83 | *data++ = (v & 127) | (v > 127 ? 
128 : 0); 84 | v >>= 7; 85 | } while (v); 86 | } 87 | 88 | static unsigned int decodeVByte(const unsigned char*& data) 89 | { 90 | unsigned char lead = *data++; 91 | 92 | // fast path: single byte 93 | if (lead < 128) 94 | return lead; 95 | 96 | // slow path: up to 4 extra bytes 97 | // note that this loop always terminates, which is important for malformed data 98 | unsigned int result = lead & 127; 99 | unsigned int shift = 7; 100 | 101 | for (int i = 0; i < 4; ++i) 102 | { 103 | unsigned char group = *data++; 104 | result |= (group & 127) << shift; 105 | shift += 7; 106 | 107 | if (group < 128) 108 | break; 109 | } 110 | 111 | return result; 112 | } 113 | 114 | static void encodeIndex(unsigned char*& data, unsigned int index, unsigned int next, unsigned int last) 115 | { 116 | (void)next; 117 | 118 | unsigned int d = index - last; 119 | unsigned int v = (d << 1) ^ (int(d) >> 31); 120 | 121 | encodeVByte(data, v); 122 | } 123 | 124 | static unsigned int decodeIndex(const unsigned char*& data, unsigned int next, unsigned int last) 125 | { 126 | (void)next; 127 | 128 | unsigned int v = decodeVByte(data); 129 | unsigned int d = (v >> 1) ^ -int(v & 1); 130 | 131 | return last + d; 132 | } 133 | 134 | static int getCodeAuxIndex(unsigned char v, const unsigned char* table) 135 | { 136 | for (int i = 0; i < 16; ++i) 137 | if (table[i] == v) 138 | return i; 139 | 140 | return -1; 141 | } 142 | } 143 | 144 | size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count) 145 | { 146 | using namespace meshopt; 147 | 148 | assert(index_count % 3 == 0); 149 | 150 | // the minimum valid encoding is 1 byte per triangle and a 16-byte codeaux table 151 | if (buffer_size < index_count / 3 + 16) 152 | return 0; 153 | 154 | EdgeFifo edgefifo; 155 | memset(edgefifo, -1, sizeof(edgefifo)); 156 | 157 | VertexFifo vertexfifo; 158 | memset(vertexfifo, -1, sizeof(vertexfifo)); 159 | 160 | size_t edgefifooffset = 0; 161 | size_t 
vertexfifooffset = 0; 162 | 163 | unsigned int next = 0; 164 | unsigned int last = 0; 165 | 166 | unsigned char* code = buffer; 167 | unsigned char* data = buffer + index_count / 3; 168 | unsigned char* data_safe_end = buffer + buffer_size - 16; 169 | 170 | // use static encoding table; it's possible to pack the result and then build an optimal table and repack 171 | // for now we keep it simple and use the table that has been generated based on symbol frequency on a training mesh set 172 | const unsigned char* codeaux_table = kCodeAuxEncodingTable; 173 | 174 | // two last entries of codeaux_table are redundant - they are never referenced by the encoding 175 | // make sure that they are both zero, since they can serve as version/other data in the future 176 | assert(codeaux_table[14] == 0 && codeaux_table[15] == 0); 177 | 178 | for (size_t i = 0; i < index_count; i += 3) 179 | { 180 | // make sure we have enough space to write a triangle 181 | // each triangle writes at most 16 bytes: 1b for codeaux and 5b for each free index 182 | // after this we can be sure we can write without extra bounds checks 183 | if (data > data_safe_end) 184 | return 0; 185 | 186 | int fer = getEdgeFifo(edgefifo, indices[i + 0], indices[i + 1], indices[i + 2], edgefifooffset); 187 | 188 | if (fer >= 0 && (fer >> 2) < 15) 189 | { 190 | const unsigned int* order = kTriangleIndexOrder[fer & 3]; 191 | 192 | unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]]; 193 | 194 | // encode edge index and vertex fifo index, next or free index 195 | int fe = fer >> 2; 196 | int fc = getVertexFifo(vertexfifo, c, vertexfifooffset); 197 | 198 | int fec = (fc >= 1 && fc < 15) ? fc : (c == next) ? 
(next++, 0) : 15; 199 | 200 | *code++ = static_cast((fe << 4) | fec); 201 | 202 | // note that we need to update the last index since free indices are delta-encoded 203 | if (fec == 15) 204 | encodeIndex(data, c, next, last), last = c; 205 | 206 | // we only need to push third vertex since first two are likely already in the vertex fifo 207 | if (fec == 0 || fec == 15) 208 | pushVertexFifo(vertexfifo, c, vertexfifooffset); 209 | 210 | // we only need to push two new edges to edge fifo since the third one is already there 211 | pushEdgeFifo(edgefifo, c, b, edgefifooffset); 212 | pushEdgeFifo(edgefifo, a, c, edgefifooffset); 213 | } 214 | else 215 | { 216 | const unsigned int* order = kTriangleIndexOrder[rotateTriangle(indices[i + 0], indices[i + 1], indices[i + 2], next)]; 217 | 218 | unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]]; 219 | 220 | int fb = getVertexFifo(vertexfifo, b, vertexfifooffset); 221 | int fc = getVertexFifo(vertexfifo, c, vertexfifooffset); 222 | 223 | // after rotation, a is almost always equal to next, so we don't waste bits on FIFO encoding for a 224 | int fea = (a == next) ? (next++, 0) : 15; 225 | int feb = (fb >= 0 && fb < 14) ? (fb + 1) : (b == next) ? (next++, 0) : 15; 226 | int fec = (fc >= 0 && fc < 14) ? (fc + 1) : (c == next) ? 
(next++, 0) : 15; 227 | 228 | // we encode feb & fec in 4 bits using a table if possible, and as a full byte otherwise 229 | unsigned char codeaux = static_cast((feb << 4) | fec); 230 | int codeauxindex = getCodeAuxIndex(codeaux, codeaux_table); 231 | 232 | // <14 encodes an index into codeaux table, 14 encodes fea=0, 15 encodes fea=15 233 | if (fea == 0 && codeauxindex >= 0 && codeauxindex < 14) 234 | { 235 | *code++ = static_cast((15 << 4) | codeauxindex); 236 | } 237 | else 238 | { 239 | *code++ = static_cast((15 << 4) | 14 | fea); 240 | *data++ = codeaux; 241 | } 242 | 243 | // note that we need to update the last index since free indices are delta-encoded 244 | if (fea == 15) 245 | encodeIndex(data, a, next, last), last = a; 246 | 247 | if (feb == 15) 248 | encodeIndex(data, b, next, last), last = b; 249 | 250 | if (fec == 15) 251 | encodeIndex(data, c, next, last), last = c; 252 | 253 | // only push vertices that weren't already in fifo 254 | if (fea == 0 || fea == 15) 255 | pushVertexFifo(vertexfifo, a, vertexfifooffset); 256 | 257 | if (feb == 0 || feb == 15) 258 | pushVertexFifo(vertexfifo, b, vertexfifooffset); 259 | 260 | if (fec == 0 || fec == 15) 261 | pushVertexFifo(vertexfifo, c, vertexfifooffset); 262 | 263 | // all three edges aren't in the fifo; pushing all of them is important so that we can match them for later triangles 264 | pushEdgeFifo(edgefifo, b, a, edgefifooffset); 265 | pushEdgeFifo(edgefifo, c, b, edgefifooffset); 266 | pushEdgeFifo(edgefifo, a, c, edgefifooffset); 267 | } 268 | } 269 | 270 | // make sure we have enough space to write codeaux table 271 | if (data > data_safe_end) 272 | return 0; 273 | 274 | // add codeaux encoding table to the end of the stream; this is used for decoding codeaux *and* as padding 275 | // we need padding for decoding to be able to assume that each triangle is encoded as <= 16 bytes of extra data 276 | // this is enough space for aux byte + 5 bytes per varint index which is the absolute worst case for any 
input 277 | for (size_t i = 0; i < 16; ++i) 278 | { 279 | *data++ = codeaux_table[i]; 280 | } 281 | 282 | assert(data >= buffer + index_count / 3 + 16); 283 | assert(data <= buffer + buffer_size); 284 | 285 | return data - buffer; 286 | } 287 | 288 | size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count) 289 | { 290 | assert(index_count % 3 == 0); 291 | 292 | // compute number of bits required for each index 293 | unsigned int vertex_bits = 1; 294 | 295 | while (vertex_bits < 32 && vertex_count > size_t(1) << vertex_bits) 296 | vertex_bits++; 297 | 298 | // worst-case encoding is 2 header bytes + 3 varint-7 encoded index deltas 299 | unsigned int vertex_groups = (vertex_bits + 1 + 6) / 7; 300 | 301 | return (index_count / 3) * (2 + 3 * vertex_groups) + 16; 302 | } 303 | 304 | int meshopt_decodeIndexBuffer(unsigned int* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size) 305 | { 306 | using namespace meshopt; 307 | 308 | assert(index_count % 3 == 0); 309 | 310 | // the minimum valid encoding is 1 byte per triangle and a 16-byte codeaux table 311 | if (buffer_size < index_count / 3 + 16) 312 | return -1; 313 | 314 | EdgeFifo edgefifo; 315 | memset(edgefifo, -1, sizeof(edgefifo)); 316 | 317 | VertexFifo vertexfifo; 318 | memset(vertexfifo, -1, sizeof(vertexfifo)); 319 | 320 | size_t edgefifooffset = 0; 321 | size_t vertexfifooffset = 0; 322 | 323 | unsigned int next = 0; 324 | unsigned int last = 0; 325 | 326 | // since we store 16-byte codeaux table at the end, triangle data has to begin before data_safe_end 327 | const unsigned char* code = buffer; 328 | const unsigned char* data = buffer + index_count / 3; 329 | const unsigned char* data_safe_end = buffer + buffer_size - 16; 330 | 331 | const unsigned char* codeaux_table = data_safe_end; 332 | 333 | for (size_t i = 0; i < index_count; i += 3) 334 | { 335 | // make sure we have enough data to read for a triangle 336 | // each triangle reads at most 16 bytes of 
data: 1b for codeaux and 5b for each free index 337 | // after this we can be sure we can read without extra bounds checks 338 | if (data > data_safe_end) 339 | return -2; 340 | 341 | unsigned char codetri = *code++; 342 | 343 | if (codetri < 0xf0) 344 | { 345 | int fe = codetri >> 4; 346 | 347 | // fifo reads are wrapped around 16 entry buffer 348 | unsigned int a = edgefifo[(edgefifooffset - 1 - fe) & 15][0]; 349 | unsigned int b = edgefifo[(edgefifooffset - 1 - fe) & 15][1]; 350 | 351 | int fec = codetri & 15; 352 | 353 | // note: this is the most common path in the entire decoder 354 | // inside this if we try to stay branchless (by using cmov/etc.) since these aren't predictable 355 | if (fec != 15) 356 | { 357 | // fifo reads are wrapped around 16 entry buffer 358 | unsigned int cf = vertexfifo[(vertexfifooffset - 1 - fec) & 15]; 359 | unsigned int c = (fec == 0) ? next : cf; 360 | 361 | int fec0 = fec == 0; 362 | next += fec0; 363 | 364 | // output triangle 365 | destination[i + 0] = a; 366 | destination[i + 1] = b; 367 | destination[i + 2] = c; 368 | 369 | // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly 370 | pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0); 371 | 372 | pushEdgeFifo(edgefifo, c, b, edgefifooffset); 373 | pushEdgeFifo(edgefifo, a, c, edgefifooffset); 374 | } 375 | else 376 | { 377 | unsigned int c = 0; 378 | 379 | // note that we need to update the last index since free indices are delta-encoded 380 | last = c = decodeIndex(data, next, last); 381 | 382 | // output triangle 383 | destination[i + 0] = a; 384 | destination[i + 1] = b; 385 | destination[i + 2] = c; 386 | 387 | // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly 388 | pushVertexFifo(vertexfifo, c, vertexfifooffset); 389 | 390 | pushEdgeFifo(edgefifo, c, b, edgefifooffset); 391 | pushEdgeFifo(edgefifo, a, c, edgefifooffset); 392 | } 393 | } 394 | else 
395 | { 396 | // fast path: read codeaux from the table 397 | if (codetri < 0xfe) 398 | { 399 | unsigned char codeaux = codeaux_table[codetri & 15]; 400 | 401 | // note: table can't contain feb/fec=15 402 | int feb = codeaux >> 4; 403 | int fec = codeaux & 15; 404 | 405 | // fifo reads are wrapped around 16 entry buffer 406 | // also note that we increment next for all three vertices before decoding indices - this matches encoder behavior 407 | unsigned int a = next++; 408 | 409 | unsigned int bf = vertexfifo[(vertexfifooffset - feb) & 15]; 410 | unsigned int b = (feb == 0) ? next : bf; 411 | 412 | int feb0 = feb == 0; 413 | next += feb0; 414 | 415 | unsigned int cf = vertexfifo[(vertexfifooffset - fec) & 15]; 416 | unsigned int c = (fec == 0) ? next : cf; 417 | 418 | int fec0 = fec == 0; 419 | next += fec0; 420 | 421 | // output triangle 422 | destination[i + 0] = a; 423 | destination[i + 1] = b; 424 | destination[i + 2] = c; 425 | 426 | // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly 427 | pushVertexFifo(vertexfifo, a, vertexfifooffset); 428 | pushVertexFifo(vertexfifo, b, vertexfifooffset, feb0); 429 | pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0); 430 | 431 | pushEdgeFifo(edgefifo, b, a, edgefifooffset); 432 | pushEdgeFifo(edgefifo, c, b, edgefifooffset); 433 | pushEdgeFifo(edgefifo, a, c, edgefifooffset); 434 | } 435 | else 436 | { 437 | // slow path: read a full byte for codeaux instead of using a table lookup 438 | unsigned char codeaux = *data++; 439 | 440 | int fea = codetri == 0xfe ? 0 : 15; 441 | int feb = codeaux >> 4; 442 | int fec = codeaux & 15; 443 | 444 | // fifo reads are wrapped around 16 entry buffer 445 | // also note that we increment next for all three vertices before decoding indices - this matches encoder behavior 446 | unsigned int a = (fea == 0) ? next++ : 0; 447 | unsigned int b = (feb == 0) ? 
next++ : vertexfifo[(vertexfifooffset - feb) & 15]; 448 | unsigned int c = (fec == 0) ? next++ : vertexfifo[(vertexfifooffset - fec) & 15]; 449 | 450 | // note that we need to update the last index since free indices are delta-encoded 451 | if (fea == 15) 452 | last = a = decodeIndex(data, next, last); 453 | 454 | if (feb == 15) 455 | last = b = decodeIndex(data, next, last); 456 | 457 | if (fec == 15) 458 | last = c = decodeIndex(data, next, last); 459 | 460 | // output triangle 461 | destination[i + 0] = a; 462 | destination[i + 1] = b; 463 | destination[i + 2] = c; 464 | 465 | // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly 466 | pushVertexFifo(vertexfifo, a, vertexfifooffset); 467 | pushVertexFifo(vertexfifo, b, vertexfifooffset, (feb == 0) | (feb == 15)); 468 | pushVertexFifo(vertexfifo, c, vertexfifooffset, (fec == 0) | (fec == 15)); 469 | 470 | pushEdgeFifo(edgefifo, b, a, edgefifooffset); 471 | pushEdgeFifo(edgefifo, c, b, edgefifooffset); 472 | pushEdgeFifo(edgefifo, a, c, edgefifooffset); 473 | } 474 | } 475 | } 476 | 477 | // we should've read all data bytes and stopped at the boundary between data and codeaux table 478 | if (data != data_safe_end) 479 | return -3; 480 | 481 | return 0; 482 | } 483 | -------------------------------------------------------------------------------- /Source/Barycentrics.cpp: -------------------------------------------------------------------------------- 1 | #include "Barycentrics.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #pragma warning(push) 8 | #pragma warning(disable: 4996) 9 | #define TINYOBJLOADER_IMPLEMENTATION 10 | #include 11 | #pragma warning(pop) 12 | 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | AppConfig g_appConfig; 19 | 20 | int main(int argc, char** argv) 21 | { 22 | AppConfig& cfg = g_appConfig; 23 | 24 | cfg.name = "Barycentrics (" RUSH_RENDER_API_NAME ")"; 25 | 26 | cfg.width = 1280; 27 | cfg.height = 720; 28 | 
cfg.argc = argc; 29 | cfg.argv = argv; 30 | cfg.resizable = true; 31 | 32 | #ifndef NDEBUG 33 | cfg.debug = true; 34 | Log::breakOnError = true; 35 | #endif 36 | 37 | return Platform_Main(cfg); 38 | } 39 | 40 | BarycentricsApp::BarycentricsApp() 41 | : BaseApplication() 42 | , m_boundingBox(Vec3(0.0f), Vec3(0.0f)) 43 | { 44 | Gfx_SetPresentInterval(1); 45 | 46 | m_windowEvents.setOwner(m_window); 47 | 48 | GfxShaderBindings bindings; 49 | bindings.addConstantBuffer("constantBuffer0", 0); // scene consants 50 | bindings.addConstantBuffer("constantBuffer1", 1); // material constants 51 | bindings.addCombinedSampler("sampler0", 2); // albedo texture sampler 52 | bindings.addStorageBuffer("vertexBuffer", 3); 53 | bindings.addStorageBuffer("indexBuffer", 4); 54 | 55 | GfxVertexFormatDesc vfDefaultDesc; // TODO: use de-interleaved vertex streams and packed vertices 56 | vfDefaultDesc.add(0, GfxVertexFormatDesc::DataType::Float3, GfxVertexFormatDesc::Semantic::Position, 0); 57 | vfDefaultDesc.add(0, GfxVertexFormatDesc::DataType::Float2, GfxVertexFormatDesc::Semantic::Texcoord, 0); 58 | 59 | GfxVertexFormatDesc vfEmptyDesc; 60 | 61 | GfxVertexShaderRef vsIndexed; 62 | vsIndexed.takeover(Gfx_CreateVertexShader(shaderFromFile("Shaders/ModelIndexed.vert.spv"))); 63 | 64 | struct SpecializationData { u32 useTexture; }; 65 | GfxSpecializationConstant specializationConstantLayout; 66 | specializationConstantLayout.id = 0; 67 | specializationConstantLayout.offset = 0; 68 | specializationConstantLayout.size = sizeof(SpecializationData); 69 | 70 | enum { specializationCount = 2 }; 71 | SpecializationData specializationData[specializationCount] = { 0, 1 }; // non-textured and textured variants 72 | 73 | auto setupSpecialization = [&](GfxTechniqueDesc& techniqueDesc, u32 variantIndex) 74 | { 75 | techniqueDesc.specializationConstants = &specializationConstantLayout; 76 | techniqueDesc.specializationConstantCount = 1; 77 | techniqueDesc.specializationData = 
&specializationData[variantIndex]; 78 | techniqueDesc.specializationDataSize = sizeof(SpecializationData); 79 | }; 80 | 81 | { 82 | GfxVertexShaderRef vs; 83 | vs.takeover(Gfx_CreateVertexShader(shaderFromFile("Shaders/Model.vert.spv"))); 84 | 85 | GfxPixelShaderRef ps; 86 | ps.takeover(Gfx_CreatePixelShader(shaderFromFile("Shaders/Model.frag.spv"))); 87 | 88 | GfxVertexFormatRef vf; 89 | vf.takeover(Gfx_CreateVertexFormat(vfEmptyDesc)); 90 | 91 | GfxTechniqueDesc techniqueDesc(ps.get(), vs.get(), vf.get(), &bindings); 92 | 93 | for (u32 i=0; igetAspect(); 224 | float fov = 1.0f; 225 | 226 | m_camera = Camera(aspect, fov, 0.25f, 10000.0f); 227 | 228 | if (g_appConfig.argc >= 2) 229 | { 230 | const char* modelFilename = g_appConfig.argv[1]; 231 | m_statusString = std::string("Model: ") + modelFilename; 232 | m_valid = loadModel(modelFilename); 233 | 234 | Vec3 center = m_boundingBox.center(); 235 | Vec3 dimensions = m_boundingBox.dimensions(); 236 | float longestSide = dimensions.reduceMax(); 237 | if (longestSide != 0) 238 | { 239 | float scale = 100.0f / longestSide; 240 | m_worldTransform = Mat4::scaleTranslate(scale, -center*scale); 241 | } 242 | 243 | m_boundingBox.m_min = m_worldTransform * m_boundingBox.m_min; 244 | m_boundingBox.m_max = m_worldTransform * m_boundingBox.m_max; 245 | 246 | m_camera.lookAt(Vec3(m_boundingBox.m_max) + Vec3(2.0f), m_boundingBox.center()); 247 | } 248 | else 249 | { 250 | // Default tunnel test model 251 | m_valid = loadTunnelTestModel(); 252 | 253 | Vec3 position = m_boundingBox.center(); 254 | position.z = m_boundingBox.m_min.z; 255 | m_camera.lookAt(position, m_boundingBox.center()); 256 | } 257 | 258 | m_interpolatedCamera = m_camera; 259 | 260 | m_cameraMan = new CameraManipulator(); 261 | } 262 | 263 | BarycentricsApp::~BarycentricsApp() 264 | { 265 | m_windowEvents.setOwner(nullptr); 266 | 267 | delete m_cameraMan; 268 | 269 | Gfx_Release(m_vertexBuffer); 270 | Gfx_Release(m_indexBuffer); 271 | 
Gfx_Release(m_constantBuffer); 272 | } 273 | 274 | void BarycentricsApp::update() 275 | { 276 | TimingScope timingScope(m_stats.cpuTotal); 277 | 278 | m_stats.gpuWorld.add(Gfx_Stats().customTimer[Timestamp_World]); 279 | m_stats.gpuUI.add(Gfx_Stats().customTimer[Timestamp_UI]); 280 | m_stats.gpuTotal.add(Gfx_Stats().lastFrameGpuTime); 281 | 282 | Gfx_ResetStats(); 283 | 284 | const float dt = (float)m_timer.time(); 285 | m_timer.reset(); 286 | 287 | for (const WindowEvent& e : m_windowEvents) 288 | { 289 | switch (e.type) 290 | { 291 | case WindowEventType_Scroll: 292 | if (e.scroll.y > 0) 293 | { 294 | m_cameraScale *= 1.25f; 295 | } 296 | else 297 | { 298 | m_cameraScale *= 0.9f; 299 | } 300 | Log::message("Camera scale: %f", m_cameraScale); 301 | break; 302 | case WindowEventType_KeyDown: 303 | { 304 | if (e.code == Key_0) 305 | { 306 | m_mode = Mode::Indexed; 307 | } 308 | else if (e.code == Key_1) 309 | { 310 | m_mode = Mode::NonIndexed; 311 | } 312 | else if (e.code == Key_2) 313 | { 314 | m_mode = Mode::GeometryShader; 315 | } 316 | else if (e.code == Key_3) 317 | { 318 | m_mode = Mode::Manual; 319 | } 320 | else if (e.code == Key_4 && m_techniquePassthroughGS[m_useTexture].valid()) 321 | { 322 | m_mode = Mode::PassthroughGS; 323 | } 324 | else if (e.code == Key_5 && m_techniqueNativeAMD[m_useTexture].valid()) 325 | { 326 | m_mode = Mode::NativeAMD; 327 | } 328 | else if (e.code == Key_T) 329 | { 330 | m_useTexture = !m_useTexture; 331 | } 332 | else if (e.code == Key_H) 333 | { 334 | m_showUI = !m_showUI; 335 | } 336 | break; 337 | } 338 | default: 339 | break; 340 | } 341 | } 342 | 343 | float clipNear = 0.25f * m_cameraScale; 344 | float clipFar = 10000.0f * m_cameraScale; 345 | m_camera.setClip(clipNear, clipFar); 346 | m_camera.setAspect(m_window->getAspect()); 347 | m_cameraMan->setMoveSpeed(20.0f * m_cameraScale); 348 | 349 | m_cameraMan->update(&m_camera, dt, m_window->getKeyboardState(), m_window->getMouseState()); 350 | 351 | 
m_interpolatedCamera.blendTo(m_camera, 0.1f, 0.125f); 352 | 353 | m_windowEvents.clear(); 354 | 355 | render(); 356 | } 357 | 358 | void BarycentricsApp::render() 359 | { 360 | const GfxCapability& caps = Gfx_GetCapability(); 361 | 362 | Mat4 matView = m_interpolatedCamera.buildViewMatrix(); 363 | Mat4 matProj = m_interpolatedCamera.buildProjMatrix(caps.projectionFlags); 364 | 365 | Constants constants; 366 | constants.matView = matView.transposed(); 367 | constants.matProj = matProj.transposed(); 368 | constants.matViewProj = (matView * matProj).transposed(); 369 | constants.matWorld = m_worldTransform.transposed(); 370 | constants.cameraPos = Vec4(m_interpolatedCamera.getPosition()); 371 | 372 | Gfx_UpdateBuffer(m_ctx, m_constantBuffer, &constants, sizeof(constants)); 373 | 374 | GfxPassDesc passDesc; 375 | passDesc.flags = GfxPassFlags::ClearAll; 376 | passDesc.clearColors[0] = ColorRGBA8(11, 22, 33); 377 | Gfx_BeginPass(m_ctx, passDesc); 378 | 379 | Gfx_SetViewport(m_ctx, GfxViewport(m_window->getSize())); 380 | Gfx_SetScissorRect(m_ctx, m_window->getSize()); 381 | 382 | Gfx_SetDepthStencilState(m_ctx, m_depthStencilStates.writeLessEqual); 383 | 384 | if (m_valid) 385 | { 386 | TimingScope timingScope(m_stats.cpuWorld); 387 | GfxTimerScope gpuTimerScopeWorld(m_ctx, Timestamp_World); 388 | 389 | Gfx_SetBlendState(m_ctx, m_blendStates.opaque); 390 | 391 | switch (m_mode) 392 | { 393 | case Mode::Indexed: 394 | Gfx_SetTechnique(m_ctx, m_techniqueIndexed[m_useTexture].get()); 395 | break; 396 | case Mode::NonIndexed: 397 | Gfx_SetTechnique(m_ctx, m_techniqueNonIndexed[m_useTexture].get()); 398 | break; 399 | case Mode::GeometryShader: 400 | Gfx_SetTechnique(m_ctx, m_techniqueGeometryShader[m_useTexture].get()); 401 | break; 402 | case Mode::Manual: 403 | Gfx_SetTechnique(m_ctx, m_techniqueManual[m_useTexture].get()); 404 | break; 405 | case Mode::PassthroughGS: 406 | Gfx_SetTechnique(m_ctx, m_techniquePassthroughGS[m_useTexture].get()); 407 | break; 408 | case 
Mode::NativeAMD: 409 | Gfx_SetTechnique(m_ctx, m_techniqueNativeAMD[m_useTexture].get()); 410 | break; 411 | default: 412 | RUSH_LOG_ERROR("Rendering mode '%s' not implemented", toString(m_mode)); 413 | } 414 | 415 | if (m_mode != Mode::NonIndexed) 416 | { 417 | Gfx_SetVertexStream(m_ctx, 0, m_vertexBuffer); 418 | Gfx_SetIndexStream(m_ctx, m_indexBuffer); 419 | } 420 | 421 | Gfx_SetConstantBuffer(m_ctx, 0, m_constantBuffer); 422 | 423 | Gfx_SetStorageBuffer(m_ctx, 0, m_vertexBuffer); 424 | Gfx_SetStorageBuffer(m_ctx, 1, m_indexBuffer); 425 | 426 | Gfx_SetConstantBuffer(m_ctx, 1, m_defaultMaterial.constantBuffer); 427 | Gfx_SetTexture(m_ctx, GfxStage::Pixel, 0, m_defaultMaterial.albedoTexture, m_samplerStates.anisotropicWrap); 428 | 429 | if (m_mode == Mode::NonIndexed) 430 | { 431 | Gfx_Draw(m_ctx, 0, m_indexCount); 432 | } 433 | else 434 | { 435 | Gfx_DrawIndexed(m_ctx, m_indexCount, 0, 0, m_vertexCount); 436 | } 437 | } 438 | 439 | // Draw UI on top 440 | if (m_showUI) 441 | { 442 | GfxTimerScope gpuTimerScopeUI(m_ctx, Timestamp_UI); 443 | TimingScope timingScope(m_stats.cpuUI); 444 | 445 | Gfx_SetBlendState(m_ctx, m_blendStates.lerp); 446 | Gfx_SetDepthStencilState(m_ctx, m_depthStencilStates.disable); 447 | 448 | m_prim->begin2D(m_window->getSize()); 449 | 450 | m_font->setScale(2.0f); 451 | 452 | Vec2 textOrigin = Vec2(10.0f); 453 | Vec2 pos = textOrigin; 454 | pos = m_font->draw(m_prim, pos, m_statusString.c_str()); 455 | pos = m_font->draw(m_prim, pos, "\n"); 456 | pos.x = textOrigin.x; 457 | 458 | char tempString[1024]; 459 | 460 | pos = m_font->draw(m_prim, pos, "Mode: "); 461 | pos = m_font->draw(m_prim, pos, toString(m_mode), ColorRGBA8(255, 255, 64)); 462 | pos = m_font->draw(m_prim, pos, "\n"); 463 | pos.x = textOrigin.x; 464 | 465 | const GfxStats& stats = Gfx_Stats(); 466 | sprintf_s(tempString, 467 | "Textured: %d\n" 468 | "Draw calls: %d\n" 469 | "Vertices: %d\n" 470 | "GPU total: %.2f ms\n" 471 | "> World: %.2f\n" 472 | "> UI: %.2f\n" 473 | "CPU 
time: %.2f ms\n" 474 | "> World: %.2f ms\n" 475 | "> UI: %.2f ms", 476 | int(m_useTexture), 477 | stats.drawCalls, 478 | stats.vertices, 479 | m_stats.gpuTotal.get() * 1000.0f, 480 | m_stats.gpuWorld.get() * 1000.0f, 481 | m_stats.gpuUI.get() * 1000.0f, 482 | m_stats.cpuTotal.get() * 1000.0f, 483 | m_stats.cpuWorld.get() * 1000.0f, 484 | m_stats.cpuUI.get() * 1000.0f); 485 | pos = m_font->draw(m_prim, pos, tempString); 486 | pos.x = textOrigin.x; 487 | 488 | pos = Vec2(10, m_window->getSizeFloat().y - 30); 489 | pos = m_font->draw(m_prim, pos, "Controls: number keys to change modes, 'T' to toggle texturing, 'H' to hide UI"); 490 | 491 | m_prim->end2D(); 492 | } 493 | else 494 | { 495 | GfxTimerScope gpuTimerScopeUI(m_ctx, Timestamp_UI); 496 | m_stats.cpuUI.add(0); 497 | } 498 | 499 | Gfx_EndPass(m_ctx); 500 | } 501 | 502 | bool BarycentricsApp::loadModel(const char* filename) 503 | { 504 | Log::message("Loading model '%s'", filename); 505 | 506 | std::vector shapes; 507 | std::vector materials; 508 | std::string errors; 509 | 510 | std::string directory = directoryFromFilename(filename); 511 | 512 | bool loaded = tinyobj::LoadObj(shapes, materials, errors, filename, directory.c_str()); 513 | if (!loaded) 514 | { 515 | Log::error("Could not load model from '%s'\n%s\n", filename, errors.c_str()); 516 | return false; 517 | } 518 | 519 | std::vector vertices; 520 | std::vector indices; 521 | 522 | m_boundingBox.expandInit(); 523 | 524 | for (const auto& shape : shapes) 525 | { 526 | u32 firstVertex = (u32)vertices.size(); 527 | const auto& mesh = shape.mesh; 528 | 529 | const u32 vertexCount = (u32)mesh.positions.size() / 3; 530 | 531 | const bool haveTexcoords = !mesh.texcoords.empty(); 532 | 533 | for (u32 i = 0; i < vertexCount; ++i) 534 | { 535 | Vertex v; 536 | 537 | v.position.x = mesh.positions[i * 3 + 0]; 538 | v.position.y = mesh.positions[i * 3 + 1]; 539 | v.position.z = mesh.positions[i * 3 + 2]; 540 | 541 | m_boundingBox.expand(v.position); 542 | 543 | if 
(haveTexcoords) 544 | { 545 | v.texcoord.x = mesh.texcoords[i * 2 + 0]; 546 | v.texcoord.y = mesh.texcoords[i * 2 + 1]; 547 | } 548 | else 549 | { 550 | v.texcoord = Vec2(0.0f); 551 | } 552 | 553 | v.position.x = -v.position.x; 554 | 555 | vertices.push_back(v); 556 | } 557 | 558 | const u32 triangleCount = (u32)mesh.indices.size() / 3; 559 | for (u32 triangleIt = 0; triangleIt < triangleCount; ++triangleIt) 560 | { 561 | indices.push_back(mesh.indices[triangleIt * 3 + 0] + firstVertex); 562 | indices.push_back(mesh.indices[triangleIt * 3 + 2] + firstVertex); 563 | indices.push_back(mesh.indices[triangleIt * 3 + 1] + firstVertex); 564 | } 565 | } 566 | 567 | m_vertexCount = (u32)vertices.size(); 568 | m_indexCount = (u32)indices.size(); 569 | 570 | meshopt_optimizeVertexCache(indices.data(), indices.data(), m_indexCount, m_vertexCount); 571 | 572 | GfxBufferDesc vbDesc(GfxBufferFlags::Vertex | GfxBufferFlags::Storage, GfxFormat_Unknown, m_vertexCount, sizeof(Vertex)); 573 | m_vertexBuffer = Gfx_CreateBuffer(vbDesc, vertices.data()); 574 | 575 | GfxBufferDesc ibDesc(GfxBufferFlags::Index | GfxBufferFlags::Storage, GfxFormat_R32_Uint, m_indexCount, 4); 576 | m_indexBuffer = Gfx_CreateBuffer(ibDesc, indices.data()); 577 | 578 | return true; 579 | } 580 | 581 | 582 | bool BarycentricsApp::loadTunnelTestModel() 583 | { 584 | Log::message("Creating tunnel test model"); 585 | 586 | std::vector vertices; 587 | std::vector indices; 588 | 589 | m_boundingBox.expandInit(); 590 | 591 | const float near = 0.0f; 592 | const float far = 100.0f; 593 | const float radius = 1.0f; 594 | const float uscale = 10.0f; 595 | 596 | const u32 circleVertexCount = 50; 597 | 598 | // Last vertices have unique tex coords so need them 599 | for (u32 i = 0; i <= circleVertexCount; ++i) 600 | { 601 | float n = static_cast(i) / static_cast(circleVertexCount); 602 | 603 | Vertex v; 604 | v.position.x = radius * std::sin(Rush::TwoPi * n); 605 | v.position.y = radius * std::cos(Rush::TwoPi * n); 606 | 
v.texcoord.x = n * uscale; 607 | 608 | // Near vertex 609 | v.position.z = near; 610 | v.texcoord.y = near; 611 | m_boundingBox.expand(v.position); 612 | vertices.push_back(v); 613 | 614 | // Far vertex 615 | v.position.z = far; 616 | v.texcoord.y = far; 617 | m_boundingBox.expand(v.position); 618 | vertices.push_back(v); 619 | } 620 | 621 | m_vertexCount = (u32)vertices.size(); 622 | 623 | // One quad (connecting near/far pair of vertices) per segment 624 | for (u32 i = 0; i < circleVertexCount; ++i) 625 | { 626 | int i0 = (2*i + 0); 627 | int i1 = (2*i + 1); 628 | int i2 = (2*i + 2); 629 | int i3 = (2*i + 3); 630 | 631 | indices.push_back(i0); 632 | indices.push_back(i1); 633 | indices.push_back(i2); 634 | 635 | indices.push_back(i2); 636 | indices.push_back(i1); 637 | indices.push_back(i3); 638 | } 639 | 640 | m_indexCount = (u32)indices.size(); 641 | 642 | //meshopt_optimizeVertexCache(indices.data(), indices.data(), m_indexCount, m_vertexCount); 643 | 644 | GfxBufferDesc vbDesc(GfxBufferFlags::Vertex | GfxBufferFlags::Storage, GfxFormat_Unknown, m_vertexCount, sizeof(Vertex)); 645 | m_vertexBuffer = Gfx_CreateBuffer(vbDesc, vertices.data()); 646 | 647 | GfxBufferDesc ibDesc(GfxBufferFlags::Index | GfxBufferFlags::Storage, GfxFormat_R32_Uint, m_indexCount, 4); 648 | m_indexBuffer = Gfx_CreateBuffer(ibDesc, indices.data()); 649 | 650 | return true; 651 | } -------------------------------------------------------------------------------- /External/meshoptimizer/src/meshoptimizer.h: -------------------------------------------------------------------------------- 1 | /** 2 | * meshoptimizer - version 0.7 3 | * 4 | * Copyright (C) 2016-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) 5 | * Report bugs and download new versions at https://github.com/zeux/meshoptimizer 6 | * 7 | * This library is distributed under the MIT License. See notice at the end of this file. 
8 | */ 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | /* Version macro; major * 100 + minor * 10 + patch */ 15 | #define MESHOPTIMIZER_VERSION 70 16 | 17 | /* If no API is defined, assume default */ 18 | #ifndef MESHOPTIMIZER_API 19 | #define MESHOPTIMIZER_API 20 | #endif 21 | 22 | /* C interface */ 23 | #ifdef __cplusplus 24 | extern "C" { 25 | #endif 26 | 27 | /** 28 | * Generates a vertex remap table from the vertex buffer and an optional index buffer and returns number of unique vertices 29 | * 30 | * destination must contain enough space for the resulting remap table (vertex_count elements) 31 | * indices can be NULL if the input is unindexed 32 | */ 33 | MESHOPTIMIZER_API size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size); 34 | 35 | /** 36 | * Generates vertex buffer from the source vertex buffer and remap table generated by generateVertexRemap 37 | * 38 | * destination must contain enough space for the resulting vertex buffer (unique_vertex_count elements, returned by generateVertexRemap) 39 | */ 40 | MESHOPTIMIZER_API void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap); 41 | 42 | /** 43 | * Generate index buffer from the source index buffer and remap table generated by generateVertexRemap 44 | * 45 | * destination must contain enough space for the resulting index buffer (index_count elements) 46 | * indices can be NULL if the input is unindexed 47 | */ 48 | MESHOPTIMIZER_API void meshopt_remapIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const unsigned int* remap); 49 | 50 | /** 51 | * Vertex transform cache optimizer 52 | * Reorders indices to reduce the number of GPU vertex shader invocations 53 | * 54 | * destination must contain enough space for the resulting index buffer (index_count 
elements) 55 | */ 56 | MESHOPTIMIZER_API void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count); 57 | 58 | /** 59 | * Vertex transform cache optimizer for FIFO caches 60 | * Reorders indices to reduce the number of GPU vertex shader invocations 61 | * Generally takes ~3x less time to optimize meshes but produces inferior results compared to meshopt_optimizeVertexCache 62 | * 63 | * destination must contain enough space for the resulting index buffer (index_count elements) 64 | * cache_size should be less than the actual GPU cache size to avoid cache thrashing 65 | */ 66 | MESHOPTIMIZER_API void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size); 67 | 68 | /** 69 | * Overdraw optimizer 70 | * Reorders indices to reduce the number of GPU vertex shader invocations and the pixel overdraw 71 | * 72 | * destination must contain enough space for the resulting index buffer (index_count elements) 73 | * indices must contain index data that is the result of optimizeVertexCache (*not* the original mesh indices!) 
74 | * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer 75 | * threshold indicates how much the overdraw optimizer can degrade vertex cache efficiency (1.05 = up to 5%) to reduce overdraw more efficiently 76 | */ 77 | MESHOPTIMIZER_API void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold); 78 | 79 | /** 80 | * Vertex fetch cache optimizer 81 | * Reorders vertices and changes indices to reduce the amount of GPU memory fetches during vertex processing 82 | * 83 | * destination must contain enough space for the resulting vertex buffer (vertex_count elements) 84 | * indices is used both as an input and as an output index buffer 85 | */ 86 | MESHOPTIMIZER_API size_t meshopt_optimizeVertexFetch(void* destination, unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size); 87 | 88 | /** 89 | * Experimental: Index buffer encoder 90 | * Encodes index data into an array of bytes that is generally much smaller (<1.5 bytes/triangle) and compresses better (<1 bytes/triangle) compared to original. 91 | * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space 92 | * For maximum efficiency the index buffer being encoded has to be optimized for vertex cache and vertex fetch first. 
93 | * 94 | * buffer must contain enough space for the encoded index buffer (use meshopt_encodeIndexBufferBound to estimate) 95 | */ 96 | MESHOPTIMIZER_API size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count); 97 | MESHOPTIMIZER_API size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count); 98 | 99 | /** 100 | * Experimental: Index buffer decoder 101 | * Decodes index data from an array of bytes generated by meshopt_encodeIndexBuffer 102 | * Returns 0 if decoding was successful, and an error code otherwise 103 | * 104 | * destination must contain enough space for the resulting index buffer (index_count elements) 105 | */ 106 | MESHOPTIMIZER_API int meshopt_decodeIndexBuffer(unsigned int* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size); 107 | 108 | /** 109 | * Experimental: Vertex buffer encoder 110 | * Encodes vertex data into an array of bytes that is generally smaller and compresses better compared to original. 
111 | * Returns encoded data size on success, 0 on error 112 | * For maximum efficiency you should provide the encoded index buffer from meshopt_encodeIndexBuffer 113 | * 114 | * buffer must contain enough space for the encoded vertex buffer (use meshopt_encodeVertexBufferBound to estimate) 115 | */ 116 | MESHOPTIMIZER_API size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size, size_t index_count, const unsigned char* index_buffer, size_t index_buffer_size); 117 | MESHOPTIMIZER_API size_t meshopt_encodeVertexBufferBound(size_t vertex_count, size_t vertex_size); 118 | 119 | /** 120 | * Experimental: Vertex buffer decoder 121 | * Decodes vertex data from an array of bytes generated by meshopt_encodeVertexBuffer 122 | * Returns 0 if decoding was successful, and an error code otherwise 123 | * 124 | * destination must contain enough space for the resulting vertex buffer (vertex_count * vertex_size bytes) 125 | */ 126 | MESHOPTIMIZER_API int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t vertex_size, size_t index_count, const unsigned char* buffer, size_t buffer_size, const unsigned char* index_buffer, size_t index_buffer_size); 127 | 128 | /** 129 | * Experimental: Mesh simplifier 130 | * Reduces the number of triangles in the mesh, attempting to preserve mesh appearance as much as possible 131 | * Returns the number of indices after simplification, with destination containing new index data 132 | * 133 | * destination must contain enough space for the source index buffer (since optimization is iterative, this means index_count elements - *not* target_index_count!) 
134 | * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer 135 | */ 136 | MESHOPTIMIZER_API size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count); 137 | 138 | /** 139 | * Experimental: Mesh stripifier 140 | * Converts a previously vertex cache optimized triangle list to triangle strip, stitching strips using restart index 141 | * Returns the number of indices in the resulting strip, with destination containing new index data 142 | * For maximum efficiency the index buffer being converted has to be optimized for vertex cache first. 143 | * 144 | * destination must contain enough space for the worst case target index buffer (index_count / 3 * 4 elements) 145 | */ 146 | MESHOPTIMIZER_API size_t meshopt_stripify(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count); 147 | 148 | /** 149 | * Experimental: Mesh unstripifier 150 | * Converts a triangle strip to a triangle list 151 | * Returns the number of indices in the resulting list, with destination containing new index data 152 | * 153 | * destination must contain enough space for the worst case target index buffer ((index_count - 2) * 3 elements) 154 | */ 155 | MESHOPTIMIZER_API size_t meshopt_unstripify(unsigned int* destination, const unsigned int* indices, size_t index_count); 156 | 157 | struct meshopt_VertexCacheStatistics 158 | { 159 | unsigned int vertices_transformed; 160 | unsigned int warps_executed; 161 | float acmr; /* transformed vertices / triangle count; best case 0.5, worst case 3.0, optimum depends on topology */ 162 | float atvr; /* transformed vertices / vertex count; best case 1.0, worst case 6.0, optimum is 1.0 (each vertex is transformed once) */ 163 | }; 164 | 165 | /** 166 | * Vertex transform cache analyzer 167 | * Returns cache hit 
statistics using a simplified FIFO model 168 | * Results may not match actual GPU performance 169 | */ 170 | MESHOPTIMIZER_API struct meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size); 171 | 172 | struct meshopt_OverdrawStatistics 173 | { 174 | unsigned int pixels_covered; 175 | unsigned int pixels_shaded; 176 | float overdraw; /* shaded pixels / covered pixels; best case 1.0 */ 177 | }; 178 | 179 | /** 180 | * Overdraw analyzer 181 | * Returns overdraw statistics using a software rasterizer 182 | * Results may not match actual GPU performance 183 | * 184 | * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer 185 | */ 186 | MESHOPTIMIZER_API struct meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); 187 | 188 | struct meshopt_VertexFetchStatistics 189 | { 190 | unsigned int bytes_fetched; 191 | float overfetch; /* fetched bytes / vertex buffer size; best case 1.0 (each byte is fetched once) */ 192 | }; 193 | 194 | /** 195 | * Vertex fetch cache analyzer 196 | * Returns cache hit statistics using a simplified direct mapped model 197 | * Results may not match actual GPU performance 198 | */ 199 | MESHOPTIMIZER_API struct meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size); 200 | 201 | #ifdef __cplusplus 202 | } /* extern "C" */ 203 | #endif 204 | 205 | /* Quantization into commonly supported data formats */ 206 | #ifdef __cplusplus 207 | /** 208 | * Quantize a float in [0..1] range into an N-bit fixed point unorm value 209 | * Assumes reconstruction function (q / (2^N-1)), which is the case for fixed-function normalized fixed point 
conversion 210 | * Maximum reconstruction error: 1/2^(N+1) 211 | */ 212 | inline int meshopt_quantizeUnorm(float v, int N); 213 | 214 | /** 215 | * Quantize a float in [-1..1] range into an N-bit fixed point snorm value 216 | * Assumes reconstruction function (q / (2^(N-1)-1)), which is the case for fixed-function normalized fixed point conversion (except early OpenGL versions) 217 | * Maximum reconstruction error: 1/2^N 218 | */ 219 | inline int meshopt_quantizeSnorm(float v, int N); 220 | 221 | /** 222 | * Quantize a float into half-precision floating point value 223 | * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest 224 | * Representable magnitude range: [6e-5; 65504] 225 | * Maximum relative reconstruction error: 5e-4 226 | */ 227 | inline unsigned short meshopt_quantizeHalf(float v); 228 | 229 | /** 230 | * Quantize a float into a floating point value with a limited number of significant mantissa bits 231 | * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest 232 | * Assumes N is in a valid mantissa precision range, which is 1..23 233 | */ 234 | inline float meshopt_quantizeFloat(float v, int N); 235 | #endif 236 | 237 | /** 238 | * C++ template interface 239 | * 240 | * These functions mirror the C interface the library provides, providing template-based overloads so that 241 | * the caller can use an arbitrary type for the index data, both for input and output. 242 | * When the supplied type is the same size as that of unsigned int, the wrappers are zero-cost; when it's not, 243 | * the wrappers end up allocating memory and copying index data to convert from one type to another. 
244 | */ 245 | #ifdef __cplusplus 246 | template 247 | struct meshopt_IndexAdapter; 248 | 249 | template 250 | struct meshopt_IndexAdapter 251 | { 252 | T* result; 253 | unsigned int* data; 254 | size_t count; 255 | 256 | meshopt_IndexAdapter(T* result, const T* input, size_t count) 257 | : result(result) 258 | , data(0) 259 | , count(count) 260 | { 261 | data = new unsigned int[count]; 262 | 263 | if (input) 264 | { 265 | for (size_t i = 0; i < count; ++i) 266 | data[i] = input[i]; 267 | } 268 | } 269 | 270 | ~meshopt_IndexAdapter() 271 | { 272 | if (result) 273 | { 274 | for (size_t i = 0; i < count; ++i) 275 | result[i] = data[i]; 276 | } 277 | 278 | delete[] data; 279 | } 280 | }; 281 | 282 | template 283 | struct meshopt_IndexAdapter 284 | { 285 | unsigned int* data; 286 | 287 | meshopt_IndexAdapter(T* result, const T* input, size_t) 288 | : data(reinterpret_cast(result ? result : const_cast(input))) 289 | { 290 | } 291 | }; 292 | 293 | template 294 | inline size_t meshopt_generateVertexRemap(unsigned int* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) 295 | { 296 | meshopt_IndexAdapter in(0, indices, indices ? index_count : 0); 297 | 298 | return meshopt_generateVertexRemap(destination, indices ? in.data : 0, index_count, vertices, vertex_count, vertex_size); 299 | } 300 | 301 | template 302 | inline void meshopt_remapIndexBuffer(T* destination, const T* indices, size_t index_count, const unsigned int* remap) 303 | { 304 | meshopt_IndexAdapter in(0, indices, indices ? index_count : 0); 305 | meshopt_IndexAdapter out(destination, 0, index_count); 306 | 307 | meshopt_remapIndexBuffer(out.data, indices ? 
in.data : 0, index_count, remap); 308 | } 309 | 310 | template 311 | inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count) 312 | { 313 | meshopt_IndexAdapter in(0, indices, index_count); 314 | meshopt_IndexAdapter out(destination, 0, index_count); 315 | 316 | meshopt_optimizeVertexCache(out.data, in.data, index_count, vertex_count); 317 | } 318 | 319 | template 320 | inline void meshopt_optimizeVertexCacheFifo(T* destination, const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size) 321 | { 322 | meshopt_IndexAdapter in(0, indices, index_count); 323 | meshopt_IndexAdapter out(destination, 0, index_count); 324 | 325 | meshopt_optimizeVertexCacheFifo(out.data, in.data, index_count, vertex_count, cache_size); 326 | } 327 | 328 | template 329 | inline void meshopt_optimizeOverdraw(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold) 330 | { 331 | meshopt_IndexAdapter in(0, indices, index_count); 332 | meshopt_IndexAdapter out(destination, 0, index_count); 333 | 334 | meshopt_optimizeOverdraw(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, threshold); 335 | } 336 | 337 | template 338 | inline size_t meshopt_optimizeVertexFetch(void* destination, T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) 339 | { 340 | meshopt_IndexAdapter inout(indices, indices, index_count); 341 | 342 | return meshopt_optimizeVertexFetch(destination, inout.data, index_count, vertices, vertex_count, vertex_size); 343 | } 344 | 345 | template 346 | inline size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count) 347 | { 348 | meshopt_IndexAdapter in(0, indices, index_count); 349 | 350 | return meshopt_encodeIndexBuffer(buffer, buffer_size, in.data, index_count); 351 | } 
352 | 353 | template 354 | inline int meshopt_decodeIndexBuffer(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size) 355 | { 356 | meshopt_IndexAdapter out(destination, 0, index_count); 357 | 358 | return meshopt_decodeIndexBuffer(out.data, index_count, buffer, buffer_size); 359 | } 360 | 361 | template 362 | inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count) 363 | { 364 | meshopt_IndexAdapter in(0, indices, index_count); 365 | meshopt_IndexAdapter out(destination, 0, index_count); 366 | 367 | return meshopt_simplify(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count); 368 | } 369 | 370 | template 371 | inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count) 372 | { 373 | meshopt_IndexAdapter in(0, indices, index_count); 374 | meshopt_IndexAdapter out(destination, 0, (index_count / 3) * 4); 375 | 376 | return meshopt_stripify(out.data, in.data, index_count, vertex_count); 377 | } 378 | 379 | template 380 | inline size_t meshopt_unstripify(T* destination, const T* indices, size_t index_count) 381 | { 382 | meshopt_IndexAdapter in(0, indices, index_count); 383 | meshopt_IndexAdapter out(destination, 0, (index_count - 2) * 3); 384 | 385 | return meshopt_unstripify(out.data, in.data, index_count); 386 | } 387 | 388 | template 389 | inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int buffer_size) 390 | { 391 | meshopt_IndexAdapter in(0, indices, index_count); 392 | 393 | return meshopt_analyzeVertexCache(in.data, index_count, vertex_count, cache_size, warp_size, buffer_size); 394 | } 395 | 396 | template 397 | inline meshopt_OverdrawStatistics 
meshopt_analyzeOverdraw(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) 398 | { 399 | meshopt_IndexAdapter in(0, indices, index_count); 400 | 401 | return meshopt_analyzeOverdraw(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); 402 | } 403 | 404 | template 405 | inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size) 406 | { 407 | meshopt_IndexAdapter in(0, indices, index_count); 408 | 409 | return meshopt_analyzeVertexFetch(in.data, index_count, vertex_count, vertex_size); 410 | } 411 | #endif 412 | 413 | /* Inline implementation */ 414 | #ifdef __cplusplus 415 | inline int meshopt_quantizeUnorm(float v, int N) 416 | { 417 | const float scale = float((1 << N) - 1); 418 | 419 | v = (v >= 0) ? v : 0; 420 | v = (v <= 1) ? v : 1; 421 | 422 | return int(v * scale + 0.5f); 423 | } 424 | 425 | inline int meshopt_quantizeSnorm(float v, int N) 426 | { 427 | const float scale = float((1 << (N - 1)) - 1); 428 | 429 | float round = (v >= 0 ? 0.5f : -0.5f); 430 | 431 | v = (v >= -1) ? v : -1; 432 | v = (v <= +1) ? v : +1; 433 | 434 | return int(v * scale + round); 435 | } 436 | 437 | inline unsigned short meshopt_quantizeHalf(float v) 438 | { 439 | union { float f; unsigned int ui; } u = {v}; 440 | unsigned int ui = u.ui; 441 | 442 | int s = (ui >> 16) & 0x8000; 443 | int em = ui & 0x7fffffff; 444 | 445 | /* bias exponent and round to nearest; 112 is relative exponent bias (127-15) */ 446 | int h = (em - (112 << 23) + (1 << 12)) >> 13; 447 | 448 | /* underflow: flush to zero; 113 encodes exponent -14 */ 449 | h = (em < (113 << 23)) ? 0 : h; 450 | 451 | /* overflow: infinity; 143 encodes exponent 16 */ 452 | h = (em >= (143 << 23)) ? 0x7c00 : h; 453 | 454 | /* NaN; note that we convert all types of NaN to qNaN */ 455 | h = (em > (255 << 23)) ? 
0x7e00 : h; 456 | 457 | return (unsigned short)(s | h); 458 | } 459 | 460 | inline float meshopt_quantizeFloat(float v, int N) 461 | { 462 | union { float f; unsigned int ui; } u = {v}; 463 | unsigned int ui = u.ui; 464 | 465 | const int mask = (1 << (23 - N)) - 1; 466 | const int round = (1 << (23 - N)) >> 1; 467 | 468 | int e = ui & 0x7f800000; 469 | unsigned int rui = (ui + round) & ~mask; 470 | 471 | /* round all numbers except inf/nan; this is important to make sure nan doesn't overflow into -0 */ 472 | ui = e == 0x7f800000 ? ui : rui; 473 | 474 | /* flush denormals to zero */ 475 | ui = e == 0 ? 0 : ui; 476 | 477 | u.ui = ui; 478 | return u.f; 479 | } 480 | #endif 481 | 482 | /* Internal implementation helpers */ 483 | #ifdef __cplusplus 484 | template 485 | class meshopt_Buffer 486 | { 487 | meshopt_Buffer(const meshopt_Buffer&); 488 | meshopt_Buffer& operator=(const meshopt_Buffer&); 489 | 490 | public: 491 | T* data; 492 | size_t size; 493 | 494 | meshopt_Buffer() 495 | : data(0) 496 | , size(0) 497 | { 498 | } 499 | 500 | explicit meshopt_Buffer(size_t size) 501 | : data(0) 502 | , size(size) 503 | { 504 | data = new T[size]; 505 | } 506 | 507 | ~meshopt_Buffer() 508 | { 509 | delete[] data; 510 | } 511 | 512 | T& operator[](size_t index) 513 | { 514 | assert(index < size); 515 | return data[index]; 516 | } 517 | 518 | const T& operator[](size_t index) const 519 | { 520 | assert(index < size); 521 | return data[index]; 522 | } 523 | }; 524 | #endif 525 | 526 | /** 527 | * Copyright (c) 2016-2018 Arseny Kapoulkine 528 | * 529 | * Permission is hereby granted, free of charge, to any person 530 | * obtaining a copy of this software and associated documentation 531 | * files (the "Software"), to deal in the Software without 532 | * restriction, including without limitation the rights to use, 533 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 534 | * copies of the Software, and to permit persons to whom the 535 | * Software is furnished 
to do so, subject to the following 536 | * conditions: 537 | * 538 | * The above copyright notice and this permission notice shall be 539 | * included in all copies or substantial portions of the Software. 540 | * 541 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 542 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 543 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 544 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 545 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 546 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 547 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 548 | * OTHER DEALINGS IN THE SOFTWARE. 549 | */ 550 | -------------------------------------------------------------------------------- /External/meshoptimizer/src/vertexcodec.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details 2 | #include "meshoptimizer.h" 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #define TRACE 0 10 | 11 | // This work is based on: 12 | // TODO: references 13 | namespace meshopt 14 | { 15 | 16 | const size_t kVertexBlockSize = 256; 17 | const size_t kByteGroupSize = 16; 18 | 19 | inline unsigned char zigzag8(unsigned char v) 20 | { 21 | return (v >> 7) | ((v ^ -(v >> 7)) << 1); 22 | } 23 | 24 | inline unsigned char unzigzag8(unsigned char v) 25 | { 26 | return (-(v & 1)) ^ (v >> 1); 27 | } 28 | 29 | #if TRACE > 0 30 | inline int bits(unsigned char v) 31 | { 32 | int result = 0; 33 | while (v >= (1 << result)) 34 | result++; 35 | 36 | return result; 37 | } 38 | 39 | inline int bitsset(unsigned char v) 40 | { 41 | int result = 0; 42 | 43 | while (v) 44 | { 45 | result += (v & 1); 46 | v >>= 1; 47 | } 48 | 49 | return result; 50 | } 51 | #endif 52 | 53 | #if TRACE > 1 54 | static void traceEncodeVertexBlock(const 
unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, const unsigned int* prediction) 55 | { 56 | printf("vertex block; count %d\n", int(vertex_count)); 57 | 58 | { 59 | for (size_t k = 0; k < vertex_size; ++k) 60 | { 61 | printf("%02x ", vertex_data[k]); 62 | } 63 | 64 | printf("| base\n"); 65 | } 66 | 67 | int uniq[256] = {}; 68 | int max[256] = {}; 69 | int orv[256] = {}; 70 | int sumb[256] = {}; 71 | bool uniqb[256][256] = {}; 72 | 73 | for (size_t i = 1; i < vertex_count; ++i) 74 | { 75 | for (size_t k = 0; k < vertex_size; ++k) 76 | { 77 | size_t vertex_offset = i * vertex_size + k; 78 | 79 | unsigned char p = vertex_data[vertex_offset - vertex_size]; 80 | 81 | if (prediction && prediction[i]) 82 | { 83 | unsigned char pa = prediction[i] >> 16; 84 | unsigned char pb = prediction[i] >> 8; 85 | unsigned char pc = prediction[i] >> 0; 86 | assert(pa > 0 && pb > 0 && pc > 0); 87 | 88 | if (pa <= i && pb <= i && pc <= i) 89 | { 90 | unsigned char va = vertex_data[vertex_offset - pa * vertex_size]; 91 | unsigned char vb = vertex_data[vertex_offset - pb * vertex_size]; 92 | unsigned char vc = vertex_data[vertex_offset - pc * vertex_size]; 93 | 94 | p = va + vb - vc; 95 | } 96 | } 97 | 98 | unsigned char delta = zigzag8(vertex_data[vertex_offset] - p); 99 | 100 | if (!uniqb[k][delta]) 101 | { 102 | uniqb[k][delta] = true; 103 | uniq[k]++; 104 | } 105 | 106 | if (delta > max[k]) 107 | { 108 | max[k] = delta; 109 | } 110 | 111 | orv[k] |= delta; 112 | 113 | sumb[k] += bits(delta); 114 | 115 | #if TRACE > 2 116 | printf("%02x/%02x ", vertex_data[vertex_offset], delta); 117 | #endif 118 | } 119 | 120 | #if TRACE > 2 121 | printf("| "); 122 | 123 | if (prediction && prediction[i]) 124 | { 125 | unsigned char pa = prediction[i] >> 16; 126 | unsigned char pb = prediction[i] >> 8; 127 | unsigned char pc = prediction[i] >> 0; 128 | assert(pa > 0 && pb > 0 && pc > 0); 129 | 130 | if (pa <= i && pb <= i && pc <= i) 131 | { 132 | printf("pgram %d %d %d", pa, pb, 
pc); 133 | } 134 | else 135 | { 136 | printf("pdelta"); 137 | } 138 | } 139 | else 140 | { 141 | printf("delta"); 142 | } 143 | 144 | printf("\n"); 145 | #endif 146 | } 147 | 148 | for (size_t k = 0; k < vertex_size; ++k) 149 | printf("%-3d ", uniq[k]); 150 | 151 | printf("| uniq\n"); 152 | 153 | for (size_t k = 0; k < vertex_size; ++k) 154 | printf("%02x ", max[k]); 155 | 156 | printf("| max\n"); 157 | 158 | for (size_t k = 0; k < vertex_size; ++k) 159 | printf("%d ", bits(max[k])); 160 | 161 | printf("| maxbits\n"); 162 | 163 | for (size_t k = 0; k < vertex_size; ++k) 164 | printf("%3.1f ", double(sumb[k]) / double(vertex_count - 1)); 165 | 166 | printf("| avgbits\n"); 167 | 168 | for (size_t k = 0; k < vertex_size; ++k) 169 | printf("%d ", bitsset(orv[k])); 170 | 171 | printf("| bits set\n"); 172 | } 173 | #endif 174 | 175 | #if TRACE > 0 176 | struct EncodeVertexBlockStats 177 | { 178 | size_t bytes[256]; 179 | size_t bitsopt[256]; 180 | size_t bitsenc[256]; 181 | 182 | size_t headers[256]; 183 | size_t content[256]; 184 | 185 | size_t current_headers; 186 | size_t current_content; 187 | }; 188 | 189 | static EncodeVertexBlockStats encodeVertexBlockStats; 190 | 191 | static void dumpEncodeVertexBlockStats(size_t vertex_count, size_t vertex_size) 192 | { 193 | const EncodeVertexBlockStats& stats = encodeVertexBlockStats; 194 | 195 | size_t bytes = 0; 196 | size_t bitsopt = 0; 197 | size_t bitsenc = 0; 198 | size_t headers = 0; 199 | size_t content = 0; 200 | 201 | for (size_t k = 0; k < 256; ++k) 202 | if (stats.bytes[k]) 203 | { 204 | printf("%2d: %d bytes (optimal %d bytes, optenc %d bytes; headers %d, content %d)\n", int(k), int(stats.bytes[k]), int(stats.bitsopt[k]) / 8, int(stats.bitsenc[k]) / 8, int(stats.headers[k]), int(stats.content[k])); 205 | bytes += stats.bytes[k]; 206 | bitsopt += stats.bitsopt[k]; 207 | bitsenc += stats.bitsenc[k]; 208 | headers += stats.headers[k]; 209 | content += stats.content[k]; 210 | } 211 | 212 | printf("total: %d bytes 
(optimal %dd bytes, optenc %d bytes; headers %d, content %d)\n", int(bytes), int(bitsopt) / 8, int(bitsenc) / 8, int(headers), int(content)); 213 | 214 | if (vertex_size == 16) 215 | { 216 | // assume the following layout: 217 | // 6b position 218 | // 2b padding 219 | // 3b normal 220 | // 1b padding 221 | // 4b uv 222 | size_t bytes_pos = stats.bytes[0] + stats.bytes[1] + stats.bytes[2] + stats.bytes[3] + stats.bytes[4] + stats.bytes[5] + stats.bytes[6] + stats.bytes[7]; 223 | size_t bytes_nrm = stats.bytes[8] + stats.bytes[9] + stats.bytes[10] + stats.bytes[11]; 224 | size_t bytes_tex = stats.bytes[12] + stats.bytes[13] + stats.bytes[14] + stats.bytes[15]; 225 | 226 | printf("pos: %d bytes, %.1f bpv\n", int(bytes_pos), float(bytes_pos) / float(vertex_count) * 8); 227 | printf("nrm: %d bytes, %.1f bpv\n", int(bytes_nrm), float(bytes_nrm) / float(vertex_count) * 8); 228 | printf("tex: %d bytes, %.1f bpv\n", int(bytes_tex), float(bytes_tex) / float(vertex_count) * 8); 229 | } 230 | } 231 | #endif 232 | 233 | static bool encodeBytesFits(const unsigned char* buffer, size_t buffer_size, int bits) 234 | { 235 | for (size_t k = 0; k < buffer_size; ++k) 236 | if (buffer[k] >= (1 << bits)) 237 | return false; 238 | 239 | return true; 240 | } 241 | 242 | static unsigned char* encodeBytesGroup(unsigned char* data, const unsigned char* buffer, int bits) 243 | { 244 | assert(bits >= 1 && bits <= 8); 245 | 246 | if (bits == 8) 247 | { 248 | memcpy(data, buffer, kByteGroupSize); 249 | return data + kByteGroupSize; 250 | } 251 | 252 | size_t byte_size = 8 / bits; 253 | assert(kByteGroupSize % byte_size == 0); 254 | 255 | // fixed portion: bits bits for each value 256 | // variable portion: full byte for each out-of-range value (using 1...1 as sentinel) 257 | unsigned char sentinel = (1 << bits) - 1; 258 | 259 | for (size_t i = 0; i < kByteGroupSize; i += byte_size) 260 | { 261 | unsigned char byte = 0; 262 | 263 | for (size_t k = 0; k < byte_size; ++k) 264 | { 265 | unsigned 
char enc = (buffer[i + k] >= sentinel) ? sentinel : buffer[i + k]; 266 | 267 | byte <<= bits; 268 | byte |= enc; 269 | } 270 | 271 | *data++ = byte; 272 | } 273 | 274 | for (size_t i = 0; i < kByteGroupSize; ++i) 275 | { 276 | if (buffer[i] >= sentinel) 277 | { 278 | *data++ = buffer[i]; 279 | } 280 | } 281 | 282 | return data; 283 | } 284 | 285 | static const unsigned char* decodeBytesGroup(const unsigned char* data, const unsigned char* data_end, unsigned char* buffer, int bits) 286 | { 287 | assert(bits >= 1 && bits <= 8); 288 | 289 | // TODO: missing OOB data checks 290 | (void)data_end; 291 | 292 | if (bits == 8) 293 | { 294 | memcpy(buffer, data, kByteGroupSize); 295 | 296 | return data + kByteGroupSize; 297 | } 298 | 299 | size_t byte_size = 8 / bits; 300 | assert(kByteGroupSize % byte_size == 0); 301 | 302 | const unsigned char* data_var = data + kByteGroupSize / byte_size; 303 | 304 | // fixed portion: bits bits for each value 305 | // variable portion: full byte for each out-of-range value (using 1...1 as sentinel) 306 | unsigned char sentinel = (1 << bits) - 1; 307 | 308 | for (size_t i = 0; i < kByteGroupSize; i += byte_size) 309 | { 310 | unsigned char byte = *data++; 311 | 312 | for (size_t k = 0; k < byte_size; ++k) 313 | { 314 | unsigned char enc = byte >> (8 - bits); 315 | byte <<= bits; 316 | 317 | buffer[i + k] = (enc == sentinel) ? 
*data_var++ : enc; 318 | } 319 | } 320 | 321 | return data_var; 322 | } 323 | 324 | static unsigned char* encodeBytes(unsigned char* data, const unsigned char* buffer, size_t buffer_size) 325 | { 326 | assert(buffer_size % kByteGroupSize == 0); 327 | 328 | if (encodeBytesFits(buffer, buffer_size, 0)) 329 | { 330 | *data++ = 0; 331 | 332 | return data; 333 | } 334 | else 335 | { 336 | *data++ = 1; 337 | 338 | unsigned char* header = data; 339 | 340 | // round number of groups to 4 to get number of header bytes 341 | size_t header_size = (buffer_size / kByteGroupSize + 3) / 4; 342 | 343 | data += header_size; 344 | 345 | memset(header, 0, header_size); 346 | 347 | #if TRACE > 0 348 | encodeVertexBlockStats.current_headers += header_size; 349 | #endif 350 | 351 | for (size_t i = 0; i < buffer_size; i += kByteGroupSize) 352 | { 353 | int best_bits = 8; 354 | size_t best_size = kByteGroupSize; // assume encodeBytesVar(8) just stores as is 355 | 356 | for (int bits = 1; bits < 8; bits *= 2) 357 | { 358 | unsigned char* end = encodeBytesGroup(data, buffer + i, bits); 359 | 360 | if (size_t(end - data) < best_size) 361 | { 362 | best_bits = bits; 363 | best_size = end - data; 364 | } 365 | } 366 | 367 | int bitslog2 = (best_bits == 1) ? 0 : (best_bits == 2) ? 1 : (best_bits == 4) ? 
2 : 3; 368 | assert((1 << bitslog2) == best_bits); 369 | 370 | size_t header_offset = i / kByteGroupSize; 371 | 372 | header[header_offset / 4] |= bitslog2 << ((header_offset % 4) * 2); 373 | 374 | data = encodeBytesGroup(data, buffer + i, best_bits); 375 | } 376 | 377 | #if TRACE > 0 378 | encodeVertexBlockStats.current_content += data - header - header_size; 379 | #endif 380 | 381 | return data; 382 | } 383 | } 384 | 385 | static const unsigned char* decodeBytes(const unsigned char* data, const unsigned char* data_end, unsigned char* buffer, size_t buffer_size) 386 | { 387 | assert(buffer_size % kByteGroupSize == 0); 388 | 389 | if (size_t(data_end - data) < 1) 390 | return 0; 391 | 392 | unsigned char encoding = *data++; 393 | 394 | if (encoding == 0) 395 | { 396 | memset(buffer, 0, buffer_size); 397 | 398 | return data; 399 | } 400 | else if (encoding == 1) 401 | { 402 | const unsigned char* header = data; 403 | 404 | // round number of groups to 4 to get number of header bytes 405 | size_t header_size = (buffer_size / kByteGroupSize + 3) / 4; 406 | 407 | if (size_t(data_end - data) < header_size) 408 | return 0; 409 | 410 | data += header_size; 411 | 412 | for (size_t i = 0; i < buffer_size; i += kByteGroupSize) 413 | { 414 | size_t header_offset = i / kByteGroupSize; 415 | 416 | int bitslog2 = (header[header_offset / 4] >> ((header_offset % 4) * 2)) & 3; 417 | int bits = 1 << bitslog2; 418 | 419 | data = decodeBytesGroup(data, data_end, buffer + i, bits); 420 | if (!data) 421 | return 0; 422 | } 423 | 424 | return data; 425 | } 426 | else 427 | { 428 | // TODO: malformed data, we might want to return a different error code upstream? 
429 | return 0; 430 | } 431 | } 432 | 433 | static unsigned char* encodeVertexBlock(unsigned char* data, const unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, const unsigned int* prediction, unsigned char last_vertex[256]) 434 | { 435 | assert(vertex_count > 0 && vertex_count <= 256); 436 | 437 | #if TRACE > 1 438 | traceEncodeVertexBlock(vertex_data, vertex_count, vertex_size, prediction); 439 | #endif 440 | 441 | unsigned char buffer[256]; 442 | assert(sizeof(buffer) % kByteGroupSize == 0); 443 | 444 | // we sometimes encode elements we didn't fill when rounding to kByteGroupSize 445 | memset(buffer, 0, sizeof(buffer)); 446 | 447 | for (size_t k = 0; k < vertex_size; ++k) 448 | { 449 | size_t vertex_offset = k; 450 | 451 | for (size_t i = 0; i < vertex_count; ++i) 452 | { 453 | unsigned char p = (i == 0) ? last_vertex[k] : vertex_data[vertex_offset - vertex_size]; 454 | 455 | if (prediction && prediction[i]) 456 | { 457 | unsigned int pa = (prediction[i] >> 16) & 0xff; 458 | unsigned int pb = (prediction[i] >> 8) & 0xff; 459 | unsigned int pc = (prediction[i] >> 0) & 0xff; 460 | assert(pa > 0 && pb > 0 && pc > 0); 461 | 462 | if (pa <= i && pb <= i && pc <= i) 463 | { 464 | unsigned char va = vertex_data[vertex_offset - pa * vertex_size]; 465 | unsigned char vb = vertex_data[vertex_offset - pb * vertex_size]; 466 | unsigned char vc = vertex_data[vertex_offset - pc * vertex_size]; 467 | 468 | p = va + vb - vc; 469 | } 470 | } 471 | 472 | unsigned char delta = zigzag8(vertex_data[vertex_offset] - p); 473 | 474 | buffer[i] = delta; 475 | vertex_offset += vertex_size; 476 | } 477 | 478 | #if TRACE > 0 479 | unsigned char* olddata = data; 480 | #endif 481 | 482 | data = encodeBytes(data, buffer, (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1)); 483 | 484 | #if TRACE > 0 485 | EncodeVertexBlockStats& stats = encodeVertexBlockStats; 486 | 487 | stats.bytes[k] += data - olddata; 488 | 489 | for (size_t i = 0; i < vertex_count; ++i) 490 | 
{ 491 | stats.bitsopt[k] += bits(buffer[i]); 492 | stats.bitsenc[k] += bits(buffer[i]) + bits(bits(buffer[i])); 493 | } 494 | 495 | stats.headers[k] += stats.current_headers; 496 | stats.content[k] += stats.current_content; 497 | 498 | stats.current_headers = 0; 499 | stats.current_content = 0; 500 | #endif 501 | } 502 | 503 | for (size_t k = 0; k < vertex_size; ++k) 504 | { 505 | last_vertex[k] = vertex_data[vertex_size * (vertex_count - 1) + k]; 506 | } 507 | 508 | return data; 509 | } 510 | 511 | static const unsigned char* decodeVertexBlock(const unsigned char* data, const unsigned char* data_end, unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, const unsigned int* prediction, unsigned char last_vertex[256]) 512 | { 513 | assert(vertex_count > 0 && vertex_count <= 256); 514 | 515 | unsigned char buffer[256]; 516 | assert(sizeof(buffer) % kByteGroupSize == 0); 517 | 518 | for (size_t k = 0; k < vertex_size; ++k) 519 | { 520 | data = decodeBytes(data, data_end, buffer, (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1)); 521 | if (!data) 522 | return 0; 523 | 524 | size_t vertex_offset = k; 525 | 526 | for (size_t i = 0; i < vertex_count; ++i) 527 | { 528 | unsigned char p = (i == 0) ? 
last_vertex[k] : vertex_data[vertex_offset - vertex_size];

			// If an encoded prediction triple is available for this vertex, refine p.
			// prediction[i] packs three byte-sized back-references (pa, pb, pc); each is
			// a distance, in vertices, back into already-decoded output.
			if (prediction && prediction[i])
			{
				unsigned int pa = (prediction[i] >> 16) & 0xff;
				unsigned int pb = (prediction[i] >> 8) & 0xff;
				unsigned int pc = (prediction[i] >> 0) & 0xff;
				assert(pa > 0 && pb > 0 && pc > 0);

				// only use the references if all three point at vertices decoded earlier
				if (pa <= i && pb <= i && pc <= i)
				{
					unsigned char va = vertex_data[vertex_offset - pa * vertex_size];
					unsigned char vb = vertex_data[vertex_offset - pb * vertex_size];
					unsigned char vc = vertex_data[vertex_offset - pc * vertex_size];

					// parallelogram-style byte predictor built from three earlier vertices
					p = va + vb - vc;
				}
			}

			// the stored byte is a zigzag-encoded delta against the predicted byte p
			vertex_data[vertex_offset] = unzigzag8(buffer[i]) + p;

			vertex_offset += vertex_size;
		}
	}

	// remember the last vertex of this block so the next block can continue delta-coding from it
	for (size_t k = 0; k < vertex_size; ++k)
	{
		last_vertex[k] = vertex_data[vertex_size * (vertex_count - 1) + k];
	}

	return data;
}

// 16-entry ring buffers used by the index decoder below; offsets wrap with `& 15`
typedef unsigned int VertexFifo[16];
typedef unsigned int EdgeFifo[16][3];

// append edge (a, b) with opposite vertex c to the edge ring buffer
static void pushEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, unsigned int c, size_t& offset)
{
	fifo[offset][0] = a;
	fifo[offset][1] = b;
	fifo[offset][2] = c;
	offset = (offset + 1) & 15;
}

// append vertex v to the vertex ring buffer
static void pushVertexFifo(VertexFifo fifo, unsigned int v, size_t& offset)
{
	fifo[offset] = v;
	offset = (offset + 1) & 15;
}

// decode a little-endian base-128 varint: 7 payload bits per byte, high bit = continuation;
// total length is capped at 5 bytes (enough for a 32-bit value)
static unsigned int decodeVByte(const unsigned char*& data)
{
	unsigned char lead = *data++;

	// fast path: single byte
	if (lead < 128)
		return lead;

	// slow path: up to 4 extra bytes
	// note that this loop always terminates, which is important for malformed data
	unsigned int result = lead & 127;
	unsigned int shift = 7;

	for (int i = 0; i < 4; ++i)
	{
		unsigned char group = *data++;
		result |= (group & 127) << shift;
		shift += 7;

		if (group < 128)
			break;
	}

	return result;
}

// decode a "free" index stored as a zigzag-encoded delta against the previous free index (last)
static unsigned int decodeIndex(const unsigned char*& data, unsigned int next, unsigned int last)
{
	(void)next; // unused here; presumably kept to mirror the encoder's signature — TODO confirm

	unsigned int v = decodeVByte(data);
	unsigned int d = (v >> 1) ^ -int(v & 1); // zigzag decode to a signed delta

	return last + d;
}

// Resumable state for decodeVertexPrediction: fifo contents plus stream read positions,
// so prediction data can be produced one vertex block at a time across multiple calls.
struct DecodePredictionState
{
	EdgeFifo edgefifo;
	VertexFifo vertexfifo;
	size_t edgefifooffset;
	size_t vertexfifooffset;

	unsigned int next; // next not-yet-seen vertex index
	unsigned int last; // last decoded free index (delta base)

	size_t code_offset;  // read position within the triangle code stream
	size_t data_offset;  // read position within the auxiliary data stream (relative to its start)

	size_t index_offset; // number of indices consumed so far
};

// Replays the encoded index stream and emits, for each vertex introduced in sequence
// order (via the `next` counter), a packed (pa, pb, pc) back-reference triple — or 0
// for "no prediction" — into result[]. Resumable via `state`; returns the number of
// entries written. A return of 0 means either completion or malformed input.
static size_t decodeVertexPrediction(DecodePredictionState& state, unsigned int* result, size_t result_size, size_t index_count, const unsigned char* buffer, size_t buffer_size)
{
	assert(index_count % 3 == 0);

	// the minimum valid encoding is 1 byte per triangle and a 16-byte codeaux table
	if (buffer_size < index_count / 3 + 16)
		return 0;

	// since we store 16-byte codeaux table at the end, triangle data has to begin before data_safe_end
	const unsigned char* code = buffer + state.code_offset;
	const unsigned char* data = buffer + index_count / 3 + state.data_offset;
	const unsigned char* data_safe_end = buffer + buffer_size - 16;

	const unsigned char* codeaux_table = data_safe_end;

	size_t result_offset = 0;
	size_t i = state.index_offset;

	for (; i < index_count; i += 3)
	{
		// stop before overflowing the caller's result buffer (a triangle emits up to 3 entries)
		if (result_offset + 3 > result_size)
			break;

		// make sure we have enough data to read for a triangle
		// each triangle reads at most 16 bytes of data: 1b for codeaux and 5b for each free index
		// after this we can be sure we can read without extra bounds checks
		if (data > data_safe_end)
			return 0;

		unsigned char codetri = *code++;

		int fe = codetri >> 4;

		if (fe < 15)
		{
			// fifo reads are wrapped around 16 entry buffer
			unsigned int a = state.edgefifo[(state.edgefifooffset - 1 - fe) & 15][0];
			unsigned int b = state.edgefifo[(state.edgefifooffset - 1 - fe) & 15][1];
			unsigned int co = state.edgefifo[(state.edgefifooffset - 1 - fe) & 15][2];

			int fec = codetri & 15;

			unsigned int c = (fec == 0) ? state.next++ : state.vertexfifo[(state.vertexfifooffset - 1 - fec) & 15];

			// note that we need to update the last index since free indices are delta-encoded
			if (fec == 15)
				state.last = c = decodeIndex(data, state.next, state.last);

			// output prediction data
			if (fec == 0)
			{
				unsigned int na = c - a;
				unsigned int nb = c - b;
				unsigned int nc = c - co;

				unsigned int p = (na << 16) | (nb << 8) | nc;

				// only emit a triple if all three distances fit in a byte; otherwise no prediction
				result[result_offset++] = (na | nb | nc) < 256 ? p : 0;
			}

			// push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
			if (fec == 0 || fec == 15)
				pushVertexFifo(state.vertexfifo, c, state.vertexfifooffset);

			pushEdgeFifo(state.edgefifo, c, b, a, state.edgefifooffset);
			pushEdgeFifo(state.edgefifo, a, c, b, state.edgefifooffset);
		}
		else
		{
			// fast path: read codeaux from the table; we wrap table index so this access is memory-safe
			// slow path: read a full byte for codeaux instead of using a table lookup
			unsigned char codeaux = (codetri & 15) >= 14 ? *data++ : codeaux_table[codetri & 15];

			int fea = (codetri & 15) == 15 ? 15 : 0;
			int feb = codeaux >> 4;
			int fec = codeaux & 15;

			// fifo reads are wrapped around 16 entry buffer
			// also note that we increment next for all three vertices before decoding indices - this matches encoder behavior
			unsigned int a = (fea == 0) ? state.next++ : 0;
			unsigned int b = (feb == 0) ? state.next++ : state.vertexfifo[(state.vertexfifooffset - feb) & 15];
			unsigned int c = (fec == 0) ? state.next++ : state.vertexfifo[(state.vertexfifooffset - fec) & 15];

			// note that we need to update the last index since free indices are delta-encoded
			if (fea == 15)
				state.last = a = decodeIndex(data, state.next, state.last);

			if (feb == 15)
				state.last = b = decodeIndex(data, state.next, state.last);

			if (fec == 15)
				state.last = c = decodeIndex(data, state.next, state.last);

			// output prediction data
			if (fea == 0)
				result[result_offset++] = 0;

			if (feb == 0)
				result[result_offset++] = 0;

			if (fec == 0)
				result[result_offset++] = 0;

			// push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
			if (fea == 0 || fea == 15)
				pushVertexFifo(state.vertexfifo, a, state.vertexfifooffset);

			if (feb == 0 || feb == 15)
				pushVertexFifo(state.vertexfifo, b, state.vertexfifooffset);

			if (fec == 0 || fec == 15)
				pushVertexFifo(state.vertexfifo, c, state.vertexfifooffset);

			pushEdgeFifo(state.edgefifo, b, a, c, state.edgefifooffset);
			pushEdgeFifo(state.edgefifo, c, b, a, state.edgefifooffset);
			pushEdgeFifo(state.edgefifo, a, c, b, state.edgefifooffset);
		}
	}

	// we should've read all data bytes and stopped at the boundary between data and codeaux table
	if (i == index_count && data != data_safe_end)
		return 0;

	// persist read positions so a subsequent call can resume where this one stopped
	state.code_offset = code - buffer;
	state.data_offset = data - buffer - index_count / 3;
	state.index_offset = i;

	return result_offset;
}

} // closes enclosing namespace opened above this chunk (presumably meshopt, per `using namespace meshopt` below)

size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size, size_t index_count, const unsigned char* index_buffer,
size_t index_buffer_size) 763 | { 764 | using namespace meshopt; 765 | 766 | assert(vertex_size > 0 && vertex_size <= 256); 767 | assert(index_count % 3 == 0); 768 | assert(index_buffer == 0 || index_buffer_size > 0); 769 | 770 | const unsigned char* vertex_data = static_cast(vertices); 771 | 772 | unsigned char* data = buffer; 773 | 774 | unsigned char last_vertex[256]; 775 | 776 | for (size_t k = 0; k < vertex_size; ++k) 777 | { 778 | last_vertex[k] = vertex_data[k]; 779 | 780 | *data++ = last_vertex[k]; 781 | } 782 | 783 | const size_t prediction_capacity = kVertexBlockSize + 2; 784 | unsigned int prediction[prediction_capacity]; 785 | 786 | DecodePredictionState pstate = {}; 787 | 788 | #if TRACE > 0 789 | memset(&encodeVertexBlockStats, 0, sizeof(encodeVertexBlockStats)); 790 | #endif 791 | 792 | size_t vertex_offset = 0; 793 | size_t prediction_offset = 0; 794 | 795 | if (index_buffer) 796 | { 797 | for (;;) 798 | { 799 | size_t psize = decodeVertexPrediction(pstate, prediction + prediction_offset, prediction_capacity - prediction_offset, index_count, index_buffer, index_buffer_size); 800 | if (psize == 0) 801 | break; 802 | 803 | size_t block_size = psize + prediction_offset; 804 | 805 | if (vertex_offset + block_size > vertex_count) 806 | break; 807 | 808 | size_t block_size_clamped = (block_size > kVertexBlockSize) ? kVertexBlockSize : block_size; 809 | 810 | data = encodeVertexBlock(data, vertex_data + vertex_offset * vertex_size, block_size_clamped, vertex_size, prediction, last_vertex); 811 | vertex_offset += block_size_clamped; 812 | 813 | prediction_offset = block_size - block_size_clamped; 814 | memset(&prediction[0], 0, prediction_offset * sizeof(prediction[0])); 815 | } 816 | } 817 | 818 | while (vertex_offset < vertex_count) 819 | { 820 | size_t block_size = (vertex_offset + kVertexBlockSize < vertex_count) ? 
kVertexBlockSize : vertex_count - vertex_offset; 821 | 822 | data = encodeVertexBlock(data, vertex_data + vertex_offset * vertex_size, block_size, vertex_size, 0, last_vertex); 823 | vertex_offset += block_size; 824 | } 825 | 826 | #if TRACE > 0 827 | dumpEncodeVertexBlockStats(vertex_count, vertex_size); 828 | #endif 829 | 830 | assert(size_t(data - buffer) <= buffer_size); 831 | (void)buffer_size; 832 | 833 | return data - buffer; 834 | } 835 | 836 | size_t meshopt_encodeVertexBufferBound(size_t vertex_count, size_t vertex_size) 837 | { 838 | // TODO: This significantly overestimates worst case, refine 839 | return vertex_count * vertex_size * 2; 840 | } 841 | 842 | int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t vertex_size, size_t index_count, const unsigned char* buffer, size_t buffer_size, const unsigned char* index_buffer, size_t index_buffer_size) 843 | { 844 | using namespace meshopt; 845 | 846 | assert(vertex_size > 0 && vertex_size <= 256); 847 | assert(index_count % 3 == 0); 848 | assert(index_buffer == 0 || index_buffer_size > 0); 849 | 850 | unsigned char* vertex_data = static_cast(destination); 851 | 852 | const unsigned char* data = buffer; 853 | const unsigned char* data_end = buffer + buffer_size; 854 | 855 | if (size_t(data_end - data) < vertex_size) 856 | return -1; 857 | 858 | unsigned char last_vertex[256]; 859 | 860 | // TODO: bounds checks on data 861 | for (size_t k = 0; k < vertex_size; ++k) 862 | { 863 | last_vertex[k] = *data++; 864 | 865 | vertex_data[k] = last_vertex[k]; 866 | } 867 | 868 | const size_t prediction_capacity = kVertexBlockSize + 2; 869 | unsigned int prediction[prediction_capacity]; 870 | 871 | DecodePredictionState pstate = {}; 872 | 873 | size_t vertex_offset = 0; 874 | size_t prediction_offset = 0; 875 | 876 | if (index_buffer) 877 | { 878 | for (;;) 879 | { 880 | size_t psize = decodeVertexPrediction(pstate, prediction + prediction_offset, prediction_capacity - prediction_offset, 
index_count, index_buffer, index_buffer_size); 881 | if (psize == 0) 882 | break; 883 | 884 | size_t block_size = psize + prediction_offset; 885 | 886 | if (vertex_offset + block_size > vertex_count) 887 | break; 888 | 889 | size_t block_size_clamped = (block_size > kVertexBlockSize) ? kVertexBlockSize : block_size; 890 | 891 | data = decodeVertexBlock(data, data_end, vertex_data + vertex_offset * vertex_size, block_size_clamped, vertex_size, prediction, last_vertex); 892 | if (!data) 893 | return -2; 894 | 895 | vertex_offset += block_size_clamped; 896 | 897 | prediction_offset = block_size - block_size_clamped; 898 | memset(&prediction[0], 0, prediction_offset * sizeof(prediction[0])); 899 | } 900 | } 901 | 902 | while (vertex_offset < vertex_count) 903 | { 904 | size_t block_size = (vertex_offset + kVertexBlockSize < vertex_count) ? kVertexBlockSize : vertex_count - vertex_offset; 905 | 906 | data = decodeVertexBlock(data, data_end, vertex_data + vertex_offset * vertex_size, block_size, vertex_size, 0, last_vertex); 907 | if (!data) 908 | return -2; 909 | 910 | vertex_offset += block_size; 911 | } 912 | 913 | if (data != data_end) 914 | return -3; 915 | 916 | return 0; 917 | } 918 | --------------------------------------------------------------------------------