├── External ├── tiny_obj_loader │ ├── tiny_obj_loader.cc │ └── CMakeLists.txt ├── stb │ └── CMakeLists.txt ├── CMakeLists.txt └── meshoptimizer │ ├── CMakeLists.txt │ ├── LICENSE.md │ ├── src │ ├── vfetchanalyzer.cpp │ ├── vfetchoptimizer.cpp │ ├── vcacheanalyzer.cpp │ ├── indexgenerator.cpp │ ├── stripifier.cpp │ ├── overdrawanalyzer.cpp │ ├── simplifier.cpp │ ├── overdrawoptimizer.cpp │ ├── vcacheoptimizer.cpp │ ├── indexcodec.cpp │ ├── meshoptimizer.h │ └── vertexcodec.cpp │ └── README.md ├── Scripts ├── cmake-vs2015-vk.cmd └── cmake-vs2017-vk.cmd ├── .gitmodules ├── .gitignore ├── CMakeLists.txt ├── Source ├── Shaders │ ├── ModelIndexed.frag │ ├── ModelIndexed.vert │ ├── ModelManual.vert │ ├── ModelNativeAMD.vert │ ├── ModelPassthrough.vert │ ├── ModelPassthrough.frag │ ├── Model.frag │ ├── Model.vert │ ├── ModelPassthroughTextured.frag │ ├── ModelBarycentrics.geom │ ├── ModelManual.frag │ ├── ModelNativeAMD.frag │ ├── ModelNativeAMDTextured.frag │ ├── ModelPassthrough.geom │ ├── ModelPassthroughTextured.geom │ └── Common.glsl ├── BaseApplication.h ├── DemoUtils.h ├── CMakeLists.txt ├── DemoUtils.cpp ├── Barycentrics.h ├── BaseApplication.cpp └── Barycentrics.cpp ├── LICENSE └── README.md /External/tiny_obj_loader/tiny_obj_loader.cc: -------------------------------------------------------------------------------- 1 | #define TINYOBJLOADER_IMPLEMENTATION 2 | #include "tiny_obj_loader.h" 3 | -------------------------------------------------------------------------------- /Scripts/cmake-vs2015-vk.cmd: -------------------------------------------------------------------------------- 1 | del ..\Build\CMakeCache.txt 2 | cmake -G "Visual Studio 14 2015 Win64" -B..\Build -H.. 3 | -------------------------------------------------------------------------------- /Scripts/cmake-vs2017-vk.cmd: -------------------------------------------------------------------------------- 1 | del ..\Build\CMakeCache.txt 2 | cmake -G "Visual Studio 15 2017 Win64" -B..\Build -H.. 
3 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "External/librush"] 2 | path = External/librush 3 | url = https://github.com/kayru/librush.git 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | Build 3 | build 4 | *.spv 5 | *.sublime-workspace 6 | /.vscode/* 7 | *.user 8 | .vs 9 | x64 10 | -------------------------------------------------------------------------------- /External/stb/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(stb INTERFACE) 2 | target_include_directories(stb INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) 3 | -------------------------------------------------------------------------------- /External/tiny_obj_loader/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(tiny_obj_loader STATIC tiny_obj_loader.cc) 2 | target_include_directories(tiny_obj_loader INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) 3 | -------------------------------------------------------------------------------- /External/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory("librush") 2 | 3 | if (MSVC) 4 | add_compile_options(-W0) 5 | else() 6 | add_compile_options(-w) 7 | endif() 8 | 9 | add_subdirectory("meshoptimizer") 10 | add_subdirectory("stb") 11 | add_subdirectory("tiny_obj_loader") 12 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.7) 2 | set_property(GLOBAL PROPERTY USE_FOLDERS ON) 3 | 4 | project(Barycentrics) 5 | 6 | set(RUSH_RENDER_API 
"VK" CACHE STRING "Force Vulkan renderer") 7 | 8 | find_program(GLSLC NAMES glslc PATHS 9 | $ENV{VULKAN_SDK}/Bin 10 | $ENV{VK_SDK_PATH}/Bin 11 | $ENV{PATH} 12 | "~/bin" 13 | ) 14 | 15 | add_subdirectory("External") 16 | add_subdirectory("Source") 17 | -------------------------------------------------------------------------------- /Source/Shaders/ModelIndexed.frag: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "Common.glsl" 4 | 5 | layout (location = 0) in vec2 v_tex0; 6 | layout (location = 2) in vec3 v_viewVector; 7 | 8 | layout (location = 0) out vec4 fragColor0; 9 | 10 | void main() 11 | { 12 | if (g_useTexture) 13 | { 14 | fragColor0.rgb = texture(albedoSampler, v_tex0).rgb; 15 | } 16 | else 17 | { 18 | fragColor0.rgb = vec3(v_tex0, 0.0); 19 | } 20 | 21 | fragColor0.a = 1; 22 | } 23 | -------------------------------------------------------------------------------- /Source/Shaders/ModelIndexed.vert: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "Common.glsl" 4 | 5 | layout (location = 0) in vec3 a_pos0; 6 | layout (location = 1) in vec2 a_tex0; 7 | 8 | layout (location = 0) out vec2 v_tex0; 9 | layout (location = 2) out vec3 v_viewVector; 10 | 11 | void main() 12 | { 13 | vec3 worldPos = (vec4(a_pos0, 1) * g_matWorld).xyz; 14 | gl_Position = vec4(worldPos, 1) * g_matViewProj; 15 | v_tex0 = a_tex0; 16 | v_viewVector = worldPos - g_cameraPos.xyz; 17 | } 18 | -------------------------------------------------------------------------------- /Source/Shaders/ModelManual.vert: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "Common.glsl" 4 | 5 | layout (location = 0) in vec3 a_pos0; 6 | layout (location = 1) in vec2 a_tex0; 7 | 8 | layout (location = 0) out vec3 v_worldPos; 9 | layout (location = 1) out vec3 v_viewVector; 10 | 11 | void main() 12 | { 13 | vec3 
worldPos = (vec4(a_pos0, 1) * g_matWorld).xyz; 14 | gl_Position = vec4(worldPos, 1) * g_matViewProj; 15 | v_worldPos = worldPos; 16 | v_viewVector = worldPos - g_cameraPos.xyz; 17 | } 18 | -------------------------------------------------------------------------------- /Source/Shaders/ModelNativeAMD.vert: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "Common.glsl" 4 | 5 | layout (location = 0) in vec3 a_pos0; 6 | layout (location = 1) in vec2 a_tex0; 7 | 8 | layout (location = 0) out float v_IdFlat; 9 | layout (location = 1) out float v_Id; 10 | 11 | void main() 12 | { 13 | vec3 worldPos = (vec4(a_pos0, 1) * g_matWorld).xyz; 14 | gl_Position = vec4(worldPos, 1) * g_matViewProj; 15 | 16 | float id = intBitsToFloat(gl_VertexIndex); 17 | v_IdFlat = id; 18 | v_Id = id; 19 | } 20 | -------------------------------------------------------------------------------- /Source/Shaders/ModelPassthrough.vert: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "Common.glsl" 4 | 5 | layout (location = 0) in vec3 a_pos0; 6 | layout (location = 1) in vec2 a_tex0; 7 | 8 | layout (location = 0) out vec2 v_tex0; 9 | layout (location = 1) out vec3 v_viewVector; 10 | layout (location = 2) out vec3 v_worldPos; 11 | 12 | void main() 13 | { 14 | vec3 worldPos = (vec4(a_pos0, 1) * g_matWorld).xyz; 15 | gl_Position = vec4(worldPos, 1) * g_matViewProj; 16 | v_tex0 = a_tex0; 17 | v_viewVector = worldPos - g_cameraPos.xyz; 18 | v_worldPos = worldPos; 19 | } 20 | -------------------------------------------------------------------------------- /Source/Shaders/ModelPassthrough.frag: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "Common.glsl" 4 | 5 | layout (location = 0) in vec2 v_tex0; 6 | layout (location = 1) in vec3 v_viewVector; 7 | layout (location = 2) in flat vec3 v_worldPos0; 8 | layout 
(location = 3) in flat vec3 v_worldPos1; 9 | layout (location = 4) in flat vec3 v_worldPos2; 10 | 11 | layout (location = 0) out vec4 fragColor0; 12 | 13 | void main() 14 | { 15 | vec3 barycentrics = intersectRayTri(g_cameraPos.xyz, 16 | normalize(v_viewVector), 17 | v_worldPos0, 18 | v_worldPos1, 19 | v_worldPos2); 20 | 21 | fragColor0.rgb = barycentrics; 22 | 23 | fragColor0.a = 1.0; 24 | } 25 | -------------------------------------------------------------------------------- /Source/Shaders/Model.frag: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "Common.glsl" 4 | 5 | layout (location = 0) in vec2 v_barycentrics; 6 | layout (location = 1) in flat uint v_primId; 7 | layout (location = 2) in vec3 v_viewVector; 8 | 9 | layout (location = 0) out vec4 fragColor0; 10 | 11 | void main() 12 | { 13 | vec3 barycentrics = vec3(v_barycentrics.x, v_barycentrics.y, 1.0 - v_barycentrics.x - v_barycentrics.y); 14 | 15 | if (g_useTexture) 16 | { 17 | vec2 texcoords = interpolateTexCoords(v_primId, barycentrics); 18 | fragColor0.rgb = texture(albedoSampler, texcoords).rgb; 19 | } 20 | else 21 | { 22 | fragColor0.rgb = barycentrics; 23 | } 24 | 25 | fragColor0.a = 1; 26 | } 27 | -------------------------------------------------------------------------------- /Source/Shaders/Model.vert: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "Common.glsl" 4 | 5 | layout (location = 0) out vec2 v_barycentrics; 6 | layout (location = 1) out uint v_primId; 7 | layout (location = 2) out vec3 v_viewVector; 8 | 9 | void main() 10 | { 11 | uint index = g_indices[gl_VertexIndex]; 12 | Vertex vertex = getVertex(index); 13 | vec3 worldPos = (vec4(vertex.position, 1) * g_matWorld).xyz; 14 | 15 | gl_Position = vec4(worldPos, 1) * g_matViewProj; 16 | 17 | uint id = gl_VertexIndex%3; 18 | switch(id) 19 | { 20 | case 0: v_barycentrics = vec2(1,0); break; 21 | case 1: 
v_barycentrics = vec2(0,1); break; 22 | case 2: v_barycentrics = vec2(0,0); break; 23 | } 24 | 25 | v_primId = gl_VertexIndex / 3; 26 | v_viewVector = worldPos - g_cameraPos.xyz; 27 | } 28 | -------------------------------------------------------------------------------- /Source/Shaders/ModelPassthroughTextured.frag: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "Common.glsl" 4 | 5 | layout (location = 0) in vec2 v_tex0; 6 | layout (location = 1) in vec3 v_viewVector; 7 | layout (location = 2) in flat vec3 v_worldPos0; 8 | layout (location = 3) in flat vec3 v_worldPos1; 9 | layout (location = 4) in flat vec3 v_worldPos2; 10 | layout (location = 5) in flat uint v_primId; 11 | 12 | layout (location = 0) out vec4 fragColor0; 13 | 14 | void main() 15 | { 16 | vec3 barycentrics = intersectRayTri(g_cameraPos.xyz, 17 | normalize(v_viewVector), 18 | v_worldPos0, 19 | v_worldPos1, 20 | v_worldPos2); 21 | 22 | vec2 texcoords = interpolateTexCoords(v_primId, barycentrics); 23 | fragColor0.rgb = texture(albedoSampler, texcoords).rgb; 24 | fragColor0.a = 1.0; 25 | } 26 | -------------------------------------------------------------------------------- /Source/Shaders/ModelBarycentrics.geom: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | layout(triangles) in; 4 | layout(triangle_strip, max_vertices=3) out; 5 | 6 | layout (location = 0) in vec2 v_tex0[]; // unused 7 | layout (location = 2) in vec3 v_viewVectorIn[]; 8 | 9 | layout (location = 0) out vec2 v_barycentrics; 10 | layout (location = 1) out uint v_primId; 11 | layout (location = 2) out vec3 v_viewVector; 12 | 13 | void main() 14 | { 15 | gl_Position = gl_in[0].gl_Position; 16 | v_primId = gl_PrimitiveIDIn; 17 | v_viewVector = v_viewVectorIn[0]; 18 | v_barycentrics = vec2(1,0); 19 | EmitVertex(); 20 | 21 | gl_Position = gl_in[1].gl_Position; 22 | v_primId = gl_PrimitiveIDIn; 23 | v_viewVector = 
v_viewVectorIn[1]; 24 | v_barycentrics = vec2(0,1); 25 | EmitVertex(); 26 | 27 | gl_Position = gl_in[2].gl_Position; 28 | v_primId = gl_PrimitiveIDIn; 29 | v_viewVector = v_viewVectorIn[2]; 30 | v_barycentrics = vec2(0,0); 31 | EmitVertex(); 32 | } 33 | -------------------------------------------------------------------------------- /External/meshoptimizer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(meshoptimizer) 2 | cmake_minimum_required(VERSION 3.0) 3 | 4 | option(BUILD_DEMO "Build demo" OFF) 5 | 6 | if(MSVC) 7 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /WX") 8 | endif(MSVC) 9 | 10 | set(SOURCES 11 | src/meshoptimizer.h 12 | src/indexcodec.cpp 13 | src/indexgenerator.cpp 14 | src/overdrawanalyzer.cpp 15 | src/overdrawoptimizer.cpp 16 | src/simplifier.cpp 17 | src/stripifier.cpp 18 | src/vcacheanalyzer.cpp 19 | src/vcacheoptimizer.cpp 20 | src/vertexcodec.cpp 21 | src/vfetchanalyzer.cpp 22 | src/vfetchoptimizer.cpp 23 | ) 24 | 25 | add_library(meshoptimizer STATIC ${SOURCES}) 26 | 27 | if(BUILD_DEMO) 28 | add_executable(demo demo/main.cpp demo/miniz.cpp demo/objparser.cpp) 29 | target_link_libraries(demo meshoptimizer) 30 | endif() 31 | 32 | target_include_directories(meshoptimizer INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/src") 33 | -------------------------------------------------------------------------------- /Source/Shaders/ModelManual.frag: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "Common.glsl" 4 | 5 | layout (location = 0) in flat vec3 v_worldPos; 6 | layout (location = 1) in vec3 v_viewVector; 7 | 8 | layout (location = 0) out vec4 fragColor0; 9 | 10 | void main() 11 | { 12 | uint index1 = g_indices[gl_PrimitiveID*3+1]; 13 | uint index2 = g_indices[gl_PrimitiveID*3+2]; 14 | 15 | Vertex vertex1 = getVertex(index1); 16 | Vertex vertex2 = getVertex(index2); 17 | 18 | vec3 barycentrics = intersectRayTri(g_cameraPos.xyz, 
19 | normalize(v_viewVector), 20 | v_worldPos, 21 | (vec4(vertex1.position, 1) * g_matWorld).xyz, 22 | (vec4(vertex2.position, 1) * g_matWorld).xyz); 23 | 24 | if (g_useTexture) 25 | { 26 | vec2 texcoords = interpolateTexCoords(gl_PrimitiveID, barycentrics); 27 | fragColor0.rgb = texture(albedoSampler, texcoords).rgb; 28 | } 29 | else 30 | { 31 | fragColor0.rgb = barycentrics; 32 | } 33 | 34 | fragColor0.a = 1; 35 | } 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Yuriy O'Donnell 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /External/meshoptimizer/LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016-2018 Arseny Kapoulkine 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Source/BaseApplication.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | class ShaderCompiler; 8 | 9 | namespace Rush 10 | { 11 | class PrimitiveBatch; 12 | class BitmapFontRenderer; 13 | } 14 | 15 | class BaseApplication : public Application 16 | { 17 | RUSH_DISALLOW_COPY_AND_ASSIGN(BaseApplication); 18 | 19 | public: 20 | BaseApplication(); 21 | ~BaseApplication(); 22 | 23 | protected: 24 | struct DepthStencilStates 25 | { 26 | GfxDepthStencilStateRef testLessEqual; 27 | GfxDepthStencilStateRef writeLessEqual; 28 | GfxDepthStencilStateRef writeAlways; 29 | GfxDepthStencilStateRef disable; 30 | } m_depthStencilStates; 31 | 32 | struct SamplerStates 33 | { 34 | GfxSamplerRef pointClamp; 35 | GfxSamplerRef linearClamp; 36 | GfxSamplerRef linearWrap; 37 | GfxSamplerRef anisotropicWrap; 38 | } m_samplerStates; 39 | 40 | struct BlendStates 41 | { 42 | GfxBlendStateRef lerp; 43 | GfxBlendStateRef opaque; 44 | GfxBlendStateRef additive; 45 | } m_blendStates; 46 | 47 | GfxDevice* m_dev; 48 | GfxContext* m_ctx; 49 | Window* m_window; 50 | PrimitiveBatch* m_prim; 51 | BitmapFontRenderer* m_font; 52 | 53 | GfxTexture m_defaultWhiteTexture; 54 | GfxTexture m_checkerboardTexture; 55 | }; 56 | -------------------------------------------------------------------------------- /Source/Shaders/ModelNativeAMD.frag: -------------------------------------------------------------------------------- 1 | #version 450 2 | #extension GL_AMD_shader_explicit_vertex_parameter : require 3 | 4 | #include "Common.glsl" 5 | 6 | layout (location = 0) in flat float v_IdFlat; 7 | layout (location = 1) in __explicitInterpAMD float v_Id; 8 | 9 | layout (location = 0) out vec4 fragColor0; 10 | 11 | void main() 12 | { 13 | int idRef = floatBitsToInt(v_IdFlat); 14 | int id0 = floatBitsToInt(interpolateAtVertexAMD(v_Id, 
0)); 15 | int id1 = floatBitsToInt(interpolateAtVertexAMD(v_Id, 1)); 16 | int id2 = floatBitsToInt(interpolateAtVertexAMD(v_Id, 2)); 17 | 18 | vec3 barycentrics; 19 | if (idRef == id0) 20 | { 21 | barycentrics.y = gl_BaryCoordSmoothAMD.x; 22 | barycentrics.z = gl_BaryCoordSmoothAMD.y; 23 | barycentrics.x = 1.0 - barycentrics.z - barycentrics.y; 24 | } 25 | else if (idRef == id1) 26 | { 27 | barycentrics.x = gl_BaryCoordSmoothAMD.x; 28 | barycentrics.y = gl_BaryCoordSmoothAMD.y; 29 | barycentrics.z = 1.0 - barycentrics.x - barycentrics.y; 30 | } 31 | else if (idRef == id2) 32 | { 33 | barycentrics.z = gl_BaryCoordSmoothAMD.x; 34 | barycentrics.x = gl_BaryCoordSmoothAMD.y; 35 | barycentrics.y = 1.0 - barycentrics.x - barycentrics.z; 36 | } 37 | else 38 | { 39 | barycentrics = vec3(1.0); 40 | } 41 | 42 | fragColor0.rgb = barycentrics; 43 | 44 | fragColor0.a = 1.0; 45 | } 46 | -------------------------------------------------------------------------------- /Source/Shaders/ModelNativeAMDTextured.frag: -------------------------------------------------------------------------------- 1 | #version 450 2 | #extension GL_AMD_shader_explicit_vertex_parameter : require 3 | 4 | #include "Common.glsl" 5 | 6 | layout (location = 0) in flat float v_IdFlat; 7 | layout (location = 1) in __explicitInterpAMD float v_Id; 8 | 9 | layout (location = 0) out vec4 fragColor0; 10 | 11 | void main() 12 | { 13 | int idRef = floatBitsToInt(v_IdFlat); 14 | int id0 = floatBitsToInt(interpolateAtVertexAMD(v_Id, 0)); 15 | int id1 = floatBitsToInt(interpolateAtVertexAMD(v_Id, 1)); 16 | int id2 = floatBitsToInt(interpolateAtVertexAMD(v_Id, 2)); 17 | 18 | vec3 barycentrics; 19 | if (idRef == id0) 20 | { 21 | barycentrics.y = gl_BaryCoordSmoothAMD.x; 22 | barycentrics.z = gl_BaryCoordSmoothAMD.y; 23 | barycentrics.x = 1.0 - barycentrics.z - barycentrics.y; 24 | } 25 | else if (idRef == id1) 26 | { 27 | barycentrics.x = gl_BaryCoordSmoothAMD.x; 28 | barycentrics.y = gl_BaryCoordSmoothAMD.y; 29 | 
barycentrics.z = 1.0 - barycentrics.x - barycentrics.y; 30 | } 31 | else if (idRef == id2) 32 | { 33 | barycentrics.z = gl_BaryCoordSmoothAMD.x; 34 | barycentrics.x = gl_BaryCoordSmoothAMD.y; 35 | barycentrics.y = 1.0 - barycentrics.x - barycentrics.z; 36 | } 37 | else 38 | { 39 | barycentrics = vec3(1.0); 40 | } 41 | 42 | vec2 texcoords = interpolateTexCoords(gl_PrimitiveID, barycentrics); 43 | fragColor0.rgb = texture(albedoSampler, texcoords).rgb; 44 | 45 | fragColor0.a = 1.0; 46 | } 47 | -------------------------------------------------------------------------------- /Source/DemoUtils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | template 9 | struct MovingAverage 10 | { 11 | MovingAverage() { reset(); } 12 | void reset() { idx = 0; sum = 0; for(T& it : buf) it=0; } 13 | T get() const { return sum / SIZE; } 14 | void add(T v) 15 | { 16 | sum += v; 17 | sum -= buf[idx]; 18 | buf[idx] = v; 19 | idx = (idx + 1) % SIZE; 20 | } 21 | size_t idx; 22 | T sum; 23 | T buf[SIZE]; 24 | }; 25 | 26 | template 27 | struct TimingScope 28 | { 29 | 30 | TimingScope(MovingAverage& output) 31 | : m_output(output) 32 | {} 33 | 34 | ~TimingScope() 35 | { 36 | m_output.add(m_timer.time()); 37 | } 38 | 39 | MovingAverage& m_output; 40 | Timer m_timer; 41 | }; 42 | 43 | inline u64 hashFnv1a64(const void* message, size_t length, u64 state = 0xcbf29ce484222325) 44 | { 45 | const u8* bytes = (const u8*)message; 46 | for (size_t i = 0; i < length; ++i) 47 | { 48 | state ^= bytes[i]; 49 | state *= 0x100000001b3; 50 | } 51 | return state; 52 | } 53 | 54 | std::string directoryFromFilename(const std::string& filename); 55 | GfxShaderSource shaderFromFile(const char* filename, const char* shaderDirectory = Platform_GetExecutableDirectory()); 56 | GfxTexture textureFromFile(const char* filename); 57 | GfxTexture generateMipsRGBA8(u8* pixels, int w, int h); 58 | 
-------------------------------------------------------------------------------- /Source/Shaders/ModelPassthrough.geom: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #if 0 // Reference mode 4 | 5 | layout(triangles) in; 6 | layout(triangle_strip, max_vertices=3) out; 7 | 8 | layout (location = 0) in vec2 v_tex0[]; 9 | layout (location = 1) in vec3 v_viewVector[]; 10 | layout (location = 2) in vec3 v_worldPos[]; 11 | 12 | layout (location = 0) out vec2 out_tex0; 13 | layout (location = 1) out vec3 out_viewVector; 14 | layout (location = 2) out vec3 out_worldPos0; 15 | layout (location = 3) out vec3 out_worldPos1; 16 | layout (location = 4) out vec3 out_worldPos2; 17 | layout (location = 5) out uint out_primId; 18 | 19 | void main() 20 | { 21 | for (int i=0; i<3; ++i) 22 | { 23 | gl_Position = gl_in[i].gl_Position; 24 | out_tex0 = v_tex0[i]; 25 | out_viewVector = v_viewVector[i]; 26 | out_worldPos0 = v_worldPos[0]; 27 | out_worldPos1 = v_worldPos[1]; 28 | out_worldPos2 = v_worldPos[2]; 29 | EmitVertex(); 30 | } 31 | } 32 | 33 | #else 34 | 35 | #extension GL_NV_geometry_shader_passthrough : require 36 | 37 | layout(triangles) in; 38 | 39 | layout(passthrough) in gl_PerVertex 40 | { 41 | vec4 gl_Position; 42 | }; 43 | 44 | layout (location = 0, passthrough) in vec2 in_tex0; 45 | layout (location = 1, passthrough) in vec3 in_viewVector; 46 | layout (location = 2) in vec3 in_worldPos[]; 47 | 48 | layout (location = 2) out vec3 v_worldPos0; 49 | layout (location = 3) out vec3 v_worldPos1; 50 | layout (location = 4) out vec3 v_worldPos2; 51 | 52 | void main() 53 | { 54 | v_worldPos0 = in_worldPos[0]; 55 | v_worldPos1 = in_worldPos[1]; 56 | v_worldPos2 = in_worldPos[2]; 57 | } 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /External/meshoptimizer/src/vfetchanalyzer.cpp: -------------------------------------------------------------------------------- 1 | // 
This file is part of meshoptimizer library; see meshoptimizer.h for version/license details 2 | #include "meshoptimizer.h" 3 | 4 | #include 5 | 6 | meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size) 7 | { 8 | assert(index_count % 3 == 0); 9 | assert(vertex_size > 0 && vertex_size <= 256); 10 | 11 | meshopt_VertexFetchStatistics result = {}; 12 | 13 | const size_t kCacheLine = 64; 14 | const size_t kCacheSize = 128 * 1024; 15 | 16 | // simple direct mapped cache; on typical mesh data this is close to 4-way cache, and this model is a gross approximation anyway 17 | size_t cache[kCacheSize / kCacheLine] = {}; 18 | 19 | for (size_t i = 0; i < index_count; ++i) 20 | { 21 | unsigned int index = indices[i]; 22 | assert(index < vertex_count); 23 | 24 | size_t start_address = index * vertex_size; 25 | size_t end_address = start_address + vertex_size; 26 | 27 | size_t start_tag = start_address / kCacheLine; 28 | size_t end_tag = (end_address + kCacheLine - 1) / kCacheLine; 29 | 30 | assert(start_tag < end_tag); 31 | 32 | for (size_t tag = start_tag; tag < end_tag; ++tag) 33 | { 34 | size_t line = tag % (sizeof(cache) / sizeof(cache[0])); 35 | 36 | // we store +1 since cache is filled with 0 by default 37 | result.bytes_fetched += (cache[line] != tag + 1) * kCacheLine; 38 | cache[line] = tag + 1; 39 | } 40 | } 41 | 42 | result.overfetch = vertex_count == 0 ? 
0 : float(result.bytes_fetched) / float(vertex_count * vertex_size); 43 | 44 | return result; 45 | } 46 | -------------------------------------------------------------------------------- /External/meshoptimizer/src/vfetchoptimizer.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details 2 | #include "meshoptimizer.h" 3 | 4 | #include 5 | #include 6 | 7 | size_t meshopt_optimizeVertexFetch(void* destination, unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) 8 | { 9 | assert(index_count % 3 == 0); 10 | assert(vertex_size > 0 && vertex_size <= 256); 11 | 12 | // support in-place optimization 13 | meshopt_Buffer vertices_copy; 14 | 15 | if (destination == vertices) 16 | { 17 | vertices_copy.data = new char[vertex_count * vertex_size]; 18 | memcpy(vertices_copy.data, vertices, vertex_count * vertex_size); 19 | vertices = vertices_copy.data; 20 | } 21 | 22 | // build vertex remap table 23 | meshopt_Buffer vertex_remap(vertex_count); 24 | memset(vertex_remap.data, -1, vertex_remap.size * sizeof(unsigned int)); 25 | 26 | unsigned int next_vertex = 0; 27 | 28 | for (size_t i = 0; i < index_count; ++i) 29 | { 30 | unsigned int index = indices[i]; 31 | assert(index < vertex_count); 32 | 33 | unsigned int& remap = vertex_remap[index]; 34 | 35 | if (remap == ~0u) // vertex was not added to destination VB 36 | { 37 | // add vertex 38 | memcpy(static_cast(destination) + next_vertex * vertex_size, static_cast(vertices) + index * vertex_size, vertex_size); 39 | 40 | remap = next_vertex++; 41 | } 42 | 43 | // modify indices in place 44 | indices[i] = remap; 45 | } 46 | 47 | assert(next_vertex <= vertex_count); 48 | 49 | return next_vertex; 50 | } 51 | -------------------------------------------------------------------------------- /Source/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | set(shaderDependencies 2 | # Add explicit dependencies here 3 | Shaders/Common.glsl 4 | ) 5 | 6 | set(shaders 7 | Shaders/Model.frag 8 | Shaders/Model.vert 9 | Shaders/ModelBarycentrics.geom 10 | Shaders/ModelIndexed.frag 11 | Shaders/ModelIndexed.vert 12 | Shaders/ModelManual.frag 13 | Shaders/ModelManual.vert 14 | Shaders/ModelNativeAMD.frag 15 | Shaders/ModelNativeAMD.vert 16 | Shaders/ModelNativeAMDTextured.frag 17 | Shaders/ModelPassthrough.frag 18 | Shaders/ModelPassthrough.geom 19 | Shaders/ModelPassthrough.vert 20 | Shaders/ModelPassthroughTextured.frag 21 | Shaders/ModelPassthroughTextured.geom 22 | ) 23 | 24 | set(src 25 | ${shaders} 26 | BaseApplication.cpp 27 | BaseApplication.h 28 | Barycentrics.cpp 29 | Barycentrics.h 30 | DemoUtils.cpp 31 | DemoUtils.h 32 | ) 33 | 34 | set(app Barycentrics) 35 | 36 | add_executable(${app} 37 | ${src} 38 | ) 39 | 40 | source_group("Shaders" FILES ${shaders} ${shaderDependencies}) 41 | 42 | function(shader_compile_rule shaderName dependencies) 43 | add_custom_command( 44 | OUTPUT ${CMAKE_CFG_INTDIR}/${shaderName}.spv 45 | COMMAND ${GLSLC} -o ${CMAKE_CFG_INTDIR}/${shaderName}.spv ${CMAKE_CURRENT_SOURCE_DIR}/${shaderName} 46 | MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/${shaderName} 47 | DEPENDS ${dependencies} 48 | ) 49 | endfunction(shader_compile_rule) 50 | 51 | foreach(shader ${shaders}) 52 | shader_compile_rule(${shader} "${shaderDependencies}") 53 | endforeach() 54 | 55 | target_compile_definitions(${app} PRIVATE 56 | RUSH_USING_NAMESPACE # Automatically use Rush namespace 57 | ) 58 | 59 | target_link_libraries(${app} 60 | meshoptimizer 61 | Rush 62 | stb 63 | tiny_obj_loader 64 | ) 65 | -------------------------------------------------------------------------------- /Source/Shaders/ModelPassthroughTextured.geom: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #if 0 // 
Reference mode 4 | 5 | layout(triangles) in; 6 | layout(triangle_strip, max_vertices=3) out; 7 | 8 | layout (location = 0) in vec2 v_tex0[]; 9 | layout (location = 1) in vec3 v_viewVector[]; 10 | layout (location = 2) in vec3 v_worldPos[]; 11 | 12 | layout (location = 0) out vec2 out_tex0; 13 | layout (location = 1) out vec3 out_viewVector; 14 | layout (location = 2) out vec3 out_worldPos0; 15 | layout (location = 3) out vec3 out_worldPos1; 16 | layout (location = 4) out vec3 out_worldPos2; 17 | layout (location = 5) out uint out_primId; 18 | 19 | void main() 20 | { 21 | for (int i=0; i<3; ++i) 22 | { 23 | gl_Position = gl_in[i].gl_Position; 24 | out_tex0 = v_tex0[i]; 25 | out_viewVector = v_viewVector[i]; 26 | out_worldPos0 = v_worldPos[0]; 27 | out_worldPos1 = v_worldPos[1]; 28 | out_worldPos2 = v_worldPos[2]; 29 | out_primId = gl_PrimitiveIDIn; 30 | EmitVertex(); 31 | } 32 | } 33 | 34 | #else 35 | 36 | #extension GL_NV_geometry_shader_passthrough : require 37 | 38 | layout(triangles) in; 39 | 40 | layout(passthrough) in gl_PerVertex 41 | { 42 | vec4 gl_Position; 43 | }; 44 | 45 | layout (location = 0, passthrough) in vec2 in_tex0; 46 | layout (location = 1, passthrough) in vec3 in_viewVector; 47 | layout (location = 2) in vec3 in_worldPos[]; 48 | 49 | layout (location = 2) out vec3 v_worldPos0; 50 | layout (location = 3) out vec3 v_worldPos1; 51 | layout (location = 4) out vec3 v_worldPos2; 52 | layout (location = 5) out uint v_primId; 53 | 54 | void main() 55 | { 56 | v_worldPos0 = in_worldPos[0]; 57 | v_worldPos1 = in_worldPos[1]; 58 | v_worldPos2 = in_worldPos[2]; 59 | v_primId = gl_PrimitiveIDIn; 60 | } 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /Source/Shaders/Common.glsl: -------------------------------------------------------------------------------- 1 | layout(constant_id = 0) const bool g_useTexture = false; 2 | 3 | layout (binding = 0) uniform Global 4 | { 5 | mat4 g_matView; 6 | mat4 
g_matProj; 7 | mat4 g_matViewProj; 8 | mat4 g_matWorld; 9 | vec4 g_cameraPos; 10 | }; 11 | 12 | layout (binding = 1) uniform Material 13 | { 14 | vec4 g_baseColor; 15 | }; 16 | 17 | layout (binding = 2) uniform sampler2D albedoSampler; 18 | 19 | struct VertexPacked 20 | { 21 | float pX, pY, pZ; 22 | float tX, tY; 23 | }; 24 | 25 | layout (std430, binding = 3) readonly buffer VertexBuffer 26 | { 27 | VertexPacked g_vertices[]; 28 | }; 29 | 30 | layout (std430, binding = 4) readonly buffer IndexBuffer 31 | { 32 | uint g_indices[]; 33 | }; 34 | 35 | struct Vertex 36 | { 37 | vec3 position; 38 | vec2 texcoord; // TODO: de-interleave vertex streams 39 | }; 40 | 41 | Vertex getVertex(uint i) 42 | { 43 | VertexPacked v = g_vertices[i]; 44 | 45 | Vertex r; 46 | r.position = vec3(v.pX, v.pY, v.pZ); 47 | r.texcoord = vec2(v.tX, v.tY); 48 | 49 | return r; 50 | } 51 | 52 | vec3 intersectRayTri(vec3 rayOrigin, vec3 rayDirection, vec3 v0, vec3 v1, vec3 v2) 53 | { 54 | vec3 e0 = v1 - v0; 55 | vec3 e1 = v2 - v0; 56 | vec3 s1 = cross(rayDirection, e1); 57 | float invd = 1.0 / (dot(s1, e0)); 58 | vec3 d = rayOrigin - v0; 59 | float b1 = dot(d, s1) * invd; 60 | vec3 s2 = cross(d, e0); 61 | float b2 = dot(rayDirection, s2) * invd; 62 | float temp = dot(e1, s2) * invd; 63 | 64 | return vec3(1.0 - b1 - b2, b1, b2); 65 | } 66 | 67 | vec2 interpolateTexCoords(vec2 t0, vec2 t1, vec2 t2, vec3 barycentrics) 68 | { 69 | vec2 texcoord = 70 | t0 * barycentrics.x + 71 | t1 * barycentrics.y + 72 | t2 * barycentrics.z; 73 | 74 | return texcoord; 75 | } 76 | 77 | vec2 interpolateTexCoords(uint primId, vec3 barycentrics) 78 | { 79 | uint index0 = g_indices[3*primId+0]; 80 | uint index1 = g_indices[3*primId+1]; 81 | uint index2 = g_indices[3*primId+2]; 82 | 83 | Vertex vertex0 = getVertex(index0); 84 | Vertex vertex1 = getVertex(index1); 85 | Vertex vertex2 = getVertex(index2); 86 | 87 | return interpolateTexCoords( 88 | vertex0.texcoord, 89 | vertex1.texcoord, 90 | vertex2.texcoord, 91 | 
barycentrics); 92 | } 93 | -------------------------------------------------------------------------------- /External/meshoptimizer/src/vcacheanalyzer.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details 2 | #include "meshoptimizer.h" 3 | 4 | #include 5 | #include 6 | 7 | meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size) 8 | { 9 | assert(index_count % 3 == 0); 10 | assert(cache_size >= 3); 11 | assert(warp_size == 0 || warp_size >= 3); 12 | 13 | meshopt_VertexCacheStatistics result = {}; 14 | 15 | unsigned int warp_offset = 0; 16 | unsigned int primgroup_offset = 0; 17 | 18 | meshopt_Buffer cache_timestamps(vertex_count); 19 | memset(cache_timestamps.data, 0, vertex_count * sizeof(unsigned int)); 20 | 21 | unsigned int timestamp = cache_size + 1; 22 | 23 | for (size_t i = 0; i < index_count; i += 3) 24 | { 25 | unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2]; 26 | assert(a < vertex_count && b < vertex_count && c < vertex_count); 27 | 28 | bool ac = (timestamp - cache_timestamps[a]) > cache_size; 29 | bool bc = (timestamp - cache_timestamps[b]) > cache_size; 30 | bool cc = (timestamp - cache_timestamps[c]) > cache_size; 31 | 32 | // flush cache if triangle doesn't fit into warp or into the primitive buffer 33 | if ((primgroup_size && primgroup_offset == primgroup_size) || (warp_size && warp_offset + ac + bc + cc > warp_size)) 34 | { 35 | result.warps_executed += warp_offset > 0; 36 | 37 | warp_offset = 0; 38 | primgroup_offset = 0; 39 | 40 | // reset cache 41 | timestamp += cache_size + 1; 42 | } 43 | 44 | // update cache and add vertices to warp 45 | for (int j = 0; j < 3; ++j) 46 | { 47 | unsigned int index = indices[i + j]; 48 | 49 | if (timestamp - 
cache_timestamps[index] > cache_size) 50 | { 51 | cache_timestamps[index] = timestamp++; 52 | result.vertices_transformed++; 53 | warp_offset++; 54 | } 55 | } 56 | 57 | primgroup_offset++; 58 | } 59 | 60 | result.warps_executed += warp_offset > 0; 61 | 62 | result.acmr = index_count == 0 ? 0 : float(result.vertices_transformed) / float(index_count / 3); 63 | result.atvr = vertex_count == 0 ? 0 : float(result.vertices_transformed) / float(vertex_count); 64 | 65 | return result; 66 | } 67 | -------------------------------------------------------------------------------- /Source/DemoUtils.cpp: -------------------------------------------------------------------------------- 1 | #include "DemoUtils.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #pragma warning(push) 8 | #pragma warning(disable: 4996) 9 | #define STB_IMAGE_IMPLEMENTATION 10 | #include 11 | #define STB_IMAGE_RESIZE_IMPLEMENTATION 12 | #include 13 | #pragma warning(pop) 14 | 15 | #include 16 | #include 17 | 18 | std::string directoryFromFilename(const std::string& filename) 19 | { 20 | size_t pos = filename.find_last_of("/\\"); 21 | if (pos != std::string::npos) 22 | { 23 | return filename.substr(0, pos + 1); 24 | } 25 | else 26 | { 27 | return std::string(); 28 | } 29 | } 30 | 31 | GfxShaderSource shaderFromFile(const char* filename, const char* shaderDirectory) 32 | { 33 | std::string fullFilename = std::string(shaderDirectory) + "/" + std::string(filename); 34 | Log::message("Loading shader '%s'", filename); 35 | 36 | GfxShaderSource source; 37 | source.type = GfxShaderSourceType_SPV; 38 | 39 | FileIn f(fullFilename.c_str()); 40 | if (f.valid()) 41 | { 42 | u32 fileSize = f.length(); 43 | source.resize(fileSize); 44 | f.read(&source[0], fileSize); 45 | } 46 | 47 | if (source.empty()) 48 | { 49 | Log::error("Failed to load shader '%s'", filename); 50 | } 51 | 52 | return source; 53 | }; 54 | 55 | GfxTexture textureFromFile(const char* filename) 56 | { 57 | int w, h, comp; 58 | 
stbi_set_flip_vertically_on_load(true); 59 | u8* pixels = stbi_load(filename, &w, &h, &comp, 4); 60 | 61 | GfxTexture result; 62 | 63 | if (pixels) 64 | { 65 | result = generateMipsRGBA8(pixels, w, h); 66 | stbi_image_free(pixels); 67 | } 68 | else 69 | { 70 | Log::warning("Failed to load texture '%s'", filename); 71 | } 72 | 73 | return result; 74 | } 75 | 76 | GfxTexture generateMipsRGBA8(u8* pixels, int w, int h) 77 | { 78 | GfxTexture result; 79 | 80 | std::vector> mips; 81 | mips.reserve(16); 82 | 83 | std::vector textureData; 84 | textureData.reserve(16); 85 | textureData.push_back(GfxTextureData(pixels)); 86 | 87 | u32 mipWidth = w; 88 | u32 mipHeight = h; 89 | 90 | while (mipWidth != 1 && mipHeight != 1) 91 | { 92 | u32 nextMipWidth = max(1, mipWidth / 2); 93 | u32 nextMipHeight = max(1, mipHeight / 2); 94 | 95 | u8* nextMip = new u8[nextMipWidth * nextMipHeight * 4]; 96 | mips.push_back(std::unique_ptr(nextMip)); 97 | 98 | const u32 mipPitch = mipWidth * 4; 99 | const u32 nextMipPitch = nextMipWidth * 4; 100 | int resizeResult = stbir_resize_uint8( 101 | (const u8*)textureData.back().pixels, mipWidth, mipHeight, mipPitch, 102 | nextMip, nextMipWidth, nextMipHeight, nextMipPitch, 4); 103 | RUSH_ASSERT(resizeResult); 104 | 105 | textureData.push_back(GfxTextureData(nextMip, (u32)textureData.size())); 106 | 107 | mipWidth = nextMipWidth; 108 | mipHeight = nextMipHeight; 109 | } 110 | 111 | GfxTextureDesc desc = GfxTextureDesc::make2D(w, h); 112 | desc.mips = (u32)textureData.size(); 113 | 114 | result = Gfx_CreateTexture(desc, textureData.data(), (u32)textureData.size()); 115 | return result; 116 | } 117 | 118 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Barycentrics 2 | 3 | This demo several approaches for computing barycentric coordinates in the pixel shader. 
4 | 5 | ## Mode 1: Non-indexed geometry 6 | 7 | Geometry is rendered using non-indexed draws. Vertex shader explicitly loads indices and vertices from index and vertex buffers and writes out barycentric coordinates. This approach is similar to using a geometry shader that outputs per-vertex barycentrics. 8 | 9 | This approach results in geometry throughput ~2x slower than regular indexed rendering. 10 | 11 | ## Mode 2: Geometry shader 12 | 13 | Geometry shader is used to output new triangles with explicit per-vertex barycentric coordinates. This approach does not require custom vertex fetching (unlike mode 1). 14 | 15 | Performance is slightly worse than mode 1 on AMD Fury X, but better on NVIDIA 1080. 16 | In general, we are still looking at ~2x slower rendering in geometry-bound scenes. 17 | 18 | ## Mode 3: Manual ray-triangle intersection in pixel shader 19 | 20 | Primitive indices and vertices are loaded in the pixel shader based on primitive ID. 21 | Positions are transformed into world space and resulting triangle is intersected with the eye ray to calculate barycentrics. 22 | 23 | Despite doing quite a lot of work per pixel, this mode is much faster than modes 1 and 2 in geometry-heavy scenes. 24 | 25 | On NVIDIA 1080 this runs ~25% slower than baseline "speed-of-light" shader that simply outpts texture coordinates in a geometry-bound scene. Interestingly, even with no geometry visible on screen (camera facing away, but still shading all vertices) performance is ~13% slower than "speed-of-light". It appears that simply using gl_PrimitiveID incurs an overhead. 26 | 27 | AMD Fury X is ~10% slower than "speed-of-light" on average and ~7% slower in pure geometry-bound case, again suggesting an overhead from using gl_PrimitiveID. 28 | 29 | ## Mode 4: Passthrough geometry shader (NVIDIA) 30 | 31 | This mode uses `VK_NV_geometry_shader_passthrough` extension. 
Fast / passthrough geometry shader is used to output world positions of triangles to the pixel shader, which then performs a ray-triangle intersection similar to mode 3. 32 | 33 | Performance is slightly better than mode 3, averaging ~15% slowdown compared to "speed-of-light". With no geometry in view, performance matches the baseline (no primitive ID overhead, unlike mode 3). 34 | 35 | ## Mode 5: Native barycentrics (AMD) 36 | 37 | This mode uses `VK_AMD_shader_explicit_vertex_parameter` extension. This approach is described in [GPUOpen blog post](https://gpuopen.com/stable-barycentric-coordinates). 38 | 39 | Vertex shader writes gl_VertexIndex into 2 separate outputs. Pixel shader accesses those parameters through `flat` and `__explicitInterpAMD` interpolators to establish the order of native barycentrics available through `gl_BaryCoordSmoothAMD`. 40 | 41 | Performance matches the "speed-of-light". There is no measurable overhead from accessing barycentrics with this method. 42 | 43 | # Notes 44 | 45 | Geometry-heavy scene used for testing is San Miguel 2.0 from http://casual-effects.com/data. 46 | 47 | In more balanced scenes, performance delta between different methods can be much less dramatic. 48 | In any case, Mode 4 seems to be the most preferable one on NVIDIA and Mode 5 is obviously hard to compete with on AMD. 49 | 50 | Mode 3 may be the best cross-platform mechanism at this point, though it would be interesting to implement a way to avoid gl_PrimitiveID overhead. 
51 | -------------------------------------------------------------------------------- /Source/Barycentrics.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "BaseApplication.h" 15 | #include "DemoUtils.h" 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | class BarycentricsApp : public BaseApplication 23 | { 24 | public: 25 | 26 | BarycentricsApp(); 27 | ~BarycentricsApp(); 28 | 29 | void update() override; 30 | 31 | private: 32 | 33 | void render(); 34 | 35 | bool loadModel(const char* filename); 36 | bool loadTunnelTestModel(); 37 | 38 | Camera m_camera; 39 | Camera m_interpolatedCamera; 40 | 41 | CameraManipulator* m_cameraMan; 42 | 43 | GfxTechniqueRef m_techniqueNonIndexed[2]; 44 | GfxTechniqueRef m_techniqueGeometryShader[2]; 45 | GfxTechniqueRef m_techniqueIndexed[2]; 46 | GfxTechniqueRef m_techniqueManual[2]; 47 | GfxTechniqueRef m_techniquePassthroughGS[2]; 48 | GfxTechniqueRef m_techniqueNativeAMD[2]; 49 | 50 | GfxBuffer m_vertexBuffer; 51 | GfxBuffer m_indexBuffer; 52 | GfxBuffer m_constantBuffer; 53 | 54 | u32 m_indexCount = 0; 55 | u32 m_vertexCount = 0; 56 | 57 | struct Constants 58 | { 59 | Mat4 matView = Mat4::identity(); 60 | Mat4 matProj = Mat4::identity(); 61 | Mat4 matViewProj = Mat4::identity(); 62 | Mat4 matWorld = Mat4::identity(); 63 | Vec4 cameraPos = Vec4(0.0f); 64 | }; 65 | 66 | Mat4 m_worldTransform = Mat4::identity(); 67 | 68 | Box3 m_boundingBox; 69 | 70 | struct Vertex // TODO: make a packed version of this for GPU 71 | { 72 | Vec3 position; 73 | Vec2 texcoord; // TODO: de-interleave vertex streams 74 | }; 75 | 76 | std::string m_statusString; 77 | bool m_valid = false; 78 | 79 | struct MaterialConstants 80 | { 81 | Vec4 baseColor; 82 | }; 83 | 84 | struct Material 85 | { 86 | GfxTextureRef albedoTexture; 87 | 
GfxBufferRef constantBuffer; 88 | }; 89 | 90 | Material m_defaultMaterial; 91 | 92 | WindowEventListener m_windowEvents; 93 | 94 | float m_cameraScale = 1.0f; 95 | 96 | Timer m_timer; 97 | 98 | 99 | enum Timestamp 100 | { 101 | Timestamp_World, 102 | Timestamp_UI, 103 | }; 104 | 105 | struct Stats 106 | { 107 | MovingAverage gpuTotal; 108 | MovingAverage gpuWorld; 109 | MovingAverage gpuUI; 110 | MovingAverage cpuTotal; 111 | MovingAverage cpuWorld; 112 | MovingAverage cpuUI; 113 | } m_stats; 114 | 115 | enum class Mode 116 | { 117 | Indexed, 118 | NonIndexed, 119 | GeometryShader, 120 | Manual, 121 | PassthroughGS, 122 | NativeAMD, 123 | } m_mode = Mode::NonIndexed; 124 | 125 | const char* toString(Mode m) 126 | { 127 | switch (m) 128 | { 129 | default: return "Unknown"; 130 | case Mode::Indexed: return "Indexed"; 131 | case Mode::NonIndexed: return "NonIndexed"; 132 | case Mode::GeometryShader: return "GeometryShader"; 133 | case Mode::Manual: return "Manual"; 134 | case Mode::PassthroughGS: return "PassthroughGS"; 135 | case Mode::NativeAMD: return "NativeAMD"; 136 | } 137 | } 138 | 139 | bool m_useTexture = false; 140 | bool m_showUI = true; 141 | }; 142 | 143 | -------------------------------------------------------------------------------- /Source/BaseApplication.cpp: -------------------------------------------------------------------------------- 1 | #include "BaseApplication.h" 2 | #include "DemoUtils.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | BaseApplication::BaseApplication() 9 | : m_dev(Platform_GetGfxDevice()), m_ctx(Platform_GetGfxContext()), m_window(Platform_GetWindow()) 10 | { 11 | m_window->retain(); 12 | Gfx_Retain(m_dev); 13 | Gfx_Retain(m_ctx); 14 | 15 | m_prim = new PrimitiveBatch(); 16 | m_font = new BitmapFontRenderer(BitmapFontRenderer::createEmbeddedFont(true, 0, 1)); 17 | 18 | // Depth stencil states 19 | 20 | { 21 | GfxDepthStencilDesc desc; 22 | desc.enable = false; 23 | desc.writeEnable = false; 24 | desc.compareFunc = 
GfxCompareFunc::Always; 25 | m_depthStencilStates.disable.takeover(Gfx_CreateDepthStencilState(desc)); 26 | } 27 | 28 | { 29 | GfxDepthStencilDesc desc; 30 | desc.enable = true; 31 | desc.writeEnable = true; 32 | desc.compareFunc = GfxCompareFunc::LessEqual; 33 | m_depthStencilStates.writeLessEqual.takeover(Gfx_CreateDepthStencilState(desc)); 34 | } 35 | 36 | { 37 | GfxDepthStencilDesc desc; 38 | desc.enable = true; 39 | desc.writeEnable = true; 40 | desc.compareFunc = GfxCompareFunc::Always; 41 | m_depthStencilStates.writeAlways.takeover(Gfx_CreateDepthStencilState(desc)); 42 | } 43 | 44 | { 45 | GfxDepthStencilDesc desc; 46 | desc.enable = true; 47 | desc.writeEnable = false; 48 | desc.compareFunc = GfxCompareFunc::LessEqual; 49 | m_depthStencilStates.testLessEqual.takeover(Gfx_CreateDepthStencilState(desc)); 50 | } 51 | 52 | // Blend states 53 | 54 | { 55 | GfxBlendStateDesc desc = GfxBlendStateDesc::makeOpaque(); 56 | m_blendStates.opaque.takeover(Gfx_CreateBlendState(desc)); 57 | } 58 | 59 | { 60 | GfxBlendStateDesc desc = GfxBlendStateDesc::makeLerp(); 61 | m_blendStates.lerp.takeover(Gfx_CreateBlendState(desc)); 62 | } 63 | 64 | { 65 | GfxBlendStateDesc desc = GfxBlendStateDesc::makeAdditive(); 66 | m_blendStates.additive.takeover(Gfx_CreateBlendState(desc)); 67 | } 68 | 69 | // Sampler states 70 | 71 | { 72 | GfxSamplerDesc desc = GfxSamplerDesc::makePoint(); 73 | desc.wrapU = GfxTextureWrap::Clamp; 74 | desc.wrapV = GfxTextureWrap::Clamp; 75 | desc.wrapW = GfxTextureWrap::Clamp; 76 | m_samplerStates.pointClamp.takeover(Gfx_CreateSamplerState(desc)); 77 | } 78 | 79 | { 80 | GfxSamplerDesc desc = GfxSamplerDesc::makeLinear(); 81 | desc.wrapU = GfxTextureWrap::Clamp; 82 | desc.wrapV = GfxTextureWrap::Clamp; 83 | desc.wrapW = GfxTextureWrap::Clamp; 84 | m_samplerStates.linearClamp.takeover(Gfx_CreateSamplerState(desc)); 85 | } 86 | 87 | { 88 | GfxSamplerDesc desc = GfxSamplerDesc::makeLinear(); 89 | desc.wrapU = GfxTextureWrap::Wrap; 90 | desc.wrapV = 
GfxTextureWrap::Wrap; 91 | desc.wrapW = GfxTextureWrap::Wrap; 92 | m_samplerStates.linearWrap.takeover(Gfx_CreateSamplerState(desc)); 93 | } 94 | 95 | { 96 | GfxSamplerDesc desc = GfxSamplerDesc::makeLinear(); 97 | desc.wrapU = GfxTextureWrap::Wrap; 98 | desc.wrapV = GfxTextureWrap::Wrap; 99 | desc.wrapW = GfxTextureWrap::Wrap; 100 | desc.anisotropy = 16.0f; 101 | m_samplerStates.anisotropicWrap.takeover(Gfx_CreateSamplerState(desc)); 102 | } 103 | 104 | // Resources 105 | 106 | { 107 | const u32 whiteTexturePixels[4] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; 108 | GfxTextureDesc textureDescr = GfxTextureDesc::make2D(2, 2); 109 | m_defaultWhiteTexture = Gfx_CreateTexture(textureDescr, whiteTexturePixels); 110 | } 111 | 112 | { 113 | const u32 dimension = 256; 114 | const u32 square = dimension / 2; 115 | 116 | std::vector pixels(dimension * dimension, 0x00000000); 117 | for (u32 y = 0; y < square; ++y) 118 | { 119 | for (u32 x = 0; x < square; ++x) 120 | { 121 | pixels[y * dimension + x] = 0xFFFFFFFF; 122 | pixels[(y + square) * dimension + (x + square)] = 0xFFFFFFFF; 123 | } 124 | } 125 | 126 | m_checkerboardTexture = generateMipsRGBA8(reinterpret_cast(pixels.data()), dimension, dimension); 127 | } 128 | } 129 | 130 | BaseApplication::~BaseApplication() 131 | { 132 | delete m_font; 133 | delete m_prim; 134 | 135 | Gfx_Release(m_defaultWhiteTexture); 136 | Gfx_Release(m_ctx); 137 | Gfx_Release(m_dev); 138 | m_window->release(); 139 | } 140 | -------------------------------------------------------------------------------- /External/meshoptimizer/src/indexgenerator.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details 2 | #include "meshoptimizer.h" 3 | 4 | #include 5 | #include 6 | 7 | namespace meshopt 8 | { 9 | 10 | static unsigned int murmurHash(const char* key, size_t len, unsigned int h) 11 | { 12 | const unsigned int m = 
0x5bd1e995; 13 | const int r = 24; 14 | 15 | while (len >= 4) 16 | { 17 | unsigned int k = *reinterpret_cast(key); 18 | 19 | k *= m; 20 | k ^= k >> r; 21 | k *= m; 22 | 23 | h *= m; 24 | h ^= k; 25 | 26 | key += 4; 27 | len -= 4; 28 | } 29 | 30 | return h; 31 | } 32 | 33 | struct VertexHasher 34 | { 35 | const char* vertices; 36 | size_t vertex_size; 37 | 38 | unsigned int empty() const 39 | { 40 | return ~0u; 41 | } 42 | 43 | size_t operator()(unsigned int index) const 44 | { 45 | return murmurHash(vertices + index * vertex_size, vertex_size, 0); 46 | } 47 | 48 | size_t operator()(unsigned int lhs, unsigned int rhs) const 49 | { 50 | return memcmp(vertices + lhs * vertex_size, vertices + rhs * vertex_size, vertex_size) == 0; 51 | } 52 | }; 53 | 54 | struct VertexHashEntry 55 | { 56 | unsigned int key; 57 | unsigned int value; 58 | }; 59 | 60 | static size_t hashBuckets(size_t count) 61 | { 62 | size_t buckets = 1; 63 | while (buckets < count) 64 | buckets *= 2; 65 | 66 | return buckets; 67 | } 68 | 69 | template 70 | static T* hashLookup(T* table, size_t buckets, const Hash& hash, const Key& key, const Key& empty) 71 | { 72 | assert(buckets > 0); 73 | assert((buckets & (buckets - 1)) == 0); 74 | 75 | size_t hashmod = buckets - 1; 76 | size_t bucket = hash(key) & hashmod; 77 | 78 | for (size_t probe = 0; probe <= hashmod; ++probe) 79 | { 80 | T& item = table[bucket]; 81 | 82 | if (item.key == empty) 83 | return &item; 84 | 85 | if (hash(item.key, key)) 86 | return &item; 87 | 88 | // hash collision, quadratic probing 89 | bucket = (bucket + probe + 1) & hashmod; 90 | } 91 | 92 | assert(false && "Hash table is full"); 93 | return 0; 94 | } 95 | 96 | } // namespace meshopt 97 | 98 | size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) 99 | { 100 | using namespace meshopt; 101 | 102 | assert(indices || index_count == vertex_count); 103 | 
assert(index_count % 3 == 0); 104 | assert(vertex_size > 0 && vertex_size <= 256); 105 | 106 | for (size_t i = 0; i < vertex_count; ++i) 107 | { 108 | destination[i] = ~0u; 109 | } 110 | 111 | VertexHasher hasher = {static_cast(vertices), vertex_size}; 112 | 113 | meshopt_Buffer table(hashBuckets(vertex_count)); 114 | memset(table.data, -1, table.size * sizeof(VertexHashEntry)); 115 | 116 | unsigned int next_vertex = 0; 117 | 118 | for (size_t i = 0; i < index_count; ++i) 119 | { 120 | unsigned int index = indices ? indices[i] : unsigned(i); 121 | assert(index < vertex_count); 122 | 123 | if (destination[index] == ~0u) 124 | { 125 | VertexHashEntry* entry = hashLookup(table.data, table.size, hasher, index, ~0u); 126 | 127 | if (entry->key == ~0u) 128 | { 129 | entry->key = index; 130 | entry->value = next_vertex++; 131 | } 132 | 133 | destination[index] = entry->value; 134 | } 135 | } 136 | 137 | assert(next_vertex <= vertex_count); 138 | 139 | return next_vertex; 140 | } 141 | 142 | void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap) 143 | { 144 | assert(destination != vertices); 145 | assert(vertex_size > 0 && vertex_size <= 256); 146 | 147 | for (size_t i = 0; i < vertex_count; ++i) 148 | { 149 | if (remap[i] != ~0u) 150 | { 151 | assert(remap[i] < vertex_count); 152 | 153 | memcpy(static_cast(destination) + remap[i] * vertex_size, static_cast(vertices) + i * vertex_size, vertex_size); 154 | } 155 | } 156 | } 157 | 158 | void meshopt_remapIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const unsigned int* remap) 159 | { 160 | assert(index_count % 3 == 0); 161 | 162 | for (size_t i = 0; i < index_count; ++i) 163 | { 164 | unsigned int index = indices ? 
indices[i] : unsigned(i); 165 | assert(remap[index] != ~0u); 166 | 167 | destination[i] = remap[index]; 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /External/meshoptimizer/src/stripifier.cpp: -------------------------------------------------------------------------------- 1 | #include "meshoptimizer.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace meshopt 8 | { 9 | 10 | static unsigned int findStripFirst(const unsigned int buffer[][3], unsigned int buffer_size, const unsigned int* valence) 11 | { 12 | unsigned int index = 0; 13 | unsigned int iv = ~0u; 14 | 15 | for (unsigned int i = 0; i < buffer_size; ++i) 16 | { 17 | unsigned int va = valence[buffer[i][0]], vb = valence[buffer[i][1]], vc = valence[buffer[i][2]]; 18 | unsigned int v = (va < vb && va < vc) ? va : (vb < vc) ? vb : vc; 19 | 20 | if (v < iv) 21 | { 22 | index = i; 23 | iv = v; 24 | } 25 | } 26 | 27 | return index; 28 | } 29 | 30 | static int findStripNext(const unsigned int buffer[][3], unsigned int buffer_size, unsigned int e0, unsigned int e1) 31 | { 32 | for (unsigned int i = 0; i < buffer_size; ++i) 33 | { 34 | unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2]; 35 | 36 | if (e0 == a && e1 == b) 37 | return (i << 2) | 2; 38 | else if (e0 == b && e1 == c) 39 | return (i << 2) | 0; 40 | else if (e0 == c && e1 == a) 41 | return (i << 2) | 1; 42 | } 43 | 44 | return -1; 45 | } 46 | 47 | } // namespace meshopt 48 | 49 | size_t meshopt_stripify(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count) 50 | { 51 | assert(destination != indices); 52 | assert(index_count % 3 == 0); 53 | 54 | using namespace meshopt; 55 | 56 | const size_t buffer_capacity = 8; 57 | 58 | unsigned int buffer[buffer_capacity][3] = {}; 59 | unsigned int buffer_size = 0; 60 | 61 | size_t index_offset = 0; 62 | 63 | unsigned int strip[2] = {}; 64 | unsigned int parity = 0; 65 | 66 | size_t strip_size = 
0; 67 | 68 | // compute vertex valence; this is used to prioritize starting triangle for strips 69 | meshopt_Buffer valence(vertex_count); 70 | memset(valence.data, 0, vertex_count * sizeof(unsigned int)); 71 | 72 | for (size_t i = 0; i < index_count; ++i) 73 | { 74 | unsigned int index = indices[i]; 75 | assert(index < vertex_count); 76 | 77 | valence[index]++; 78 | } 79 | 80 | int next = -1; 81 | 82 | while (buffer_size > 0 || index_offset < index_count) 83 | { 84 | assert(next < 0 || (size_t(next >> 2) < buffer_size && (next & 3) < 3)); 85 | 86 | // fill triangle buffer 87 | while (buffer_size < buffer_capacity && index_offset < index_count) 88 | { 89 | buffer[buffer_size][0] = indices[index_offset + 0]; 90 | buffer[buffer_size][1] = indices[index_offset + 1]; 91 | buffer[buffer_size][2] = indices[index_offset + 2]; 92 | 93 | buffer_size++; 94 | index_offset += 3; 95 | } 96 | 97 | assert(buffer_size > 0); 98 | 99 | if (next >= 0) 100 | { 101 | unsigned int i = next >> 2; 102 | unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2]; 103 | unsigned int v = buffer[i][next & 3]; 104 | 105 | // ordered removal from the buffer 106 | memmove(buffer[i], buffer[i + 1], (buffer_size - i - 1) * sizeof(buffer[0])); 107 | buffer_size--; 108 | 109 | // update vertex valences for strip start heuristic 110 | valence[a]--; 111 | valence[b]--; 112 | valence[c]--; 113 | 114 | // find next triangle (note that edge order flips on every iteration) 115 | // in some cases we need to perform a swap to pick a different outgoing triangle edge 116 | // for [a b c], the default strip edge is [b c], but we might want to use [a c] 117 | int cont = findStripNext(buffer, buffer_size, parity ? strip[1] : v, parity ? v : strip[1]); 118 | int swap = cont < 0 ? findStripNext(buffer, buffer_size, parity ? v : strip[0], parity ? 
strip[0] : v) : -1; 119 | 120 | if (cont < 0 && swap >= 0) 121 | { 122 | // [a b c] => [a b a c] 123 | destination[strip_size++] = strip[0]; 124 | destination[strip_size++] = v; 125 | 126 | // next strip has same winding 127 | // ? a b => b a v 128 | strip[1] = v; 129 | 130 | next = swap; 131 | } 132 | else 133 | { 134 | // emit the next vertex in the strip 135 | destination[strip_size++] = v; 136 | 137 | // next strip has flipped winding 138 | strip[0] = strip[1]; 139 | strip[1] = v; 140 | parity ^= 1; 141 | 142 | next = cont; 143 | } 144 | } 145 | else 146 | { 147 | // if we didn't find anything, we need to find the next new triangle 148 | // we use a heuristic to maximize the strip length 149 | unsigned int i = findStripFirst(buffer, buffer_size, &valence[0]); 150 | unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2]; 151 | 152 | // ordered removal from the buffer 153 | memmove(buffer[i], buffer[i + 1], (buffer_size - i - 1) * sizeof(buffer[0])); 154 | buffer_size--; 155 | 156 | // update vertex valences for strip start heuristic 157 | valence[a]--; 158 | valence[b]--; 159 | valence[c]--; 160 | 161 | // we need to pre-rotate the triangle so that we will find a match in the existing buffer on the next iteration 162 | int ea = findStripNext(buffer, buffer_size, c, b); 163 | int eb = findStripNext(buffer, buffer_size, a, c); 164 | int ec = findStripNext(buffer, buffer_size, b, a); 165 | 166 | // in some cases we can have several matching edges; since we can pick any edge, we pick the one with the smallest 167 | // triangle index in the buffer. this reduces the effect of stripification on ACMR and additionally - for unclear 168 | // reasons - slightly improves the stripification efficiency 169 | int mine = INT_MAX; 170 | mine = (ea >= 0 && mine > ea) ? ea : mine; 171 | mine = (eb >= 0 && mine > eb) ? eb : mine; 172 | mine = (ec >= 0 && mine > ec) ? 
ec : mine; 173 | 174 | if (ea == mine) 175 | { 176 | // keep abc 177 | next = ea; 178 | } 179 | else if (eb == mine) 180 | { 181 | // abc -> bca 182 | unsigned int t = a; 183 | a = b, b = c, c = t; 184 | 185 | next = eb; 186 | } 187 | else if (ec == mine) 188 | { 189 | // abc -> cab 190 | unsigned int t = c; 191 | c = b, b = a, a = t; 192 | 193 | next = ec; 194 | } 195 | 196 | // emit the new strip; we use restart indices 197 | if (strip_size) 198 | destination[strip_size++] = ~0u; 199 | 200 | destination[strip_size++] = a; 201 | destination[strip_size++] = b; 202 | destination[strip_size++] = c; 203 | 204 | // new strip always starts with the same edge winding 205 | strip[0] = b; 206 | strip[1] = c; 207 | parity = 1; 208 | } 209 | } 210 | 211 | return strip_size; 212 | } 213 | 214 | size_t meshopt_unstripify(unsigned int* destination, const unsigned int* indices, size_t index_count) 215 | { 216 | assert(destination != indices); 217 | 218 | size_t offset = 0; 219 | size_t start = 0; 220 | 221 | for (size_t i = 0; i < index_count; ++i) 222 | { 223 | if (indices[i] == ~0u) 224 | { 225 | start = i + 1; 226 | } 227 | else if (i - start >= 2) 228 | { 229 | unsigned int a = indices[i - 2], b = indices[i - 1], c = indices[i]; 230 | 231 | if ((i - start) & 1) 232 | { 233 | unsigned int t = a; 234 | a = b, b = t; 235 | } 236 | 237 | if (a != b && a != c && b != c) 238 | { 239 | destination[offset + 0] = a; 240 | destination[offset + 1] = b; 241 | destination[offset + 2] = c; 242 | offset += 3; 243 | } 244 | } 245 | } 246 | 247 | return offset; 248 | } 249 | -------------------------------------------------------------------------------- /External/meshoptimizer/src/overdrawanalyzer.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details 2 | #include "meshoptimizer.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | // This work is based on: 9 | 
// Nicolas Capens. Advanced Rasterization. 2004 10 | namespace meshopt 11 | { 12 | 13 | const int kViewport = 256; 14 | 15 | struct OverdrawBuffer 16 | { 17 | float z[kViewport][kViewport][2]; 18 | unsigned int overdraw[kViewport][kViewport][2]; 19 | }; 20 | 21 | template 22 | static T min(T a, T b) 23 | { 24 | return a < b ? a : b; 25 | } 26 | 27 | template 28 | static T max(T a, T b) 29 | { 30 | return a > b ? a : b; 31 | } 32 | 33 | static float det2x2(float a, float b, float c, float d) 34 | { 35 | // (a b) 36 | // (c d) 37 | return a * d - b * c; 38 | } 39 | 40 | static float computeDepthGradients(float& dzdx, float& dzdy, float x1, float y1, float z1, float x2, float y2, float z2, float x3, float y3, float z3) 41 | { 42 | // z2 = z1 + dzdx * (x2 - x1) + dzdy * (y2 - y1) 43 | // z3 = z1 + dzdx * (x3 - x1) + dzdy * (y3 - y1) 44 | // (x2-x1 y2-y1)(dzdx) = (z2-z1) 45 | // (x3-x1 y3-y1)(dzdy) (z3-z1) 46 | // we'll solve it with Cramer's rule 47 | float det = det2x2(x2 - x1, y2 - y1, x3 - x1, y3 - y1); 48 | float invdet = (det == 0) ? 
0 : 1 / det; 49 | 50 | dzdx = det2x2(z2 - z1, y2 - y1, z3 - z1, y3 - y1) * invdet; 51 | dzdy = det2x2(x2 - x1, z2 - z1, x3 - x1, z3 - z1) * invdet; 52 | 53 | return det; 54 | } 55 | 56 | // half-space fixed point triangle rasterizer 57 | static void rasterize(OverdrawBuffer* buffer, float v1x, float v1y, float v1z, float v2x, float v2y, float v2z, float v3x, float v3y, float v3z) 58 | { 59 | // compute depth gradients 60 | float DZx, DZy; 61 | float det = computeDepthGradients(DZx, DZy, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z); 62 | int sign = det > 0; 63 | 64 | // flip backfacing triangles to simplify rasterization logic 65 | if (sign) 66 | { 67 | // flipping v2 & v3 preserves depth gradients since they're based on v1 68 | float t; 69 | t = v2x, v2x = v3x, v3x = t; 70 | t = v2y, v2y = v3y, v3y = t; 71 | t = v2z, v2z = v3z, v3z = t; 72 | 73 | // flip depth since we rasterize backfacing triangles to second buffer with reverse Z; only v1z is used below 74 | v1z = kViewport - v1z; 75 | DZx = -DZx; 76 | DZy = -DZy; 77 | } 78 | 79 | // coordinates, 28.4 fixed point 80 | int X1 = int(16.0f * v1x + 0.5f); 81 | int X2 = int(16.0f * v2x + 0.5f); 82 | int X3 = int(16.0f * v3x + 0.5f); 83 | 84 | int Y1 = int(16.0f * v1y + 0.5f); 85 | int Y2 = int(16.0f * v2y + 0.5f); 86 | int Y3 = int(16.0f * v3y + 0.5f); 87 | 88 | // bounding rectangle, clipped against viewport 89 | // since we rasterize pixels with covered centers, min >0.5 should round up 90 | // as for max, due to top-left filling convention we will never rasterize right/bottom edges 91 | // so max >= 0.5 should round down 92 | int minx = max((min(X1, min(X2, X3)) + 7) >> 4, 0); 93 | int maxx = min((max(X1, max(X2, X3)) + 7) >> 4, kViewport); 94 | int miny = max((min(Y1, min(Y2, Y3)) + 7) >> 4, 0); 95 | int maxy = min((max(Y1, max(Y2, Y3)) + 7) >> 4, kViewport); 96 | 97 | // deltas, 28.4 fixed point 98 | int DX12 = X1 - X2; 99 | int DX23 = X2 - X3; 100 | int DX31 = X3 - X1; 101 | 102 | int DY12 = Y1 - Y2; 103 | int 
DY23 = Y2 - Y3; 104 | int DY31 = Y3 - Y1; 105 | 106 | // fill convention correction 107 | int TL1 = DY12 < 0 || (DY12 == 0 && DX12 > 0); 108 | int TL2 = DY23 < 0 || (DY23 == 0 && DX23 > 0); 109 | int TL3 = DY31 < 0 || (DY31 == 0 && DX31 > 0); 110 | 111 | // half edge equations, 24.8 fixed point 112 | // note that we offset minx/miny by half pixel since we want to rasterize pixels with covered centers 113 | int FX = (minx << 4) + 8; 114 | int FY = (miny << 4) + 8; 115 | int CY1 = DX12 * (FY - Y1) - DY12 * (FX - X1) + TL1 - 1; 116 | int CY2 = DX23 * (FY - Y2) - DY23 * (FX - X2) + TL2 - 1; 117 | int CY3 = DX31 * (FY - Y3) - DY31 * (FX - X3) + TL3 - 1; 118 | float ZY = v1z + (DZx * float(FX - X1) + DZy * float(FY - Y1)) * (1 / 16.f); 119 | 120 | for (int y = miny; y < maxy; y++) 121 | { 122 | int CX1 = CY1; 123 | int CX2 = CY2; 124 | int CX3 = CY3; 125 | float ZX = ZY; 126 | 127 | for (int x = minx; x < maxx; x++) 128 | { 129 | // check if all CXn are non-negative 130 | if ((CX1 | CX2 | CX3) >= 0) 131 | { 132 | if (ZX >= buffer->z[y][x][sign]) 133 | { 134 | buffer->z[y][x][sign] = ZX; 135 | buffer->overdraw[y][x][sign]++; 136 | } 137 | } 138 | 139 | CX1 -= DY12 << 4; 140 | CX2 -= DY23 << 4; 141 | CX3 -= DY31 << 4; 142 | ZX += DZx; 143 | } 144 | 145 | CY1 += DX12 << 4; 146 | CY2 += DX23 << 4; 147 | CY3 += DX31 << 4; 148 | ZY += DZy; 149 | } 150 | } 151 | 152 | } // namespace meshopt 153 | 154 | meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) 155 | { 156 | using namespace meshopt; 157 | 158 | assert(index_count % 3 == 0); 159 | assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); 160 | assert(vertex_positions_stride % sizeof(float) == 0); 161 | 162 | size_t vertex_stride_float = vertex_positions_stride / sizeof(float); 163 | 164 | meshopt_OverdrawStatistics result = {}; 165 | 166 | float minv[3] = {FLT_MAX, FLT_MAX, 
FLT_MAX}; 167 | float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX}; 168 | 169 | for (size_t i = 0; i < vertex_count; ++i) 170 | { 171 | const float* v = vertex_positions + i * vertex_stride_float; 172 | 173 | for (int j = 0; j < 3; ++j) 174 | { 175 | minv[j] = min(minv[j], v[j]); 176 | maxv[j] = max(maxv[j], v[j]); 177 | } 178 | } 179 | 180 | float extent = max(maxv[0] - minv[0], max(maxv[1] - minv[1], maxv[2] - minv[2])); 181 | float scale = kViewport / extent; 182 | 183 | meshopt_Buffer triangles(index_count * 3); 184 | 185 | for (size_t i = 0; i < index_count; ++i) 186 | { 187 | unsigned int index = indices[i]; 188 | assert(index < vertex_count); 189 | 190 | const float* v = vertex_positions + index * vertex_stride_float; 191 | 192 | triangles[i * 3 + 0] = (v[0] - minv[0]) * scale; 193 | triangles[i * 3 + 1] = (v[1] - minv[1]) * scale; 194 | triangles[i * 3 + 2] = (v[2] - minv[2]) * scale; 195 | } 196 | 197 | meshopt_Buffer buffer_storage(1); 198 | OverdrawBuffer* buffer = buffer_storage.data; 199 | 200 | for (int axis = 0; axis < 3; ++axis) 201 | { 202 | memset(buffer, 0, sizeof(OverdrawBuffer)); 203 | 204 | for (size_t i = 0; i < index_count; i += 3) 205 | { 206 | const float* vn0 = &triangles[3 * (i + 0)]; 207 | const float* vn1 = &triangles[3 * (i + 1)]; 208 | const float* vn2 = &triangles[3 * (i + 2)]; 209 | 210 | switch (axis) 211 | { 212 | case 0: 213 | rasterize(buffer, vn0[2], vn0[1], vn0[0], vn1[2], vn1[1], vn1[0], vn2[2], vn2[1], vn2[0]); 214 | break; 215 | case 1: 216 | rasterize(buffer, vn0[0], vn0[2], vn0[1], vn1[0], vn1[2], vn1[1], vn2[0], vn2[2], vn2[1]); 217 | break; 218 | case 2: 219 | rasterize(buffer, vn0[1], vn0[0], vn0[2], vn1[1], vn1[0], vn1[2], vn2[1], vn2[0], vn2[2]); 220 | break; 221 | } 222 | } 223 | 224 | for (int y = 0; y < kViewport; ++y) 225 | for (int x = 0; x < kViewport; ++x) 226 | for (int s = 0; s < 2; ++s) 227 | { 228 | unsigned int overdraw = buffer->overdraw[y][x][s]; 229 | 230 | result.pixels_covered += overdraw > 0; 231 | 
result.pixels_shaded += overdraw; 232 | } 233 | } 234 | 235 | result.overdraw = result.pixels_covered ? float(result.pixels_shaded) / float(result.pixels_covered) : 0.f; 236 | 237 | return result; 238 | } 239 | -------------------------------------------------------------------------------- /External/meshoptimizer/README.md: -------------------------------------------------------------------------------- 1 | # meshoptimizer [![Build Status](https://travis-ci.org/zeux/meshoptimizer.svg?branch=master)](https://travis-ci.org/zeux/meshoptimizer) [![Build status](https://ci.appveyor.com/api/projects/status/ptx6p8wmqchivawq?svg=true)](https://ci.appveyor.com/project/zeux/meshoptimizer) [![codecov.io](http://codecov.io/github/zeux/meshoptimizer/coverage.svg?branch=master)](http://codecov.io/github/zeux/meshoptimizer?branch=master) ![MIT](https://img.shields.io/badge/license-MIT-blue.svg) 2 | 3 | ## Purpose 4 | 5 | When GPU renders triangle meshes, various stages of the GPU pipeline have to process vertex and index data. The efficiency of these stages depends on the data you feed to them; this library provides algorithms to help optimize meshes for these stages, as well as algorithms to reduce the mesh complexity and storage overhead. 6 | 7 | The library provides a C and C++ interface for all algorithms; you can use it from C/C++ or from other languages via FFI (such as P/Invoke). 8 | 9 | ## Building 10 | 11 | meshoptimizer is distributed as a set of C++ source files. To include it into your project, you can use one of the two options: 12 | 13 | * Use CMake to build the library (either as a standalone project or as part of your project) 14 | * Add source files to your project's build system 15 | 16 | The source files are organized in such a way that you don't need to change your build-system settings, and you only need to add the files for the algorithms you use. 
17 | 18 | ## Pipeline 19 | 20 | When optimizing a mesh, you should typically feed it through a set of optimizations (the order is important!): 21 | 22 | 1. Indexing 23 | 2. Vertex cache optimization 24 | 3. Overdraw optimization 25 | 4. Vertex fetch optimization 26 | 5. Vertex quantization 27 | 28 | ## Indexing 29 | 30 | Most algorithms in this library assume that a mesh has a vertex buffer and an index buffer. For algorithms to work well and also for GPU to render your mesh efficiently, the vertex buffer has to have no redundant vertices; you can generate an index buffer from an unindexed vertex buffer or reindex an existing (potentially redundant) index buffer as follows: 31 | 32 | First, generate a remap table from your existing vertex (and, optionally, index) data: 33 | 34 | ```c++ 35 | size_t index_count = face_count * 3; 36 | std::vector remap(index_count); // allocate temporary memory for the remap table 37 | size_t vertex_count = meshopt_generateVertexRemap(&remap[0], NULL, index_count, &unindexed_vertices[0], index_count, sizeof(Vertex)); 38 | ``` 39 | 40 | Note that in this case we only have an unindexed vertex buffer; the remap table is generated based on binary equivalence of the input vertices, so the resulting mesh will render the same way. 41 | 42 | After generating the remap table, you can allocate space for the target vertex buffer (`vertex_count` elements) and index buffer (`index_count` elements) and generate them: 43 | 44 | ```c++ 45 | meshopt_remapIndexBuffer(indices, NULL, index_count, &remap[0]); 46 | meshopt_remapVertexBuffer(vertices, &unindexed_vertices[0], index_count, sizeof(Vertex), &remap[0]); 47 | ``` 48 | 49 | You can then further optimize the resulting buffers by calling the other functions on them in-place. 
50 | 51 | ## Vertex cache optimization 52 | 53 | When the GPU renders the mesh, it has to run the vertex shader for each vertex; usually GPUs have a built-in fixed size cache that stores the transformed vertices (the result of running the vertex shader), and uses this cache to reduce the number of vertex shader invocations. This cache is usually small, 16-32 vertices, and can have different replacement policies; to use this cache efficiently, you have to reorder your triangles to maximize the locality of reused vertex references like so: 54 | 55 | ```c++ 56 | meshopt_optimizeVertexCache(indices, indices, index_count, vertex_count); 57 | ``` 58 | 59 | ## Overdraw optimization 60 | 61 | After transforming the vertices, GPU sends the triangles for rasterization which results in generating pixels that are usually first ran through the depth test, and pixels that pass it get the pixel shader executed to generate the final color. As pixel shaders get more expensive, it becomes more and more important to reduce overdraw. While in general improving overdraw requires view-dependent operations, this library provides an algorithm to reorder triangles to minimize the overdraw from all directions, which you should run after vertex cache optimization like this: 62 | 63 | ```c++ 64 | meshopt_optimizeOverdraw(indices, indices, index_count, &vertices[0].x, vertex_count, sizeof(Vertex), 1.05f); 65 | ``` 66 | 67 | The overdraw optimizer needs to read vertex positions as a float3 from the vertex; the code snippet above assumes that the vertex stores position as `float x, y, z`. 68 | 69 | When performing the overdraw optimization you have to specify a floating-point threshold parameter. The algorithm tries to maintain a balance between vertex cache efficiency and overdraw; the threshold determines how much the algorithm can compromise the vertex cache hit ratio, with 1.05 meaning that the resulting ratio should be at most 5% worse than before the optimization. 
70 | 71 | ## Vertex fetch optimization 72 | 73 | After the final triangle order has been established, we still can optimize the vertex buffer for memory efficiency. Before running the vertex shader GPU has to fetch the vertex attributes from the vertex buffer; the fetch is usually backed by a memory cache, and as such optimizing the data for the locality of memory access is important. You can do this by running this code: 74 | 75 | To optimize the index/vertex buffers for vertex fetch efficiency, call: 76 | 77 | ```c++ 78 | meshopt_optimizeVertexFetch(vertices, indices, index_count, vertices, vertex_count, sizeof(Vertex)); 79 | ``` 80 | 81 | This will reorder the vertices in the vertex buffer to try to improve the locality of reference, and rewrite the indices in place to match. This optimization has to be performed on the final index buffer since the optimal vertex order depends on the triangle order. 82 | 83 | Note that the algorithm does not try to model cache replacement precisely and instead just orders vertices in the order of use, which generally produces results that are close to optimal. 84 | 85 | ## Vertex quantization 86 | 87 | To optimize memory bandwidth when fetching the vertex data even further, and to reduce the amount of memory required to store the mesh, it is often beneficial to quantize the vertex attributes to smaller types. While this optimization can technically run at any part of the pipeline (and sometimes doing quantization as the first step can improve indexing by merging almost identical vertices), it generally is easier to run this after all other optimizations since some of them require access to float3 positions. 88 | 89 | Quantization is usually domain specific; it's common to quantize normals using 3 8-bit integers but you can use higher-precision quantization (for example using 10 bits per component in a 10_10_10_2 format), or a different encoding to use just 2 components. 
For positions and texture coordinate data the two most common storage formats are half precision floats, and 16-bit normalized integers that encode the position relative to the AABB of the mesh or the UV bounding rectangle. 90 | 91 | The number of possible combinations here is very large but this library does provide the building blocks, specifically functions to quantize floating point values to normalized integers, as well as half-precision floats. For example, here's how you can quantize a normal: 92 | 93 | ```c++ 94 | unsigned int normal = 95 | (meshopt_quantizeUnorm(v.nx, 10) << 20) | 96 | (meshopt_quantizeUnorm(v.ny, 10) << 10) | 97 | meshopt_quantizeUnorm(v.nz, 10); 98 | ``` 99 | 100 | and here's how you can quantize a position: 101 | 102 | ```c++ 103 | unsigned short px = meshopt_quantizeHalf(v.x); 104 | unsigned short py = meshopt_quantizeHalf(v.y); 105 | unsigned short pz = meshopt_quantizeHalf(v.z); 106 | ``` 107 | 108 | ## Efficiency analyzers 109 | 110 | While the only way to get precise performance data is to measure performance on the target GPU, it can be valuable to measure the impact of these optimization in a GPU-independent manner. To this end, the library provides analyzers for all three major optimization routines. For each optimization there is a corresponding analyze function, like `meshopt_analyzeOverdraw`, that returns a struct with statistics. 111 | 112 | `meshopt_analyzeVertexCache` returns vertex cache statistics. The common metric to use is ACMR - average cache miss ratio, which is the ratio of the total number of vertex invocations to the triangle count. The worst-case ACMR is 3 (GPU has to process 3 vertices for each triangle); on regular grids the optimal ACMR approaches 0.5. On real meshes it usually is in [0.5..1.5] ratio depending on the amount of vertex splits. 
One other useful metric is ATVR - average transformed vertex ratio - which represents the ratio of vertex shader invocations to the total vertices, and has the best case of 1.0 regardless of mesh topology (each vertex is transformed once). 113 | 114 | `meshopt_analyzeVertexFetch` returns vertex fetch statistics. The main metric it uses is overfetch - the ratio between the number of bytes read from the vertex buffer to the total number of bytes in the vertex buffer. Assuming non-redundant vertex buffers, the best case is 1.0 - each byte is fetched once. 115 | 116 | `meshopt_analyzeOverdraw` returns overdraw statistics. The main metric it uses is overdraw - the ratio between the number of pixel shader invocations to the total number of covered pixels, as measured from several different orthographic cameras. The best case for overdraw is 1.0 - each pixel is shaded once. 117 | 118 | Note that all analyzers use approximate models for the relevant GPU units, so the numbers you will get as the result are only a rough approximation of the actual performance. 119 | 120 | ## License 121 | 122 | This library is available to anybody free of charge, under the terms of MIT License (see LICENSE.md). 123 | -------------------------------------------------------------------------------- /External/meshoptimizer/src/simplifier.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details 2 | #include "meshoptimizer.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | // This work is based on: 11 | // Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 
1997 12 | namespace meshopt 13 | { 14 | 15 | static size_t hash(unsigned long long key) 16 | { 17 | key = (~key) + (key << 18); 18 | key = key ^ (key >> 31); 19 | key = key * 21; 20 | key = key ^ (key >> 11); 21 | key = key + (key << 6); 22 | key = key ^ (key >> 22); 23 | return size_t(key); 24 | } 25 | 26 | static size_t hashBuckets(size_t count) 27 | { 28 | size_t buckets = 1; 29 | while (buckets < count) 30 | buckets *= 2; 31 | 32 | return buckets; 33 | } 34 | 35 | template 36 | static T* hashLookup(T* table, size_t buckets, const T& key, const T& empty) 37 | { 38 | assert(buckets > 0); 39 | assert((buckets & (buckets - 1)) == 0); 40 | 41 | size_t hashmod = buckets - 1; 42 | size_t bucket = hash(key) & hashmod; 43 | 44 | for (size_t probe = 0; probe <= hashmod; ++probe) 45 | { 46 | T& item = table[bucket]; 47 | 48 | if (item == empty || item == key) 49 | return &item; 50 | 51 | // hash collision, quadratic probing 52 | bucket = (bucket + probe + 1) & hashmod; 53 | } 54 | 55 | assert(false && "Hash table is full"); 56 | return 0; 57 | } 58 | 59 | struct Vector3 60 | { 61 | float x, y, z; 62 | }; 63 | 64 | struct Quadric 65 | { 66 | float a00; 67 | float a10, a11; 68 | float a20, a21, a22; 69 | float b0, b1, b2, c; 70 | }; 71 | 72 | struct Collapse 73 | { 74 | size_t v0; 75 | size_t v1; 76 | float error; 77 | 78 | bool operator<(const Collapse& other) const 79 | { 80 | return error < other.error; 81 | } 82 | }; 83 | 84 | static float normalize(Vector3& v) 85 | { 86 | float length = sqrtf(v.x * v.x + v.y * v.y + v.z * v.z); 87 | 88 | if (length > 0) 89 | { 90 | v.x /= length; 91 | v.y /= length; 92 | v.z /= length; 93 | } 94 | 95 | return length; 96 | } 97 | 98 | static void quadricAdd(Quadric& Q, const Quadric& R) 99 | { 100 | Q.a00 += R.a00; 101 | Q.a10 += R.a10; 102 | Q.a11 += R.a11; 103 | Q.a20 += R.a20; 104 | Q.a21 += R.a21; 105 | Q.a22 += R.a22; 106 | Q.b0 += R.b0; 107 | Q.b1 += R.b1; 108 | Q.b2 += R.b2; 109 | Q.c += R.c; 110 | } 111 | 112 | static void 
quadricMul(Quadric& Q, float s) 113 | { 114 | Q.a00 *= s; 115 | Q.a10 *= s; 116 | Q.a11 *= s; 117 | Q.a20 *= s; 118 | Q.a21 *= s; 119 | Q.a22 *= s; 120 | Q.b0 *= s; 121 | Q.b1 *= s; 122 | Q.b2 *= s; 123 | Q.c *= s; 124 | } 125 | 126 | static float quadricError(Quadric& Q, const Vector3& v) 127 | { 128 | float xx = v.x * v.x; 129 | float xy = v.x * v.y; 130 | float xz = v.x * v.z; 131 | float yy = v.y * v.y; 132 | float yz = v.y * v.z; 133 | float zz = v.z * v.z; 134 | 135 | float vTQv = Q.a00 * xx + Q.a10 * xy * 2 + Q.a11 * yy + Q.a20 * xz * 2 + Q.a21 * yz * 2 + Q.a22 * zz + Q.b0 * v.x * 2 + Q.b1 * v.y * 2 + Q.b2 * v.z * 2 + Q.c; 136 | 137 | return fabsf(vTQv); 138 | } 139 | 140 | static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d) 141 | { 142 | Q.a00 = a * a; 143 | Q.a10 = b * a; 144 | Q.a11 = b * b; 145 | Q.a20 = c * a; 146 | Q.a21 = c * b; 147 | Q.a22 = c * c; 148 | Q.b0 = d * a; 149 | Q.b1 = d * b; 150 | Q.b2 = d * c; 151 | Q.c = d * d; 152 | } 153 | 154 | static void quadricFromTriangle(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2) 155 | { 156 | Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z}; 157 | Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z}; 158 | 159 | Vector3 normal = {p10.y * p20.z - p10.z * p20.y, p10.z * p20.x - p10.x * p20.z, p10.x * p20.y - p10.y * p20.x}; 160 | float area = normalize(normal); 161 | 162 | float distance = normal.x * p0.x + normal.y * p0.y + normal.z * p0.z; 163 | 164 | quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance); 165 | 166 | // Three classical weighting methods include weight=1, weight=area and weight=area^2 167 | // We use weight=area for now 168 | quadricMul(Q, area); 169 | } 170 | 171 | static void quadricFromTriangleEdge(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2) 172 | { 173 | Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z}; 174 | float length = normalize(p10); 175 | 176 | Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - 
p0.z}; 177 | float p20p = p20.x * p10.x + p20.y * p10.y + p20.z * p10.z; 178 | 179 | Vector3 normal = {p20.x - p10.x * p20p, p20.y - p10.y * p20p, p20.z - p10.z * p20p}; 180 | normalize(normal); 181 | 182 | float distance = normal.x * p0.x + normal.y * p0.y + normal.z * p0.z; 183 | 184 | quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance); 185 | 186 | quadricMul(Q, length * 1000); 187 | } 188 | 189 | static unsigned long long edgeId(unsigned int a, unsigned int b) 190 | { 191 | return (static_cast(a) << 32) | b; 192 | } 193 | 194 | static size_t simplifyEdgeCollapse(unsigned int* result, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_positions_stride, size_t vertex_count, size_t target_index_count) 195 | { 196 | size_t vertex_stride_float = vertex_positions_stride / sizeof(float); 197 | 198 | meshopt_Buffer vertex_positions(vertex_count); 199 | 200 | for (size_t i = 0; i < vertex_count; ++i) 201 | { 202 | const float* v = vertex_positions_data + i * vertex_stride_float; 203 | 204 | vertex_positions[i].x = v[0]; 205 | vertex_positions[i].y = v[1]; 206 | vertex_positions[i].z = v[2]; 207 | } 208 | 209 | meshopt_Buffer vertex_quadrics(vertex_count); 210 | memset(vertex_quadrics.data, 0, vertex_count * sizeof(Quadric)); 211 | 212 | // face quadrics 213 | for (size_t i = 0; i < index_count; i += 3) 214 | { 215 | Quadric Q; 216 | quadricFromTriangle(Q, vertex_positions[indices[i + 0]], vertex_positions[indices[i + 1]], vertex_positions[indices[i + 2]]); 217 | 218 | quadricAdd(vertex_quadrics[indices[i + 0]], Q); 219 | quadricAdd(vertex_quadrics[indices[i + 1]], Q); 220 | quadricAdd(vertex_quadrics[indices[i + 2]], Q); 221 | } 222 | 223 | // edge quadrics for boundary edges 224 | meshopt_Buffer edges(hashBuckets(index_count)); 225 | memset(edges.data, 0, edges.size * sizeof(unsigned long long)); 226 | 227 | for (size_t i = 0; i < index_count; i += 3) 228 | { 229 | static const int next[3] = {1, 2, 0}; 230 | 231 
| for (int e = 0; e < 3; ++e) 232 | { 233 | unsigned int i0 = indices[i + e]; 234 | unsigned int i1 = indices[i + next[e]]; 235 | 236 | unsigned long long edge = edgeId(i0, i1); 237 | 238 | *hashLookup(edges.data, edges.size, edge, 0ull) = edge; 239 | } 240 | } 241 | 242 | for (size_t i = 0; i < index_count; i += 3) 243 | { 244 | static const int next[3] = {1, 2, 0}; 245 | 246 | for (int e = 0; e < 3; ++e) 247 | { 248 | unsigned int i0 = indices[i + e]; 249 | unsigned int i1 = indices[i + next[e]]; 250 | 251 | unsigned long long edge = edgeId(i1, i0); 252 | 253 | if (*hashLookup(edges.data, edges.size, edge, 0ull) != edge) 254 | { 255 | unsigned int i2 = indices[i + next[next[e]]]; 256 | 257 | Quadric Q; 258 | quadricFromTriangleEdge(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2]); 259 | 260 | quadricAdd(vertex_quadrics[i0], Q); 261 | quadricAdd(vertex_quadrics[i1], Q); 262 | } 263 | } 264 | } 265 | 266 | if (result != indices) 267 | { 268 | for (size_t i = 0; i < index_count; ++i) 269 | { 270 | result[i] = indices[i]; 271 | } 272 | } 273 | 274 | size_t pass_count = 0; 275 | float worst_error = 0; 276 | 277 | while (index_count > target_index_count) 278 | { 279 | meshopt_Buffer edge_collapses(index_count); 280 | size_t edge_collapse_count = 0; 281 | 282 | for (size_t i = 0; i < index_count; i += 3) 283 | { 284 | static const int next[3] = {1, 2, 0}; 285 | 286 | for (int e = 0; e < 3; ++e) 287 | { 288 | unsigned int i0 = result[i + e]; 289 | unsigned int i1 = result[i + next[e]]; 290 | 291 | Collapse c01 = {i0, i1, quadricError(vertex_quadrics[i0], vertex_positions[i1])}; 292 | Collapse c10 = {i1, i0, quadricError(vertex_quadrics[i1], vertex_positions[i0])}; 293 | Collapse c = c01.error <= c10.error ? 
c01 : c10; 294 | 295 | edge_collapses[edge_collapse_count++] = c; 296 | } 297 | } 298 | 299 | std::sort(edge_collapses.data, edge_collapses.data + edge_collapse_count); 300 | 301 | meshopt_Buffer vertex_remap(vertex_count); 302 | 303 | for (size_t i = 0; i < vertex_count; ++i) 304 | { 305 | vertex_remap[i] = unsigned(i); 306 | } 307 | 308 | meshopt_Buffer vertex_locked(vertex_count); 309 | memset(vertex_locked.data, 0, vertex_count); 310 | 311 | // each collapse removes 2 triangles 312 | size_t edge_collapse_goal = (index_count - target_index_count) / 6 + 1; 313 | 314 | size_t collapses = 0; 315 | float pass_error = 0; 316 | 317 | float error_goal = edge_collapses[edge_collapse_goal < edge_collapse_count ? edge_collapse_goal : edge_collapse_count - 1].error; 318 | float error_limit = error_goal * 1.5f; 319 | 320 | for (size_t i = 0; i < edge_collapse_count; ++i) 321 | { 322 | const Collapse& c = edge_collapses[i]; 323 | 324 | if (vertex_locked[c.v0] || vertex_locked[c.v1]) 325 | continue; 326 | 327 | if (c.error > error_limit) 328 | break; 329 | 330 | assert(vertex_remap[c.v0] == c.v0); 331 | assert(vertex_remap[c.v1] == c.v1); 332 | 333 | quadricAdd(vertex_quadrics[c.v1], vertex_quadrics[c.v0]); 334 | 335 | vertex_remap[c.v0] = unsigned(c.v1); 336 | 337 | vertex_locked[c.v0] = 1; 338 | vertex_locked[c.v1] = 1; 339 | 340 | collapses++; 341 | pass_error = c.error; 342 | 343 | if (collapses >= edge_collapse_goal) 344 | break; 345 | } 346 | 347 | // printf("pass %d: collapses: %d/%d, error: %e\n", int(pass_count), int(collapses), int(edge_collapse_count), pass_error); 348 | 349 | pass_count++; 350 | worst_error = (worst_error < pass_error) ? 
pass_error : worst_error; 351 | 352 | // no edges can be collapsed any more => bail out 353 | if (collapses == 0) 354 | break; 355 | 356 | size_t write = 0; 357 | 358 | for (size_t i = 0; i < index_count; i += 3) 359 | { 360 | unsigned int v0 = vertex_remap[result[i + 0]]; 361 | unsigned int v1 = vertex_remap[result[i + 1]]; 362 | unsigned int v2 = vertex_remap[result[i + 2]]; 363 | 364 | assert(vertex_remap[v0] == v0); 365 | assert(vertex_remap[v1] == v1); 366 | assert(vertex_remap[v2] == v2); 367 | 368 | if (v0 != v1 && v0 != v2 && v1 != v2) 369 | { 370 | result[write + 0] = v0; 371 | result[write + 1] = v1; 372 | result[write + 2] = v2; 373 | write += 3; 374 | } 375 | } 376 | 377 | index_count = write; 378 | } 379 | 380 | // printf("passes: %d, worst error: %e\n", int(pass_count), worst_error); 381 | 382 | return index_count; 383 | } 384 | 385 | } // namespace meshopt 386 | 387 | size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count) 388 | { 389 | using namespace meshopt; 390 | 391 | assert(index_count % 3 == 0); 392 | assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); 393 | assert(vertex_positions_stride % sizeof(float) == 0); 394 | assert(target_index_count <= index_count); 395 | 396 | return simplifyEdgeCollapse(destination, indices, index_count, vertex_positions, vertex_positions_stride, vertex_count, target_index_count); 397 | } 398 | -------------------------------------------------------------------------------- /External/meshoptimizer/src/overdrawoptimizer.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details 2 | #include "meshoptimizer.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | // This work is based on: 11 | // Pedro 
// Sorting key for one cluster: clusters whose average normal points away from
// the mesh centroid (high dot product) are likely occluders and should render early.
struct ClusterSortData
{
	unsigned int cluster;
	float dot_product;

	bool operator<(const ClusterSortData& other) const
	{
		// high product = possible occluder, render early
		return dot_product > other.dot_product;
	}
};

// Fills sort_data[0..cluster_count) with, per cluster, the dot product between
// the direction from the mesh centroid to the cluster centroid and the cluster's
// normalized area-weighted average normal. clusters[] holds each cluster's first
// triangle index; the final cluster extends to index_count.
static void calculateSortData(ClusterSortData* sort_data, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_positions_stride, const unsigned int* clusters, size_t cluster_count)
{
	size_t vertex_stride_float = vertex_positions_stride / sizeof(float);

	// centroid of the whole mesh, averaged over index references
	float mesh_centroid[3] = {};

	for (size_t i = 0; i < index_count; ++i)
	{
		const float* v = vertex_positions + vertex_stride_float * indices[i];

		mesh_centroid[0] += v[0];
		mesh_centroid[1] += v[1];
		mesh_centroid[2] += v[2];
	}

	mesh_centroid[0] /= index_count;
	mesh_centroid[1] /= index_count;
	mesh_centroid[2] /= index_count;

	for (size_t cluster_index = 0; cluster_index < cluster_count; ++cluster_index)
	{
		size_t cluster_begin = clusters[cluster_index] * 3;
		size_t cluster_end = (cluster_count > cluster_index + 1) ? clusters[cluster_index + 1] * 3 : index_count;
		assert(cluster_begin < cluster_end);

		float cluster_area = 0;
		float cluster_centroid[3] = {};
		float cluster_normal[3] = {};

		// accumulate area-weighted centroid and (unnormalized) normal per triangle
		for (size_t i = cluster_begin; i < cluster_end; i += 3)
		{
			const float* v0 = vertex_positions + vertex_stride_float * indices[i + 0];
			const float* v1 = vertex_positions + vertex_stride_float * indices[i + 1];
			const float* v2 = vertex_positions + vertex_stride_float * indices[i + 2];

			float e10[3] = {v1[0] - v0[0], v1[1] - v0[1], v1[2] - v0[2]};
			float e20[3] = {v2[0] - v0[0], v2[1] - v0[1], v2[2] - v0[2]};

			// cross product e10 x e20; its length is twice the triangle area
			float normalx = e10[1] * e20[2] - e10[2] * e20[1];
			float normaly = e10[2] * e20[0] - e10[0] * e20[2];
			float normalz = e10[0] * e20[1] - e10[1] * e20[0];

			float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz);

			cluster_centroid[0] += (v0[0] + v1[0] + v2[0]) * (area / 3);
			cluster_centroid[1] += (v0[1] + v1[1] + v2[1]) * (area / 3);
			cluster_centroid[2] += (v0[2] + v1[2] + v2[2]) * (area / 3);

			cluster_normal[0] += normalx;
			cluster_normal[1] += normaly;
			cluster_normal[2] += normalz;

			cluster_area += area;
		}

		float inv_cluster_area = cluster_area == 0 ? 0 : 1 / cluster_area;

		cluster_centroid[0] *= inv_cluster_area;
		cluster_centroid[1] *= inv_cluster_area;
		cluster_centroid[2] *= inv_cluster_area;

		float cluster_normal_length = sqrtf(cluster_normal[0] * cluster_normal[0] + cluster_normal[1] * cluster_normal[1] + cluster_normal[2] * cluster_normal[2]);
		float inv_cluster_normal_length = cluster_normal_length == 0 ? 0 : 1 / cluster_normal_length;

		cluster_normal[0] *= inv_cluster_normal_length;
		cluster_normal[1] *= inv_cluster_normal_length;
		cluster_normal[2] *= inv_cluster_normal_length;

		float centroid_vector[3] = {cluster_centroid[0] - mesh_centroid[0], cluster_centroid[1] - mesh_centroid[1], cluster_centroid[2] - mesh_centroid[2]};

		sort_data[cluster_index].cluster = unsigned(cluster_index);
		sort_data[cluster_index].dot_product = centroid_vector[0] * cluster_normal[0] + centroid_vector[1] * cluster_normal[1] + centroid_vector[2] * cluster_normal[2];
	}
}

// Simulates a FIFO vertex cache via timestamps: each of a/b/c not seen within the
// last cache_size insertions is (re)inserted and counted as a miss. Returns the
// number of misses (0..3); timestamp advances once per inserted vertex.
static unsigned int updateCache(unsigned int a, unsigned int b, unsigned int c, unsigned int cache_size, unsigned int* cache_timestamps, unsigned int& timestamp)
{
	unsigned int triangle[3] = {a, b, c};
	unsigned int cache_misses = 0;

	for (int i = 0; i < 3; ++i)
	{
		unsigned int index = triangle[i];

		// a vertex is still cached iff it was inserted within the last cache_size slots
		if (timestamp - cache_timestamps[index] > cache_size)
		{
			cache_timestamps[index] = timestamp++;
			cache_misses++;
		}
	}

	return cache_misses;
}
vertices are not in the cache it's usually relatively safe to assume that this is a new patch in the mesh 142 | // that is disjoint from previous vertices; sometimes it might come back to reference existing vertices but that frequently 143 | // suggests an inefficiency in the vertex cache optimization algorithm 144 | // usually the first triangle has 3 misses unless it's degenerate - thus we make sure the first cluster always starts with 0 145 | if (i == 0 || m == 3) 146 | { 147 | destination[result++] = unsigned(i); 148 | } 149 | } 150 | 151 | assert(result <= index_count / 3); 152 | 153 | return result; 154 | } 155 | 156 | static size_t generateSoftBoundaries(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const unsigned int* clusters, size_t cluster_count, unsigned int cache_size, float threshold) 157 | { 158 | meshopt_Buffer cache_timestamps(vertex_count); 159 | memset(cache_timestamps.data, 0, vertex_count * sizeof(unsigned int)); 160 | 161 | unsigned int timestamp = 0; 162 | 163 | size_t result = 0; 164 | 165 | for (size_t it = 0; it < cluster_count; ++it) 166 | { 167 | size_t start = clusters[it]; 168 | size_t end = (it + 1 < cluster_count) ? 
clusters[it + 1] : index_count / 3; 169 | assert(start < end); 170 | 171 | // reset cache 172 | timestamp += cache_size + 1; 173 | 174 | // measure cluster ACMR 175 | unsigned int cluster_misses = 0; 176 | 177 | for (size_t i = start; i < end; ++i) 178 | { 179 | unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp); 180 | 181 | cluster_misses += m; 182 | } 183 | 184 | float cluster_threshold = threshold * (float(cluster_misses) / float(end - start)); 185 | 186 | // first cluster always starts from the hard cluster boundary 187 | destination[result++] = unsigned(start); 188 | 189 | // reset cache 190 | timestamp += cache_size + 1; 191 | 192 | unsigned int running_misses = 0; 193 | unsigned int running_faces = 0; 194 | 195 | for (size_t i = start; i < end; ++i) 196 | { 197 | unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp); 198 | 199 | running_misses += m; 200 | running_faces += 1; 201 | 202 | if (float(running_misses) / float(running_faces) <= cluster_threshold) 203 | { 204 | // we have reached the target ACMR with the current triangle so we need to start a new cluster on the next one 205 | // note that this may mean that we add 'end` to destination for the last triangle, which will imply that the last 206 | // cluster is empty; however, the 'pop_back' after the loop will clean it up 207 | destination[result++] = unsigned(i + 1); 208 | 209 | // reset cache 210 | timestamp += cache_size + 1; 211 | 212 | running_misses = 0; 213 | running_faces = 0; 214 | } 215 | } 216 | 217 | // each time we reach the target ACMR we flush the cluster 218 | // this means that the last cluster is by definition not very good - there are frequent cases where we are left with a few triangles 219 | // in the last cluster, producing a very bad ACMR and significantly penalizing the overall results 220 | // thus we remove the last 
cluster boundary, merging the last complete cluster with the last incomplete one 221 | // there are sometimes cases when the last cluster is actually good enough - in which case the code above would have added 'end' 222 | // to the cluster boundary array which we need to remove anyway - this code will do that automatically 223 | if (destination[result - 1] != start) 224 | { 225 | result--; 226 | } 227 | } 228 | 229 | assert(result >= cluster_count); 230 | assert(result <= index_count / 3); 231 | 232 | return result; 233 | } 234 | 235 | } // namespace 236 | 237 | void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold) 238 | { 239 | using namespace meshopt; 240 | 241 | assert(index_count % 3 == 0); 242 | assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); 243 | assert(vertex_positions_stride % sizeof(float) == 0); 244 | 245 | // guard for empty meshes 246 | if (index_count == 0 || vertex_count == 0) 247 | return; 248 | 249 | // support in-place optimization 250 | meshopt_Buffer indices_copy; 251 | 252 | if (destination == indices) 253 | { 254 | indices_copy.data = new unsigned int[index_count]; 255 | memcpy(indices_copy.data, indices, index_count * sizeof(unsigned int)); 256 | indices = indices_copy.data; 257 | } 258 | 259 | unsigned int cache_size = 16; 260 | 261 | // generate hard boundaries from full-triangle cache misses 262 | meshopt_Buffer hard_clusters(index_count / 3); 263 | size_t hard_cluster_count = generateHardBoundaries(&hard_clusters[0], indices, index_count, vertex_count, cache_size); 264 | 265 | // generate soft boundaries 266 | meshopt_Buffer soft_clusters(index_count / 3 + 1); 267 | size_t soft_cluster_count = generateSoftBoundaries(&soft_clusters[0], indices, index_count, vertex_count, &hard_clusters[0], hard_cluster_count, cache_size, threshold); 268 | 269 | const unsigned int* 
clusters = &soft_clusters[0]; 270 | size_t cluster_count = soft_cluster_count; 271 | 272 | // fill sort data 273 | meshopt_Buffer sort_data(cluster_count); 274 | calculateSortData(&sort_data[0], indices, index_count, vertex_positions, vertex_positions_stride, clusters, cluster_count); 275 | 276 | // high product = possible occluder, render early 277 | std::sort(sort_data.data, sort_data.data + cluster_count); 278 | 279 | // fill output buffer 280 | size_t offset = 0; 281 | 282 | for (size_t it = 0; it < cluster_count; ++it) 283 | { 284 | unsigned int cluster = sort_data[it].cluster; 285 | assert(cluster < cluster_count); 286 | 287 | size_t start = clusters[cluster]; 288 | size_t end = (cluster + 1 < cluster_count) ? clusters[cluster + 1] : index_count / 3; 289 | assert(start < end); 290 | 291 | for (size_t i = start; i < end; ++i) 292 | { 293 | destination[offset++] = indices[3 * i + 0]; 294 | destination[offset++] = indices[3 * i + 1]; 295 | destination[offset++] = indices[3 * i + 2]; 296 | } 297 | } 298 | 299 | assert(offset == index_count); 300 | } 301 | -------------------------------------------------------------------------------- /External/meshoptimizer/src/vcacheoptimizer.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details 2 | #include "meshoptimizer.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | // This work is based on: 9 | // Tom Forsyth. Linear-Speed Vertex Cache Optimisation. 2006 10 | // Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 
2007 11 | namespace meshopt 12 | { 13 | 14 | const size_t max_cache_size = 16; 15 | const size_t max_valence = 8; 16 | 17 | static const float vertex_score_table_cache[1 + max_cache_size] = { 18 | 0.f, 19 | 0.792f, 0.767f, 0.764f, 0.956f, 0.827f, 0.751f, 0.820f, 0.864f, 0.738f, 0.788f, 0.642f, 0.646f, 0.165f, 0.654f, 0.545f, 0.284f 20 | }; 21 | 22 | static const float vertex_score_table_live[1 + max_valence] = { 23 | 0.f, 24 | 0.994f, 0.721f, 0.479f, 0.423f, 0.174f, 0.080f, 0.249f, 0.056f 25 | }; 26 | 27 | struct Adjacency 28 | { 29 | meshopt_Buffer triangle_counts; 30 | meshopt_Buffer offsets; 31 | meshopt_Buffer data; 32 | 33 | Adjacency(size_t index_count, size_t vertex_count) 34 | : triangle_counts(vertex_count) 35 | , offsets(vertex_count) 36 | , data(index_count) 37 | { 38 | } 39 | }; 40 | 41 | static void buildAdjacency(Adjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count) 42 | { 43 | size_t face_count = index_count / 3; 44 | 45 | // fill triangle counts 46 | for (size_t i = 0; i < vertex_count; ++i) 47 | { 48 | adjacency.triangle_counts[i] = 0; 49 | } 50 | 51 | for (size_t i = 0; i < index_count; ++i) 52 | { 53 | assert(indices[i] < vertex_count); 54 | 55 | adjacency.triangle_counts[indices[i]]++; 56 | } 57 | 58 | // fill offset table 59 | unsigned int offset = 0; 60 | 61 | for (size_t i = 0; i < vertex_count; ++i) 62 | { 63 | adjacency.offsets[i] = offset; 64 | offset += adjacency.triangle_counts[i]; 65 | } 66 | 67 | assert(offset == index_count); 68 | 69 | // fill triangle data 70 | for (size_t i = 0; i < face_count; ++i) 71 | { 72 | unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; 73 | 74 | adjacency.data[adjacency.offsets[a]++] = unsigned(i); 75 | adjacency.data[adjacency.offsets[b]++] = unsigned(i); 76 | adjacency.data[adjacency.offsets[c]++] = unsigned(i); 77 | } 78 | 79 | // fix offsets that have been disturbed by the previous pass 80 | for (size_t i = 0; i < vertex_count; ++i) 
81 | { 82 | assert(adjacency.offsets[i] >= adjacency.triangle_counts[i]); 83 | 84 | adjacency.offsets[i] -= adjacency.triangle_counts[i]; 85 | } 86 | } 87 | 88 | static unsigned int getNextVertexDeadEnd(const unsigned int* dead_end, unsigned int& dead_end_top, unsigned int& input_cursor, const unsigned int* live_triangles, size_t vertex_count) 89 | { 90 | // check dead-end stack 91 | while (dead_end_top) 92 | { 93 | unsigned int vertex = dead_end[--dead_end_top]; 94 | 95 | if (live_triangles[vertex] > 0) 96 | return vertex; 97 | } 98 | 99 | // input order 100 | while (input_cursor < vertex_count) 101 | { 102 | if (live_triangles[input_cursor] > 0) 103 | return input_cursor; 104 | 105 | ++input_cursor; 106 | } 107 | 108 | return ~0u; 109 | } 110 | 111 | static unsigned int getNextVertexNeighbour(const unsigned int* next_candidates_begin, const unsigned int* next_candidates_end, const unsigned int* live_triangles, const unsigned int* cache_timestamps, unsigned int timestamp, unsigned int cache_size) 112 | { 113 | unsigned int best_candidate = ~0u; 114 | int best_priority = -1; 115 | 116 | for (const unsigned int* next_candidate = next_candidates_begin; next_candidate != next_candidates_end; ++next_candidate) 117 | { 118 | unsigned int vertex = *next_candidate; 119 | 120 | // otherwise we don't need to process it 121 | if (live_triangles[vertex] > 0) 122 | { 123 | int priority = 0; 124 | 125 | // will it be in cache after fanning? 
126 | if (2 * live_triangles[vertex] + timestamp - cache_timestamps[vertex] <= cache_size) 127 | { 128 | priority = timestamp - cache_timestamps[vertex]; // position in cache 129 | } 130 | 131 | if (priority > best_priority) 132 | { 133 | best_candidate = vertex; 134 | best_priority = priority; 135 | } 136 | } 137 | } 138 | 139 | return best_candidate; 140 | } 141 | 142 | static float vertexScore(int cache_position, unsigned int live_triangles) 143 | { 144 | assert(cache_position >= -1 && cache_position < int(max_cache_size)); 145 | 146 | unsigned int live_triangles_clamped = live_triangles < max_valence ? live_triangles : max_valence; 147 | 148 | return vertex_score_table_cache[1 + cache_position] + vertex_score_table_live[live_triangles_clamped]; 149 | } 150 | 151 | static unsigned int getNextTriangleDeadEnd(unsigned int& input_cursor, const char* emitted_flags, size_t face_count) 152 | { 153 | // input order 154 | while (input_cursor < face_count) 155 | { 156 | if (!emitted_flags[input_cursor]) 157 | return input_cursor; 158 | 159 | ++input_cursor; 160 | } 161 | 162 | return ~0u; 163 | } 164 | 165 | } // namespace meshopt 166 | 167 | void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count) 168 | { 169 | using namespace meshopt; 170 | 171 | assert(index_count % 3 == 0); 172 | 173 | // guard for empty meshes 174 | if (index_count == 0 || vertex_count == 0) 175 | return; 176 | 177 | // support in-place optimization 178 | meshopt_Buffer indices_copy; 179 | 180 | if (destination == indices) 181 | { 182 | indices_copy.data = new unsigned int[index_count]; 183 | memcpy(indices_copy.data, indices, index_count * sizeof(unsigned int)); 184 | indices = indices_copy.data; 185 | } 186 | 187 | unsigned int cache_size = 16; 188 | assert(cache_size <= max_cache_size); 189 | 190 | size_t face_count = index_count / 3; 191 | 192 | // build adjacency information 193 | Adjacency adjacency(index_count, 
vertex_count); 194 | buildAdjacency(adjacency, indices, index_count, vertex_count); 195 | 196 | // live triangle counts 197 | meshopt_Buffer live_triangles(vertex_count); 198 | memcpy(live_triangles.data, adjacency.triangle_counts.data, vertex_count * sizeof(unsigned int)); 199 | 200 | // emitted flags 201 | meshopt_Buffer emitted_flags(face_count); 202 | memset(emitted_flags.data, 0, face_count); 203 | 204 | // compute initial vertex scores 205 | meshopt_Buffer vertex_scores(vertex_count); 206 | 207 | for (size_t i = 0; i < vertex_count; ++i) 208 | { 209 | vertex_scores[i] = vertexScore(-1, live_triangles[i]); 210 | } 211 | 212 | // compute triangle scores 213 | meshopt_Buffer triangle_scores(face_count); 214 | 215 | for (size_t i = 0; i < face_count; ++i) 216 | { 217 | unsigned int a = indices[i * 3 + 0]; 218 | unsigned int b = indices[i * 3 + 1]; 219 | unsigned int c = indices[i * 3 + 2]; 220 | 221 | triangle_scores[i] = vertex_scores[a] + vertex_scores[b] + vertex_scores[c]; 222 | } 223 | 224 | unsigned int cache_holder[2 * (max_cache_size + 3)]; 225 | unsigned int* cache = cache_holder; 226 | unsigned int* cache_new = cache_holder + max_cache_size + 3; 227 | size_t cache_count = 0; 228 | 229 | unsigned int current_triangle = 0; 230 | unsigned int input_cursor = 1; 231 | 232 | unsigned int output_triangle = 0; 233 | 234 | while (current_triangle != ~0u) 235 | { 236 | assert(output_triangle < face_count); 237 | 238 | unsigned int a = indices[current_triangle * 3 + 0]; 239 | unsigned int b = indices[current_triangle * 3 + 1]; 240 | unsigned int c = indices[current_triangle * 3 + 2]; 241 | 242 | // output indices 243 | destination[output_triangle * 3 + 0] = a; 244 | destination[output_triangle * 3 + 1] = b; 245 | destination[output_triangle * 3 + 2] = c; 246 | output_triangle++; 247 | 248 | // update emitted flags 249 | emitted_flags[current_triangle] = true; 250 | triangle_scores[current_triangle] = 0; 251 | 252 | // new triangle 253 | size_t cache_write = 0; 254 
| cache_new[cache_write++] = a; 255 | cache_new[cache_write++] = b; 256 | cache_new[cache_write++] = c; 257 | 258 | // old triangles 259 | for (size_t i = 0; i < cache_count; ++i) 260 | { 261 | unsigned int index = cache[i]; 262 | 263 | if (index != a && index != b && index != c) 264 | { 265 | cache_new[cache_write++] = index; 266 | } 267 | } 268 | 269 | unsigned int* cache_temp = cache; 270 | cache = cache_new, cache_new = cache_temp; 271 | cache_count = cache_write > cache_size ? cache_size : cache_write; 272 | 273 | // update live triangle counts 274 | live_triangles[a]--; 275 | live_triangles[b]--; 276 | live_triangles[c]--; 277 | 278 | // remove emitted triangle from adjacency data 279 | // this makes sure that we spend less time traversing these lists on subsequent iterations 280 | for (size_t k = 0; k < 3; ++k) 281 | { 282 | unsigned int index = indices[current_triangle * 3 + k]; 283 | 284 | unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index]; 285 | size_t neighbours_size = adjacency.triangle_counts[index]; 286 | 287 | for (size_t i = 0; i < neighbours_size; ++i) 288 | { 289 | unsigned int tri = neighbours[i]; 290 | 291 | if (tri == current_triangle) 292 | { 293 | neighbours[i] = neighbours[neighbours_size - 1]; 294 | adjacency.triangle_counts[index]--; 295 | break; 296 | } 297 | } 298 | } 299 | 300 | unsigned int best_triangle = ~0u; 301 | float best_score = 0; 302 | 303 | // update cache positions, vertex scores and triangle scores, and find next best triangle 304 | for (size_t i = 0; i < cache_write; ++i) 305 | { 306 | unsigned int index = cache[i]; 307 | 308 | int cache_position = i >= cache_size ? 
-1 : int(i); 309 | 310 | // update vertex score 311 | float score = vertexScore(cache_position, live_triangles[index]); 312 | float score_diff = score - vertex_scores[index]; 313 | 314 | vertex_scores[index] = score; 315 | 316 | // update scores of vertex triangles 317 | const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[index]; 318 | const unsigned int* neighbours_end = neighbours_begin + adjacency.triangle_counts[index]; 319 | 320 | for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it) 321 | { 322 | unsigned int tri = *it; 323 | assert(!emitted_flags[tri]); 324 | 325 | float tri_score = triangle_scores[tri] + score_diff; 326 | assert(tri_score > 0); 327 | 328 | if (best_score < tri_score) 329 | { 330 | best_triangle = tri; 331 | best_score = tri_score; 332 | } 333 | 334 | triangle_scores[tri] = tri_score; 335 | } 336 | } 337 | 338 | // step through input triangles in order if we hit a dead-end 339 | current_triangle = best_triangle; 340 | 341 | if (current_triangle == ~0u) 342 | { 343 | current_triangle = getNextTriangleDeadEnd(input_cursor, &emitted_flags[0], face_count); 344 | } 345 | } 346 | 347 | assert(input_cursor == face_count); 348 | assert(output_triangle == face_count); 349 | } 350 | 351 | void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size) 352 | { 353 | using namespace meshopt; 354 | 355 | assert(index_count % 3 == 0); 356 | assert(cache_size >= 3); 357 | 358 | // guard for empty meshes 359 | if (index_count == 0 || vertex_count == 0) 360 | return; 361 | 362 | // support in-place optimization 363 | meshopt_Buffer indices_copy; 364 | 365 | if (destination == indices) 366 | { 367 | indices_copy.data = new unsigned int[index_count]; 368 | memcpy(indices_copy.data, indices, index_count * sizeof(unsigned int)); 369 | indices = indices_copy.data; 370 | } 371 | 372 | size_t face_count = index_count / 3; 
373 | 374 | // build adjacency information 375 | Adjacency adjacency(index_count, vertex_count); 376 | buildAdjacency(adjacency, indices, index_count, vertex_count); 377 | 378 | // live triangle counts 379 | meshopt_Buffer live_triangles(vertex_count); 380 | memcpy(live_triangles.data, adjacency.triangle_counts.data, vertex_count * sizeof(unsigned int)); 381 | 382 | // cache time stamps 383 | meshopt_Buffer cache_timestamps(vertex_count); 384 | memset(cache_timestamps.data, 0, vertex_count * sizeof(unsigned int)); 385 | 386 | // dead-end stack 387 | meshopt_Buffer dead_end(index_count); 388 | unsigned int dead_end_top = 0; 389 | 390 | // emitted flags 391 | meshopt_Buffer emitted_flags(face_count); 392 | memset(emitted_flags.data, 0, face_count); 393 | 394 | unsigned int current_vertex = 0; 395 | 396 | unsigned int timestamp = cache_size + 1; 397 | unsigned int input_cursor = 1; // vertex to restart from in case of dead-end 398 | 399 | unsigned int output_triangle = 0; 400 | 401 | while (current_vertex != ~0u) 402 | { 403 | const unsigned int* next_candidates_begin = &dead_end[0] + dead_end_top; 404 | 405 | // emit all vertex neighbours 406 | const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[current_vertex]; 407 | const unsigned int* neighbours_end = neighbours_begin + adjacency.triangle_counts[current_vertex]; 408 | 409 | for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it) 410 | { 411 | unsigned int triangle = *it; 412 | 413 | if (!emitted_flags[triangle]) 414 | { 415 | unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2]; 416 | 417 | // output indices 418 | destination[output_triangle * 3 + 0] = a; 419 | destination[output_triangle * 3 + 1] = b; 420 | destination[output_triangle * 3 + 2] = c; 421 | output_triangle++; 422 | 423 | // update dead-end stack 424 | dead_end[dead_end_top + 0] = a; 425 | dead_end[dead_end_top + 1] = b; 426 | dead_end[dead_end_top + 2] 
= c; 427 | dead_end_top += 3; 428 | 429 | // update live triangle counts 430 | live_triangles[a]--; 431 | live_triangles[b]--; 432 | live_triangles[c]--; 433 | 434 | // update cache info 435 | // if vertex is not in cache, put it in cache 436 | if (timestamp - cache_timestamps[a] > cache_size) 437 | cache_timestamps[a] = timestamp++; 438 | 439 | if (timestamp - cache_timestamps[b] > cache_size) 440 | cache_timestamps[b] = timestamp++; 441 | 442 | if (timestamp - cache_timestamps[c] > cache_size) 443 | cache_timestamps[c] = timestamp++; 444 | 445 | // update emitted flags 446 | emitted_flags[triangle] = true; 447 | } 448 | } 449 | 450 | // next candidates are the ones we pushed to dead-end stack just now 451 | const unsigned int* next_candidates_end = &dead_end[0] + dead_end_top; 452 | 453 | // get next vertex 454 | current_vertex = getNextVertexNeighbour(next_candidates_begin, next_candidates_end, &live_triangles[0], &cache_timestamps[0], timestamp, cache_size); 455 | 456 | if (current_vertex == ~0u) 457 | { 458 | current_vertex = getNextVertexDeadEnd(&dead_end[0], dead_end_top, input_cursor, &live_triangles[0], vertex_count); 459 | } 460 | } 461 | 462 | assert(output_triangle == face_count); 463 | } 464 | -------------------------------------------------------------------------------- /External/meshoptimizer/src/indexcodec.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details 2 | #include "meshoptimizer.h" 3 | 4 | #include 5 | #include 6 | 7 | // This work is based on: 8 | // Fabian Giesen. Simple lossless index buffer compression & follow-up. 2013 9 | // Conor Stokes. Vertex Cache Optimised Index Buffer Compression. 
2014 10 | namespace meshopt 11 | { 12 | 13 | typedef unsigned int VertexFifo[16]; 14 | typedef unsigned int EdgeFifo[16][2]; 15 | 16 | static const unsigned int kTriangleIndexOrder[3][3] = { 17 | {0, 1, 2}, 18 | {1, 2, 0}, 19 | {2, 0, 1}, 20 | }; 21 | 22 | static const unsigned char kCodeAuxEncodingTable[16] = { 23 | 0x00, 0x76, 0x87, 0x56, 0x67, 0x78, 0xa9, 0x86, 0x65, 0x89, 0x68, 0x98, 0x01, 0x69, 0, 0, 24 | }; 25 | 26 | static int rotateTriangle(unsigned int a, unsigned int b, unsigned int c, unsigned int next) 27 | { 28 | (void)a; 29 | 30 | return (b == next) ? 1 : (c == next) ? 2 : 0; 31 | } 32 | 33 | static int getEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, unsigned int c, size_t offset) 34 | { 35 | for (int i = 0; i < 16; ++i) 36 | { 37 | unsigned int index = (offset - 1 - i) & 15; 38 | unsigned int e0 = fifo[index][0]; 39 | unsigned int e1 = fifo[index][1]; 40 | 41 | if (e0 == a && e1 == b) 42 | return (i << 2) | 0; 43 | if (e0 == b && e1 == c) 44 | return (i << 2) | 1; 45 | if (e0 == c && e1 == a) 46 | return (i << 2) | 2; 47 | } 48 | 49 | return -1; 50 | } 51 | 52 | static void pushEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, size_t& offset) 53 | { 54 | fifo[offset][0] = a; 55 | fifo[offset][1] = b; 56 | offset = (offset + 1) & 15; 57 | } 58 | 59 | static int getVertexFifo(VertexFifo fifo, unsigned int v, size_t offset) 60 | { 61 | for (int i = 0; i < 16; ++i) 62 | { 63 | unsigned int index = (offset - 1 - i) & 15; 64 | 65 | if (fifo[index] == v) 66 | return i; 67 | } 68 | 69 | return -1; 70 | } 71 | 72 | static void pushVertexFifo(VertexFifo fifo, unsigned int v, size_t& offset, int cond = 1) 73 | { 74 | fifo[offset] = v; 75 | offset = (offset + cond) & 15; 76 | } 77 | 78 | static void encodeVByte(unsigned char*& data, unsigned int v) 79 | { 80 | // encode 32-bit value in up to 5 7-bit groups 81 | do 82 | { 83 | *data++ = (v & 127) | (v > 127 ? 
128 : 0); 84 | v >>= 7; 85 | } while (v); 86 | } 87 | 88 | static unsigned int decodeVByte(const unsigned char*& data) 89 | { 90 | unsigned char lead = *data++; 91 | 92 | // fast path: single byte 93 | if (lead < 128) 94 | return lead; 95 | 96 | // slow path: up to 4 extra bytes 97 | // note that this loop always terminates, which is important for malformed data 98 | unsigned int result = lead & 127; 99 | unsigned int shift = 7; 100 | 101 | for (int i = 0; i < 4; ++i) 102 | { 103 | unsigned char group = *data++; 104 | result |= (group & 127) << shift; 105 | shift += 7; 106 | 107 | if (group < 128) 108 | break; 109 | } 110 | 111 | return result; 112 | } 113 | 114 | static void encodeIndex(unsigned char*& data, unsigned int index, unsigned int next, unsigned int last) 115 | { 116 | (void)next; 117 | 118 | unsigned int d = index - last; 119 | unsigned int v = (d << 1) ^ (int(d) >> 31); 120 | 121 | encodeVByte(data, v); 122 | } 123 | 124 | static unsigned int decodeIndex(const unsigned char*& data, unsigned int next, unsigned int last) 125 | { 126 | (void)next; 127 | 128 | unsigned int v = decodeVByte(data); 129 | unsigned int d = (v >> 1) ^ -int(v & 1); 130 | 131 | return last + d; 132 | } 133 | 134 | static int getCodeAuxIndex(unsigned char v, const unsigned char* table) 135 | { 136 | for (int i = 0; i < 16; ++i) 137 | if (table[i] == v) 138 | return i; 139 | 140 | return -1; 141 | } 142 | } 143 | 144 | size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count) 145 | { 146 | using namespace meshopt; 147 | 148 | assert(index_count % 3 == 0); 149 | 150 | // the minimum valid encoding is 1 byte per triangle and a 16-byte codeaux table 151 | if (buffer_size < index_count / 3 + 16) 152 | return 0; 153 | 154 | EdgeFifo edgefifo; 155 | memset(edgefifo, -1, sizeof(edgefifo)); 156 | 157 | VertexFifo vertexfifo; 158 | memset(vertexfifo, -1, sizeof(vertexfifo)); 159 | 160 | size_t edgefifooffset = 0; 161 | size_t 
vertexfifooffset = 0; 162 | 163 | unsigned int next = 0; 164 | unsigned int last = 0; 165 | 166 | unsigned char* code = buffer; 167 | unsigned char* data = buffer + index_count / 3; 168 | unsigned char* data_safe_end = buffer + buffer_size - 16; 169 | 170 | // use static encoding table; it's possible to pack the result and then build an optimal table and repack 171 | // for now we keep it simple and use the table that has been generated based on symbol frequency on a training mesh set 172 | const unsigned char* codeaux_table = kCodeAuxEncodingTable; 173 | 174 | // two last entries of codeaux_table are redundant - they are never referenced by the encoding 175 | // make sure that they are both zero, since they can serve as version/other data in the future 176 | assert(codeaux_table[14] == 0 && codeaux_table[15] == 0); 177 | 178 | for (size_t i = 0; i < index_count; i += 3) 179 | { 180 | // make sure we have enough space to write a triangle 181 | // each triangle writes at most 16 bytes: 1b for codeaux and 5b for each free index 182 | // after this we can be sure we can write without extra bounds checks 183 | if (data > data_safe_end) 184 | return 0; 185 | 186 | int fer = getEdgeFifo(edgefifo, indices[i + 0], indices[i + 1], indices[i + 2], edgefifooffset); 187 | 188 | if (fer >= 0 && (fer >> 2) < 15) 189 | { 190 | const unsigned int* order = kTriangleIndexOrder[fer & 3]; 191 | 192 | unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]]; 193 | 194 | // encode edge index and vertex fifo index, next or free index 195 | int fe = fer >> 2; 196 | int fc = getVertexFifo(vertexfifo, c, vertexfifooffset); 197 | 198 | int fec = (fc >= 1 && fc < 15) ? fc : (c == next) ? 
(next++, 0) : 15; 199 | 200 | *code++ = static_cast((fe << 4) | fec); 201 | 202 | // note that we need to update the last index since free indices are delta-encoded 203 | if (fec == 15) 204 | encodeIndex(data, c, next, last), last = c; 205 | 206 | // we only need to push third vertex since first two are likely already in the vertex fifo 207 | if (fec == 0 || fec == 15) 208 | pushVertexFifo(vertexfifo, c, vertexfifooffset); 209 | 210 | // we only need to push two new edges to edge fifo since the third one is already there 211 | pushEdgeFifo(edgefifo, c, b, edgefifooffset); 212 | pushEdgeFifo(edgefifo, a, c, edgefifooffset); 213 | } 214 | else 215 | { 216 | const unsigned int* order = kTriangleIndexOrder[rotateTriangle(indices[i + 0], indices[i + 1], indices[i + 2], next)]; 217 | 218 | unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]]; 219 | 220 | int fb = getVertexFifo(vertexfifo, b, vertexfifooffset); 221 | int fc = getVertexFifo(vertexfifo, c, vertexfifooffset); 222 | 223 | // after rotation, a is almost always equal to next, so we don't waste bits on FIFO encoding for a 224 | int fea = (a == next) ? (next++, 0) : 15; 225 | int feb = (fb >= 0 && fb < 14) ? (fb + 1) : (b == next) ? (next++, 0) : 15; 226 | int fec = (fc >= 0 && fc < 14) ? (fc + 1) : (c == next) ? 
(next++, 0) : 15; 227 | 228 | // we encode feb & fec in 4 bits using a table if possible, and as a full byte otherwise 229 | unsigned char codeaux = static_cast((feb << 4) | fec); 230 | int codeauxindex = getCodeAuxIndex(codeaux, codeaux_table); 231 | 232 | // <14 encodes an index into codeaux table, 14 encodes fea=0, 15 encodes fea=15 233 | if (fea == 0 && codeauxindex >= 0 && codeauxindex < 14) 234 | { 235 | *code++ = static_cast((15 << 4) | codeauxindex); 236 | } 237 | else 238 | { 239 | *code++ = static_cast((15 << 4) | 14 | fea); 240 | *data++ = codeaux; 241 | } 242 | 243 | // note that we need to update the last index since free indices are delta-encoded 244 | if (fea == 15) 245 | encodeIndex(data, a, next, last), last = a; 246 | 247 | if (feb == 15) 248 | encodeIndex(data, b, next, last), last = b; 249 | 250 | if (fec == 15) 251 | encodeIndex(data, c, next, last), last = c; 252 | 253 | // only push vertices that weren't already in fifo 254 | if (fea == 0 || fea == 15) 255 | pushVertexFifo(vertexfifo, a, vertexfifooffset); 256 | 257 | if (feb == 0 || feb == 15) 258 | pushVertexFifo(vertexfifo, b, vertexfifooffset); 259 | 260 | if (fec == 0 || fec == 15) 261 | pushVertexFifo(vertexfifo, c, vertexfifooffset); 262 | 263 | // all three edges aren't in the fifo; pushing all of them is important so that we can match them for later triangles 264 | pushEdgeFifo(edgefifo, b, a, edgefifooffset); 265 | pushEdgeFifo(edgefifo, c, b, edgefifooffset); 266 | pushEdgeFifo(edgefifo, a, c, edgefifooffset); 267 | } 268 | } 269 | 270 | // make sure we have enough space to write codeaux table 271 | if (data > data_safe_end) 272 | return 0; 273 | 274 | // add codeaux encoding table to the end of the stream; this is used for decoding codeaux *and* as padding 275 | // we need padding for decoding to be able to assume that each triangle is encoded as <= 16 bytes of extra data 276 | // this is enough space for aux byte + 5 bytes per varint index which is the absolute worst case for any 
input 277 | for (size_t i = 0; i < 16; ++i) 278 | { 279 | *data++ = codeaux_table[i]; 280 | } 281 | 282 | assert(data >= buffer + index_count / 3 + 16); 283 | assert(data <= buffer + buffer_size); 284 | 285 | return data - buffer; 286 | } 287 | 288 | size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count) 289 | { 290 | assert(index_count % 3 == 0); 291 | 292 | // compute number of bits required for each index 293 | unsigned int vertex_bits = 1; 294 | 295 | while (vertex_bits < 32 && vertex_count > size_t(1) << vertex_bits) 296 | vertex_bits++; 297 | 298 | // worst-case encoding is 2 header bytes + 3 varint-7 encoded index deltas 299 | unsigned int vertex_groups = (vertex_bits + 1 + 6) / 7; 300 | 301 | return (index_count / 3) * (2 + 3 * vertex_groups) + 16; 302 | } 303 | 304 | int meshopt_decodeIndexBuffer(unsigned int* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size) 305 | { 306 | using namespace meshopt; 307 | 308 | assert(index_count % 3 == 0); 309 | 310 | // the minimum valid encoding is 1 byte per triangle and a 16-byte codeaux table 311 | if (buffer_size < index_count / 3 + 16) 312 | return -1; 313 | 314 | EdgeFifo edgefifo; 315 | memset(edgefifo, -1, sizeof(edgefifo)); 316 | 317 | VertexFifo vertexfifo; 318 | memset(vertexfifo, -1, sizeof(vertexfifo)); 319 | 320 | size_t edgefifooffset = 0; 321 | size_t vertexfifooffset = 0; 322 | 323 | unsigned int next = 0; 324 | unsigned int last = 0; 325 | 326 | // since we store 16-byte codeaux table at the end, triangle data has to begin before data_safe_end 327 | const unsigned char* code = buffer; 328 | const unsigned char* data = buffer + index_count / 3; 329 | const unsigned char* data_safe_end = buffer + buffer_size - 16; 330 | 331 | const unsigned char* codeaux_table = data_safe_end; 332 | 333 | for (size_t i = 0; i < index_count; i += 3) 334 | { 335 | // make sure we have enough data to read for a triangle 336 | // each triangle reads at most 16 bytes of 
data: 1b for codeaux and 5b for each free index 337 | // after this we can be sure we can read without extra bounds checks 338 | if (data > data_safe_end) 339 | return -2; 340 | 341 | unsigned char codetri = *code++; 342 | 343 | if (codetri < 0xf0) 344 | { 345 | int fe = codetri >> 4; 346 | 347 | // fifo reads are wrapped around 16 entry buffer 348 | unsigned int a = edgefifo[(edgefifooffset - 1 - fe) & 15][0]; 349 | unsigned int b = edgefifo[(edgefifooffset - 1 - fe) & 15][1]; 350 | 351 | int fec = codetri & 15; 352 | 353 | // note: this is the most common path in the entire decoder 354 | // inside this if we try to stay branchless (by using cmov/etc.) since these aren't predictable 355 | if (fec != 15) 356 | { 357 | // fifo reads are wrapped around 16 entry buffer 358 | unsigned int cf = vertexfifo[(vertexfifooffset - 1 - fec) & 15]; 359 | unsigned int c = (fec == 0) ? next : cf; 360 | 361 | int fec0 = fec == 0; 362 | next += fec0; 363 | 364 | // output triangle 365 | destination[i + 0] = a; 366 | destination[i + 1] = b; 367 | destination[i + 2] = c; 368 | 369 | // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly 370 | pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0); 371 | 372 | pushEdgeFifo(edgefifo, c, b, edgefifooffset); 373 | pushEdgeFifo(edgefifo, a, c, edgefifooffset); 374 | } 375 | else 376 | { 377 | unsigned int c = 0; 378 | 379 | // note that we need to update the last index since free indices are delta-encoded 380 | last = c = decodeIndex(data, next, last); 381 | 382 | // output triangle 383 | destination[i + 0] = a; 384 | destination[i + 1] = b; 385 | destination[i + 2] = c; 386 | 387 | // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly 388 | pushVertexFifo(vertexfifo, c, vertexfifooffset); 389 | 390 | pushEdgeFifo(edgefifo, c, b, edgefifooffset); 391 | pushEdgeFifo(edgefifo, a, c, edgefifooffset); 392 | } 393 | } 394 | else 
395 | { 396 | // fast path: read codeaux from the table 397 | if (codetri < 0xfe) 398 | { 399 | unsigned char codeaux = codeaux_table[codetri & 15]; 400 | 401 | // note: table can't contain feb/fec=15 402 | int feb = codeaux >> 4; 403 | int fec = codeaux & 15; 404 | 405 | // fifo reads are wrapped around 16 entry buffer 406 | // also note that we increment next for all three vertices before decoding indices - this matches encoder behavior 407 | unsigned int a = next++; 408 | 409 | unsigned int bf = vertexfifo[(vertexfifooffset - feb) & 15]; 410 | unsigned int b = (feb == 0) ? next : bf; 411 | 412 | int feb0 = feb == 0; 413 | next += feb0; 414 | 415 | unsigned int cf = vertexfifo[(vertexfifooffset - fec) & 15]; 416 | unsigned int c = (fec == 0) ? next : cf; 417 | 418 | int fec0 = fec == 0; 419 | next += fec0; 420 | 421 | // output triangle 422 | destination[i + 0] = a; 423 | destination[i + 1] = b; 424 | destination[i + 2] = c; 425 | 426 | // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly 427 | pushVertexFifo(vertexfifo, a, vertexfifooffset); 428 | pushVertexFifo(vertexfifo, b, vertexfifooffset, feb0); 429 | pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0); 430 | 431 | pushEdgeFifo(edgefifo, b, a, edgefifooffset); 432 | pushEdgeFifo(edgefifo, c, b, edgefifooffset); 433 | pushEdgeFifo(edgefifo, a, c, edgefifooffset); 434 | } 435 | else 436 | { 437 | // slow path: read a full byte for codeaux instead of using a table lookup 438 | unsigned char codeaux = *data++; 439 | 440 | int fea = codetri == 0xfe ? 0 : 15; 441 | int feb = codeaux >> 4; 442 | int fec = codeaux & 15; 443 | 444 | // fifo reads are wrapped around 16 entry buffer 445 | // also note that we increment next for all three vertices before decoding indices - this matches encoder behavior 446 | unsigned int a = (fea == 0) ? next++ : 0; 447 | unsigned int b = (feb == 0) ? 
next++ : vertexfifo[(vertexfifooffset - feb) & 15]; 448 | unsigned int c = (fec == 0) ? next++ : vertexfifo[(vertexfifooffset - fec) & 15]; 449 | 450 | // note that we need to update the last index since free indices are delta-encoded 451 | if (fea == 15) 452 | last = a = decodeIndex(data, next, last); 453 | 454 | if (feb == 15) 455 | last = b = decodeIndex(data, next, last); 456 | 457 | if (fec == 15) 458 | last = c = decodeIndex(data, next, last); 459 | 460 | // output triangle 461 | destination[i + 0] = a; 462 | destination[i + 1] = b; 463 | destination[i + 2] = c; 464 | 465 | // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly 466 | pushVertexFifo(vertexfifo, a, vertexfifooffset); 467 | pushVertexFifo(vertexfifo, b, vertexfifooffset, (feb == 0) | (feb == 15)); 468 | pushVertexFifo(vertexfifo, c, vertexfifooffset, (fec == 0) | (fec == 15)); 469 | 470 | pushEdgeFifo(edgefifo, b, a, edgefifooffset); 471 | pushEdgeFifo(edgefifo, c, b, edgefifooffset); 472 | pushEdgeFifo(edgefifo, a, c, edgefifooffset); 473 | } 474 | } 475 | } 476 | 477 | // we should've read all data bytes and stopped at the boundary between data and codeaux table 478 | if (data != data_safe_end) 479 | return -3; 480 | 481 | return 0; 482 | } 483 | -------------------------------------------------------------------------------- /Source/Barycentrics.cpp: -------------------------------------------------------------------------------- 1 | #include "Barycentrics.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #pragma warning(push) 8 | #pragma warning(disable: 4996) 9 | #define TINYOBJLOADER_IMPLEMENTATION 10 | #include 11 | #pragma warning(pop) 12 | 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | AppConfig g_appConfig; 19 | 20 | int main(int argc, char** argv) 21 | { 22 | AppConfig& cfg = g_appConfig; 23 | 24 | cfg.name = "Barycentrics (" RUSH_RENDER_API_NAME ")"; 25 | 26 | cfg.width = 1280; 27 | cfg.height = 720; 28 | 
cfg.argc = argc; 29 | cfg.argv = argv; 30 | cfg.resizable = true; 31 | 32 | #ifndef NDEBUG 33 | cfg.debug = true; 34 | Log::breakOnError = true; 35 | #endif 36 | 37 | return Platform_Main(cfg); 38 | } 39 | 40 | BarycentricsApp::BarycentricsApp() 41 | : BaseApplication() 42 | , m_boundingBox(Vec3(0.0f), Vec3(0.0f)) 43 | { 44 | Gfx_SetPresentInterval(1); 45 | 46 | m_windowEvents.setOwner(m_window); 47 | 48 | GfxShaderBindings bindings; 49 | bindings.addConstantBuffer("constantBuffer0", 0); // scene consants 50 | bindings.addConstantBuffer("constantBuffer1", 1); // material constants 51 | bindings.addCombinedSampler("sampler0", 2); // albedo texture sampler 52 | bindings.addStorageBuffer("vertexBuffer", 3); 53 | bindings.addStorageBuffer("indexBuffer", 4); 54 | 55 | GfxVertexFormatDesc vfDefaultDesc; // TODO: use de-interleaved vertex streams and packed vertices 56 | vfDefaultDesc.add(0, GfxVertexFormatDesc::DataType::Float3, GfxVertexFormatDesc::Semantic::Position, 0); 57 | vfDefaultDesc.add(0, GfxVertexFormatDesc::DataType::Float2, GfxVertexFormatDesc::Semantic::Texcoord, 0); 58 | 59 | GfxVertexFormatDesc vfEmptyDesc; 60 | 61 | GfxVertexShaderRef vsIndexed; 62 | vsIndexed.takeover(Gfx_CreateVertexShader(shaderFromFile("Shaders/ModelIndexed.vert.spv"))); 63 | 64 | struct SpecializationData { u32 useTexture; }; 65 | GfxSpecializationConstant specializationConstantLayout; 66 | specializationConstantLayout.id = 0; 67 | specializationConstantLayout.offset = 0; 68 | specializationConstantLayout.size = sizeof(SpecializationData); 69 | 70 | enum { specializationCount = 2 }; 71 | SpecializationData specializationData[specializationCount] = { 0, 1 }; // non-textured and textured variants 72 | 73 | auto setupSpecialization = [&](GfxTechniqueDesc& techniqueDesc, u32 variantIndex) 74 | { 75 | techniqueDesc.specializationConstants = &specializationConstantLayout; 76 | techniqueDesc.specializationConstantCount = 1; 77 | techniqueDesc.specializationData = 
&specializationData[variantIndex]; 78 | techniqueDesc.specializationDataSize = sizeof(SpecializationData); 79 | }; 80 | 81 | { 82 | GfxVertexShaderRef vs; 83 | vs.takeover(Gfx_CreateVertexShader(shaderFromFile("Shaders/Model.vert.spv"))); 84 | 85 | GfxPixelShaderRef ps; 86 | ps.takeover(Gfx_CreatePixelShader(shaderFromFile("Shaders/Model.frag.spv"))); 87 | 88 | GfxVertexFormatRef vf; 89 | vf.takeover(Gfx_CreateVertexFormat(vfEmptyDesc)); 90 | 91 | GfxTechniqueDesc techniqueDesc(ps.get(), vs.get(), vf.get(), &bindings); 92 | 93 | for (u32 i=0; igetAspect(); 224 | float fov = 1.0f; 225 | 226 | m_camera = Camera(aspect, fov, 0.25f, 10000.0f); 227 | 228 | if (g_appConfig.argc >= 2) 229 | { 230 | const char* modelFilename = g_appConfig.argv[1]; 231 | m_statusString = std::string("Model: ") + modelFilename; 232 | m_valid = loadModel(modelFilename); 233 | 234 | Vec3 center = m_boundingBox.center(); 235 | Vec3 dimensions = m_boundingBox.dimensions(); 236 | float longestSide = dimensions.reduceMax(); 237 | if (longestSide != 0) 238 | { 239 | float scale = 100.0f / longestSide; 240 | m_worldTransform = Mat4::scaleTranslate(scale, -center*scale); 241 | } 242 | 243 | m_boundingBox.m_min = m_worldTransform * m_boundingBox.m_min; 244 | m_boundingBox.m_max = m_worldTransform * m_boundingBox.m_max; 245 | 246 | m_camera.lookAt(Vec3(m_boundingBox.m_max) + Vec3(2.0f), m_boundingBox.center()); 247 | } 248 | else 249 | { 250 | // Default tunnel test model 251 | m_valid = loadTunnelTestModel(); 252 | 253 | Vec3 position = m_boundingBox.center(); 254 | position.z = m_boundingBox.m_min.z; 255 | m_camera.lookAt(position, m_boundingBox.center()); 256 | } 257 | 258 | m_interpolatedCamera = m_camera; 259 | 260 | m_cameraMan = new CameraManipulator(); 261 | } 262 | 263 | BarycentricsApp::~BarycentricsApp() 264 | { 265 | m_windowEvents.setOwner(nullptr); 266 | 267 | delete m_cameraMan; 268 | 269 | Gfx_Release(m_vertexBuffer); 270 | Gfx_Release(m_indexBuffer); 271 | 
Gfx_Release(m_constantBuffer); 272 | } 273 | 274 | void BarycentricsApp::update() 275 | { 276 | TimingScope timingScope(m_stats.cpuTotal); 277 | 278 | m_stats.gpuWorld.add(Gfx_Stats().customTimer[Timestamp_World]); 279 | m_stats.gpuUI.add(Gfx_Stats().customTimer[Timestamp_UI]); 280 | m_stats.gpuTotal.add(Gfx_Stats().lastFrameGpuTime); 281 | 282 | Gfx_ResetStats(); 283 | 284 | const float dt = (float)m_timer.time(); 285 | m_timer.reset(); 286 | 287 | for (const WindowEvent& e : m_windowEvents) 288 | { 289 | switch (e.type) 290 | { 291 | case WindowEventType_Scroll: 292 | if (e.scroll.y > 0) 293 | { 294 | m_cameraScale *= 1.25f; 295 | } 296 | else 297 | { 298 | m_cameraScale *= 0.9f; 299 | } 300 | Log::message("Camera scale: %f", m_cameraScale); 301 | break; 302 | case WindowEventType_KeyDown: 303 | { 304 | if (e.code == Key_0) 305 | { 306 | m_mode = Mode::Indexed; 307 | } 308 | else if (e.code == Key_1) 309 | { 310 | m_mode = Mode::NonIndexed; 311 | } 312 | else if (e.code == Key_2) 313 | { 314 | m_mode = Mode::GeometryShader; 315 | } 316 | else if (e.code == Key_3) 317 | { 318 | m_mode = Mode::Manual; 319 | } 320 | else if (e.code == Key_4 && m_techniquePassthroughGS[m_useTexture].valid()) 321 | { 322 | m_mode = Mode::PassthroughGS; 323 | } 324 | else if (e.code == Key_5 && m_techniqueNativeAMD[m_useTexture].valid()) 325 | { 326 | m_mode = Mode::NativeAMD; 327 | } 328 | else if (e.code == Key_T) 329 | { 330 | m_useTexture = !m_useTexture; 331 | } 332 | else if (e.code == Key_H) 333 | { 334 | m_showUI = !m_showUI; 335 | } 336 | break; 337 | } 338 | default: 339 | break; 340 | } 341 | } 342 | 343 | float clipNear = 0.25f * m_cameraScale; 344 | float clipFar = 10000.0f * m_cameraScale; 345 | m_camera.setClip(clipNear, clipFar); 346 | m_camera.setAspect(m_window->getAspect()); 347 | m_cameraMan->setMoveSpeed(20.0f * m_cameraScale); 348 | 349 | m_cameraMan->update(&m_camera, dt, m_window->getKeyboardState(), m_window->getMouseState()); 350 | 351 | 
m_interpolatedCamera.blendTo(m_camera, 0.1f, 0.125f); 352 | 353 | m_windowEvents.clear(); 354 | 355 | render(); 356 | } 357 | 358 | void BarycentricsApp::render() 359 | { 360 | const GfxCapability& caps = Gfx_GetCapability(); 361 | 362 | Mat4 matView = m_interpolatedCamera.buildViewMatrix(); 363 | Mat4 matProj = m_interpolatedCamera.buildProjMatrix(caps.projectionFlags); 364 | 365 | Constants constants; 366 | constants.matView = matView.transposed(); 367 | constants.matProj = matProj.transposed(); 368 | constants.matViewProj = (matView * matProj).transposed(); 369 | constants.matWorld = m_worldTransform.transposed(); 370 | constants.cameraPos = Vec4(m_interpolatedCamera.getPosition()); 371 | 372 | Gfx_UpdateBuffer(m_ctx, m_constantBuffer, &constants, sizeof(constants)); 373 | 374 | GfxPassDesc passDesc; 375 | passDesc.flags = GfxPassFlags::ClearAll; 376 | passDesc.clearColors[0] = ColorRGBA8(11, 22, 33); 377 | Gfx_BeginPass(m_ctx, passDesc); 378 | 379 | Gfx_SetViewport(m_ctx, GfxViewport(m_window->getSize())); 380 | Gfx_SetScissorRect(m_ctx, m_window->getSize()); 381 | 382 | Gfx_SetDepthStencilState(m_ctx, m_depthStencilStates.writeLessEqual); 383 | 384 | if (m_valid) 385 | { 386 | TimingScope timingScope(m_stats.cpuWorld); 387 | GfxTimerScope gpuTimerScopeWorld(m_ctx, Timestamp_World); 388 | 389 | Gfx_SetBlendState(m_ctx, m_blendStates.opaque); 390 | 391 | switch (m_mode) 392 | { 393 | case Mode::Indexed: 394 | Gfx_SetTechnique(m_ctx, m_techniqueIndexed[m_useTexture].get()); 395 | break; 396 | case Mode::NonIndexed: 397 | Gfx_SetTechnique(m_ctx, m_techniqueNonIndexed[m_useTexture].get()); 398 | break; 399 | case Mode::GeometryShader: 400 | Gfx_SetTechnique(m_ctx, m_techniqueGeometryShader[m_useTexture].get()); 401 | break; 402 | case Mode::Manual: 403 | Gfx_SetTechnique(m_ctx, m_techniqueManual[m_useTexture].get()); 404 | break; 405 | case Mode::PassthroughGS: 406 | Gfx_SetTechnique(m_ctx, m_techniquePassthroughGS[m_useTexture].get()); 407 | break; 408 | case 
Mode::NativeAMD: 409 | Gfx_SetTechnique(m_ctx, m_techniqueNativeAMD[m_useTexture].get()); 410 | break; 411 | default: 412 | RUSH_LOG_ERROR("Rendering mode '%s' not implemented", toString(m_mode)); 413 | } 414 | 415 | if (m_mode != Mode::NonIndexed) 416 | { 417 | Gfx_SetVertexStream(m_ctx, 0, m_vertexBuffer); 418 | Gfx_SetIndexStream(m_ctx, m_indexBuffer); 419 | } 420 | 421 | Gfx_SetConstantBuffer(m_ctx, 0, m_constantBuffer); 422 | 423 | Gfx_SetStorageBuffer(m_ctx, 0, m_vertexBuffer); 424 | Gfx_SetStorageBuffer(m_ctx, 1, m_indexBuffer); 425 | 426 | Gfx_SetConstantBuffer(m_ctx, 1, m_defaultMaterial.constantBuffer); 427 | Gfx_SetTexture(m_ctx, GfxStage::Pixel, 0, m_defaultMaterial.albedoTexture, m_samplerStates.anisotropicWrap); 428 | 429 | if (m_mode == Mode::NonIndexed) 430 | { 431 | Gfx_Draw(m_ctx, 0, m_indexCount); 432 | } 433 | else 434 | { 435 | Gfx_DrawIndexed(m_ctx, m_indexCount, 0, 0, m_vertexCount); 436 | } 437 | } 438 | 439 | // Draw UI on top 440 | if (m_showUI) 441 | { 442 | GfxTimerScope gpuTimerScopeUI(m_ctx, Timestamp_UI); 443 | TimingScope timingScope(m_stats.cpuUI); 444 | 445 | Gfx_SetBlendState(m_ctx, m_blendStates.lerp); 446 | Gfx_SetDepthStencilState(m_ctx, m_depthStencilStates.disable); 447 | 448 | m_prim->begin2D(m_window->getSize()); 449 | 450 | m_font->setScale(2.0f); 451 | 452 | Vec2 textOrigin = Vec2(10.0f); 453 | Vec2 pos = textOrigin; 454 | pos = m_font->draw(m_prim, pos, m_statusString.c_str()); 455 | pos = m_font->draw(m_prim, pos, "\n"); 456 | pos.x = textOrigin.x; 457 | 458 | char tempString[1024]; 459 | 460 | pos = m_font->draw(m_prim, pos, "Mode: "); 461 | pos = m_font->draw(m_prim, pos, toString(m_mode), ColorRGBA8(255, 255, 64)); 462 | pos = m_font->draw(m_prim, pos, "\n"); 463 | pos.x = textOrigin.x; 464 | 465 | const GfxStats& stats = Gfx_Stats(); 466 | sprintf_s(tempString, 467 | "Textured: %d\n" 468 | "Draw calls: %d\n" 469 | "Vertices: %d\n" 470 | "GPU total: %.2f ms\n" 471 | "> World: %.2f\n" 472 | "> UI: %.2f\n" 473 | "CPU 
time: %.2f ms\n" 474 | "> World: %.2f ms\n" 475 | "> UI: %.2f ms", 476 | int(m_useTexture), 477 | stats.drawCalls, 478 | stats.vertices, 479 | m_stats.gpuTotal.get() * 1000.0f, 480 | m_stats.gpuWorld.get() * 1000.0f, 481 | m_stats.gpuUI.get() * 1000.0f, 482 | m_stats.cpuTotal.get() * 1000.0f, 483 | m_stats.cpuWorld.get() * 1000.0f, 484 | m_stats.cpuUI.get() * 1000.0f); 485 | pos = m_font->draw(m_prim, pos, tempString); 486 | pos.x = textOrigin.x; 487 | 488 | pos = Vec2(10, m_window->getSizeFloat().y - 30); 489 | pos = m_font->draw(m_prim, pos, "Controls: number keys to change modes, 'T' to toggle texturing, 'H' to hide UI"); 490 | 491 | m_prim->end2D(); 492 | } 493 | else 494 | { 495 | GfxTimerScope gpuTimerScopeUI(m_ctx, Timestamp_UI); 496 | m_stats.cpuUI.add(0); 497 | } 498 | 499 | Gfx_EndPass(m_ctx); 500 | } 501 | 502 | bool BarycentricsApp::loadModel(const char* filename) 503 | { 504 | Log::message("Loading model '%s'", filename); 505 | 506 | std::vector shapes; 507 | std::vector materials; 508 | std::string errors; 509 | 510 | std::string directory = directoryFromFilename(filename); 511 | 512 | bool loaded = tinyobj::LoadObj(shapes, materials, errors, filename, directory.c_str()); 513 | if (!loaded) 514 | { 515 | Log::error("Could not load model from '%s'\n%s\n", filename, errors.c_str()); 516 | return false; 517 | } 518 | 519 | std::vector vertices; 520 | std::vector indices; 521 | 522 | m_boundingBox.expandInit(); 523 | 524 | for (const auto& shape : shapes) 525 | { 526 | u32 firstVertex = (u32)vertices.size(); 527 | const auto& mesh = shape.mesh; 528 | 529 | const u32 vertexCount = (u32)mesh.positions.size() / 3; 530 | 531 | const bool haveTexcoords = !mesh.texcoords.empty(); 532 | 533 | for (u32 i = 0; i < vertexCount; ++i) 534 | { 535 | Vertex v; 536 | 537 | v.position.x = mesh.positions[i * 3 + 0]; 538 | v.position.y = mesh.positions[i * 3 + 1]; 539 | v.position.z = mesh.positions[i * 3 + 2]; 540 | 541 | m_boundingBox.expand(v.position); 542 | 543 | if 
(haveTexcoords) 544 | { 545 | v.texcoord.x = mesh.texcoords[i * 2 + 0]; 546 | v.texcoord.y = mesh.texcoords[i * 2 + 1]; 547 | } 548 | else 549 | { 550 | v.texcoord = Vec2(0.0f); 551 | } 552 | 553 | v.position.x = -v.position.x; 554 | 555 | vertices.push_back(v); 556 | } 557 | 558 | const u32 triangleCount = (u32)mesh.indices.size() / 3; 559 | for (u32 triangleIt = 0; triangleIt < triangleCount; ++triangleIt) 560 | { 561 | indices.push_back(mesh.indices[triangleIt * 3 + 0] + firstVertex); 562 | indices.push_back(mesh.indices[triangleIt * 3 + 2] + firstVertex); 563 | indices.push_back(mesh.indices[triangleIt * 3 + 1] + firstVertex); 564 | } 565 | } 566 | 567 | m_vertexCount = (u32)vertices.size(); 568 | m_indexCount = (u32)indices.size(); 569 | 570 | meshopt_optimizeVertexCache(indices.data(), indices.data(), m_indexCount, m_vertexCount); 571 | 572 | GfxBufferDesc vbDesc(GfxBufferFlags::Vertex | GfxBufferFlags::Storage, GfxFormat_Unknown, m_vertexCount, sizeof(Vertex)); 573 | m_vertexBuffer = Gfx_CreateBuffer(vbDesc, vertices.data()); 574 | 575 | GfxBufferDesc ibDesc(GfxBufferFlags::Index | GfxBufferFlags::Storage, GfxFormat_R32_Uint, m_indexCount, 4); 576 | m_indexBuffer = Gfx_CreateBuffer(ibDesc, indices.data()); 577 | 578 | return true; 579 | } 580 | 581 | 582 | bool BarycentricsApp::loadTunnelTestModel() 583 | { 584 | Log::message("Creating tunnel test model"); 585 | 586 | std::vector vertices; 587 | std::vector indices; 588 | 589 | m_boundingBox.expandInit(); 590 | 591 | const float near = 0.0f; 592 | const float far = 100.0f; 593 | const float radius = 1.0f; 594 | const float uscale = 10.0f; 595 | 596 | const u32 circleVertexCount = 50; 597 | 598 | // Last vertices have unique tex coords so need them 599 | for (u32 i = 0; i <= circleVertexCount; ++i) 600 | { 601 | float n = static_cast(i) / static_cast(circleVertexCount); 602 | 603 | Vertex v; 604 | v.position.x = radius * std::sin(Rush::TwoPi * n); 605 | v.position.y = radius * std::cos(Rush::TwoPi * n); 606 | 
v.texcoord.x = n * uscale; 607 | 608 | // Near vertex 609 | v.position.z = near; 610 | v.texcoord.y = near; 611 | m_boundingBox.expand(v.position); 612 | vertices.push_back(v); 613 | 614 | // Far vertex 615 | v.position.z = far; 616 | v.texcoord.y = far; 617 | m_boundingBox.expand(v.position); 618 | vertices.push_back(v); 619 | } 620 | 621 | m_vertexCount = (u32)vertices.size(); 622 | 623 | // One quad (connecting near/far pair of vertices) per segment 624 | for (u32 i = 0; i < circleVertexCount; ++i) 625 | { 626 | int i0 = (2*i + 0); 627 | int i1 = (2*i + 1); 628 | int i2 = (2*i + 2); 629 | int i3 = (2*i + 3); 630 | 631 | indices.push_back(i0); 632 | indices.push_back(i1); 633 | indices.push_back(i2); 634 | 635 | indices.push_back(i2); 636 | indices.push_back(i1); 637 | indices.push_back(i3); 638 | } 639 | 640 | m_indexCount = (u32)indices.size(); 641 | 642 | //meshopt_optimizeVertexCache(indices.data(), indices.data(), m_indexCount, m_vertexCount); 643 | 644 | GfxBufferDesc vbDesc(GfxBufferFlags::Vertex | GfxBufferFlags::Storage, GfxFormat_Unknown, m_vertexCount, sizeof(Vertex)); 645 | m_vertexBuffer = Gfx_CreateBuffer(vbDesc, vertices.data()); 646 | 647 | GfxBufferDesc ibDesc(GfxBufferFlags::Index | GfxBufferFlags::Storage, GfxFormat_R32_Uint, m_indexCount, 4); 648 | m_indexBuffer = Gfx_CreateBuffer(ibDesc, indices.data()); 649 | 650 | return true; 651 | } -------------------------------------------------------------------------------- /External/meshoptimizer/src/meshoptimizer.h: -------------------------------------------------------------------------------- 1 | /** 2 | * meshoptimizer - version 0.7 3 | * 4 | * Copyright (C) 2016-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) 5 | * Report bugs and download new versions at https://github.com/zeux/meshoptimizer 6 | * 7 | * This library is distributed under the MIT License. See notice at the end of this file. 
8 | */ 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | /* Version macro; major * 100 + minor * 10 + patch */ 15 | #define MESHOPTIMIZER_VERSION 70 16 | 17 | /* If no API is defined, assume default */ 18 | #ifndef MESHOPTIMIZER_API 19 | #define MESHOPTIMIZER_API 20 | #endif 21 | 22 | /* C interface */ 23 | #ifdef __cplusplus 24 | extern "C" { 25 | #endif 26 | 27 | /** 28 | * Generates a vertex remap table from the vertex buffer and an optional index buffer and returns number of unique vertices 29 | * 30 | * destination must contain enough space for the resulting remap table (vertex_count elements) 31 | * indices can be NULL if the input is unindexed 32 | */ 33 | MESHOPTIMIZER_API size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size); 34 | 35 | /** 36 | * Generates vertex buffer from the source vertex buffer and remap table generated by generateVertexRemap 37 | * 38 | * destination must contain enough space for the resulting vertex buffer (unique_vertex_count elements, returned by generateVertexRemap) 39 | */ 40 | MESHOPTIMIZER_API void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap); 41 | 42 | /** 43 | * Generate index buffer from the source index buffer and remap table generated by generateVertexRemap 44 | * 45 | * destination must contain enough space for the resulting index buffer (index_count elements) 46 | * indices can be NULL if the input is unindexed 47 | */ 48 | MESHOPTIMIZER_API void meshopt_remapIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const unsigned int* remap); 49 | 50 | /** 51 | * Vertex transform cache optimizer 52 | * Reorders indices to reduce the number of GPU vertex shader invocations 53 | * 54 | * destination must contain enough space for the resulting index buffer (index_count 
elements) 55 | */ 56 | MESHOPTIMIZER_API void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count); 57 | 58 | /** 59 | * Vertex transform cache optimizer for FIFO caches 60 | * Reorders indices to reduce the number of GPU vertex shader invocations 61 | * Generally takes ~3x less time to optimize meshes but produces inferior results compared to meshopt_optimizeVertexCache 62 | * 63 | * destination must contain enough space for the resulting index buffer (index_count elements) 64 | * cache_size should be less than the actual GPU cache size to avoid cache thrashing 65 | */ 66 | MESHOPTIMIZER_API void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size); 67 | 68 | /** 69 | * Overdraw optimizer 70 | * Reorders indices to reduce the number of GPU vertex shader invocations and the pixel overdraw 71 | * 72 | * destination must contain enough space for the resulting index buffer (index_count elements) 73 | * indices must contain index data that is the result of optimizeVertexCache (*not* the original mesh indices!) 
74 | * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer 75 | * threshold indicates how much the overdraw optimizer can degrade vertex cache efficiency (1.05 = up to 5%) to reduce overdraw more efficiently 76 | */ 77 | MESHOPTIMIZER_API void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold); 78 | 79 | /** 80 | * Vertex fetch cache optimizer 81 | * Reorders vertices and changes indices to reduce the amount of GPU memory fetches during vertex processing 82 | * 83 | * destination must contain enough space for the resulting vertex buffer (vertex_count elements) 84 | * indices is used both as an input and as an output index buffer 85 | */ 86 | MESHOPTIMIZER_API size_t meshopt_optimizeVertexFetch(void* destination, unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size); 87 | 88 | /** 89 | * Experimental: Index buffer encoder 90 | * Encodes index data into an array of bytes that is generally much smaller (<1.5 bytes/triangle) and compresses better (<1 bytes/triangle) compared to original. 91 | * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space 92 | * For maximum efficiency the index buffer being encoded has to be optimized for vertex cache and vertex fetch first. 
93 | * 94 | * buffer must contain enough space for the encoded index buffer (use meshopt_encodeIndexBufferBound to estimate) 95 | */ 96 | MESHOPTIMIZER_API size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count); 97 | MESHOPTIMIZER_API size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count); 98 | 99 | /** 100 | * Experimental: Index buffer decoder 101 | * Decodes index data from an array of bytes generated by meshopt_encodeIndexBuffer 102 | * Returns 0 if decoding was successful, and an error code otherwise 103 | * 104 | * destination must contain enough space for the resulting index buffer (index_count elements) 105 | */ 106 | MESHOPTIMIZER_API int meshopt_decodeIndexBuffer(unsigned int* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size); 107 | 108 | /** 109 | * Experimental: Vertex buffer encoder 110 | * Encodes vertex data into an array of bytes that is generally smaller and compresses better compared to original. 
111 | * Returns encoded data size on success, 0 on error 112 | * For maximum efficiency you should provide the encoded index buffer from meshopt_encodeIndexBuffer 113 | * 114 | * buffer must contain enough space for the encoded vertex buffer (use meshopt_encodeVertexBufferBound to estimate) 115 | */ 116 | MESHOPTIMIZER_API size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size, size_t index_count, const unsigned char* index_buffer, size_t index_buffer_size); 117 | MESHOPTIMIZER_API size_t meshopt_encodeVertexBufferBound(size_t vertex_count, size_t vertex_size); 118 | 119 | /** 120 | * Experimental: Vertex buffer decoder 121 | * Decodes vertex data from an array of bytes generated by meshopt_encodeVertexBuffer 122 | * Returns 0 if decoding was successful, and an error code otherwise 123 | * 124 | * destination must contain enough space for the resulting vertex buffer (vertex_count * vertex_size bytes) 125 | */ 126 | MESHOPTIMIZER_API int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t vertex_size, size_t index_count, const unsigned char* buffer, size_t buffer_size, const unsigned char* index_buffer, size_t index_buffer_size); 127 | 128 | /** 129 | * Experimental: Mesh simplifier 130 | * Reduces the number of triangles in the mesh, attempting to preserve mesh appearance as much as possible 131 | * Returns the number of indices after simplification, with destination containing new index data 132 | * 133 | * destination must contain enough space for the source index buffer (since optimization is iterative, this means index_count elements - *not* target_index_count!) 
134 | * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer 135 | */ 136 | MESHOPTIMIZER_API size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count); 137 | 138 | /** 139 | * Experimental: Mesh stripifier 140 | * Converts a previously vertex cache optimized triangle list to triangle strip, stitching strips using restart index 141 | * Returns the number of indices in the resulting strip, with destination containing new index data 142 | * For maximum efficiency the index buffer being converted has to be optimized for vertex cache first. 143 | * 144 | * destination must contain enough space for the worst case target index buffer (index_count / 3 * 4 elements) 145 | */ 146 | MESHOPTIMIZER_API size_t meshopt_stripify(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count); 147 | 148 | /** 149 | * Experimental: Mesh unstripifier 150 | * Converts a triangle strip to a triangle list 151 | * Returns the number of indices in the resulting list, with destination containing new index data 152 | * 153 | * destination must contain enough space for the worst case target index buffer ((index_count - 2) * 3 elements) 154 | */ 155 | MESHOPTIMIZER_API size_t meshopt_unstripify(unsigned int* destination, const unsigned int* indices, size_t index_count); 156 | 157 | struct meshopt_VertexCacheStatistics 158 | { 159 | unsigned int vertices_transformed; 160 | unsigned int warps_executed; 161 | float acmr; /* transformed vertices / triangle count; best case 0.5, worst case 3.0, optimum depends on topology */ 162 | float atvr; /* transformed vertices / vertex count; best case 1.0, worst case 6.0, optimum is 1.0 (each vertex is transformed once) */ 163 | }; 164 | 165 | /** 166 | * Vertex transform cache analyzer 167 | * Returns cache hit 
statistics using a simplified FIFO model 168 | * Results may not match actual GPU performance 169 | */ 170 | MESHOPTIMIZER_API struct meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size); 171 | 172 | struct meshopt_OverdrawStatistics 173 | { 174 | unsigned int pixels_covered; 175 | unsigned int pixels_shaded; 176 | float overdraw; /* shaded pixels / covered pixels; best case 1.0 */ 177 | }; 178 | 179 | /** 180 | * Overdraw analyzer 181 | * Returns overdraw statistics using a software rasterizer 182 | * Results may not match actual GPU performance 183 | * 184 | * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer 185 | */ 186 | MESHOPTIMIZER_API struct meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); 187 | 188 | struct meshopt_VertexFetchStatistics 189 | { 190 | unsigned int bytes_fetched; 191 | float overfetch; /* fetched bytes / vertex buffer size; best case 1.0 (each byte is fetched once) */ 192 | }; 193 | 194 | /** 195 | * Vertex fetch cache analyzer 196 | * Returns cache hit statistics using a simplified direct mapped model 197 | * Results may not match actual GPU performance 198 | */ 199 | MESHOPTIMIZER_API struct meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size); 200 | 201 | #ifdef __cplusplus 202 | } /* extern "C" */ 203 | #endif 204 | 205 | /* Quantization into commonly supported data formats */ 206 | #ifdef __cplusplus 207 | /** 208 | * Quantize a float in [0..1] range into an N-bit fixed point unorm value 209 | * Assumes reconstruction function (q / (2^N-1)), which is the case for fixed-function normalized fixed point 
conversion 210 | * Maximum reconstruction error: 1/2^(N+1) 211 | */ 212 | inline int meshopt_quantizeUnorm(float v, int N); 213 | 214 | /** 215 | * Quantize a float in [-1..1] range into an N-bit fixed point snorm value 216 | * Assumes reconstruction function (q / (2^(N-1)-1)), which is the case for fixed-function normalized fixed point conversion (except early OpenGL versions) 217 | * Maximum reconstruction error: 1/2^N 218 | */ 219 | inline int meshopt_quantizeSnorm(float v, int N); 220 | 221 | /** 222 | * Quantize a float into half-precision floating point value 223 | * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest 224 | * Representable magnitude range: [6e-5; 65504] 225 | * Maximum relative reconstruction error: 5e-4 226 | */ 227 | inline unsigned short meshopt_quantizeHalf(float v); 228 | 229 | /** 230 | * Quantize a float into a floating point value with a limited number of significant mantissa bits 231 | * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest 232 | * Assumes N is in a valid mantissa precision range, which is 1..23 233 | */ 234 | inline float meshopt_quantizeFloat(float v, int N); 235 | #endif 236 | 237 | /** 238 | * C++ template interface 239 | * 240 | * These functions mirror the C interface the library provides, providing template-based overloads so that 241 | * the caller can use an arbitrary type for the index data, both for input and output. 242 | * When the supplied type is the same size as that of unsigned int, the wrappers are zero-cost; when it's not, 243 | * the wrappers end up allocating memory and copying index data to convert from one type to another. 
244 | */ 245 | #ifdef __cplusplus 246 | template 247 | struct meshopt_IndexAdapter; 248 | 249 | template 250 | struct meshopt_IndexAdapter 251 | { 252 | T* result; 253 | unsigned int* data; 254 | size_t count; 255 | 256 | meshopt_IndexAdapter(T* result, const T* input, size_t count) 257 | : result(result) 258 | , data(0) 259 | , count(count) 260 | { 261 | data = new unsigned int[count]; 262 | 263 | if (input) 264 | { 265 | for (size_t i = 0; i < count; ++i) 266 | data[i] = input[i]; 267 | } 268 | } 269 | 270 | ~meshopt_IndexAdapter() 271 | { 272 | if (result) 273 | { 274 | for (size_t i = 0; i < count; ++i) 275 | result[i] = data[i]; 276 | } 277 | 278 | delete[] data; 279 | } 280 | }; 281 | 282 | template 283 | struct meshopt_IndexAdapter 284 | { 285 | unsigned int* data; 286 | 287 | meshopt_IndexAdapter(T* result, const T* input, size_t) 288 | : data(reinterpret_cast(result ? result : const_cast(input))) 289 | { 290 | } 291 | }; 292 | 293 | template 294 | inline size_t meshopt_generateVertexRemap(unsigned int* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) 295 | { 296 | meshopt_IndexAdapter in(0, indices, indices ? index_count : 0); 297 | 298 | return meshopt_generateVertexRemap(destination, indices ? in.data : 0, index_count, vertices, vertex_count, vertex_size); 299 | } 300 | 301 | template 302 | inline void meshopt_remapIndexBuffer(T* destination, const T* indices, size_t index_count, const unsigned int* remap) 303 | { 304 | meshopt_IndexAdapter in(0, indices, indices ? index_count : 0); 305 | meshopt_IndexAdapter out(destination, 0, index_count); 306 | 307 | meshopt_remapIndexBuffer(out.data, indices ? 
in.data : 0, index_count, remap); 308 | } 309 | 310 | template 311 | inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count) 312 | { 313 | meshopt_IndexAdapter in(0, indices, index_count); 314 | meshopt_IndexAdapter out(destination, 0, index_count); 315 | 316 | meshopt_optimizeVertexCache(out.data, in.data, index_count, vertex_count); 317 | } 318 | 319 | template 320 | inline void meshopt_optimizeVertexCacheFifo(T* destination, const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size) 321 | { 322 | meshopt_IndexAdapter in(0, indices, index_count); 323 | meshopt_IndexAdapter out(destination, 0, index_count); 324 | 325 | meshopt_optimizeVertexCacheFifo(out.data, in.data, index_count, vertex_count, cache_size); 326 | } 327 | 328 | template 329 | inline void meshopt_optimizeOverdraw(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold) 330 | { 331 | meshopt_IndexAdapter in(0, indices, index_count); 332 | meshopt_IndexAdapter out(destination, 0, index_count); 333 | 334 | meshopt_optimizeOverdraw(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, threshold); 335 | } 336 | 337 | template 338 | inline size_t meshopt_optimizeVertexFetch(void* destination, T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) 339 | { 340 | meshopt_IndexAdapter inout(indices, indices, index_count); 341 | 342 | return meshopt_optimizeVertexFetch(destination, inout.data, index_count, vertices, vertex_count, vertex_size); 343 | } 344 | 345 | template 346 | inline size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count) 347 | { 348 | meshopt_IndexAdapter in(0, indices, index_count); 349 | 350 | return meshopt_encodeIndexBuffer(buffer, buffer_size, in.data, index_count); 351 | } 
352 | 353 | template 354 | inline int meshopt_decodeIndexBuffer(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size) 355 | { 356 | meshopt_IndexAdapter out(destination, 0, index_count); 357 | 358 | return meshopt_decodeIndexBuffer(out.data, index_count, buffer, buffer_size); 359 | } 360 | 361 | template 362 | inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count) 363 | { 364 | meshopt_IndexAdapter in(0, indices, index_count); 365 | meshopt_IndexAdapter out(destination, 0, index_count); 366 | 367 | return meshopt_simplify(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count); 368 | } 369 | 370 | template 371 | inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count) 372 | { 373 | meshopt_IndexAdapter in(0, indices, index_count); 374 | meshopt_IndexAdapter out(destination, 0, (index_count / 3) * 4); 375 | 376 | return meshopt_stripify(out.data, in.data, index_count, vertex_count); 377 | } 378 | 379 | template 380 | inline size_t meshopt_unstripify(T* destination, const T* indices, size_t index_count) 381 | { 382 | meshopt_IndexAdapter in(0, indices, index_count); 383 | meshopt_IndexAdapter out(destination, 0, (index_count - 2) * 3); 384 | 385 | return meshopt_unstripify(out.data, in.data, index_count); 386 | } 387 | 388 | template 389 | inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int buffer_size) 390 | { 391 | meshopt_IndexAdapter in(0, indices, index_count); 392 | 393 | return meshopt_analyzeVertexCache(in.data, index_count, vertex_count, cache_size, warp_size, buffer_size); 394 | } 395 | 396 | template 397 | inline meshopt_OverdrawStatistics 
meshopt_analyzeOverdraw(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) 398 | { 399 | meshopt_IndexAdapter in(0, indices, index_count); 400 | 401 | return meshopt_analyzeOverdraw(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); 402 | } 403 | 404 | template 405 | inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size) 406 | { 407 | meshopt_IndexAdapter in(0, indices, index_count); 408 | 409 | return meshopt_analyzeVertexFetch(in.data, index_count, vertex_count, vertex_size); 410 | } 411 | #endif 412 | 413 | /* Inline implementation */ 414 | #ifdef __cplusplus 415 | inline int meshopt_quantizeUnorm(float v, int N) 416 | { 417 | const float scale = float((1 << N) - 1); 418 | 419 | v = (v >= 0) ? v : 0; 420 | v = (v <= 1) ? v : 1; 421 | 422 | return int(v * scale + 0.5f); 423 | } 424 | 425 | inline int meshopt_quantizeSnorm(float v, int N) 426 | { 427 | const float scale = float((1 << (N - 1)) - 1); 428 | 429 | float round = (v >= 0 ? 0.5f : -0.5f); 430 | 431 | v = (v >= -1) ? v : -1; 432 | v = (v <= +1) ? v : +1; 433 | 434 | return int(v * scale + round); 435 | } 436 | 437 | inline unsigned short meshopt_quantizeHalf(float v) 438 | { 439 | union { float f; unsigned int ui; } u = {v}; 440 | unsigned int ui = u.ui; 441 | 442 | int s = (ui >> 16) & 0x8000; 443 | int em = ui & 0x7fffffff; 444 | 445 | /* bias exponent and round to nearest; 112 is relative exponent bias (127-15) */ 446 | int h = (em - (112 << 23) + (1 << 12)) >> 13; 447 | 448 | /* underflow: flush to zero; 113 encodes exponent -14 */ 449 | h = (em < (113 << 23)) ? 0 : h; 450 | 451 | /* overflow: infinity; 143 encodes exponent 16 */ 452 | h = (em >= (143 << 23)) ? 0x7c00 : h; 453 | 454 | /* NaN; note that we convert all types of NaN to qNaN */ 455 | h = (em > (255 << 23)) ? 
0x7e00 : h; 456 | 457 | return (unsigned short)(s | h); 458 | } 459 | 460 | inline float meshopt_quantizeFloat(float v, int N) 461 | { 462 | union { float f; unsigned int ui; } u = {v}; 463 | unsigned int ui = u.ui; 464 | 465 | const int mask = (1 << (23 - N)) - 1; 466 | const int round = (1 << (23 - N)) >> 1; 467 | 468 | int e = ui & 0x7f800000; 469 | unsigned int rui = (ui + round) & ~mask; 470 | 471 | /* round all numbers except inf/nan; this is important to make sure nan doesn't overflow into -0 */ 472 | ui = e == 0x7f800000 ? ui : rui; 473 | 474 | /* flush denormals to zero */ 475 | ui = e == 0 ? 0 : ui; 476 | 477 | u.ui = ui; 478 | return u.f; 479 | } 480 | #endif 481 | 482 | /* Internal implementation helpers */ 483 | #ifdef __cplusplus 484 | template 485 | class meshopt_Buffer 486 | { 487 | meshopt_Buffer(const meshopt_Buffer&); 488 | meshopt_Buffer& operator=(const meshopt_Buffer&); 489 | 490 | public: 491 | T* data; 492 | size_t size; 493 | 494 | meshopt_Buffer() 495 | : data(0) 496 | , size(0) 497 | { 498 | } 499 | 500 | explicit meshopt_Buffer(size_t size) 501 | : data(0) 502 | , size(size) 503 | { 504 | data = new T[size]; 505 | } 506 | 507 | ~meshopt_Buffer() 508 | { 509 | delete[] data; 510 | } 511 | 512 | T& operator[](size_t index) 513 | { 514 | assert(index < size); 515 | return data[index]; 516 | } 517 | 518 | const T& operator[](size_t index) const 519 | { 520 | assert(index < size); 521 | return data[index]; 522 | } 523 | }; 524 | #endif 525 | 526 | /** 527 | * Copyright (c) 2016-2018 Arseny Kapoulkine 528 | * 529 | * Permission is hereby granted, free of charge, to any person 530 | * obtaining a copy of this software and associated documentation 531 | * files (the "Software"), to deal in the Software without 532 | * restriction, including without limitation the rights to use, 533 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 534 | * copies of the Software, and to permit persons to whom the 535 | * Software is furnished 
to do so, subject to the following 536 | * conditions: 537 | * 538 | * The above copyright notice and this permission notice shall be 539 | * included in all copies or substantial portions of the Software. 540 | * 541 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 542 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 543 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 544 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 545 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 546 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 547 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 548 | * OTHER DEALINGS IN THE SOFTWARE. 549 | */ 550 | -------------------------------------------------------------------------------- /External/meshoptimizer/src/vertexcodec.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details 2 | #include "meshoptimizer.h" 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #define TRACE 0 10 | 11 | // This work is based on: 12 | // TODO: references 13 | namespace meshopt 14 | { 15 | 16 | const size_t kVertexBlockSize = 256; 17 | const size_t kByteGroupSize = 16; 18 | 19 | inline unsigned char zigzag8(unsigned char v) 20 | { 21 | return (v >> 7) | ((v ^ -(v >> 7)) << 1); 22 | } 23 | 24 | inline unsigned char unzigzag8(unsigned char v) 25 | { 26 | return (-(v & 1)) ^ (v >> 1); 27 | } 28 | 29 | #if TRACE > 0 30 | inline int bits(unsigned char v) 31 | { 32 | int result = 0; 33 | while (v >= (1 << result)) 34 | result++; 35 | 36 | return result; 37 | } 38 | 39 | inline int bitsset(unsigned char v) 40 | { 41 | int result = 0; 42 | 43 | while (v) 44 | { 45 | result += (v & 1); 46 | v >>= 1; 47 | } 48 | 49 | return result; 50 | } 51 | #endif 52 | 53 | #if TRACE > 1 54 | static void traceEncodeVertexBlock(const 
unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, const unsigned int* prediction) 55 | { 56 | printf("vertex block; count %d\n", int(vertex_count)); 57 | 58 | { 59 | for (size_t k = 0; k < vertex_size; ++k) 60 | { 61 | printf("%02x ", vertex_data[k]); 62 | } 63 | 64 | printf("| base\n"); 65 | } 66 | 67 | int uniq[256] = {}; 68 | int max[256] = {}; 69 | int orv[256] = {}; 70 | int sumb[256] = {}; 71 | bool uniqb[256][256] = {}; 72 | 73 | for (size_t i = 1; i < vertex_count; ++i) 74 | { 75 | for (size_t k = 0; k < vertex_size; ++k) 76 | { 77 | size_t vertex_offset = i * vertex_size + k; 78 | 79 | unsigned char p = vertex_data[vertex_offset - vertex_size]; 80 | 81 | if (prediction && prediction[i]) 82 | { 83 | unsigned char pa = prediction[i] >> 16; 84 | unsigned char pb = prediction[i] >> 8; 85 | unsigned char pc = prediction[i] >> 0; 86 | assert(pa > 0 && pb > 0 && pc > 0); 87 | 88 | if (pa <= i && pb <= i && pc <= i) 89 | { 90 | unsigned char va = vertex_data[vertex_offset - pa * vertex_size]; 91 | unsigned char vb = vertex_data[vertex_offset - pb * vertex_size]; 92 | unsigned char vc = vertex_data[vertex_offset - pc * vertex_size]; 93 | 94 | p = va + vb - vc; 95 | } 96 | } 97 | 98 | unsigned char delta = zigzag8(vertex_data[vertex_offset] - p); 99 | 100 | if (!uniqb[k][delta]) 101 | { 102 | uniqb[k][delta] = true; 103 | uniq[k]++; 104 | } 105 | 106 | if (delta > max[k]) 107 | { 108 | max[k] = delta; 109 | } 110 | 111 | orv[k] |= delta; 112 | 113 | sumb[k] += bits(delta); 114 | 115 | #if TRACE > 2 116 | printf("%02x/%02x ", vertex_data[vertex_offset], delta); 117 | #endif 118 | } 119 | 120 | #if TRACE > 2 121 | printf("| "); 122 | 123 | if (prediction && prediction[i]) 124 | { 125 | unsigned char pa = prediction[i] >> 16; 126 | unsigned char pb = prediction[i] >> 8; 127 | unsigned char pc = prediction[i] >> 0; 128 | assert(pa > 0 && pb > 0 && pc > 0); 129 | 130 | if (pa <= i && pb <= i && pc <= i) 131 | { 132 | printf("pgram %d %d %d", pa, pb, 
pc); 133 | } 134 | else 135 | { 136 | printf("pdelta"); 137 | } 138 | } 139 | else 140 | { 141 | printf("delta"); 142 | } 143 | 144 | printf("\n"); 145 | #endif 146 | } 147 | 148 | for (size_t k = 0; k < vertex_size; ++k) 149 | printf("%-3d ", uniq[k]); 150 | 151 | printf("| uniq\n"); 152 | 153 | for (size_t k = 0; k < vertex_size; ++k) 154 | printf("%02x ", max[k]); 155 | 156 | printf("| max\n"); 157 | 158 | for (size_t k = 0; k < vertex_size; ++k) 159 | printf("%d ", bits(max[k])); 160 | 161 | printf("| maxbits\n"); 162 | 163 | for (size_t k = 0; k < vertex_size; ++k) 164 | printf("%3.1f ", double(sumb[k]) / double(vertex_count - 1)); 165 | 166 | printf("| avgbits\n"); 167 | 168 | for (size_t k = 0; k < vertex_size; ++k) 169 | printf("%d ", bitsset(orv[k])); 170 | 171 | printf("| bits set\n"); 172 | } 173 | #endif 174 | 175 | #if TRACE > 0 176 | struct EncodeVertexBlockStats 177 | { 178 | size_t bytes[256]; 179 | size_t bitsopt[256]; 180 | size_t bitsenc[256]; 181 | 182 | size_t headers[256]; 183 | size_t content[256]; 184 | 185 | size_t current_headers; 186 | size_t current_content; 187 | }; 188 | 189 | static EncodeVertexBlockStats encodeVertexBlockStats; 190 | 191 | static void dumpEncodeVertexBlockStats(size_t vertex_count, size_t vertex_size) 192 | { 193 | const EncodeVertexBlockStats& stats = encodeVertexBlockStats; 194 | 195 | size_t bytes = 0; 196 | size_t bitsopt = 0; 197 | size_t bitsenc = 0; 198 | size_t headers = 0; 199 | size_t content = 0; 200 | 201 | for (size_t k = 0; k < 256; ++k) 202 | if (stats.bytes[k]) 203 | { 204 | printf("%2d: %d bytes (optimal %d bytes, optenc %d bytes; headers %d, content %d)\n", int(k), int(stats.bytes[k]), int(stats.bitsopt[k]) / 8, int(stats.bitsenc[k]) / 8, int(stats.headers[k]), int(stats.content[k])); 205 | bytes += stats.bytes[k]; 206 | bitsopt += stats.bitsopt[k]; 207 | bitsenc += stats.bitsenc[k]; 208 | headers += stats.headers[k]; 209 | content += stats.content[k]; 210 | } 211 | 212 | printf("total: %d bytes 
(optimal %dd bytes, optenc %d bytes; headers %d, content %d)\n", int(bytes), int(bitsopt) / 8, int(bitsenc) / 8, int(headers), int(content)); 213 | 214 | if (vertex_size == 16) 215 | { 216 | // assume the following layout: 217 | // 6b position 218 | // 2b padding 219 | // 3b normal 220 | // 1b padding 221 | // 4b uv 222 | size_t bytes_pos = stats.bytes[0] + stats.bytes[1] + stats.bytes[2] + stats.bytes[3] + stats.bytes[4] + stats.bytes[5] + stats.bytes[6] + stats.bytes[7]; 223 | size_t bytes_nrm = stats.bytes[8] + stats.bytes[9] + stats.bytes[10] + stats.bytes[11]; 224 | size_t bytes_tex = stats.bytes[12] + stats.bytes[13] + stats.bytes[14] + stats.bytes[15]; 225 | 226 | printf("pos: %d bytes, %.1f bpv\n", int(bytes_pos), float(bytes_pos) / float(vertex_count) * 8); 227 | printf("nrm: %d bytes, %.1f bpv\n", int(bytes_nrm), float(bytes_nrm) / float(vertex_count) * 8); 228 | printf("tex: %d bytes, %.1f bpv\n", int(bytes_tex), float(bytes_tex) / float(vertex_count) * 8); 229 | } 230 | } 231 | #endif 232 | 233 | static bool encodeBytesFits(const unsigned char* buffer, size_t buffer_size, int bits) 234 | { 235 | for (size_t k = 0; k < buffer_size; ++k) 236 | if (buffer[k] >= (1 << bits)) 237 | return false; 238 | 239 | return true; 240 | } 241 | 242 | static unsigned char* encodeBytesGroup(unsigned char* data, const unsigned char* buffer, int bits) 243 | { 244 | assert(bits >= 1 && bits <= 8); 245 | 246 | if (bits == 8) 247 | { 248 | memcpy(data, buffer, kByteGroupSize); 249 | return data + kByteGroupSize; 250 | } 251 | 252 | size_t byte_size = 8 / bits; 253 | assert(kByteGroupSize % byte_size == 0); 254 | 255 | // fixed portion: bits bits for each value 256 | // variable portion: full byte for each out-of-range value (using 1...1 as sentinel) 257 | unsigned char sentinel = (1 << bits) - 1; 258 | 259 | for (size_t i = 0; i < kByteGroupSize; i += byte_size) 260 | { 261 | unsigned char byte = 0; 262 | 263 | for (size_t k = 0; k < byte_size; ++k) 264 | { 265 | unsigned 
char enc = (buffer[i + k] >= sentinel) ? sentinel : buffer[i + k]; 266 | 267 | byte <<= bits; 268 | byte |= enc; 269 | } 270 | 271 | *data++ = byte; 272 | } 273 | 274 | for (size_t i = 0; i < kByteGroupSize; ++i) 275 | { 276 | if (buffer[i] >= sentinel) 277 | { 278 | *data++ = buffer[i]; 279 | } 280 | } 281 | 282 | return data; 283 | } 284 | 285 | static const unsigned char* decodeBytesGroup(const unsigned char* data, const unsigned char* data_end, unsigned char* buffer, int bits) 286 | { 287 | assert(bits >= 1 && bits <= 8); 288 | 289 | // TODO: missing OOB data checks 290 | (void)data_end; 291 | 292 | if (bits == 8) 293 | { 294 | memcpy(buffer, data, kByteGroupSize); 295 | 296 | return data + kByteGroupSize; 297 | } 298 | 299 | size_t byte_size = 8 / bits; 300 | assert(kByteGroupSize % byte_size == 0); 301 | 302 | const unsigned char* data_var = data + kByteGroupSize / byte_size; 303 | 304 | // fixed portion: bits bits for each value 305 | // variable portion: full byte for each out-of-range value (using 1...1 as sentinel) 306 | unsigned char sentinel = (1 << bits) - 1; 307 | 308 | for (size_t i = 0; i < kByteGroupSize; i += byte_size) 309 | { 310 | unsigned char byte = *data++; 311 | 312 | for (size_t k = 0; k < byte_size; ++k) 313 | { 314 | unsigned char enc = byte >> (8 - bits); 315 | byte <<= bits; 316 | 317 | buffer[i + k] = (enc == sentinel) ? 
*data_var++ : enc; 318 | } 319 | } 320 | 321 | return data_var; 322 | } 323 | 324 | static unsigned char* encodeBytes(unsigned char* data, const unsigned char* buffer, size_t buffer_size) 325 | { 326 | assert(buffer_size % kByteGroupSize == 0); 327 | 328 | if (encodeBytesFits(buffer, buffer_size, 0)) 329 | { 330 | *data++ = 0; 331 | 332 | return data; 333 | } 334 | else 335 | { 336 | *data++ = 1; 337 | 338 | unsigned char* header = data; 339 | 340 | // round number of groups to 4 to get number of header bytes 341 | size_t header_size = (buffer_size / kByteGroupSize + 3) / 4; 342 | 343 | data += header_size; 344 | 345 | memset(header, 0, header_size); 346 | 347 | #if TRACE > 0 348 | encodeVertexBlockStats.current_headers += header_size; 349 | #endif 350 | 351 | for (size_t i = 0; i < buffer_size; i += kByteGroupSize) 352 | { 353 | int best_bits = 8; 354 | size_t best_size = kByteGroupSize; // assume encodeBytesVar(8) just stores as is 355 | 356 | for (int bits = 1; bits < 8; bits *= 2) 357 | { 358 | unsigned char* end = encodeBytesGroup(data, buffer + i, bits); 359 | 360 | if (size_t(end - data) < best_size) 361 | { 362 | best_bits = bits; 363 | best_size = end - data; 364 | } 365 | } 366 | 367 | int bitslog2 = (best_bits == 1) ? 0 : (best_bits == 2) ? 1 : (best_bits == 4) ? 
2 : 3; 368 | assert((1 << bitslog2) == best_bits); 369 | 370 | size_t header_offset = i / kByteGroupSize; 371 | 372 | header[header_offset / 4] |= bitslog2 << ((header_offset % 4) * 2); 373 | 374 | data = encodeBytesGroup(data, buffer + i, best_bits); 375 | } 376 | 377 | #if TRACE > 0 378 | encodeVertexBlockStats.current_content += data - header - header_size; 379 | #endif 380 | 381 | return data; 382 | } 383 | } 384 | 385 | static const unsigned char* decodeBytes(const unsigned char* data, const unsigned char* data_end, unsigned char* buffer, size_t buffer_size) 386 | { 387 | assert(buffer_size % kByteGroupSize == 0); 388 | 389 | if (size_t(data_end - data) < 1) 390 | return 0; 391 | 392 | unsigned char encoding = *data++; 393 | 394 | if (encoding == 0) 395 | { 396 | memset(buffer, 0, buffer_size); 397 | 398 | return data; 399 | } 400 | else if (encoding == 1) 401 | { 402 | const unsigned char* header = data; 403 | 404 | // round number of groups to 4 to get number of header bytes 405 | size_t header_size = (buffer_size / kByteGroupSize + 3) / 4; 406 | 407 | if (size_t(data_end - data) < header_size) 408 | return 0; 409 | 410 | data += header_size; 411 | 412 | for (size_t i = 0; i < buffer_size; i += kByteGroupSize) 413 | { 414 | size_t header_offset = i / kByteGroupSize; 415 | 416 | int bitslog2 = (header[header_offset / 4] >> ((header_offset % 4) * 2)) & 3; 417 | int bits = 1 << bitslog2; 418 | 419 | data = decodeBytesGroup(data, data_end, buffer + i, bits); 420 | if (!data) 421 | return 0; 422 | } 423 | 424 | return data; 425 | } 426 | else 427 | { 428 | // TODO: malformed data, we might want to return a different error code upstream? 
429 | return 0; 430 | } 431 | } 432 | 433 | static unsigned char* encodeVertexBlock(unsigned char* data, const unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, const unsigned int* prediction, unsigned char last_vertex[256]) 434 | { 435 | assert(vertex_count > 0 && vertex_count <= 256); 436 | 437 | #if TRACE > 1 438 | traceEncodeVertexBlock(vertex_data, vertex_count, vertex_size, prediction); 439 | #endif 440 | 441 | unsigned char buffer[256]; 442 | assert(sizeof(buffer) % kByteGroupSize == 0); 443 | 444 | // we sometimes encode elements we didn't fill when rounding to kByteGroupSize 445 | memset(buffer, 0, sizeof(buffer)); 446 | 447 | for (size_t k = 0; k < vertex_size; ++k) 448 | { 449 | size_t vertex_offset = k; 450 | 451 | for (size_t i = 0; i < vertex_count; ++i) 452 | { 453 | unsigned char p = (i == 0) ? last_vertex[k] : vertex_data[vertex_offset - vertex_size]; 454 | 455 | if (prediction && prediction[i]) 456 | { 457 | unsigned int pa = (prediction[i] >> 16) & 0xff; 458 | unsigned int pb = (prediction[i] >> 8) & 0xff; 459 | unsigned int pc = (prediction[i] >> 0) & 0xff; 460 | assert(pa > 0 && pb > 0 && pc > 0); 461 | 462 | if (pa <= i && pb <= i && pc <= i) 463 | { 464 | unsigned char va = vertex_data[vertex_offset - pa * vertex_size]; 465 | unsigned char vb = vertex_data[vertex_offset - pb * vertex_size]; 466 | unsigned char vc = vertex_data[vertex_offset - pc * vertex_size]; 467 | 468 | p = va + vb - vc; 469 | } 470 | } 471 | 472 | unsigned char delta = zigzag8(vertex_data[vertex_offset] - p); 473 | 474 | buffer[i] = delta; 475 | vertex_offset += vertex_size; 476 | } 477 | 478 | #if TRACE > 0 479 | unsigned char* olddata = data; 480 | #endif 481 | 482 | data = encodeBytes(data, buffer, (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1)); 483 | 484 | #if TRACE > 0 485 | EncodeVertexBlockStats& stats = encodeVertexBlockStats; 486 | 487 | stats.bytes[k] += data - olddata; 488 | 489 | for (size_t i = 0; i < vertex_count; ++i) 490 | 
{ 491 | stats.bitsopt[k] += bits(buffer[i]); 492 | stats.bitsenc[k] += bits(buffer[i]) + bits(bits(buffer[i])); 493 | } 494 | 495 | stats.headers[k] += stats.current_headers; 496 | stats.content[k] += stats.current_content; 497 | 498 | stats.current_headers = 0; 499 | stats.current_content = 0; 500 | #endif 501 | } 502 | 503 | for (size_t k = 0; k < vertex_size; ++k) 504 | { 505 | last_vertex[k] = vertex_data[vertex_size * (vertex_count - 1) + k]; 506 | } 507 | 508 | return data; 509 | } 510 | 511 | static const unsigned char* decodeVertexBlock(const unsigned char* data, const unsigned char* data_end, unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, const unsigned int* prediction, unsigned char last_vertex[256]) 512 | { 513 | assert(vertex_count > 0 && vertex_count <= 256); 514 | 515 | unsigned char buffer[256]; 516 | assert(sizeof(buffer) % kByteGroupSize == 0); 517 | 518 | for (size_t k = 0; k < vertex_size; ++k) 519 | { 520 | data = decodeBytes(data, data_end, buffer, (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1)); 521 | if (!data) 522 | return 0; 523 | 524 | size_t vertex_offset = k; 525 | 526 | for (size_t i = 0; i < vertex_count; ++i) 527 | { 528 | unsigned char p = (i == 0) ? 
last_vertex[k] : vertex_data[vertex_offset - vertex_size];

			// If an encoded prediction triple is available for this vertex, refine p.
			// prediction[i] packs three byte-sized back-references (pa, pb, pc); each is
			// a distance, in vertices, back into already-decoded output.
			if (prediction && prediction[i])
			{
				unsigned int pa = (prediction[i] >> 16) & 0xff;
				unsigned int pb = (prediction[i] >> 8) & 0xff;
				unsigned int pc = (prediction[i] >> 0) & 0xff;
				assert(pa > 0 && pb > 0 && pc > 0);

				// only use the references if all three point at vertices decoded earlier
				if (pa <= i && pb <= i && pc <= i)
				{
					unsigned char va = vertex_data[vertex_offset - pa * vertex_size];
					unsigned char vb = vertex_data[vertex_offset - pb * vertex_size];
					unsigned char vc = vertex_data[vertex_offset - pc * vertex_size];

					// parallelogram-style byte predictor built from three earlier vertices
					p = va + vb - vc;
				}
			}

			// the stored byte is a zigzag-encoded delta against the predicted byte p
			vertex_data[vertex_offset] = unzigzag8(buffer[i]) + p;

			vertex_offset += vertex_size;
		}
	}

	// remember the last vertex of this block so the next block can continue delta-coding from it
	for (size_t k = 0; k < vertex_size; ++k)
	{
		last_vertex[k] = vertex_data[vertex_size * (vertex_count - 1) + k];
	}

	return data;
}

// 16-entry ring buffers used by the index decoder below; offsets wrap with `& 15`
typedef unsigned int VertexFifo[16];
typedef unsigned int EdgeFifo[16][3];

// append edge (a, b) with opposite vertex c to the edge ring buffer
static void pushEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, unsigned int c, size_t& offset)
{
	fifo[offset][0] = a;
	fifo[offset][1] = b;
	fifo[offset][2] = c;
	offset = (offset + 1) & 15;
}

// append vertex v to the vertex ring buffer
static void pushVertexFifo(VertexFifo fifo, unsigned int v, size_t& offset)
{
	fifo[offset] = v;
	offset = (offset + 1) & 15;
}

// decode a little-endian base-128 varint: 7 payload bits per byte, high bit = continuation;
// total length is capped at 5 bytes (enough for a 32-bit value)
static unsigned int decodeVByte(const unsigned char*& data)
{
	unsigned char lead = *data++;

	// fast path: single byte
	if (lead < 128)
		return lead;

	// slow path: up to 4 extra bytes
	// note that this loop always terminates, which is important for malformed data
	unsigned int result = lead & 127;
	unsigned int shift = 7;

	for (int i = 0; i < 4; ++i)
	{
		unsigned char group = *data++;
		result |= (group & 127) << shift;
		shift += 7;

		if (group < 128)
			break;
	}

	return result;
}

// decode a "free" index stored as a zigzag-encoded delta against the previous free index (last)
static unsigned int decodeIndex(const unsigned char*& data, unsigned int next, unsigned int last)
{
	(void)next; // unused here; presumably kept to mirror the encoder's signature — TODO confirm

	unsigned int v = decodeVByte(data);
	unsigned int d = (v >> 1) ^ -int(v & 1); // zigzag decode to a signed delta

	return last + d;
}

// Resumable state for decodeVertexPrediction: fifo contents plus stream read positions,
// so prediction data can be produced one vertex block at a time across multiple calls.
struct DecodePredictionState
{
	EdgeFifo edgefifo;
	VertexFifo vertexfifo;
	size_t edgefifooffset;
	size_t vertexfifooffset;

	unsigned int next; // next not-yet-seen vertex index
	unsigned int last; // last decoded free index (delta base)

	size_t code_offset;  // read position within the triangle code stream
	size_t data_offset;  // read position within the auxiliary data stream (relative to its start)

	size_t index_offset; // number of indices consumed so far
};

// Replays the encoded index stream and emits, for each vertex introduced in sequence
// order (via the `next` counter), a packed (pa, pb, pc) back-reference triple — or 0
// for "no prediction" — into result[]. Resumable via `state`; returns the number of
// entries written. A return of 0 means either completion or malformed input.
static size_t decodeVertexPrediction(DecodePredictionState& state, unsigned int* result, size_t result_size, size_t index_count, const unsigned char* buffer, size_t buffer_size)
{
	assert(index_count % 3 == 0);

	// the minimum valid encoding is 1 byte per triangle and a 16-byte codeaux table
	if (buffer_size < index_count / 3 + 16)
		return 0;

	// since we store 16-byte codeaux table at the end, triangle data has to begin before data_safe_end
	const unsigned char* code = buffer + state.code_offset;
	const unsigned char* data = buffer + index_count / 3 + state.data_offset;
	const unsigned char* data_safe_end = buffer + buffer_size - 16;

	const unsigned char* codeaux_table = data_safe_end;

	size_t result_offset = 0;
	size_t i = state.index_offset;

	for (; i < index_count; i += 3)
	{
		// stop before overflowing the caller's result buffer (a triangle emits up to 3 entries)
		if (result_offset + 3 > result_size)
			break;

		// make sure we have enough data to read for a triangle
		// each triangle reads at most 16 bytes of data: 1b for codeaux and 5b for each free index
		// after this we can be sure we can read without extra bounds checks
		if (data > data_safe_end)
			return 0;

		unsigned char codetri = *code++;

		int fe = codetri >> 4;

		if (fe < 15)
		{
			// fifo reads are wrapped around 16 entry buffer
			unsigned int a = state.edgefifo[(state.edgefifooffset - 1 - fe) & 15][0];
			unsigned int b = state.edgefifo[(state.edgefifooffset - 1 - fe) & 15][1];
			unsigned int co = state.edgefifo[(state.edgefifooffset - 1 - fe) & 15][2];

			int fec = codetri & 15;

			unsigned int c = (fec == 0) ? state.next++ : state.vertexfifo[(state.vertexfifooffset - 1 - fec) & 15];

			// note that we need to update the last index since free indices are delta-encoded
			if (fec == 15)
				state.last = c = decodeIndex(data, state.next, state.last);

			// output prediction data
			if (fec == 0)
			{
				unsigned int na = c - a;
				unsigned int nb = c - b;
				unsigned int nc = c - co;

				unsigned int p = (na << 16) | (nb << 8) | nc;

				// only emit a triple if all three distances fit in a byte; otherwise no prediction
				result[result_offset++] = (na | nb | nc) < 256 ? p : 0;
			}

			// push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
			if (fec == 0 || fec == 15)
				pushVertexFifo(state.vertexfifo, c, state.vertexfifooffset);

			pushEdgeFifo(state.edgefifo, c, b, a, state.edgefifooffset);
			pushEdgeFifo(state.edgefifo, a, c, b, state.edgefifooffset);
		}
		else
		{
			// fast path: read codeaux from the table; we wrap table index so this access is memory-safe
			// slow path: read a full byte for codeaux instead of using a table lookup
			unsigned char codeaux = (codetri & 15) >= 14 ? *data++ : codeaux_table[codetri & 15];

			int fea = (codetri & 15) == 15 ? 15 : 0;
			int feb = codeaux >> 4;
			int fec = codeaux & 15;

			// fifo reads are wrapped around 16 entry buffer
			// also note that we increment next for all three vertices before decoding indices - this matches encoder behavior
			unsigned int a = (fea == 0) ? state.next++ : 0;
			unsigned int b = (feb == 0) ? state.next++ : state.vertexfifo[(state.vertexfifooffset - feb) & 15];
			unsigned int c = (fec == 0) ? state.next++ : state.vertexfifo[(state.vertexfifooffset - fec) & 15];

			// note that we need to update the last index since free indices are delta-encoded
			if (fea == 15)
				state.last = a = decodeIndex(data, state.next, state.last);

			if (feb == 15)
				state.last = b = decodeIndex(data, state.next, state.last);

			if (fec == 15)
				state.last = c = decodeIndex(data, state.next, state.last);

			// output prediction data
			if (fea == 0)
				result[result_offset++] = 0;

			if (feb == 0)
				result[result_offset++] = 0;

			if (fec == 0)
				result[result_offset++] = 0;

			// push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
			if (fea == 0 || fea == 15)
				pushVertexFifo(state.vertexfifo, a, state.vertexfifooffset);

			if (feb == 0 || feb == 15)
				pushVertexFifo(state.vertexfifo, b, state.vertexfifooffset);

			if (fec == 0 || fec == 15)
				pushVertexFifo(state.vertexfifo, c, state.vertexfifooffset);

			pushEdgeFifo(state.edgefifo, b, a, c, state.edgefifooffset);
			pushEdgeFifo(state.edgefifo, c, b, a, state.edgefifooffset);
			pushEdgeFifo(state.edgefifo, a, c, b, state.edgefifooffset);
		}
	}

	// we should've read all data bytes and stopped at the boundary between data and codeaux table
	if (i == index_count && data != data_safe_end)
		return 0;

	// persist read positions so a subsequent call can resume where this one stopped
	state.code_offset = code - buffer;
	state.data_offset = data - buffer - index_count / 3;
	state.index_offset = i;

	return result_offset;
}

} // closes enclosing namespace opened above this chunk (presumably meshopt, per `using namespace meshopt` below)

size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size, size_t index_count, const unsigned char* index_buffer,
size_t index_buffer_size) 763 | { 764 | using namespace meshopt; 765 | 766 | assert(vertex_size > 0 && vertex_size <= 256); 767 | assert(index_count % 3 == 0); 768 | assert(index_buffer == 0 || index_buffer_size > 0); 769 | 770 | const unsigned char* vertex_data = static_cast(vertices); 771 | 772 | unsigned char* data = buffer; 773 | 774 | unsigned char last_vertex[256]; 775 | 776 | for (size_t k = 0; k < vertex_size; ++k) 777 | { 778 | last_vertex[k] = vertex_data[k]; 779 | 780 | *data++ = last_vertex[k]; 781 | } 782 | 783 | const size_t prediction_capacity = kVertexBlockSize + 2; 784 | unsigned int prediction[prediction_capacity]; 785 | 786 | DecodePredictionState pstate = {}; 787 | 788 | #if TRACE > 0 789 | memset(&encodeVertexBlockStats, 0, sizeof(encodeVertexBlockStats)); 790 | #endif 791 | 792 | size_t vertex_offset = 0; 793 | size_t prediction_offset = 0; 794 | 795 | if (index_buffer) 796 | { 797 | for (;;) 798 | { 799 | size_t psize = decodeVertexPrediction(pstate, prediction + prediction_offset, prediction_capacity - prediction_offset, index_count, index_buffer, index_buffer_size); 800 | if (psize == 0) 801 | break; 802 | 803 | size_t block_size = psize + prediction_offset; 804 | 805 | if (vertex_offset + block_size > vertex_count) 806 | break; 807 | 808 | size_t block_size_clamped = (block_size > kVertexBlockSize) ? kVertexBlockSize : block_size; 809 | 810 | data = encodeVertexBlock(data, vertex_data + vertex_offset * vertex_size, block_size_clamped, vertex_size, prediction, last_vertex); 811 | vertex_offset += block_size_clamped; 812 | 813 | prediction_offset = block_size - block_size_clamped; 814 | memset(&prediction[0], 0, prediction_offset * sizeof(prediction[0])); 815 | } 816 | } 817 | 818 | while (vertex_offset < vertex_count) 819 | { 820 | size_t block_size = (vertex_offset + kVertexBlockSize < vertex_count) ? 
kVertexBlockSize : vertex_count - vertex_offset; 821 | 822 | data = encodeVertexBlock(data, vertex_data + vertex_offset * vertex_size, block_size, vertex_size, 0, last_vertex); 823 | vertex_offset += block_size; 824 | } 825 | 826 | #if TRACE > 0 827 | dumpEncodeVertexBlockStats(vertex_count, vertex_size); 828 | #endif 829 | 830 | assert(size_t(data - buffer) <= buffer_size); 831 | (void)buffer_size; 832 | 833 | return data - buffer; 834 | } 835 | 836 | size_t meshopt_encodeVertexBufferBound(size_t vertex_count, size_t vertex_size) 837 | { 838 | // TODO: This significantly overestimates worst case, refine 839 | return vertex_count * vertex_size * 2; 840 | } 841 | 842 | int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t vertex_size, size_t index_count, const unsigned char* buffer, size_t buffer_size, const unsigned char* index_buffer, size_t index_buffer_size) 843 | { 844 | using namespace meshopt; 845 | 846 | assert(vertex_size > 0 && vertex_size <= 256); 847 | assert(index_count % 3 == 0); 848 | assert(index_buffer == 0 || index_buffer_size > 0); 849 | 850 | unsigned char* vertex_data = static_cast(destination); 851 | 852 | const unsigned char* data = buffer; 853 | const unsigned char* data_end = buffer + buffer_size; 854 | 855 | if (size_t(data_end - data) < vertex_size) 856 | return -1; 857 | 858 | unsigned char last_vertex[256]; 859 | 860 | // TODO: bounds checks on data 861 | for (size_t k = 0; k < vertex_size; ++k) 862 | { 863 | last_vertex[k] = *data++; 864 | 865 | vertex_data[k] = last_vertex[k]; 866 | } 867 | 868 | const size_t prediction_capacity = kVertexBlockSize + 2; 869 | unsigned int prediction[prediction_capacity]; 870 | 871 | DecodePredictionState pstate = {}; 872 | 873 | size_t vertex_offset = 0; 874 | size_t prediction_offset = 0; 875 | 876 | if (index_buffer) 877 | { 878 | for (;;) 879 | { 880 | size_t psize = decodeVertexPrediction(pstate, prediction + prediction_offset, prediction_capacity - prediction_offset, 
index_count, index_buffer, index_buffer_size); 881 | if (psize == 0) 882 | break; 883 | 884 | size_t block_size = psize + prediction_offset; 885 | 886 | if (vertex_offset + block_size > vertex_count) 887 | break; 888 | 889 | size_t block_size_clamped = (block_size > kVertexBlockSize) ? kVertexBlockSize : block_size; 890 | 891 | data = decodeVertexBlock(data, data_end, vertex_data + vertex_offset * vertex_size, block_size_clamped, vertex_size, prediction, last_vertex); 892 | if (!data) 893 | return -2; 894 | 895 | vertex_offset += block_size_clamped; 896 | 897 | prediction_offset = block_size - block_size_clamped; 898 | memset(&prediction[0], 0, prediction_offset * sizeof(prediction[0])); 899 | } 900 | } 901 | 902 | while (vertex_offset < vertex_count) 903 | { 904 | size_t block_size = (vertex_offset + kVertexBlockSize < vertex_count) ? kVertexBlockSize : vertex_count - vertex_offset; 905 | 906 | data = decodeVertexBlock(data, data_end, vertex_data + vertex_offset * vertex_size, block_size, vertex_size, 0, last_vertex); 907 | if (!data) 908 | return -2; 909 | 910 | vertex_offset += block_size; 911 | } 912 | 913 | if (data != data_end) 914 | return -3; 915 | 916 | return 0; 917 | } 918 | --------------------------------------------------------------------------------