├── doc │   └── sample.png ├── .gitignore ├── csf.cpp ├── config.h ├── CONTRIBUTING ├── threadpool.hpp ├── animation.comp.glsl ├── resources.hpp ├── common.h ├── scene.frag.glsl ├── scene.vert.glsl ├── CMakeLists.txt ├── renderer.hpp ├── cadscene_vk.hpp ├── cadscene.hpp ├── cadscene_vk.cpp ├── renderer.cpp ├── resources_vk.hpp ├── threadpool.cpp ├── vk_ext_device_generated_commands.cpp ├── LICENSE ├── renderer_vk.cpp ├── cadscene.cpp ├── vk_ext_device_generated_commands.hpp ├── rendererthread_vk.cpp └── main.cpp /doc/sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/vk_device_generated_cmds/HEAD/doc/sample.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .clang-format 2 | .editorconfig 3 | 4 | ############################# 5 | #Spirv 6 | ############################# 7 | *.spv 8 | *.spva 9 | *.sass 10 | *.sassbin 11 | *.bat 12 | 13 | ############################# 14 | #specific to the project 15 | ############################# 16 | cmake_built 17 | cmake_build 18 | build 19 | _install 20 | bin_x64 21 | NVPRO_EXTERNAL 22 | nvpro_core -------------------------------------------------------------------------------- /csf.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #define CSF_IMPLEMENTATION 22 | #define CSF_SUPPORT_GLTF2 1 23 | #define CSF_SUPPORT_FILEMAPPING 1 24 | 25 | #include <fileformats/cadscenefile.inl> 26 | 27 | #define CGLTF_IMPLEMENTATION 28 | #include <cgltf.h> 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /config.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #pragma once 22 | 23 | // artificially create a few more shader permutations, pairs of vertex/fragment shaders 24 | #define NUM_MATERIAL_SHADERS 128 25 | 26 | // favor using drawcalls firstIndex / firstVertex rather than 27 | // setting index / vertex buffers as much 28 | #define USE_DRAW_OFFSETS 0 29 | 30 | // use VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE and vkCmdBindVertexBuffers2 31 | #define USE_DYNAMIC_VERTEX_STRIDE 0 32 | 33 | // enforces single buffers for vbo/ibo 34 | #define USE_SINGLE_GEOMETRY_ALLOCATION 0 35 | -------------------------------------------------------------------------------- /CONTRIBUTING: -------------------------------------------------------------------------------- 1 | https://developercertificate.org/ 2 | 3 | Developer Certificate of Origin 4 | Version 1.1 5 | 6 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 7 | 8 | Everyone is permitted to copy and distribute verbatim copies of this 9 | license document, but changing it is not allowed. 10 | 11 | 12 | Developer's Certificate of Origin 1.1 13 | 14 | By making a contribution to this project, I certify that: 15 | 16 | (a) The contribution was created in whole or in part by me and I 17 | have the right to submit it under the open source license 18 | indicated in the file; or 19 | 20 | (b) The contribution is based upon previous work that, to the best 21 | of my knowledge, is covered under an appropriate open source 22 | license and I have the right under that license to submit that 23 | work with modifications, whether created in whole or in part 24 | by me, under the same open source license (unless I am 25 | permitted to submit under a different license), as indicated 26 | in the file; or 27 | 28 | (c) The contribution was provided directly to me by some other 29 | person who certified (a), (b) or (c) and I have not modified 30 | it. 31 | 32 | (d) I understand and agree that this project and the contribution 33 | are public and that a record of the contribution (including all 34 | personal information I submit with it, including my sign-off) is 35 | maintained indefinitely and may be redistributed consistent with 36 | this project or the open source license(s) involved. -------------------------------------------------------------------------------- /threadpool.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #ifndef THREADPOOL_H__ 22 | #define THREADPOOL_H__ 23 | 24 | #include <condition_variable> 25 | #include <mutex> 26 | #include <thread> 27 | #include <vector> 28 | 29 | class ThreadPool 30 | { 31 | 32 | public: 33 | typedef void (*WorkerFunc)(void* arg); 34 | 35 | void init(unsigned int numThreads); 36 | void deinit(); 37 | 38 | void activateJob(unsigned int thread, WorkerFunc fn, void* arg); 39 | 40 | static unsigned int sysGetNumCores(); 41 | 42 | unsigned int getNumThreads() { return m_numThreads; } 43 | 44 | 45 | private: 46 | struct ThreadEntry 47 | { 48 | ThreadPool* m_origin; 49 | std::thread m_thread; 50 | unsigned int m_id; 51 | WorkerFunc m_fn; 52 | void* m_fnArg; 53 | std::mutex m_commMutex; 54 | std::condition_variable m_commCond; 55 | }; 56 | 57 | unsigned int m_numThreads; 58 | ThreadEntry* m_pool; 59 | 60 | volatile unsigned int m_globalInit; 61 | 62 | std::mutex m_globalMutex; 63 | std::condition_variable m_globalCond; 64 | 65 | static void threadKicker(void* arg); 66 | void threadProcess(ThreadEntry& entry); 67 | }; 68 | 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /animation.comp.glsl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #version 440 22 | /**/ 23 | 24 | #include "common.h" 25 | 26 | layout (local_size_x = ANIMATION_WORKGROUPSIZE) in; 27 | 28 | layout(binding=ANIM_UBO, std140) uniform animBuffer { 29 | AnimationData anim; 30 | }; 31 | 32 | layout(binding=ANIM_SSBO_MATRIXOUT, std430) restrict buffer matricesBuffer { 33 | MatrixData animated[]; 34 | }; 35 | 36 | layout(binding=ANIM_SSBO_MATRIXORIG, std430) restrict buffer matricesOrigBuffer { 37 | MatrixData original[]; 38 | }; 39 | 40 | void main() 41 | { 42 | int self = int(gl_GlobalInvocationID.x); 43 | if (gl_GlobalInvocationID.x >= anim.numMatrices){ 44 | return; 45 | } 46 | 47 | float s = 1-(float(self)/float(anim.numMatrices)); 48 | float movement = 4; // time until all objects are done moving (<= sequence*0.5) 49 | float sequence = movement*2+3; // time for sequence 50 | 51 | float timeS = fract(anim.time / sequence) * sequence; 52 | float time = clamp(timeS - s*movement,0,1) - clamp(timeS - (1-s)*movement - sequence*0.5, 0, 1); 53 | 54 | float scale = smoothstep(0,1,time); 55 | 56 | mat4 matrixOrig = original[self].worldMatrix; 57 | vec3 pos = matrixOrig[3].xyz; 58 | vec3 away = (pos - anim.sceneCenter ); 59 | 60 | float diridx = float(self % 3); 61 | float sidx = float(self % 6); 62 | 63 | vec3 delta; 64 | #if 1 65 | #pragma optionNV(ifcvt 16) 66 | delta.x = diridx == 0 ? 1 : 0; 67 | delta.y = diridx == 1 ? 1 : 0; 68 | delta.z = diridx == 2 ? 1 : 0; 69 | #else 70 | delta.x = step(diridx,0.5); 71 | delta.y = step(abs(diridx-1),0.5); 72 | delta.z = step(abs(diridx-2),0.5); 73 | #endif 74 | 75 | delta *= -sign(sidx-2.5); 76 | delta *= sign(dot(away,delta)); 77 | 78 | delta = normalize(delta); 79 | pos += delta * scale * anim.sceneDimension; 80 | 81 | animated[self].worldMatrix = mat4(matrixOrig[0], matrixOrig[1], matrixOrig[2], vec4(pos,1)); 82 | } 83 | --------------------------------------------------------------------------------
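Note: the compute shader above runs one thread per matrix and early-outs past anim.numMatrices, so the host side only has to round the dispatch up to ANIMATION_WORKGROUPSIZE. A minimal sketch of that dispatch math (cmd, animPipeline and numMatrices are hypothetical names, not the sample's actual code):

    // one thread per matrix; integer round-up so the tail matrices are covered
    uint32_t groupCount = (numMatrices + ANIMATION_WORKGROUPSIZE - 1) / ANIMATION_WORKGROUPSIZE;
    vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, animPipeline);
    vkCmdDispatch(cmd, groupCount, 1, 1); // shader discards threads >= anim.numMatrices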
/resources.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #pragma once 22 | 23 | #include "config.h" 24 | #include "cadscene.hpp" 25 | #include <nvh/profiler.hpp> 26 | #include <nvvk/context_vk.hpp> 27 | #include <nvvk/swapchain_vk.hpp> 28 | #include <string> 29 | #include <vector> 30 | 31 | #include <glm/glm.hpp> 32 | 33 | struct ImDrawData; 34 | 35 | using namespace glm; 36 | #include "common.h" 37 | 38 | 39 | namespace generatedcmds { 40 | 41 | enum BindingMode 42 | { 43 | BINDINGMODE_DSETS, 44 | BINDINGMODE_PUSHADDRESS, 45 | BINDINGMODE_INDEX_VERTEXATTRIB, 46 | BINDINGMODE_INDEX_BASEINSTANCE, 47 | NUM_BINDINGMODES, 48 | }; 49 | 50 | enum ShaderMode 51 | { 52 | SHADERMODE_PIPELINE, 53 | SHADERMODE_OBJS, 54 | NUM_SHADERMODES, 55 | }; 56 | 57 | inline size_t alignedSize(size_t sz, size_t align) 58 | { 59 | return ((sz + align - 1) / align) * align; 60 | } 61 | 62 | class Resources 63 | { 64 | public: 65 | struct Global 66 | { 67 | SceneData sceneUbo; 68 | AnimationData animUbo; 69 | int winWidth; 70 | int winHeight; 71 | int workingSet; 72 | bool workerBatched; 73 | ImDrawData* imguiDrawData; 74 | }; 75 | 76 | uint32_t m_numMatrices; 77 | 78 | uint32_t m_frame; 79 | 80 | uint32_t m_alignedMatrixSize; 81 | uint32_t m_alignedMaterialSize; 82 | 83 | Resources() 84 | : m_frame(0) 85 | { 86 | } 87 | 88 | virtual void synchronize() {} 89 | 90 | virtual bool init(nvvk::Context* context, nvvk::SwapChain* swapChain, nvh::Profiler* profiler) { return false; } 91 | virtual void deinit() {} 92 | 93 | virtual bool initPrograms(const std::string& path, const std::string& prepend) { return true; } 94 | virtual void reloadPrograms(const std::string& prepend) {} 95 | 96 | virtual bool initFramebuffer(int width, int height, int msaa, bool vsync) { return true; } 97 | 98 | virtual bool initScene(const CadScene&) { return true; } 99 | virtual void deinitScene() {} 100 | 101 | virtual void animation(const Global& global) {} 102 | virtual void animationReset() {} 103 | 104 | virtual void beginFrame() {} 105 | virtual void blitFrame(const Global& global) {} 106 | virtual void endFrame() {} 107 | 108 | inline void initAlignedSizes(unsigned int alignment) 109 | { 110 | m_alignedMatrixSize = (uint32_t)(alignedSize(sizeof(CadScene::MatrixNode),
alignment)); 111 | m_alignedMaterialSize = (uint32_t)(alignedSize(sizeof(CadScene::Material), alignment)); 112 | } 113 | }; 114 | } // namespace generatedcmds 115 | -------------------------------------------------------------------------------- /common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | 22 | #ifndef CSFTHREADED_COMMON_H 23 | #define CSFTHREADED_COMMON_H 24 | 25 | #define VERTEX_POS_OCTNORMAL 0 26 | #define VERTEX_COMBINED_INDEX 1 27 | 28 | #ifndef INDEXED_MATRIX_BITS 29 | #define INDEXED_MATRIX_BITS 24 30 | #endif 31 | 32 | #ifndef INDEXED_MATERIAL_BITS 33 | #define INDEXED_MATERIAL_BITS 8 34 | #endif 35 | 36 | // changing these orders may break a lot of things ;) 37 | #define DRAW_UBO_SCENE 0 38 | #define DRAW_UBO_MATRIX 1 39 | #define DRAW_SSBO_MATRIX 1 40 | #define DRAW_UBO_MATERIAL 2 41 | #define DRAW_SSBO_MATERIAL 2 42 | 43 | #define ANIM_UBO 0 44 | #define ANIM_SSBO_MATRIXOUT 1 45 | #define ANIM_SSBO_MATRIXORIG 2 46 | 47 | #define ANIMATION_WORKGROUPSIZE 256 48 | 49 | #ifndef SHADER_PERMUTATION 50 | #define SHADER_PERMUTATION 1 51 | #endif 52 | 53 | ////////////////////////////////////////////////////////////////////////// 54 | 55 | // see resources_vk.hpp 56 | 57 | #ifndef UNIFORMS_MULTISETSDYNAMIC 58 | #define UNIFORMS_MULTISETSDYNAMIC 0 59 | #endif 60 | #ifndef UNIFORMS_PUSHCONSTANTS_ADDRESS 61 | #define UNIFORMS_PUSHCONSTANTS_ADDRESS 1 62 | #endif 63 | #ifndef UNIFORMS_INDEX_VERTEXATTRIB 64 | #define UNIFORMS_INDEX_VERTEXATTRIB 2 65 | #endif 66 | #ifndef UNIFORMS_INDEX_BASEINSTANCE 67 | #define UNIFORMS_INDEX_BASEINSTANCE 3 68 | #endif 69 | 70 | #ifndef UNIFORMS_TECHNIQUE 71 | #define UNIFORMS_TECHNIQUE UNIFORMS_INDEX_VERTEXATTRIB 72 | #endif 73 | 74 | ////////////////////////////////////////////////////////////////////////// 75 | 76 | #ifdef __cplusplus 77 | namespace generatedcmds { 78 | using namespace glm; 79 | #endif 80 | 81 | struct SceneData { 82 | mat4 viewProjMatrix; 83 | mat4 viewMatrix; 84 | mat4 viewMatrixIT; 85 | 86 | vec4 viewPos; 87 | vec4 viewDir; 88 | 89 | vec4 wLightPos; 90 | 91 | ivec2 viewport; 92 | ivec2 _pad; 93 | }; 94 | 95 | // must match cadscene 96 | struct MatrixData { 97 | mat4 worldMatrix; 98 | mat4 worldMatrixIT; 99 | mat4 objectMatrix; 100 | mat4 objectMatrixIT; 101 | }; 102 | 103 | // must match cadscene 104 | struct MaterialSide { 105 | vec4 ambient; 106 | vec4 diffuse; 107 | vec4 specular; 108 | vec4 emissive; 109 | }; 110 | 111 | struct MaterialData { 112 | MaterialSide sides[2]; 113 | }; 114 | 115 | struct AnimationData { 116 | uint numMatrices; 117 | float time; 118 | vec2 _pad0; 119 | 120 | vec3 sceneCenter; 121 | float sceneDimension; 122 | }; 123 | 124 | #ifdef __cplusplus 125 | } 126 | #endif 127 | 128 | 129 | #endif 130 | --------------------------------------------------------------------------------
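common.h reserves the low INDEXED_MATRIX_BITS (24) of a combined 32-bit index for the matrix and the upper INDEXED_MATERIAL_BITS (8) for the material; this is the same pack/unpack contract that CadScene::IndexingBits::packIndices and scene.vert.glsl use further below. A minimal sketch of the arithmetic:

    // pack: low 24 bits matrix, high 8 bits material (default bit counts)
    uint32_t combined         = matrixIndex | (materialIndex << INDEXED_MATRIX_BITS);
    // unpack, as the vertex shader does
    uint32_t unpackedMatrix   = combined & ((1u << INDEXED_MATRIX_BITS) - 1u);
    uint32_t unpackedMaterial = combined >> INDEXED_MATRIX_BITS;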
/scene.frag.glsl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #version 440 core 22 | /**/ 23 | 24 | #extension GL_GOOGLE_include_directive : enable 25 | #extension GL_EXT_buffer_reference : enable 26 | #extension GL_EXT_scalar_block_layout : enable 27 | 28 | #include "common.h" 29 | 30 | #if UNIFORMS_TECHNIQUE == UNIFORMS_MULTISETSDYNAMIC 31 | 32 | layout(set=DRAW_UBO_SCENE, binding=0, scalar) uniform sceneBuffer { 33 | SceneData scene; 34 | }; 35 | layout(set=DRAW_UBO_MATERIAL, binding=0, scalar) uniform materialBuffer { 36 | MaterialData material; 37 | }; 38 | 39 | #elif UNIFORMS_TECHNIQUE == UNIFORMS_PUSHCONSTANTS_ADDRESS 40 | 41 | layout(set=0, binding=DRAW_UBO_SCENE, scalar) uniform sceneBuffer { 42 | SceneData scene; 43 | }; 44 | layout(buffer_reference, buffer_reference_align=16, scalar) readonly buffer MaterialBuffer { 45 | MaterialData material; 46 | }; 47 | 48 | layout(push_constant, scalar) uniform pushConstants { 49 | layout(offset=8) 50 | MaterialBuffer v; 51 | }; 52 | #define material v.material 53 | 54 | #elif UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_VERTEXATTRIB || UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_BASEINSTANCE 55 | 56 | layout(set=0, binding=DRAW_UBO_SCENE, scalar) uniform sceneBuffer { 57 | SceneData scene; 58 | }; 59 | layout(set=0, binding=DRAW_SSBO_MATERIAL, scalar) readonly buffer MaterialBuffer { 60 | MaterialData materialDatas[]; 61 | }; 62 | #define material materialDatas[IN.materialIndex * 2] 63 | 64 | #endif 65 | 66 | 67 | layout(location=0) in Interpolants { 68 | vec3 wPos; 69 | vec3 wNormal; 70 | #if UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_VERTEXATTRIB || UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_BASEINSTANCE 71 | flat uint materialIndex; 72 | #endif 73 | #if SHADER_PERMUTATION 74 | vec3 oNormal; 75 | #endif 76 | } IN; 77 | 78 | layout(location=0,index=0) out vec4 out_Color; 79 | 80 | void main() 81 | { 82 | MaterialSide side = material.sides[gl_FrontFacing ?
1 : 0]; 83 | 84 | vec4 color = side.ambient + side.emissive; 85 | #if SHADER_PERMUTATION 86 | ivec2 pixel = ivec2(gl_FragCoord.xy); 87 | pixel /= (SHADER_PERMUTATION % 8) + 1; 88 | pixel %= (SHADER_PERMUTATION % 2) + 1; 89 | pixel = ivec2(1) - pixel; 90 | 91 | color = mix(color, vec4(IN.oNormal*0.5+0.5, 1), vec4(0.5) * float(pixel.x * pixel.y)); 92 | color += 0.001 * float(SHADER_PERMUTATION); 93 | #endif 94 | 95 | vec3 eyePos = vec3(scene.viewMatrixIT[0].w,scene.viewMatrixIT[1].w,scene.viewMatrixIT[2].w); 96 | 97 | vec3 lightDir = normalize( scene.wLightPos.xyz - IN.wPos); 98 | vec3 viewDir = normalize( eyePos - IN.wPos); 99 | vec3 halfDir = normalize(lightDir + viewDir); 100 | vec3 normal = normalize(IN.wNormal) * (gl_FrontFacing ? 1 : -1); 101 | 102 | float ldot = dot(normal,lightDir); 103 | normal *= sign(ldot); 104 | ldot *= sign(ldot); 105 | 106 | color += side.diffuse * ldot; 107 | color += side.specular * pow(max(0,dot(normal,halfDir)),16); 108 | 109 | out_Color = color; 110 | } 111 | -------------------------------------------------------------------------------- /scene.vert.glsl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #version 460 core 22 | /**/ 23 | 24 | #extension GL_GOOGLE_include_directive : enable 25 | #extension GL_EXT_buffer_reference : enable 26 | #extension GL_EXT_scalar_block_layout : enable 27 | 28 | #include "common.h" 29 | 30 | #if UNIFORMS_TECHNIQUE == UNIFORMS_MULTISETSDYNAMIC 31 | 32 | layout(set=DRAW_UBO_SCENE, binding=0, scalar) uniform sceneBuffer { 33 | SceneData scene; 34 | }; 35 | layout(set=DRAW_UBO_MATRIX, binding=0, scalar) uniform objectBuffer { 36 | MatrixData matrix; 37 | }; 38 | 39 | #elif UNIFORMS_TECHNIQUE == UNIFORMS_PUSHCONSTANTS_ADDRESS 40 | 41 | layout(set=0, binding=DRAW_UBO_SCENE, scalar) uniform sceneBuffer { 42 | SceneData scene; 43 | }; 44 | layout(buffer_reference, buffer_reference_align=16, scalar) readonly buffer MatrixBuffer { 45 | MatrixData matrix; 46 | }; 47 | 48 | layout(push_constant, scalar) uniform pushConstants { 49 | MatrixBuffer v; 50 | }; 51 | #define matrix v.matrix 52 | 53 | #elif UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_VERTEXATTRIB || UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_BASEINSTANCE 54 | 55 | layout(set=0, binding=DRAW_UBO_SCENE, scalar) uniform sceneBuffer { 56 | SceneData scene; 57 | }; 58 | layout(set=0, binding=DRAW_SSBO_MATRIX, scalar) readonly buffer MatrixBuffer { 59 | MatrixData matrices[]; 60 | }; 61 | 62 | #if UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_VERTEXATTRIB 63 | in layout(location=VERTEX_COMBINED_INDEX) uint inCombinedIndex; 64 | 65 | #define matrix matrices[uint(inCombinedIndex) & ((1u << INDEXED_MATRIX_BITS)-1u)] 66 | #elif UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_BASEINSTANCE 67 | #define matrix matrices[uint(gl_BaseInstance) & ((1u << INDEXED_MATRIX_BITS)-1u)] 68 | #endif 69 | 70 | #endif 71 | 72 | in layout(location=VERTEX_POS_OCTNORMAL) vec4 inPosNormal; 73 | 74 | layout(location=0) out Interpolants { 75 | vec3 wPos; 76 | vec3 wNormal; 77 | #if UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_VERTEXATTRIB || UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_BASEINSTANCE 78 | flat uint materialIndex; 79 | #endif 80 | #if SHADER_PERMUTATION 81 | vec3 oNormal; 82 | #endif 83 | } OUT; 84 | 85 | 86 | 87 | vec2 oct_signNotZero(vec2 v) { 88 | return vec2((v.x >= 0.0) ? +1.0 : -1.0, (v.y >= 0.0) ?
+1.0 : -1.0); 89 | } 90 | vec3 oct_to_float32x3(vec2 e) { 91 | vec3 v = vec3(e.xy, 1.0 - abs(e.x) - abs(e.y)); 92 | if (v.z < 0) v.xy = (1.0 - abs(v.yx)) * oct_signNotZero(v.xy); 93 | return normalize(v); 94 | } 95 | vec2 float32x3_to_oct(in vec3 v) { 96 | // Project the sphere onto the octahedron, and then onto the xy plane 97 | vec2 p = v.xy * (1.0 / (abs(v.x) + abs(v.y) + abs(v.z))); 98 | // Reflect the folds of the lower hemisphere over the diagonals 99 | return (v.z <= 0.0) ? ((1.0 - abs(p.yx)) * oct_signNotZero(p)) : p; 100 | } 101 | 102 | void main() 103 | { 104 | vec3 inNormal = oct_to_float32x3(unpackSnorm2x16(floatBitsToUint(inPosNormal.w))); 105 | 106 | vec3 wPos = (matrix.worldMatrix * vec4(inPosNormal.xyz,1)).xyz; 107 | vec3 wNormal = mat3(matrix.worldMatrixIT) * inNormal; 108 | 109 | gl_Position = scene.viewProjMatrix * vec4(wPos,1); 110 | 111 | #if SHADER_PERMUTATION 112 | OUT.oNormal = inNormal; 113 | #endif 114 | #if UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_BASEINSTANCE 115 | OUT.materialIndex = uint(gl_BaseInstance) >> INDEXED_MATRIX_BITS; 116 | #elif UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_VERTEXATTRIB 117 | OUT.materialIndex = inCombinedIndex >> INDEXED_MATRIX_BITS; 118 | #endif 119 | 120 | OUT.wPos = wPos; 121 | OUT.wNormal = wNormal; 122 | } 123 | --------------------------------------------------------------------------------
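The vertex shader decodes an octahedral-encoded normal from two snorm16 values packed into the float bit pattern of inPosNormal.w. A CPU-side encoder sketch that mirrors float32x3_to_oct plus the snorm16 quantization consumed by unpackSnorm2x16 (assuming glm and <cmath>; helper names are hypothetical, this is not the sample's loader code):

    static uint16_t floatToSnorm16(float f) {
      // snorm16 encode matching GLSL unpackSnorm2x16 decoding
      return (uint16_t)(int16_t)roundf(glm::clamp(f, -1.0f, 1.0f) * 32767.0f);
    }
    // project onto octahedron, fold lower hemisphere, quantize to CadScene::Vertex normalOctX/Y
    static void encodeOctNormal(glm::vec3 n, uint16_t& octX, uint16_t& octY) {
      glm::vec2 p = glm::vec2(n.x, n.y) / (fabsf(n.x) + fabsf(n.y) + fabsf(n.z));
      if (n.z <= 0.0f)
        p = (1.0f - glm::abs(glm::vec2(p.y, p.x))) * glm::vec2(p.x >= 0.0f ? 1.0f : -1.0f, p.y >= 0.0f ? 1.0f : -1.0f);
      octX = floatToSnorm16(p.x);
      octY = floatToSnorm16(p.y);
    }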
/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | get_filename_component(PROJNAME ${CMAKE_CURRENT_SOURCE_DIR} NAME) 3 | Project(${PROJNAME}) 4 | Message(STATUS "-------------------------------") 5 | Message(STATUS "Processing Project ${PROJNAME}:") 6 | 7 | ##################################################################################### 8 | # look for nvpro_core 1) as a sub-folder 2) at some other locations 9 | # this cannot be put anywhere else since we still didn't find setup.cmake yet 10 | # 11 | if(NOT BASE_DIRECTORY) 12 | 13 | find_path(BASE_DIRECTORY 14 | NAMES nvpro_core/cmake/setup.cmake 15 | PATHS ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/../.. 16 | REQUIRED 17 | DOC "Directory containing nvpro_core" 18 | ) 19 | endif() 20 | if(EXISTS ${BASE_DIRECTORY}/nvpro_core/cmake/setup.cmake) 21 | include(${BASE_DIRECTORY}/nvpro_core/cmake/setup.cmake) 22 | include(${BASE_DIRECTORY}/nvpro_core/cmake/utilities.cmake) 23 | else() 24 | message(FATAL_ERROR "could not find base directory, please set BASE_DIRECTORY to folder containing nvpro_core") 25 | endif() 26 | 27 | _add_project_definitions(${PROJNAME}) 28 | 29 | #-------------------------------------------------------------------------------------------------- 30 | # Resources 31 | # 32 | download_files(FILENAMES geforce.csf.gz) 33 | 34 | 35 | ##################################################################################### 36 | # additions from packages needed for this sample 37 | # add refs in LIBRARIES_OPTIMIZED 38 | # add refs in LIBRARIES_DEBUG 39 | # add files in PACKAGE_SOURCE_FILES 40 | # 41 | set( EXENAME ${PROJNAME} ) 42 | _add_package_VulkanSDK() 43 | _add_package_ShaderC() 44 | _add_package_ZLIB() 45 | _add_package_ImGUI() 46 | 47 | ##################################################################################### 48 | # process the rest of some cmake code that needs to be done *after* the packages add 49 | _add_nvpro_core_lib() 50 | 51 | add_definitions(-DCSF_SUPPORT_ZLIB=1) 52 | 53 | ##################################################################################### 54 | # Resources 55 | # 56 | download_files(FILENAMES geforce.csf.gz) 57 | 58 | ##################################################################################### 59 | # Source files for this project 60 | # 61 | file(GLOB SOURCE_FILES *.cpp *.hpp *.inl *.h *.c) 62 | file(GLOB GLSL_FILES *.glsl) 63 | 64 | ##################################################################################### 65 | # Executable 66 | # 67 | if(WIN32 AND NOT GLUT_FOUND) 68 | add_definitions(/wd4267) #remove size_t to int warning 69 | add_definitions(/wd4996) #remove printf warning 70 | add_definitions(/wd4244) #remove double to float conversion warning 71 | add_definitions(/wd4305) #remove double to float truncation warning 72 | else() 73 | add_definitions(-fpermissive) 74 | endif() 75 | 76 | add_executable(${EXENAME} ${SOURCE_FILES} ${COMMON_SOURCE_FILES} ${PACKAGE_SOURCE_FILES} ${GLSL_FILES}) 77 | 78 | ##################################################################################### 79 | # common source code needed for this sample 80 | # 81 | source_group(common FILES 82 | ${COMMON_SOURCE_FILES} 83 | ${PACKAGE_SOURCE_FILES} 84 | ) 85 | source_group(shaders FILES 86 | ${GLSL_FILES} 87 | ) 88 | source_group("Source Files" FILES ${SOURCE_FILES}) 89 | 90 | if(UNIX) 91 | set(UNIXLINKLIBS dl pthread) 92 | else() 93 | set(UNIXLINKLIBS) 94 | endif() 95 | 96 | ##################################################################################### 97 | # Linkage 98 | # 99 | target_link_libraries(${EXENAME} ${PLATFORM_LIBRARIES} nvpro_core) 100 | 101 | foreach(DEBUGLIB ${LIBRARIES_DEBUG}) 102 | target_link_libraries(${EXENAME} debug ${DEBUGLIB}) 103 | endforeach(DEBUGLIB) 104 | 105 | foreach(RELEASELIB ${LIBRARIES_OPTIMIZED}) 106 | target_link_libraries(${EXENAME} optimized ${RELEASELIB}) 107 | endforeach(RELEASELIB) 108 | 109 | ##################################################################################### 110 | # copies binaries that need to be put next to the exe files (ZLib, etc.)
111 | # 112 | 113 | _finalize_target( ${EXENAME} ) 114 | 115 | LIST(APPEND GLSL_FILES "common.h") 116 | install(FILES ${GLSL_FILES} CONFIGURATIONS Release DESTINATION "bin_${ARCH}/GLSL_${PROJNAME}") 117 | install(FILES ${GLSL_FILES} CONFIGURATIONS Debug DESTINATION "bin_${ARCH}_debug/GLSL_${PROJNAME}") 118 | 119 | -------------------------------------------------------------------------------- /renderer.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #ifndef RENDERER_H__ 22 | #define RENDERER_H__ 23 | 24 | #include "resources_vk.hpp" 25 | #include <vector> 26 | 27 | // disable state filtering for buffer binds 28 | #define USE_NOFILTER 0 29 | // print per-thread stats 30 | #define PRINT_TIMER_STATS 1 31 | 32 | namespace generatedcmds { 33 | 34 | enum Strategy 35 | { // per-object 36 | STRATEGY_GROUPS, // sorted and combined parts by material 37 | STRATEGY_INDIVIDUAL, // keep all parts individual 38 | STRATEGY_SINGLE // single draw / material 39 | }; 40 | 41 | class Renderer 42 | { 43 | public: 44 | struct Stats 45 | { 46 | uint32_t drawCalls = 0; 47 | uint32_t drawTriangles = 0; 48 | uint32_t shaderBindings = 0; 49 | uint32_t sequences = 0; 50 | uint32_t preprocessSizeKB = 0; 51 | uint32_t indirectSizeKB = 0; 52 | uint32_t cmdBuffers = 0; 53 | }; 54 | 55 | struct Config 56 | { 57 | Strategy strategy; 58 | BindingMode bindingMode; 59 | uint32_t objectFrom; 60 | uint32_t objectNum; 61 | uint32_t maxShaders = 16; 62 | uint32_t workerThreads; 63 | bool interleaved = false; 64 | bool sorted = false; 65 | bool unordered = false; 66 | bool permutated = false; 67 | bool binned = false; 68 | bool shaderObjs = false; 69 | }; 70 | 71 | struct DrawItem 72 | { 73 | bool solid; 74 | int materialIndex; 75 | int geometryIndex; 76 | int matrixIndex; 77 | int shaderIndex; 78 | CadScene::DrawRange range; 79 | }; 80 | 81 | static inline bool DrawItem_compare_groups(const DrawItem& a, const DrawItem& b) 82 | { 83 | int diff = 0; 84 | diff = diff != 0 ? diff : (a.solid == b.solid ? 0 : (a.solid ? -1 : 1)); 85 | diff = diff != 0 ? diff : (a.shaderIndex - b.shaderIndex); 86 | diff = diff != 0 ? diff : (a.geometryIndex - b.geometryIndex); 87 | diff = diff != 0 ? diff : (a.materialIndex - b.materialIndex); 88 | diff = diff != 0 ?
diff : (a.matrixIndex - b.matrixIndex); 89 | 90 | return diff < 0; 91 | } 92 | 93 | class Type 94 | { 95 | public: 96 | Type() { getRegistry().push_back(this); } 97 | 98 | public: 99 | virtual bool isAvailable(const nvvk::Context& context) = 0; 100 | virtual const char* name() const = 0; 101 | virtual Renderer* create() const = 0; 102 | virtual uint32_t priority() const { return 0xFF; } 103 | virtual uint32_t supportedBindingModes() const { return 0xFF; } 104 | virtual bool supportsShaderObjs() const { return true; } 105 | virtual uint32_t supportedShaderBinds() const { return ~0; } 106 | }; 107 | 108 | typedef std::vector<Type*> Registry; 109 | 110 | static Registry& getRegistry() 111 | { 112 | static Registry s_registry; 113 | return s_registry; 114 | } 115 | 116 | public: 117 | virtual void init(const CadScene* scene, ResourcesVK* resources, const Config& config, Stats& stats) {} 118 | virtual void deinit() {} 119 | virtual void draw(const Resources::Global& global, Stats& stats) {} 120 | 121 | virtual ~Renderer() {} 122 | 123 | void fillDrawItems(std::vector<DrawItem>& drawItems, const CadScene* scene, const Config& config, Stats& stats); 124 | void fillRandomPermutation(uint32_t drawCount, uint32_t* permutation, const DrawItem* drawItems, Stats& stats); 125 | 126 | Config m_config; 127 | const CadScene* m_scene; 128 | }; 129 | } // namespace generatedcmds 130 | 131 | #endif 132 | --------------------------------------------------------------------------------
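Renderer::Type's constructor self-registers the instance in the static registry, so the application can enumerate available renderer backends without a central list. A hypothetical registration sketch (RendererMeVK and s_typeMe are illustrative names only, not the sample's actual backends):

    class RendererMeVK : public Renderer { /* init/draw/deinit overrides */ };

    class RendererMeVKType : public Renderer::Type
    {
    public:
      bool        isAvailable(const nvvk::Context& context) override { return true; }
      const char* name() const override { return "me_vk"; }
      Renderer*   create() const override { return new RendererMeVK(); }
    };
    // static instance: its base constructor pushes it into getRegistry() at startup
    static RendererMeVKType s_typeMe;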
/cadscene_vk.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #pragma once 22 | 23 | #include "cadscene.hpp" 24 | 25 | #include <nvvk/commands_vk.hpp> 26 | #include <nvvk/resourceallocator_vk.hpp> 27 | #include <nvvk/stagingmemorymanager_vk.hpp> 28 | 29 | // ScopeStaging handles uploads and other staging operations. 30 | // not efficient because it blocks/syncs operations 31 | 32 | struct ScopeStaging 33 | { 34 | ScopeStaging(nvvk::ResourceAllocator& resAllocator, VkQueue queue_, uint32_t queueFamily) 35 | : staging(*resAllocator.getStaging()) 36 | , cmdPool(resAllocator.getDevice(), queueFamily) 37 | , queue(queue_) 38 | , cmd(VK_NULL_HANDLE) 39 | { 40 | } 41 | ~ScopeStaging() { submit(); } 42 | 43 | VkCommandBuffer cmd; 44 | nvvk::StagingMemoryManager& staging; 45 | nvvk::CommandPool cmdPool; 46 | VkQueue queue; 47 | 48 | VkCommandBuffer getCmd() 49 | { 50 | cmd = cmd ? cmd : cmdPool.createCommandBuffer(); 51 | return cmd; 52 | } 53 | 54 | void submit() 55 | { 56 | if(cmd) 57 | { 58 | cmdPool.submitAndWait(cmd, queue); 59 | cmd = VK_NULL_HANDLE; 60 | staging.releaseResources(); 61 | } 62 | } 63 | 64 | void uploadAutoSubmit(const VkDescriptorBufferInfo& binding, const void* data) 65 | { 66 | if(cmd && (data == nullptr || !staging.fitsInAllocated(binding.range))) 67 | { 68 | submit(); 69 | } 70 | if(data && binding.range) 71 | { 72 | staging.cmdToBuffer(getCmd(), binding.buffer, binding.offset, binding.range, data); 73 | } 74 | } 75 | 76 | void* upload(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, const void* data = nullptr) 77 | { 78 | return staging.cmdToBuffer(getCmd(), buffer, offset, size, data); 79 | } 80 | 81 | template <class T> 82 | T* uploadT(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, const void* data = nullptr) 83 | { 84 | return (T*)staging.cmdToBuffer(getCmd(), buffer, offset, size, data); 85 | } 86 | }; 87 | 88 | 89 | // GeometryMemoryVK manages vbo/ibo etc. in chunks 90 | // allows reducing the number of bindings and is more memory efficient 91 | 92 | struct GeometryMemoryVK 93 | { 94 | typedef size_t Index; 95 | 96 | 97 | struct Allocation 98 | { 99 | Index chunkIndex; 100 | VkDeviceSize vboOffset; 101 | VkDeviceSize iboOffset; 102 | }; 103 | 104 | struct Chunk 105 | { 106 | nvvk::Buffer vbo; 107 | nvvk::Buffer ibo; 108 | 109 | VkDeviceSize vboSize; 110 | VkDeviceSize iboSize; 111 | }; 112 | 113 | 114 | VkDevice m_device = VK_NULL_HANDLE; 115 | nvvk::ResourceAllocator* m_resourceAllocator; 116 | std::vector<Chunk> m_chunks; 117 | 118 | void init(nvvk::ResourceAllocator* resourceAllocator, VkDeviceSize vboStride, VkDeviceSize maxChunk); 119 | void deinit(); 120 | void alloc(VkDeviceSize vboSize, VkDeviceSize iboSize, Allocation& allocation); 121 | void finalize(); 122 | 123 | const Chunk& getChunk(const Allocation& allocation) const { return m_chunks[allocation.chunkIndex]; } 124 | 125 | const Chunk& getChunk(Index index) const { return m_chunks[index]; } 126 | 127 | VkDeviceSize getVertexSize() const 128 | { 129 | VkDeviceSize size = 0; 130 | for(size_t i = 0; i < m_chunks.size(); i++) 131 | { 132 | size += m_chunks[i].vboSize; 133 | } 134 | return size; 135 | } 136 | 137 | VkDeviceSize getIndexSize() const 138 | { 139 | VkDeviceSize size = 0; 140 | for(size_t i = 0; i < m_chunks.size(); i++) 141 | { 142 | size += m_chunks[i].iboSize; 143 | } 144 | return size; 145 | } 146 | 147 | VkDeviceSize getChunkCount() const { return m_chunks.size(); } 148 | 149 | private: 150 | VkDeviceSize m_alignment; 151 | VkDeviceSize m_vboAlignment; 152 | VkDeviceSize m_maxVboChunk; 153 | VkDeviceSize m_maxIboChunk; 154 | 155 | Index getActiveIndex() { return (m_chunks.size() - 1); } 156 | 157 | Chunk& getActiveChunk() 158 | { 159 | assert(!m_chunks.empty()); 160 | return m_chunks[getActiveIndex()]; 161 | } 162 | }; 163 | 164 | 165 | class CadSceneVK 166 | { 167 | public: 168 | struct Geometry 169 | { 170 | GeometryMemoryVK::Allocation allocation; 171 | 172 | VkDescriptorBufferInfo vbo; 173 | VkDescriptorBufferInfo ibo; 174 | }; 175 | 176 | struct Buffers 177 | { 178 | nvvk::Buffer materials = {}; 179 | nvvk::Buffer matrices = {}; 180 | nvvk::Buffer matricesOrig = {}; 181 | }; 182 | 183 | struct Infos 184 | { 185 | VkDescriptorBufferInfo materialsSingle, materials, matricesSingle, matrices, matricesOrig; 186 | }; 187 | 188 | struct Config 189 | { 190 | bool singleAllocation = false; 191 | }; 192 | 193 | VkDevice m_device = VK_NULL_HANDLE; 194 | 195 | Config
m_config; 196 | 197 | Buffers m_buffers; 198 | Infos m_infos; 199 | 200 | std::vector<Geometry> m_geometry; 201 | GeometryMemoryVK m_geometryMem; 202 | nvvk::ResourceAllocator* m_resourceAllocator = nullptr; 203 | 204 | 205 | void init(const CadScene& cadscene, nvvk::ResourceAllocator& resourceAllocator, VkQueue queue, uint32_t queueFamilyIndex, const Config& config); 206 | void deinit(); 207 | }; 208 | --------------------------------------------------------------------------------
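GeometryMemoryVK accumulates sub-allocations into shared chunk buffers: alloc() only advances offsets (finalizing the current chunk and opening a new one when it would overflow), and one last finalize() creates the buffers of the still-open chunk. A usage sketch mirroring CadSceneVK::init in cadscene_vk.cpp below (vboBytes/iboBytes are placeholder sizes):

    GeometryMemoryVK mem;
    mem.init(&resourceAllocator, sizeof(CadScene::Vertex), 256 * 1024 * 1024); // 256 MB max chunk
    GeometryMemoryVK::Allocation alloc;
    mem.alloc(vboBytes, iboBytes, alloc);  // records chunkIndex + vbo/ibo offsets
    mem.finalize();                        // creates vbo/ibo buffers of the last chunk
    const GeometryMemoryVK::Chunk& chunk = mem.getChunk(alloc);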
/cadscene.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #ifndef CADSCENE_H__ 22 | #define CADSCENE_H__ 23 | 24 | #include <cstring> // memset 25 | #include <cfloat> 26 | #include <glm/glm.hpp> 27 | #include <vector> 28 | 29 | class CadScene 30 | { 31 | 32 | public: 33 | struct BBox 34 | { 35 | glm::vec4 min; 36 | glm::vec4 max; 37 | 38 | BBox() 39 | : min(FLT_MAX) 40 | , max(-FLT_MAX) 41 | { 42 | } 43 | 44 | inline void merge(const glm::vec4& point) 45 | { 46 | min = glm::min(min, point); 47 | max = glm::max(max, point); 48 | } 49 | 50 | inline void merge(const BBox& bbox) 51 | { 52 | min = glm::min(min, bbox.min); 53 | max = glm::max(max, bbox.max); 54 | } 55 | 56 | inline BBox transformed(const glm::mat4& matrix, int dim = 3) 57 | { 58 | int i; 59 | glm::vec4 box[16]; 60 | // create box corners 61 | box[0] = glm::vec4(min.x, min.y, min.z, min.w); 62 | box[1] = glm::vec4(max.x, min.y, min.z, min.w); 63 | box[2] = glm::vec4(min.x, max.y, min.z, min.w); 64 | box[3] = glm::vec4(max.x, max.y, min.z, min.w); 65 | box[4] = glm::vec4(min.x, min.y, max.z, min.w); 66 | box[5] = glm::vec4(max.x, min.y, max.z, min.w); 67 | box[6] = glm::vec4(min.x, max.y, max.z, min.w); 68 | box[7] = glm::vec4(max.x, max.y, max.z, min.w); 69 | 70 | box[8] = glm::vec4(min.x, min.y, min.z, max.w); 71 | box[9] = glm::vec4(max.x, min.y, min.z, max.w); 72 | box[10] = glm::vec4(min.x, max.y, min.z, max.w); 73 | box[11] = glm::vec4(max.x, max.y, min.z, max.w); 74 | box[12] = glm::vec4(min.x, min.y, max.z, max.w); 75 | box[13] = glm::vec4(max.x, min.y, max.z, max.w); 76 | box[14] = glm::vec4(min.x, max.y, max.z, max.w); 77 | box[15] = glm::vec4(max.x, max.y, max.z, max.w); 78 | 79 | // transform box corners 80 | // and find new mins,maxs 81 | BBox bbox; 82 | 83 | for(i = 0; i < (1 << dim); i++) 84 | { 85 | glm::vec4 point = matrix * box[i]; 86 | bbox.merge(point); 87 | } 88 | 89 | return bbox; 90 | } 91 | }; 92 | 93 | struct MaterialSide 94 | { 95 | glm::vec4 ambient; 96 | glm::vec4 diffuse; 97 | glm::vec4 specular; 98 | glm::vec4 emissive; 99 | }; 100 | 101 | // need to keep this 256 byte aligned (UBO range) 102 | struct Material 103 | { 104 | MaterialSide sides[2]; 105 | unsigned int _pad[32]; 106 | 107 | Material() { memset(this, 0, sizeof(Material)); } 108 | }; 109 | 110 | // need to keep this 256 byte aligned (UBO range) 111 | struct MatrixNode 112 | { 113 | glm::mat4 worldMatrix; 114 | glm::mat4 worldMatrixIT; 115 | glm::mat4 objectMatrix; 116 | glm::mat4 objectMatrixIT; 117 | }; 118 | 119 | struct Vertex 120 | { 121 | glm::vec3 position; 122 | uint16_t normalOctX; 123 | uint16_t normalOctY; 124 | }; 125 | 126 | struct DrawRange 127 | { 128 | size_t offset; 129 | int count; 130 | 131 | DrawRange() 132 | : offset(0) 133 | , count(0) 134 | { 135 | } 136 | }; 137 | 138 | struct DrawStateInfo 139 | { 140 | int materialIndex; 141 | int matrixIndex; 142 | 143 | friend bool operator!=(const DrawStateInfo& lhs, const DrawStateInfo& rhs) 144 | { 145 | return lhs.materialIndex != rhs.materialIndex || lhs.matrixIndex != rhs.matrixIndex; 146 | } 147 | 148 | friend bool operator==(const DrawStateInfo& lhs, const DrawStateInfo& rhs) 149 | { 150 | return lhs.materialIndex == rhs.materialIndex && lhs.matrixIndex == rhs.matrixIndex; 151 | } 152 | }; 153 | 154 | struct DrawRangeCache 155 | { 156 | std::vector<DrawStateInfo> state; 157 | std::vector<int> stateCount; 158 | 159 | std::vector<size_t> offsets; 160 | std::vector<int> counts; 161 | }; 162 | 163 | struct GeometryPart 164 | { 165 | DrawRange indexSolid; 166 | DrawRange indexWire; 167 | }; 168 | 169 | struct Geometry 170 | { 171 | int cloneIdx; 172 | size_t vboSize; 173 | size_t iboSize; 174 | 175 | Vertex* vboData; 176 | unsigned int* iboData; 177 | 178 | std::vector<GeometryPart> parts; 179 | 180 | int numVertices; 181 | int numIndexSolid; 182 | int numIndexWire; 183 | }; 184 | 185 | struct ObjectPart 186 | { 187 | int active; 188 | int materialIndex; 189 | int matrixIndex; 190 | }; 191 | 192 | struct Object 193 | { 194 | int matrixIndex; 195 | int geometryIndex; 196 | 197 | std::vector<ObjectPart> parts; 198 | 199 | DrawRangeCache cacheSolid; 200 | DrawRangeCache cacheWire; 201 | }; 202 | 203 | std::vector<Material> m_materials; 204 | std::vector<BBox> m_geometryBboxes; 205 | std::vector<Geometry> m_geometry; 206 | std::vector<MatrixNode> m_matrices; 207 | std::vector<Object> m_objects; 208 | 209 | 210 | BBox m_bbox; 211 | 212 | 213 | void updateObjectDrawCache(Object& object); 214 | 215 | bool loadCSF(const char* filename, int clones = 0, int cloneaxis = 3); 216 | void unload(); 217 | 218 | struct IndexingBits 219 | { 220 | uint32_t matrices = 0; 221 | uint32_t materials = 0; 222 | 223 | uint32_t packIndices(uint32_t matrixIndex, uint32_t materialIndex) const 224 | { 225 | return matrixIndex | (materialIndex << matrices); 226 | } 227 | }; 228 | 229 | IndexingBits getIndexingBits() const; 230 | bool supportsIndexing() const; 231 | }; 232 | 233 | 234 | #endif 235 | --------------------------------------------------------------------------------
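Material and MatrixNode are padded/sized so their stride is a 256-byte multiple, the classic minUniformBufferOffsetAlignment limit that initAlignedSizes() in resources.hpp also rounds against. A compile-time guard sketch of that invariant (not in the original source):

    // 2 x MaterialSide (128 B) + _pad[32] (128 B) = 256 B; 4 x mat4 = 256 B
    static_assert(sizeof(CadScene::Material)   == 256, "UBO offset alignment assumption");
    static_assert(sizeof(CadScene::MatrixNode) == 256, "UBO offset alignment assumption");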
/cadscene_vk.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #include "config.h" 22 | #include "cadscene_vk.hpp" 23 | 24 | #include <algorithm> 25 | #include <inttypes.h> 26 | #include <nvh/nvprint.hpp> 27 | 28 | 29 | static inline VkDeviceSize alignedSize(VkDeviceSize sz, VkDeviceSize align) 30 | { 31 | return ((sz + align - 1) / (align)) * align; 32 | } 33 | 34 | 35 | void GeometryMemoryVK::init(nvvk::ResourceAllocator* resourceAllocator, VkDeviceSize vboStride, VkDeviceSize maxChunk) 36 | { 37 | m_resourceAllocator = resourceAllocator; 38 | m_alignment = 16; 39 | m_vboAlignment = 16; 40 | 41 | m_maxVboChunk = maxChunk; 42 | m_maxIboChunk = maxChunk; 43 | } 44 | 45 | void GeometryMemoryVK::deinit() 46 | { 47 | for(size_t i = 0; i < m_chunks.size(); i++) 48 | { 49 | Chunk chunk = getChunk(i); 50 | m_resourceAllocator->destroy(chunk.vbo); 51 | m_resourceAllocator->destroy(chunk.ibo); 52 | } 53 | m_chunks = std::vector<Chunk>(); 54 | m_device = nullptr; 55 | m_resourceAllocator = nullptr; 56 | } 57 | 58 | void GeometryMemoryVK::alloc(VkDeviceSize vboSize, VkDeviceSize iboSize, Allocation& allocation) 59 | { 60 | vboSize = alignedSize(vboSize, m_vboAlignment); 61 | iboSize = alignedSize(iboSize, m_alignment); 62 | 63 | if(m_chunks.empty() || getActiveChunk().vboSize + vboSize > m_maxVboChunk || getActiveChunk().iboSize + iboSize > m_maxIboChunk) 64 | { 65 | finalize(); 66 | Chunk chunk = {}; 67 | m_chunks.push_back(chunk); 68 | } 69 | 70 | Chunk& chunk = getActiveChunk(); 71 | 72 | allocation.chunkIndex = getActiveIndex(); 73 | allocation.vboOffset = chunk.vboSize; 74 | allocation.iboOffset = chunk.iboSize; 75 | 76 | chunk.vboSize += vboSize; 77 | chunk.iboSize += iboSize; 78 | } 79 | 80 | void GeometryMemoryVK::finalize() 81 | { 82 | if(m_chunks.empty()) 83 | { 84 | return; 85 | } 86 | 87 | Chunk& chunk = getActiveChunk(); 88 | 89 | uint32_t flags = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; 90 | 91 | chunk.vbo = m_resourceAllocator->createBuffer(chunk.vboSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | flags); 92 | chunk.ibo = m_resourceAllocator->createBuffer(chunk.iboSize, VK_BUFFER_USAGE_INDEX_BUFFER_BIT | flags); 93 | } 94 | 95 | void CadSceneVK::init(const CadScene& cadscene, nvvk::ResourceAllocator& resourceAllocator, VkQueue queue, uint32_t queueFamilyIndex, const Config& config) 96 | { 97 | VkDeviceSize MB = 1024 * 1024; 98 | 99 | m_resourceAllocator = &resourceAllocator; 100 | m_config = config; 101 | m_geometry.resize(cadscene.m_geometry.size(), {0}); 102 | 103 | if(m_geometry.empty()) 104 | return; 105 | 106 | { 107 | // allocation phase 108 | m_geometryMem.init(&resourceAllocator, sizeof(CadScene::Vertex), config.singleAllocation ?
VkDeviceSize(4096) * MB : 256 * MB); 109 | 110 | for(size_t g = 0; g < cadscene.m_geometry.size(); g++) 111 | { 112 | const CadScene::Geometry& cadgeom = cadscene.m_geometry[g]; 113 | Geometry& geom = m_geometry[g]; 114 | 115 | m_geometryMem.alloc(cadgeom.vboSize, cadgeom.iboSize, geom.allocation); 116 | } 117 | 118 | m_geometryMem.finalize(); 119 | 120 | LOGI("Size of vertex data: %11" PRId64 "\n", uint64_t(m_geometryMem.getVertexSize())); 121 | LOGI("Size of index data: %11" PRId64 "\n", uint64_t(m_geometryMem.getIndexSize())); 122 | LOGI("Size of data: %11" PRId64 "\n", uint64_t(m_geometryMem.getVertexSize() + m_geometryMem.getIndexSize())); 123 | LOGI("Chunks: %11d\n", uint32_t(m_geometryMem.getChunkCount())); 124 | } 125 | 126 | ScopeStaging staging(resourceAllocator, queue, queueFamilyIndex); 127 | 128 | for(size_t g = 0; g < cadscene.m_geometry.size(); g++) 129 | { 130 | const CadScene::Geometry& cadgeom = cadscene.m_geometry[g]; 131 | Geometry& geom = m_geometry[g]; 132 | const GeometryMemoryVK::Chunk& chunk = m_geometryMem.getChunk(geom.allocation); 133 | 134 | // upload and assignment phase 135 | geom.vbo.buffer = chunk.vbo.buffer; 136 | geom.vbo.offset = geom.allocation.vboOffset; 137 | geom.vbo.range = cadgeom.vboSize; 138 | staging.uploadAutoSubmit(geom.vbo, cadgeom.vboData); 139 | 140 | geom.ibo.buffer = chunk.ibo.buffer; 141 | geom.ibo.offset = geom.allocation.iboOffset; 142 | geom.ibo.range = cadgeom.iboSize; 143 | staging.uploadAutoSubmit(geom.ibo, cadgeom.iboData); 144 | } 145 | 146 | VkBufferUsageFlags usageFlags = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; 147 | usageFlags |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; 148 | 149 | VkDeviceSize materialsSize = cadscene.m_materials.size() * sizeof(CadScene::Material); 150 | VkDeviceSize matricesSize = cadscene.m_matrices.size() * sizeof(CadScene::MatrixNode); 151 | 152 | m_buffers.materials = resourceAllocator.createBuffer(materialsSize, usageFlags); 153 | m_buffers.matrices = resourceAllocator.createBuffer(matricesSize, usageFlags); 154 | m_buffers.matricesOrig = resourceAllocator.createBuffer(matricesSize, usageFlags | VK_BUFFER_USAGE_TRANSFER_SRC_BIT); 155 | 156 | m_infos.materialsSingle = {m_buffers.materials.buffer, 0, sizeof(CadScene::Material)}; 157 | m_infos.materials = {m_buffers.materials.buffer, 0, materialsSize}; 158 | m_infos.matricesSingle = {m_buffers.matrices.buffer, 0, sizeof(CadScene::MatrixNode)}; 159 | m_infos.matrices = {m_buffers.matrices.buffer, 0, matricesSize}; 160 | m_infos.matricesOrig = {m_buffers.matricesOrig.buffer, 0, matricesSize}; 161 | 162 | staging.uploadAutoSubmit(m_infos.materials, cadscene.m_materials.data()); 163 | staging.uploadAutoSubmit(m_infos.matrices, cadscene.m_matrices.data()); 164 | staging.uploadAutoSubmit(m_infos.matricesOrig, cadscene.m_matrices.data()); 165 | 166 | staging.uploadAutoSubmit({}, nullptr); 167 | } 168 | 169 | void CadSceneVK::deinit() 170 | { 171 | m_resourceAllocator->destroy(m_buffers.materials); 172 | m_resourceAllocator->destroy(m_buffers.matrices); 173 | m_resourceAllocator->destroy(m_buffers.matricesOrig); 174 | m_geometry.clear(); 175 | m_geometryMem.deinit(); 176 | } 177 | -------------------------------------------------------------------------------- /renderer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #include <algorithm> 22 | #include <cstdlib> 23 | #include "renderer.hpp" 24 | #include <vector> 25 | 26 | #include "common.h" 27 | 28 | #pragma pack(1) 29 | 30 | 31 | namespace generatedcmds { 32 | ////////////////////////////////////////////////////////////////////////// 33 | 34 | static void AddItem(std::vector<Renderer::DrawItem>& drawItems, const Renderer::Config& config, const Renderer::DrawItem& di) 35 | { 36 | if(di.range.count) 37 | { 38 | drawItems.push_back(di); 39 | } 40 | } 41 | 42 | static void FillSingle(std::vector<Renderer::DrawItem>& drawItems, 43 | const Renderer::Config& config, 44 | const CadScene::Object& obj, 45 | const CadScene::Geometry& geo, 46 | bool solid, 47 | int objectIndex) 48 | { 49 | int begin = 0; 50 | const CadScene::DrawRangeCache& cache = solid ? obj.cacheSolid : obj.cacheWire; 51 | 52 | if(obj.parts.empty()) 53 | return; 54 | 55 | const CadScene::ObjectPart& part = obj.parts[0]; 56 | const CadScene::GeometryPart& mesh = geo.parts[0]; 57 | 58 | if(!part.active) 59 | return; 60 | 61 | // evict 62 | Renderer::DrawItem di; 63 | di.geometryIndex = obj.geometryIndex; 64 | di.matrixIndex = part.matrixIndex; 65 | di.materialIndex = part.materialIndex; 66 | di.shaderIndex = part.materialIndex % config.maxShaders; 67 | 68 | di.solid = solid; 69 | di.range.offset = solid ? 0 : geo.numIndexSolid * sizeof(unsigned int); 70 | di.range.count = solid ? geo.numIndexSolid : geo.numIndexWire; 71 | 72 | AddItem(drawItems, config, di); 73 | } 74 | 75 | static void FillCache(std::vector<Renderer::DrawItem>& drawItems, 76 | const Renderer::Config& config, 77 | const CadScene::Object& obj, 78 | const CadScene::Geometry& geo, 79 | bool solid, 80 | int objectIndex) 81 | { 82 | int begin = 0; 83 | const CadScene::DrawRangeCache& cache = solid ?
obj.cacheSolid : obj.cacheWire; 84 | 85 | for(size_t s = 0; s < cache.state.size(); s++) 86 | { 87 | const CadScene::DrawStateInfo& state = cache.state[s]; 88 | for(int d = 0; d < cache.stateCount[s]; d++) 89 | { 90 | // evict 91 | Renderer::DrawItem di; 92 | di.geometryIndex = obj.geometryIndex; 93 | di.matrixIndex = state.matrixIndex; 94 | di.materialIndex = state.materialIndex; 95 | di.shaderIndex = state.materialIndex % config.maxShaders; 96 | 97 | di.solid = solid; 98 | di.range.offset = cache.offsets[begin + d]; 99 | di.range.count = cache.counts[begin + d]; 100 | 101 | AddItem(drawItems, config, di); 102 | } 103 | begin += cache.stateCount[s]; 104 | } 105 | } 106 | 107 | static void FillIndividual(std::vector<Renderer::DrawItem>& drawItems, 108 | const Renderer::Config& config, 109 | const CadScene::Object& obj, 110 | const CadScene::Geometry& geo, 111 | bool solid, 112 | int objectIndex) 113 | { 114 | for(size_t p = 0; p < obj.parts.size(); p++) 115 | { 116 | const CadScene::ObjectPart& part = obj.parts[p]; 117 | const CadScene::GeometryPart& mesh = geo.parts[p]; 118 | 119 | if(!part.active) 120 | continue; 121 | 122 | Renderer::DrawItem di; 123 | di.geometryIndex = obj.geometryIndex; 124 | di.matrixIndex = part.matrixIndex; 125 | di.materialIndex = part.materialIndex; 126 | di.shaderIndex = part.materialIndex % config.maxShaders; 127 | 128 | di.solid = solid; 129 | di.range = solid ? mesh.indexSolid : mesh.indexWire; 130 | 131 | AddItem(drawItems, config, di); 132 | } 133 | } 134 | 135 | void Renderer::fillDrawItems(std::vector<DrawItem>& drawItems, const CadScene* scene, const Config& config, Stats& stats) 136 | { 137 | bool solid = true; 138 | bool wire = false; 139 | 140 | size_t maxObjects = scene->m_objects.size(); 141 | size_t from = std::min(maxObjects - 1, size_t(config.objectFrom)); 142 | maxObjects = std::min(maxObjects, from + size_t(config.objectNum)); 143 | 144 | for(size_t i = from; i < maxObjects; i++) 145 | { 146 | const CadScene::Object& obj = scene->m_objects[i]; 147 | const CadScene::Geometry& geo = scene->m_geometry[obj.geometryIndex]; 148 | 149 | if(config.strategy == STRATEGY_SINGLE) 150 | { 151 | if(solid) 152 | FillSingle(drawItems, config, obj, geo, true, int(i)); 153 | if(wire) 154 | FillSingle(drawItems, config, obj, geo, false, int(i)); 155 | } 156 | else if(config.strategy == STRATEGY_GROUPS) 157 | { 158 | if(solid) 159 | FillCache(drawItems, config, obj, geo, true, int(i)); 160 | if(wire) 161 | FillCache(drawItems, config, obj, geo, false, int(i)); 162 | } 163 | else if(config.strategy == STRATEGY_INDIVIDUAL) 164 | { 165 | if(solid) 166 | FillIndividual(drawItems, config, obj, geo, true, int(i)); 167 | if(wire) 168 | FillIndividual(drawItems, config, obj, geo, false, int(i)); 169 | } 170 | } 171 | 172 | if(config.sorted && !config.permutated) 173 | { 174 | std::sort(drawItems.begin(), drawItems.end(), DrawItem_compare_groups); 175 | } 176 | 177 | int shaderIndex = -1; 178 | for(size_t i = 0; i < drawItems.size(); i++) 179 | { 180 | stats.drawCalls++; 181 | stats.drawTriangles += drawItems[i].range.count / 3; 182 | if(drawItems[i].shaderIndex != shaderIndex) 183 | { 184 | stats.shaderBindings++; 185 | shaderIndex = drawItems[i].shaderIndex; 186 | } 187 | } 188 | } 189 | 190 | void Renderer::fillRandomPermutation(uint32_t drawCount, uint32_t* permutation, const DrawItem* drawItems, Stats& stats) 191 | { 192 | srand(634523); 193 | for(uint32_t i = 0; i < drawCount; i++) 194 | { 195 | permutation[i] = i; 196 | } 197 | if(drawCount) 198 | { 199 | // not exactly a good way to generate
random 32bit ;) 200 | for(uint32_t i = drawCount - 1; i > 0; i--) 201 | { 202 | uint32_t r = 0; 203 | r |= (rand() & 0xFF) << 0; 204 | r |= (rand() & 0xFF) << 8; 205 | r |= (rand() & 0xFF) << 16; 206 | r |= (rand() & 0xFF) << 24; 207 | 208 | uint32_t other = r % (i + 1); 209 | std::swap(permutation[i], permutation[other]); 210 | } 211 | 212 | int shaderIndex = -1; 213 | stats.shaderBindings = 0; 214 | for(uint32_t i = 0; i < drawCount; i++) 215 | { 216 | uint32_t idx = permutation[i]; 217 | if(drawItems[idx].shaderIndex != shaderIndex) 218 | { 219 | stats.shaderBindings++; 220 | shaderIndex = drawItems[idx].shaderIndex; 221 | } 222 | } 223 | } 224 | } 225 | 226 | } // namespace generatedcmds 227 | -------------------------------------------------------------------------------- /resources_vk.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #pragma once 22 | 23 | #define DRAW_UBOS_NUM 3 24 | 25 | #include "cadscene_vk.hpp" 26 | #include "resources.hpp" 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | 42 | namespace generatedcmds { 43 | 44 | class ResourcesVK : public Resources 45 | { 46 | public: 47 | ResourcesVK() {} 48 | 49 | static ResourcesVK* get() 50 | { 51 | static ResourcesVK res; 52 | 53 | return &res; 54 | } 55 | static bool isAvailable(); 56 | 57 | static void initImGui(const nvvk::Context& context); 58 | static void deinitImGui(const nvvk::Context& context); 59 | 60 | struct FrameBuffer 61 | { 62 | int renderWidth = 0; 63 | int renderHeight = 0; 64 | int supersample = 0; 65 | bool useResolved = false; 66 | bool vsync = false; 67 | int msaa = 0; 68 | 69 | VkFormat colorFormat = VK_FORMAT_R8G8B8A8_UNORM; 70 | VkFormat depthStencilFormat; 71 | 72 | VkViewport viewport; 73 | VkViewport viewportUI; 74 | VkRect2D scissor; 75 | VkRect2D scissorUI; 76 | 77 | nvvk::Image imgColor = {}; 78 | nvvk::Image imgColorResolved = {}; 79 | nvvk::Image imgDepthStencil = {}; 80 | 81 | VkImageView viewColor = VK_NULL_HANDLE; 82 | VkImageView viewColorResolved = VK_NULL_HANDLE; 83 | VkImageView viewDepthStencil = VK_NULL_HANDLE; 84 | 85 | VkRenderingAttachmentInfo attachColor; 86 | VkRenderingAttachmentInfo attachColorUI; 87 | VkRenderingAttachmentInfo attachDepth; 88 | 89 | VkRenderingInfo renderingInfo = {VK_STRUCTURE_TYPE_RENDERING_INFO}; 90 | VkRenderingInfo renderingInfoUI = {VK_STRUCTURE_TYPE_RENDERING_INFO}; 91 | VkPipelineRenderingCreateInfo pipelineRenderingInfo = {VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO}; 92 | VkPipelineRenderingCreateInfo pipelineRenderingInfoUI = 
{VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO}; 93 | }; 94 | 95 | struct Common 96 | { 97 | nvvk::Buffer viewBuffer; 98 | VkDescriptorBufferInfo viewInfo; 99 | 100 | nvvk::Buffer animBuffer; 101 | VkDescriptorBufferInfo animInfo; 102 | }; 103 | 104 | struct 105 | { 106 | nvvk::ShaderModuleID shaderModuleID = {}; 107 | VkShaderModule shader = nullptr; 108 | VkPipeline pipeline = nullptr; 109 | } m_animShading; 110 | 111 | struct 112 | { 113 | VkPipeline pipelines[NUM_MATERIAL_SHADERS] = {}; 114 | VkShaderEXT vertexShaderObjs[NUM_MATERIAL_SHADERS] = {}; 115 | VkShaderEXT fragmentShaderObjs[NUM_MATERIAL_SHADERS] = {}; 116 | } m_drawShading; 117 | 118 | struct 119 | { 120 | nvvk::ShaderModuleID vertexIDs[NUM_MATERIAL_SHADERS] = {}; 121 | nvvk::ShaderModuleID fragmentIDs[NUM_MATERIAL_SHADERS] = {}; 122 | VkShaderModule vertexShaders[NUM_MATERIAL_SHADERS] = {}; 123 | VkShaderModule fragmentShaders[NUM_MATERIAL_SHADERS] = {}; 124 | } m_drawShaderModules[NUM_BINDINGMODES]; 125 | 126 | 127 | bool m_withinFrame = false; 128 | nvvk::ShaderModuleManager m_shaderManager; 129 | 130 | 131 | FrameBuffer m_framebuffer = {}; 132 | Common m_common; 133 | 134 | nvvk::SwapChain* m_swapChain = nullptr; 135 | nvvk::Context* m_context = nullptr; 136 | nvvk::ProfilerVK m_profilerVK; 137 | 138 | VkDevice m_device = VK_NULL_HANDLE; 139 | VkPhysicalDevice m_physical; 140 | VkQueue m_queue; 141 | uint32_t m_queueFamily; 142 | nvvk::DeviceMemoryAllocator m_memoryAllocator; 143 | nvvk::ResourceAllocator m_resourceAllocator; 144 | nvvk::RingFences m_ringFences; 145 | nvvk::RingCommandPool m_ringCmdPool; 146 | nvvk::BatchSubmission m_submission; 147 | bool m_submissionWaitForRead; 148 | 149 | VkPipelineCreateFlags2CreateInfoKHR m_gfxStateFlags2CreateInfo; 150 | nvvk::GraphicsPipelineState m_gfxState; 151 | nvvk::GraphicsPipelineGenerator m_gfxGen{m_gfxState}; 152 | nvvk::GraphicShaderObjectPipeline m_gfxStateShaderObjects; 153 | 154 | nvvk::TDescriptorSetContainer m_drawBind; 155 | nvvk::DescriptorSetContainer m_drawPush; 156 | nvvk::DescriptorSetContainer m_drawIndexed; 157 | nvvk::DescriptorSetContainer m_anim; 158 | VkPushConstantRange m_pushRanges[2]; 159 | 160 | BindingMode m_lastBindingMode = NUM_BINDINGMODES; 161 | VkPipelineCreateFlags2KHR m_lastPipeFlags = ~0; 162 | bool m_lastUseShaderObjs = false; 163 | 164 | uint32_t m_numMatrices; 165 | CadSceneVK m_scene; 166 | 167 | size_t m_pipeChangeID; 168 | size_t m_fboChangeID; 169 | 170 | 171 | bool init(nvvk::Context* context, nvvk::SwapChain* swapChain, nvh::Profiler* profiler) override; 172 | void deinit() override; 173 | 174 | void initPipelinesOrShaders(BindingMode bindingMode, VkPipelineCreateFlags2KHR pipeFlags, bool useShaderObjs, bool force = false); 175 | void deinitPipelinesOrShaders(); 176 | bool hasPipes() { return m_animShading.pipeline != 0; } 177 | 178 | bool initPrograms(const std::string& path, const std::string& prepend) override; 179 | void reloadPrograms(const std::string& prepend) override; 180 | 181 | void updatedPrograms(); 182 | void deinitPrograms(); 183 | 184 | bool initFramebuffer(int width, int height, int msaa, bool vsync) override; 185 | void deinitFramebuffer(); 186 | 187 | bool initScene(const CadScene&) override; 188 | void deinitScene() override; 189 | 190 | void synchronize() override; 191 | 192 | void beginFrame() override; 193 | void blitFrame(const Global& global) override; 194 | void endFrame() override; 195 | 196 | void animation(const Global& global) override; 197 | void animationReset() override; 198 | 199 | 
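// The members below wrap command buffer creation and batched queue submission.
// Typical usage, a sketch of the pattern the renderers in this sample follow:
//   VkCommandBuffer cmd = res->createTempCmdBuffer();
//   ... record commands ...
//   vkEndCommandBuffer(cmd);
//   res->submissionEnqueue(cmd);  // collected in m_submission
//   res->submissionExecute();     // submits all enqueued buffers in one batch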
////////////////////////////////////////////////////////////////////////// 200 | 201 | VkCommandBuffer createCmdBuffer(VkCommandPool pool, bool singleshot, bool primary, bool secondaryInClear) const; 202 | VkCommandBuffer createTempCmdBuffer(bool primary = true, bool secondaryInClear = false); 203 | 204 | 205 | // submit for batched execution 206 | void submissionEnqueue(VkCommandBuffer cmdbuffer) { m_submission.enqueue(cmdbuffer); } 207 | void submissionEnqueue(uint32_t num, const VkCommandBuffer* cmdbuffers) { m_submission.enqueue(num, cmdbuffers); } 208 | // perform queue submit 209 | void submissionExecute(VkFence fence = nullptr, bool useImageReadWait = false, bool useImageWriteSignals = false); 210 | 211 | // synchronizes to queue 212 | void resetTempResources(); 213 | 214 | 215 | void cmdShaderObjectState(VkCommandBuffer cmd) const; 216 | void cmdDynamicPipelineState(VkCommandBuffer cmd) const; 217 | void cmdImageTransition(VkCommandBuffer cmd, 218 | VkImage img, 219 | VkImageAspectFlags aspects, 220 | VkAccessFlags src, 221 | VkAccessFlags dst, 222 | VkImageLayout oldLayout, 223 | VkImageLayout newLayout) const; 224 | 225 | void cmdBegin(VkCommandBuffer cmd, bool singleshot, bool primary, bool secondaryInClear) const; 226 | void cmdBeginRendering(VkCommandBuffer cmd, bool hasSecondary = false) const; 227 | 228 | void cmdPipelineBarrier(VkCommandBuffer cmd) const; 229 | }; 230 | 231 | } // namespace generatedcmds 232 | -------------------------------------------------------------------------------- /threadpool.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #include <assert.h> 22 | 23 | #include "threadpool.hpp" 24 | #include "nvh/nvprint.hpp" 25 | #include <thread> 26 | 27 | #define THREADPOOL_TERMINATE_FUNC ((ThreadPool::WorkerFunc)1) 28 | 29 | #define USE_PHYSICAL_CORES_ONLY 1 30 | 31 | #if _WIN32 && USE_PHYSICAL_CORES_ONLY 32 | 33 | #include <windows.h> 34 | 35 | typedef BOOL(WINAPI* LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); 36 | 37 | 38 | // Helper function to count set bits in the processor mask. 39 | static DWORD CountSetBits(ULONG_PTR bitMask) 40 | { 41 | DWORD LSHIFT = sizeof(ULONG_PTR) * 8 - 1; 42 | DWORD bitSetCount = 0; 43 | ULONG_PTR bitTest = (ULONG_PTR)1 << LSHIFT; 44 | DWORD i; 45 | 46 | for(i = 0; i <= LSHIFT; ++i) 47 | { 48 | bitSetCount += ((bitMask & bitTest) ? 
1 : 0); 49 | bitTest /= 2; 50 | } 51 | 52 | return bitSetCount; 53 | } 54 | 55 | unsigned int ThreadPool::sysGetNumCores() 56 | { 57 | LPFN_GLPI glpi; 58 | BOOL done = FALSE; 59 | PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = nullptr; 60 | PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = nullptr; 61 | DWORD returnLength = 0; 62 | DWORD logicalProcessorCount = 0; 63 | DWORD numaNodeCount = 0; 64 | DWORD processorCoreCount = 0; 65 | DWORD processorL1CacheCount = 0; 66 | DWORD processorL2CacheCount = 0; 67 | DWORD processorL3CacheCount = 0; 68 | DWORD processorPackageCount = 0; 69 | DWORD byteOffset = 0; 70 | PCACHE_DESCRIPTOR Cache; 71 | 72 | glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandleA("kernel32"), "GetLogicalProcessorInformation"); 73 | if(nullptr == glpi) 74 | { 75 | return std::thread::hardware_concurrency(); 76 | } 77 | 78 | while(!done) 79 | { 80 | DWORD rc = glpi(buffer, &returnLength); 81 | 82 | if(FALSE == rc) 83 | { 84 | if(GetLastError() == ERROR_INSUFFICIENT_BUFFER) 85 | { 86 | if(buffer) 87 | free(buffer); 88 | 89 | buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(returnLength); 90 | 91 | if(nullptr == buffer) 92 | { 93 | return std::thread::hardware_concurrency(); 94 | } 95 | } 96 | else 97 | { 98 | return std::thread::hardware_concurrency(); 99 | } 100 | } 101 | else 102 | { 103 | done = TRUE; 104 | } 105 | } 106 | 107 | ptr = buffer; 108 | 109 | while(byteOffset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= returnLength) 110 | { 111 | switch(ptr->Relationship) 112 | { 113 | case RelationNumaNode: 114 | // Non-NUMA systems report a single record of this type. 115 | numaNodeCount++; 116 | break; 117 | 118 | case RelationProcessorCore: 119 | processorCoreCount++; 120 | 121 | // A hyperthreaded core supplies more than one logical processor. 122 | logicalProcessorCount += CountSetBits(ptr->ProcessorMask); 123 | break; 124 | 125 | case RelationCache: 126 | // Cache data is in ptr->Cache, one CACHE_DESCRIPTOR structure for each cache. 127 | Cache = &ptr->Cache; 128 | if(Cache->Level == 1) 129 | { 130 | processorL1CacheCount++; 131 | } 132 | else if(Cache->Level == 2) 133 | { 134 | processorL2CacheCount++; 135 | } 136 | else if(Cache->Level == 3) 137 | { 138 | processorL3CacheCount++; 139 | } 140 | break; 141 | 142 | case RelationProcessorPackage: 143 | // Logical processors share a physical package. 
144 | processorPackageCount++; 145 | break; 146 | 147 | default: 148 | break; 149 | } 150 | byteOffset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); 151 | ptr++; 152 | } 153 | 154 | #if 0 155 | LOGI(TEXT("\nGetLogicalProcessorInformation results:\n")); 156 | LOGI(TEXT("Number of NUMA nodes: %d\n"), 157 | numaNodeCount); 158 | LOGI(TEXT("Number of physical processor packages: %d\n"), 159 | processorPackageCount); 160 | LOGI(TEXT("Number of processor cores: %d\n"), 161 | processorCoreCount); 162 | LOGI(TEXT("Number of logical processors: %d\n"), 163 | logicalProcessorCount); 164 | LOGI(TEXT("Number of processor L1/L2/L3 caches: %d/%d/%d\n"), 165 | processorL1CacheCount, 166 | processorL2CacheCount, 167 | processorL3CacheCount); 168 | #endif 169 | 170 | free(buffer); 171 | 172 | return processorCoreCount; 173 | } 174 | 175 | #else 176 | 177 | unsigned int ThreadPool::sysGetNumCores() 178 | { 179 | return std::thread::hardware_concurrency(); 180 | } 181 | 182 | #endif 183 | 184 | 185 | void ThreadPool::threadKicker(void* arg) 186 | { 187 | ThreadEntry* thread = (ThreadEntry*)arg; 188 | thread->m_origin->threadProcess(*thread); 189 | } 190 | 191 | void ThreadPool::threadProcess(ThreadEntry& entry) 192 | { 193 | { 194 | std::unique_lock lock(m_globalMutex); 195 | 196 | LOGI("%d created...\n", entry.m_id); 197 | 198 | m_globalInit++; 199 | m_globalCond.notify_all(); 200 | } 201 | 202 | #if _WIN32 && USE_PHYSICAL_CORES_ONLY 203 | // assume hyperthreading, move to n physical cores 204 | unsigned int cpuCore = entry.m_id * 2 + 1; 205 | SetThreadAffinityMask(GetCurrentThread(), uint64_t(1) << cpuCore); 206 | #endif 207 | 208 | while(true) 209 | { 210 | { 211 | std::unique_lock lock(entry.m_commMutex); 212 | while(!entry.m_fn) 213 | { 214 | entry.m_commCond.wait(lock); 215 | } 216 | } 217 | 218 | if(entry.m_fn == THREADPOOL_TERMINATE_FUNC) 219 | break; 220 | 221 | NV_BARRIER(); 222 | 223 | LOGI("%d started job\n", entry.m_id); 224 | 225 | entry.m_fn(entry.m_fnArg); 226 | entry.m_fn = 0; 227 | 228 | LOGI("%d finished job\n", entry.m_id); 229 | } 230 | 231 | LOGI("%d exiting...\n", entry.m_id); 232 | 233 | { 234 | std::unique_lock lock(m_globalMutex); 235 | LOGI("%d shutdown\n", entry.m_id); 236 | } 237 | } 238 | 239 | void ThreadPool::init(unsigned int numThreads) 240 | { 241 | m_numThreads = numThreads; 242 | m_globalInit = 0; 243 | 244 | m_pool = new ThreadEntry[numThreads]; 245 | 246 | for(unsigned int i = 0; i < numThreads; i++) 247 | { 248 | ThreadEntry& entry = m_pool[i]; 249 | entry.m_id = numThreads - i - 1; 250 | entry.m_origin = this; 251 | entry.m_fn = 0; 252 | entry.m_fnArg = 0; 253 | } 254 | 255 | NV_BARRIER(); 256 | 257 | for(unsigned int i = 0; i < numThreads; i++) 258 | { 259 | ThreadEntry& entry = m_pool[i]; 260 | entry.m_thread = std::thread(threadKicker, &m_pool[i]); 261 | } 262 | 263 | { 264 | std::unique_lock lock(m_globalMutex); 265 | while(m_globalInit < numThreads) 266 | { 267 | m_globalCond.wait(lock); 268 | } 269 | } 270 | 271 | #if _WIN32 && USE_PHYSICAL_CORES_ONLY 272 | // pin the main thread to core 0 273 | SetThreadAffinityMask(GetCurrentThread(), 1); 274 | #endif 275 | } 276 | 277 | void ThreadPool::deinit() 278 | { 279 | NV_BARRIER(); 280 | 281 | for(unsigned int i = 0; i < m_numThreads; i++) 282 | { 283 | ThreadEntry& entry = m_pool[i]; 284 | 285 | { 286 | std::unique_lock lock(entry.m_commMutex); 287 | entry.m_fn = THREADPOOL_TERMINATE_FUNC; 288 | entry.m_fnArg = 0; 289 | entry.m_commCond.notify_all(); 290 | } 291 | 292 | std::this_thread::yield(); 293 | 294 | 
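// the terminate marker was published under the comm mutex above; after the
// yield, block until the worker observes it and returns from threadProcess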
entry.m_thread.join(); 295 | } 296 | 297 | delete[] m_pool; 298 | m_pool = 0; 299 | m_numThreads = 0; 300 | } 301 | 302 | void ThreadPool::activateJob(unsigned int tid, WorkerFunc fn, void* arg) 303 | { 304 | assert(tid < m_numThreads); 305 | 306 | ThreadEntry& entry = m_pool[tid]; 307 | 308 | assert(entry.m_fn == 0); 309 | 310 | { 311 | std::unique_lock lock(entry.m_commMutex); 312 | entry.m_fn = fn; 313 | entry.m_fnArg = arg; 314 | entry.m_commCond.notify_all(); 315 | } 316 | } 317 | -------------------------------------------------------------------------------- /vk_ext_device_generated_commands.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #include 21 | #include "vk_ext_device_generated_commands.hpp" 22 | 23 | static PFN_vkGetGeneratedCommandsMemoryRequirementsEXT s_vkGetGeneratedCommandsMemoryRequirementsEXT = nullptr; 24 | static PFN_vkCmdPreprocessGeneratedCommandsEXT s_vkCmdPreprocessGeneratedCommandsEXT = nullptr; 25 | static PFN_vkCmdExecuteGeneratedCommandsEXT s_vkCmdExecuteGeneratedCommandsEXT = nullptr; 26 | static PFN_vkCreateIndirectCommandsLayoutEXT s_vkCreateIndirectCommandsLayoutEXT = nullptr; 27 | static PFN_vkDestroyIndirectCommandsLayoutEXT s_vkDestroyIndirectCommandsLayoutEXT = nullptr; 28 | static PFN_vkCreateIndirectExecutionSetEXT s_vkCreateIndirectExecutionSetEXT = nullptr; 29 | static PFN_vkDestroyIndirectExecutionSetEXT s_vkDestroyIndirectExecutionSetEXT = nullptr; 30 | static PFN_vkUpdateIndirectExecutionSetPipelineEXT s_vkUpdateIndirectExecutionSetPipelineEXT = nullptr; 31 | static PFN_vkUpdateIndirectExecutionSetShaderEXT s_vkUpdateIndirectExecutionSetShaderEXT = nullptr; 32 | 33 | #ifndef NVVK_HAS_VK_EXT_device_generated_commands 34 | 35 | VKAPI_ATTR void VKAPI_CALL vkGetGeneratedCommandsMemoryRequirementsEXT(VkDevice device, 36 | VkGeneratedCommandsMemoryRequirementsInfoEXT const* pInfo, 37 | VkMemoryRequirements2* pMemoryRequirements) 38 | { 39 | s_vkGetGeneratedCommandsMemoryRequirementsEXT(device, pInfo, pMemoryRequirements); 40 | } 41 | 42 | VKAPI_ATTR void VKAPI_CALL vkCmdPreprocessGeneratedCommandsEXT(VkCommandBuffer commandBuffer, 43 | VkGeneratedCommandsInfoEXT const* pGeneratedCommandsInfo, 44 | VkCommandBuffer stateCommandBuffer) 45 | { 46 | s_vkCmdPreprocessGeneratedCommandsEXT(commandBuffer, pGeneratedCommandsInfo, stateCommandBuffer); 47 | } 48 | 49 | VKAPI_ATTR void VKAPI_CALL vkCmdExecuteGeneratedCommandsEXT(VkCommandBuffer commandBuffer, 50 | VkBool32 isPreprocessed, 51 | VkGeneratedCommandsInfoEXT const* pGeneratedCommandsInfo) 52 | { 53 | s_vkCmdExecuteGeneratedCommandsEXT(commandBuffer, isPreprocessed, pGeneratedCommandsInfo); 54 | } 55 | 56 | VKAPI_ATTR VkResult VKAPI_CALL 
vkCreateIndirectCommandsLayoutEXT(VkDevice device, 57 | VkIndirectCommandsLayoutCreateInfoEXT const* pCreateInfo, 58 | VkAllocationCallbacks const* pAllocator, 59 | VkIndirectCommandsLayoutEXT* pIndirectCommandsLayout) 60 | { 61 | return s_vkCreateIndirectCommandsLayoutEXT(device, pCreateInfo, pAllocator, pIndirectCommandsLayout); 62 | } 63 | 64 | VKAPI_ATTR void VKAPI_CALL vkDestroyIndirectCommandsLayoutEXT(VkDevice device, 65 | VkIndirectCommandsLayoutEXT indirectCommandsLayout, 66 | VkAllocationCallbacks const* pAllocator) 67 | { 68 | s_vkDestroyIndirectCommandsLayoutEXT(device, indirectCommandsLayout, pAllocator); 69 | } 70 | 71 | VKAPI_ATTR VkResult VKAPI_CALL vkCreateIndirectExecutionSetEXT(VkDevice device, 72 | VkIndirectExecutionSetCreateInfoEXT const* pCreateInfo, 73 | VkAllocationCallbacks const* pAllocator, 74 | VkIndirectExecutionSetEXT* pIndirectExecutionSet) 75 | { 76 | return s_vkCreateIndirectExecutionSetEXT(device, pCreateInfo, pAllocator, pIndirectExecutionSet); 77 | } 78 | 79 | VKAPI_ATTR void VKAPI_CALL vkDestroyIndirectExecutionSetEXT(VkDevice device, 80 | VkIndirectExecutionSetEXT indirectExecutionSet, 81 | VkAllocationCallbacks const* pAllocator) 82 | { 83 | s_vkDestroyIndirectExecutionSetEXT(device, indirectExecutionSet, pAllocator); 84 | } 85 | 86 | VKAPI_ATTR void VKAPI_CALL vkUpdateIndirectExecutionSetPipelineEXT(VkDevice device, 87 | VkIndirectExecutionSetEXT indirectExecutionSet, 88 | uint32_t executionSetWriteCount, 89 | VkWriteIndirectExecutionSetPipelineEXT const* pExecutionSetWrites) 90 | { 91 | s_vkUpdateIndirectExecutionSetPipelineEXT(device, indirectExecutionSet, executionSetWriteCount, pExecutionSetWrites); 92 | } 93 | 94 | VKAPI_ATTR void VKAPI_CALL vkUpdateIndirectExecutionSetShaderEXT(VkDevice device, 95 | VkIndirectExecutionSetEXT indirectExecutionSet, 96 | uint32_t executionSetWriteCount, 97 | VkWriteIndirectExecutionSetShaderEXT const* pExecutionSetWrites) 98 | { 99 | s_vkUpdateIndirectExecutionSetShaderEXT(device, indirectExecutionSet, executionSetWriteCount, pExecutionSetWrites); 100 | } 101 | #endif 102 | 103 | VkBool32 load_VK_EXT_device_generated_commands(VkInstance instance, VkDevice device) 104 | { 105 | s_vkGetGeneratedCommandsMemoryRequirementsEXT = nullptr; 106 | s_vkCmdPreprocessGeneratedCommandsEXT = nullptr; 107 | s_vkCmdExecuteGeneratedCommandsEXT = nullptr; 108 | s_vkCreateIndirectCommandsLayoutEXT = nullptr; 109 | s_vkDestroyIndirectCommandsLayoutEXT = nullptr; 110 | s_vkCreateIndirectExecutionSetEXT = nullptr; 111 | s_vkDestroyIndirectExecutionSetEXT = nullptr; 112 | s_vkUpdateIndirectExecutionSetPipelineEXT = nullptr; 113 | s_vkUpdateIndirectExecutionSetShaderEXT = nullptr; 114 | 115 | s_vkGetGeneratedCommandsMemoryRequirementsEXT = 116 | (PFN_vkGetGeneratedCommandsMemoryRequirementsEXT)vkGetDeviceProcAddr(device, "vkGetGeneratedCommandsMemoryRequirementsEXT"); 117 | s_vkCmdPreprocessGeneratedCommandsEXT = 118 | (PFN_vkCmdPreprocessGeneratedCommandsEXT)vkGetDeviceProcAddr(device, "vkCmdPreprocessGeneratedCommandsEXT"); 119 | s_vkCmdExecuteGeneratedCommandsEXT = 120 | (PFN_vkCmdExecuteGeneratedCommandsEXT)vkGetDeviceProcAddr(device, "vkCmdExecuteGeneratedCommandsEXT"); 121 | s_vkCreateIndirectCommandsLayoutEXT = 122 | (PFN_vkCreateIndirectCommandsLayoutEXT)vkGetDeviceProcAddr(device, "vkCreateIndirectCommandsLayoutEXT"); 123 | s_vkDestroyIndirectCommandsLayoutEXT = 124 | (PFN_vkDestroyIndirectCommandsLayoutEXT)vkGetDeviceProcAddr(device, "vkDestroyIndirectCommandsLayoutEXT"); 125 | s_vkCreateIndirectExecutionSetEXT = 126 | 
(PFN_vkCreateIndirectExecutionSetEXT)vkGetDeviceProcAddr(device, "vkCreateIndirectExecutionSetEXT"); 127 | s_vkDestroyIndirectExecutionSetEXT = 128 | (PFN_vkDestroyIndirectExecutionSetEXT)vkGetDeviceProcAddr(device, "vkDestroyIndirectExecutionSetEXT"); 129 | s_vkUpdateIndirectExecutionSetPipelineEXT = 130 | (PFN_vkUpdateIndirectExecutionSetPipelineEXT)vkGetDeviceProcAddr(device, "vkUpdateIndirectExecutionSetPipelineEXT"); 131 | s_vkUpdateIndirectExecutionSetShaderEXT = 132 | (PFN_vkUpdateIndirectExecutionSetShaderEXT)vkGetDeviceProcAddr(device, "vkUpdateIndirectExecutionSetShaderEXT"); 133 | 134 | return s_vkGetGeneratedCommandsMemoryRequirementsEXT && s_vkCmdPreprocessGeneratedCommandsEXT 135 | && s_vkCmdExecuteGeneratedCommandsEXT && s_vkCreateIndirectCommandsLayoutEXT 136 | && s_vkDestroyIndirectCommandsLayoutEXT && s_vkCreateIndirectExecutionSetEXT && s_vkDestroyIndirectExecutionSetEXT 137 | && s_vkUpdateIndirectExecutionSetPipelineEXT && s_vkUpdateIndirectExecutionSetShaderEXT; 138 | } 139 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS -------------------------------------------------------------------------------- /renderer_vk.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ 22 | 23 | 24 | #include <assert.h> 25 | #include 26 | 27 | #include "renderer.hpp" 28 | #include "resources_vk.hpp" 29 | 30 | #include 31 | 32 | #include "common.h" 33 | 34 | 35 | namespace generatedcmds { 36 | 37 | ////////////////////////////////////////////////////////////////////////// 38 | 39 | 40 | class RendererVK : public Renderer 41 | { 42 | public: 43 | class TypeCmd : public Renderer::Type 44 | { 45 | bool isAvailable(const nvvk::Context& context) override { return true; } 46 | 47 | const char* name() const override { return "re-used cmds"; } 48 | Renderer* create() const override 49 | { 50 | RendererVK* renderer = new RendererVK(); 51 | return renderer; 52 | } 53 | uint32_t priority() const override { return 8; } 54 | }; 55 | 56 | public: 57 | void init(const CadScene* scene, ResourcesVK* resources, const Config& config, Stats& stats) override; 58 | void deinit() override; 59 | void draw(const Resources::Global& global, Stats& stats) override; 60 | 61 | RendererVK() {} 62 | 63 | private: 64 | struct DrawSetup 65 | { 66 | VkCommandBuffer cmdBuffer; 67 | nvvk::Buffer combinedIndices; 68 | }; 69 | 70 | std::vector<DrawItem> m_drawItems; 71 | std::vector<uint32_t> m_seqIndices; 72 | CadScene::IndexingBits m_indexingBits; 73 | VkCommandPool m_cmdPool; 74 | DrawSetup m_draw; 75 | ResourcesVK* m_resources; 76 | 77 | void fillCmdBuffer(VkCommandBuffer cmd, const DrawItem* drawItems, size_t drawCount) 78 | { 79 | ResourcesVK* res = m_resources; 80 | const CadSceneVK& scene = res->m_scene; 81 | BindingMode bindingMode = m_config.bindingMode; 82 | 83 | int lastMaterial = -1; 84 | int lastGeometry = -1; 85 | int lastMatrix = -1; 86 | int lastObject = -1; 87 | int lastShader = -1; 88 | 89 | VkDeviceAddress matrixAddress = scene.m_buffers.matrices.address; 90 | VkDeviceAddress materialAddress = scene.m_buffers.materials.address; 91 | 92 | // setup staging buffer for filling 93 | ScopeStaging staging(res->m_resourceAllocator, res->m_queue, res->m_queueFamily); 94 | 95 | size_t combinedIndicesSize = bindingMode == BINDINGMODE_INDEX_VERTEXATTRIB ? 
sizeof(uint32_t) * drawCount : 0; 96 | uint32_t* combinedIndicesMapping = nullptr; 97 | if(combinedIndicesSize) 98 | { 99 | m_draw.combinedIndices = res->m_resourceAllocator.createBuffer(combinedIndicesSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT); 100 | combinedIndicesMapping = staging.uploadT(m_draw.combinedIndices.buffer, 0, combinedIndicesSize); 101 | } 102 | 103 | switch(bindingMode) 104 | { 105 | case BINDINGMODE_DSETS: 106 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawBind.getPipeLayout(), DRAW_UBO_SCENE, 107 | 1, res->m_drawBind.at(DRAW_UBO_SCENE).getSets(), 0, nullptr); 108 | break; 109 | case BINDINGMODE_PUSHADDRESS: 110 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawPush.getPipeLayout(), 0, 1, 111 | res->m_drawPush.getSets(), 0, nullptr); 112 | break; 113 | case BINDINGMODE_INDEX_BASEINSTANCE: 114 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawIndexed.getPipeLayout(), 0, 1, 115 | res->m_drawIndexed.getSets(), 0, nullptr); 116 | break; 117 | case BINDINGMODE_INDEX_VERTEXATTRIB: 118 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawIndexed.getPipeLayout(), 0, 1, 119 | res->m_drawIndexed.getSets(), 0, nullptr); 120 | 121 | { 122 | VkDeviceSize offset = {0}; 123 | VkDeviceSize size = {VK_WHOLE_SIZE}; 124 | VkDeviceSize stride = {sizeof(uint32_t)}; 125 | #if USE_DYNAMIC_VERTEX_STRIDE 126 | vkCmdBindVertexBuffers2(cmd, 1, 1, &m_draw.combinedIndices.buffer, &offset, &size, &stride); 127 | #else 128 | vkCmdBindVertexBuffers(cmd, 1, 1, &m_draw.combinedIndices.buffer, &offset); 129 | #endif 130 | } 131 | break; 132 | } 133 | 134 | if(m_config.shaderObjs) 135 | { 136 | const VkShaderStageFlagBits unusedStages[3] = {VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, 137 | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, VK_SHADER_STAGE_GEOMETRY_BIT}; 138 | vkCmdBindShadersEXT(cmd, 3, unusedStages, nullptr); 139 | } 140 | 141 | for(size_t i = 0; i < drawCount; i++) 142 | { 143 | uint32_t idx = m_config.permutated ? 
m_seqIndices[i] : uint32_t(i); 144 | const DrawItem& di = drawItems[idx]; 145 | 146 | if(di.shaderIndex != lastShader) 147 | { 148 | if(m_config.shaderObjs) 149 | { 150 | VkShaderStageFlagBits stages[2] = {VK_SHADER_STAGE_VERTEX_BIT, VK_SHADER_STAGE_FRAGMENT_BIT}; 151 | VkShaderEXT shaders[2] = {res->m_drawShading.vertexShaderObjs[di.shaderIndex], 152 | res->m_drawShading.fragmentShaderObjs[di.shaderIndex]}; 153 | vkCmdBindShadersEXT(cmd, 2, stages, shaders); 154 | } 155 | else 156 | { 157 | vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawShading.pipelines[di.shaderIndex]); 158 | } 159 | 160 | lastShader = di.shaderIndex; 161 | } 162 | 163 | #if USE_DRAW_OFFSETS 164 | if(lastGeometry != int(scene.m_geometry[di.geometryIndex].allocation.chunkIndex)) 165 | { 166 | const CadSceneVK::Geometry& geo = scene.m_geometry[di.geometryIndex]; 167 | 168 | vkCmdBindIndexBuffer(cmd, geo.ibo.buffer, 0, VK_INDEX_TYPE_UINT32); 169 | VkDeviceSize offset = {0}; 170 | VkDeviceSize size = {VK_WHOLE_SIZE}; 171 | VkDeviceSize stride = {sizeof(CadScene::Vertex)}; 172 | #if USE_DYNAMIC_VERTEX_STRIDE 173 | vkCmdBindVertexBuffers2(cmd, 0, 1, &geo.vbo.buffer, &offset, &size, &stride); 174 | #else 175 | vkCmdBindVertexBuffers(cmd, 0, 1, &geo.vbo.buffer, &offset); 176 | #endif 177 | lastGeometry = int(scene.m_geometry[di.geometryIndex].allocation.chunkIndex); 178 | } 179 | #else 180 | if(lastGeometry != di.geometryIndex) 181 | { 182 | const CadSceneVK::Geometry& geo = scene.m_geometry[di.geometryIndex]; 183 | VkDeviceSize stride = {sizeof(CadScene::Vertex)}; 184 | 185 | vkCmdBindIndexBuffer(cmd, geo.ibo.buffer, geo.ibo.offset, VK_INDEX_TYPE_UINT32); 186 | #if USE_DYNAMIC_VERTEX_STRIDE 187 | vkCmdBindVertexBuffers2(cmd, 0, 1, &geo.vbo.buffer, &geo.vbo.offset, &geo.vbo.range, &stride); 188 | #else 189 | vkCmdBindVertexBuffers(cmd, 0, 1, &geo.vbo.buffer, &geo.vbo.offset); 190 | #endif 191 | 192 | lastGeometry = di.geometryIndex; 193 | } 194 | #endif 195 | 196 | uint32_t firstInstance = 0; 197 | 198 | if(bindingMode == BINDINGMODE_DSETS) 199 | { 200 | if(lastMatrix != di.matrixIndex) 201 | { 202 | uint32_t offset = di.matrixIndex * res->m_alignedMatrixSize; 203 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawBind.getPipeLayout(), 204 | DRAW_UBO_MATRIX, 1, res->m_drawBind.at(DRAW_UBO_MATRIX).getSets(), 1, &offset); 205 | lastMatrix = di.matrixIndex; 206 | } 207 | 208 | if(lastMaterial != di.materialIndex) 209 | { 210 | uint32_t offset = di.materialIndex * res->m_alignedMaterialSize; 211 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawBind.getPipeLayout(), 212 | DRAW_UBO_MATERIAL, 1, res->m_drawBind.at(DRAW_UBO_MATERIAL).getSets(), 1, &offset); 213 | lastMaterial = di.materialIndex; 214 | } 215 | } 216 | else if(bindingMode == BINDINGMODE_PUSHADDRESS) 217 | { 218 | if(lastMatrix != di.matrixIndex) 219 | { 220 | VkDeviceAddress address = matrixAddress + sizeof(CadScene::MatrixNode) * di.matrixIndex; 221 | 222 | vkCmdPushConstants(cmd, res->m_drawPush.getPipeLayout(), VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(VkDeviceAddress), &address); 223 | 224 | lastMatrix = di.matrixIndex; 225 | } 226 | 227 | if(lastMaterial != di.materialIndex) 228 | { 229 | VkDeviceAddress address = materialAddress + sizeof(CadScene::Material) * di.materialIndex; 230 | 231 | vkCmdPushConstants(cmd, res->m_drawPush.getPipeLayout(), VK_SHADER_STAGE_FRAGMENT_BIT, 232 | sizeof(VkDeviceAddress), sizeof(VkDeviceAddress), &address); 233 | 234 | lastMaterial = di.materialIndex; 235 | } 236 | } 
237 | else if(bindingMode == BINDINGMODE_INDEX_BASEINSTANCE) 238 | { 239 | firstInstance = m_indexingBits.packIndices(di.matrixIndex, di.materialIndex); 240 | } 241 | else if(bindingMode == BINDINGMODE_INDEX_VERTEXATTRIB) 242 | { 243 | firstInstance = i; 244 | combinedIndicesMapping[i] = m_indexingBits.packIndices(di.matrixIndex, di.materialIndex); 245 | } 246 | 247 | // drawcall 248 | #if USE_DRAW_OFFSETS 249 | const CadSceneVK::Geometry& geo = scene.m_geometry[di.geometryIndex]; 250 | vkCmdDrawIndexed(cmd, di.range.count, 1, uint32_t(di.range.offset + geo.ibo.offset / sizeof(uint32_t)), 251 | geo.vbo.offset / sizeof(CadScene::Vertex), firstInstance); 252 | #else 253 | vkCmdDrawIndexed(cmd, di.range.count, 1, uint32_t(di.range.offset / sizeof(uint32_t)), 0, firstInstance); 254 | #endif 255 | 256 | lastShader = di.shaderIndex; 257 | } 258 | } 259 | 260 | void setupCmdBuffer(const DrawItem* drawItems, size_t drawCount) 261 | { 262 | const ResourcesVK* res = m_resources; 263 | 264 | VkCommandBuffer cmd = res->createCmdBuffer(m_cmdPool, false, false, true); 265 | 266 | if(m_config.shaderObjs) 267 | { 268 | res->cmdShaderObjectState(cmd); 269 | } 270 | else 271 | { 272 | res->cmdDynamicPipelineState(cmd); 273 | } 274 | 275 | fillCmdBuffer(cmd, drawItems, drawCount); 276 | 277 | vkEndCommandBuffer(cmd); 278 | m_draw.cmdBuffer = cmd; 279 | } 280 | 281 | void deleteCmdBuffer() { vkFreeCommandBuffers(m_resources->m_device, m_cmdPool, 1, &m_draw.cmdBuffer); } 282 | }; 283 | 284 | 285 | static RendererVK::TypeCmd s_type_cmdbuffer_vk; 286 | 287 | void RendererVK::init(const CadScene* scene, ResourcesVK* resources, const Config& config, Stats& stats) 288 | { 289 | ResourcesVK* res = (ResourcesVK*)resources; 290 | m_resources = res; 291 | m_scene = scene; 292 | m_config = config; 293 | 294 | stats.cmdBuffers = 1; 295 | 296 | m_indexingBits = m_scene->getIndexingBits(); 297 | 298 | res->initPipelinesOrShaders(config.bindingMode, 0, config.shaderObjs); 299 | 300 | VkResult result; 301 | VkCommandPoolCreateInfo cmdPoolInfo = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO}; 302 | cmdPoolInfo.queueFamilyIndex = 0; 303 | result = vkCreateCommandPool(res->m_device, &cmdPoolInfo, nullptr, &m_cmdPool); 304 | assert(result == VK_SUCCESS); 305 | 306 | fillDrawItems(m_drawItems, scene, config, stats); 307 | if(config.permutated) 308 | { 309 | m_seqIndices.resize(m_drawItems.size()); 310 | fillRandomPermutation(m_drawItems.size(), m_seqIndices.data(), m_drawItems.data(), stats); 311 | } 312 | 313 | setupCmdBuffer(m_drawItems.data(), m_drawItems.size()); 314 | } 315 | 316 | void RendererVK::deinit() 317 | { 318 | m_resources->m_resourceAllocator.destroy(m_draw.combinedIndices); 319 | 320 | deleteCmdBuffer(); 321 | vkDestroyCommandPool(m_resources->m_device, m_cmdPool, nullptr); 322 | } 323 | 324 | void RendererVK::draw(const Resources::Global& global, Stats& stats) 325 | { 326 | ResourcesVK* res = m_resources; 327 | 328 | VkCommandBuffer primary = res->createTempCmdBuffer(); 329 | { 330 | nvvk::ProfilerVK::Section profile(res->m_profilerVK, "Render", primary); 331 | { 332 | nvvk::ProfilerVK::Section profile(res->m_profilerVK, "Draw", primary); 333 | 334 | vkCmdUpdateBuffer(primary, res->m_common.viewBuffer.buffer, 0, sizeof(SceneData), (const uint32_t*)&global.sceneUbo); 335 | res->cmdPipelineBarrier(primary); 336 | 337 | // clear via pass 338 | res->cmdBeginRendering(primary, true); 339 | vkCmdExecuteCommands(primary, 1, &m_draw.cmdBuffer); 340 | vkCmdEndRendering(primary); 341 | } 342 | } 343 | 
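// profiler sections are closed; finish recording and hand the primary command
// buffer to the batched submission (flushed later via submissionExecute)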
vkEndCommandBuffer(primary); 344 | res->submissionEnqueue(primary); 345 | } 346 | 347 | } // namespace generatedcmds 348 | -------------------------------------------------------------------------------- /cadscene.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #include "cadscene.hpp" 22 | #include 23 | 24 | #include 25 | #include 26 | #include 27 | 28 | #define USE_CACHECOMBINE 1 29 | 30 | 31 | glm::vec4 randomVector(float from, float to) 32 | { 33 | glm::vec4 vec; 34 | float width = to - from; 35 | for(int i = 0; i < 4; i++) 36 | { 37 | vec[i] = from + (float(rand()) / float(RAND_MAX)) * width; 38 | } 39 | return vec; 40 | } 41 | 42 | // all oct functions derived from "A Survey of Efficient Representations for Independent Unit Vectors" 43 | // http://jcgt.org/published/0003/02/01/paper.pdf 44 | // Returns +/- 1 45 | inline glm::vec3 oct_signNotZero(glm::vec3 v) 46 | { 47 | // leaves z as is 48 | return glm::vec3((v.x >= 0.0f) ? +1.0f : -1.0f, (v.y >= 0.0f) ? +1.0f : -1.0f, 1.0f); 49 | } 50 | 51 | // Assume normalized input. Output is on [-1, 1] for each component. 52 | inline glm::vec3 float32x3_to_oct(glm::vec3 v) 53 | { 54 | // Project the sphere onto the octahedron, and then onto the xy plane 55 | glm::vec3 p = glm::vec3(v.x, v.y, 0) * (1.0f / (fabsf(v.x) + fabsf(v.y) + fabsf(v.z))); 56 | // Reflect the folds of the lower hemisphere over the diagonals 57 | return (v.z <= 0.0f) ? glm::vec3(1.0f - fabsf(p.y), 1.0f - fabsf(p.x), 0.0f) * oct_signNotZero(p) : p; 58 | } 59 | 60 | inline glm::vec3 oct_to_float32x3(glm::vec3 e) 61 | { 62 | glm::vec3 v = glm::vec3(e.x, e.y, 1.0f - fabsf(e.x) - fabsf(e.y)); 63 | if(v.z < 0.0f) 64 | { 65 | v = glm::vec3(1.0f - fabs(v.y), 1.0f - fabs(v.x), v.z) * oct_signNotZero(v); 66 | } 67 | return glm::normalize(v); 68 | } 69 | 70 | inline glm::vec3 float32x3_to_octn_precise(glm::vec3 v, const int n) 71 | { 72 | glm::vec3 s = float32x3_to_oct(v); // Remap to the square 73 | // Each snorm's max value interpreted as an integer, 74 | // e.g., 127.0 for snorm8 75 | float M = float(1 << ((n / 2) - 1)) - 1.0; 76 | // Remap components to snorm(n/2) precision...with floor instead 77 | // of round (see equation 1) 78 | s = glm::floor(glm::clamp(s, -1.0f, +1.0f) * M) * (1.0f / M); 79 | glm::vec3 bestRepresentation = s; 80 | float highestCosine = glm::dot(oct_to_float32x3(s), v); 81 | // Test all combinations of floor and ceil and keep the best. 82 | // Note that at +/- 1, this will exit the square... but that 83 | // will be a worse encoding and never win. 
84 | for(int i = 0; i <= 1; ++i) 85 | { 86 | for(int j = 0; j <= 1; ++j) 87 | { 88 | // This branch will be evaluated at compile time 89 | if((i != 0) || (j != 0)) 90 | { 91 | // Offset the bit pattern (which is stored in floating 92 | // point!) to effectively change the rounding mode 93 | // (when i or j is 0: floor, when it is one: ceiling) 94 | glm::vec3 candidate = glm::vec3(i, j, 0) * (1 / M) + s; 95 | float cosine = glm::dot(oct_to_float32x3(candidate), v); 96 | if(cosine > highestCosine) 97 | { 98 | bestRepresentation = candidate; 99 | highestCosine = cosine; 100 | } 101 | } 102 | } 103 | } 104 | return bestRepresentation; 105 | } 106 | 107 | bool CadScene::loadCSF(const char* filename, int clones, int cloneaxis) 108 | { 109 | CSFile* csf; 110 | CSFileMemoryPTR mem = CSFileMemory_new(); 111 | if(CSFile_loadExt(&csf, filename, mem) != CADSCENEFILE_NOERROR || !(csf->fileFlags & CADSCENEFILE_FLAG_UNIQUENODES)) 112 | { 113 | CSFileMemory_delete(mem); 114 | return false; 115 | } 116 | 117 | int copies = clones + 1; 118 | 119 | CSFile_transform(csf); 120 | 121 | srand(234525); 122 | 123 | 124 | // materials 125 | m_materials.resize(csf->numMaterials); 126 | for(int n = 0; n < csf->numMaterials; n++) 127 | { 128 | CSFMaterial* csfmaterial = &csf->materials[n]; 129 | Material& material = m_materials[n]; 130 | 131 | for(int i = 0; i < 2; i++) 132 | { 133 | material.sides[i].ambient = randomVector(0.0f, 0.1f); 134 | material.sides[i].diffuse = glm::make_vec4(csf->materials[n].color) + randomVector(0.0f, 0.07f); 135 | material.sides[i].specular = randomVector(0.25f, 0.55f); 136 | material.sides[i].emissive = randomVector(0.0f, 0.05f); 137 | } 138 | } 139 | 140 | 141 | // geometry 142 | int numGeoms = csf->numGeometries; 143 | m_geometry.resize(csf->numGeometries * copies); 144 | m_geometryBboxes.resize(csf->numGeometries * copies); 145 | for(int n = 0; n < csf->numGeometries; n++) 146 | { 147 | CSFGeometry* csfgeom = &csf->geometries[n]; 148 | Geometry& geom = m_geometry[n]; 149 | geom.cloneIdx = -1; 150 | 151 | geom.numVertices = csfgeom->numVertices; 152 | geom.numIndexSolid = csfgeom->numIndexSolid; 153 | geom.numIndexWire = csfgeom->numIndexWire; 154 | 155 | Vertex* vertices = new Vertex[csfgeom->numVertices]; 156 | for(int i = 0; i < csfgeom->numVertices; i++) 157 | { 158 | vertices[i].position[0] = csfgeom->vertex[3 * i + 0]; 159 | vertices[i].position[1] = csfgeom->vertex[3 * i + 1]; 160 | vertices[i].position[2] = csfgeom->vertex[3 * i + 2]; 161 | 162 | glm::vec3 normal; 163 | if(csfgeom->normal) 164 | { 165 | normal.x = csfgeom->normal[3 * i + 0]; 166 | normal.y = csfgeom->normal[3 * i + 1]; 167 | normal.z = csfgeom->normal[3 * i + 2]; 168 | } 169 | else 170 | { 171 | normal = normalize(glm::vec3(vertices[i].position)); 172 | } 173 | 174 | glm::vec3 packed = float32x3_to_octn_precise(normal, 16); 175 | vertices[i].normalOctX = std::min(32767, std::max(-32767, int32_t(packed.x * 32767.0f))); 176 | vertices[i].normalOctY = std::min(32767, std::max(-32767, int32_t(packed.y * 32767.0f))); 177 | 178 | m_geometryBboxes[n].merge(glm::vec4(vertices[i].position, 1)); 179 | } 180 | 181 | geom.vboData = vertices; 182 | geom.vboSize = sizeof(Vertex) * csfgeom->numVertices; 183 | 184 | 185 | unsigned int* indices = new unsigned int[csfgeom->numIndexSolid + csfgeom->numIndexWire]; 186 | memcpy(&indices[0], csfgeom->indexSolid, sizeof(unsigned int) * csfgeom->numIndexSolid); 187 | if(csfgeom->indexWire) 188 | { 189 | memcpy(&indices[csfgeom->numIndexSolid], csfgeom->indexWire, sizeof(unsigned int) 
* csfgeom->numIndexWire); 190 | } 191 | 192 | geom.iboData = indices; 193 | geom.iboSize = sizeof(unsigned int) * (csfgeom->numIndexSolid + csfgeom->numIndexWire); 194 | 195 | 196 | geom.parts.resize(csfgeom->numParts); 197 | 198 | size_t offsetSolid = 0; 199 | size_t offsetWire = csfgeom->numIndexSolid * sizeof(unsigned int); 200 | for(int i = 0; i < csfgeom->numParts; i++) 201 | { 202 | geom.parts[i].indexWire.count = csfgeom->parts[i].numIndexWire; 203 | geom.parts[i].indexSolid.count = csfgeom->parts[i].numIndexSolid; 204 | 205 | geom.parts[i].indexWire.offset = offsetWire; 206 | geom.parts[i].indexSolid.offset = offsetSolid; 207 | 208 | offsetWire += csfgeom->parts[i].numIndexWire * sizeof(unsigned int); 209 | offsetSolid += csfgeom->parts[i].numIndexSolid * sizeof(unsigned int); 210 | } 211 | } 212 | for(int c = 1; c <= clones; c++) 213 | { 214 | for(int n = 0; n < numGeoms; n++) 215 | { 216 | m_geometryBboxes[n + numGeoms * c] = m_geometryBboxes[n]; 217 | 218 | const Geometry& geomorig = m_geometry[n]; 219 | Geometry& geom = m_geometry[n + numGeoms * c]; 220 | 221 | geom = geomorig; 222 | geom.cloneIdx = n; 223 | } 224 | } 225 | 226 | 227 | // nodes 228 | int numObjects = 0; 229 | m_matrices.resize(csf->numNodes * copies); 230 | 231 | for(int n = 0; n < csf->numNodes; n++) 232 | { 233 | CSFNode* csfnode = &csf->nodes[n]; 234 | 235 | memcpy(glm::value_ptr(m_matrices[n].objectMatrix), csfnode->objectTM, sizeof(float) * 16); 236 | memcpy(glm::value_ptr(m_matrices[n].worldMatrix), csfnode->worldTM, sizeof(float) * 16); 237 | 238 | m_matrices[n].objectMatrixIT = glm::transpose(glm::inverse(m_matrices[n].objectMatrix)); 239 | m_matrices[n].worldMatrixIT = glm::transpose(glm::inverse(m_matrices[n].worldMatrix)); 240 | 241 | if(csfnode->geometryIDX < 0) 242 | continue; 243 | 244 | numObjects++; 245 | } 246 | 247 | 248 | // objects 249 | m_objects.resize(numObjects * copies); 250 | numObjects = 0; 251 | for(int n = 0; n < csf->numNodes; n++) 252 | { 253 | CSFNode* csfnode = &csf->nodes[n]; 254 | 255 | if(csfnode->geometryIDX < 0) 256 | continue; 257 | 258 | Object& object = m_objects[numObjects]; 259 | 260 | object.matrixIndex = n; 261 | object.geometryIndex = csfnode->geometryIDX; 262 | 263 | object.parts.resize(csfnode->numParts); 264 | for(int i = 0; i < csfnode->numParts; i++) 265 | { 266 | object.parts[i].active = 1; 267 | object.parts[i].matrixIndex = csfnode->parts[i].nodeIDX < 0 ? object.matrixIndex : csfnode->parts[i].nodeIDX; 268 | object.parts[i].materialIndex = csfnode->parts[i].materialIDX; 269 | #if 1 270 | if(csf->materials[csfnode->parts[i].materialIDX].color[3] < 0.9f) 271 | { 272 | object.parts[i].active = 0; 273 | } 274 | #endif 275 | } 276 | 277 | BBox bbox = m_geometryBboxes[object.geometryIndex].transformed(m_matrices[n].worldMatrix); 278 | m_bbox.merge(bbox); 279 | 280 | updateObjectDrawCache(object); 281 | 282 | numObjects++; 283 | } 284 | 285 | // compute clone move delta based on m_bbox; 286 | 287 | glm::vec4 dim = m_bbox.max - m_bbox.min; 288 | 289 | int sq = 1; 290 | int numAxis = 0; 291 | for(int i = 0; i < 3; i++) 292 | { 293 | numAxis += (cloneaxis & (1 << i)) ? 
1 : 0; 294 | } 295 | 296 | assert(numAxis); 297 | 298 | switch(numAxis) 299 | { 300 | case 1: 301 | sq = copies; 302 | break; 303 | case 2: 304 | while(sq * sq < copies) 305 | { 306 | sq++; 307 | } 308 | break; 309 | case 3: 310 | while(sq * sq * sq < copies) 311 | { 312 | sq++; 313 | } 314 | break; 315 | } 316 | 317 | 318 | for(int c = 1; c <= clones; c++) 319 | { 320 | int numNodes = csf->numNodes; 321 | 322 | glm::vec4 shift = dim * 1.05f; 323 | 324 | float u = 0; 325 | float v = 0; 326 | float w = 0; 327 | 328 | switch(numAxis) 329 | { 330 | case 1: 331 | u = float(c); 332 | break; 333 | case 2: 334 | u = float(c % sq); 335 | v = float(c / sq); 336 | break; 337 | case 3: 338 | u = float(c % sq); 339 | v = float((c / sq) % sq); 340 | w = float(c / (sq * sq)); 341 | break; 342 | } 343 | 344 | float use = u; 345 | 346 | if(cloneaxis & (1 << 0)) 347 | { 348 | shift.x *= -use; 349 | if(numAxis > 1) 350 | use = v; 351 | } 352 | else 353 | { 354 | shift.x = 0; 355 | } 356 | 357 | if(cloneaxis & (1 << 1)) 358 | { 359 | shift.y *= use; 360 | if(numAxis > 2) 361 | use = w; 362 | else if(numAxis > 1) 363 | use = v; 364 | } 365 | else 366 | { 367 | shift.y = 0; 368 | } 369 | 370 | if(cloneaxis & (1 << 2)) 371 | { 372 | shift.z *= -use; 373 | } 374 | else 375 | { 376 | shift.z = 0; 377 | } 378 | 379 | shift.w = 0; 380 | 381 | // move all world matrices 382 | for(int n = 0; n < numNodes; n++) 383 | { 384 | MatrixNode& node = m_matrices[n + numNodes * c]; 385 | MatrixNode& nodeOrig = m_matrices[n]; 386 | node = nodeOrig; 387 | node.worldMatrix[3] = node.worldMatrix[3] + shift; 388 | node.worldMatrixIT = glm::transpose(glm::inverse(node.worldMatrix)); 389 | } 390 | 391 | { 392 | // patch object matrix of root 393 | MatrixNode& node = m_matrices[csf->rootIDX + numNodes * c]; 394 | node.objectMatrix[3] = node.objectMatrix[3] + shift; 395 | node.objectMatrixIT = glm::transpose(glm::inverse(node.objectMatrix)); 396 | } 397 | 398 | // clone objects 399 | for(int n = 0; n < numObjects; n++) 400 | { 401 | const Object& objectorig = m_objects[n]; 402 | Object& object = m_objects[n + numObjects * c]; 403 | 404 | object = objectorig; 405 | object.geometryIndex += c * numGeoms; 406 | object.matrixIndex += c * numNodes; 407 | for(size_t i = 0; i < object.parts.size(); i++) 408 | { 409 | object.parts[i].matrixIndex += c * numNodes; 410 | } 411 | 412 | for(size_t i = 0; i < object.cacheSolid.state.size(); i++) 413 | { 414 | object.cacheSolid.state[i].matrixIndex += c * numNodes; 415 | } 416 | for(size_t i = 0; i < object.cacheWire.state.size(); i++) 417 | { 418 | object.cacheWire.state[i].matrixIndex += c * numNodes; 419 | } 420 | } 421 | } 422 | 423 | CSFileMemory_delete(mem); 424 | return true; 425 | } 426 | 427 | 428 | struct ListItem 429 | { 430 | CadScene::DrawStateInfo state; 431 | CadScene::DrawRange range; 432 | }; 433 | 434 | static bool ListItem_compare(const ListItem& a, const ListItem& b) 435 | { 436 | int diff = 0; 437 | diff = diff != 0 ? diff : (a.state.materialIndex - b.state.materialIndex); 438 | diff = diff != 0 ? diff : (a.state.matrixIndex - b.state.matrixIndex); 439 | diff = diff != 0 ? 
diff : int(a.range.offset - b.range.offset); 440 | 441 | return diff < 0; 442 | } 443 | 444 | static void fillCache(CadScene::DrawRangeCache& cache, const std::vector& list) 445 | { 446 | cache = CadScene::DrawRangeCache(); 447 | 448 | if(!list.size()) 449 | return; 450 | 451 | CadScene::DrawStateInfo state = list[0].state; 452 | CadScene::DrawRange range = list[0].range; 453 | 454 | int stateCount = 0; 455 | 456 | for(size_t i = 1; i < list.size() + 1; i++) 457 | { 458 | bool newrange = false; 459 | if(i == list.size() || list[i].state != state) 460 | { 461 | // push range 462 | if(range.count) 463 | { 464 | stateCount++; 465 | cache.offsets.push_back(range.offset); 466 | cache.counts.push_back(range.count); 467 | } 468 | 469 | // emit 470 | if(stateCount) 471 | { 472 | cache.state.push_back(state); 473 | cache.stateCount.push_back(stateCount); 474 | } 475 | 476 | stateCount = 0; 477 | 478 | if(i == list.size()) 479 | { 480 | break; 481 | } 482 | else 483 | { 484 | state = list[i].state; 485 | range.offset = list[i].range.offset; 486 | range.count = 0; 487 | newrange = true; 488 | } 489 | } 490 | 491 | const CadScene::DrawRange& currange = list[i].range; 492 | if(newrange || (USE_CACHECOMBINE && currange.offset == (range.offset + sizeof(unsigned int) * range.count))) 493 | { 494 | // merge 495 | range.count += currange.count; 496 | } 497 | else 498 | { 499 | // push 500 | if(range.count) 501 | { 502 | stateCount++; 503 | cache.offsets.push_back(range.offset); 504 | cache.counts.push_back(range.count); 505 | } 506 | 507 | range = currange; 508 | } 509 | } 510 | } 511 | 512 | void CadScene::updateObjectDrawCache(Object& object) 513 | { 514 | Geometry& geom = m_geometry[object.geometryIndex]; 515 | 516 | std::vector listSolid; 517 | std::vector listWire; 518 | 519 | listSolid.reserve(geom.parts.size()); 520 | listWire.reserve(geom.parts.size()); 521 | 522 | for(size_t i = 0; i < geom.parts.size(); i++) 523 | { 524 | if(!object.parts[i].active) 525 | continue; 526 | 527 | ListItem item; 528 | item.state.materialIndex = object.parts[i].materialIndex; 529 | 530 | item.range = geom.parts[i].indexSolid; 531 | item.state.matrixIndex = object.parts[i].matrixIndex; 532 | listSolid.push_back(item); 533 | 534 | item.range = geom.parts[i].indexWire; 535 | item.state.matrixIndex = object.parts[i].matrixIndex; 536 | listWire.push_back(item); 537 | } 538 | 539 | std::sort(listSolid.begin(), listSolid.end(), ListItem_compare); 540 | std::sort(listWire.begin(), listWire.end(), ListItem_compare); 541 | 542 | fillCache(object.cacheSolid, listSolid); 543 | fillCache(object.cacheWire, listWire); 544 | } 545 | 546 | void CadScene::unload() 547 | { 548 | if(m_geometry.empty()) 549 | return; 550 | 551 | 552 | for(size_t i = 0; i < m_geometry.size(); i++) 553 | { 554 | if(m_geometry[i].cloneIdx >= 0) 555 | continue; 556 | 557 | delete[] m_geometry[i].vboData; 558 | delete[] m_geometry[i].iboData; 559 | } 560 | 561 | m_matrices.clear(); 562 | m_geometryBboxes.clear(); 563 | m_geometry.clear(); 564 | m_objects.clear(); 565 | m_geometryBboxes.clear(); 566 | } 567 | 568 | CadScene::IndexingBits CadScene::getIndexingBits() const 569 | { 570 | CadScene::IndexingBits bits = {1, 1}; 571 | 572 | for(uint32_t i = 32; i >= 1; i--) 573 | { 574 | uint64_t max = uint64_t(1) << i; 575 | if(m_matrices.size() < max) 576 | { 577 | bits.matrices = i; 578 | } 579 | if(m_materials.size() < max) 580 | { 581 | bits.materials = i; 582 | } 583 | } 584 | 585 | return bits; 586 | } 587 | 588 | bool CadScene::supportsIndexing() const 589 | { 
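  // Indexed binding packs a draw's matrix and material indices into a single
  // 32-bit value, delivered either through the base instance or through a
  // per-draw vertex attribute (see the BINDINGMODE_INDEX_* paths in the
  // renderers), so both bit counts must fit into 32 bits together. A minimal
  // sketch of the packing, assuming IndexingBits::packIndices() (part of
  // cadscene.hpp) places the matrix index in the low bits:
  //
  //   uint32_t packed   = matrixIndex | (materialIndex << bits.matrices);
  //   uint32_t matrix   = packed & ((1u << bits.matrices) - 1u);
  //   uint32_t material = packed >> bits.matrices;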
590 | IndexingBits bits = getIndexingBits(); 591 | return (bits.materials + bits.matrices) <= 32; 592 | } 593 | -------------------------------------------------------------------------------- /vk_ext_device_generated_commands.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #include 21 | 22 | #ifndef VK_EXT_device_generated_commands 23 | #define VK_EXT_device_generated_commands 1 24 | #define VK_EXT_DEVICE_GENERATED_COMMANDS_SPEC_VERSION 1 25 | #define VK_EXT_DEVICE_GENERATED_COMMANDS_EXTENSION_NAME "VK_EXT_device_generated_commands" 26 | #define VK_SHADER_CREATE_INDIRECT_BINDABLE_BIT_EXT ((VkShaderCreateFlagBitsEXT)0x00000080) 27 | #define VK_BUFFER_USAGE_2_PREPROCESS_BUFFER_BIT_EXT ((VkBufferUsageFlagBits2KHR)0x0000000080000000ULL) 28 | #define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_FEATURES_EXT ((VkStructureType)1000572000) 29 | #define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_PROPERTIES_EXT ((VkStructureType)1000572001) 30 | #define VK_STRUCTURE_TYPE_GENERATED_COMMANDS_MEMORY_REQUIREMENTS_INFO_EXT ((VkStructureType)1000572002) 31 | #define VK_STRUCTURE_TYPE_INDIRECT_EXECUTION_SET_CREATE_INFO_EXT ((VkStructureType)1000572003) 32 | #define VK_STRUCTURE_TYPE_GENERATED_COMMANDS_INFO_EXT ((VkStructureType)1000572004) 33 | #define VK_STRUCTURE_TYPE_WRITE_INDIRECT_EXECUTION_SET_EXT ((VkStructureType)1000572005) 34 | #define VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_CREATE_INFO_EXT ((VkStructureType)1000572006) 35 | #define VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_TOKEN_EXT ((VkStructureType)1000572007) 36 | #define VK_STRUCTURE_TYPE_WRITE_INDIRECT_EXECUTION_SET_PIPELINE_EXT ((VkStructureType)1000572008) 37 | #define VK_STRUCTURE_TYPE_WRITE_INDIRECT_EXECUTION_SET_SHADER_EXT ((VkStructureType)1000572009) 38 | #define VK_STRUCTURE_TYPE_INDIRECT_EXECUTION_SET_PIPELINE_INFO_EXT ((VkStructureType)1000572010) 39 | #define VK_STRUCTURE_TYPE_INDIRECT_EXECUTION_SET_SHADER_INFO_EXT ((VkStructureType)1000572011) 40 | #define VK_STRUCTURE_TYPE_INDIRECT_EXECUTION_SET_SHADER_LAYOUT_INFO_EXT ((VkStructureType)1000572012) 41 | #define VK_STRUCTURE_TYPE_GENERATED_COMMANDS_PIPELINE_INFO_EXT ((VkStructureType)1000572013) 42 | #define VK_STRUCTURE_TYPE_GENERATED_COMMANDS_SHADER_INFO_EXT ((VkStructureType)1000572014) 43 | #define VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_EXT VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_NV 44 | #define VK_ACCESS_2_COMMAND_PREPROCESS_READ_BIT_EXT VK_ACCESS_2_COMMAND_PREPROCESS_READ_BIT_NV 45 | #define VK_ACCESS_2_COMMAND_PREPROCESS_WRITE_BIT_EXT VK_ACCESS_2_COMMAND_PREPROCESS_WRITE_BIT_NV 46 | #define VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_EXT ((VkIndirectCommandsTokenTypeEXT)1000328000) 47 | #define 
VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_EXT ((VkIndirectCommandsTokenTypeEXT)1000328001) 48 | #define VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV_EXT ((VkIndirectCommandsTokenTypeEXT)1000202002) 49 | #define VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_NV_EXT ((VkIndirectCommandsTokenTypeEXT)1000202003) 50 | #define VK_INDIRECT_COMMANDS_TOKEN_TYPE_TRACE_RAYS2_EXT ((VkIndirectCommandsTokenTypeEXT)1000386004) 51 | #define VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_EXT ((VkObjectType)1000572000) 52 | #define VK_OBJECT_TYPE_INDIRECT_EXECUTION_SET_EXT ((VkObjectType)1000572001) 53 | #define VK_PIPELINE_CREATE_2_INDIRECT_BINDABLE_BIT_EXT ((VkPipelineCreateFlagBits2KHR)0x0000004000000000ULL) 54 | #define VK_PIPELINE_STAGE_COMMAND_PREPROCESS_BIT_EXT VK_PIPELINE_STAGE_COMMAND_PREPROCESS_BIT_NV 55 | #define VK_ACCESS_COMMAND_PREPROCESS_READ_BIT_EXT VK_ACCESS_COMMAND_PREPROCESS_READ_BIT_NV 56 | #define VK_ACCESS_COMMAND_PREPROCESS_WRITE_BIT_EXT VK_ACCESS_COMMAND_PREPROCESS_WRITE_BIT_NV 57 | 58 | typedef struct VkPhysicalDeviceDeviceGeneratedCommandsFeaturesEXT 59 | { 60 | VkStructureType sType; 61 | void* pNext; 62 | VkBool32 deviceGeneratedCommandsEXT; 63 | VkBool32 dynamicGeneratedPipelineLayout; 64 | } VkPhysicalDeviceDeviceGeneratedCommandsFeaturesEXT; 65 | 66 | typedef VkFlags VkIndirectCommandsInputModeFlagsEXT; 67 | 68 | typedef struct VkPhysicalDeviceDeviceGeneratedCommandsPropertiesEXT 69 | { 70 | VkStructureType sType; 71 | void* pNext; 72 | uint32_t maxIndirectPipelineCount; 73 | uint32_t maxIndirectShaderObjectCount; 74 | uint32_t maxIndirectSequenceCount; 75 | uint32_t maxIndirectCommandsTokenCount; 76 | uint32_t maxIndirectCommandsTokenOffset; 77 | uint32_t maxIndirectCommandsIndirectStride; 78 | VkIndirectCommandsInputModeFlagsEXT supportedIndirectCommandsInputModes; 79 | VkShaderStageFlags supportedIndirectCommandsShaderStages; 80 | VkShaderStageFlags supportedIndirectCommandsShaderStagesPipelineBinding; 81 | VkShaderStageFlags supportedIndirectCommandsShaderStagesShaderBinding; 82 | VkBool32 deviceGeneratedCommandsTransformFeedback; 83 | VkBool32 deviceGeneratedCommandsMultiDrawIndirectCount; 84 | } VkPhysicalDeviceDeviceGeneratedCommandsPropertiesEXT; 85 | 86 | VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkIndirectCommandsLayoutEXT) 87 | 88 | VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkIndirectExecutionSetEXT) 89 | 90 | typedef struct VkGeneratedCommandsMemoryRequirementsInfoEXT 91 | { 92 | VkStructureType sType; 93 | void* pNext; 94 | VkIndirectExecutionSetEXT indirectExecutionSet; 95 | VkIndirectCommandsLayoutEXT indirectCommandsLayout; 96 | uint32_t maxSequenceCount; 97 | uint32_t maxDrawCount; 98 | } VkGeneratedCommandsMemoryRequirementsInfoEXT; 99 | 100 | typedef struct VkIndirectExecutionSetPipelineInfoEXT 101 | { 102 | VkStructureType sType; 103 | void const* pNext; 104 | VkPipeline initialPipeline; 105 | uint32_t maxPipelineCount; 106 | } VkIndirectExecutionSetPipelineInfoEXT; 107 | 108 | typedef struct VkIndirectExecutionSetShaderLayoutInfoEXT 109 | { 110 | VkStructureType sType; 111 | void const* pNext; 112 | uint32_t setLayoutCount; 113 | VkDescriptorSetLayout const* pSetLayouts; 114 | } VkIndirectExecutionSetShaderLayoutInfoEXT; 115 | 116 | typedef struct VkIndirectExecutionSetShaderInfoEXT 117 | { 118 | VkStructureType sType; 119 | void const* pNext; 120 | uint32_t shaderCount; 121 | VkShaderEXT const* pInitialShaders; 122 | VkIndirectExecutionSetShaderLayoutInfoEXT const* pSetLayoutInfos; 123 | uint32_t maxShaderCount; 124 | uint32_t pushConstantRangeCount; 
125 | VkPushConstantRange const* pPushConstantRanges; 126 | } VkIndirectExecutionSetShaderInfoEXT; 127 | 128 | typedef union VkIndirectExecutionSetInfoEXT 129 | { 130 | VkIndirectExecutionSetPipelineInfoEXT const* pPipelineInfo; 131 | VkIndirectExecutionSetShaderInfoEXT const* pShaderInfo; 132 | } VkIndirectExecutionSetInfoEXT; 133 | 134 | typedef enum VkIndirectExecutionSetInfoTypeEXT 135 | { 136 | VK_INDIRECT_EXECUTION_SET_INFO_TYPE_PIPELINES_EXT = 0, 137 | VK_INDIRECT_EXECUTION_SET_INFO_TYPE_SHADER_OBJECTS_EXT = 1, 138 | VK_INDIRECT_EXECUTION_SET_INFO_TYPE_MAX_ENUM_EXT = 0x7FFFFFFF 139 | } VkIndirectExecutionSetInfoTypeEXT; 140 | 141 | typedef struct VkIndirectExecutionSetCreateInfoEXT 142 | { 143 | VkStructureType sType; 144 | void const* pNext; 145 | VkIndirectExecutionSetInfoTypeEXT type; 146 | VkIndirectExecutionSetInfoEXT info; 147 | } VkIndirectExecutionSetCreateInfoEXT; 148 | 149 | typedef struct VkGeneratedCommandsInfoEXT 150 | { 151 | VkStructureType sType; 152 | void const* pNext; 153 | VkShaderStageFlags shaderStages; 154 | VkIndirectExecutionSetEXT indirectExecutionSet; 155 | VkIndirectCommandsLayoutEXT indirectCommandsLayout; 156 | VkDeviceAddress indirectAddress; 157 | VkDeviceSize indirectAddressSize; 158 | VkDeviceAddress preprocessAddress; 159 | VkDeviceSize preprocessSize; 160 | uint32_t maxSequenceCount; 161 | VkDeviceAddress sequenceCountAddress; 162 | uint32_t maxDrawCount; 163 | } VkGeneratedCommandsInfoEXT; 164 | 165 | typedef struct VkWriteIndirectExecutionSetPipelineEXT 166 | { 167 | VkStructureType sType; 168 | void const* pNext; 169 | uint32_t index; 170 | VkPipeline pipeline; 171 | } VkWriteIndirectExecutionSetPipelineEXT; 172 | 173 | typedef struct VkWriteIndirectExecutionSetShaderEXT 174 | { 175 | VkStructureType sType; 176 | void const* pNext; 177 | uint32_t index; 178 | VkShaderEXT shader; 179 | } VkWriteIndirectExecutionSetShaderEXT; 180 | 181 | typedef struct VkIndirectCommandsVertexBufferTokenEXT 182 | { 183 | uint32_t vertexBindingUnit; 184 | } VkIndirectCommandsVertexBufferTokenEXT; 185 | 186 | typedef enum VkIndirectCommandsInputModeFlagBitsEXT 187 | { 188 | VK_INDIRECT_COMMANDS_INPUT_MODE_VULKAN_INDEX_BUFFER_EXT = 0x00000001, 189 | VK_INDIRECT_COMMANDS_INPUT_MODE_DXGI_INDEX_BUFFER_EXT = 0x00000002, 190 | VK_INDIRECT_COMMANDS_INPUT_MODE_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF 191 | } VkIndirectCommandsInputModeFlagBitsEXT; 192 | 193 | typedef struct VkIndirectCommandsIndexBufferTokenEXT 194 | { 195 | VkIndirectCommandsInputModeFlagBitsEXT mode; 196 | } VkIndirectCommandsIndexBufferTokenEXT; 197 | 198 | typedef struct VkIndirectCommandsPushConstantTokenEXT 199 | { 200 | VkPushConstantRange updateRange; 201 | } VkIndirectCommandsPushConstantTokenEXT; 202 | 203 | typedef struct VkIndirectCommandsExecutionSetTokenEXT 204 | { 205 | VkIndirectExecutionSetInfoTypeEXT type; 206 | VkShaderStageFlags shaderStages; 207 | } VkIndirectCommandsExecutionSetTokenEXT; 208 | 209 | typedef union VkIndirectCommandsTokenDataEXT 210 | { 211 | VkIndirectCommandsPushConstantTokenEXT const* pPushConstant; 212 | VkIndirectCommandsVertexBufferTokenEXT const* pVertexBuffer; 213 | VkIndirectCommandsIndexBufferTokenEXT const* pIndexBuffer; 214 | VkIndirectCommandsExecutionSetTokenEXT const* pExecutionSet; 215 | } VkIndirectCommandsTokenDataEXT; 216 | 217 | typedef enum VkIndirectCommandsTokenTypeEXT 218 | { 219 | VK_INDIRECT_COMMANDS_TOKEN_TYPE_EXECUTION_SET_EXT = 0, 220 | VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT = 1, 221 | VK_INDIRECT_COMMANDS_TOKEN_TYPE_SEQUENCE_INDEX_EXT 
= 2, 222 | VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_EXT = 3, 223 | VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_EXT = 4, 224 | VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_EXT = 5, 225 | VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_EXT = 6, 226 | VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_COUNT_EXT = 7, 227 | VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_COUNT_EXT = 8, 228 | VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_EXT = 9, 229 | VK_INDIRECT_COMMANDS_TOKEN_TYPE_MAX_ENUM_EXT = 0x7FFFFFFF 230 | } VkIndirectCommandsTokenTypeEXT; 231 | 232 | typedef struct VkIndirectCommandsLayoutTokenEXT 233 | { 234 | VkStructureType sType; 235 | void const* pNext; 236 | VkIndirectCommandsTokenTypeEXT type; 237 | VkIndirectCommandsTokenDataEXT data; 238 | uint32_t offset; 239 | } VkIndirectCommandsLayoutTokenEXT; 240 | 241 | typedef VkFlags VkIndirectCommandsLayoutUsageFlagsEXT; 242 | 243 | typedef struct VkIndirectCommandsLayoutCreateInfoEXT 244 | { 245 | VkStructureType sType; 246 | void const* pNext; 247 | VkIndirectCommandsLayoutUsageFlagsEXT flags; 248 | VkShaderStageFlags shaderStages; 249 | uint32_t indirectStride; 250 | VkPipelineLayout pipelineLayout; 251 | uint32_t tokenCount; 252 | VkIndirectCommandsLayoutTokenEXT const* pTokens; 253 | } VkIndirectCommandsLayoutCreateInfoEXT; 254 | 255 | typedef struct VkDrawIndirectCountIndirectCommandEXT 256 | { 257 | VkDeviceAddress bufferAddress; 258 | uint32_t stride; 259 | uint32_t commandCount; 260 | } VkDrawIndirectCountIndirectCommandEXT; 261 | 262 | typedef struct VkBindVertexBufferIndirectCommandEXT 263 | { 264 | VkDeviceAddress bufferAddress; 265 | uint32_t size; 266 | uint32_t stride; 267 | } VkBindVertexBufferIndirectCommandEXT; 268 | 269 | typedef struct VkBindIndexBufferIndirectCommandEXT 270 | { 271 | VkDeviceAddress bufferAddress; 272 | uint32_t size; 273 | VkIndexType indexType; 274 | } VkBindIndexBufferIndirectCommandEXT; 275 | 276 | typedef enum VkIndirectCommandsLayoutUsageFlagBitsEXT 277 | { 278 | VK_INDIRECT_COMMANDS_LAYOUT_USAGE_EXPLICIT_PREPROCESS_BIT_EXT = 0x00000001, 279 | VK_INDIRECT_COMMANDS_LAYOUT_USAGE_UNORDERED_SEQUENCES_BIT_EXT = 0x00000002, 280 | VK_INDIRECT_COMMANDS_LAYOUT_USAGE_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF 281 | } VkIndirectCommandsLayoutUsageFlagBitsEXT; 282 | 283 | typedef struct VkGeneratedCommandsPipelineInfoEXT 284 | { 285 | VkStructureType sType; 286 | void* pNext; 287 | VkPipeline pipeline; 288 | } VkGeneratedCommandsPipelineInfoEXT; 289 | 290 | typedef struct VkGeneratedCommandsShaderInfoEXT 291 | { 292 | VkStructureType sType; 293 | void* pNext; 294 | uint32_t shaderCount; 295 | VkShaderEXT const* pShaders; 296 | } VkGeneratedCommandsShaderInfoEXT; 297 | 298 | typedef void(VKAPI_PTR* PFN_vkGetGeneratedCommandsMemoryRequirementsEXT)(VkDevice device, 299 | const VkGeneratedCommandsMemoryRequirementsInfoEXT* pInfo, 300 | VkMemoryRequirements2* pMemoryRequirements); 301 | typedef void(VKAPI_PTR* PFN_vkCmdPreprocessGeneratedCommandsEXT)(VkCommandBuffer commandBuffer, 302 | const VkGeneratedCommandsInfoEXT* pGeneratedCommandsInfo, 303 | VkCommandBuffer stateCommandBuffer); 304 | typedef void(VKAPI_PTR* PFN_vkCmdExecuteGeneratedCommandsEXT)(VkCommandBuffer commandBuffer, 305 | VkBool32 isPreprocessed, 306 | const VkGeneratedCommandsInfoEXT* pGeneratedCommandsInfo); 307 | typedef VkResult(VKAPI_PTR* PFN_vkCreateIndirectCommandsLayoutEXT)(VkDevice device, 308 | const VkIndirectCommandsLayoutCreateInfoEXT* pCreateInfo, 309 | const VkAllocationCallbacks* pAllocator, 310 | VkIndirectCommandsLayoutEXT* pIndirectCommandsLayout); 311 | 
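// Runtime loading note: when the installed Vulkan headers and loader predate
// this extension, none of the entry points below can be linked directly, so
// they are resolved dynamically per device. A minimal sketch of such a
// lookup, assuming a device created with VK_EXT_device_generated_commands
// enabled and a populated VkGeneratedCommandsInfoEXT (the sample's actual
// loader is load_VK_EXT_device_generated_commands(), declared at the end of
// this header and implemented in vk_ext_device_generated_commands.cpp):
//
//   auto fn = (PFN_vkCmdExecuteGeneratedCommandsEXT)vkGetDeviceProcAddr(
//       device, "vkCmdExecuteGeneratedCommandsEXT");
//   if(fn)
//     fn(cmd, VK_TRUE /* isPreprocessed */, &generatedCommandsInfo);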
typedef void(VKAPI_PTR* PFN_vkDestroyIndirectCommandsLayoutEXT)(VkDevice device, 312 | VkIndirectCommandsLayoutEXT indirectCommandsLayout, 313 | const VkAllocationCallbacks* pAllocator); 314 | typedef VkResult(VKAPI_PTR* PFN_vkCreateIndirectExecutionSetEXT)(VkDevice device, 315 | const VkIndirectExecutionSetCreateInfoEXT* pCreateInfo, 316 | const VkAllocationCallbacks* pAllocator, 317 | VkIndirectExecutionSetEXT* pIndirectExecutionSet); 318 | typedef void(VKAPI_PTR* PFN_vkDestroyIndirectExecutionSetEXT)(VkDevice device, 319 | VkIndirectExecutionSetEXT indirectExecutionSet, 320 | const VkAllocationCallbacks* pAllocator); 321 | typedef void(VKAPI_PTR* PFN_vkUpdateIndirectExecutionSetPipelineEXT)(VkDevice device, 322 | VkIndirectExecutionSetEXT indirectExecutionSet, 323 | uint32_t executionSetWriteCount, 324 | const VkWriteIndirectExecutionSetPipelineEXT* pExecutionSetWrites); 325 | typedef void(VKAPI_PTR* PFN_vkUpdateIndirectExecutionSetShaderEXT)(VkDevice device, 326 | VkIndirectExecutionSetEXT indirectExecutionSet, 327 | uint32_t executionSetWriteCount, 328 | const VkWriteIndirectExecutionSetShaderEXT* pExecutionSetWrites); 329 | 330 | #ifndef VK_NO_PROTOTYPES 331 | VKAPI_ATTR void VKAPI_CALL vkGetGeneratedCommandsMemoryRequirementsEXT(VkDevice device, 332 | VkGeneratedCommandsMemoryRequirementsInfoEXT const* pInfo, 333 | VkMemoryRequirements2* pMemoryRequirements); 334 | 335 | VKAPI_ATTR void VKAPI_CALL vkCmdPreprocessGeneratedCommandsEXT(VkCommandBuffer commandBuffer, 336 | VkGeneratedCommandsInfoEXT const* pGeneratedCommandsInfo, 337 | VkCommandBuffer stateCommandBuffer); 338 | 339 | VKAPI_ATTR void VKAPI_CALL vkCmdExecuteGeneratedCommandsEXT(VkCommandBuffer commandBuffer, 340 | VkBool32 isPreprocessed, 341 | VkGeneratedCommandsInfoEXT const* pGeneratedCommandsInfo); 342 | 343 | VKAPI_ATTR VkResult VKAPI_CALL vkCreateIndirectCommandsLayoutEXT(VkDevice device, 344 | VkIndirectCommandsLayoutCreateInfoEXT const* pCreateInfo, 345 | VkAllocationCallbacks const* pAllocator, 346 | VkIndirectCommandsLayoutEXT* pIndirectCommandsLayout); 347 | 348 | VKAPI_ATTR void VKAPI_CALL vkDestroyIndirectCommandsLayoutEXT(VkDevice device, 349 | VkIndirectCommandsLayoutEXT indirectCommandsLayout, 350 | VkAllocationCallbacks const* pAllocator); 351 | 352 | VKAPI_ATTR VkResult VKAPI_CALL vkCreateIndirectExecutionSetEXT(VkDevice device, 353 | VkIndirectExecutionSetCreateInfoEXT const* pCreateInfo, 354 | VkAllocationCallbacks const* pAllocator, 355 | VkIndirectExecutionSetEXT* pIndirectExecutionSet); 356 | 357 | VKAPI_ATTR void VKAPI_CALL vkDestroyIndirectExecutionSetEXT(VkDevice device, 358 | VkIndirectExecutionSetEXT indirectExecutionSet, 359 | VkAllocationCallbacks const* pAllocator); 360 | 361 | VKAPI_ATTR void VKAPI_CALL vkUpdateIndirectExecutionSetPipelineEXT(VkDevice device, 362 | VkIndirectExecutionSetEXT indirectExecutionSet, 363 | uint32_t executionSetWriteCount, 364 | VkWriteIndirectExecutionSetPipelineEXT const* pExecutionSetWrites); 365 | 366 | VKAPI_ATTR void VKAPI_CALL vkUpdateIndirectExecutionSetShaderEXT(VkDevice device, 367 | VkIndirectExecutionSetEXT indirectExecutionSet, 368 | uint32_t executionSetWriteCount, 369 | VkWriteIndirectExecutionSetShaderEXT const* pExecutionSetWrites); 370 | #endif 371 | #endif 372 | 373 | VkBool32 load_VK_EXT_device_generated_commands(VkInstance instance, VkDevice device); 374 | -------------------------------------------------------------------------------- /rendererthread_vk.cpp: -------------------------------------------------------------------------------- 1 | 
/* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include "renderer.hpp" 27 | #include "resources_vk.hpp" 28 | #include "threadpool.hpp" 29 | #include 30 | #include 31 | 32 | #include "common.h" 33 | 34 | #if 0 35 | #include 36 | #define THREAD_BARRIER() _mm_mfence() 37 | #else 38 | #define THREAD_BARRIER() std::atomic_thread_fence(std::memory_order_seq_cst) 39 | #endif 40 | 41 | namespace generatedcmds { 42 | 43 | ////////////////////////////////////////////////////////////////////////// 44 | 45 | 46 | class RendererThreadedVK : public Renderer 47 | { 48 | public: 49 | class TypeCmd : public Renderer::Type 50 | { 51 | bool isAvailable(const nvvk::Context& context) override { return true; } 52 | const char* name() const override { return "threaded cmds"; } 53 | Renderer* create() const override 54 | { 55 | RendererThreadedVK* renderer = new RendererThreadedVK(); 56 | return renderer; 57 | } 58 | uint32_t priority() const override { return 10; } 59 | }; 60 | 61 | public: 62 | void init(const CadScene* scene, ResourcesVK* res, const Config& config, Stats& stats) override; 63 | void deinit() override; 64 | void draw(const Resources::Global& global, Stats& stats) override; 65 | 66 | RendererThreadedVK() {} 67 | 68 | private: 69 | struct DrawSetup 70 | { 71 | std::vector cmdbuffers; 72 | }; 73 | 74 | 75 | struct ThreadJob 76 | { 77 | RendererThreadedVK* renderer; 78 | int index; 79 | 80 | nvvk::RingCommandPool m_pool; 81 | 82 | int m_frame; 83 | std::condition_variable m_hasWorkCond; 84 | std::mutex m_hasWorkMutex; 85 | volatile int m_hasWork; 86 | 87 | size_t m_scIdx; 88 | std::vector m_scs; 89 | 90 | 91 | void resetFrame() { m_scIdx = 0; } 92 | 93 | DrawSetup* getFrameCommand() 94 | { 95 | DrawSetup* sc; 96 | if(m_scIdx + 1 > m_scs.size()) 97 | { 98 | sc = new DrawSetup; 99 | m_scIdx++; 100 | m_scs.push_back(sc); 101 | } 102 | else 103 | { 104 | sc = m_scs[m_scIdx++]; 105 | } 106 | 107 | sc->cmdbuffers.clear(); 108 | return sc; 109 | } 110 | }; 111 | 112 | 113 | std::vector m_drawItems; 114 | std::vector m_seqIndices; 115 | ResourcesVK* m_resources; 116 | int m_numThreads; 117 | CadScene::IndexingBits m_indexingBits; 118 | std::vector m_combinedIndicesData; 119 | nvvk::Buffer m_combinedIndices[nvvk::DEFAULT_RING_SIZE]; 120 | void* m_combinedIndicesMappings[nvvk::DEFAULT_RING_SIZE]; 121 | 122 | ThreadPool m_threadpool; 123 | 124 | bool m_workerBatched; 125 | int m_workingSet; 126 | int m_frame; 127 | uint32_t m_cycleCurrent; 128 | 129 | ThreadJob* m_jobs; 130 | 131 | volatile uint32_t m_ready; 132 | volatile uint32_t m_stopThreads; 133 | volatile size_t m_numCurItems; 134 | 135 | std::condition_variable m_readyCond; 136 | std::mutex m_readyMutex; 137 | 138 | size_t m_numEnqueues; 
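    // Producer/consumer handoff between workers and the main thread: each
    // worker records secondary command buffers into DrawSetup batches and
    // pushes them onto m_drawQueue (a queue of DrawSetup pointers, guarded
    // by m_drawMutex below), while drawThreaded() pops them and replays them
    // on the primary command buffer via vkCmdExecuteCommands. A nullptr
    // entry is a worker's end-of-frame marker, see enqueueShadeCommand_ts()
    // and RunThreadFrame().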
139 | std::queue m_drawQueue; 140 | 141 | std::mutex m_workMutex; 142 | std::mutex m_drawMutex; 143 | std::condition_variable m_drawMutexCondition; 144 | 145 | VkCommandBuffer m_primary; 146 | 147 | static void threadMaster(void* arg) 148 | { 149 | ThreadJob* job = (ThreadJob*)arg; 150 | job->renderer->RunThread(job->index); 151 | } 152 | 153 | bool getWork_ts(size_t& start, size_t& num) 154 | { 155 | std::lock_guard lock(m_workMutex); 156 | bool hasWork = false; 157 | 158 | const size_t chunkSize = m_workingSet; 159 | size_t total = m_drawItems.size(); 160 | 161 | if(m_numCurItems < total) 162 | { 163 | size_t batch = std::min(total - m_numCurItems, chunkSize); 164 | start = m_numCurItems; 165 | num = batch; 166 | m_numCurItems += batch; 167 | hasWork = true; 168 | } 169 | else 170 | { 171 | hasWork = false; 172 | start = 0; 173 | num = 0; 174 | } 175 | 176 | return hasWork; 177 | } 178 | 179 | void RunThread(int index); 180 | unsigned int RunThreadFrame(ThreadJob& job); 181 | 182 | void enqueueShadeCommand_ts(DrawSetup* sc); 183 | 184 | void drawThreaded(const Resources::Global& global, VkCommandBuffer cmd, Stats& stats); 185 | 186 | void fillCmdBuffer(VkCommandBuffer cmd, BindingMode bindingMode, size_t begin, const DrawItem* drawItems, size_t drawCount) 187 | { 188 | const ResourcesVK* res = m_resources; 189 | const CadSceneVK& scene = res->m_scene; 190 | 191 | int lastMaterial = -1; 192 | int lastGeometry = -1; 193 | int lastMatrix = -1; 194 | int lastObject = -1; 195 | int lastShader = -1; 196 | 197 | VkDeviceAddress matrixAddress = scene.m_buffers.matrices.address; 198 | VkDeviceAddress materialAddress = scene.m_buffers.materials.address; 199 | 200 | switch(bindingMode) 201 | { 202 | case BINDINGMODE_DSETS: 203 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawBind.getPipeLayout(), DRAW_UBO_SCENE, 204 | 1, res->m_drawBind.at(DRAW_UBO_SCENE).getSets(), 0, nullptr); 205 | break; 206 | case BINDINGMODE_PUSHADDRESS: 207 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawPush.getPipeLayout(), 0, 1, 208 | res->m_drawPush.getSets(), 0, nullptr); 209 | break; 210 | case BINDINGMODE_INDEX_BASEINSTANCE: 211 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawIndexed.getPipeLayout(), 0, 1, 212 | res->m_drawIndexed.getSets(), 0, nullptr); 213 | break; 214 | case BINDINGMODE_INDEX_VERTEXATTRIB: 215 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawIndexed.getPipeLayout(), 0, 1, 216 | res->m_drawIndexed.getSets(), 0, nullptr); 217 | 218 | { 219 | VkDeviceSize offset = {sizeof(uint32_t) * begin}; 220 | VkDeviceSize size = {VK_WHOLE_SIZE}; 221 | VkDeviceSize stride = {sizeof(uint32_t)}; 222 | #if USE_DYNAMIC_VERTEX_STRIDE 223 | vkCmdBindVertexBuffers2(cmd, 1, 1, &m_combinedIndices[m_cycleCurrent].buffer, &offset, &size, &stride); 224 | #else 225 | vkCmdBindVertexBuffers(cmd, 1, 1, &m_combinedIndices[m_cycleCurrent].buffer, &offset); 226 | #endif 227 | } 228 | break; 229 | } 230 | 231 | if(m_config.shaderObjs) 232 | { 233 | const VkShaderStageFlagBits unusedStages[3] = {VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, 234 | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, VK_SHADER_STAGE_GEOMETRY_BIT}; 235 | vkCmdBindShadersEXT(cmd, 3, unusedStages, nullptr); 236 | } 237 | 238 | for(size_t i = 0; i < drawCount; i++) 239 | { 240 | size_t idx = m_config.permutated ? 
m_seqIndices[i + begin] : i + begin; 241 | const DrawItem& di = drawItems[idx]; 242 | 243 | if(di.shaderIndex != lastShader) 244 | { 245 | if(m_config.shaderObjs) 246 | { 247 | VkShaderStageFlagBits stages[2] = {VK_SHADER_STAGE_VERTEX_BIT, VK_SHADER_STAGE_FRAGMENT_BIT}; 248 | VkShaderEXT shaders[2] = {res->m_drawShading.vertexShaderObjs[di.shaderIndex], 249 | res->m_drawShading.fragmentShaderObjs[di.shaderIndex]}; 250 | vkCmdBindShadersEXT(cmd, 2, stages, shaders); 251 | } 252 | else 253 | { 254 | vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawShading.pipelines[di.shaderIndex]); 255 | } 256 | 257 | lastShader = di.shaderIndex; 258 | } 259 | 260 | #if USE_DRAW_OFFSETS 261 | if(lastGeometry != int(scene.m_geometry[di.geometryIndex].allocation.chunkIndex)) 262 | { 263 | const CadSceneVK::Geometry& geo = scene.m_geometry[di.geometryIndex]; 264 | 265 | vkCmdBindIndexBuffer(cmd, geo.ibo.buffer, 0, VK_INDEX_TYPE_UINT32); 266 | VkDeviceSize offset = {0}; 267 | VkDeviceSize size = {VK_WHOLE_SIZE}; 268 | VkDeviceSize stride = {sizeof(CadScene::Vertex)}; 269 | #if USE_DYNAMIC_VERTEX_STRIDE 270 | vkCmdBindVertexBuffers2(cmd, 0, 1, &geo.vbo.buffer, &offset, &size, &stride); 271 | #else 272 | vkCmdBindVertexBuffers(cmd, 0, 1, &geo.vbo.buffer, &offset); 273 | #endif 274 | lastGeometry = int(scene.m_geometry[di.geometryIndex].allocation.chunkIndex); 275 | } 276 | #else 277 | if(lastGeometry != di.geometryIndex) 278 | { 279 | const CadSceneVK::Geometry& geo = scene.m_geometry[di.geometryIndex]; 280 | VkDeviceSize stride = {sizeof(CadScene::Vertex)}; 281 | 282 | vkCmdBindIndexBuffer(cmd, geo.ibo.buffer, geo.ibo.offset, VK_INDEX_TYPE_UINT32); 283 | #if USE_DYNAMIC_VERTEX_STRIDE 284 | vkCmdBindVertexBuffers2(cmd, 0, 1, &geo.vbo.buffer, &geo.vbo.offset, &geo.vbo.range, &stride); 285 | #else 286 | vkCmdBindVertexBuffers(cmd, 0, 1, &geo.vbo.buffer, &geo.vbo.offset); 287 | #endif 288 | 289 | lastGeometry = di.geometryIndex; 290 | } 291 | #endif 292 | 293 | uint32_t firstInstance = 0; 294 | 295 | if(bindingMode == BINDINGMODE_DSETS) 296 | { 297 | if(lastMatrix != di.matrixIndex) 298 | { 299 | uint32_t offset = di.matrixIndex * res->m_alignedMatrixSize; 300 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawBind.getPipeLayout(), 301 | DRAW_UBO_MATRIX, 1, res->m_drawBind.at(DRAW_UBO_MATRIX).getSets(), 1, &offset); 302 | lastMatrix = di.matrixIndex; 303 | } 304 | 305 | if(lastMaterial != di.materialIndex) 306 | { 307 | uint32_t offset = di.materialIndex * res->m_alignedMaterialSize; 308 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawBind.getPipeLayout(), 309 | DRAW_UBO_MATERIAL, 1, res->m_drawBind.at(DRAW_UBO_MATERIAL).getSets(), 1, &offset); 310 | lastMaterial = di.materialIndex; 311 | } 312 | } 313 | else if(bindingMode == BINDINGMODE_PUSHADDRESS) 314 | { 315 | if(lastMatrix != di.matrixIndex) 316 | { 317 | VkDeviceAddress address = matrixAddress + sizeof(CadScene::MatrixNode) * di.matrixIndex; 318 | 319 | vkCmdPushConstants(cmd, res->m_drawPush.getPipeLayout(), VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(VkDeviceAddress), &address); 320 | 321 | lastMatrix = di.matrixIndex; 322 | } 323 | 324 | if(lastMaterial != di.materialIndex) 325 | { 326 | VkDeviceAddress address = materialAddress + sizeof(CadScene::Material) * di.materialIndex; 327 | 328 | vkCmdPushConstants(cmd, res->m_drawPush.getPipeLayout(), VK_SHADER_STAGE_FRAGMENT_BIT, 329 | sizeof(VkDeviceAddress), sizeof(VkDeviceAddress), &address); 330 | 331 | lastMaterial = di.materialIndex; 332 | } 
333 | } 334 | else if(bindingMode == BINDINGMODE_INDEX_BASEINSTANCE) 335 | { 336 | firstInstance = m_indexingBits.packIndices(di.matrixIndex, di.materialIndex); 337 | } 338 | else if(bindingMode == BINDINGMODE_INDEX_VERTEXATTRIB) 339 | { 340 | firstInstance = i; 341 | m_combinedIndicesData[begin + i] = m_indexingBits.packIndices(di.matrixIndex, di.materialIndex); 342 | } 343 | 344 | // drawcall 345 | #if USE_DRAW_OFFSETS 346 | const CadSceneVK::Geometry& geo = scene.m_geometry[di.geometryIndex]; 347 | vkCmdDrawIndexed(cmd, di.range.count, 1, uint32_t(di.range.offset + geo.ibo.offset / sizeof(uint32_t)), 348 | geo.vbo.offset / sizeof(CadScene::Vertex), firstInstance); 349 | #else 350 | vkCmdDrawIndexed(cmd, di.range.count, 1, uint32_t(di.range.offset / sizeof(uint32_t)), 0, firstInstance); 351 | #endif 352 | 353 | lastShader = di.shaderIndex; 354 | } 355 | 356 | if(m_combinedIndicesData.size()) 357 | { 358 | // copy 359 | uint32_t* mapping = (uint32_t*)m_combinedIndicesMappings[m_cycleCurrent]; 360 | memcpy(mapping + begin, m_combinedIndicesData.data() + begin, sizeof(uint32_t) * drawCount); 361 | } 362 | } 363 | 364 | void setupCmdBuffer(DrawSetup& sc, nvvk::RingCommandPool& pool, size_t begin, const DrawItem* drawItems, size_t drawCount) 365 | { 366 | const ResourcesVK* res = m_resources; 367 | 368 | VkCommandBuffer cmd = pool.createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_SECONDARY, false); 369 | res->cmdBegin(cmd, true, false, true); 370 | 371 | if(m_config.shaderObjs) 372 | { 373 | res->cmdShaderObjectState(cmd); 374 | } 375 | else 376 | { 377 | res->cmdDynamicPipelineState(cmd); 378 | } 379 | 380 | fillCmdBuffer(cmd, m_config.bindingMode, begin, drawItems, drawCount); 381 | 382 | vkEndCommandBuffer(cmd); 383 | sc.cmdbuffers.push_back(cmd); 384 | } 385 | }; 386 | 387 | 388 | static RendererThreadedVK::TypeCmd s_type_cmdmain_vk; 389 | 390 | void RendererThreadedVK::init(const CadScene* scene, ResourcesVK* resources, const Config& config, Stats& stats) 391 | { 392 | ResourcesVK* res = (ResourcesVK*)resources; 393 | m_resources = res; 394 | m_scene = scene; 395 | m_config = config; 396 | 397 | res->initPipelinesOrShaders(config.bindingMode, 0, config.shaderObjs); 398 | 399 | fillDrawItems(m_drawItems, scene, config, stats); 400 | if(config.permutated) 401 | { 402 | m_seqIndices.resize(m_drawItems.size()); 403 | fillRandomPermutation(m_drawItems.size(), m_seqIndices.data(), m_drawItems.data(), stats); 404 | } 405 | 406 | if(m_config.bindingMode == BINDINGMODE_INDEX_VERTEXATTRIB) 407 | { 408 | m_combinedIndicesData.resize(m_drawItems.size()); 409 | for(uint32_t i = 0; i < nvvk::DEFAULT_RING_SIZE; i++) 410 | { 411 | m_combinedIndices[i] = 412 | res->m_resourceAllocator.createBuffer(sizeof(uint32_t) * m_drawItems.size(), VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, 413 | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); 414 | 415 | m_combinedIndicesMappings[i] = res->m_resourceAllocator.map(m_combinedIndices[i]); 416 | } 417 | } 418 | 419 | m_indexingBits = m_scene->getIndexingBits(); 420 | 421 | m_threadpool.init(m_config.workerThreads); 422 | 423 | // make jobs 424 | m_ready = 0; 425 | m_jobs = new ThreadJob[m_config.workerThreads]; 426 | m_stopThreads = 0; 427 | 428 | for(uint32_t i = 0; i < m_config.workerThreads; i++) 429 | { 430 | ThreadJob& job = m_jobs[i]; 431 | job.index = i; 432 | job.renderer = this; 433 | job.m_hasWork = -1; 434 | job.m_frame = 0; 435 | 436 | job.m_pool.init(res->m_device, res->m_context->m_queueGCT); 437 | 438 | m_threadpool.activateJob(i, 
threadMaster, &m_jobs[i]); 439 | } 440 | 441 | m_frame = 0; 442 | } 443 | 444 | void RendererThreadedVK::deinit() 445 | { 446 | m_stopThreads = 1; 447 | m_ready = 0; 448 | 449 | THREAD_BARRIER(); 450 | for(uint32_t i = 0; i < m_config.workerThreads; i++) 451 | { 452 | std::unique_lock lock(m_jobs[i].m_hasWorkMutex); 453 | m_jobs[i].m_hasWork = m_frame; 454 | m_jobs[i].m_hasWorkCond.notify_one(); 455 | } 456 | m_drawMutexCondition.notify_all(); 457 | 458 | std::this_thread::yield(); 459 | 460 | { 461 | std::unique_lock lock(m_readyMutex); 462 | while(m_ready < m_config.workerThreads) 463 | { 464 | m_readyCond.wait(lock); 465 | } 466 | } 467 | 468 | THREAD_BARRIER(); 469 | 470 | for(uint32_t i = 0; i < m_config.workerThreads; i++) 471 | { 472 | for(size_t s = 0; s < m_jobs[i].m_scs.size(); s++) 473 | { 474 | delete m_jobs[i].m_scs[s]; 475 | } 476 | m_jobs[i].m_pool.deinit(); 477 | } 478 | 479 | for(uint32_t i = 0; i < nvvk::DEFAULT_RING_SIZE; i++) 480 | { 481 | if(m_combinedIndices[i].memHandle) 482 | { 483 | m_resources->m_resourceAllocator.unmap(m_combinedIndices[i]); 484 | m_resources->m_resourceAllocator.destroy(m_combinedIndices[i]); 485 | } 486 | } 487 | 488 | delete[] m_jobs; 489 | 490 | m_threadpool.deinit(); 491 | 492 | m_drawItems.clear(); 493 | m_combinedIndicesData.clear(); 494 | } 495 | 496 | void RendererThreadedVK::enqueueShadeCommand_ts(DrawSetup* sc) 497 | { 498 | std::unique_lock lock(m_drawMutex); 499 | 500 | m_drawQueue.push(sc); 501 | m_drawMutexCondition.notify_one(); 502 | } 503 | 504 | unsigned int RendererThreadedVK::RunThreadFrame(ThreadJob& job) 505 | { 506 | unsigned int dispatches = 0; 507 | 508 | bool first = true; 509 | size_t tnum = 0; 510 | size_t begin = 0; 511 | size_t num = 0; 512 | 513 | size_t offset = 0; 514 | 515 | job.resetFrame(); 516 | job.m_pool.setCycle(m_cycleCurrent); 517 | 518 | if(m_workerBatched || true) 519 | { 520 | DrawSetup* sc = job.getFrameCommand(); 521 | while(getWork_ts(begin, num)) 522 | { 523 | setupCmdBuffer(*sc, job.m_pool, begin, m_drawItems.data(), num); 524 | tnum += num; 525 | } 526 | if(!sc->cmdbuffers.empty()) 527 | { 528 | enqueueShadeCommand_ts(sc); 529 | dispatches += 1; 530 | } 531 | } 532 | else 533 | { 534 | while(getWork_ts(begin, num)) 535 | { 536 | DrawSetup* sc = job.getFrameCommand(); 537 | setupCmdBuffer(*sc, job.m_pool, begin, m_drawItems.data(), num); 538 | 539 | if(!sc->cmdbuffers.empty()) 540 | { 541 | enqueueShadeCommand_ts(sc); 542 | dispatches += 1; 543 | } 544 | tnum += num; 545 | } 546 | } 547 | 548 | // nullptr signals we are done 549 | enqueueShadeCommand_ts(nullptr); 550 | 551 | return dispatches; 552 | } 553 | 554 | void RendererThreadedVK::RunThread(int tid) 555 | { 556 | ThreadJob& job = m_jobs[tid]; 557 | 558 | double timeWork = 0; 559 | double timeFrame = 0; 560 | int timerFrames = 0; 561 | size_t dispatches = 0; 562 | 563 | double timePrint = NVPSystem::getTime(); 564 | 565 | while(!m_stopThreads) 566 | { 567 | double beginFrame = NVPSystem::getTime(); 568 | timeFrame -= NVPSystem::getTime(); 569 | { 570 | std::unique_lock lock(job.m_hasWorkMutex); 571 | while(job.m_hasWork != job.m_frame) 572 | { 573 | job.m_hasWorkCond.wait(lock); 574 | } 575 | } 576 | 577 | if(m_stopThreads) 578 | { 579 | break; 580 | } 581 | 582 | double beginWork = NVPSystem::getTime(); 583 | timeWork -= NVPSystem::getTime(); 584 | 585 | dispatches += RunThreadFrame(job); 586 | 587 | job.m_frame++; 588 | 589 | timeWork += NVPSystem::getTime(); 590 | 591 | double currentTime = NVPSystem::getTime(); 592 | timeFrame += 
currentTime; 593 | 594 | timerFrames++; 595 | 596 | if(timerFrames && (currentTime - timePrint) > 2.0) 597 | { 598 | timeFrame /= double(timerFrames); 599 | timeWork /= double(timerFrames); 600 | 601 | timeFrame *= 1000000.0; 602 | timeWork *= 1000000.0; 603 | 604 | timePrint = currentTime; 605 | 606 | float avgdispatch = float(double(dispatches) / double(timerFrames)); 607 | 608 | #if 1 609 | LOGI("thread %d: work %6d [us] cmdbuffers %5.1f (avg)\n", tid, uint32_t(timeWork), avgdispatch); 610 | #endif 611 | timeFrame = 0; 612 | timeWork = 0; 613 | 614 | timerFrames = 0; 615 | dispatches = 0; 616 | } 617 | } 618 | 619 | { 620 | std::unique_lock lock(m_readyMutex); 621 | m_ready++; 622 | m_readyCond.notify_all(); 623 | } 624 | } 625 | 626 | 627 | void RendererThreadedVK::drawThreaded(const Resources::Global& global, VkCommandBuffer primary, Stats& stats) 628 | { 629 | ResourcesVK* res = m_resources; 630 | 631 | m_workingSet = global.workingSet; 632 | m_workerBatched = global.workerBatched; 633 | m_numCurItems = 0; 634 | m_numEnqueues = 0; 635 | m_cycleCurrent = res->m_ringFences.getCycleIndex(); 636 | 637 | stats.cmdBuffers = 0; 638 | 639 | // generate & cmdbuffers in parallel 640 | 641 | THREAD_BARRIER(); 642 | 643 | // start to dispatch threads 644 | for(uint32_t i = 0; i < m_config.workerThreads; i++) 645 | { 646 | { 647 | std::unique_lock lock(m_jobs[i].m_hasWorkMutex); 648 | m_jobs[i].m_hasWork = m_frame; 649 | } 650 | m_jobs[i].m_hasWorkCond.notify_one(); 651 | } 652 | 653 | // collect secondaries here 654 | { 655 | int numTerminated = 0; 656 | while(true) 657 | { 658 | bool hadEntry = false; 659 | DrawSetup* sc = nullptr; 660 | { 661 | std::unique_lock lock(m_drawMutex); 662 | if(m_drawQueue.empty()) 663 | { 664 | m_drawMutexCondition.wait(lock); 665 | } 666 | if(!m_drawQueue.empty()) 667 | { 668 | 669 | sc = m_drawQueue.front(); 670 | m_drawQueue.pop(); 671 | 672 | hadEntry = true; 673 | } 674 | } 675 | 676 | if(hadEntry) 677 | { 678 | if(sc) 679 | { 680 | m_numEnqueues++; 681 | THREAD_BARRIER(); 682 | vkCmdExecuteCommands(primary, (uint32_t)sc->cmdbuffers.size(), sc->cmdbuffers.data()); 683 | stats.cmdBuffers += (uint32_t)sc->cmdbuffers.size(); 684 | sc->cmdbuffers.clear(); 685 | } 686 | else 687 | { 688 | numTerminated++; 689 | } 690 | } 691 | 692 | if(numTerminated == m_config.workerThreads) 693 | { 694 | break; 695 | } 696 | std::this_thread::yield(); 697 | } 698 | } 699 | 700 | m_frame++; 701 | 702 | THREAD_BARRIER(); 703 | } 704 | 705 | void RendererThreadedVK::draw(const Resources::Global& global, Stats& stats) 706 | { 707 | ResourcesVK* res = m_resources; 708 | 709 | VkCommandBuffer primary = res->createTempCmdBuffer(); 710 | { 711 | nvvk::ProfilerVK::Section profile(res->m_profilerVK, "Render", primary); 712 | { 713 | nvvk::ProfilerVK::Section profile(res->m_profilerVK, "Draw", primary); 714 | 715 | vkCmdUpdateBuffer(primary, res->m_common.viewBuffer.buffer, 0, sizeof(SceneData), (const uint32_t*)&global.sceneUbo); 716 | res->cmdPipelineBarrier(primary); 717 | res->cmdBeginRendering(primary, true); 718 | 719 | drawThreaded(global, primary, stats); 720 | 721 | 722 | vkCmdEndRendering(primary); 723 | } 724 | } 725 | vkEndCommandBuffer(primary); 726 | res->submissionEnqueue(primary); 727 | } 728 | 729 | 730 | } // namespace generatedcmds 731 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. 
All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ 22 | 23 | #define DEBUG_FILTER 1 24 | 25 | #include "vk_ext_device_generated_commands.hpp" 26 | #include 27 | 28 | #include 29 | 30 | #include 31 | #include 32 | #include 33 | 34 | #include 35 | 36 | #include "renderer.hpp" 37 | #include "threadpool.hpp" 38 | #include "resources_vk.hpp" 39 | #include "glm/gtc/matrix_access.hpp" 40 | 41 | namespace generatedcmds { 42 | int const SAMPLE_SIZE_WIDTH(1024); 43 | int const SAMPLE_SIZE_HEIGHT(960); 44 | 45 | void setupVulkanContextInfo(nvvk::ContextCreateInfo& info) 46 | { 47 | info.apiMajor = 1; 48 | info.apiMinor = 3; 49 | 50 | static VkPhysicalDeviceShaderObjectFeaturesEXT shaderObjsFeatureExt = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT}; 51 | info.addDeviceExtension(VK_EXT_SHADER_OBJECT_EXTENSION_NAME, true, &shaderObjsFeatureExt, VK_EXT_SHADER_OBJECT_SPEC_VERSION); 52 | 53 | #if 1 54 | static VkPhysicalDeviceDeviceGeneratedCommandsFeaturesNV dgcFeaturesNv = { 55 | VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_FEATURES_NV}; 56 | info.addDeviceExtension(VK_NV_DEVICE_GENERATED_COMMANDS_EXTENSION_NAME, true, &dgcFeaturesNv, 57 | VK_NV_DEVICE_GENERATED_COMMANDS_SPEC_VERSION); 58 | 59 | static VkPhysicalDeviceDeviceGeneratedCommandsFeaturesEXT dgcFeaturesExt = { 60 | VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_FEATURES_EXT}; 61 | info.addDeviceExtension(VK_EXT_DEVICE_GENERATED_COMMANDS_EXTENSION_NAME, true, &dgcFeaturesExt, 62 | VK_EXT_DEVICE_GENERATED_COMMANDS_SPEC_VERSION); 63 | 64 | #if _DEBUG 65 | // extensions don't work with validation layer 66 | #if 1 67 | info.removeInstanceLayer("VK_LAYER_KHRONOS_validation"); 68 | #else 69 | 70 | // Removing the handle wrapping to the KHRONOS validation layer 71 | // See: https://vulkan.lunarg.com/doc/sdk/1.3.275.0/linux/khronos_validation_layer.html 72 | static const char* layer_name = "VK_LAYER_KHRONOS_validation"; 73 | static const VkBool32 handle_wrapping = VK_FALSE; 74 | 75 | static const VkLayerSettingEXT settings[] = { 76 | {layer_name, "handle_wrapping", VK_LAYER_SETTING_TYPE_BOOL32_EXT, 1, &handle_wrapping}, 77 | }; 78 | 79 | static VkLayerSettingsCreateInfoEXT layerSettingsCreateInfo = { 80 | .sType = VK_STRUCTURE_TYPE_LAYER_SETTINGS_CREATE_INFO_EXT, 81 | .settingCount = static_cast(std::size(settings)), 82 | .pSettings = settings, 83 | }; 84 | 85 | info.instanceCreateInfoExt = &layerSettingsCreateInfo; 86 | #endif 87 | #endif 88 | #endif 89 | } 90 | 91 | 92 | class Sample : public nvvk::AppWindowProfilerVK 93 | { 94 | 95 | enum GuiEnums 96 | { 97 | GUI_SHADERS, 98 | GUI_BINDINGS, 99 | GUI_RENDERER, 100 | GUI_STRATEGY, 101 | GUI_MSAA, 102 | }; 103 | 104 | public: 105 | struct Tweak 106 | { 107 | int renderer = 0; 108 | BindingMode 
binding = BINDINGMODE_INDEX_VERTEXATTRIB; 109 | Strategy strategy = STRATEGY_GROUPS; 110 | int msaa = 4; 111 | int copies = 4; 112 | bool unordered = true; 113 | bool interleaved = true; 114 | bool sorted = false; 115 | bool permutated = false; 116 | bool binned = false; 117 | bool animation = false; 118 | bool animationSpin = false; 119 | int useShaderObjs = 0; 120 | uint32_t maxShaders = 16; 121 | int cloneaxisX = 1; 122 | int cloneaxisY = 1; 123 | int cloneaxisZ = 1; 124 | float percent = 1.01f; 125 | uint32_t workingSet = 4096; 126 | uint32_t workerThreads = 4; 127 | bool workerBatched = true; 128 | }; 129 | 130 | 131 | bool m_useUI = true; 132 | bool m_supportsShaderObjs = false; 133 | bool m_supportsBinning = false; 134 | bool m_supportsNV = false; 135 | uint32_t m_maxThreads = 1; 136 | 137 | ImGuiH::Registry m_ui; 138 | double m_uiTime = 0; 139 | 140 | Tweak m_tweak; 141 | Tweak m_lastTweak; 142 | bool m_lastVsync; 143 | 144 | CadScene m_scene; 145 | std::vector m_renderersSorted; 146 | std::string m_rendererName; 147 | 148 | Renderer* m_renderer = nullptr; 149 | ResourcesVK m_resources; 150 | Resources::Global m_shared; 151 | Renderer::Stats m_renderStats; 152 | 153 | std::string m_modelFilename; 154 | double m_animBeginTime; 155 | 156 | double m_lastFrameTime = 0; 157 | double m_frames = 0; 158 | 159 | double m_statsFrameTime = 0; 160 | double m_statsCpuTime = 0; 161 | double m_statsGpuTime = 0; 162 | double m_statsGpuDrawTime = 0; 163 | double m_statsGpuBuildTime = 0; 164 | 165 | bool initProgram(); 166 | bool initScene(const char* filename, int clones, int cloneaxis); 167 | void initRenderer(int type); 168 | void deinitRenderer(); 169 | void initResources(); 170 | 171 | void setupConfigParameters(); 172 | void setRendererFromName(); 173 | 174 | Sample() 175 | : AppWindowProfilerVK(false) 176 | { 177 | m_maxThreads = ThreadPool::sysGetNumCores(); 178 | m_tweak.workerThreads = m_maxThreads; 179 | 180 | setupConfigParameters(); 181 | setupVulkanContextInfo(m_contextInfo); 182 | #if defined(NDEBUG) 183 | setVsync(false); 184 | #endif 185 | } 186 | 187 | public: 188 | bool validateConfig() override; 189 | 190 | void postBenchmarkAdvance() override { setRendererFromName(); } 191 | 192 | bool begin() override; 193 | void think(double time) override; 194 | void resize(int width, int height) override; 195 | 196 | void processUI(int width, int height, double time); 197 | 198 | nvh::CameraControl m_control; 199 | 200 | void end() override; 201 | 202 | // return true to prevent m_window updates 203 | bool mouse_pos(int x, int y) override 204 | { 205 | if(!m_useUI) 206 | return false; 207 | 208 | return ImGuiH::mouse_pos(x, y); 209 | } 210 | bool mouse_button(int button, int action) override 211 | { 212 | if(!m_useUI) 213 | return false; 214 | 215 | return ImGuiH::mouse_button(button, action); 216 | } 217 | bool mouse_wheel(int wheel) override 218 | { 219 | if(!m_useUI) 220 | return false; 221 | 222 | return ImGuiH::mouse_wheel(wheel); 223 | } 224 | bool key_char(int key) override 225 | { 226 | if(!m_useUI) 227 | return false; 228 | 229 | return ImGuiH::key_char(key); 230 | } 231 | bool key_button(int button, int action, int mods) override 232 | { 233 | if(!m_useUI) 234 | return false; 235 | 236 | return ImGuiH::key_button(button, action, mods); 237 | } 238 | }; 239 | 240 | 241 | bool Sample::initProgram() 242 | { 243 | return true; 244 | } 245 | 246 | bool Sample::initScene(const char* filename, int clones, int cloneaxis) 247 | { 248 | std::string modelFilename(filename); 249 | 250 | 
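    // If the path does not resolve as given, fall back to searching for the
    // bare filename in the working directory and in the project's source and
    // download directories.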
if(!nvh::fileExists(filename)) 251 | { 252 | modelFilename = nvh::getFileName(filename); 253 | std::vector searchPaths; 254 | searchPaths.push_back("./"); 255 | searchPaths.push_back(exePath() + PROJECT_RELDIRECTORY); 256 | searchPaths.push_back(exePath() + PROJECT_DOWNLOAD_RELDIRECTORY); 257 | modelFilename = nvh::findFile(modelFilename, searchPaths); 258 | } 259 | 260 | m_scene.unload(); 261 | 262 | bool status = m_scene.loadCSF(modelFilename.c_str(), clones, cloneaxis); 263 | if(status) 264 | { 265 | LOGI("\nscene %s\n", filename); 266 | LOGI("geometries: %6d\n", uint32_t(m_scene.m_geometry.size())); 267 | LOGI("materials: %6d\n", uint32_t(m_scene.m_materials.size())); 268 | LOGI("nodes: %6d\n", uint32_t(m_scene.m_matrices.size())); 269 | LOGI("objects: %6d\n", uint32_t(m_scene.m_objects.size())); 270 | LOGI("\n"); 271 | } 272 | else 273 | { 274 | LOGW("\ncould not load model %s\n", modelFilename.c_str()); 275 | } 276 | 277 | m_shared.animUbo.numMatrices = uint(m_scene.m_matrices.size()); 278 | 279 | return status; 280 | } 281 | 282 | void Sample::deinitRenderer() 283 | { 284 | if(m_renderer) 285 | { 286 | m_resources.synchronize(); 287 | m_renderer->deinit(); 288 | delete m_renderer; 289 | m_renderer = nullptr; 290 | } 291 | } 292 | 293 | void Sample::initResources() 294 | { 295 | std::string prepend; 296 | CadScene::IndexingBits bits = m_scene.getIndexingBits(); 297 | prepend += nvh::ShaderFileManager::format("#define INDEXED_MATRIX_BITS %d\n", bits.matrices); 298 | prepend += nvh::ShaderFileManager::format("#define INDEXED_MATERIAL_BITS %d\n", bits.materials); 299 | 300 | bool valid = m_resources.init(&m_context, &m_swapChain, &m_profiler); 301 | valid = valid && m_resources.initFramebuffer(m_windowState.m_swapSize[0], m_windowState.m_swapSize[1], m_tweak.msaa, getVsync()); 302 | valid = valid && m_resources.initPrograms(exePath(), prepend); 303 | valid = valid && m_resources.initScene(m_scene); 304 | m_resources.m_frame = 0; 305 | 306 | if(!valid) 307 | { 308 | LOGE("resource initialization failed\n"); 309 | exit(-1); 310 | } 311 | 312 | m_lastVsync = getVsync(); 313 | } 314 | 315 | void Sample::initRenderer(int typesort) 316 | { 317 | int type = m_renderersSorted[typesort]; 318 | 319 | deinitRenderer(); 320 | 321 | { 322 | uint32_t supported = Renderer::getRegistry()[type]->supportedBindingModes(); 323 | BindingMode mode = BINDINGMODE_DSETS; 324 | m_ui.enumReset(GUI_BINDINGS); 325 | if(supported & (1 << BINDINGMODE_DSETS)) 326 | { 327 | m_ui.enumAdd(GUI_BINDINGS, BINDINGMODE_DSETS, "dsetbinding"); 328 | mode = BINDINGMODE_DSETS; 329 | } 330 | if(supported & (1 << BINDINGMODE_PUSHADDRESS)) 331 | { 332 | m_ui.enumAdd(GUI_BINDINGS, BINDINGMODE_PUSHADDRESS, "pushaddress"); 333 | mode = BINDINGMODE_PUSHADDRESS; 334 | } 335 | if(supported & (1 << BINDINGMODE_INDEX_BASEINSTANCE) && m_scene.supportsIndexing()) 336 | { 337 | m_ui.enumAdd(GUI_BINDINGS, BINDINGMODE_INDEX_BASEINSTANCE, "baseinstance index"); 338 | mode = BINDINGMODE_INDEX_BASEINSTANCE; 339 | } 340 | if(supported & (1 << BINDINGMODE_INDEX_VERTEXATTRIB) && m_scene.supportsIndexing()) 341 | { 342 | m_ui.enumAdd(GUI_BINDINGS, BINDINGMODE_INDEX_VERTEXATTRIB, "inst.vertexattrib index"); 343 | mode = BINDINGMODE_INDEX_VERTEXATTRIB; 344 | } 345 | 346 | if(!(supported & (1 << m_tweak.binding))) 347 | { 348 | m_tweak.binding = mode; 349 | } 350 | } 351 | 352 | { 353 | bool supported = Renderer::getRegistry()[type]->supportsShaderObjs(); 354 | bool useShaderObjs = false; 355 | m_ui.enumReset(GUI_SHADERS); 356 | m_ui.enumAdd(GUI_SHADERS, 
SHADERMODE_PIPELINE, "pipeline"); 357 | if(supported) 358 | { 359 | m_ui.enumAdd(GUI_SHADERS, SHADERMODE_OBJS, "shaderobjs"); 360 | } 361 | 362 | if(!supported && m_tweak.useShaderObjs) 363 | { 364 | m_tweak.useShaderObjs = false; 365 | } 366 | } 367 | 368 | if(m_tweak.sorted) 369 | { 370 | m_tweak.permutated = false; 371 | } 372 | 373 | m_tweak.maxShaders = std::min(m_tweak.maxShaders, std::min(uint32_t(NUM_MATERIAL_SHADERS), 374 | Renderer::getRegistry()[type]->supportedShaderBinds())); 375 | m_tweak.maxShaders = std::max(m_tweak.maxShaders, uint32_t(1)); 376 | 377 | Renderer::Config config; 378 | config.objectFrom = 0; 379 | config.objectNum = uint32_t(double(m_scene.m_objects.size()) * double(m_tweak.percent)); 380 | config.strategy = m_tweak.strategy; 381 | config.bindingMode = m_tweak.binding; 382 | config.sorted = m_tweak.sorted; 383 | config.binned = m_tweak.binned; 384 | config.interleaved = m_tweak.interleaved; 385 | config.unordered = m_tweak.unordered; 386 | config.permutated = m_tweak.permutated; 387 | config.maxShaders = m_tweak.maxShaders; 388 | config.workerThreads = m_tweak.workerThreads; 389 | config.shaderObjs = m_tweak.useShaderObjs != 0; 390 | 391 | m_renderStats = Renderer::Stats(); 392 | 393 | LOGI("renderer: %s\n", Renderer::getRegistry()[type]->name()); 394 | m_renderer = Renderer::getRegistry()[type]->create(); 395 | m_renderer->init(&m_scene, &m_resources, config, m_renderStats); 396 | 397 | LOGI("drawCalls: %9d\n", m_renderStats.drawCalls); 398 | LOGI("drawTris: %9d\n", m_renderStats.drawTriangles); 399 | LOGI("shaderBinds: %9d\n", m_renderStats.shaderBindings); 400 | LOGI("prep.Buffer: %9d KB\n\n", m_renderStats.preprocessSizeKB); 401 | } 402 | 403 | 404 | void Sample::end() 405 | { 406 | deinitRenderer(); 407 | m_resources.deinit(); 408 | ResourcesVK::deinitImGui(m_context); 409 | } 410 | 411 | 412 | bool Sample::begin() 413 | { 414 | #if !PRINT_TIMER_STATS 415 | m_profilerPrint = false; 416 | m_timeInTitle = true; 417 | #else 418 | m_profilerPrint = true; 419 | m_timeInTitle = true; 420 | #endif 421 | 422 | 423 | ImGuiH::Init(m_windowState.m_winSize[0], m_windowState.m_winSize[1], this); 424 | 425 | if(m_context.hasDeviceExtension(VK_EXT_DEVICE_GENERATED_COMMANDS_EXTENSION_NAME)) 426 | { 427 | bool loaded = load_VK_EXT_device_generated_commands(m_context.m_instance, m_context.m_device); 428 | if(!loaded) 429 | { 430 | LOGE("Failed to load functions for VK_EXT_DEVICE_GENERATED_COMMANDS_EXTENSION\n"); 431 | return false; 432 | } 433 | 434 | VkPhysicalDeviceDeviceGeneratedCommandsPropertiesEXT props = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_PROPERTIES_EXT}; 435 | VkPhysicalDeviceProperties2 props2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2}; 436 | props2.pNext = &props; 437 | vkGetPhysicalDeviceProperties2(m_context.m_physicalDevice, &props2); 438 | 439 | if(props.deviceGeneratedCommandsMultiDrawIndirectCount) 440 | { 441 | m_supportsBinning = true; 442 | } 443 | } 444 | m_supportsNV = m_context.hasDeviceExtension(VK_NV_DEVICE_GENERATED_COMMANDS_EXTENSION_NAME); 445 | m_supportsShaderObjs = m_context.hasDeviceExtension(VK_EXT_SHADER_OBJECT_EXTENSION_NAME); 446 | 447 | bool validated(true); 448 | validated = validated && initProgram(); 449 | validated = validated 450 | && initScene(m_modelFilename.c_str(), m_tweak.copies - 1, 451 | (m_tweak.cloneaxisX << 0) | (m_tweak.cloneaxisY << 1) | (m_tweak.cloneaxisZ << 2)); 452 | 453 | if(!validated) 454 | { 455 | LOGE("resources failed\n"); 456 | return false; 457 | } 458 | 459 | 
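  // Renderer selection below builds a composite sort key per available
  // renderer: the type's priority() in the upper 16 bits, the registry index
  // in the lower 16. Sorting the keys orders renderers by priority, and
  // masking with 0xFFFF afterwards recovers the plain registry index. A
  // minimal sketch of the scheme (keys is a hypothetical name; the code
  // below uses m_renderersSorted):
  //
  //   uint32_t sortkey = uint32_t(i) | (registry[i]->priority() << 16);
  //   std::sort(keys.begin(), keys.end());   // priority-major ordering
  //   uint32_t index = keys[n] & 0xFFFF;     // back to registry index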
ResourcesVK::initImGui(m_context); 460 | 461 | const Renderer::Registry registry = Renderer::getRegistry(); 462 | for(size_t i = 0; i < registry.size(); i++) 463 | { 464 | if(registry[i]->isAvailable(m_context)) 465 | { 466 | uint sortkey = uint(i); 467 | sortkey |= registry[i]->priority() << 16; 468 | m_renderersSorted.push_back(sortkey); 469 | } 470 | } 471 | 472 | if(m_renderersSorted.empty()) 473 | { 474 | LOGE("No renderers available\n"); 475 | return false; 476 | } 477 | 478 | std::sort(m_renderersSorted.begin(), m_renderersSorted.end()); 479 | 480 | for(size_t i = 0; i < m_renderersSorted.size(); i++) 481 | { 482 | m_renderersSorted[i] &= 0xFFFF; 483 | } 484 | 485 | for(size_t i = 0; i < m_renderersSorted.size(); i++) 486 | { 487 | LOGI("renderers found: %d %s\n", uint32_t(i), registry[m_renderersSorted[i]]->name()); 488 | } 489 | 490 | setRendererFromName(); 491 | 492 | if(m_useUI) 493 | { 494 | auto& imgui_io = ImGui::GetIO(); 495 | imgui_io.IniFilename = nullptr; 496 | 497 | for(size_t i = 0; i < m_renderersSorted.size(); i++) 498 | { 499 | m_ui.enumAdd(GUI_RENDERER, int(i), registry[m_renderersSorted[i]]->name()); 500 | } 501 | 502 | m_ui.enumAdd(GUI_STRATEGY, STRATEGY_GROUPS, "object material groups"); 503 | m_ui.enumAdd(GUI_STRATEGY, STRATEGY_INDIVIDUAL, "object individual surfaces"); 504 | m_ui.enumAdd(GUI_STRATEGY, STRATEGY_SINGLE, "object as single mesh"); 505 | 506 | m_ui.enumAdd(GUI_MSAA, 0, "none"); 507 | m_ui.enumAdd(GUI_MSAA, 2, "2x"); 508 | m_ui.enumAdd(GUI_MSAA, 4, "4x"); 509 | m_ui.enumAdd(GUI_MSAA, 8, "8x"); 510 | } 511 | 512 | m_control.m_sceneOrbit = glm::vec3(m_scene.m_bbox.max + m_scene.m_bbox.min) * 0.5f; 513 | m_control.m_sceneDimension = glm::length((m_scene.m_bbox.max - m_scene.m_bbox.min)); 514 | m_control.m_viewMatrix = glm::lookAt(m_control.m_sceneOrbit - (-vec3(1, 1, 1) * m_control.m_sceneDimension * 0.5f), 515 | m_control.m_sceneOrbit, vec3(0, 1, 0)); 516 | 517 | m_shared.animUbo.sceneCenter = m_control.m_sceneOrbit; 518 | m_shared.animUbo.sceneDimension = m_control.m_sceneDimension * 0.2f; 519 | m_shared.animUbo.numMatrices = uint(m_scene.m_matrices.size()); 520 | m_shared.sceneUbo.wLightPos = (m_scene.m_bbox.max + m_scene.m_bbox.min) * 0.5f + m_control.m_sceneDimension; 521 | m_shared.sceneUbo.wLightPos.w = 1.0; 522 | 523 | initResources(); 524 | initRenderer(m_tweak.renderer); 525 | 526 | m_lastTweak = m_tweak; 527 | 528 | return validated; 529 | } 530 | 531 | 532 | void Sample::processUI(int width, int height, double time) 533 | { 534 | // Update imgui configuration 535 | auto& imgui_io = ImGui::GetIO(); 536 | imgui_io.DeltaTime = static_cast(time - m_uiTime); 537 | imgui_io.DisplaySize = ImVec2(width, height); 538 | 539 | m_uiTime = time; 540 | 541 | ImGui::NewFrame(); 542 | ImGui::SetNextWindowSize(ImGuiH::dpiScaled(380, 0), ImGuiCond_FirstUseEver); 543 | if(ImGui::Begin("NVIDIA " PROJECT_NAME, nullptr)) 544 | { 545 | m_ui.enumCombobox(GUI_RENDERER, "renderer", &m_tweak.renderer); 546 | m_ui.enumCombobox(GUI_SHADERS, "shaders", &m_tweak.useShaderObjs); 547 | m_ui.enumCombobox(GUI_BINDINGS, "binding", &m_tweak.binding); 548 | m_ui.enumCombobox(GUI_STRATEGY, "strategy", &m_tweak.strategy); 549 | 550 | ImGui::PushItemWidth(ImGuiH::dpiScaled(100)); 551 | 552 | //guiRegistry.enumCombobox(GUI_SUPERSAMPLE, "supersample", &tweak.supersample); 553 | ImGuiH::InputIntClamped("max shadergroups", &m_tweak.maxShaders, 1, NUM_MATERIAL_SHADERS, 1, 1, ImGuiInputTextFlags_EnterReturnsTrue); 554 | ImGuiH::InputIntClamped("copies", &m_tweak.copies, 1, 16, 1, 1, 


void Sample::processUI(int width, int height, double time)
{
  // Update imgui configuration
  auto& imgui_io       = ImGui::GetIO();
  imgui_io.DeltaTime   = static_cast<float>(time - m_uiTime);
  imgui_io.DisplaySize = ImVec2(width, height);

  m_uiTime = time;

  ImGui::NewFrame();
  ImGui::SetNextWindowSize(ImGuiH::dpiScaled(380, 0), ImGuiCond_FirstUseEver);
  if(ImGui::Begin("NVIDIA " PROJECT_NAME, nullptr))
  {
    m_ui.enumCombobox(GUI_RENDERER, "renderer", &m_tweak.renderer);
    m_ui.enumCombobox(GUI_SHADERS, "shaders", &m_tweak.useShaderObjs);
    m_ui.enumCombobox(GUI_BINDINGS, "binding", &m_tweak.binding);
    m_ui.enumCombobox(GUI_STRATEGY, "strategy", &m_tweak.strategy);

    ImGui::PushItemWidth(ImGuiH::dpiScaled(100));

    //guiRegistry.enumCombobox(GUI_SUPERSAMPLE, "supersample", &tweak.supersample);
    ImGuiH::InputIntClamped("max shadergroups", &m_tweak.maxShaders, 1, NUM_MATERIAL_SHADERS, 1, 1,
                            ImGuiInputTextFlags_EnterReturnsTrue);
    ImGuiH::InputIntClamped("copies", &m_tweak.copies, 1, 16, 1, 1, ImGuiInputTextFlags_EnterReturnsTrue);
    ImGui::SliderFloat("pct visible", &m_tweak.percent, 0.0f, 1.001f);
    ImGui::Checkbox("sorted once (minimized state changes)", &m_tweak.sorted);
    ImGui::Checkbox("permutated (random state changes,\ngen nv: use seqindex)", &m_tweak.permutated);
    ImGui::Checkbox("gen: unordered (non-coherent)", &m_tweak.unordered);
    if(m_supportsBinning)
    {
      ImGui::Checkbox("gen ext: binned via draw_indexed_count", &m_tweak.binned);
    }
    if(m_supportsNV)
    {
      ImGui::Checkbox("gen nv: interleaved inputs", &m_tweak.interleaved);
    }

    ImGuiH::InputIntClamped("threaded: worker threads", &m_tweak.workerThreads, 1, m_maxThreads, 1, 1,
                            ImGuiInputTextFlags_EnterReturnsTrue);
    ImGuiH::InputIntClamped("threaded: drawcalls per cmdbuffer", &m_tweak.workingSet, 512, 1 << 20, 512, 1024,
                            ImGuiInputTextFlags_EnterReturnsTrue);
    ImGui::Checkbox("threaded: batched submission", &m_tweak.workerBatched);
    ImGui::Checkbox("animation", &m_tweak.animation);
    ImGui::PopItemWidth();
    ImGui::Separator();

    {
      // refresh the averaged timings every `avg` profiler frames; use a
      // shorter window when frames are slow so the readout stays responsive
      int avg = 50;

      if(m_lastFrameTime == 0)
      {
        m_lastFrameTime = time;
        m_frames        = -1;
      }

      if(m_frames > 4)
      {
        double curavg = (time - m_lastFrameTime) / m_frames;
        if(curavg > 1.0 / 30.0)
        {
          avg = 10;
        }
      }

      if(m_profiler.getTotalFrames() % avg == avg - 1)
      {
        nvh::Profiler::TimerInfo info;
        m_profiler.getTimerInfo("Render", info);
        m_statsCpuTime      = info.cpu.average;
        m_statsGpuTime      = info.gpu.average;
        m_statsGpuBuildTime = 0;
        bool hasPre         = m_profiler.getTimerInfo("Pre", info);
        m_statsGpuBuildTime = hasPre ? info.gpu.average : 0;
        m_profiler.getTimerInfo("Draw", info);
        m_statsGpuDrawTime = info.gpu.average;
        m_statsFrameTime   = (time - m_lastFrameTime) / m_frames;
        m_lastFrameTime    = time;
        m_frames           = -1;
      }

      m_frames++;

      float gpuTimeF = float(m_statsGpuTime);
      float cpuTimeF = float(m_statsCpuTime);
      float bldTimef = float(m_statsGpuBuildTime);
      float drwTimef = float(m_statsGpuDrawTime);
      float maxTimeF = std::max(std::max(cpuTimeF, gpuTimeF), 0.0001f);

      //ImGui::Text("Frame [ms]: %2.1f", m_statsFrameTime*1000.0f);
      ImGui::Text("Render CPU [ms]: %2.3f", cpuTimeF / 1000.0f);
      ImGui::Text("Render GPU [ms]: %2.3f", gpuTimeF / 1000.0f);
      //ImGui::ProgressBar(gpuTimeF/maxTimeF, ImVec2(0.0f, 0.0f));
      ImGui::Text("- Preproc. GPU [ms]: %2.3f", bldTimef / 1000.0f);
      ImGui::ProgressBar(bldTimef / maxTimeF, ImVec2(0.0f, 0.0f));
      ImGui::Text("- Draw GPU [ms]: %2.3f", drwTimef / 1000.0f);
      ImGui::ProgressBar(drwTimef / maxTimeF, ImVec2(0.0f, 0.0f));

      //ImGui::ProgressBar(cpuTimeF / maxTimeF, ImVec2(0.0f, 0.0f));
      ImGui::Separator();
      ImGui::Text(" cmdBuffers: %9d\n", m_renderStats.cmdBuffers);
      ImGui::Text(" drawCalls: %9d\n", m_renderStats.drawCalls);
      ImGui::Text(" drawTris: %9d\n", m_renderStats.drawTriangles);
      ImGui::Text(" serial shaderBinds: %9d\n", m_renderStats.shaderBindings);
      ImGui::Text(" dgc sequences: %9d\n", m_renderStats.sequences);
      ImGui::Text(" dgc preprocessBuffer: %9d KB\n", m_renderStats.preprocessSizeKB);
      ImGui::Text(" dgc indirectBuffer: %9d KB\n\n", m_renderStats.indirectSizeKB);
    }
  }
  ImGui::End();
}
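
// A simplified standalone sketch of the adaptive averaging window used above,
// with hypothetical names (windowStart/framesSoFar loosely mirror
// m_lastFrameTime and m_frames); the window shrinks from 50 to 10 frames once
// a frame costs more than ~33 ms, so slow configurations still refresh their
// stats quickly:
//
//   int window = (framesSoFar > 4 && (now - windowStart) / framesSoFar > 1.0 / 30.0) ? 10 : 50;
//   if(profilerFrame % window == window - 1)
//   {
//     averageSeconds = (now - windowStart) / framesSoFar;  // publish average
//     windowStart    = now;                                // restart window
//     framesSoFar    = 0;
//   }
//   framesSoFar++;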

void Sample::think(double time)
{
  int width  = m_windowState.m_swapSize[0];
  int height = m_windowState.m_swapSize[1];

  if(m_useUI)
  {
    processUI(width, height, time);
  }

  m_control.processActions({m_windowState.m_winSize[0], m_windowState.m_winSize[1]},
                           glm::vec2(m_windowState.m_mouseCurrent[0], m_windowState.m_mouseCurrent[1]),
                           m_windowState.m_mouseButtonFlags, m_windowState.m_mouseWheel);

  if(m_tweak.msaa != m_lastTweak.msaa || getVsync() != m_lastVsync)
  {
    m_lastVsync = getVsync();
    m_resources.initFramebuffer(width, height, m_tweak.msaa, getVsync());
  }

  bool sceneChanged = false;
  if(m_tweak.copies != m_lastTweak.copies || m_tweak.cloneaxisX != m_lastTweak.cloneaxisX
     || m_tweak.cloneaxisY != m_lastTweak.cloneaxisY || m_tweak.cloneaxisZ != m_lastTweak.cloneaxisZ)
  {
    sceneChanged = true;
    m_resources.synchronize();
    deinitRenderer();
    m_resources.deinitScene();
    initScene(m_modelFilename.c_str(), m_tweak.copies - 1,
              (m_tweak.cloneaxisX << 0) | (m_tweak.cloneaxisY << 1) | (m_tweak.cloneaxisZ << 2));
    m_resources.initScene(m_scene);
  }

  bool rendererChanged = false;
  if(m_windowState.onPress(KEY_R) || m_tweak.copies != m_lastTweak.copies)
  {
    m_resources.synchronize();
    std::string prepend;
    CadScene::IndexingBits bits = m_scene.getIndexingBits();
    prepend += nvh::ShaderFileManager::format("#define INDEXED_MATRIX_BITS %d\n", bits.matrices);
    prepend += nvh::ShaderFileManager::format("#define INDEXED_MATERIAL_BITS %d\n", bits.materials);
    m_resources.reloadPrograms(prepend);
    rendererChanged = true;
  }

  if(sceneChanged || rendererChanged || m_tweak.renderer != m_lastTweak.renderer
     || m_tweak.binding != m_lastTweak.binding || m_tweak.strategy != m_lastTweak.strategy
     || m_tweak.sorted != m_lastTweak.sorted || m_tweak.percent != m_lastTweak.percent
     || m_tweak.workerThreads != m_lastTweak.workerThreads || m_tweak.workerBatched != m_lastTweak.workerBatched
     || m_tweak.maxShaders != m_lastTweak.maxShaders || m_tweak.interleaved != m_lastTweak.interleaved
     || m_tweak.permutated != m_lastTweak.permutated || m_tweak.unordered != m_lastTweak.unordered
     || m_tweak.binned != m_lastTweak.binned || m_tweak.useShaderObjs != m_lastTweak.useShaderObjs)
  {
    m_resources.synchronize();
    initRenderer(m_tweak.renderer);
  }
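
  // Change detection works by comparing m_tweak member-by-member against the
  // m_lastTweak snapshot taken at the end of think(). A terser alternative
  // would be memcmp(&m_tweak, &m_lastTweak, sizeof(m_tweak)) == 0, but that
  // is only reliable for tightly packed trivially-copyable structs with all
  // padding zeroed, so the explicit comparison above is the safer choice.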

  m_resources.beginFrame();

  if(m_tweak.animation != m_lastTweak.animation)
  {
    m_resources.synchronize();
    m_resources.animationReset();

    m_animBeginTime = time;
  }

  {
    m_shared.winWidth      = width;
    m_shared.winHeight     = height;
    m_shared.workingSet    = m_tweak.workingSet;
    m_shared.workerBatched = m_tweak.workerBatched;

    SceneData& sceneUbo = m_shared.sceneUbo;

    sceneUbo.viewport = ivec2(width, height);

    glm::mat4 projection = glm::perspectiveRH_ZO(glm::radians(45.f), float(width) / float(height),
                                                 m_control.m_sceneDimension * 0.001f, m_control.m_sceneDimension * 10.0f);
    // flip Y: Vulkan's clip space is y-down relative to GL conventions
    projection[1][1] *= -1;
    glm::mat4 view = m_control.m_viewMatrix;

    if(m_tweak.animation && m_tweak.animationSpin)
    {
      double animTime = (time - m_animBeginTime) * 0.3 + glm::pi<double>() * 0.2;
      vec3   dir      = vec3(cos(animTime), 1, sin(animTime));
      view = glm::lookAt(m_control.m_sceneOrbit - (-dir * m_control.m_sceneDimension * 0.5f), m_control.m_sceneOrbit,
                         vec3(0, 1, 0));
    }

    sceneUbo.viewProjMatrix = projection * view;
    sceneUbo.viewMatrix     = view;
    sceneUbo.viewMatrixIT   = glm::transpose(glm::inverse(view));

    sceneUbo.viewPos = glm::row(sceneUbo.viewMatrixIT, 3);
    sceneUbo.viewDir = -glm::row(view, 2);

    sceneUbo.wLightPos   = glm::row(sceneUbo.viewMatrixIT, 3);
    sceneUbo.wLightPos.w = 1.0;
  }

  if(m_tweak.animation)
  {
    AnimationData& animUbo = m_shared.animUbo;
    animUbo.time           = float(time - m_animBeginTime);

    m_resources.animation(m_shared);
  }

  {
    m_renderer->draw(m_shared, m_renderStats);
  }

  {
    if(m_useUI)
    {
      ImGui::Render();
      m_shared.imguiDrawData = ImGui::GetDrawData();
    }
    else
    {
      m_shared.imguiDrawData = nullptr;
    }

    m_resources.blitFrame(m_shared);
  }

  m_resources.endFrame();
  m_resources.m_frame++;

  if(m_useUI)
  {
    ImGui::EndFrame();
  }

  m_lastTweak = m_tweak;
}

void Sample::resize(int width, int height)
{
  m_resources.initFramebuffer(width, height, m_tweak.msaa, getVsync());
}

void Sample::setRendererFromName()
{
  if(!m_rendererName.empty())
  {
    const Renderer::Registry registry = Renderer::getRegistry();
    for(size_t i = 0; i < m_renderersSorted.size(); i++)
    {
      if(strcmp(m_rendererName.c_str(), registry[m_renderersSorted[i]]->name()) == 0)
      {
        m_tweak.renderer = int(i);
      }
    }
  }
}
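
// setupConfigParameters() below registers the command-line options that drive
// m_tweak. A hypothetical invocation, assuming nvh::ParameterList's usual
// "-name value" convention (the renderer name and model file are placeholders,
// not values guaranteed by this document):
//
//   vk_device_generated_cmds mymodel.csf.gz -renderernamed "generated ext" -copies 4 -animation 1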

void Sample::setupConfigParameters()
{
  m_parameterList.addFilename(".csf", &m_modelFilename);
  m_parameterList.addFilename(".csf.gz", &m_modelFilename);
  m_parameterList.addFilename(".gltf", &m_modelFilename);

  m_parameterList.add("vkdevice", &m_contextInfo.compatibleDeviceIndex);

  m_parameterList.add("noui", &m_useUI, false);

  m_parameterList.add("unordered", &m_tweak.unordered);
  m_parameterList.add("interleaved", &m_tweak.interleaved);
  m_parameterList.add("binned", &m_tweak.binned);
  m_parameterList.add("permutated", &m_tweak.permutated);
  m_parameterList.add("sorted", &m_tweak.sorted);
  m_parameterList.add("percent", &m_tweak.percent);
  m_parameterList.add("renderer", (uint32_t*)&m_tweak.renderer);
  m_parameterList.add("renderernamed", &m_rendererName);
  m_parameterList.add("strategy", (uint32_t*)&m_tweak.strategy);
  m_parameterList.add("bindingmode", (uint32_t*)&m_tweak.binding);
  m_parameterList.add("shadermode", (uint32_t*)&m_tweak.useShaderObjs);
  m_parameterList.add("msaa", &m_tweak.msaa);
  m_parameterList.add("copies", &m_tweak.copies);
  m_parameterList.add("animation", &m_tweak.animation);
  m_parameterList.add("animationspin", &m_tweak.animationSpin);
  m_parameterList.add("minstatechanges", &m_tweak.sorted);  // alias for "sorted"
  m_parameterList.add("maxshaders", &m_tweak.maxShaders);
  m_parameterList.add("workerbatched", &m_tweak.workerBatched);
  m_parameterList.add("workerthreads", &m_tweak.workerThreads);
  m_parameterList.add("workingset", &m_tweak.workingSet);
}

bool Sample::validateConfig()
{
  if(m_modelFilename.empty())
  {
    LOGI("no .csf or .gltf model file specified\n");
    LOGI("exe parameters...\n");
    m_parameterList.print();
    return false;
  }
  return true;
}

}  // namespace generatedcmds

using namespace generatedcmds;

int main(int argc, const char** argv)
{
  NVPSystem system(PROJECT_NAME);

#if defined(_WIN32) && defined(NDEBUG)
  //SetPriorityClass(GetCurrentProcess(), HIGH_PRIORITY_CLASS);
#endif

  Sample sample;
  {
    std::vector<std::string> directories;
    directories.push_back(NVPSystem::exePath());
    directories.push_back(NVPSystem::exePath() + "/media");
    directories.push_back(NVPSystem::exePath() + std::string(PROJECT_DOWNLOAD_RELDIRECTORY));
    sample.m_modelFilename = nvh::findFile(std::string("geforce.csf.gz"), directories);
  }

  return sample.run(PROJECT_NAME, argc, argv, SAMPLE_SIZE_WIDTH, SAMPLE_SIZE_HEIGHT);
}
--------------------------------------------------------------------------------