├── doc │   └── sample.png ├── .gitignore ├── csf.cpp ├── config.h ├── CONTRIBUTING ├── threadpool.hpp ├── animation.comp.glsl ├── resources.hpp ├── common.h ├── scene.frag.glsl ├── scene.vert.glsl ├── CMakeLists.txt ├── renderer.hpp ├── cadscene_vk.hpp ├── cadscene.hpp ├── cadscene_vk.cpp ├── renderer.cpp ├── resources_vk.hpp ├── threadpool.cpp ├── vk_ext_device_generated_commands.cpp ├── LICENSE ├── renderer_vk.cpp ├── cadscene.cpp ├── vk_ext_device_generated_commands.hpp ├── rendererthread_vk.cpp └── main.cpp /doc/sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/vk_device_generated_cmds/HEAD/doc/sample.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .clang-format 2 | .editorconfig 3 | 4 | ############################# 5 | #Spirv 6 | ############################# 7 | *.spv 8 | *.spva 9 | *.sass 10 | *.sassbin 11 | *.bat 12 | 13 | ############################# 14 | #specific to the project 15 | ############################# 16 | cmake_built 17 | cmake_build 18 | build 19 | _install 20 | bin_x64 21 | NVPRO_EXTERNAL 22 | nvpro_core -------------------------------------------------------------------------------- /csf.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #define CSF_IMPLEMENTATION 22 | #define CSF_SUPPORT_GLTF2 1 23 | #define CSF_SUPPORT_FILEMAPPING 1 24 | 25 | #include <fileformats/cadscenefile.inl> 26 | 27 | #define CGLTF_IMPLEMENTATION 28 | #include <cgltf.h> 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /config.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #pragma once 22 | 23 | // artificially create a few more shader permutations, pairs of vertex/fragment shaders 24 | #define NUM_MATERIAL_SHADERS 128 25 | 26 | // favor using drawcalls firstIndex / firstVertex rather than 27 | // setting index / vertex buffers as much 28 | #define USE_DRAW_OFFSETS 0 29 | 30 | // use VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE and vkCmdBindVertexBuffers2 31 | #define USE_DYNAMIC_VERTEX_STRIDE 0 32 | 33 | // enforces single buffers for vbo/ibo 34 | #define USE_SINGLE_GEOMETRY_ALLOCATION 0 35 | -------------------------------------------------------------------------------- /CONTRIBUTING: -------------------------------------------------------------------------------- 1 | https://developercertificate.org/ 2 | 3 | Developer Certificate of Origin 4 | Version 1.1 5 | 6 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 7 | 8 | Everyone is permitted to copy and distribute verbatim copies of this 9 | license document, but changing it is not allowed. 10 | 11 | 12 | Developer's Certificate of Origin 1.1 13 | 14 | By making a contribution to this project, I certify that: 15 | 16 | (a) The contribution was created in whole or in part by me and I 17 | have the right to submit it under the open source license 18 | indicated in the file; or 19 | 20 | (b) The contribution is based upon previous work that, to the best 21 | of my knowledge, is covered under an appropriate open source 22 | license and I have the right under that license to submit that 23 | work with modifications, whether created in whole or in part 24 | by me, under the same open source license (unless I am 25 | permitted to submit under a different license), as indicated 26 | in the file; or 27 | 28 | (c) The contribution was provided directly to me by some other 29 | person who certified (a), (b) or (c) and I have not modified 30 | it. 31 | 32 | (d) I understand and agree that this project and the contribution 33 | are public and that a record of the contribution (including all 34 | personal information I submit with it, including my sign-off) is 35 | maintained indefinitely and may be redistributed consistent with 36 | this project or the open source license(s) involved. -------------------------------------------------------------------------------- /threadpool.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #ifndef THREADPOOL_H__ 22 | #define THREADPOOL_H__ 23 | 24 | #include <condition_variable> 25 | #include <mutex> 26 | #include <thread> 27 | #include <vector> 28 | 29 | class ThreadPool 30 | { 31 | 32 | public: 33 | typedef void (*WorkerFunc)(void* arg); 34 | 35 | void init(unsigned int numThreads); 36 | void deinit(); 37 | 38 | void activateJob(unsigned int thread, WorkerFunc fn, void* arg); 39 | 40 | static unsigned int sysGetNumCores(); 41 | 42 | unsigned int getNumThreads() { return m_numThreads; } 43 | 44 | 45 | private: 46 | struct ThreadEntry 47 | { 48 | ThreadPool* m_origin; 49 | std::thread m_thread; 50 | unsigned int m_id; 51 | WorkerFunc m_fn; 52 | void* m_fnArg; 53 | std::mutex m_commMutex; 54 | std::condition_variable m_commCond; 55 | }; 56 | 57 | unsigned int m_numThreads; 58 | ThreadEntry* m_pool; 59 | 60 | volatile unsigned int m_globalInit; 61 | 62 | std::mutex m_globalMutex; 63 | std::condition_variable m_globalCond; 64 | 65 | static void threadKicker(void* arg); 66 | void threadProcess(ThreadEntry& entry); 67 | }; 68 | 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /animation.comp.glsl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #version 440 22 | /**/ 23 | 24 | #include "common.h" 25 | 26 | layout (local_size_x = ANIMATION_WORKGROUPSIZE) in; 27 | 28 | layout(binding=ANIM_UBO, std140) uniform animBuffer { 29 | AnimationData anim; 30 | }; 31 | 32 | layout(binding=ANIM_SSBO_MATRIXOUT, std430) restrict buffer matricesBuffer { 33 | MatrixData animated[]; 34 | }; 35 | 36 | layout(binding=ANIM_SSBO_MATRIXORIG, std430) restrict buffer matricesOrigBuffer { 37 | MatrixData original[]; 38 | }; 39 | 40 | void main() 41 | { 42 | int self = int(gl_GlobalInvocationID.x); 43 | if (gl_GlobalInvocationID.x >= anim.numMatrices){ 44 | return; 45 | } 46 | 47 | float s = 1-(float(self)/float(anim.numMatrices)); 48 | float movement = 4; // time until all objects are done moving (<= sequence*0.5) 49 | float sequence = movement*2+3; // time for sequence 50 | 51 | float timeS = fract(anim.time / sequence) * sequence; 52 | float time = clamp(timeS - s*movement,0,1) - clamp(timeS - (1-s)*movement - sequence*0.5, 0, 1); 53 | 54 | float scale = smoothstep(0,1,time); 55 | 56 | mat4 matrixOrig = original[self].worldMatrix; 57 | vec3 pos = matrixOrig[3].xyz; 58 | vec3 away = (pos - anim.sceneCenter ); 59 | 60 | float diridx = float(self % 3); 61 | float sidx = float(self % 6); 62 | 63 | vec3 delta; 64 | #if 1 65 | #pragma optionNV(ifcvt 16) 66 | delta.x = diridx == 0 ? 1 : 0; 67 | delta.y = diridx == 1 ? 1 : 0; 68 | delta.z = diridx == 2 ? 1 : 0; 69 | #else 70 | delta.x = step(diridx,0.5); 71 | delta.y = step(abs(diridx-1),0.5); 72 | delta.z = step(abs(diridx-2),0.5); 73 | #endif 74 | 75 | delta *= -sign(sidx-2.5); 76 | delta *= sign(dot(away,delta)); 77 | 78 | delta = normalize(delta); 79 | pos += delta * scale * anim.sceneDimension; 80 | 81 | animated[self].worldMatrix = mat4(matrixOrig[0], matrixOrig[1], matrixOrig[2], vec4(pos,1)); 82 | } 83 | --------------------------------------------------------------------------------
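Note: the compute shader above runs one thread per matrix and early-outs past anim.numMatrices, so the host side only has to round the dispatch up to ANIMATION_WORKGROUPSIZE. A minimal sketch of that dispatch math (cmd, animPipeline and numMatrices are hypothetical names, not the sample's actual code):

    // one thread per matrix; integer round-up so the tail matrices are covered
    uint32_t groupCount = (numMatrices + ANIMATION_WORKGROUPSIZE - 1) / ANIMATION_WORKGROUPSIZE;
    vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, animPipeline);
    vkCmdDispatch(cmd, groupCount, 1, 1); // shader discards threads >= anim.numMatrices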
/resources.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #pragma once 22 | 23 | #include "config.h" 24 | #include "cadscene.hpp" 25 | #include <nvh/profiler.hpp> 26 | #include <nvvk/context_vk.hpp> 27 | #include <nvvk/swapchain_vk.hpp> 28 | #include <string> 29 | #include <vector> 30 | 31 | #include <glm/glm.hpp> 32 | 33 | struct ImDrawData; 34 | 35 | using namespace glm; 36 | #include "common.h" 37 | 38 | 39 | namespace generatedcmds { 40 | 41 | enum BindingMode 42 | { 43 | BINDINGMODE_DSETS, 44 | BINDINGMODE_PUSHADDRESS, 45 | BINDINGMODE_INDEX_VERTEXATTRIB, 46 | BINDINGMODE_INDEX_BASEINSTANCE, 47 | NUM_BINDINGMODES, 48 | }; 49 | 50 | enum ShaderMode 51 | { 52 | SHADERMODE_PIPELINE, 53 | SHADERMODE_OBJS, 54 | NUM_SHADERMODES, 55 | }; 56 | 57 | inline size_t alignedSize(size_t sz, size_t align) 58 | { 59 | return ((sz + align - 1) / align) * align; 60 | } 61 | 62 | class Resources 63 | { 64 | public: 65 | struct Global 66 | { 67 | SceneData sceneUbo; 68 | AnimationData animUbo; 69 | int winWidth; 70 | int winHeight; 71 | int workingSet; 72 | bool workerBatched; 73 | ImDrawData* imguiDrawData; 74 | }; 75 | 76 | uint32_t m_numMatrices; 77 | 78 | uint32_t m_frame; 79 | 80 | uint32_t m_alignedMatrixSize; 81 | uint32_t m_alignedMaterialSize; 82 | 83 | Resources() 84 | : m_frame(0) 85 | { 86 | } 87 | 88 | virtual void synchronize() {} 89 | 90 | virtual bool init(nvvk::Context* context, nvvk::SwapChain* swapChain, nvh::Profiler* profiler) { return false; } 91 | virtual void deinit() {} 92 | 93 | virtual bool initPrograms(const std::string& path, const std::string& prepend) { return true; } 94 | virtual void reloadPrograms(const std::string& prepend) {} 95 | 96 | virtual bool initFramebuffer(int width, int height, int msaa, bool vsync) { return true; } 97 | 98 | virtual bool initScene(const CadScene&) { return true; } 99 | virtual void deinitScene() {} 100 | 101 | virtual void animation(const Global& global) {} 102 | virtual void animationReset() {} 103 | 104 | virtual void beginFrame() {} 105 | virtual void blitFrame(const Global& global) {} 106 | virtual void endFrame() {} 107 | 108 | inline void initAlignedSizes(unsigned int alignment) 109 | { 110 | m_alignedMatrixSize = (uint32_t)(alignedSize(sizeof(CadScene::MatrixNode),
alignment)); 111 | m_alignedMaterialSize = (uint32_t)(alignedSize(sizeof(CadScene::Material), alignment)); 112 | } 113 | }; 114 | } // namespace generatedcmds 115 | -------------------------------------------------------------------------------- /common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | 22 | #ifndef CSFTHREADED_COMMON_H 23 | #define CSFTHREADED_COMMON_H 24 | 25 | #define VERTEX_POS_OCTNORMAL 0 26 | #define VERTEX_COMBINED_INDEX 1 27 | 28 | #ifndef INDEXED_MATRIX_BITS 29 | #define INDEXED_MATRIX_BITS 24 30 | #endif 31 | 32 | #ifndef INDEXED_MATERIAL_BITS 33 | #define INDEXED_MATERIAL_BITS 8 34 | #endif 35 | 36 | // changing these orders may break a lot of things ;) 37 | #define DRAW_UBO_SCENE 0 38 | #define DRAW_UBO_MATRIX 1 39 | #define DRAW_SSBO_MATRIX 1 40 | #define DRAW_UBO_MATERIAL 2 41 | #define DRAW_SSBO_MATERIAL 2 42 | 43 | #define ANIM_UBO 0 44 | #define ANIM_SSBO_MATRIXOUT 1 45 | #define ANIM_SSBO_MATRIXORIG 2 46 | 47 | #define ANIMATION_WORKGROUPSIZE 256 48 | 49 | #ifndef SHADER_PERMUTATION 50 | #define SHADER_PERMUTATION 1 51 | #endif 52 | 53 | ////////////////////////////////////////////////////////////////////////// 54 | 55 | // see resources_vk.hpp 56 | 57 | #ifndef UNIFORMS_MULTISETSDYNAMIC 58 | #define UNIFORMS_MULTISETSDYNAMIC 0 59 | #endif 60 | #ifndef UNIFORMS_PUSHCONSTANTS_ADDRESS 61 | #define UNIFORMS_PUSHCONSTANTS_ADDRESS 1 62 | #endif 63 | #ifndef UNIFORMS_INDEX_VERTEXATTRIB 64 | #define UNIFORMS_INDEX_VERTEXATTRIB 2 65 | #endif 66 | #ifndef UNIFORMS_INDEX_BASEINSTANCE 67 | #define UNIFORMS_INDEX_BASEINSTANCE 3 68 | #endif 69 | 70 | #ifndef UNIFORMS_TECHNIQUE 71 | #define UNIFORMS_TECHNIQUE UNIFORMS_INDEX_VERTEXATTRIB 72 | #endif 73 | 74 | ////////////////////////////////////////////////////////////////////////// 75 | 76 | #ifdef __cplusplus 77 | namespace generatedcmds { 78 | using namespace glm; 79 | #endif 80 | 81 | struct SceneData { 82 | mat4 viewProjMatrix; 83 | mat4 viewMatrix; 84 | mat4 viewMatrixIT; 85 | 86 | vec4 viewPos; 87 | vec4 viewDir; 88 | 89 | vec4 wLightPos; 90 | 91 | ivec2 viewport; 92 | ivec2 _pad; 93 | }; 94 | 95 | // must match cadscene 96 | struct MatrixData { 97 | mat4 worldMatrix; 98 | mat4 worldMatrixIT; 99 | mat4 objectMatrix; 100 | mat4 objectMatrixIT; 101 | }; 102 | 103 | // must match cadscene 104 | struct MaterialSide { 105 | vec4 ambient; 106 | vec4 diffuse; 107 | vec4 specular; 108 | vec4 emissive; 109 | }; 110 | 111 | struct MaterialData { 112 | MaterialSide sides[2]; 113 | }; 114 | 115 | struct AnimationData { 116 | uint numMatrices; 117 | float time; 118 | vec2 _pad0; 119 | 120 | vec3 sceneCenter; 121 | float sceneDimension; 122 | }; 123 | 124 | #ifdef __cplusplus 125 | } 126 | #endif 127 | 128 | 129 | #endif 130 | --------------------------------------------------------------------------------
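common.h reserves the low INDEXED_MATRIX_BITS (24) of a combined 32-bit index for the matrix and the upper INDEXED_MATERIAL_BITS (8) for the material; this is the same pack/unpack contract that CadScene::IndexingBits::packIndices and scene.vert.glsl use further below. A minimal sketch of the arithmetic:

    // pack: low 24 bits matrix, high 8 bits material (default bit counts)
    uint32_t combined         = matrixIndex | (materialIndex << INDEXED_MATRIX_BITS);
    // unpack, as the vertex shader does
    uint32_t unpackedMatrix   = combined & ((1u << INDEXED_MATRIX_BITS) - 1u);
    uint32_t unpackedMaterial = combined >> INDEXED_MATRIX_BITS;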
/scene.frag.glsl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #version 440 core 22 | /**/ 23 | 24 | #extension GL_GOOGLE_include_directive : enable 25 | #extension GL_EXT_buffer_reference : enable 26 | #extension GL_EXT_scalar_block_layout : enable 27 | 28 | #include "common.h" 29 | 30 | #if UNIFORMS_TECHNIQUE == UNIFORMS_MULTISETSDYNAMIC 31 | 32 | layout(set=DRAW_UBO_SCENE, binding=0, scalar) uniform sceneBuffer { 33 | SceneData scene; 34 | }; 35 | layout(set=DRAW_UBO_MATERIAL, binding=0, scalar) uniform materialBuffer { 36 | MaterialData material; 37 | }; 38 | 39 | #elif UNIFORMS_TECHNIQUE == UNIFORMS_PUSHCONSTANTS_ADDRESS 40 | 41 | layout(set=0, binding=DRAW_UBO_SCENE, scalar) uniform sceneBuffer { 42 | SceneData scene; 43 | }; 44 | layout(buffer_reference, buffer_reference_align=16, scalar) readonly buffer MaterialBuffer { 45 | MaterialData material; 46 | }; 47 | 48 | layout(push_constant, scalar) uniform pushConstants { 49 | layout(offset=8) 50 | MaterialBuffer v; 51 | }; 52 | #define material v.material 53 | 54 | #elif UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_VERTEXATTRIB || UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_BASEINSTANCE 55 | 56 | layout(set=0, binding=DRAW_UBO_SCENE, scalar) uniform sceneBuffer { 57 | SceneData scene; 58 | }; 59 | layout(set=0, binding=DRAW_SSBO_MATERIAL, scalar) readonly buffer MaterialBuffer { 60 | MaterialData materialDatas[]; 61 | }; 62 | #define material materialDatas[IN.materialIndex * 2] 63 | 64 | #endif 65 | 66 | 67 | layout(location=0) in Interpolants { 68 | vec3 wPos; 69 | vec3 wNormal; 70 | #if UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_VERTEXATTRIB || UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_BASEINSTANCE 71 | flat uint materialIndex; 72 | #endif 73 | #if SHADER_PERMUTATION 74 | vec3 oNormal; 75 | #endif 76 | } IN; 77 | 78 | layout(location=0,index=0) out vec4 out_Color; 79 | 80 | void main() 81 | { 82 | MaterialSide side = material.sides[gl_FrontFacing ?
1 : 0]; 83 | 84 | vec4 color = side.ambient + side.emissive; 85 | #if SHADER_PERMUTATION 86 | ivec2 pixel = ivec2(gl_FragCoord.xy); 87 | pixel /= (SHADER_PERMUTATION % 8) + 1; 88 | pixel %= (SHADER_PERMUTATION % 2) + 1; 89 | pixel = ivec2(1) - pixel; 90 | 91 | color = mix(color, vec4(IN.oNormal*0.5+0.5, 1), vec4(0.5) * float(pixel.x * pixel.y)); 92 | color += 0.001 * float(SHADER_PERMUTATION); 93 | #endif 94 | 95 | vec3 eyePos = vec3(scene.viewMatrixIT[0].w,scene.viewMatrixIT[1].w,scene.viewMatrixIT[2].w); 96 | 97 | vec3 lightDir = normalize( scene.wLightPos.xyz - IN.wPos); 98 | vec3 viewDir = normalize( eyePos - IN.wPos); 99 | vec3 halfDir = normalize(lightDir + viewDir); 100 | vec3 normal = normalize(IN.wNormal) * (gl_FrontFacing ? 1 : -1); 101 | 102 | float ldot = dot(normal,lightDir); 103 | normal *= sign(ldot); 104 | ldot *= sign(ldot); 105 | 106 | color += side.diffuse * ldot; 107 | color += side.specular * pow(max(0,dot(normal,halfDir)),16); 108 | 109 | out_Color = color; 110 | } 111 | -------------------------------------------------------------------------------- /scene.vert.glsl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #version 460 core 22 | /**/ 23 | 24 | #extension GL_GOOGLE_include_directive : enable 25 | #extension GL_EXT_buffer_reference : enable 26 | #extension GL_EXT_scalar_block_layout : enable 27 | 28 | #include "common.h" 29 | 30 | #if UNIFORMS_TECHNIQUE == UNIFORMS_MULTISETSDYNAMIC 31 | 32 | layout(set=DRAW_UBO_SCENE, binding=0, scalar) uniform sceneBuffer { 33 | SceneData scene; 34 | }; 35 | layout(set=DRAW_UBO_MATRIX, binding=0, scalar) uniform objectBuffer { 36 | MatrixData matrix; 37 | }; 38 | 39 | #elif UNIFORMS_TECHNIQUE == UNIFORMS_PUSHCONSTANTS_ADDRESS 40 | 41 | layout(set=0, binding=DRAW_UBO_SCENE, scalar) uniform sceneBuffer { 42 | SceneData scene; 43 | }; 44 | layout(buffer_reference, buffer_reference_align=16, scalar) readonly buffer MatrixBuffer { 45 | MatrixData matrix; 46 | }; 47 | 48 | layout(push_constant, scalar) uniform pushConstants { 49 | MatrixBuffer v; 50 | }; 51 | #define matrix v.matrix 52 | 53 | #elif UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_VERTEXATTRIB || UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_BASEINSTANCE 54 | 55 | layout(set=0, binding=DRAW_UBO_SCENE, scalar) uniform sceneBuffer { 56 | SceneData scene; 57 | }; 58 | layout(set=0, binding=DRAW_SSBO_MATRIX, scalar) readonly buffer MatrixBuffer { 59 | MatrixData matrices[]; 60 | }; 61 | 62 | #if UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_VERTEXATTRIB 63 | in layout(location=VERTEX_COMBINED_INDEX) uint inCombinedIndex; 64 | 65 | #define matrix matrices[uint(inCombinedIndex) & ((1u << INDEXED_MATRIX_BITS)-1u)] 66 | #elif UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_BASEINSTANCE 67 | #define matrix matrices[uint(gl_BaseInstance) & ((1u << INDEXED_MATRIX_BITS)-1u)] 68 | #endif 69 | 70 | #endif 71 | 72 | in layout(location=VERTEX_POS_OCTNORMAL) vec4 inPosNormal; 73 | 74 | layout(location=0) out Interpolants { 75 | vec3 wPos; 76 | vec3 wNormal; 77 | #if UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_VERTEXATTRIB || UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_BASEINSTANCE 78 | flat uint materialIndex; 79 | #endif 80 | #if SHADER_PERMUTATION 81 | vec3 oNormal; 82 | #endif 83 | } OUT; 84 | 85 | 86 | 87 | vec2 oct_signNotZero(vec2 v) { 88 | return vec2((v.x >= 0.0) ? +1.0 : -1.0, (v.y >= 0.0) ?
+1.0 : -1.0); 89 | } 90 | vec3 oct_to_float32x3(vec2 e) { 91 | vec3 v = vec3(e.xy, 1.0 - abs(e.x) - abs(e.y)); 92 | if (v.z < 0) v.xy = (1.0 - abs(v.yx)) * oct_signNotZero(v.xy); 93 | return normalize(v); 94 | } 95 | vec2 float32x3_to_oct(in vec3 v) { 96 | // Project the sphere onto the octahedron, and then onto the xy plane 97 | vec2 p = v.xy * (1.0 / (abs(v.x) + abs(v.y) + abs(v.z))); 98 | // Reflect the folds of the lower hemisphere over the diagonals 99 | return (v.z <= 0.0) ? ((1.0 - abs(p.yx)) * oct_signNotZero(p)) : p; 100 | } 101 | 102 | void main() 103 | { 104 | vec3 inNormal = oct_to_float32x3(unpackSnorm2x16(floatBitsToUint(inPosNormal.w))); 105 | 106 | vec3 wPos = (matrix.worldMatrix * vec4(inPosNormal.xyz,1)).xyz; 107 | vec3 wNormal = mat3(matrix.worldMatrixIT) * inNormal; 108 | 109 | gl_Position = scene.viewProjMatrix * vec4(wPos,1); 110 | 111 | #if SHADER_PERMUTATION 112 | OUT.oNormal = inNormal; 113 | #endif 114 | #if UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_BASEINSTANCE 115 | OUT.materialIndex = uint(gl_BaseInstance) >> INDEXED_MATRIX_BITS; 116 | #elif UNIFORMS_TECHNIQUE == UNIFORMS_INDEX_VERTEXATTRIB 117 | OUT.materialIndex = inCombinedIndex >> INDEXED_MATRIX_BITS; 118 | #endif 119 | 120 | OUT.wPos = wPos; 121 | OUT.wNormal = wNormal; 122 | } 123 | --------------------------------------------------------------------------------
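The vertex shader decodes an octahedral-encoded normal from two snorm16 values packed into the float bit pattern of inPosNormal.w. A CPU-side encoder sketch that mirrors float32x3_to_oct plus the snorm16 quantization consumed by unpackSnorm2x16 (assuming glm and <cmath>; helper names are hypothetical, this is not the sample's loader code):

    static uint16_t floatToSnorm16(float f) {
      // snorm16 encode matching GLSL unpackSnorm2x16 decoding
      return (uint16_t)(int16_t)roundf(glm::clamp(f, -1.0f, 1.0f) * 32767.0f);
    }
    // project onto octahedron, fold lower hemisphere, quantize to CadScene::Vertex normalOctX/Y
    static void encodeOctNormal(glm::vec3 n, uint16_t& octX, uint16_t& octY) {
      glm::vec2 p = glm::vec2(n.x, n.y) / (fabsf(n.x) + fabsf(n.y) + fabsf(n.z));
      if (n.z <= 0.0f)
        p = (1.0f - glm::abs(glm::vec2(p.y, p.x))) * glm::vec2(p.x >= 0.0f ? 1.0f : -1.0f, p.y >= 0.0f ? 1.0f : -1.0f);
      octX = floatToSnorm16(p.x);
      octY = floatToSnorm16(p.y);
    }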
/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | get_filename_component(PROJNAME ${CMAKE_CURRENT_SOURCE_DIR} NAME) 3 | Project(${PROJNAME}) 4 | Message(STATUS "-------------------------------") 5 | Message(STATUS "Processing Project ${PROJNAME}:") 6 | 7 | ##################################################################################### 8 | # look for nvpro_core 1) as a sub-folder 2) at some other locations 9 | # this cannot be put anywhere else since we still didn't find setup.cmake yet 10 | # 11 | if(NOT BASE_DIRECTORY) 12 | 13 | find_path(BASE_DIRECTORY 14 | NAMES nvpro_core/cmake/setup.cmake 15 | PATHS ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/../.. 16 | REQUIRED 17 | DOC "Directory containing nvpro_core" 18 | ) 19 | endif() 20 | if(EXISTS ${BASE_DIRECTORY}/nvpro_core/cmake/setup.cmake) 21 | include(${BASE_DIRECTORY}/nvpro_core/cmake/setup.cmake) 22 | include(${BASE_DIRECTORY}/nvpro_core/cmake/utilities.cmake) 23 | else() 24 | message(FATAL_ERROR "could not find base directory, please set BASE_DIRECTORY to folder containing nvpro_core") 25 | endif() 26 | 27 | _add_project_definitions(${PROJNAME}) 28 | 29 | #-------------------------------------------------------------------------------------------------- 30 | # Resources 31 | # 32 | download_files(FILENAMES geforce.csf.gz) 33 | 34 | 35 | ##################################################################################### 36 | # additions from packages needed for this sample 37 | # add refs in LIBRARIES_OPTIMIZED 38 | # add refs in LIBRARIES_DEBUG 39 | # add files in PACKAGE_SOURCE_FILES 40 | # 41 | set( EXENAME ${PROJNAME} ) 42 | _add_package_VulkanSDK() 43 | _add_package_ShaderC() 44 | _add_package_ZLIB() 45 | _add_package_ImGUI() 46 | 47 | ##################################################################################### 48 | # process the rest of some cmake code that needs to be done *after* the packages add 49 | _add_nvpro_core_lib() 50 | 51 | add_definitions(-DCSF_SUPPORT_ZLIB=1) 52 | 53 | ##################################################################################### 54 | # Resources 55 | # 56 | download_files(FILENAMES geforce.csf.gz) 57 | 58 | ##################################################################################### 59 | # Source files for this project 60 | # 61 | file(GLOB SOURCE_FILES *.cpp *.hpp *.inl *.h *.c) 62 | file(GLOB GLSL_FILES *.glsl) 63 | 64 | ##################################################################################### 65 | # Executable 66 | # 67 | if(WIN32 AND NOT GLUT_FOUND) 68 | add_definitions(/wd4267) #remove size_t to int warning 69 | add_definitions(/wd4996) #remove printf warning 70 | add_definitions(/wd4244) #remove double to float conversion warning 71 | add_definitions(/wd4305) #remove double to float truncation warning 72 | else() 73 | add_definitions(-fpermissive) 74 | endif() 75 | 76 | add_executable(${EXENAME} ${SOURCE_FILES} ${COMMON_SOURCE_FILES} ${PACKAGE_SOURCE_FILES} ${GLSL_FILES}) 77 | 78 | ##################################################################################### 79 | # common source code needed for this sample 80 | # 81 | source_group(common FILES 82 | ${COMMON_SOURCE_FILES} 83 | ${PACKAGE_SOURCE_FILES} 84 | ) 85 | source_group(shaders FILES 86 | ${GLSL_FILES} 87 | ) 88 | source_group("Source Files" FILES ${SOURCE_FILES}) 89 | 90 | if(UNIX) 91 | set(UNIXLINKLIBS dl pthread) 92 | else() 93 | set(UNIXLINKLIBS) 94 | endif() 95 | 96 | ##################################################################################### 97 | # Linkage 98 | # 99 | target_link_libraries(${EXENAME} ${PLATFORM_LIBRARIES} nvpro_core) 100 | 101 | foreach(DEBUGLIB ${LIBRARIES_DEBUG}) 102 | target_link_libraries(${EXENAME} debug ${DEBUGLIB}) 103 | endforeach(DEBUGLIB) 104 | 105 | foreach(RELEASELIB ${LIBRARIES_OPTIMIZED}) 106 | target_link_libraries(${EXENAME} optimized ${RELEASELIB}) 107 | endforeach(RELEASELIB) 108 | 109 | ##################################################################################### 110 | # copies binaries that need to be put next to the exe files (ZLib, etc.)
111 | # 112 | 113 | _finalize_target( ${EXENAME} ) 114 | 115 | LIST(APPEND GLSL_FILES "common.h") 116 | install(FILES ${GLSL_FILES} CONFIGURATIONS Release DESTINATION "bin_${ARCH}/GLSL_${PROJNAME}") 117 | install(FILES ${GLSL_FILES} CONFIGURATIONS Debug DESTINATION "bin_${ARCH}_debug/GLSL_${PROJNAME}") 118 | 119 | -------------------------------------------------------------------------------- /renderer.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #ifndef RENDERER_H__ 22 | #define RENDERER_H__ 23 | 24 | #include "resources_vk.hpp" 25 | #include <vector> 26 | 27 | // disable state filtering for buffer binds 28 | #define USE_NOFILTER 0 29 | // print per-thread stats 30 | #define PRINT_TIMER_STATS 1 31 | 32 | namespace generatedcmds { 33 | 34 | enum Strategy 35 | { // per-object 36 | STRATEGY_GROUPS, // sorted and combined parts by material 37 | STRATEGY_INDIVIDUAL, // keep all parts individual 38 | STRATEGY_SINGLE // single draw / material 39 | }; 40 | 41 | class Renderer 42 | { 43 | public: 44 | struct Stats 45 | { 46 | uint32_t drawCalls = 0; 47 | uint32_t drawTriangles = 0; 48 | uint32_t shaderBindings = 0; 49 | uint32_t sequences = 0; 50 | uint32_t preprocessSizeKB = 0; 51 | uint32_t indirectSizeKB = 0; 52 | uint32_t cmdBuffers = 0; 53 | }; 54 | 55 | struct Config 56 | { 57 | Strategy strategy; 58 | BindingMode bindingMode; 59 | uint32_t objectFrom; 60 | uint32_t objectNum; 61 | uint32_t maxShaders = 16; 62 | uint32_t workerThreads; 63 | bool interleaved = false; 64 | bool sorted = false; 65 | bool unordered = false; 66 | bool permutated = false; 67 | bool binned = false; 68 | bool shaderObjs = false; 69 | }; 70 | 71 | struct DrawItem 72 | { 73 | bool solid; 74 | int materialIndex; 75 | int geometryIndex; 76 | int matrixIndex; 77 | int shaderIndex; 78 | CadScene::DrawRange range; 79 | }; 80 | 81 | static inline bool DrawItem_compare_groups(const DrawItem& a, const DrawItem& b) 82 | { 83 | int diff = 0; 84 | diff = diff != 0 ? diff : (a.solid == b.solid ? 0 : (a.solid ? -1 : 1)); 85 | diff = diff != 0 ? diff : (a.shaderIndex - b.shaderIndex); 86 | diff = diff != 0 ? diff : (a.geometryIndex - b.geometryIndex); 87 | diff = diff != 0 ? diff : (a.materialIndex - b.materialIndex); 88 | diff = diff != 0 ?
diff : (a.matrixIndex - b.matrixIndex); 89 | 90 | return diff < 0; 91 | } 92 | 93 | class Type 94 | { 95 | public: 96 | Type() { getRegistry().push_back(this); } 97 | 98 | public: 99 | virtual bool isAvailable(const nvvk::Context& context) = 0; 100 | virtual const char* name() const = 0; 101 | virtual Renderer* create() const = 0; 102 | virtual uint32_t priority() const { return 0xFF; } 103 | virtual uint32_t supportedBindingModes() const { return 0xFF; } 104 | virtual bool supportsShaderObjs() const { return true; } 105 | virtual uint32_t supportedShaderBinds() const { return ~0; } 106 | }; 107 | 108 | typedef std::vector<Type*> Registry; 109 | 110 | static Registry& getRegistry() 111 | { 112 | static Registry s_registry; 113 | return s_registry; 114 | } 115 | 116 | public: 117 | virtual void init(const CadScene* scene, ResourcesVK* resources, const Config& config, Stats& stats) {} 118 | virtual void deinit() {} 119 | virtual void draw(const Resources::Global& global, Stats& stats) {} 120 | 121 | virtual ~Renderer() {} 122 | 123 | void fillDrawItems(std::vector<DrawItem>& drawItems, const CadScene* scene, const Config& config, Stats& stats); 124 | void fillRandomPermutation(uint32_t drawCount, uint32_t* permutation, const DrawItem* drawItems, Stats& stats); 125 | 126 | Config m_config; 127 | const CadScene* m_scene; 128 | }; 129 | } // namespace generatedcmds 130 | 131 | #endif 132 | --------------------------------------------------------------------------------
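Renderer::Type's constructor self-registers the instance in the static registry, so the application can enumerate available renderer backends without a central list. A hypothetical registration sketch (RendererMeVK and s_typeMe are illustrative names only, not the sample's actual backends):

    class RendererMeVK : public Renderer { /* init/draw/deinit overrides */ };

    class RendererMeVKType : public Renderer::Type
    {
    public:
      bool        isAvailable(const nvvk::Context& context) override { return true; }
      const char* name() const override { return "me_vk"; }
      Renderer*   create() const override { return new RendererMeVK(); }
    };
    // static instance: its base constructor pushes it into getRegistry() at startup
    static RendererMeVKType s_typeMe;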
/cadscene_vk.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #pragma once 22 | 23 | #include "cadscene.hpp" 24 | 25 | #include <nvvk/commands_vk.hpp> 26 | #include <nvvk/resourceallocator_vk.hpp> 27 | #include <nvvk/stagingmemorymanager_vk.hpp> 28 | 29 | // ScopeStaging handles uploads and other staging operations. 30 | // not efficient because it blocks/syncs operations 31 | 32 | struct ScopeStaging 33 | { 34 | ScopeStaging(nvvk::ResourceAllocator& resAllocator, VkQueue queue_, uint32_t queueFamily) 35 | : staging(*resAllocator.getStaging()) 36 | , cmdPool(resAllocator.getDevice(), queueFamily) 37 | , queue(queue_) 38 | , cmd(VK_NULL_HANDLE) 39 | { 40 | } 41 | ~ScopeStaging() { submit(); } 42 | 43 | VkCommandBuffer cmd; 44 | nvvk::StagingMemoryManager& staging; 45 | nvvk::CommandPool cmdPool; 46 | VkQueue queue; 47 | 48 | VkCommandBuffer getCmd() 49 | { 50 | cmd = cmd ? cmd : cmdPool.createCommandBuffer(); 51 | return cmd; 52 | } 53 | 54 | void submit() 55 | { 56 | if(cmd) 57 | { 58 | cmdPool.submitAndWait(cmd, queue); 59 | cmd = VK_NULL_HANDLE; 60 | staging.releaseResources(); 61 | } 62 | } 63 | 64 | void uploadAutoSubmit(const VkDescriptorBufferInfo& binding, const void* data) 65 | { 66 | if(cmd && (data == nullptr || !staging.fitsInAllocated(binding.range))) 67 | { 68 | submit(); 69 | } 70 | if(data && binding.range) 71 | { 72 | staging.cmdToBuffer(getCmd(), binding.buffer, binding.offset, binding.range, data); 73 | } 74 | } 75 | 76 | void* upload(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, const void* data = nullptr) 77 | { 78 | return staging.cmdToBuffer(getCmd(), buffer, offset, size, data); 79 | } 80 | 81 | template <class T> 82 | T* uploadT(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, const void* data = nullptr) 83 | { 84 | return (T*)staging.cmdToBuffer(getCmd(), buffer, offset, size, data); 85 | } 86 | }; 87 | 88 | 89 | // GeometryMemoryVK manages vbo/ibo etc. in chunks 90 | // allows reducing the number of bindings and is more memory efficient 91 | 92 | struct GeometryMemoryVK 93 | { 94 | typedef size_t Index; 95 | 96 | 97 | struct Allocation 98 | { 99 | Index chunkIndex; 100 | VkDeviceSize vboOffset; 101 | VkDeviceSize iboOffset; 102 | }; 103 | 104 | struct Chunk 105 | { 106 | nvvk::Buffer vbo; 107 | nvvk::Buffer ibo; 108 | 109 | VkDeviceSize vboSize; 110 | VkDeviceSize iboSize; 111 | }; 112 | 113 | 114 | VkDevice m_device = VK_NULL_HANDLE; 115 | nvvk::ResourceAllocator* m_resourceAllocator; 116 | std::vector<Chunk> m_chunks; 117 | 118 | void init(nvvk::ResourceAllocator* resourceAllocator, VkDeviceSize vboStride, VkDeviceSize maxChunk); 119 | void deinit(); 120 | void alloc(VkDeviceSize vboSize, VkDeviceSize iboSize, Allocation& allocation); 121 | void finalize(); 122 | 123 | const Chunk& getChunk(const Allocation& allocation) const { return m_chunks[allocation.chunkIndex]; } 124 | 125 | const Chunk& getChunk(Index index) const { return m_chunks[index]; } 126 | 127 | VkDeviceSize getVertexSize() const 128 | { 129 | VkDeviceSize size = 0; 130 | for(size_t i = 0; i < m_chunks.size(); i++) 131 | { 132 | size += m_chunks[i].vboSize; 133 | } 134 | return size; 135 | } 136 | 137 | VkDeviceSize getIndexSize() const 138 | { 139 | VkDeviceSize size = 0; 140 | for(size_t i = 0; i < m_chunks.size(); i++) 141 | { 142 | size += m_chunks[i].iboSize; 143 | } 144 | return size; 145 | } 146 | 147 | VkDeviceSize getChunkCount() const { return m_chunks.size(); } 148 | 149 | private: 150 | VkDeviceSize m_alignment; 151 | VkDeviceSize m_vboAlignment; 152 | VkDeviceSize m_maxVboChunk; 153 | VkDeviceSize m_maxIboChunk; 154 | 155 | Index getActiveIndex() { return (m_chunks.size() - 1); } 156 | 157 | Chunk& getActiveChunk() 158 | { 159 | assert(!m_chunks.empty()); 160 | return m_chunks[getActiveIndex()]; 161 | } 162 | }; 163 | 164 | 165 | class CadSceneVK 166 | { 167 | public: 168 | struct Geometry 169 | { 170 | GeometryMemoryVK::Allocation allocation; 171 | 172 | VkDescriptorBufferInfo vbo; 173 | VkDescriptorBufferInfo ibo; 174 | }; 175 | 176 | struct Buffers 177 | { 178 | nvvk::Buffer materials = {}; 179 | nvvk::Buffer matrices = {}; 180 | nvvk::Buffer matricesOrig = {}; 181 | }; 182 | 183 | struct Infos 184 | { 185 | VkDescriptorBufferInfo materialsSingle, materials, matricesSingle, matrices, matricesOrig; 186 | }; 187 | 188 | struct Config 189 | { 190 | bool singleAllocation = false; 191 | }; 192 | 193 | VkDevice m_device = VK_NULL_HANDLE; 194 | 195 | Config
m_config; 196 | 197 | Buffers m_buffers; 198 | Infos m_infos; 199 | 200 | std::vector<Geometry> m_geometry; 201 | GeometryMemoryVK m_geometryMem; 202 | nvvk::ResourceAllocator* m_resourceAllocator = nullptr; 203 | 204 | 205 | void init(const CadScene& cadscene, nvvk::ResourceAllocator& resourceAllocator, VkQueue queue, uint32_t queueFamilyIndex, const Config& config); 206 | void deinit(); 207 | }; 208 | --------------------------------------------------------------------------------
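GeometryMemoryVK accumulates sub-allocations into shared chunk buffers: alloc() only advances offsets (finalizing the current chunk and opening a new one when it would overflow), and one last finalize() creates the buffers of the still-open chunk. A usage sketch mirroring CadSceneVK::init in cadscene_vk.cpp below (vboBytes/iboBytes are placeholder sizes):

    GeometryMemoryVK mem;
    mem.init(&resourceAllocator, sizeof(CadScene::Vertex), 256 * 1024 * 1024); // 256 MB max chunk
    GeometryMemoryVK::Allocation alloc;
    mem.alloc(vboBytes, iboBytes, alloc);  // records chunkIndex + vbo/ibo offsets
    mem.finalize();                        // creates vbo/ibo buffers of the last chunk
    const GeometryMemoryVK::Chunk& chunk = mem.getChunk(alloc);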
/cadscene.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #ifndef CADSCENE_H__ 22 | #define CADSCENE_H__ 23 | 24 | #include <cstring> // memset 25 | #include <cfloat> 26 | #include <glm/glm.hpp> 27 | #include <vector> 28 | 29 | class CadScene 30 | { 31 | 32 | public: 33 | struct BBox 34 | { 35 | glm::vec4 min; 36 | glm::vec4 max; 37 | 38 | BBox() 39 | : min(FLT_MAX) 40 | , max(-FLT_MAX) 41 | { 42 | } 43 | 44 | inline void merge(const glm::vec4& point) 45 | { 46 | min = glm::min(min, point); 47 | max = glm::max(max, point); 48 | } 49 | 50 | inline void merge(const BBox& bbox) 51 | { 52 | min = glm::min(min, bbox.min); 53 | max = glm::max(max, bbox.max); 54 | } 55 | 56 | inline BBox transformed(const glm::mat4& matrix, int dim = 3) 57 | { 58 | int i; 59 | glm::vec4 box[16]; 60 | // create box corners 61 | box[0] = glm::vec4(min.x, min.y, min.z, min.w); 62 | box[1] = glm::vec4(max.x, min.y, min.z, min.w); 63 | box[2] = glm::vec4(min.x, max.y, min.z, min.w); 64 | box[3] = glm::vec4(max.x, max.y, min.z, min.w); 65 | box[4] = glm::vec4(min.x, min.y, max.z, min.w); 66 | box[5] = glm::vec4(max.x, min.y, max.z, min.w); 67 | box[6] = glm::vec4(min.x, max.y, max.z, min.w); 68 | box[7] = glm::vec4(max.x, max.y, max.z, min.w); 69 | 70 | box[8] = glm::vec4(min.x, min.y, min.z, max.w); 71 | box[9] = glm::vec4(max.x, min.y, min.z, max.w); 72 | box[10] = glm::vec4(min.x, max.y, min.z, max.w); 73 | box[11] = glm::vec4(max.x, max.y, min.z, max.w); 74 | box[12] = glm::vec4(min.x, min.y, max.z, max.w); 75 | box[13] = glm::vec4(max.x, min.y, max.z, max.w); 76 | box[14] = glm::vec4(min.x, max.y, max.z, max.w); 77 | box[15] = glm::vec4(max.x, max.y, max.z, max.w); 78 | 79 | // transform box corners 80 | // and find new mins,maxs 81 | BBox bbox; 82 | 83 | for(i = 0; i < (1 << dim); i++) 84 | { 85 | glm::vec4 point = matrix * box[i]; 86 | bbox.merge(point); 87 | } 88 | 89 | return bbox; 90 | } 91 | }; 92 | 93 | struct MaterialSide 94 | { 95 | glm::vec4 ambient; 96 | glm::vec4 diffuse; 97 | glm::vec4 specular; 98 | glm::vec4 emissive; 99 | }; 100 | 101 | // need to keep this 256 byte aligned (UBO range) 102 | struct Material 103 | { 104 | MaterialSide sides[2]; 105 | unsigned int _pad[32]; 106 | 107 | Material() { memset(this, 0, sizeof(Material)); } 108 | }; 109 | 110 | // need to keep this 256 byte aligned (UBO range) 111 | struct MatrixNode 112 | { 113 | glm::mat4 worldMatrix; 114 | glm::mat4 worldMatrixIT; 115 | glm::mat4 objectMatrix; 116 | glm::mat4 objectMatrixIT; 117 | }; 118 | 119 | struct Vertex 120 | { 121 | glm::vec3 position; 122 | uint16_t normalOctX; 123 | uint16_t normalOctY; 124 | }; 125 | 126 | struct DrawRange 127 | { 128 | size_t offset; 129 | int count; 130 | 131 | DrawRange() 132 | : offset(0) 133 | , count(0) 134 | { 135 | } 136 | }; 137 | 138 | struct DrawStateInfo 139 | { 140 | int materialIndex; 141 | int matrixIndex; 142 | 143 | friend bool operator!=(const DrawStateInfo& lhs, const DrawStateInfo& rhs) 144 | { 145 | return lhs.materialIndex != rhs.materialIndex || lhs.matrixIndex != rhs.matrixIndex; 146 | } 147 | 148 | friend bool operator==(const DrawStateInfo& lhs, const DrawStateInfo& rhs) 149 | { 150 | return lhs.materialIndex == rhs.materialIndex && lhs.matrixIndex == rhs.matrixIndex; 151 | } 152 | }; 153 | 154 | struct DrawRangeCache 155 | { 156 | std::vector<DrawStateInfo> state; 157 | std::vector<int> stateCount; 158 | 159 | std::vector<size_t> offsets; 160 | std::vector<int> counts; 161 | }; 162 | 163 | struct GeometryPart 164 | { 165 | DrawRange indexSolid; 166 | DrawRange indexWire; 167 | }; 168 | 169 | struct Geometry 170 | { 171 | int cloneIdx; 172 | size_t vboSize; 173 | size_t iboSize; 174 | 175 | Vertex* vboData; 176 | unsigned int* iboData; 177 | 178 | std::vector<GeometryPart> parts; 179 | 180 | int numVertices; 181 | int numIndexSolid; 182 | int numIndexWire; 183 | }; 184 | 185 | struct ObjectPart 186 | { 187 | int active; 188 | int materialIndex; 189 | int matrixIndex; 190 | }; 191 | 192 | struct Object 193 | { 194 | int matrixIndex; 195 | int geometryIndex; 196 | 197 | std::vector<ObjectPart> parts; 198 | 199 | DrawRangeCache cacheSolid; 200 | DrawRangeCache cacheWire; 201 | }; 202 | 203 | std::vector<Material> m_materials; 204 | std::vector<BBox> m_geometryBboxes; 205 | std::vector<Geometry> m_geometry; 206 | std::vector<MatrixNode> m_matrices; 207 | std::vector<Object> m_objects; 208 | 209 | 210 | BBox m_bbox; 211 | 212 | 213 | void updateObjectDrawCache(Object& object); 214 | 215 | bool loadCSF(const char* filename, int clones = 0, int cloneaxis = 3); 216 | void unload(); 217 | 218 | struct IndexingBits 219 | { 220 | uint32_t matrices = 0; 221 | uint32_t materials = 0; 222 | 223 | uint32_t packIndices(uint32_t matrixIndex, uint32_t materialIndex) const 224 | { 225 | return matrixIndex | (materialIndex << matrices); 226 | } 227 | }; 228 | 229 | IndexingBits getIndexingBits() const; 230 | bool supportsIndexing() const; 231 | }; 232 | 233 | 234 | #endif 235 | --------------------------------------------------------------------------------
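Material and MatrixNode are padded/sized so their stride is a 256-byte multiple, the classic minUniformBufferOffsetAlignment limit that initAlignedSizes() in resources.hpp also rounds against. A compile-time guard sketch of that invariant (not in the original source):

    // 2 x MaterialSide (128 B) + _pad[32] (128 B) = 256 B; 4 x mat4 = 256 B
    static_assert(sizeof(CadScene::Material)   == 256, "UBO offset alignment assumption");
    static_assert(sizeof(CadScene::MatrixNode) == 256, "UBO offset alignment assumption");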
/cadscene_vk.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #include "config.h" 22 | #include "cadscene_vk.hpp" 23 | 24 | #include <algorithm> 25 | #include <inttypes.h> 26 | #include <nvh/nvprint.hpp> 27 | 28 | 29 | static inline VkDeviceSize alignedSize(VkDeviceSize sz, VkDeviceSize align) 30 | { 31 | return ((sz + align - 1) / (align)) * align; 32 | } 33 | 34 | 35 | void GeometryMemoryVK::init(nvvk::ResourceAllocator* resourceAllocator, VkDeviceSize vboStride, VkDeviceSize maxChunk) 36 | { 37 | m_resourceAllocator = resourceAllocator; 38 | m_alignment = 16; 39 | m_vboAlignment = 16; 40 | 41 | m_maxVboChunk = maxChunk; 42 | m_maxIboChunk = maxChunk; 43 | } 44 | 45 | void GeometryMemoryVK::deinit() 46 | { 47 | for(size_t i = 0; i < m_chunks.size(); i++) 48 | { 49 | Chunk chunk = getChunk(i); 50 | m_resourceAllocator->destroy(chunk.vbo); 51 | m_resourceAllocator->destroy(chunk.ibo); 52 | } 53 | m_chunks = std::vector<Chunk>(); 54 | m_device = nullptr; 55 | m_resourceAllocator = nullptr; 56 | } 57 | 58 | void GeometryMemoryVK::alloc(VkDeviceSize vboSize, VkDeviceSize iboSize, Allocation& allocation) 59 | { 60 | vboSize = alignedSize(vboSize, m_vboAlignment); 61 | iboSize = alignedSize(iboSize, m_alignment); 62 | 63 | if(m_chunks.empty() || getActiveChunk().vboSize + vboSize > m_maxVboChunk || getActiveChunk().iboSize + iboSize > m_maxIboChunk) 64 | { 65 | finalize(); 66 | Chunk chunk = {}; 67 | m_chunks.push_back(chunk); 68 | } 69 | 70 | Chunk& chunk = getActiveChunk(); 71 | 72 | allocation.chunkIndex = getActiveIndex(); 73 | allocation.vboOffset = chunk.vboSize; 74 | allocation.iboOffset = chunk.iboSize; 75 | 76 | chunk.vboSize += vboSize; 77 | chunk.iboSize += iboSize; 78 | } 79 | 80 | void GeometryMemoryVK::finalize() 81 | { 82 | if(m_chunks.empty()) 83 | { 84 | return; 85 | } 86 | 87 | Chunk& chunk = getActiveChunk(); 88 | 89 | uint32_t flags = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; 90 | 91 | chunk.vbo = m_resourceAllocator->createBuffer(chunk.vboSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | flags); 92 | chunk.ibo = m_resourceAllocator->createBuffer(chunk.iboSize, VK_BUFFER_USAGE_INDEX_BUFFER_BIT | flags); 93 | } 94 | 95 | void CadSceneVK::init(const CadScene& cadscene, nvvk::ResourceAllocator& resourceAllocator, VkQueue queue, uint32_t queueFamilyIndex, const Config& config) 96 | { 97 | VkDeviceSize MB = 1024 * 1024; 98 | 99 | m_resourceAllocator = &resourceAllocator; 100 | m_config = config; 101 | m_geometry.resize(cadscene.m_geometry.size(), {0}); 102 | 103 | if(m_geometry.empty()) 104 | return; 105 | 106 | { 107 | // allocation phase 108 | m_geometryMem.init(&resourceAllocator, sizeof(CadScene::Vertex), config.singleAllocation ?
VkDeviceSize(4096) * MB : 256 * MB); 109 | 110 | for(size_t g = 0; g < cadscene.m_geometry.size(); g++) 111 | { 112 | const CadScene::Geometry& cadgeom = cadscene.m_geometry[g]; 113 | Geometry& geom = m_geometry[g]; 114 | 115 | m_geometryMem.alloc(cadgeom.vboSize, cadgeom.iboSize, geom.allocation); 116 | } 117 | 118 | m_geometryMem.finalize(); 119 | 120 | LOGI("Size of vertex data: %11" PRId64 "\n", uint64_t(m_geometryMem.getVertexSize())); 121 | LOGI("Size of index data: %11" PRId64 "\n", uint64_t(m_geometryMem.getIndexSize())); 122 | LOGI("Size of data: %11" PRId64 "\n", uint64_t(m_geometryMem.getVertexSize() + m_geometryMem.getIndexSize())); 123 | LOGI("Chunks: %11d\n", uint32_t(m_geometryMem.getChunkCount())); 124 | } 125 | 126 | ScopeStaging staging(resourceAllocator, queue, queueFamilyIndex); 127 | 128 | for(size_t g = 0; g < cadscene.m_geometry.size(); g++) 129 | { 130 | const CadScene::Geometry& cadgeom = cadscene.m_geometry[g]; 131 | Geometry& geom = m_geometry[g]; 132 | const GeometryMemoryVK::Chunk& chunk = m_geometryMem.getChunk(geom.allocation); 133 | 134 | // upload and assignment phase 135 | geom.vbo.buffer = chunk.vbo.buffer; 136 | geom.vbo.offset = geom.allocation.vboOffset; 137 | geom.vbo.range = cadgeom.vboSize; 138 | staging.uploadAutoSubmit(geom.vbo, cadgeom.vboData); 139 | 140 | geom.ibo.buffer = chunk.ibo.buffer; 141 | geom.ibo.offset = geom.allocation.iboOffset; 142 | geom.ibo.range = cadgeom.iboSize; 143 | staging.uploadAutoSubmit(geom.ibo, cadgeom.iboData); 144 | } 145 | 146 | VkBufferUsageFlags usageFlags = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; 147 | usageFlags |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; 148 | 149 | VkDeviceSize materialsSize = cadscene.m_materials.size() * sizeof(CadScene::Material); 150 | VkDeviceSize matricesSize = cadscene.m_matrices.size() * sizeof(CadScene::MatrixNode); 151 | 152 | m_buffers.materials = resourceAllocator.createBuffer(materialsSize, usageFlags); 153 | m_buffers.matrices = resourceAllocator.createBuffer(matricesSize, usageFlags); 154 | m_buffers.matricesOrig = resourceAllocator.createBuffer(matricesSize, usageFlags | VK_BUFFER_USAGE_TRANSFER_SRC_BIT); 155 | 156 | m_infos.materialsSingle = {m_buffers.materials.buffer, 0, sizeof(CadScene::Material)}; 157 | m_infos.materials = {m_buffers.materials.buffer, 0, materialsSize}; 158 | m_infos.matricesSingle = {m_buffers.matrices.buffer, 0, sizeof(CadScene::MatrixNode)}; 159 | m_infos.matrices = {m_buffers.matrices.buffer, 0, matricesSize}; 160 | m_infos.matricesOrig = {m_buffers.matricesOrig.buffer, 0, matricesSize}; 161 | 162 | staging.uploadAutoSubmit(m_infos.materials, cadscene.m_materials.data()); 163 | staging.uploadAutoSubmit(m_infos.matrices, cadscene.m_matrices.data()); 164 | staging.uploadAutoSubmit(m_infos.matricesOrig, cadscene.m_matrices.data()); 165 | 166 | staging.uploadAutoSubmit({}, nullptr); 167 | } 168 | 169 | void CadSceneVK::deinit() 170 | { 171 | m_resourceAllocator->destroy(m_buffers.materials); 172 | m_resourceAllocator->destroy(m_buffers.matrices); 173 | m_resourceAllocator->destroy(m_buffers.matricesOrig); 174 | m_geometry.clear(); 175 | m_geometryMem.deinit(); 176 | } 177 | -------------------------------------------------------------------------------- /renderer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #include <algorithm> 22 | #include <cstdlib> 23 | #include "renderer.hpp" 24 | #include <vector> 25 | 26 | #include "common.h" 27 | 28 | #pragma pack(1) 29 | 30 | 31 | namespace generatedcmds { 32 | ////////////////////////////////////////////////////////////////////////// 33 | 34 | static void AddItem(std::vector<Renderer::DrawItem>& drawItems, const Renderer::Config& config, const Renderer::DrawItem& di) 35 | { 36 | if(di.range.count) 37 | { 38 | drawItems.push_back(di); 39 | } 40 | } 41 | 42 | static void FillSingle(std::vector<Renderer::DrawItem>& drawItems, 43 | const Renderer::Config& config, 44 | const CadScene::Object& obj, 45 | const CadScene::Geometry& geo, 46 | bool solid, 47 | int objectIndex) 48 | { 49 | int begin = 0; 50 | const CadScene::DrawRangeCache& cache = solid ? obj.cacheSolid : obj.cacheWire; 51 | 52 | if(obj.parts.empty()) 53 | return; 54 | 55 | const CadScene::ObjectPart& part = obj.parts[0]; 56 | const CadScene::GeometryPart& mesh = geo.parts[0]; 57 | 58 | if(!part.active) 59 | return; 60 | 61 | // evict 62 | Renderer::DrawItem di; 63 | di.geometryIndex = obj.geometryIndex; 64 | di.matrixIndex = part.matrixIndex; 65 | di.materialIndex = part.materialIndex; 66 | di.shaderIndex = part.materialIndex % config.maxShaders; 67 | 68 | di.solid = solid; 69 | di.range.offset = solid ? 0 : geo.numIndexSolid * sizeof(unsigned int); 70 | di.range.count = solid ? geo.numIndexSolid : geo.numIndexWire; 71 | 72 | AddItem(drawItems, config, di); 73 | } 74 | 75 | static void FillCache(std::vector<Renderer::DrawItem>& drawItems, 76 | const Renderer::Config& config, 77 | const CadScene::Object& obj, 78 | const CadScene::Geometry& geo, 79 | bool solid, 80 | int objectIndex) 81 | { 82 | int begin = 0; 83 | const CadScene::DrawRangeCache& cache = solid ?
obj.cacheSolid : obj.cacheWire; 84 | 85 | for(size_t s = 0; s < cache.state.size(); s++) 86 | { 87 | const CadScene::DrawStateInfo& state = cache.state[s]; 88 | for(int d = 0; d < cache.stateCount[s]; d++) 89 | { 90 | // evict 91 | Renderer::DrawItem di; 92 | di.geometryIndex = obj.geometryIndex; 93 | di.matrixIndex = state.matrixIndex; 94 | di.materialIndex = state.materialIndex; 95 | di.shaderIndex = state.materialIndex % config.maxShaders; 96 | 97 | di.solid = solid; 98 | di.range.offset = cache.offsets[begin + d]; 99 | di.range.count = cache.counts[begin + d]; 100 | 101 | AddItem(drawItems, config, di); 102 | } 103 | begin += cache.stateCount[s]; 104 | } 105 | } 106 | 107 | static void FillIndividual(std::vector<Renderer::DrawItem>& drawItems, 108 | const Renderer::Config& config, 109 | const CadScene::Object& obj, 110 | const CadScene::Geometry& geo, 111 | bool solid, 112 | int objectIndex) 113 | { 114 | for(size_t p = 0; p < obj.parts.size(); p++) 115 | { 116 | const CadScene::ObjectPart& part = obj.parts[p]; 117 | const CadScene::GeometryPart& mesh = geo.parts[p]; 118 | 119 | if(!part.active) 120 | continue; 121 | 122 | Renderer::DrawItem di; 123 | di.geometryIndex = obj.geometryIndex; 124 | di.matrixIndex = part.matrixIndex; 125 | di.materialIndex = part.materialIndex; 126 | di.shaderIndex = part.materialIndex % config.maxShaders; 127 | 128 | di.solid = solid; 129 | di.range = solid ? mesh.indexSolid : mesh.indexWire; 130 | 131 | AddItem(drawItems, config, di); 132 | } 133 | } 134 | 135 | void Renderer::fillDrawItems(std::vector<DrawItem>& drawItems, const CadScene* scene, const Config& config, Stats& stats) 136 | { 137 | bool solid = true; 138 | bool wire = false; 139 | 140 | size_t maxObjects = scene->m_objects.size(); 141 | size_t from = std::min(maxObjects - 1, size_t(config.objectFrom)); 142 | maxObjects = std::min(maxObjects, from + size_t(config.objectNum)); 143 | 144 | for(size_t i = from; i < maxObjects; i++) 145 | { 146 | const CadScene::Object& obj = scene->m_objects[i]; 147 | const CadScene::Geometry& geo = scene->m_geometry[obj.geometryIndex]; 148 | 149 | if(config.strategy == STRATEGY_SINGLE) 150 | { 151 | if(solid) 152 | FillSingle(drawItems, config, obj, geo, true, int(i)); 153 | if(wire) 154 | FillSingle(drawItems, config, obj, geo, false, int(i)); 155 | } 156 | else if(config.strategy == STRATEGY_GROUPS) 157 | { 158 | if(solid) 159 | FillCache(drawItems, config, obj, geo, true, int(i)); 160 | if(wire) 161 | FillCache(drawItems, config, obj, geo, false, int(i)); 162 | } 163 | else if(config.strategy == STRATEGY_INDIVIDUAL) 164 | { 165 | if(solid) 166 | FillIndividual(drawItems, config, obj, geo, true, int(i)); 167 | if(wire) 168 | FillIndividual(drawItems, config, obj, geo, false, int(i)); 169 | } 170 | } 171 | 172 | if(config.sorted && !config.permutated) 173 | { 174 | std::sort(drawItems.begin(), drawItems.end(), DrawItem_compare_groups); 175 | } 176 | 177 | int shaderIndex = -1; 178 | for(size_t i = 0; i < drawItems.size(); i++) 179 | { 180 | stats.drawCalls++; 181 | stats.drawTriangles += drawItems[i].range.count / 3; 182 | if(drawItems[i].shaderIndex != shaderIndex) 183 | { 184 | stats.shaderBindings++; 185 | shaderIndex = drawItems[i].shaderIndex; 186 | } 187 | } 188 | } 189 | 190 | void Renderer::fillRandomPermutation(uint32_t drawCount, uint32_t* permutation, const DrawItem* drawItems, Stats& stats) 191 | { 192 | srand(634523); 193 | for(uint32_t i = 0; i < drawCount; i++) 194 | { 195 | permutation[i] = i; 196 | } 197 | if(drawCount) 198 | { 199 | // not exactly a good way to generate
random 32bit ;) 200 | for(uint32_t i = drawCount - 1; i > 0; i--) 201 | { 202 | uint32_t r = 0; 203 | r |= (rand() & 0xFF) << 0; 204 | r |= (rand() & 0xFF) << 8; 205 | r |= (rand() & 0xFF) << 16; 206 | r |= (rand() & 0xFF) << 24; 207 | 208 | uint32_t other = r % (i + 1); 209 | std::swap(permutation[i], permutation[other]); 210 | } 211 | 212 | int shaderIndex = -1; 213 | stats.shaderBindings = 0; 214 | for(uint32_t i = 0; i < drawCount; i++) 215 | { 216 | uint32_t idx = permutation[i]; 217 | if(drawItems[idx].shaderIndex != shaderIndex) 218 | { 219 | stats.shaderBindings++; 220 | shaderIndex = drawItems[idx].shaderIndex; 221 | } 222 | } 223 | } 224 | } 225 | 226 | } // namespace generatedcmds 227 | -------------------------------------------------------------------------------- /resources_vk.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #pragma once 22 | 23 | #define DRAW_UBOS_NUM 3 24 | 25 | #include "cadscene_vk.hpp" 26 | #include "resources.hpp" 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | 42 | namespace generatedcmds { 43 | 44 | class ResourcesVK : public Resources 45 | { 46 | public: 47 | ResourcesVK() {} 48 | 49 | static ResourcesVK* get() 50 | { 51 | static ResourcesVK res; 52 | 53 | return &res; 54 | } 55 | static bool isAvailable(); 56 | 57 | static void initImGui(const nvvk::Context& context); 58 | static void deinitImGui(const nvvk::Context& context); 59 | 60 | struct FrameBuffer 61 | { 62 | int renderWidth = 0; 63 | int renderHeight = 0; 64 | int supersample = 0; 65 | bool useResolved = false; 66 | bool vsync = false; 67 | int msaa = 0; 68 | 69 | VkFormat colorFormat = VK_FORMAT_R8G8B8A8_UNORM; 70 | VkFormat depthStencilFormat; 71 | 72 | VkViewport viewport; 73 | VkViewport viewportUI; 74 | VkRect2D scissor; 75 | VkRect2D scissorUI; 76 | 77 | nvvk::Image imgColor = {}; 78 | nvvk::Image imgColorResolved = {}; 79 | nvvk::Image imgDepthStencil = {}; 80 | 81 | VkImageView viewColor = VK_NULL_HANDLE; 82 | VkImageView viewColorResolved = VK_NULL_HANDLE; 83 | VkImageView viewDepthStencil = VK_NULL_HANDLE; 84 | 85 | VkRenderingAttachmentInfo attachColor; 86 | VkRenderingAttachmentInfo attachColorUI; 87 | VkRenderingAttachmentInfo attachDepth; 88 | 89 | VkRenderingInfo renderingInfo = {VK_STRUCTURE_TYPE_RENDERING_INFO}; 90 | VkRenderingInfo renderingInfoUI = {VK_STRUCTURE_TYPE_RENDERING_INFO}; 91 | VkPipelineRenderingCreateInfo pipelineRenderingInfo = {VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO}; 92 | VkPipelineRenderingCreateInfo pipelineRenderingInfoUI = 
{VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO}; 93 | }; 94 | 95 | struct Common 96 | { 97 | nvvk::Buffer viewBuffer; 98 | VkDescriptorBufferInfo viewInfo; 99 | 100 | nvvk::Buffer animBuffer; 101 | VkDescriptorBufferInfo animInfo; 102 | }; 103 | 104 | struct 105 | { 106 | nvvk::ShaderModuleID shaderModuleID = {}; 107 | VkShaderModule shader = nullptr; 108 | VkPipeline pipeline = nullptr; 109 | } m_animShading; 110 | 111 | struct 112 | { 113 | VkPipeline pipelines[NUM_MATERIAL_SHADERS] = {}; 114 | VkShaderEXT vertexShaderObjs[NUM_MATERIAL_SHADERS] = {}; 115 | VkShaderEXT fragmentShaderObjs[NUM_MATERIAL_SHADERS] = {}; 116 | } m_drawShading; 117 | 118 | struct 119 | { 120 | nvvk::ShaderModuleID vertexIDs[NUM_MATERIAL_SHADERS] = {}; 121 | nvvk::ShaderModuleID fragmentIDs[NUM_MATERIAL_SHADERS] = {}; 122 | VkShaderModule vertexShaders[NUM_MATERIAL_SHADERS] = {}; 123 | VkShaderModule fragmentShaders[NUM_MATERIAL_SHADERS] = {}; 124 | } m_drawShaderModules[NUM_BINDINGMODES]; 125 | 126 | 127 | bool m_withinFrame = false; 128 | nvvk::ShaderModuleManager m_shaderManager; 129 | 130 | 131 | FrameBuffer m_framebuffer = {}; 132 | Common m_common; 133 | 134 | nvvk::SwapChain* m_swapChain = nullptr; 135 | nvvk::Context* m_context = nullptr; 136 | nvvk::ProfilerVK m_profilerVK; 137 | 138 | VkDevice m_device = VK_NULL_HANDLE; 139 | VkPhysicalDevice m_physical; 140 | VkQueue m_queue; 141 | uint32_t m_queueFamily; 142 | nvvk::DeviceMemoryAllocator m_memoryAllocator; 143 | nvvk::ResourceAllocator m_resourceAllocator; 144 | nvvk::RingFences m_ringFences; 145 | nvvk::RingCommandPool m_ringCmdPool; 146 | nvvk::BatchSubmission m_submission; 147 | bool m_submissionWaitForRead; 148 | 149 | VkPipelineCreateFlags2CreateInfoKHR m_gfxStateFlags2CreateInfo; 150 | nvvk::GraphicsPipelineState m_gfxState; 151 | nvvk::GraphicsPipelineGenerator m_gfxGen{m_gfxState}; 152 | nvvk::GraphicShaderObjectPipeline m_gfxStateShaderObjects; 153 | 154 | nvvk::TDescriptorSetContainer m_drawBind; 155 | nvvk::DescriptorSetContainer m_drawPush; 156 | nvvk::DescriptorSetContainer m_drawIndexed; 157 | nvvk::DescriptorSetContainer m_anim; 158 | VkPushConstantRange m_pushRanges[2]; 159 | 160 | BindingMode m_lastBindingMode = NUM_BINDINGMODES; 161 | VkPipelineCreateFlags2KHR m_lastPipeFlags = ~0; 162 | bool m_lastUseShaderObjs = false; 163 | 164 | uint32_t m_numMatrices; 165 | CadSceneVK m_scene; 166 | 167 | size_t m_pipeChangeID; 168 | size_t m_fboChangeID; 169 | 170 | 171 | bool init(nvvk::Context* context, nvvk::SwapChain* swapChain, nvh::Profiler* profiler) override; 172 | void deinit() override; 173 | 174 | void initPipelinesOrShaders(BindingMode bindingMode, VkPipelineCreateFlags2KHR pipeFlags, bool useShaderObjs, bool force = false); 175 | void deinitPipelinesOrShaders(); 176 | bool hasPipes() { return m_animShading.pipeline != 0; } 177 | 178 | bool initPrograms(const std::string& path, const std::string& prepend) override; 179 | void reloadPrograms(const std::string& prepend) override; 180 | 181 | void updatedPrograms(); 182 | void deinitPrograms(); 183 | 184 | bool initFramebuffer(int width, int height, int msaa, bool vsync) override; 185 | void deinitFramebuffer(); 186 | 187 | bool initScene(const CadScene&) override; 188 | void deinitScene() override; 189 | 190 | void synchronize() override; 191 | 192 | void beginFrame() override; 193 | void blitFrame(const Global& global) override; 194 | void endFrame() override; 195 | 196 | void animation(const Global& global) override; 197 | void animationReset() override; 198 | 199 | 
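// The members below wrap command buffer creation and batched queue submission.
// Typical usage, a sketch of the pattern the renderers in this sample follow:
//   VkCommandBuffer cmd = res->createTempCmdBuffer();
//   ... record commands ...
//   vkEndCommandBuffer(cmd);
//   res->submissionEnqueue(cmd);  // collected in m_submission
//   res->submissionExecute();     // submits all enqueued buffers in one batch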
////////////////////////////////////////////////////////////////////////// 200 | 201 | VkCommandBuffer createCmdBuffer(VkCommandPool pool, bool singleshot, bool primary, bool secondaryInClear) const; 202 | VkCommandBuffer createTempCmdBuffer(bool primary = true, bool secondaryInClear = false); 203 | 204 | 205 | // submit for batched execution 206 | void submissionEnqueue(VkCommandBuffer cmdbuffer) { m_submission.enqueue(cmdbuffer); } 207 | void submissionEnqueue(uint32_t num, const VkCommandBuffer* cmdbuffers) { m_submission.enqueue(num, cmdbuffers); } 208 | // perform queue submit 209 | void submissionExecute(VkFence fence = nullptr, bool useImageReadWait = false, bool useImageWriteSignals = false); 210 | 211 | // synchronizes to queue 212 | void resetTempResources(); 213 | 214 | 215 | void cmdShaderObjectState(VkCommandBuffer cmd) const; 216 | void cmdDynamicPipelineState(VkCommandBuffer cmd) const; 217 | void cmdImageTransition(VkCommandBuffer cmd, 218 | VkImage img, 219 | VkImageAspectFlags aspects, 220 | VkAccessFlags src, 221 | VkAccessFlags dst, 222 | VkImageLayout oldLayout, 223 | VkImageLayout newLayout) const; 224 | 225 | void cmdBegin(VkCommandBuffer cmd, bool singleshot, bool primary, bool secondaryInClear) const; 226 | void cmdBeginRendering(VkCommandBuffer cmd, bool hasSecondary = false) const; 227 | 228 | void cmdPipelineBarrier(VkCommandBuffer cmd) const; 229 | }; 230 | 231 | } // namespace generatedcmds 232 | -------------------------------------------------------------------------------- /threadpool.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #include <assert.h> 22 | 23 | #include "threadpool.hpp" 24 | #include "nvh/nvprint.hpp" 25 | #include <thread> 26 | 27 | #define THREADPOOL_TERMINATE_FUNC ((ThreadPool::WorkerFunc)1) 28 | 29 | #define USE_PHYSICAL_CORES_ONLY 1 30 | 31 | #if _WIN32 && USE_PHYSICAL_CORES_ONLY 32 | 33 | #include <windows.h> 34 | 35 | typedef BOOL(WINAPI* LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); 36 | 37 | 38 | // Helper function to count set bits in the processor mask. 39 | static DWORD CountSetBits(ULONG_PTR bitMask) 40 | { 41 | DWORD LSHIFT = sizeof(ULONG_PTR) * 8 - 1; 42 | DWORD bitSetCount = 0; 43 | ULONG_PTR bitTest = (ULONG_PTR)1 << LSHIFT; 44 | DWORD i; 45 | 46 | for(i = 0; i <= LSHIFT; ++i) 47 | { 48 | bitSetCount += ((bitMask & bitTest) ? 
1 : 0); 49 | bitTest /= 2; 50 | } 51 | 52 | return bitSetCount; 53 | } 54 | 55 | unsigned int ThreadPool::sysGetNumCores() 56 | { 57 | LPFN_GLPI glpi; 58 | BOOL done = FALSE; 59 | PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = nullptr; 60 | PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = nullptr; 61 | DWORD returnLength = 0; 62 | DWORD logicalProcessorCount = 0; 63 | DWORD numaNodeCount = 0; 64 | DWORD processorCoreCount = 0; 65 | DWORD processorL1CacheCount = 0; 66 | DWORD processorL2CacheCount = 0; 67 | DWORD processorL3CacheCount = 0; 68 | DWORD processorPackageCount = 0; 69 | DWORD byteOffset = 0; 70 | PCACHE_DESCRIPTOR Cache; 71 | 72 | glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandleA("kernel32"), "GetLogicalProcessorInformation"); 73 | if(nullptr == glpi) 74 | { 75 | return std::thread::hardware_concurrency(); 76 | } 77 | 78 | while(!done) 79 | { 80 | DWORD rc = glpi(buffer, &returnLength); 81 | 82 | if(FALSE == rc) 83 | { 84 | if(GetLastError() == ERROR_INSUFFICIENT_BUFFER) 85 | { 86 | if(buffer) 87 | free(buffer); 88 | 89 | buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(returnLength); 90 | 91 | if(nullptr == buffer) 92 | { 93 | return std::thread::hardware_concurrency(); 94 | } 95 | } 96 | else 97 | { 98 | return std::thread::hardware_concurrency(); 99 | } 100 | } 101 | else 102 | { 103 | done = TRUE; 104 | } 105 | } 106 | 107 | ptr = buffer; 108 | 109 | while(byteOffset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= returnLength) 110 | { 111 | switch(ptr->Relationship) 112 | { 113 | case RelationNumaNode: 114 | // Non-NUMA systems report a single record of this type. 115 | numaNodeCount++; 116 | break; 117 | 118 | case RelationProcessorCore: 119 | processorCoreCount++; 120 | 121 | // A hyperthreaded core supplies more than one logical processor. 122 | logicalProcessorCount += CountSetBits(ptr->ProcessorMask); 123 | break; 124 | 125 | case RelationCache: 126 | // Cache data is in ptr->Cache, one CACHE_DESCRIPTOR structure for each cache. 127 | Cache = &ptr->Cache; 128 | if(Cache->Level == 1) 129 | { 130 | processorL1CacheCount++; 131 | } 132 | else if(Cache->Level == 2) 133 | { 134 | processorL2CacheCount++; 135 | } 136 | else if(Cache->Level == 3) 137 | { 138 | processorL3CacheCount++; 139 | } 140 | break; 141 | 142 | case RelationProcessorPackage: 143 | // Logical processors share a physical package. 
144 | processorPackageCount++; 145 | break; 146 | 147 | default: 148 | break; 149 | } 150 | byteOffset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); 151 | ptr++; 152 | } 153 | 154 | #if 0 155 | LOGI(TEXT("\nGetLogicalProcessorInformation results:\n")); 156 | LOGI(TEXT("Number of NUMA nodes: %d\n"), 157 | numaNodeCount); 158 | LOGI(TEXT("Number of physical processor packages: %d\n"), 159 | processorPackageCount); 160 | LOGI(TEXT("Number of processor cores: %d\n"), 161 | processorCoreCount); 162 | LOGI(TEXT("Number of logical processors: %d\n"), 163 | logicalProcessorCount); 164 | LOGI(TEXT("Number of processor L1/L2/L3 caches: %d/%d/%d\n"), 165 | processorL1CacheCount, 166 | processorL2CacheCount, 167 | processorL3CacheCount); 168 | #endif 169 | 170 | free(buffer); 171 | 172 | return processorCoreCount; 173 | } 174 | 175 | #else 176 | 177 | unsigned int ThreadPool::sysGetNumCores() 178 | { 179 | return std::thread::hardware_concurrency(); 180 | } 181 | 182 | #endif 183 | 184 | 185 | void ThreadPool::threadKicker(void* arg) 186 | { 187 | ThreadEntry* thread = (ThreadEntry*)arg; 188 | thread->m_origin->threadProcess(*thread); 189 | } 190 | 191 | void ThreadPool::threadProcess(ThreadEntry& entry) 192 | { 193 | { 194 | std::unique_lock lock(m_globalMutex); 195 | 196 | LOGI("%d created...\n", entry.m_id); 197 | 198 | m_globalInit++; 199 | m_globalCond.notify_all(); 200 | } 201 | 202 | #if _WIN32 && USE_PHYSICAL_CORES_ONLY 203 | // assume hyperthreading, move to n physical cores 204 | unsigned int cpuCore = entry.m_id * 2 + 1; 205 | SetThreadAffinityMask(GetCurrentThread(), uint64_t(1) << cpuCore); 206 | #endif 207 | 208 | while(true) 209 | { 210 | { 211 | std::unique_lock lock(entry.m_commMutex); 212 | while(!entry.m_fn) 213 | { 214 | entry.m_commCond.wait(lock); 215 | } 216 | } 217 | 218 | if(entry.m_fn == THREADPOOL_TERMINATE_FUNC) 219 | break; 220 | 221 | NV_BARRIER(); 222 | 223 | LOGI("%d started job\n", entry.m_id); 224 | 225 | entry.m_fn(entry.m_fnArg); 226 | entry.m_fn = 0; 227 | 228 | LOGI("%d finished job\n", entry.m_id); 229 | } 230 | 231 | LOGI("%d exiting...\n", entry.m_id); 232 | 233 | { 234 | std::unique_lock lock(m_globalMutex); 235 | LOGI("%d shutdown\n", entry.m_id); 236 | } 237 | } 238 | 239 | void ThreadPool::init(unsigned int numThreads) 240 | { 241 | m_numThreads = numThreads; 242 | m_globalInit = 0; 243 | 244 | m_pool = new ThreadEntry[numThreads]; 245 | 246 | for(unsigned int i = 0; i < numThreads; i++) 247 | { 248 | ThreadEntry& entry = m_pool[i]; 249 | entry.m_id = numThreads - i - 1; 250 | entry.m_origin = this; 251 | entry.m_fn = 0; 252 | entry.m_fnArg = 0; 253 | } 254 | 255 | NV_BARRIER(); 256 | 257 | for(unsigned int i = 0; i < numThreads; i++) 258 | { 259 | ThreadEntry& entry = m_pool[i]; 260 | entry.m_thread = std::thread(threadKicker, &m_pool[i]); 261 | } 262 | 263 | { 264 | std::unique_lock lock(m_globalMutex); 265 | while(m_globalInit < numThreads) 266 | { 267 | m_globalCond.wait(lock); 268 | } 269 | } 270 | 271 | #if _WIN32 && USE_PHYSICAL_CORES_ONLY 272 | // pin the main thread to core 0 273 | SetThreadAffinityMask(GetCurrentThread(), 1); 274 | #endif 275 | } 276 | 277 | void ThreadPool::deinit() 278 | { 279 | NV_BARRIER(); 280 | 281 | for(unsigned int i = 0; i < m_numThreads; i++) 282 | { 283 | ThreadEntry& entry = m_pool[i]; 284 | 285 | { 286 | std::unique_lock lock(entry.m_commMutex); 287 | entry.m_fn = THREADPOOL_TERMINATE_FUNC; 288 | entry.m_fnArg = 0; 289 | entry.m_commCond.notify_all(); 290 | } 291 | 292 | std::this_thread::yield(); 293 | 294 | 
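// the terminate marker was published under the comm mutex above; after the
// yield, block until the worker observes it and returns from threadProcess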
entry.m_thread.join(); 295 | } 296 | 297 | delete[] m_pool; 298 | m_pool = 0; 299 | m_numThreads = 0; 300 | } 301 | 302 | void ThreadPool::activateJob(unsigned int tid, WorkerFunc fn, void* arg) 303 | { 304 | assert(tid < m_numThreads); 305 | 306 | ThreadEntry& entry = m_pool[tid]; 307 | 308 | assert(entry.m_fn == 0); 309 | 310 | { 311 | std::unique_lock lock(entry.m_commMutex); 312 | entry.m_fn = fn; 313 | entry.m_fnArg = arg; 314 | entry.m_commCond.notify_all(); 315 | } 316 | } 317 | -------------------------------------------------------------------------------- /vk_ext_device_generated_commands.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #include 21 | #include "vk_ext_device_generated_commands.hpp" 22 | 23 | static PFN_vkGetGeneratedCommandsMemoryRequirementsEXT s_vkGetGeneratedCommandsMemoryRequirementsEXT = nullptr; 24 | static PFN_vkCmdPreprocessGeneratedCommandsEXT s_vkCmdPreprocessGeneratedCommandsEXT = nullptr; 25 | static PFN_vkCmdExecuteGeneratedCommandsEXT s_vkCmdExecuteGeneratedCommandsEXT = nullptr; 26 | static PFN_vkCreateIndirectCommandsLayoutEXT s_vkCreateIndirectCommandsLayoutEXT = nullptr; 27 | static PFN_vkDestroyIndirectCommandsLayoutEXT s_vkDestroyIndirectCommandsLayoutEXT = nullptr; 28 | static PFN_vkCreateIndirectExecutionSetEXT s_vkCreateIndirectExecutionSetEXT = nullptr; 29 | static PFN_vkDestroyIndirectExecutionSetEXT s_vkDestroyIndirectExecutionSetEXT = nullptr; 30 | static PFN_vkUpdateIndirectExecutionSetPipelineEXT s_vkUpdateIndirectExecutionSetPipelineEXT = nullptr; 31 | static PFN_vkUpdateIndirectExecutionSetShaderEXT s_vkUpdateIndirectExecutionSetShaderEXT = nullptr; 32 | 33 | #ifndef NVVK_HAS_VK_EXT_device_generated_commands 34 | 35 | VKAPI_ATTR void VKAPI_CALL vkGetGeneratedCommandsMemoryRequirementsEXT(VkDevice device, 36 | VkGeneratedCommandsMemoryRequirementsInfoEXT const* pInfo, 37 | VkMemoryRequirements2* pMemoryRequirements) 38 | { 39 | s_vkGetGeneratedCommandsMemoryRequirementsEXT(device, pInfo, pMemoryRequirements); 40 | } 41 | 42 | VKAPI_ATTR void VKAPI_CALL vkCmdPreprocessGeneratedCommandsEXT(VkCommandBuffer commandBuffer, 43 | VkGeneratedCommandsInfoEXT const* pGeneratedCommandsInfo, 44 | VkCommandBuffer stateCommandBuffer) 45 | { 46 | s_vkCmdPreprocessGeneratedCommandsEXT(commandBuffer, pGeneratedCommandsInfo, stateCommandBuffer); 47 | } 48 | 49 | VKAPI_ATTR void VKAPI_CALL vkCmdExecuteGeneratedCommandsEXT(VkCommandBuffer commandBuffer, 50 | VkBool32 isPreprocessed, 51 | VkGeneratedCommandsInfoEXT const* pGeneratedCommandsInfo) 52 | { 53 | s_vkCmdExecuteGeneratedCommandsEXT(commandBuffer, isPreprocessed, pGeneratedCommandsInfo); 54 | } 55 | 56 | VKAPI_ATTR VkResult VKAPI_CALL 
vkCreateIndirectCommandsLayoutEXT(VkDevice device, 57 | VkIndirectCommandsLayoutCreateInfoEXT const* pCreateInfo, 58 | VkAllocationCallbacks const* pAllocator, 59 | VkIndirectCommandsLayoutEXT* pIndirectCommandsLayout) 60 | { 61 | return s_vkCreateIndirectCommandsLayoutEXT(device, pCreateInfo, pAllocator, pIndirectCommandsLayout); 62 | } 63 | 64 | VKAPI_ATTR void VKAPI_CALL vkDestroyIndirectCommandsLayoutEXT(VkDevice device, 65 | VkIndirectCommandsLayoutEXT indirectCommandsLayout, 66 | VkAllocationCallbacks const* pAllocator) 67 | { 68 | s_vkDestroyIndirectCommandsLayoutEXT(device, indirectCommandsLayout, pAllocator); 69 | } 70 | 71 | VKAPI_ATTR VkResult VKAPI_CALL vkCreateIndirectExecutionSetEXT(VkDevice device, 72 | VkIndirectExecutionSetCreateInfoEXT const* pCreateInfo, 73 | VkAllocationCallbacks const* pAllocator, 74 | VkIndirectExecutionSetEXT* pIndirectExecutionSet) 75 | { 76 | return s_vkCreateIndirectExecutionSetEXT(device, pCreateInfo, pAllocator, pIndirectExecutionSet); 77 | } 78 | 79 | VKAPI_ATTR void VKAPI_CALL vkDestroyIndirectExecutionSetEXT(VkDevice device, 80 | VkIndirectExecutionSetEXT indirectExecutionSet, 81 | VkAllocationCallbacks const* pAllocator) 82 | { 83 | s_vkDestroyIndirectExecutionSetEXT(device, indirectExecutionSet, pAllocator); 84 | } 85 | 86 | VKAPI_ATTR void VKAPI_CALL vkUpdateIndirectExecutionSetPipelineEXT(VkDevice device, 87 | VkIndirectExecutionSetEXT indirectExecutionSet, 88 | uint32_t executionSetWriteCount, 89 | VkWriteIndirectExecutionSetPipelineEXT const* pExecutionSetWrites) 90 | { 91 | s_vkUpdateIndirectExecutionSetPipelineEXT(device, indirectExecutionSet, executionSetWriteCount, pExecutionSetWrites); 92 | } 93 | 94 | VKAPI_ATTR void VKAPI_CALL vkUpdateIndirectExecutionSetShaderEXT(VkDevice device, 95 | VkIndirectExecutionSetEXT indirectExecutionSet, 96 | uint32_t executionSetWriteCount, 97 | VkWriteIndirectExecutionSetShaderEXT const* pExecutionSetWrites) 98 | { 99 | s_vkUpdateIndirectExecutionSetShaderEXT(device, indirectExecutionSet, executionSetWriteCount, pExecutionSetWrites); 100 | } 101 | #endif 102 | 103 | VkBool32 load_VK_EXT_device_generated_commands(VkInstance instance, VkDevice device) 104 | { 105 | s_vkGetGeneratedCommandsMemoryRequirementsEXT = nullptr; 106 | s_vkCmdPreprocessGeneratedCommandsEXT = nullptr; 107 | s_vkCmdExecuteGeneratedCommandsEXT = nullptr; 108 | s_vkCreateIndirectCommandsLayoutEXT = nullptr; 109 | s_vkDestroyIndirectCommandsLayoutEXT = nullptr; 110 | s_vkCreateIndirectExecutionSetEXT = nullptr; 111 | s_vkDestroyIndirectExecutionSetEXT = nullptr; 112 | s_vkUpdateIndirectExecutionSetPipelineEXT = nullptr; 113 | s_vkUpdateIndirectExecutionSetShaderEXT = nullptr; 114 | 115 | s_vkGetGeneratedCommandsMemoryRequirementsEXT = 116 | (PFN_vkGetGeneratedCommandsMemoryRequirementsEXT)vkGetDeviceProcAddr(device, "vkGetGeneratedCommandsMemoryRequirementsEXT"); 117 | s_vkCmdPreprocessGeneratedCommandsEXT = 118 | (PFN_vkCmdPreprocessGeneratedCommandsEXT)vkGetDeviceProcAddr(device, "vkCmdPreprocessGeneratedCommandsEXT"); 119 | s_vkCmdExecuteGeneratedCommandsEXT = 120 | (PFN_vkCmdExecuteGeneratedCommandsEXT)vkGetDeviceProcAddr(device, "vkCmdExecuteGeneratedCommandsEXT"); 121 | s_vkCreateIndirectCommandsLayoutEXT = 122 | (PFN_vkCreateIndirectCommandsLayoutEXT)vkGetDeviceProcAddr(device, "vkCreateIndirectCommandsLayoutEXT"); 123 | s_vkDestroyIndirectCommandsLayoutEXT = 124 | (PFN_vkDestroyIndirectCommandsLayoutEXT)vkGetDeviceProcAddr(device, "vkDestroyIndirectCommandsLayoutEXT"); 125 | s_vkCreateIndirectExecutionSetEXT = 126 | 
(PFN_vkCreateIndirectExecutionSetEXT)vkGetDeviceProcAddr(device, "vkCreateIndirectExecutionSetEXT"); 127 | s_vkDestroyIndirectExecutionSetEXT = 128 | (PFN_vkDestroyIndirectExecutionSetEXT)vkGetDeviceProcAddr(device, "vkDestroyIndirectExecutionSetEXT"); 129 | s_vkUpdateIndirectExecutionSetPipelineEXT = 130 | (PFN_vkUpdateIndirectExecutionSetPipelineEXT)vkGetDeviceProcAddr(device, "vkUpdateIndirectExecutionSetPipelineEXT"); 131 | s_vkUpdateIndirectExecutionSetShaderEXT = 132 | (PFN_vkUpdateIndirectExecutionSetShaderEXT)vkGetDeviceProcAddr(device, "vkUpdateIndirectExecutionSetShaderEXT"); 133 | 134 | return s_vkGetGeneratedCommandsMemoryRequirementsEXT && s_vkCmdPreprocessGeneratedCommandsEXT 135 | && s_vkCmdExecuteGeneratedCommandsEXT && s_vkCreateIndirectCommandsLayoutEXT 136 | && s_vkDestroyIndirectCommandsLayoutEXT && s_vkCreateIndirectExecutionSetEXT && s_vkDestroyIndirectExecutionSetEXT 137 | && s_vkUpdateIndirectExecutionSetPipelineEXT && s_vkUpdateIndirectExecutionSetShaderEXT; 138 | } 139 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS -------------------------------------------------------------------------------- /renderer_vk.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ 22 | 23 | 24 | #include <assert.h> 25 | #include 26 | 27 | #include "renderer.hpp" 28 | #include "resources_vk.hpp" 29 | 30 | #include 31 | 32 | #include "common.h" 33 | 34 | 35 | namespace generatedcmds { 36 | 37 | ////////////////////////////////////////////////////////////////////////// 38 | 39 | 40 | class RendererVK : public Renderer 41 | { 42 | public: 43 | class TypeCmd : public Renderer::Type 44 | { 45 | bool isAvailable(const nvvk::Context& context) override { return true; } 46 | 47 | const char* name() const override { return "re-used cmds"; } 48 | Renderer* create() const override 49 | { 50 | RendererVK* renderer = new RendererVK(); 51 | return renderer; 52 | } 53 | uint32_t priority() const override { return 8; } 54 | }; 55 | 56 | public: 57 | void init(const CadScene* scene, ResourcesVK* resources, const Config& config, Stats& stats) override; 58 | void deinit() override; 59 | void draw(const Resources::Global& global, Stats& stats) override; 60 | 61 | RendererVK() {} 62 | 63 | private: 64 | struct DrawSetup 65 | { 66 | VkCommandBuffer cmdBuffer; 67 | nvvk::Buffer combinedIndices; 68 | }; 69 | 70 | std::vector<DrawItem> m_drawItems; 71 | std::vector<uint32_t> m_seqIndices; 72 | CadScene::IndexingBits m_indexingBits; 73 | VkCommandPool m_cmdPool; 74 | DrawSetup m_draw; 75 | ResourcesVK* m_resources; 76 | 77 | void fillCmdBuffer(VkCommandBuffer cmd, const DrawItem* drawItems, size_t drawCount) 78 | { 79 | ResourcesVK* res = m_resources; 80 | const CadSceneVK& scene = res->m_scene; 81 | BindingMode bindingMode = m_config.bindingMode; 82 | 83 | int lastMaterial = -1; 84 | int lastGeometry = -1; 85 | int lastMatrix = -1; 86 | int lastObject = -1; 87 | int lastShader = -1; 88 | 89 | VkDeviceAddress matrixAddress = scene.m_buffers.matrices.address; 90 | VkDeviceAddress materialAddress = scene.m_buffers.materials.address; 91 | 92 | // setup staging buffer for filling 93 | ScopeStaging staging(res->m_resourceAllocator, res->m_queue, res->m_queueFamily); 94 | 95 | size_t combinedIndicesSize = bindingMode == BINDINGMODE_INDEX_VERTEXATTRIB ? 
sizeof(uint32_t) * drawCount : 0; 96 | uint32_t* combinedIndicesMapping = nullptr; 97 | if(combinedIndicesSize) 98 | { 99 | m_draw.combinedIndices = res->m_resourceAllocator.createBuffer(combinedIndicesSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT); 100 | combinedIndicesMapping = staging.uploadT(m_draw.combinedIndices.buffer, 0, combinedIndicesSize); 101 | } 102 | 103 | switch(bindingMode) 104 | { 105 | case BINDINGMODE_DSETS: 106 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawBind.getPipeLayout(), DRAW_UBO_SCENE, 107 | 1, res->m_drawBind.at(DRAW_UBO_SCENE).getSets(), 0, nullptr); 108 | break; 109 | case BINDINGMODE_PUSHADDRESS: 110 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawPush.getPipeLayout(), 0, 1, 111 | res->m_drawPush.getSets(), 0, nullptr); 112 | break; 113 | case BINDINGMODE_INDEX_BASEINSTANCE: 114 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawIndexed.getPipeLayout(), 0, 1, 115 | res->m_drawIndexed.getSets(), 0, nullptr); 116 | break; 117 | case BINDINGMODE_INDEX_VERTEXATTRIB: 118 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawIndexed.getPipeLayout(), 0, 1, 119 | res->m_drawIndexed.getSets(), 0, nullptr); 120 | 121 | { 122 | VkDeviceSize offset = {0}; 123 | VkDeviceSize size = {VK_WHOLE_SIZE}; 124 | VkDeviceSize stride = {sizeof(uint32_t)}; 125 | #if USE_DYNAMIC_VERTEX_STRIDE 126 | vkCmdBindVertexBuffers2(cmd, 1, 1, &m_draw.combinedIndices.buffer, &offset, &size, &stride); 127 | #else 128 | vkCmdBindVertexBuffers(cmd, 1, 1, &m_draw.combinedIndices.buffer, &offset); 129 | #endif 130 | } 131 | break; 132 | } 133 | 134 | if(m_config.shaderObjs) 135 | { 136 | const VkShaderStageFlagBits unusedStages[3] = {VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, 137 | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, VK_SHADER_STAGE_GEOMETRY_BIT}; 138 | vkCmdBindShadersEXT(cmd, 3, unusedStages, nullptr); 139 | } 140 | 141 | for(size_t i = 0; i < drawCount; i++) 142 | { 143 | uint32_t idx = m_config.permutated ? 
m_seqIndices[i] : uint32_t(i); 144 | const DrawItem& di = drawItems[idx]; 145 | 146 | if(di.shaderIndex != lastShader) 147 | { 148 | if(m_config.shaderObjs) 149 | { 150 | VkShaderStageFlagBits stages[2] = {VK_SHADER_STAGE_VERTEX_BIT, VK_SHADER_STAGE_FRAGMENT_BIT}; 151 | VkShaderEXT shaders[2] = {res->m_drawShading.vertexShaderObjs[di.shaderIndex], 152 | res->m_drawShading.fragmentShaderObjs[di.shaderIndex]}; 153 | vkCmdBindShadersEXT(cmd, 2, stages, shaders); 154 | } 155 | else 156 | { 157 | vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawShading.pipelines[di.shaderIndex]); 158 | } 159 | 160 | lastShader = di.shaderIndex; 161 | } 162 | 163 | #if USE_DRAW_OFFSETS 164 | if(lastGeometry != int(scene.m_geometry[di.geometryIndex].allocation.chunkIndex)) 165 | { 166 | const CadSceneVK::Geometry& geo = scene.m_geometry[di.geometryIndex]; 167 | 168 | vkCmdBindIndexBuffer(cmd, geo.ibo.buffer, 0, VK_INDEX_TYPE_UINT32); 169 | VkDeviceSize offset = {0}; 170 | VkDeviceSize size = {VK_WHOLE_SIZE}; 171 | VkDeviceSize stride = {sizeof(CadScene::Vertex)}; 172 | #if USE_DYNAMIC_VERTEX_STRIDE 173 | vkCmdBindVertexBuffers2(cmd, 0, 1, &geo.vbo.buffer, &offset, &size, &stride); 174 | #else 175 | vkCmdBindVertexBuffers(cmd, 0, 1, &geo.vbo.buffer, &offset); 176 | #endif 177 | lastGeometry = int(scene.m_geometry[di.geometryIndex].allocation.chunkIndex); 178 | } 179 | #else 180 | if(lastGeometry != di.geometryIndex) 181 | { 182 | const CadSceneVK::Geometry& geo = scene.m_geometry[di.geometryIndex]; 183 | VkDeviceSize stride = {sizeof(CadScene::Vertex)}; 184 | 185 | vkCmdBindIndexBuffer(cmd, geo.ibo.buffer, geo.ibo.offset, VK_INDEX_TYPE_UINT32); 186 | #if USE_DYNAMIC_VERTEX_STRIDE 187 | vkCmdBindVertexBuffers2(cmd, 0, 1, &geo.vbo.buffer, &geo.vbo.offset, &geo.vbo.range, &stride); 188 | #else 189 | vkCmdBindVertexBuffers(cmd, 0, 1, &geo.vbo.buffer, &geo.vbo.offset); 190 | #endif 191 | 192 | lastGeometry = di.geometryIndex; 193 | } 194 | #endif 195 | 196 | uint32_t firstInstance = 0; 197 | 198 | if(bindingMode == BINDINGMODE_DSETS) 199 | { 200 | if(lastMatrix != di.matrixIndex) 201 | { 202 | uint32_t offset = di.matrixIndex * res->m_alignedMatrixSize; 203 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawBind.getPipeLayout(), 204 | DRAW_UBO_MATRIX, 1, res->m_drawBind.at(DRAW_UBO_MATRIX).getSets(), 1, &offset); 205 | lastMatrix = di.matrixIndex; 206 | } 207 | 208 | if(lastMaterial != di.materialIndex) 209 | { 210 | uint32_t offset = di.materialIndex * res->m_alignedMaterialSize; 211 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawBind.getPipeLayout(), 212 | DRAW_UBO_MATERIAL, 1, res->m_drawBind.at(DRAW_UBO_MATERIAL).getSets(), 1, &offset); 213 | lastMaterial = di.materialIndex; 214 | } 215 | } 216 | else if(bindingMode == BINDINGMODE_PUSHADDRESS) 217 | { 218 | if(lastMatrix != di.matrixIndex) 219 | { 220 | VkDeviceAddress address = matrixAddress + sizeof(CadScene::MatrixNode) * di.matrixIndex; 221 | 222 | vkCmdPushConstants(cmd, res->m_drawPush.getPipeLayout(), VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(VkDeviceAddress), &address); 223 | 224 | lastMatrix = di.matrixIndex; 225 | } 226 | 227 | if(lastMaterial != di.materialIndex) 228 | { 229 | VkDeviceAddress address = materialAddress + sizeof(CadScene::Material) * di.materialIndex; 230 | 231 | vkCmdPushConstants(cmd, res->m_drawPush.getPipeLayout(), VK_SHADER_STAGE_FRAGMENT_BIT, 232 | sizeof(VkDeviceAddress), sizeof(VkDeviceAddress), &address); 233 | 234 | lastMaterial = di.materialIndex; 235 | } 236 | } 
237 | else if(bindingMode == BINDINGMODE_INDEX_BASEINSTANCE) 238 | { 239 | firstInstance = m_indexingBits.packIndices(di.matrixIndex, di.materialIndex); 240 | } 241 | else if(bindingMode == BINDINGMODE_INDEX_VERTEXATTRIB) 242 | { 243 | firstInstance = i; 244 | combinedIndicesMapping[i] = m_indexingBits.packIndices(di.matrixIndex, di.materialIndex); 245 | } 246 | 247 | // drawcall 248 | #if USE_DRAW_OFFSETS 249 | const CadSceneVK::Geometry& geo = scene.m_geometry[di.geometryIndex]; 250 | vkCmdDrawIndexed(cmd, di.range.count, 1, uint32_t(di.range.offset + geo.ibo.offset / sizeof(uint32_t)), 251 | geo.vbo.offset / sizeof(CadScene::Vertex), firstInstance); 252 | #else 253 | vkCmdDrawIndexed(cmd, di.range.count, 1, uint32_t(di.range.offset / sizeof(uint32_t)), 0, firstInstance); 254 | #endif 255 | 256 | lastShader = di.shaderIndex; 257 | } 258 | } 259 | 260 | void setupCmdBuffer(const DrawItem* drawItems, size_t drawCount) 261 | { 262 | const ResourcesVK* res = m_resources; 263 | 264 | VkCommandBuffer cmd = res->createCmdBuffer(m_cmdPool, false, false, true); 265 | 266 | if(m_config.shaderObjs) 267 | { 268 | res->cmdShaderObjectState(cmd); 269 | } 270 | else 271 | { 272 | res->cmdDynamicPipelineState(cmd); 273 | } 274 | 275 | fillCmdBuffer(cmd, drawItems, drawCount); 276 | 277 | vkEndCommandBuffer(cmd); 278 | m_draw.cmdBuffer = cmd; 279 | } 280 | 281 | void deleteCmdBuffer() { vkFreeCommandBuffers(m_resources->m_device, m_cmdPool, 1, &m_draw.cmdBuffer); } 282 | }; 283 | 284 | 285 | static RendererVK::TypeCmd s_type_cmdbuffer_vk; 286 | 287 | void RendererVK::init(const CadScene* scene, ResourcesVK* resources, const Config& config, Stats& stats) 288 | { 289 | ResourcesVK* res = (ResourcesVK*)resources; 290 | m_resources = res; 291 | m_scene = scene; 292 | m_config = config; 293 | 294 | stats.cmdBuffers = 1; 295 | 296 | m_indexingBits = m_scene->getIndexingBits(); 297 | 298 | res->initPipelinesOrShaders(config.bindingMode, 0, config.shaderObjs); 299 | 300 | VkResult result; 301 | VkCommandPoolCreateInfo cmdPoolInfo = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO}; 302 | cmdPoolInfo.queueFamilyIndex = 0; 303 | result = vkCreateCommandPool(res->m_device, &cmdPoolInfo, nullptr, &m_cmdPool); 304 | assert(result == VK_SUCCESS); 305 | 306 | fillDrawItems(m_drawItems, scene, config, stats); 307 | if(config.permutated) 308 | { 309 | m_seqIndices.resize(m_drawItems.size()); 310 | fillRandomPermutation(m_drawItems.size(), m_seqIndices.data(), m_drawItems.data(), stats); 311 | } 312 | 313 | setupCmdBuffer(m_drawItems.data(), m_drawItems.size()); 314 | } 315 | 316 | void RendererVK::deinit() 317 | { 318 | m_resources->m_resourceAllocator.destroy(m_draw.combinedIndices); 319 | 320 | deleteCmdBuffer(); 321 | vkDestroyCommandPool(m_resources->m_device, m_cmdPool, nullptr); 322 | } 323 | 324 | void RendererVK::draw(const Resources::Global& global, Stats& stats) 325 | { 326 | ResourcesVK* res = m_resources; 327 | 328 | VkCommandBuffer primary = res->createTempCmdBuffer(); 329 | { 330 | nvvk::ProfilerVK::Section profile(res->m_profilerVK, "Render", primary); 331 | { 332 | nvvk::ProfilerVK::Section profile(res->m_profilerVK, "Draw", primary); 333 | 334 | vkCmdUpdateBuffer(primary, res->m_common.viewBuffer.buffer, 0, sizeof(SceneData), (const uint32_t*)&global.sceneUbo); 335 | res->cmdPipelineBarrier(primary); 336 | 337 | // clear via pass 338 | res->cmdBeginRendering(primary, true); 339 | vkCmdExecuteCommands(primary, 1, &m_draw.cmdBuffer); 340 | vkCmdEndRendering(primary); 341 | } 342 | } 343 | 
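// profiler sections are closed; finish recording and hand the primary command
// buffer to the batched submission (flushed later via submissionExecute)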
vkEndCommandBuffer(primary); 344 | res->submissionEnqueue(primary); 345 | } 346 | 347 | } // namespace generatedcmds 348 | -------------------------------------------------------------------------------- /cadscene.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #include "cadscene.hpp" 22 | #include 23 | 24 | #include 25 | #include 26 | #include 27 | 28 | #define USE_CACHECOMBINE 1 29 | 30 | 31 | glm::vec4 randomVector(float from, float to) 32 | { 33 | glm::vec4 vec; 34 | float width = to - from; 35 | for(int i = 0; i < 4; i++) 36 | { 37 | vec[i] = from + (float(rand()) / float(RAND_MAX)) * width; 38 | } 39 | return vec; 40 | } 41 | 42 | // all oct functions derived from "A Survey of Efficient Representations for Independent Unit Vectors" 43 | // http://jcgt.org/published/0003/02/01/paper.pdf 44 | // Returns +/- 1 45 | inline glm::vec3 oct_signNotZero(glm::vec3 v) 46 | { 47 | // leaves z as is 48 | return glm::vec3((v.x >= 0.0f) ? +1.0f : -1.0f, (v.y >= 0.0f) ? +1.0f : -1.0f, 1.0f); 49 | } 50 | 51 | // Assume normalized input. Output is on [-1, 1] for each component. 52 | inline glm::vec3 float32x3_to_oct(glm::vec3 v) 53 | { 54 | // Project the sphere onto the octahedron, and then onto the xy plane 55 | glm::vec3 p = glm::vec3(v.x, v.y, 0) * (1.0f / (fabsf(v.x) + fabsf(v.y) + fabsf(v.z))); 56 | // Reflect the folds of the lower hemisphere over the diagonals 57 | return (v.z <= 0.0f) ? glm::vec3(1.0f - fabsf(p.y), 1.0f - fabsf(p.x), 0.0f) * oct_signNotZero(p) : p; 58 | } 59 | 60 | inline glm::vec3 oct_to_float32x3(glm::vec3 e) 61 | { 62 | glm::vec3 v = glm::vec3(e.x, e.y, 1.0f - fabsf(e.x) - fabsf(e.y)); 63 | if(v.z < 0.0f) 64 | { 65 | v = glm::vec3(1.0f - fabs(v.y), 1.0f - fabs(v.x), v.z) * oct_signNotZero(v); 66 | } 67 | return glm::normalize(v); 68 | } 69 | 70 | inline glm::vec3 float32x3_to_octn_precise(glm::vec3 v, const int n) 71 | { 72 | glm::vec3 s = float32x3_to_oct(v); // Remap to the square 73 | // Each snorm's max value interpreted as an integer, 74 | // e.g., 127.0 for snorm8 75 | float M = float(1 << ((n / 2) - 1)) - 1.0; 76 | // Remap components to snorm(n/2) precision...with floor instead 77 | // of round (see equation 1) 78 | s = glm::floor(glm::clamp(s, -1.0f, +1.0f) * M) * (1.0f / M); 79 | glm::vec3 bestRepresentation = s; 80 | float highestCosine = glm::dot(oct_to_float32x3(s), v); 81 | // Test all combinations of floor and ceil and keep the best. 82 | // Note that at +/- 1, this will exit the square... but that 83 | // will be a worse encoding and never win. 
84 | for(int i = 0; i <= 1; ++i) 85 | { 86 | for(int j = 0; j <= 1; ++j) 87 | { 88 | // This branch will be evaluated at compile time 89 | if((i != 0) || (j != 0)) 90 | { 91 | // Offset the bit pattern (which is stored in floating 92 | // point!) to effectively change the rounding mode 93 | // (when i or j is 0: floor, when it is one: ceiling) 94 | glm::vec3 candidate = glm::vec3(i, j, 0) * (1 / M) + s; 95 | float cosine = glm::dot(oct_to_float32x3(candidate), v); 96 | if(cosine > highestCosine) 97 | { 98 | bestRepresentation = candidate; 99 | highestCosine = cosine; 100 | } 101 | } 102 | } 103 | } 104 | return bestRepresentation; 105 | } 106 | 107 | bool CadScene::loadCSF(const char* filename, int clones, int cloneaxis) 108 | { 109 | CSFile* csf; 110 | CSFileMemoryPTR mem = CSFileMemory_new(); 111 | if(CSFile_loadExt(&csf, filename, mem) != CADSCENEFILE_NOERROR || !(csf->fileFlags & CADSCENEFILE_FLAG_UNIQUENODES)) 112 | { 113 | CSFileMemory_delete(mem); 114 | return false; 115 | } 116 | 117 | int copies = clones + 1; 118 | 119 | CSFile_transform(csf); 120 | 121 | srand(234525); 122 | 123 | 124 | // materials 125 | m_materials.resize(csf->numMaterials); 126 | for(int n = 0; n < csf->numMaterials; n++) 127 | { 128 | CSFMaterial* csfmaterial = &csf->materials[n]; 129 | Material& material = m_materials[n]; 130 | 131 | for(int i = 0; i < 2; i++) 132 | { 133 | material.sides[i].ambient = randomVector(0.0f, 0.1f); 134 | material.sides[i].diffuse = glm::make_vec4(csf->materials[n].color) + randomVector(0.0f, 0.07f); 135 | material.sides[i].specular = randomVector(0.25f, 0.55f); 136 | material.sides[i].emissive = randomVector(0.0f, 0.05f); 137 | } 138 | } 139 | 140 | 141 | // geometry 142 | int numGeoms = csf->numGeometries; 143 | m_geometry.resize(csf->numGeometries * copies); 144 | m_geometryBboxes.resize(csf->numGeometries * copies); 145 | for(int n = 0; n < csf->numGeometries; n++) 146 | { 147 | CSFGeometry* csfgeom = &csf->geometries[n]; 148 | Geometry& geom = m_geometry[n]; 149 | geom.cloneIdx = -1; 150 | 151 | geom.numVertices = csfgeom->numVertices; 152 | geom.numIndexSolid = csfgeom->numIndexSolid; 153 | geom.numIndexWire = csfgeom->numIndexWire; 154 | 155 | Vertex* vertices = new Vertex[csfgeom->numVertices]; 156 | for(int i = 0; i < csfgeom->numVertices; i++) 157 | { 158 | vertices[i].position[0] = csfgeom->vertex[3 * i + 0]; 159 | vertices[i].position[1] = csfgeom->vertex[3 * i + 1]; 160 | vertices[i].position[2] = csfgeom->vertex[3 * i + 2]; 161 | 162 | glm::vec3 normal; 163 | if(csfgeom->normal) 164 | { 165 | normal.x = csfgeom->normal[3 * i + 0]; 166 | normal.y = csfgeom->normal[3 * i + 1]; 167 | normal.z = csfgeom->normal[3 * i + 2]; 168 | } 169 | else 170 | { 171 | normal = normalize(glm::vec3(vertices[i].position)); 172 | } 173 | 174 | glm::vec3 packed = float32x3_to_octn_precise(normal, 16); 175 | vertices[i].normalOctX = std::min(32767, std::max(-32767, int32_t(packed.x * 32767.0f))); 176 | vertices[i].normalOctY = std::min(32767, std::max(-32767, int32_t(packed.y * 32767.0f))); 177 | 178 | m_geometryBboxes[n].merge(glm::vec4(vertices[i].position, 1)); 179 | } 180 | 181 | geom.vboData = vertices; 182 | geom.vboSize = sizeof(Vertex) * csfgeom->numVertices; 183 | 184 | 185 | unsigned int* indices = new unsigned int[csfgeom->numIndexSolid + csfgeom->numIndexWire]; 186 | memcpy(&indices[0], csfgeom->indexSolid, sizeof(unsigned int) * csfgeom->numIndexSolid); 187 | if(csfgeom->indexWire) 188 | { 189 | memcpy(&indices[csfgeom->numIndexSolid], csfgeom->indexWire, sizeof(unsigned int) 
* csfgeom->numIndexWire); 190 | } 191 | 192 | geom.iboData = indices; 193 | geom.iboSize = sizeof(unsigned int) * (csfgeom->numIndexSolid + csfgeom->numIndexWire); 194 | 195 | 196 | geom.parts.resize(csfgeom->numParts); 197 | 198 | size_t offsetSolid = 0; 199 | size_t offsetWire = csfgeom->numIndexSolid * sizeof(unsigned int); 200 | for(int i = 0; i < csfgeom->numParts; i++) 201 | { 202 | geom.parts[i].indexWire.count = csfgeom->parts[i].numIndexWire; 203 | geom.parts[i].indexSolid.count = csfgeom->parts[i].numIndexSolid; 204 | 205 | geom.parts[i].indexWire.offset = offsetWire; 206 | geom.parts[i].indexSolid.offset = offsetSolid; 207 | 208 | offsetWire += csfgeom->parts[i].numIndexWire * sizeof(unsigned int); 209 | offsetSolid += csfgeom->parts[i].numIndexSolid * sizeof(unsigned int); 210 | } 211 | } 212 | for(int c = 1; c <= clones; c++) 213 | { 214 | for(int n = 0; n < numGeoms; n++) 215 | { 216 | m_geometryBboxes[n + numGeoms * c] = m_geometryBboxes[n]; 217 | 218 | const Geometry& geomorig = m_geometry[n]; 219 | Geometry& geom = m_geometry[n + numGeoms * c]; 220 | 221 | geom = geomorig; 222 | geom.cloneIdx = n; 223 | } 224 | } 225 | 226 | 227 | // nodes 228 | int numObjects = 0; 229 | m_matrices.resize(csf->numNodes * copies); 230 | 231 | for(int n = 0; n < csf->numNodes; n++) 232 | { 233 | CSFNode* csfnode = &csf->nodes[n]; 234 | 235 | memcpy(glm::value_ptr(m_matrices[n].objectMatrix), csfnode->objectTM, sizeof(float) * 16); 236 | memcpy(glm::value_ptr(m_matrices[n].worldMatrix), csfnode->worldTM, sizeof(float) * 16); 237 | 238 | m_matrices[n].objectMatrixIT = glm::transpose(glm::inverse(m_matrices[n].objectMatrix)); 239 | m_matrices[n].worldMatrixIT = glm::transpose(glm::inverse(m_matrices[n].worldMatrix)); 240 | 241 | if(csfnode->geometryIDX < 0) 242 | continue; 243 | 244 | numObjects++; 245 | } 246 | 247 | 248 | // objects 249 | m_objects.resize(numObjects * copies); 250 | numObjects = 0; 251 | for(int n = 0; n < csf->numNodes; n++) 252 | { 253 | CSFNode* csfnode = &csf->nodes[n]; 254 | 255 | if(csfnode->geometryIDX < 0) 256 | continue; 257 | 258 | Object& object = m_objects[numObjects]; 259 | 260 | object.matrixIndex = n; 261 | object.geometryIndex = csfnode->geometryIDX; 262 | 263 | object.parts.resize(csfnode->numParts); 264 | for(int i = 0; i < csfnode->numParts; i++) 265 | { 266 | object.parts[i].active = 1; 267 | object.parts[i].matrixIndex = csfnode->parts[i].nodeIDX < 0 ? object.matrixIndex : csfnode->parts[i].nodeIDX; 268 | object.parts[i].materialIndex = csfnode->parts[i].materialIDX; 269 | #if 1 270 | if(csf->materials[csfnode->parts[i].materialIDX].color[3] < 0.9f) 271 | { 272 | object.parts[i].active = 0; 273 | } 274 | #endif 275 | } 276 | 277 | BBox bbox = m_geometryBboxes[object.geometryIndex].transformed(m_matrices[n].worldMatrix); 278 | m_bbox.merge(bbox); 279 | 280 | updateObjectDrawCache(object); 281 | 282 | numObjects++; 283 | } 284 | 285 | // compute clone move delta based on m_bbox; 286 | 287 | glm::vec4 dim = m_bbox.max - m_bbox.min; 288 | 289 | int sq = 1; 290 | int numAxis = 0; 291 | for(int i = 0; i < 3; i++) 292 | { 293 | numAxis += (cloneaxis & (1 << i)) ? 
1 : 0; 294 | } 295 | 296 | assert(numAxis); 297 | 298 | switch(numAxis) 299 | { 300 | case 1: 301 | sq = copies; 302 | break; 303 | case 2: 304 | while(sq * sq < copies) 305 | { 306 | sq++; 307 | } 308 | break; 309 | case 3: 310 | while(sq * sq * sq < copies) 311 | { 312 | sq++; 313 | } 314 | break; 315 | } 316 | 317 | 318 | for(int c = 1; c <= clones; c++) 319 | { 320 | int numNodes = csf->numNodes; 321 | 322 | glm::vec4 shift = dim * 1.05f; 323 | 324 | float u = 0; 325 | float v = 0; 326 | float w = 0; 327 | 328 | switch(numAxis) 329 | { 330 | case 1: 331 | u = float(c); 332 | break; 333 | case 2: 334 | u = float(c % sq); 335 | v = float(c / sq); 336 | break; 337 | case 3: 338 | u = float(c % sq); 339 | v = float((c / sq) % sq); 340 | w = float(c / (sq * sq)); 341 | break; 342 | } 343 | 344 | float use = u; 345 | 346 | if(cloneaxis & (1 << 0)) 347 | { 348 | shift.x *= -use; 349 | if(numAxis > 1) 350 | use = v; 351 | } 352 | else 353 | { 354 | shift.x = 0; 355 | } 356 | 357 | if(cloneaxis & (1 << 1)) 358 | { 359 | shift.y *= use; 360 | if(numAxis > 2) 361 | use = w; 362 | else if(numAxis > 1) 363 | use = v; 364 | } 365 | else 366 | { 367 | shift.y = 0; 368 | } 369 | 370 | if(cloneaxis & (1 << 2)) 371 | { 372 | shift.z *= -use; 373 | } 374 | else 375 | { 376 | shift.z = 0; 377 | } 378 | 379 | shift.w = 0; 380 | 381 | // move all world matrices 382 | for(int n = 0; n < numNodes; n++) 383 | { 384 | MatrixNode& node = m_matrices[n + numNodes * c]; 385 | MatrixNode& nodeOrig = m_matrices[n]; 386 | node = nodeOrig; 387 | node.worldMatrix[3] = node.worldMatrix[3] + shift; 388 | node.worldMatrixIT = glm::transpose(glm::inverse(node.worldMatrix)); 389 | } 390 | 391 | { 392 | // patch object matrix of root 393 | MatrixNode& node = m_matrices[csf->rootIDX + numNodes * c]; 394 | node.objectMatrix[3] = node.objectMatrix[3] + shift; 395 | node.objectMatrixIT = glm::transpose(glm::inverse(node.objectMatrix)); 396 | } 397 | 398 | // clone objects 399 | for(int n = 0; n < numObjects; n++) 400 | { 401 | const Object& objectorig = m_objects[n]; 402 | Object& object = m_objects[n + numObjects * c]; 403 | 404 | object = objectorig; 405 | object.geometryIndex += c * numGeoms; 406 | object.matrixIndex += c * numNodes; 407 | for(size_t i = 0; i < object.parts.size(); i++) 408 | { 409 | object.parts[i].matrixIndex += c * numNodes; 410 | } 411 | 412 | for(size_t i = 0; i < object.cacheSolid.state.size(); i++) 413 | { 414 | object.cacheSolid.state[i].matrixIndex += c * numNodes; 415 | } 416 | for(size_t i = 0; i < object.cacheWire.state.size(); i++) 417 | { 418 | object.cacheWire.state[i].matrixIndex += c * numNodes; 419 | } 420 | } 421 | } 422 | 423 | CSFileMemory_delete(mem); 424 | return true; 425 | } 426 | 427 | 428 | struct ListItem 429 | { 430 | CadScene::DrawStateInfo state; 431 | CadScene::DrawRange range; 432 | }; 433 | 434 | static bool ListItem_compare(const ListItem& a, const ListItem& b) 435 | { 436 | int diff = 0; 437 | diff = diff != 0 ? diff : (a.state.materialIndex - b.state.materialIndex); 438 | diff = diff != 0 ? diff : (a.state.matrixIndex - b.state.matrixIndex); 439 | diff = diff != 0 ? 
diff : int(a.range.offset - b.range.offset); 440 | 441 | return diff < 0; 442 | } 443 | 444 | static void fillCache(CadScene::DrawRangeCache& cache, const std::vector& list) 445 | { 446 | cache = CadScene::DrawRangeCache(); 447 | 448 | if(!list.size()) 449 | return; 450 | 451 | CadScene::DrawStateInfo state = list[0].state; 452 | CadScene::DrawRange range = list[0].range; 453 | 454 | int stateCount = 0; 455 | 456 | for(size_t i = 1; i < list.size() + 1; i++) 457 | { 458 | bool newrange = false; 459 | if(i == list.size() || list[i].state != state) 460 | { 461 | // push range 462 | if(range.count) 463 | { 464 | stateCount++; 465 | cache.offsets.push_back(range.offset); 466 | cache.counts.push_back(range.count); 467 | } 468 | 469 | // emit 470 | if(stateCount) 471 | { 472 | cache.state.push_back(state); 473 | cache.stateCount.push_back(stateCount); 474 | } 475 | 476 | stateCount = 0; 477 | 478 | if(i == list.size()) 479 | { 480 | break; 481 | } 482 | else 483 | { 484 | state = list[i].state; 485 | range.offset = list[i].range.offset; 486 | range.count = 0; 487 | newrange = true; 488 | } 489 | } 490 | 491 | const CadScene::DrawRange& currange = list[i].range; 492 | if(newrange || (USE_CACHECOMBINE && currange.offset == (range.offset + sizeof(unsigned int) * range.count))) 493 | { 494 | // merge 495 | range.count += currange.count; 496 | } 497 | else 498 | { 499 | // push 500 | if(range.count) 501 | { 502 | stateCount++; 503 | cache.offsets.push_back(range.offset); 504 | cache.counts.push_back(range.count); 505 | } 506 | 507 | range = currange; 508 | } 509 | } 510 | } 511 | 512 | void CadScene::updateObjectDrawCache(Object& object) 513 | { 514 | Geometry& geom = m_geometry[object.geometryIndex]; 515 | 516 | std::vector listSolid; 517 | std::vector listWire; 518 | 519 | listSolid.reserve(geom.parts.size()); 520 | listWire.reserve(geom.parts.size()); 521 | 522 | for(size_t i = 0; i < geom.parts.size(); i++) 523 | { 524 | if(!object.parts[i].active) 525 | continue; 526 | 527 | ListItem item; 528 | item.state.materialIndex = object.parts[i].materialIndex; 529 | 530 | item.range = geom.parts[i].indexSolid; 531 | item.state.matrixIndex = object.parts[i].matrixIndex; 532 | listSolid.push_back(item); 533 | 534 | item.range = geom.parts[i].indexWire; 535 | item.state.matrixIndex = object.parts[i].matrixIndex; 536 | listWire.push_back(item); 537 | } 538 | 539 | std::sort(listSolid.begin(), listSolid.end(), ListItem_compare); 540 | std::sort(listWire.begin(), listWire.end(), ListItem_compare); 541 | 542 | fillCache(object.cacheSolid, listSolid); 543 | fillCache(object.cacheWire, listWire); 544 | } 545 | 546 | void CadScene::unload() 547 | { 548 | if(m_geometry.empty()) 549 | return; 550 | 551 | 552 | for(size_t i = 0; i < m_geometry.size(); i++) 553 | { 554 | if(m_geometry[i].cloneIdx >= 0) 555 | continue; 556 | 557 | delete[] m_geometry[i].vboData; 558 | delete[] m_geometry[i].iboData; 559 | } 560 | 561 | m_matrices.clear(); 562 | m_geometryBboxes.clear(); 563 | m_geometry.clear(); 564 | m_objects.clear(); 565 | m_geometryBboxes.clear(); 566 | } 567 | 568 | CadScene::IndexingBits CadScene::getIndexingBits() const 569 | { 570 | CadScene::IndexingBits bits = {1, 1}; 571 | 572 | for(uint32_t i = 32; i >= 1; i--) 573 | { 574 | uint64_t max = uint64_t(1) << i; 575 | if(m_matrices.size() < max) 576 | { 577 | bits.matrices = i; 578 | } 579 | if(m_materials.size() < max) 580 | { 581 | bits.materials = i; 582 | } 583 | } 584 | 585 | return bits; 586 | } 587 | 588 | bool CadScene::supportsIndexing() const 589 | { 
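  // Indexed binding packs a draw's matrix and material indices into a single
  // 32-bit value, delivered either through the base instance or through a
  // per-draw vertex attribute (see the BINDINGMODE_INDEX_* paths in the
  // renderers), so both bit counts must fit into 32 bits together. A minimal
  // sketch of the packing, assuming IndexingBits::packIndices() (part of
  // cadscene.hpp) places the matrix index in the low bits:
  //
  //   uint32_t packed   = matrixIndex | (materialIndex << bits.matrices);
  //   uint32_t matrix   = packed & ((1u << bits.matrices) - 1u);
  //   uint32_t material = packed >> bits.matrices;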
590 | IndexingBits bits = getIndexingBits(); 591 | return (bits.materials + bits.matrices) <= 32; 592 | } 593 | -------------------------------------------------------------------------------- /vk_ext_device_generated_commands.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #include 21 | 22 | #ifndef VK_EXT_device_generated_commands 23 | #define VK_EXT_device_generated_commands 1 24 | #define VK_EXT_DEVICE_GENERATED_COMMANDS_SPEC_VERSION 1 25 | #define VK_EXT_DEVICE_GENERATED_COMMANDS_EXTENSION_NAME "VK_EXT_device_generated_commands" 26 | #define VK_SHADER_CREATE_INDIRECT_BINDABLE_BIT_EXT ((VkShaderCreateFlagBitsEXT)0x00000080) 27 | #define VK_BUFFER_USAGE_2_PREPROCESS_BUFFER_BIT_EXT ((VkBufferUsageFlagBits2KHR)0x0000000080000000ULL) 28 | #define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_FEATURES_EXT ((VkStructureType)1000572000) 29 | #define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_PROPERTIES_EXT ((VkStructureType)1000572001) 30 | #define VK_STRUCTURE_TYPE_GENERATED_COMMANDS_MEMORY_REQUIREMENTS_INFO_EXT ((VkStructureType)1000572002) 31 | #define VK_STRUCTURE_TYPE_INDIRECT_EXECUTION_SET_CREATE_INFO_EXT ((VkStructureType)1000572003) 32 | #define VK_STRUCTURE_TYPE_GENERATED_COMMANDS_INFO_EXT ((VkStructureType)1000572004) 33 | #define VK_STRUCTURE_TYPE_WRITE_INDIRECT_EXECUTION_SET_EXT ((VkStructureType)1000572005) 34 | #define VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_CREATE_INFO_EXT ((VkStructureType)1000572006) 35 | #define VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_TOKEN_EXT ((VkStructureType)1000572007) 36 | #define VK_STRUCTURE_TYPE_WRITE_INDIRECT_EXECUTION_SET_PIPELINE_EXT ((VkStructureType)1000572008) 37 | #define VK_STRUCTURE_TYPE_WRITE_INDIRECT_EXECUTION_SET_SHADER_EXT ((VkStructureType)1000572009) 38 | #define VK_STRUCTURE_TYPE_INDIRECT_EXECUTION_SET_PIPELINE_INFO_EXT ((VkStructureType)1000572010) 39 | #define VK_STRUCTURE_TYPE_INDIRECT_EXECUTION_SET_SHADER_INFO_EXT ((VkStructureType)1000572011) 40 | #define VK_STRUCTURE_TYPE_INDIRECT_EXECUTION_SET_SHADER_LAYOUT_INFO_EXT ((VkStructureType)1000572012) 41 | #define VK_STRUCTURE_TYPE_GENERATED_COMMANDS_PIPELINE_INFO_EXT ((VkStructureType)1000572013) 42 | #define VK_STRUCTURE_TYPE_GENERATED_COMMANDS_SHADER_INFO_EXT ((VkStructureType)1000572014) 43 | #define VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_EXT VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_NV 44 | #define VK_ACCESS_2_COMMAND_PREPROCESS_READ_BIT_EXT VK_ACCESS_2_COMMAND_PREPROCESS_READ_BIT_NV 45 | #define VK_ACCESS_2_COMMAND_PREPROCESS_WRITE_BIT_EXT VK_ACCESS_2_COMMAND_PREPROCESS_WRITE_BIT_NV 46 | #define VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_EXT ((VkIndirectCommandsTokenTypeEXT)1000328000) 47 | #define 
VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_EXT ((VkIndirectCommandsTokenTypeEXT)1000328001) 48 | #define VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV_EXT ((VkIndirectCommandsTokenTypeEXT)1000202002) 49 | #define VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_NV_EXT ((VkIndirectCommandsTokenTypeEXT)1000202003) 50 | #define VK_INDIRECT_COMMANDS_TOKEN_TYPE_TRACE_RAYS2_EXT ((VkIndirectCommandsTokenTypeEXT)1000386004) 51 | #define VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_EXT ((VkObjectType)1000572000) 52 | #define VK_OBJECT_TYPE_INDIRECT_EXECUTION_SET_EXT ((VkObjectType)1000572001) 53 | #define VK_PIPELINE_CREATE_2_INDIRECT_BINDABLE_BIT_EXT ((VkPipelineCreateFlagBits2KHR)0x0000004000000000ULL) 54 | #define VK_PIPELINE_STAGE_COMMAND_PREPROCESS_BIT_EXT VK_PIPELINE_STAGE_COMMAND_PREPROCESS_BIT_NV 55 | #define VK_ACCESS_COMMAND_PREPROCESS_READ_BIT_EXT VK_ACCESS_COMMAND_PREPROCESS_READ_BIT_NV 56 | #define VK_ACCESS_COMMAND_PREPROCESS_WRITE_BIT_EXT VK_ACCESS_COMMAND_PREPROCESS_WRITE_BIT_NV 57 | 58 | typedef struct VkPhysicalDeviceDeviceGeneratedCommandsFeaturesEXT 59 | { 60 | VkStructureType sType; 61 | void* pNext; 62 | VkBool32 deviceGeneratedCommandsEXT; 63 | VkBool32 dynamicGeneratedPipelineLayout; 64 | } VkPhysicalDeviceDeviceGeneratedCommandsFeaturesEXT; 65 | 66 | typedef VkFlags VkIndirectCommandsInputModeFlagsEXT; 67 | 68 | typedef struct VkPhysicalDeviceDeviceGeneratedCommandsPropertiesEXT 69 | { 70 | VkStructureType sType; 71 | void* pNext; 72 | uint32_t maxIndirectPipelineCount; 73 | uint32_t maxIndirectShaderObjectCount; 74 | uint32_t maxIndirectSequenceCount; 75 | uint32_t maxIndirectCommandsTokenCount; 76 | uint32_t maxIndirectCommandsTokenOffset; 77 | uint32_t maxIndirectCommandsIndirectStride; 78 | VkIndirectCommandsInputModeFlagsEXT supportedIndirectCommandsInputModes; 79 | VkShaderStageFlags supportedIndirectCommandsShaderStages; 80 | VkShaderStageFlags supportedIndirectCommandsShaderStagesPipelineBinding; 81 | VkShaderStageFlags supportedIndirectCommandsShaderStagesShaderBinding; 82 | VkBool32 deviceGeneratedCommandsTransformFeedback; 83 | VkBool32 deviceGeneratedCommandsMultiDrawIndirectCount; 84 | } VkPhysicalDeviceDeviceGeneratedCommandsPropertiesEXT; 85 | 86 | VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkIndirectCommandsLayoutEXT) 87 | 88 | VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkIndirectExecutionSetEXT) 89 | 90 | typedef struct VkGeneratedCommandsMemoryRequirementsInfoEXT 91 | { 92 | VkStructureType sType; 93 | void* pNext; 94 | VkIndirectExecutionSetEXT indirectExecutionSet; 95 | VkIndirectCommandsLayoutEXT indirectCommandsLayout; 96 | uint32_t maxSequenceCount; 97 | uint32_t maxDrawCount; 98 | } VkGeneratedCommandsMemoryRequirementsInfoEXT; 99 | 100 | typedef struct VkIndirectExecutionSetPipelineInfoEXT 101 | { 102 | VkStructureType sType; 103 | void const* pNext; 104 | VkPipeline initialPipeline; 105 | uint32_t maxPipelineCount; 106 | } VkIndirectExecutionSetPipelineInfoEXT; 107 | 108 | typedef struct VkIndirectExecutionSetShaderLayoutInfoEXT 109 | { 110 | VkStructureType sType; 111 | void const* pNext; 112 | uint32_t setLayoutCount; 113 | VkDescriptorSetLayout const* pSetLayouts; 114 | } VkIndirectExecutionSetShaderLayoutInfoEXT; 115 | 116 | typedef struct VkIndirectExecutionSetShaderInfoEXT 117 | { 118 | VkStructureType sType; 119 | void const* pNext; 120 | uint32_t shaderCount; 121 | VkShaderEXT const* pInitialShaders; 122 | VkIndirectExecutionSetShaderLayoutInfoEXT const* pSetLayoutInfos; 123 | uint32_t maxShaderCount; 124 | uint32_t pushConstantRangeCount; 
125 | VkPushConstantRange const* pPushConstantRanges; 126 | } VkIndirectExecutionSetShaderInfoEXT; 127 | 128 | typedef union VkIndirectExecutionSetInfoEXT 129 | { 130 | VkIndirectExecutionSetPipelineInfoEXT const* pPipelineInfo; 131 | VkIndirectExecutionSetShaderInfoEXT const* pShaderInfo; 132 | } VkIndirectExecutionSetInfoEXT; 133 | 134 | typedef enum VkIndirectExecutionSetInfoTypeEXT 135 | { 136 | VK_INDIRECT_EXECUTION_SET_INFO_TYPE_PIPELINES_EXT = 0, 137 | VK_INDIRECT_EXECUTION_SET_INFO_TYPE_SHADER_OBJECTS_EXT = 1, 138 | VK_INDIRECT_EXECUTION_SET_INFO_TYPE_MAX_ENUM_EXT = 0x7FFFFFFF 139 | } VkIndirectExecutionSetInfoTypeEXT; 140 | 141 | typedef struct VkIndirectExecutionSetCreateInfoEXT 142 | { 143 | VkStructureType sType; 144 | void const* pNext; 145 | VkIndirectExecutionSetInfoTypeEXT type; 146 | VkIndirectExecutionSetInfoEXT info; 147 | } VkIndirectExecutionSetCreateInfoEXT; 148 | 149 | typedef struct VkGeneratedCommandsInfoEXT 150 | { 151 | VkStructureType sType; 152 | void const* pNext; 153 | VkShaderStageFlags shaderStages; 154 | VkIndirectExecutionSetEXT indirectExecutionSet; 155 | VkIndirectCommandsLayoutEXT indirectCommandsLayout; 156 | VkDeviceAddress indirectAddress; 157 | VkDeviceSize indirectAddressSize; 158 | VkDeviceAddress preprocessAddress; 159 | VkDeviceSize preprocessSize; 160 | uint32_t maxSequenceCount; 161 | VkDeviceAddress sequenceCountAddress; 162 | uint32_t maxDrawCount; 163 | } VkGeneratedCommandsInfoEXT; 164 | 165 | typedef struct VkWriteIndirectExecutionSetPipelineEXT 166 | { 167 | VkStructureType sType; 168 | void const* pNext; 169 | uint32_t index; 170 | VkPipeline pipeline; 171 | } VkWriteIndirectExecutionSetPipelineEXT; 172 | 173 | typedef struct VkWriteIndirectExecutionSetShaderEXT 174 | { 175 | VkStructureType sType; 176 | void const* pNext; 177 | uint32_t index; 178 | VkShaderEXT shader; 179 | } VkWriteIndirectExecutionSetShaderEXT; 180 | 181 | typedef struct VkIndirectCommandsVertexBufferTokenEXT 182 | { 183 | uint32_t vertexBindingUnit; 184 | } VkIndirectCommandsVertexBufferTokenEXT; 185 | 186 | typedef enum VkIndirectCommandsInputModeFlagBitsEXT 187 | { 188 | VK_INDIRECT_COMMANDS_INPUT_MODE_VULKAN_INDEX_BUFFER_EXT = 0x00000001, 189 | VK_INDIRECT_COMMANDS_INPUT_MODE_DXGI_INDEX_BUFFER_EXT = 0x00000002, 190 | VK_INDIRECT_COMMANDS_INPUT_MODE_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF 191 | } VkIndirectCommandsInputModeFlagBitsEXT; 192 | 193 | typedef struct VkIndirectCommandsIndexBufferTokenEXT 194 | { 195 | VkIndirectCommandsInputModeFlagBitsEXT mode; 196 | } VkIndirectCommandsIndexBufferTokenEXT; 197 | 198 | typedef struct VkIndirectCommandsPushConstantTokenEXT 199 | { 200 | VkPushConstantRange updateRange; 201 | } VkIndirectCommandsPushConstantTokenEXT; 202 | 203 | typedef struct VkIndirectCommandsExecutionSetTokenEXT 204 | { 205 | VkIndirectExecutionSetInfoTypeEXT type; 206 | VkShaderStageFlags shaderStages; 207 | } VkIndirectCommandsExecutionSetTokenEXT; 208 | 209 | typedef union VkIndirectCommandsTokenDataEXT 210 | { 211 | VkIndirectCommandsPushConstantTokenEXT const* pPushConstant; 212 | VkIndirectCommandsVertexBufferTokenEXT const* pVertexBuffer; 213 | VkIndirectCommandsIndexBufferTokenEXT const* pIndexBuffer; 214 | VkIndirectCommandsExecutionSetTokenEXT const* pExecutionSet; 215 | } VkIndirectCommandsTokenDataEXT; 216 | 217 | typedef enum VkIndirectCommandsTokenTypeEXT 218 | { 219 | VK_INDIRECT_COMMANDS_TOKEN_TYPE_EXECUTION_SET_EXT = 0, 220 | VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT = 1, 221 | VK_INDIRECT_COMMANDS_TOKEN_TYPE_SEQUENCE_INDEX_EXT 
= 2, 222 | VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_EXT = 3, 223 | VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_EXT = 4, 224 | VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_EXT = 5, 225 | VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_EXT = 6, 226 | VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_COUNT_EXT = 7, 227 | VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_COUNT_EXT = 8, 228 | VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_EXT = 9, 229 | VK_INDIRECT_COMMANDS_TOKEN_TYPE_MAX_ENUM_EXT = 0x7FFFFFFF 230 | } VkIndirectCommandsTokenTypeEXT; 231 | 232 | typedef struct VkIndirectCommandsLayoutTokenEXT 233 | { 234 | VkStructureType sType; 235 | void const* pNext; 236 | VkIndirectCommandsTokenTypeEXT type; 237 | VkIndirectCommandsTokenDataEXT data; 238 | uint32_t offset; 239 | } VkIndirectCommandsLayoutTokenEXT; 240 | 241 | typedef VkFlags VkIndirectCommandsLayoutUsageFlagsEXT; 242 | 243 | typedef struct VkIndirectCommandsLayoutCreateInfoEXT 244 | { 245 | VkStructureType sType; 246 | void const* pNext; 247 | VkIndirectCommandsLayoutUsageFlagsEXT flags; 248 | VkShaderStageFlags shaderStages; 249 | uint32_t indirectStride; 250 | VkPipelineLayout pipelineLayout; 251 | uint32_t tokenCount; 252 | VkIndirectCommandsLayoutTokenEXT const* pTokens; 253 | } VkIndirectCommandsLayoutCreateInfoEXT; 254 | 255 | typedef struct VkDrawIndirectCountIndirectCommandEXT 256 | { 257 | VkDeviceAddress bufferAddress; 258 | uint32_t stride; 259 | uint32_t commandCount; 260 | } VkDrawIndirectCountIndirectCommandEXT; 261 | 262 | typedef struct VkBindVertexBufferIndirectCommandEXT 263 | { 264 | VkDeviceAddress bufferAddress; 265 | uint32_t size; 266 | uint32_t stride; 267 | } VkBindVertexBufferIndirectCommandEXT; 268 | 269 | typedef struct VkBindIndexBufferIndirectCommandEXT 270 | { 271 | VkDeviceAddress bufferAddress; 272 | uint32_t size; 273 | VkIndexType indexType; 274 | } VkBindIndexBufferIndirectCommandEXT; 275 | 276 | typedef enum VkIndirectCommandsLayoutUsageFlagBitsEXT 277 | { 278 | VK_INDIRECT_COMMANDS_LAYOUT_USAGE_EXPLICIT_PREPROCESS_BIT_EXT = 0x00000001, 279 | VK_INDIRECT_COMMANDS_LAYOUT_USAGE_UNORDERED_SEQUENCES_BIT_EXT = 0x00000002, 280 | VK_INDIRECT_COMMANDS_LAYOUT_USAGE_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF 281 | } VkIndirectCommandsLayoutUsageFlagBitsEXT; 282 | 283 | typedef struct VkGeneratedCommandsPipelineInfoEXT 284 | { 285 | VkStructureType sType; 286 | void* pNext; 287 | VkPipeline pipeline; 288 | } VkGeneratedCommandsPipelineInfoEXT; 289 | 290 | typedef struct VkGeneratedCommandsShaderInfoEXT 291 | { 292 | VkStructureType sType; 293 | void* pNext; 294 | uint32_t shaderCount; 295 | VkShaderEXT const* pShaders; 296 | } VkGeneratedCommandsShaderInfoEXT; 297 | 298 | typedef void(VKAPI_PTR* PFN_vkGetGeneratedCommandsMemoryRequirementsEXT)(VkDevice device, 299 | const VkGeneratedCommandsMemoryRequirementsInfoEXT* pInfo, 300 | VkMemoryRequirements2* pMemoryRequirements); 301 | typedef void(VKAPI_PTR* PFN_vkCmdPreprocessGeneratedCommandsEXT)(VkCommandBuffer commandBuffer, 302 | const VkGeneratedCommandsInfoEXT* pGeneratedCommandsInfo, 303 | VkCommandBuffer stateCommandBuffer); 304 | typedef void(VKAPI_PTR* PFN_vkCmdExecuteGeneratedCommandsEXT)(VkCommandBuffer commandBuffer, 305 | VkBool32 isPreprocessed, 306 | const VkGeneratedCommandsInfoEXT* pGeneratedCommandsInfo); 307 | typedef VkResult(VKAPI_PTR* PFN_vkCreateIndirectCommandsLayoutEXT)(VkDevice device, 308 | const VkIndirectCommandsLayoutCreateInfoEXT* pCreateInfo, 309 | const VkAllocationCallbacks* pAllocator, 310 | VkIndirectCommandsLayoutEXT* pIndirectCommandsLayout); 311 | 
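// Runtime loading note: when the installed Vulkan headers and loader predate
// this extension, none of the entry points below can be linked directly, so
// they are resolved dynamically per device. A minimal sketch of such a
// lookup, assuming a device created with VK_EXT_device_generated_commands
// enabled and a populated VkGeneratedCommandsInfoEXT (the sample's actual
// loader is load_VK_EXT_device_generated_commands(), declared at the end of
// this header and implemented in vk_ext_device_generated_commands.cpp):
//
//   auto fn = (PFN_vkCmdExecuteGeneratedCommandsEXT)vkGetDeviceProcAddr(
//       device, "vkCmdExecuteGeneratedCommandsEXT");
//   if(fn)
//     fn(cmd, VK_TRUE /* isPreprocessed */, &generatedCommandsInfo);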
typedef void(VKAPI_PTR* PFN_vkDestroyIndirectCommandsLayoutEXT)(VkDevice device, 312 | VkIndirectCommandsLayoutEXT indirectCommandsLayout, 313 | const VkAllocationCallbacks* pAllocator); 314 | typedef VkResult(VKAPI_PTR* PFN_vkCreateIndirectExecutionSetEXT)(VkDevice device, 315 | const VkIndirectExecutionSetCreateInfoEXT* pCreateInfo, 316 | const VkAllocationCallbacks* pAllocator, 317 | VkIndirectExecutionSetEXT* pIndirectExecutionSet); 318 | typedef void(VKAPI_PTR* PFN_vkDestroyIndirectExecutionSetEXT)(VkDevice device, 319 | VkIndirectExecutionSetEXT indirectExecutionSet, 320 | const VkAllocationCallbacks* pAllocator); 321 | typedef void(VKAPI_PTR* PFN_vkUpdateIndirectExecutionSetPipelineEXT)(VkDevice device, 322 | VkIndirectExecutionSetEXT indirectExecutionSet, 323 | uint32_t executionSetWriteCount, 324 | const VkWriteIndirectExecutionSetPipelineEXT* pExecutionSetWrites); 325 | typedef void(VKAPI_PTR* PFN_vkUpdateIndirectExecutionSetShaderEXT)(VkDevice device, 326 | VkIndirectExecutionSetEXT indirectExecutionSet, 327 | uint32_t executionSetWriteCount, 328 | const VkWriteIndirectExecutionSetShaderEXT* pExecutionSetWrites); 329 | 330 | #ifndef VK_NO_PROTOTYPES 331 | VKAPI_ATTR void VKAPI_CALL vkGetGeneratedCommandsMemoryRequirementsEXT(VkDevice device, 332 | VkGeneratedCommandsMemoryRequirementsInfoEXT const* pInfo, 333 | VkMemoryRequirements2* pMemoryRequirements); 334 | 335 | VKAPI_ATTR void VKAPI_CALL vkCmdPreprocessGeneratedCommandsEXT(VkCommandBuffer commandBuffer, 336 | VkGeneratedCommandsInfoEXT const* pGeneratedCommandsInfo, 337 | VkCommandBuffer stateCommandBuffer); 338 | 339 | VKAPI_ATTR void VKAPI_CALL vkCmdExecuteGeneratedCommandsEXT(VkCommandBuffer commandBuffer, 340 | VkBool32 isPreprocessed, 341 | VkGeneratedCommandsInfoEXT const* pGeneratedCommandsInfo); 342 | 343 | VKAPI_ATTR VkResult VKAPI_CALL vkCreateIndirectCommandsLayoutEXT(VkDevice device, 344 | VkIndirectCommandsLayoutCreateInfoEXT const* pCreateInfo, 345 | VkAllocationCallbacks const* pAllocator, 346 | VkIndirectCommandsLayoutEXT* pIndirectCommandsLayout); 347 | 348 | VKAPI_ATTR void VKAPI_CALL vkDestroyIndirectCommandsLayoutEXT(VkDevice device, 349 | VkIndirectCommandsLayoutEXT indirectCommandsLayout, 350 | VkAllocationCallbacks const* pAllocator); 351 | 352 | VKAPI_ATTR VkResult VKAPI_CALL vkCreateIndirectExecutionSetEXT(VkDevice device, 353 | VkIndirectExecutionSetCreateInfoEXT const* pCreateInfo, 354 | VkAllocationCallbacks const* pAllocator, 355 | VkIndirectExecutionSetEXT* pIndirectExecutionSet); 356 | 357 | VKAPI_ATTR void VKAPI_CALL vkDestroyIndirectExecutionSetEXT(VkDevice device, 358 | VkIndirectExecutionSetEXT indirectExecutionSet, 359 | VkAllocationCallbacks const* pAllocator); 360 | 361 | VKAPI_ATTR void VKAPI_CALL vkUpdateIndirectExecutionSetPipelineEXT(VkDevice device, 362 | VkIndirectExecutionSetEXT indirectExecutionSet, 363 | uint32_t executionSetWriteCount, 364 | VkWriteIndirectExecutionSetPipelineEXT const* pExecutionSetWrites); 365 | 366 | VKAPI_ATTR void VKAPI_CALL vkUpdateIndirectExecutionSetShaderEXT(VkDevice device, 367 | VkIndirectExecutionSetEXT indirectExecutionSet, 368 | uint32_t executionSetWriteCount, 369 | VkWriteIndirectExecutionSetShaderEXT const* pExecutionSetWrites); 370 | #endif 371 | #endif 372 | 373 | VkBool32 load_VK_EXT_device_generated_commands(VkInstance instance, VkDevice device); 374 | -------------------------------------------------------------------------------- /rendererthread_vk.cpp: -------------------------------------------------------------------------------- 1 | 
/* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include "renderer.hpp" 27 | #include "resources_vk.hpp" 28 | #include "threadpool.hpp" 29 | #include 30 | #include 31 | 32 | #include "common.h" 33 | 34 | #if 0 35 | #include 36 | #define THREAD_BARRIER() _mm_mfence() 37 | #else 38 | #define THREAD_BARRIER() std::atomic_thread_fence(std::memory_order_seq_cst) 39 | #endif 40 | 41 | namespace generatedcmds { 42 | 43 | ////////////////////////////////////////////////////////////////////////// 44 | 45 | 46 | class RendererThreadedVK : public Renderer 47 | { 48 | public: 49 | class TypeCmd : public Renderer::Type 50 | { 51 | bool isAvailable(const nvvk::Context& context) override { return true; } 52 | const char* name() const override { return "threaded cmds"; } 53 | Renderer* create() const override 54 | { 55 | RendererThreadedVK* renderer = new RendererThreadedVK(); 56 | return renderer; 57 | } 58 | uint32_t priority() const override { return 10; } 59 | }; 60 | 61 | public: 62 | void init(const CadScene* scene, ResourcesVK* res, const Config& config, Stats& stats) override; 63 | void deinit() override; 64 | void draw(const Resources::Global& global, Stats& stats) override; 65 | 66 | RendererThreadedVK() {} 67 | 68 | private: 69 | struct DrawSetup 70 | { 71 | std::vector cmdbuffers; 72 | }; 73 | 74 | 75 | struct ThreadJob 76 | { 77 | RendererThreadedVK* renderer; 78 | int index; 79 | 80 | nvvk::RingCommandPool m_pool; 81 | 82 | int m_frame; 83 | std::condition_variable m_hasWorkCond; 84 | std::mutex m_hasWorkMutex; 85 | volatile int m_hasWork; 86 | 87 | size_t m_scIdx; 88 | std::vector m_scs; 89 | 90 | 91 | void resetFrame() { m_scIdx = 0; } 92 | 93 | DrawSetup* getFrameCommand() 94 | { 95 | DrawSetup* sc; 96 | if(m_scIdx + 1 > m_scs.size()) 97 | { 98 | sc = new DrawSetup; 99 | m_scIdx++; 100 | m_scs.push_back(sc); 101 | } 102 | else 103 | { 104 | sc = m_scs[m_scIdx++]; 105 | } 106 | 107 | sc->cmdbuffers.clear(); 108 | return sc; 109 | } 110 | }; 111 | 112 | 113 | std::vector m_drawItems; 114 | std::vector m_seqIndices; 115 | ResourcesVK* m_resources; 116 | int m_numThreads; 117 | CadScene::IndexingBits m_indexingBits; 118 | std::vector m_combinedIndicesData; 119 | nvvk::Buffer m_combinedIndices[nvvk::DEFAULT_RING_SIZE]; 120 | void* m_combinedIndicesMappings[nvvk::DEFAULT_RING_SIZE]; 121 | 122 | ThreadPool m_threadpool; 123 | 124 | bool m_workerBatched; 125 | int m_workingSet; 126 | int m_frame; 127 | uint32_t m_cycleCurrent; 128 | 129 | ThreadJob* m_jobs; 130 | 131 | volatile uint32_t m_ready; 132 | volatile uint32_t m_stopThreads; 133 | volatile size_t m_numCurItems; 134 | 135 | std::condition_variable m_readyCond; 136 | std::mutex m_readyMutex; 137 | 138 | size_t m_numEnqueues; 
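    // Producer/consumer handoff between workers and the main thread: each
    // worker records secondary command buffers into DrawSetup batches and
    // pushes them onto m_drawQueue (a queue of DrawSetup pointers, guarded
    // by m_drawMutex below), while drawThreaded() pops them and replays them
    // on the primary command buffer via vkCmdExecuteCommands. A nullptr
    // entry is a worker's end-of-frame marker, see enqueueShadeCommand_ts()
    // and RunThreadFrame().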
139 | std::queue m_drawQueue; 140 | 141 | std::mutex m_workMutex; 142 | std::mutex m_drawMutex; 143 | std::condition_variable m_drawMutexCondition; 144 | 145 | VkCommandBuffer m_primary; 146 | 147 | static void threadMaster(void* arg) 148 | { 149 | ThreadJob* job = (ThreadJob*)arg; 150 | job->renderer->RunThread(job->index); 151 | } 152 | 153 | bool getWork_ts(size_t& start, size_t& num) 154 | { 155 | std::lock_guard lock(m_workMutex); 156 | bool hasWork = false; 157 | 158 | const size_t chunkSize = m_workingSet; 159 | size_t total = m_drawItems.size(); 160 | 161 | if(m_numCurItems < total) 162 | { 163 | size_t batch = std::min(total - m_numCurItems, chunkSize); 164 | start = m_numCurItems; 165 | num = batch; 166 | m_numCurItems += batch; 167 | hasWork = true; 168 | } 169 | else 170 | { 171 | hasWork = false; 172 | start = 0; 173 | num = 0; 174 | } 175 | 176 | return hasWork; 177 | } 178 | 179 | void RunThread(int index); 180 | unsigned int RunThreadFrame(ThreadJob& job); 181 | 182 | void enqueueShadeCommand_ts(DrawSetup* sc); 183 | 184 | void drawThreaded(const Resources::Global& global, VkCommandBuffer cmd, Stats& stats); 185 | 186 | void fillCmdBuffer(VkCommandBuffer cmd, BindingMode bindingMode, size_t begin, const DrawItem* drawItems, size_t drawCount) 187 | { 188 | const ResourcesVK* res = m_resources; 189 | const CadSceneVK& scene = res->m_scene; 190 | 191 | int lastMaterial = -1; 192 | int lastGeometry = -1; 193 | int lastMatrix = -1; 194 | int lastObject = -1; 195 | int lastShader = -1; 196 | 197 | VkDeviceAddress matrixAddress = scene.m_buffers.matrices.address; 198 | VkDeviceAddress materialAddress = scene.m_buffers.materials.address; 199 | 200 | switch(bindingMode) 201 | { 202 | case BINDINGMODE_DSETS: 203 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawBind.getPipeLayout(), DRAW_UBO_SCENE, 204 | 1, res->m_drawBind.at(DRAW_UBO_SCENE).getSets(), 0, nullptr); 205 | break; 206 | case BINDINGMODE_PUSHADDRESS: 207 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawPush.getPipeLayout(), 0, 1, 208 | res->m_drawPush.getSets(), 0, nullptr); 209 | break; 210 | case BINDINGMODE_INDEX_BASEINSTANCE: 211 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawIndexed.getPipeLayout(), 0, 1, 212 | res->m_drawIndexed.getSets(), 0, nullptr); 213 | break; 214 | case BINDINGMODE_INDEX_VERTEXATTRIB: 215 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawIndexed.getPipeLayout(), 0, 1, 216 | res->m_drawIndexed.getSets(), 0, nullptr); 217 | 218 | { 219 | VkDeviceSize offset = {sizeof(uint32_t) * begin}; 220 | VkDeviceSize size = {VK_WHOLE_SIZE}; 221 | VkDeviceSize stride = {sizeof(uint32_t)}; 222 | #if USE_DYNAMIC_VERTEX_STRIDE 223 | vkCmdBindVertexBuffers2(cmd, 1, 1, &m_combinedIndices[m_cycleCurrent].buffer, &offset, &size, &stride); 224 | #else 225 | vkCmdBindVertexBuffers(cmd, 1, 1, &m_combinedIndices[m_cycleCurrent].buffer, &offset); 226 | #endif 227 | } 228 | break; 229 | } 230 | 231 | if(m_config.shaderObjs) 232 | { 233 | const VkShaderStageFlagBits unusedStages[3] = {VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, 234 | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, VK_SHADER_STAGE_GEOMETRY_BIT}; 235 | vkCmdBindShadersEXT(cmd, 3, unusedStages, nullptr); 236 | } 237 | 238 | for(size_t i = 0; i < drawCount; i++) 239 | { 240 | size_t idx = m_config.permutated ? 
m_seqIndices[i + begin] : i + begin; 241 | const DrawItem& di = drawItems[idx]; 242 | 243 | if(di.shaderIndex != lastShader) 244 | { 245 | if(m_config.shaderObjs) 246 | { 247 | VkShaderStageFlagBits stages[2] = {VK_SHADER_STAGE_VERTEX_BIT, VK_SHADER_STAGE_FRAGMENT_BIT}; 248 | VkShaderEXT shaders[2] = {res->m_drawShading.vertexShaderObjs[di.shaderIndex], 249 | res->m_drawShading.fragmentShaderObjs[di.shaderIndex]}; 250 | vkCmdBindShadersEXT(cmd, 2, stages, shaders); 251 | } 252 | else 253 | { 254 | vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawShading.pipelines[di.shaderIndex]); 255 | } 256 | 257 | lastShader = di.shaderIndex; 258 | } 259 | 260 | #if USE_DRAW_OFFSETS 261 | if(lastGeometry != int(scene.m_geometry[di.geometryIndex].allocation.chunkIndex)) 262 | { 263 | const CadSceneVK::Geometry& geo = scene.m_geometry[di.geometryIndex]; 264 | 265 | vkCmdBindIndexBuffer(cmd, geo.ibo.buffer, 0, VK_INDEX_TYPE_UINT32); 266 | VkDeviceSize offset = {0}; 267 | VkDeviceSize size = {VK_WHOLE_SIZE}; 268 | VkDeviceSize stride = {sizeof(CadScene::Vertex)}; 269 | #if USE_DYNAMIC_VERTEX_STRIDE 270 | vkCmdBindVertexBuffers2(cmd, 0, 1, &geo.vbo.buffer, &offset, &size, &stride); 271 | #else 272 | vkCmdBindVertexBuffers(cmd, 0, 1, &geo.vbo.buffer, &offset); 273 | #endif 274 | lastGeometry = int(scene.m_geometry[di.geometryIndex].allocation.chunkIndex); 275 | } 276 | #else 277 | if(lastGeometry != di.geometryIndex) 278 | { 279 | const CadSceneVK::Geometry& geo = scene.m_geometry[di.geometryIndex]; 280 | VkDeviceSize stride = {sizeof(CadScene::Vertex)}; 281 | 282 | vkCmdBindIndexBuffer(cmd, geo.ibo.buffer, geo.ibo.offset, VK_INDEX_TYPE_UINT32); 283 | #if USE_DYNAMIC_VERTEX_STRIDE 284 | vkCmdBindVertexBuffers2(cmd, 0, 1, &geo.vbo.buffer, &geo.vbo.offset, &geo.vbo.range, &stride); 285 | #else 286 | vkCmdBindVertexBuffers(cmd, 0, 1, &geo.vbo.buffer, &geo.vbo.offset); 287 | #endif 288 | 289 | lastGeometry = di.geometryIndex; 290 | } 291 | #endif 292 | 293 | uint32_t firstInstance = 0; 294 | 295 | if(bindingMode == BINDINGMODE_DSETS) 296 | { 297 | if(lastMatrix != di.matrixIndex) 298 | { 299 | uint32_t offset = di.matrixIndex * res->m_alignedMatrixSize; 300 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawBind.getPipeLayout(), 301 | DRAW_UBO_MATRIX, 1, res->m_drawBind.at(DRAW_UBO_MATRIX).getSets(), 1, &offset); 302 | lastMatrix = di.matrixIndex; 303 | } 304 | 305 | if(lastMaterial != di.materialIndex) 306 | { 307 | uint32_t offset = di.materialIndex * res->m_alignedMaterialSize; 308 | vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, res->m_drawBind.getPipeLayout(), 309 | DRAW_UBO_MATERIAL, 1, res->m_drawBind.at(DRAW_UBO_MATERIAL).getSets(), 1, &offset); 310 | lastMaterial = di.materialIndex; 311 | } 312 | } 313 | else if(bindingMode == BINDINGMODE_PUSHADDRESS) 314 | { 315 | if(lastMatrix != di.matrixIndex) 316 | { 317 | VkDeviceAddress address = matrixAddress + sizeof(CadScene::MatrixNode) * di.matrixIndex; 318 | 319 | vkCmdPushConstants(cmd, res->m_drawPush.getPipeLayout(), VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(VkDeviceAddress), &address); 320 | 321 | lastMatrix = di.matrixIndex; 322 | } 323 | 324 | if(lastMaterial != di.materialIndex) 325 | { 326 | VkDeviceAddress address = materialAddress + sizeof(CadScene::Material) * di.materialIndex; 327 | 328 | vkCmdPushConstants(cmd, res->m_drawPush.getPipeLayout(), VK_SHADER_STAGE_FRAGMENT_BIT, 329 | sizeof(VkDeviceAddress), sizeof(VkDeviceAddress), &address); 330 | 331 | lastMaterial = di.materialIndex; 332 | } 
333 | } 334 | else if(bindingMode == BINDINGMODE_INDEX_BASEINSTANCE) 335 | { 336 | firstInstance = m_indexingBits.packIndices(di.matrixIndex, di.materialIndex); 337 | } 338 | else if(bindingMode == BINDINGMODE_INDEX_VERTEXATTRIB) 339 | { 340 | firstInstance = i; 341 | m_combinedIndicesData[begin + i] = m_indexingBits.packIndices(di.matrixIndex, di.materialIndex); 342 | } 343 | 344 | // drawcall 345 | #if USE_DRAW_OFFSETS 346 | const CadSceneVK::Geometry& geo = scene.m_geometry[di.geometryIndex]; 347 | vkCmdDrawIndexed(cmd, di.range.count, 1, uint32_t(di.range.offset + geo.ibo.offset / sizeof(uint32_t)), 348 | geo.vbo.offset / sizeof(CadScene::Vertex), firstInstance); 349 | #else 350 | vkCmdDrawIndexed(cmd, di.range.count, 1, uint32_t(di.range.offset / sizeof(uint32_t)), 0, firstInstance); 351 | #endif 352 | 353 | lastShader = di.shaderIndex; 354 | } 355 | 356 | if(m_combinedIndicesData.size()) 357 | { 358 | // copy 359 | uint32_t* mapping = (uint32_t*)m_combinedIndicesMappings[m_cycleCurrent]; 360 | memcpy(mapping + begin, m_combinedIndicesData.data() + begin, sizeof(uint32_t) * drawCount); 361 | } 362 | } 363 | 364 | void setupCmdBuffer(DrawSetup& sc, nvvk::RingCommandPool& pool, size_t begin, const DrawItem* drawItems, size_t drawCount) 365 | { 366 | const ResourcesVK* res = m_resources; 367 | 368 | VkCommandBuffer cmd = pool.createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_SECONDARY, false); 369 | res->cmdBegin(cmd, true, false, true); 370 | 371 | if(m_config.shaderObjs) 372 | { 373 | res->cmdShaderObjectState(cmd); 374 | } 375 | else 376 | { 377 | res->cmdDynamicPipelineState(cmd); 378 | } 379 | 380 | fillCmdBuffer(cmd, m_config.bindingMode, begin, drawItems, drawCount); 381 | 382 | vkEndCommandBuffer(cmd); 383 | sc.cmdbuffers.push_back(cmd); 384 | } 385 | }; 386 | 387 | 388 | static RendererThreadedVK::TypeCmd s_type_cmdmain_vk; 389 | 390 | void RendererThreadedVK::init(const CadScene* scene, ResourcesVK* resources, const Config& config, Stats& stats) 391 | { 392 | ResourcesVK* res = (ResourcesVK*)resources; 393 | m_resources = res; 394 | m_scene = scene; 395 | m_config = config; 396 | 397 | res->initPipelinesOrShaders(config.bindingMode, 0, config.shaderObjs); 398 | 399 | fillDrawItems(m_drawItems, scene, config, stats); 400 | if(config.permutated) 401 | { 402 | m_seqIndices.resize(m_drawItems.size()); 403 | fillRandomPermutation(m_drawItems.size(), m_seqIndices.data(), m_drawItems.data(), stats); 404 | } 405 | 406 | if(m_config.bindingMode == BINDINGMODE_INDEX_VERTEXATTRIB) 407 | { 408 | m_combinedIndicesData.resize(m_drawItems.size()); 409 | for(uint32_t i = 0; i < nvvk::DEFAULT_RING_SIZE; i++) 410 | { 411 | m_combinedIndices[i] = 412 | res->m_resourceAllocator.createBuffer(sizeof(uint32_t) * m_drawItems.size(), VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, 413 | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); 414 | 415 | m_combinedIndicesMappings[i] = res->m_resourceAllocator.map(m_combinedIndices[i]); 416 | } 417 | } 418 | 419 | m_indexingBits = m_scene->getIndexingBits(); 420 | 421 | m_threadpool.init(m_config.workerThreads); 422 | 423 | // make jobs 424 | m_ready = 0; 425 | m_jobs = new ThreadJob[m_config.workerThreads]; 426 | m_stopThreads = 0; 427 | 428 | for(uint32_t i = 0; i < m_config.workerThreads; i++) 429 | { 430 | ThreadJob& job = m_jobs[i]; 431 | job.index = i; 432 | job.renderer = this; 433 | job.m_hasWork = -1; 434 | job.m_frame = 0; 435 | 436 | job.m_pool.init(res->m_device, res->m_context->m_queueGCT); 437 | 438 | m_threadpool.activateJob(i, 
threadMaster, &m_jobs[i]); 439 | } 440 | 441 | m_frame = 0; 442 | } 443 | 444 | void RendererThreadedVK::deinit() 445 | { 446 | m_stopThreads = 1; 447 | m_ready = 0; 448 | 449 | THREAD_BARRIER(); 450 | for(uint32_t i = 0; i < m_config.workerThreads; i++) 451 | { 452 | std::unique_lock lock(m_jobs[i].m_hasWorkMutex); 453 | m_jobs[i].m_hasWork = m_frame; 454 | m_jobs[i].m_hasWorkCond.notify_one(); 455 | } 456 | m_drawMutexCondition.notify_all(); 457 | 458 | std::this_thread::yield(); 459 | 460 | { 461 | std::unique_lock lock(m_readyMutex); 462 | while(m_ready < m_config.workerThreads) 463 | { 464 | m_readyCond.wait(lock); 465 | } 466 | } 467 | 468 | THREAD_BARRIER(); 469 | 470 | for(uint32_t i = 0; i < m_config.workerThreads; i++) 471 | { 472 | for(size_t s = 0; s < m_jobs[i].m_scs.size(); s++) 473 | { 474 | delete m_jobs[i].m_scs[s]; 475 | } 476 | m_jobs[i].m_pool.deinit(); 477 | } 478 | 479 | for(uint32_t i = 0; i < nvvk::DEFAULT_RING_SIZE; i++) 480 | { 481 | if(m_combinedIndices[i].memHandle) 482 | { 483 | m_resources->m_resourceAllocator.unmap(m_combinedIndices[i]); 484 | m_resources->m_resourceAllocator.destroy(m_combinedIndices[i]); 485 | } 486 | } 487 | 488 | delete[] m_jobs; 489 | 490 | m_threadpool.deinit(); 491 | 492 | m_drawItems.clear(); 493 | m_combinedIndicesData.clear(); 494 | } 495 | 496 | void RendererThreadedVK::enqueueShadeCommand_ts(DrawSetup* sc) 497 | { 498 | std::unique_lock lock(m_drawMutex); 499 | 500 | m_drawQueue.push(sc); 501 | m_drawMutexCondition.notify_one(); 502 | } 503 | 504 | unsigned int RendererThreadedVK::RunThreadFrame(ThreadJob& job) 505 | { 506 | unsigned int dispatches = 0; 507 | 508 | bool first = true; 509 | size_t tnum = 0; 510 | size_t begin = 0; 511 | size_t num = 0; 512 | 513 | size_t offset = 0; 514 | 515 | job.resetFrame(); 516 | job.m_pool.setCycle(m_cycleCurrent); 517 | 518 | if(m_workerBatched || true) 519 | { 520 | DrawSetup* sc = job.getFrameCommand(); 521 | while(getWork_ts(begin, num)) 522 | { 523 | setupCmdBuffer(*sc, job.m_pool, begin, m_drawItems.data(), num); 524 | tnum += num; 525 | } 526 | if(!sc->cmdbuffers.empty()) 527 | { 528 | enqueueShadeCommand_ts(sc); 529 | dispatches += 1; 530 | } 531 | } 532 | else 533 | { 534 | while(getWork_ts(begin, num)) 535 | { 536 | DrawSetup* sc = job.getFrameCommand(); 537 | setupCmdBuffer(*sc, job.m_pool, begin, m_drawItems.data(), num); 538 | 539 | if(!sc->cmdbuffers.empty()) 540 | { 541 | enqueueShadeCommand_ts(sc); 542 | dispatches += 1; 543 | } 544 | tnum += num; 545 | } 546 | } 547 | 548 | // nullptr signals we are done 549 | enqueueShadeCommand_ts(nullptr); 550 | 551 | return dispatches; 552 | } 553 | 554 | void RendererThreadedVK::RunThread(int tid) 555 | { 556 | ThreadJob& job = m_jobs[tid]; 557 | 558 | double timeWork = 0; 559 | double timeFrame = 0; 560 | int timerFrames = 0; 561 | size_t dispatches = 0; 562 | 563 | double timePrint = NVPSystem::getTime(); 564 | 565 | while(!m_stopThreads) 566 | { 567 | double beginFrame = NVPSystem::getTime(); 568 | timeFrame -= NVPSystem::getTime(); 569 | { 570 | std::unique_lock lock(job.m_hasWorkMutex); 571 | while(job.m_hasWork != job.m_frame) 572 | { 573 | job.m_hasWorkCond.wait(lock); 574 | } 575 | } 576 | 577 | if(m_stopThreads) 578 | { 579 | break; 580 | } 581 | 582 | double beginWork = NVPSystem::getTime(); 583 | timeWork -= NVPSystem::getTime(); 584 | 585 | dispatches += RunThreadFrame(job); 586 | 587 | job.m_frame++; 588 | 589 | timeWork += NVPSystem::getTime(); 590 | 591 | double currentTime = NVPSystem::getTime(); 592 | timeFrame += 
currentTime; 593 | 594 | timerFrames++; 595 | 596 | if(timerFrames && (currentTime - timePrint) > 2.0) 597 | { 598 | timeFrame /= double(timerFrames); 599 | timeWork /= double(timerFrames); 600 | 601 | timeFrame *= 1000000.0; 602 | timeWork *= 1000000.0; 603 | 604 | timePrint = currentTime; 605 | 606 | float avgdispatch = float(double(dispatches) / double(timerFrames)); 607 | 608 | #if 1 609 | LOGI("thread %d: work %6d [us] cmdbuffers %5.1f (avg)\n", tid, uint32_t(timeWork), avgdispatch); 610 | #endif 611 | timeFrame = 0; 612 | timeWork = 0; 613 | 614 | timerFrames = 0; 615 | dispatches = 0; 616 | } 617 | } 618 | 619 | { 620 | std::unique_lock lock(m_readyMutex); 621 | m_ready++; 622 | m_readyCond.notify_all(); 623 | } 624 | } 625 | 626 | 627 | void RendererThreadedVK::drawThreaded(const Resources::Global& global, VkCommandBuffer primary, Stats& stats) 628 | { 629 | ResourcesVK* res = m_resources; 630 | 631 | m_workingSet = global.workingSet; 632 | m_workerBatched = global.workerBatched; 633 | m_numCurItems = 0; 634 | m_numEnqueues = 0; 635 | m_cycleCurrent = res->m_ringFences.getCycleIndex(); 636 | 637 | stats.cmdBuffers = 0; 638 | 639 | // generate & cmdbuffers in parallel 640 | 641 | THREAD_BARRIER(); 642 | 643 | // start to dispatch threads 644 | for(uint32_t i = 0; i < m_config.workerThreads; i++) 645 | { 646 | { 647 | std::unique_lock lock(m_jobs[i].m_hasWorkMutex); 648 | m_jobs[i].m_hasWork = m_frame; 649 | } 650 | m_jobs[i].m_hasWorkCond.notify_one(); 651 | } 652 | 653 | // collect secondaries here 654 | { 655 | int numTerminated = 0; 656 | while(true) 657 | { 658 | bool hadEntry = false; 659 | DrawSetup* sc = nullptr; 660 | { 661 | std::unique_lock lock(m_drawMutex); 662 | if(m_drawQueue.empty()) 663 | { 664 | m_drawMutexCondition.wait(lock); 665 | } 666 | if(!m_drawQueue.empty()) 667 | { 668 | 669 | sc = m_drawQueue.front(); 670 | m_drawQueue.pop(); 671 | 672 | hadEntry = true; 673 | } 674 | } 675 | 676 | if(hadEntry) 677 | { 678 | if(sc) 679 | { 680 | m_numEnqueues++; 681 | THREAD_BARRIER(); 682 | vkCmdExecuteCommands(primary, (uint32_t)sc->cmdbuffers.size(), sc->cmdbuffers.data()); 683 | stats.cmdBuffers += (uint32_t)sc->cmdbuffers.size(); 684 | sc->cmdbuffers.clear(); 685 | } 686 | else 687 | { 688 | numTerminated++; 689 | } 690 | } 691 | 692 | if(numTerminated == m_config.workerThreads) 693 | { 694 | break; 695 | } 696 | std::this_thread::yield(); 697 | } 698 | } 699 | 700 | m_frame++; 701 | 702 | THREAD_BARRIER(); 703 | } 704 | 705 | void RendererThreadedVK::draw(const Resources::Global& global, Stats& stats) 706 | { 707 | ResourcesVK* res = m_resources; 708 | 709 | VkCommandBuffer primary = res->createTempCmdBuffer(); 710 | { 711 | nvvk::ProfilerVK::Section profile(res->m_profilerVK, "Render", primary); 712 | { 713 | nvvk::ProfilerVK::Section profile(res->m_profilerVK, "Draw", primary); 714 | 715 | vkCmdUpdateBuffer(primary, res->m_common.viewBuffer.buffer, 0, sizeof(SceneData), (const uint32_t*)&global.sceneUbo); 716 | res->cmdPipelineBarrier(primary); 717 | res->cmdBeginRendering(primary, true); 718 | 719 | drawThreaded(global, primary, stats); 720 | 721 | 722 | vkCmdEndRendering(primary); 723 | } 724 | } 725 | vkEndCommandBuffer(primary); 726 | res->submissionEnqueue(primary); 727 | } 728 | 729 | 730 | } // namespace generatedcmds 731 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. 
All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ 22 | 23 | #define DEBUG_FILTER 1 24 | 25 | #include "vk_ext_device_generated_commands.hpp" 26 | #include 27 | 28 | #include 29 | 30 | #include 31 | #include 32 | #include 33 | 34 | #include 35 | 36 | #include "renderer.hpp" 37 | #include "threadpool.hpp" 38 | #include "resources_vk.hpp" 39 | #include "glm/gtc/matrix_access.hpp" 40 | 41 | namespace generatedcmds { 42 | int const SAMPLE_SIZE_WIDTH(1024); 43 | int const SAMPLE_SIZE_HEIGHT(960); 44 | 45 | void setupVulkanContextInfo(nvvk::ContextCreateInfo& info) 46 | { 47 | info.apiMajor = 1; 48 | info.apiMinor = 3; 49 | 50 | static VkPhysicalDeviceShaderObjectFeaturesEXT shaderObjsFeatureExt = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT}; 51 | info.addDeviceExtension(VK_EXT_SHADER_OBJECT_EXTENSION_NAME, true, &shaderObjsFeatureExt, VK_EXT_SHADER_OBJECT_SPEC_VERSION); 52 | 53 | #if 1 54 | static VkPhysicalDeviceDeviceGeneratedCommandsFeaturesNV dgcFeaturesNv = { 55 | VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_FEATURES_NV}; 56 | info.addDeviceExtension(VK_NV_DEVICE_GENERATED_COMMANDS_EXTENSION_NAME, true, &dgcFeaturesNv, 57 | VK_NV_DEVICE_GENERATED_COMMANDS_SPEC_VERSION); 58 | 59 | static VkPhysicalDeviceDeviceGeneratedCommandsFeaturesEXT dgcFeaturesExt = { 60 | VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_FEATURES_EXT}; 61 | info.addDeviceExtension(VK_EXT_DEVICE_GENERATED_COMMANDS_EXTENSION_NAME, true, &dgcFeaturesExt, 62 | VK_EXT_DEVICE_GENERATED_COMMANDS_SPEC_VERSION); 63 | 64 | #if _DEBUG 65 | // extensions don't work with validation layer 66 | #if 1 67 | info.removeInstanceLayer("VK_LAYER_KHRONOS_validation"); 68 | #else 69 | 70 | // Removing the handle wrapping to the KHRONOS validation layer 71 | // See: https://vulkan.lunarg.com/doc/sdk/1.3.275.0/linux/khronos_validation_layer.html 72 | static const char* layer_name = "VK_LAYER_KHRONOS_validation"; 73 | static const VkBool32 handle_wrapping = VK_FALSE; 74 | 75 | static const VkLayerSettingEXT settings[] = { 76 | {layer_name, "handle_wrapping", VK_LAYER_SETTING_TYPE_BOOL32_EXT, 1, &handle_wrapping}, 77 | }; 78 | 79 | static VkLayerSettingsCreateInfoEXT layerSettingsCreateInfo = { 80 | .sType = VK_STRUCTURE_TYPE_LAYER_SETTINGS_CREATE_INFO_EXT, 81 | .settingCount = static_cast(std::size(settings)), 82 | .pSettings = settings, 83 | }; 84 | 85 | info.instanceCreateInfoExt = &layerSettingsCreateInfo; 86 | #endif 87 | #endif 88 | #endif 89 | } 90 | 91 | 92 | class Sample : public nvvk::AppWindowProfilerVK 93 | { 94 | 95 | enum GuiEnums 96 | { 97 | GUI_SHADERS, 98 | GUI_BINDINGS, 99 | GUI_RENDERER, 100 | GUI_STRATEGY, 101 | GUI_MSAA, 102 | }; 103 | 104 | public: 105 | struct Tweak 106 | { 107 | int renderer = 0; 108 | BindingMode 
binding = BINDINGMODE_INDEX_VERTEXATTRIB; 109 | Strategy strategy = STRATEGY_GROUPS; 110 | int msaa = 4; 111 | int copies = 4; 112 | bool unordered = true; 113 | bool interleaved = true; 114 | bool sorted = false; 115 | bool permutated = false; 116 | bool binned = false; 117 | bool animation = false; 118 | bool animationSpin = false; 119 | int useShaderObjs = 0; 120 | uint32_t maxShaders = 16; 121 | int cloneaxisX = 1; 122 | int cloneaxisY = 1; 123 | int cloneaxisZ = 1; 124 | float percent = 1.01f; 125 | uint32_t workingSet = 4096; 126 | uint32_t workerThreads = 4; 127 | bool workerBatched = true; 128 | }; 129 | 130 | 131 | bool m_useUI = true; 132 | bool m_supportsShaderObjs = false; 133 | bool m_supportsBinning = false; 134 | bool m_supportsNV = false; 135 | uint32_t m_maxThreads = 1; 136 | 137 | ImGuiH::Registry m_ui; 138 | double m_uiTime = 0; 139 | 140 | Tweak m_tweak; 141 | Tweak m_lastTweak; 142 | bool m_lastVsync; 143 | 144 | CadScene m_scene; 145 | std::vector m_renderersSorted; 146 | std::string m_rendererName; 147 | 148 | Renderer* m_renderer = nullptr; 149 | ResourcesVK m_resources; 150 | Resources::Global m_shared; 151 | Renderer::Stats m_renderStats; 152 | 153 | std::string m_modelFilename; 154 | double m_animBeginTime; 155 | 156 | double m_lastFrameTime = 0; 157 | double m_frames = 0; 158 | 159 | double m_statsFrameTime = 0; 160 | double m_statsCpuTime = 0; 161 | double m_statsGpuTime = 0; 162 | double m_statsGpuDrawTime = 0; 163 | double m_statsGpuBuildTime = 0; 164 | 165 | bool initProgram(); 166 | bool initScene(const char* filename, int clones, int cloneaxis); 167 | void initRenderer(int type); 168 | void deinitRenderer(); 169 | void initResources(); 170 | 171 | void setupConfigParameters(); 172 | void setRendererFromName(); 173 | 174 | Sample() 175 | : AppWindowProfilerVK(false) 176 | { 177 | m_maxThreads = ThreadPool::sysGetNumCores(); 178 | m_tweak.workerThreads = m_maxThreads; 179 | 180 | setupConfigParameters(); 181 | setupVulkanContextInfo(m_contextInfo); 182 | #if defined(NDEBUG) 183 | setVsync(false); 184 | #endif 185 | } 186 | 187 | public: 188 | bool validateConfig() override; 189 | 190 | void postBenchmarkAdvance() override { setRendererFromName(); } 191 | 192 | bool begin() override; 193 | void think(double time) override; 194 | void resize(int width, int height) override; 195 | 196 | void processUI(int width, int height, double time); 197 | 198 | nvh::CameraControl m_control; 199 | 200 | void end() override; 201 | 202 | // return true to prevent m_window updates 203 | bool mouse_pos(int x, int y) override 204 | { 205 | if(!m_useUI) 206 | return false; 207 | 208 | return ImGuiH::mouse_pos(x, y); 209 | } 210 | bool mouse_button(int button, int action) override 211 | { 212 | if(!m_useUI) 213 | return false; 214 | 215 | return ImGuiH::mouse_button(button, action); 216 | } 217 | bool mouse_wheel(int wheel) override 218 | { 219 | if(!m_useUI) 220 | return false; 221 | 222 | return ImGuiH::mouse_wheel(wheel); 223 | } 224 | bool key_char(int key) override 225 | { 226 | if(!m_useUI) 227 | return false; 228 | 229 | return ImGuiH::key_char(key); 230 | } 231 | bool key_button(int button, int action, int mods) override 232 | { 233 | if(!m_useUI) 234 | return false; 235 | 236 | return ImGuiH::key_button(button, action, mods); 237 | } 238 | }; 239 | 240 | 241 | bool Sample::initProgram() 242 | { 243 | return true; 244 | } 245 | 246 | bool Sample::initScene(const char* filename, int clones, int cloneaxis) 247 | { 248 | std::string modelFilename(filename); 249 | 250 | 
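    // If the path does not resolve as given, fall back to searching for the
    // bare filename in the working directory and in the project's source and
    // download directories.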
if(!nvh::fileExists(filename)) 251 | { 252 | modelFilename = nvh::getFileName(filename); 253 | std::vector searchPaths; 254 | searchPaths.push_back("./"); 255 | searchPaths.push_back(exePath() + PROJECT_RELDIRECTORY); 256 | searchPaths.push_back(exePath() + PROJECT_DOWNLOAD_RELDIRECTORY); 257 | modelFilename = nvh::findFile(modelFilename, searchPaths); 258 | } 259 | 260 | m_scene.unload(); 261 | 262 | bool status = m_scene.loadCSF(modelFilename.c_str(), clones, cloneaxis); 263 | if(status) 264 | { 265 | LOGI("\nscene %s\n", filename); 266 | LOGI("geometries: %6d\n", uint32_t(m_scene.m_geometry.size())); 267 | LOGI("materials: %6d\n", uint32_t(m_scene.m_materials.size())); 268 | LOGI("nodes: %6d\n", uint32_t(m_scene.m_matrices.size())); 269 | LOGI("objects: %6d\n", uint32_t(m_scene.m_objects.size())); 270 | LOGI("\n"); 271 | } 272 | else 273 | { 274 | LOGW("\ncould not load model %s\n", modelFilename.c_str()); 275 | } 276 | 277 | m_shared.animUbo.numMatrices = uint(m_scene.m_matrices.size()); 278 | 279 | return status; 280 | } 281 | 282 | void Sample::deinitRenderer() 283 | { 284 | if(m_renderer) 285 | { 286 | m_resources.synchronize(); 287 | m_renderer->deinit(); 288 | delete m_renderer; 289 | m_renderer = nullptr; 290 | } 291 | } 292 | 293 | void Sample::initResources() 294 | { 295 | std::string prepend; 296 | CadScene::IndexingBits bits = m_scene.getIndexingBits(); 297 | prepend += nvh::ShaderFileManager::format("#define INDEXED_MATRIX_BITS %d\n", bits.matrices); 298 | prepend += nvh::ShaderFileManager::format("#define INDEXED_MATERIAL_BITS %d\n", bits.materials); 299 | 300 | bool valid = m_resources.init(&m_context, &m_swapChain, &m_profiler); 301 | valid = valid && m_resources.initFramebuffer(m_windowState.m_swapSize[0], m_windowState.m_swapSize[1], m_tweak.msaa, getVsync()); 302 | valid = valid && m_resources.initPrograms(exePath(), prepend); 303 | valid = valid && m_resources.initScene(m_scene); 304 | m_resources.m_frame = 0; 305 | 306 | if(!valid) 307 | { 308 | LOGE("resource initialization failed\n"); 309 | exit(-1); 310 | } 311 | 312 | m_lastVsync = getVsync(); 313 | } 314 | 315 | void Sample::initRenderer(int typesort) 316 | { 317 | int type = m_renderersSorted[typesort]; 318 | 319 | deinitRenderer(); 320 | 321 | { 322 | uint32_t supported = Renderer::getRegistry()[type]->supportedBindingModes(); 323 | BindingMode mode = BINDINGMODE_DSETS; 324 | m_ui.enumReset(GUI_BINDINGS); 325 | if(supported & (1 << BINDINGMODE_DSETS)) 326 | { 327 | m_ui.enumAdd(GUI_BINDINGS, BINDINGMODE_DSETS, "dsetbinding"); 328 | mode = BINDINGMODE_DSETS; 329 | } 330 | if(supported & (1 << BINDINGMODE_PUSHADDRESS)) 331 | { 332 | m_ui.enumAdd(GUI_BINDINGS, BINDINGMODE_PUSHADDRESS, "pushaddress"); 333 | mode = BINDINGMODE_PUSHADDRESS; 334 | } 335 | if(supported & (1 << BINDINGMODE_INDEX_BASEINSTANCE) && m_scene.supportsIndexing()) 336 | { 337 | m_ui.enumAdd(GUI_BINDINGS, BINDINGMODE_INDEX_BASEINSTANCE, "baseinstance index"); 338 | mode = BINDINGMODE_INDEX_BASEINSTANCE; 339 | } 340 | if(supported & (1 << BINDINGMODE_INDEX_VERTEXATTRIB) && m_scene.supportsIndexing()) 341 | { 342 | m_ui.enumAdd(GUI_BINDINGS, BINDINGMODE_INDEX_VERTEXATTRIB, "inst.vertexattrib index"); 343 | mode = BINDINGMODE_INDEX_VERTEXATTRIB; 344 | } 345 | 346 | if(!(supported & (1 << m_tweak.binding))) 347 | { 348 | m_tweak.binding = mode; 349 | } 350 | } 351 | 352 | { 353 | bool supported = Renderer::getRegistry()[type]->supportsShaderObjs(); 354 | bool useShaderObjs = false; 355 | m_ui.enumReset(GUI_SHADERS); 356 | m_ui.enumAdd(GUI_SHADERS, 
SHADERMODE_PIPELINE, "pipeline"); 357 | if(supported) 358 | { 359 | m_ui.enumAdd(GUI_SHADERS, SHADERMODE_OBJS, "shaderobjs"); 360 | } 361 | 362 | if(!supported && m_tweak.useShaderObjs) 363 | { 364 | m_tweak.useShaderObjs = false; 365 | } 366 | } 367 | 368 | if(m_tweak.sorted) 369 | { 370 | m_tweak.permutated = false; 371 | } 372 | 373 | m_tweak.maxShaders = std::min(m_tweak.maxShaders, std::min(uint32_t(NUM_MATERIAL_SHADERS), 374 | Renderer::getRegistry()[type]->supportedShaderBinds())); 375 | m_tweak.maxShaders = std::max(m_tweak.maxShaders, uint32_t(1)); 376 | 377 | Renderer::Config config; 378 | config.objectFrom = 0; 379 | config.objectNum = uint32_t(double(m_scene.m_objects.size()) * double(m_tweak.percent)); 380 | config.strategy = m_tweak.strategy; 381 | config.bindingMode = m_tweak.binding; 382 | config.sorted = m_tweak.sorted; 383 | config.binned = m_tweak.binned; 384 | config.interleaved = m_tweak.interleaved; 385 | config.unordered = m_tweak.unordered; 386 | config.permutated = m_tweak.permutated; 387 | config.maxShaders = m_tweak.maxShaders; 388 | config.workerThreads = m_tweak.workerThreads; 389 | config.shaderObjs = m_tweak.useShaderObjs != 0; 390 | 391 | m_renderStats = Renderer::Stats(); 392 | 393 | LOGI("renderer: %s\n", Renderer::getRegistry()[type]->name()); 394 | m_renderer = Renderer::getRegistry()[type]->create(); 395 | m_renderer->init(&m_scene, &m_resources, config, m_renderStats); 396 | 397 | LOGI("drawCalls: %9d\n", m_renderStats.drawCalls); 398 | LOGI("drawTris: %9d\n", m_renderStats.drawTriangles); 399 | LOGI("shaderBinds: %9d\n", m_renderStats.shaderBindings); 400 | LOGI("prep.Buffer: %9d KB\n\n", m_renderStats.preprocessSizeKB); 401 | } 402 | 403 | 404 | void Sample::end() 405 | { 406 | deinitRenderer(); 407 | m_resources.deinit(); 408 | ResourcesVK::deinitImGui(m_context); 409 | } 410 | 411 | 412 | bool Sample::begin() 413 | { 414 | #if !PRINT_TIMER_STATS 415 | m_profilerPrint = false; 416 | m_timeInTitle = true; 417 | #else 418 | m_profilerPrint = true; 419 | m_timeInTitle = true; 420 | #endif 421 | 422 | 423 | ImGuiH::Init(m_windowState.m_winSize[0], m_windowState.m_winSize[1], this); 424 | 425 | if(m_context.hasDeviceExtension(VK_EXT_DEVICE_GENERATED_COMMANDS_EXTENSION_NAME)) 426 | { 427 | bool loaded = load_VK_EXT_device_generated_commands(m_context.m_instance, m_context.m_device); 428 | if(!loaded) 429 | { 430 | LOGE("Failed to load functions for VK_EXT_DEVICE_GENERATED_COMMANDS_EXTENSION\n"); 431 | return false; 432 | } 433 | 434 | VkPhysicalDeviceDeviceGeneratedCommandsPropertiesEXT props = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_PROPERTIES_EXT}; 435 | VkPhysicalDeviceProperties2 props2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2}; 436 | props2.pNext = &props; 437 | vkGetPhysicalDeviceProperties2(m_context.m_physicalDevice, &props2); 438 | 439 | if(props.deviceGeneratedCommandsMultiDrawIndirectCount) 440 | { 441 | m_supportsBinning = true; 442 | } 443 | } 444 | m_supportsNV = m_context.hasDeviceExtension(VK_NV_DEVICE_GENERATED_COMMANDS_EXTENSION_NAME); 445 | m_supportsShaderObjs = m_context.hasDeviceExtension(VK_EXT_SHADER_OBJECT_EXTENSION_NAME); 446 | 447 | bool validated(true); 448 | validated = validated && initProgram(); 449 | validated = validated 450 | && initScene(m_modelFilename.c_str(), m_tweak.copies - 1, 451 | (m_tweak.cloneaxisX << 0) | (m_tweak.cloneaxisY << 1) | (m_tweak.cloneaxisZ << 2)); 452 | 453 | if(!validated) 454 | { 455 | LOGE("resources failed\n"); 456 | return false; 457 | } 458 | 459 | 
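  // Renderer selection below builds a composite sort key per available
  // renderer: the type's priority() in the upper 16 bits, the registry index
  // in the lower 16. Sorting the keys orders renderers by priority, and
  // masking with 0xFFFF afterwards recovers the plain registry index. A
  // minimal sketch of the scheme (keys is a hypothetical name; the code
  // below uses m_renderersSorted):
  //
  //   uint32_t sortkey = uint32_t(i) | (registry[i]->priority() << 16);
  //   std::sort(keys.begin(), keys.end());   // priority-major ordering
  //   uint32_t index = keys[n] & 0xFFFF;     // back to registry index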
ResourcesVK::initImGui(m_context); 460 | 461 | const Renderer::Registry registry = Renderer::getRegistry(); 462 | for(size_t i = 0; i < registry.size(); i++) 463 | { 464 | if(registry[i]->isAvailable(m_context)) 465 | { 466 | uint sortkey = uint(i); 467 | sortkey |= registry[i]->priority() << 16; 468 | m_renderersSorted.push_back(sortkey); 469 | } 470 | } 471 | 472 | if(m_renderersSorted.empty()) 473 | { 474 | LOGE("No renderers available\n"); 475 | return false; 476 | } 477 | 478 | std::sort(m_renderersSorted.begin(), m_renderersSorted.end()); 479 | 480 | for(size_t i = 0; i < m_renderersSorted.size(); i++) 481 | { 482 | m_renderersSorted[i] &= 0xFFFF; 483 | } 484 | 485 | for(size_t i = 0; i < m_renderersSorted.size(); i++) 486 | { 487 | LOGI("renderers found: %d %s\n", uint32_t(i), registry[m_renderersSorted[i]]->name()); 488 | } 489 | 490 | setRendererFromName(); 491 | 492 | if(m_useUI) 493 | { 494 | auto& imgui_io = ImGui::GetIO(); 495 | imgui_io.IniFilename = nullptr; 496 | 497 | for(size_t i = 0; i < m_renderersSorted.size(); i++) 498 | { 499 | m_ui.enumAdd(GUI_RENDERER, int(i), registry[m_renderersSorted[i]]->name()); 500 | } 501 | 502 | m_ui.enumAdd(GUI_STRATEGY, STRATEGY_GROUPS, "object material groups"); 503 | m_ui.enumAdd(GUI_STRATEGY, STRATEGY_INDIVIDUAL, "object individual surfaces"); 504 | m_ui.enumAdd(GUI_STRATEGY, STRATEGY_SINGLE, "object as single mesh"); 505 | 506 | m_ui.enumAdd(GUI_MSAA, 0, "none"); 507 | m_ui.enumAdd(GUI_MSAA, 2, "2x"); 508 | m_ui.enumAdd(GUI_MSAA, 4, "4x"); 509 | m_ui.enumAdd(GUI_MSAA, 8, "8x"); 510 | } 511 | 512 | m_control.m_sceneOrbit = glm::vec3(m_scene.m_bbox.max + m_scene.m_bbox.min) * 0.5f; 513 | m_control.m_sceneDimension = glm::length((m_scene.m_bbox.max - m_scene.m_bbox.min)); 514 | m_control.m_viewMatrix = glm::lookAt(m_control.m_sceneOrbit - (-vec3(1, 1, 1) * m_control.m_sceneDimension * 0.5f), 515 | m_control.m_sceneOrbit, vec3(0, 1, 0)); 516 | 517 | m_shared.animUbo.sceneCenter = m_control.m_sceneOrbit; 518 | m_shared.animUbo.sceneDimension = m_control.m_sceneDimension * 0.2f; 519 | m_shared.animUbo.numMatrices = uint(m_scene.m_matrices.size()); 520 | m_shared.sceneUbo.wLightPos = (m_scene.m_bbox.max + m_scene.m_bbox.min) * 0.5f + m_control.m_sceneDimension; 521 | m_shared.sceneUbo.wLightPos.w = 1.0; 522 | 523 | initResources(); 524 | initRenderer(m_tweak.renderer); 525 | 526 | m_lastTweak = m_tweak; 527 | 528 | return validated; 529 | } 530 | 531 | 532 | void Sample::processUI(int width, int height, double time) 533 | { 534 | // Update imgui configuration 535 | auto& imgui_io = ImGui::GetIO(); 536 | imgui_io.DeltaTime = static_cast(time - m_uiTime); 537 | imgui_io.DisplaySize = ImVec2(width, height); 538 | 539 | m_uiTime = time; 540 | 541 | ImGui::NewFrame(); 542 | ImGui::SetNextWindowSize(ImGuiH::dpiScaled(380, 0), ImGuiCond_FirstUseEver); 543 | if(ImGui::Begin("NVIDIA " PROJECT_NAME, nullptr)) 544 | { 545 | m_ui.enumCombobox(GUI_RENDERER, "renderer", &m_tweak.renderer); 546 | m_ui.enumCombobox(GUI_SHADERS, "shaders", &m_tweak.useShaderObjs); 547 | m_ui.enumCombobox(GUI_BINDINGS, "binding", &m_tweak.binding); 548 | m_ui.enumCombobox(GUI_STRATEGY, "strategy", &m_tweak.strategy); 549 | 550 | ImGui::PushItemWidth(ImGuiH::dpiScaled(100)); 551 | 552 | //guiRegistry.enumCombobox(GUI_SUPERSAMPLE, "supersample", &tweak.supersample); 553 | ImGuiH::InputIntClamped("max shadergroups", &m_tweak.maxShaders, 1, NUM_MATERIAL_SHADERS, 1, 1, ImGuiInputTextFlags_EnterReturnsTrue); 554 | ImGuiH::InputIntClamped("copies", &m_tweak.copies, 1, 16, 1, 1, 


void Sample::processUI(int width, int height, double time)
{
  // Update imgui configuration
  auto& imgui_io       = ImGui::GetIO();
  imgui_io.DeltaTime   = static_cast<float>(time - m_uiTime);
  imgui_io.DisplaySize = ImVec2(width, height);

  m_uiTime = time;

  ImGui::NewFrame();
  ImGui::SetNextWindowSize(ImGuiH::dpiScaled(380, 0), ImGuiCond_FirstUseEver);
  if(ImGui::Begin("NVIDIA " PROJECT_NAME, nullptr))
  {
    m_ui.enumCombobox(GUI_RENDERER, "renderer", &m_tweak.renderer);
    m_ui.enumCombobox(GUI_SHADERS, "shaders", &m_tweak.useShaderObjs);
    m_ui.enumCombobox(GUI_BINDINGS, "binding", &m_tweak.binding);
    m_ui.enumCombobox(GUI_STRATEGY, "strategy", &m_tweak.strategy);

    ImGui::PushItemWidth(ImGuiH::dpiScaled(100));

    //guiRegistry.enumCombobox(GUI_SUPERSAMPLE, "supersample", &tweak.supersample);
    ImGuiH::InputIntClamped("max shadergroups", &m_tweak.maxShaders, 1, NUM_MATERIAL_SHADERS, 1, 1,
                            ImGuiInputTextFlags_EnterReturnsTrue);
    ImGuiH::InputIntClamped("copies", &m_tweak.copies, 1, 16, 1, 1, ImGuiInputTextFlags_EnterReturnsTrue);
    ImGui::SliderFloat("pct visible", &m_tweak.percent, 0.0f, 1.001f);
    ImGui::Checkbox("sorted once (minimized state changes)", &m_tweak.sorted);
    ImGui::Checkbox("permutated (random state changes,\ngen nv: use seqindex)", &m_tweak.permutated);
    ImGui::Checkbox("gen: unordered (non-coherent)", &m_tweak.unordered);
    if(m_supportsBinning)
    {
      ImGui::Checkbox("gen ext: binned via draw_indexed_count", &m_tweak.binned);
    }
    if(m_supportsNV)
    {
      ImGui::Checkbox("gen nv: interleaved inputs", &m_tweak.interleaved);
    }

    ImGuiH::InputIntClamped("threaded: worker threads", &m_tweak.workerThreads, 1, m_maxThreads, 1, 1,
                            ImGuiInputTextFlags_EnterReturnsTrue);
    ImGuiH::InputIntClamped("threaded: drawcalls per cmdbuffer", &m_tweak.workingSet, 512, 1 << 20, 512, 1024,
                            ImGuiInputTextFlags_EnterReturnsTrue);
    ImGui::Checkbox("threaded: batched submission", &m_tweak.workerBatched);
    ImGui::Checkbox("animation", &m_tweak.animation);
    ImGui::PopItemWidth();
    ImGui::Separator();

    {
      // refresh the averaged timings every `avg` profiler frames; use a
      // shorter window when frames are slow so the readout stays responsive
      int avg = 50;

      if(m_lastFrameTime == 0)
      {
        m_lastFrameTime = time;
        m_frames        = -1;
      }

      if(m_frames > 4)
      {
        double curavg = (time - m_lastFrameTime) / m_frames;
        if(curavg > 1.0 / 30.0)
        {
          avg = 10;
        }
      }

      if(m_profiler.getTotalFrames() % avg == avg - 1)
      {
        nvh::Profiler::TimerInfo info;
        m_profiler.getTimerInfo("Render", info);
        m_statsCpuTime      = info.cpu.average;
        m_statsGpuTime      = info.gpu.average;
        m_statsGpuBuildTime = 0;
        bool hasPre         = m_profiler.getTimerInfo("Pre", info);
        m_statsGpuBuildTime = hasPre ? info.gpu.average : 0;
        m_profiler.getTimerInfo("Draw", info);
        m_statsGpuDrawTime = info.gpu.average;
        m_statsFrameTime   = (time - m_lastFrameTime) / m_frames;
        m_lastFrameTime    = time;
        m_frames           = -1;
      }

      m_frames++;

      float gpuTimeF = float(m_statsGpuTime);
      float cpuTimeF = float(m_statsCpuTime);
      float bldTimef = float(m_statsGpuBuildTime);
      float drwTimef = float(m_statsGpuDrawTime);
      float maxTimeF = std::max(std::max(cpuTimeF, gpuTimeF), 0.0001f);

      //ImGui::Text("Frame [ms]: %2.1f", m_statsFrameTime*1000.0f);
      ImGui::Text("Render CPU [ms]: %2.3f", cpuTimeF / 1000.0f);
      ImGui::Text("Render GPU [ms]: %2.3f", gpuTimeF / 1000.0f);
      //ImGui::ProgressBar(gpuTimeF/maxTimeF, ImVec2(0.0f, 0.0f));
      ImGui::Text("- Preproc. GPU [ms]: %2.3f", bldTimef / 1000.0f);
      ImGui::ProgressBar(bldTimef / maxTimeF, ImVec2(0.0f, 0.0f));
      ImGui::Text("- Draw GPU [ms]: %2.3f", drwTimef / 1000.0f);
      ImGui::ProgressBar(drwTimef / maxTimeF, ImVec2(0.0f, 0.0f));

      //ImGui::ProgressBar(cpuTimeF / maxTimeF, ImVec2(0.0f, 0.0f));
      ImGui::Separator();
      ImGui::Text(" cmdBuffers: %9d\n", m_renderStats.cmdBuffers);
      ImGui::Text(" drawCalls: %9d\n", m_renderStats.drawCalls);
      ImGui::Text(" drawTris: %9d\n", m_renderStats.drawTriangles);
      ImGui::Text(" serial shaderBinds: %9d\n", m_renderStats.shaderBindings);
      ImGui::Text(" dgc sequences: %9d\n", m_renderStats.sequences);
      ImGui::Text(" dgc preprocessBuffer: %9d KB\n", m_renderStats.preprocessSizeKB);
      ImGui::Text(" dgc indirectBuffer: %9d KB\n\n", m_renderStats.indirectSizeKB);
    }
  }
  ImGui::End();
}
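
// A simplified standalone sketch of the adaptive averaging window used above,
// with hypothetical names (windowStart/framesSoFar loosely mirror
// m_lastFrameTime and m_frames); the window shrinks from 50 to 10 frames once
// a frame costs more than ~33 ms, so slow configurations still refresh their
// stats quickly:
//
//   int window = (framesSoFar > 4 && (now - windowStart) / framesSoFar > 1.0 / 30.0) ? 10 : 50;
//   if(profilerFrame % window == window - 1)
//   {
//     averageSeconds = (now - windowStart) / framesSoFar;  // publish average
//     windowStart    = now;                                // restart window
//     framesSoFar    = 0;
//   }
//   framesSoFar++;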

void Sample::think(double time)
{
  int width  = m_windowState.m_swapSize[0];
  int height = m_windowState.m_swapSize[1];

  if(m_useUI)
  {
    processUI(width, height, time);
  }

  m_control.processActions({m_windowState.m_winSize[0], m_windowState.m_winSize[1]},
                           glm::vec2(m_windowState.m_mouseCurrent[0], m_windowState.m_mouseCurrent[1]),
                           m_windowState.m_mouseButtonFlags, m_windowState.m_mouseWheel);

  if(m_tweak.msaa != m_lastTweak.msaa || getVsync() != m_lastVsync)
  {
    m_lastVsync = getVsync();
    m_resources.initFramebuffer(width, height, m_tweak.msaa, getVsync());
  }

  bool sceneChanged = false;
  if(m_tweak.copies != m_lastTweak.copies || m_tweak.cloneaxisX != m_lastTweak.cloneaxisX
     || m_tweak.cloneaxisY != m_lastTweak.cloneaxisY || m_tweak.cloneaxisZ != m_lastTweak.cloneaxisZ)
  {
    sceneChanged = true;
    m_resources.synchronize();
    deinitRenderer();
    m_resources.deinitScene();
    initScene(m_modelFilename.c_str(), m_tweak.copies - 1,
              (m_tweak.cloneaxisX << 0) | (m_tweak.cloneaxisY << 1) | (m_tweak.cloneaxisZ << 2));
    m_resources.initScene(m_scene);
  }

  bool rendererChanged = false;
  if(m_windowState.onPress(KEY_R) || m_tweak.copies != m_lastTweak.copies)
  {
    m_resources.synchronize();
    std::string prepend;
    CadScene::IndexingBits bits = m_scene.getIndexingBits();
    prepend += nvh::ShaderFileManager::format("#define INDEXED_MATRIX_BITS %d\n", bits.matrices);
    prepend += nvh::ShaderFileManager::format("#define INDEXED_MATERIAL_BITS %d\n", bits.materials);
    m_resources.reloadPrograms(prepend);
    rendererChanged = true;
  }

  if(sceneChanged || rendererChanged || m_tweak.renderer != m_lastTweak.renderer
     || m_tweak.binding != m_lastTweak.binding || m_tweak.strategy != m_lastTweak.strategy
     || m_tweak.sorted != m_lastTweak.sorted || m_tweak.percent != m_lastTweak.percent
     || m_tweak.workerThreads != m_lastTweak.workerThreads || m_tweak.workerBatched != m_lastTweak.workerBatched
     || m_tweak.maxShaders != m_lastTweak.maxShaders || m_tweak.interleaved != m_lastTweak.interleaved
     || m_tweak.permutated != m_lastTweak.permutated || m_tweak.unordered != m_lastTweak.unordered
     || m_tweak.binned != m_lastTweak.binned || m_tweak.useShaderObjs != m_lastTweak.useShaderObjs)
  {
    m_resources.synchronize();
    initRenderer(m_tweak.renderer);
  }
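
  // Change detection works by comparing m_tweak member-by-member against the
  // m_lastTweak snapshot taken at the end of think(). A terser alternative
  // would be memcmp(&m_tweak, &m_lastTweak, sizeof(m_tweak)) == 0, but that
  // is only reliable for tightly packed trivially-copyable structs with all
  // padding zeroed, so the explicit comparison above is the safer choice.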

  m_resources.beginFrame();

  if(m_tweak.animation != m_lastTweak.animation)
  {
    m_resources.synchronize();
    m_resources.animationReset();

    m_animBeginTime = time;
  }

  {
    m_shared.winWidth      = width;
    m_shared.winHeight     = height;
    m_shared.workingSet    = m_tweak.workingSet;
    m_shared.workerBatched = m_tweak.workerBatched;

    SceneData& sceneUbo = m_shared.sceneUbo;

    sceneUbo.viewport = ivec2(width, height);

    glm::mat4 projection = glm::perspectiveRH_ZO(glm::radians(45.f), float(width) / float(height),
                                                 m_control.m_sceneDimension * 0.001f, m_control.m_sceneDimension * 10.0f);
    // flip Y: Vulkan's clip space is y-down relative to GL conventions
    projection[1][1] *= -1;
    glm::mat4 view = m_control.m_viewMatrix;

    if(m_tweak.animation && m_tweak.animationSpin)
    {
      double animTime = (time - m_animBeginTime) * 0.3 + glm::pi<double>() * 0.2;
      vec3   dir      = vec3(cos(animTime), 1, sin(animTime));
      view = glm::lookAt(m_control.m_sceneOrbit - (-dir * m_control.m_sceneDimension * 0.5f), m_control.m_sceneOrbit,
                         vec3(0, 1, 0));
    }

    sceneUbo.viewProjMatrix = projection * view;
    sceneUbo.viewMatrix     = view;
    sceneUbo.viewMatrixIT   = glm::transpose(glm::inverse(view));

    sceneUbo.viewPos = glm::row(sceneUbo.viewMatrixIT, 3);
    sceneUbo.viewDir = -glm::row(view, 2);

    sceneUbo.wLightPos   = glm::row(sceneUbo.viewMatrixIT, 3);
    sceneUbo.wLightPos.w = 1.0;
  }

  if(m_tweak.animation)
  {
    AnimationData& animUbo = m_shared.animUbo;
    animUbo.time           = float(time - m_animBeginTime);

    m_resources.animation(m_shared);
  }

  {
    m_renderer->draw(m_shared, m_renderStats);
  }

  {
    if(m_useUI)
    {
      ImGui::Render();
      m_shared.imguiDrawData = ImGui::GetDrawData();
    }
    else
    {
      m_shared.imguiDrawData = nullptr;
    }

    m_resources.blitFrame(m_shared);
  }

  m_resources.endFrame();
  m_resources.m_frame++;

  if(m_useUI)
  {
    ImGui::EndFrame();
  }

  m_lastTweak = m_tweak;
}

void Sample::resize(int width, int height)
{
  m_resources.initFramebuffer(width, height, m_tweak.msaa, getVsync());
}

void Sample::setRendererFromName()
{
  if(!m_rendererName.empty())
  {
    const Renderer::Registry registry = Renderer::getRegistry();
    for(size_t i = 0; i < m_renderersSorted.size(); i++)
    {
      if(strcmp(m_rendererName.c_str(), registry[m_renderersSorted[i]]->name()) == 0)
      {
        m_tweak.renderer = int(i);
      }
    }
  }
}
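
// setupConfigParameters() below registers the command-line options that drive
// m_tweak. A hypothetical invocation, assuming nvh::ParameterList's usual
// "-name value" convention (the renderer name and model file are placeholders,
// not values guaranteed by this document):
//
//   vk_device_generated_cmds mymodel.csf.gz -renderernamed "generated ext" -copies 4 -animation 1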

void Sample::setupConfigParameters()
{
  m_parameterList.addFilename(".csf", &m_modelFilename);
  m_parameterList.addFilename(".csf.gz", &m_modelFilename);
  m_parameterList.addFilename(".gltf", &m_modelFilename);

  m_parameterList.add("vkdevice", &m_contextInfo.compatibleDeviceIndex);

  m_parameterList.add("noui", &m_useUI, false);

  m_parameterList.add("unordered", &m_tweak.unordered);
  m_parameterList.add("interleaved", &m_tweak.interleaved);
  m_parameterList.add("binned", &m_tweak.binned);
  m_parameterList.add("permutated", &m_tweak.permutated);
  m_parameterList.add("sorted", &m_tweak.sorted);
  m_parameterList.add("percent", &m_tweak.percent);
  m_parameterList.add("renderer", (uint32_t*)&m_tweak.renderer);
  m_parameterList.add("renderernamed", &m_rendererName);
  m_parameterList.add("strategy", (uint32_t*)&m_tweak.strategy);
  m_parameterList.add("bindingmode", (uint32_t*)&m_tweak.binding);
  m_parameterList.add("shadermode", (uint32_t*)&m_tweak.useShaderObjs);
  m_parameterList.add("msaa", &m_tweak.msaa);
  m_parameterList.add("copies", &m_tweak.copies);
  m_parameterList.add("animation", &m_tweak.animation);
  m_parameterList.add("animationspin", &m_tweak.animationSpin);
  m_parameterList.add("minstatechanges", &m_tweak.sorted);  // alias for "sorted"
  m_parameterList.add("maxshaders", &m_tweak.maxShaders);
  m_parameterList.add("workerbatched", &m_tweak.workerBatched);
  m_parameterList.add("workerthreads", &m_tweak.workerThreads);
  m_parameterList.add("workingset", &m_tweak.workingSet);
}

bool Sample::validateConfig()
{
  if(m_modelFilename.empty())
  {
    LOGI("no .csf or .gltf model file specified\n");
    LOGI("exe parameters...\n");
    m_parameterList.print();
    return false;
  }
  return true;
}

}  // namespace generatedcmds

using namespace generatedcmds;

int main(int argc, const char** argv)
{
  NVPSystem system(PROJECT_NAME);

#if defined(_WIN32) && defined(NDEBUG)
  //SetPriorityClass(GetCurrentProcess(), HIGH_PRIORITY_CLASS);
#endif

  Sample sample;
  {
    std::vector<std::string> directories;
    directories.push_back(NVPSystem::exePath());
    directories.push_back(NVPSystem::exePath() + "/media");
    directories.push_back(NVPSystem::exePath() + std::string(PROJECT_DOWNLOAD_RELDIRECTORY));
    sample.m_modelFilename = nvh::findFile(std::string("geforce.csf.gz"), directories);
  }

  return sample.run(PROJECT_NAME, argc, argv, SAMPLE_SIZE_WIDTH, SAMPLE_SIZE_HEIGHT);
}
--------------------------------------------------------------------------------