├── .gitattributes ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── CONTRIBUTING ├── GLSL ├── GLSL_grid.frag ├── GLSL_grid.vert ├── GLSL_mesh.frag ├── GLSL_mesh.vert ├── GLSL_mesh_lines.frag ├── GLSL_mesh_lines.vert └── noise64x64_RGB.dds ├── GLSLShader.cpp ├── GLSLShader.h ├── LICENSE ├── NVK.cpp ├── NVK.h ├── README.md ├── bk3dBase.h ├── bk3dDefs.h ├── bk3dEx.h ├── bk3d_glcommandlist.cpp ├── bk3d_glstandard.cpp ├── bk3d_vk.cpp ├── dedicated_image.cpp ├── dedicated_image.h ├── doc ├── Fences.JPG ├── Memory_chunks.JPG ├── Multithreading.md ├── NSight_Captures.md ├── OpenGL.JPG ├── Rendering_Modes.md ├── Results.md ├── Thread_workers.JPG ├── Vulkan.JPG ├── Vulkan_Code_Style.md ├── Vulkan_MT.JPG ├── Vulkan_Renderer.md ├── cmd-buffers.JPG ├── offsets.JPG ├── sample.jpg ├── toggles.JPG ├── vkbuffers.JPG └── vulkan_bk3dthreaded.md ├── gl_nv_command_list.h ├── gl_nv_commandlist_helpers.h ├── gl_vk_bk3dthreaded.cpp ├── gl_vk_bk3dthreaded.h ├── helper_fbo.h ├── mt ├── CThread.cpp ├── CThread.h ├── CThreadWork.cpp ├── CThreadWork.h ├── CThreadWork.pptx └── RingBuffer.h ├── window_surface_vk.cpp └── window_surface_vk.hpp /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ############################# 2 | #Spirv 3 | ############################# 4 | *.spv 5 | 6 | ################# 7 | ## Eclipse 8 | ################# 9 | 10 | *.pydevproject 11 | .project 12 | .metadata 13 | bin/ 14 | tmp/ 15 | *.tmp 16 | *.bak 17 | *.swp 18 | *~.nib 19 | local.properties 20 | .classpath 21 | .settings/ 22 | .loadpath 23 | 24 | # External tool builders 25 | .externalToolBuilders/ 26 | 27 | # Locally stored "Eclipse launch configurations" 28 | *.launch 29 | 30 | # CDT-specific 31 | .cproject 32 | 33 | # PDT-specific 34 | .buildpath 35 | 36 | ################# 37 | ## KDev / Linux 38 | ################# 39 | .kdev4 40 | *.kdev4 41 | *.*~ 42 | ################# 43 | ## Visual Studio 44 | ################# 45 | 46 | ## Ignore Visual Studio temporary files, build results, and 47 | ## files generated by popular Visual Studio add-ons. 
48 | 49 | # User-specific files 50 | *.vcxproj 51 | *.filters 52 | *.sln 53 | *.user 54 | *.suo 55 | *.user 56 | *.sln.docstates 57 | 58 | # Build results 59 | [Dd]ebug/ 60 | [Rr]elease/ 61 | *_i.c 62 | *_p.c 63 | *.ilk 64 | *.meta 65 | *.obj 66 | *.pch 67 | *.pdb 68 | *.pgc 69 | *.pgd 70 | *.rsp 71 | *.sbr 72 | *.tlb 73 | *.tli 74 | *.tlh 75 | *.tmp 76 | *.vspscc 77 | .builds 78 | *.dotCover 79 | 80 | ## TODO: If you have NuGet Package Restore enabled, uncomment this 81 | #packages/ 82 | 83 | # Visual C++ cache files 84 | ipch/ 85 | *.aps 86 | *.ncb 87 | *.opensdf 88 | *.sdf 89 | 90 | # Visual Studio profiler 91 | *.psess 92 | *.vsp 93 | 94 | # ReSharper is a .NET coding add-in 95 | _ReSharper* 96 | 97 | # Installshield output folder 98 | [Ee]xpress 99 | 100 | # DocProject is a documentation generator add-in 101 | DocProject/buildhelp/ 102 | DocProject/Help/*.HxT 103 | DocProject/Help/*.HxC 104 | DocProject/Help/*.hhc 105 | DocProject/Help/*.hhk 106 | DocProject/Help/*.hhp 107 | DocProject/Help/Html2 108 | DocProject/Help/html 109 | 110 | # Click-Once directory 111 | publish 112 | 113 | # Others 114 | [Bb]in 115 | [Oo]bj 116 | sql 117 | TestResults 118 | *.Cache 119 | ClientBin 120 | stylecop.* 121 | ~$* 122 | *.dbmdl 123 | Generated_Code #added for RIA/Silverlight projects 124 | 125 | # Backup & report files from converting an old project file to a newer 126 | # Visual Studio version. Backup files are not needed, because we have git ;-) 127 | _UpgradeReport_Files/ 128 | Backup*/ 129 | UpgradeLog*.XML 130 | 131 | 132 | 133 | ############ 134 | ## Windows 135 | ############ 136 | 137 | # Windows image file caches 138 | Thumbs.db 139 | 140 | # Folder config file 141 | Desktop.ini 142 | 143 | 144 | ############# 145 | ## Python 146 | ############# 147 | 148 | *.py[co] 149 | 150 | # Packages 151 | *.egg 152 | *.egg-info 153 | dist 154 | build 155 | eggs 156 | parts 157 | bin 158 | var 159 | sdist 160 | develop-eggs 161 | .installed.cfg 162 | 163 | # Installer logs 164 | pip-log.txt 165 | 166 | # Unit test / coverage reports 167 | .coverage 168 | .tox 169 | 170 | #Translations 171 | *.mo 172 | 173 | #Mr Developer 174 | .mr.developer.cfg 175 | 176 | # Mac crap 177 | .DS_Store 178 | 179 | ############################# 180 | #specific to the project 181 | ############################# 182 | cmake_built 183 | cmake_build 184 | build_kd6 185 | build 186 | 187 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "nvpro_core"] 2 | path = nvpro_core 3 | url = https://github.com/nvpro-samples/nvpro_core.git 4 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | set(PROJNAME gl_vk_bk3dthreaded) 3 | Project(${PROJNAME}) 4 | Message(STATUS "-------------------------------") 5 | Message(STATUS "Processing Project ${PROJNAME}:") 6 | 7 | ##################################################################################### 8 | # offer the choice of having nvpro_core as a sub-folder... good for packaging a sample 9 | # 10 | if(NOT BASE_DIRECTORY) 11 | 12 | find_path(BASE_DIRECTORY 13 | NAMES nvpro_core/cmake/setup.cmake 14 | PATHS ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/../.. 
15 | REQUIRED 16 | DOC "Directory containing nvpro_core" 17 | ) 18 | endif() 19 | if(EXISTS ${BASE_DIRECTORY}/nvpro_core/cmake/setup.cmake) 20 | include(${BASE_DIRECTORY}/nvpro_core/cmake/setup.cmake) 21 | include(${BASE_DIRECTORY}/nvpro_core/cmake/utilities.cmake) 22 | else() 23 | message(FATAL_ERROR "could not find base directory, please set BASE_DIRECTORY to folder containing nvpro_core") 24 | endif() 25 | 26 | _add_project_definitions(${PROJNAME}) 27 | 28 | ##################################################################################### 29 | # additions from packages needed for this sample 30 | # add refs in LIBRARIES_OPTIMIZED 31 | # add refs in LIBRARIES_DEBUG 32 | # add files in PACKAGE_SOURCE_FILES 33 | # 34 | #set(USING_OPENGL "YES") 35 | _add_package_OpenGL() 36 | _add_package_VulkanSDK() 37 | _add_package_ImGUI() 38 | _add_package_ZLIB() 39 | 40 | ##################################################################################### 41 | # process the rest of some cmake code that needs to be done *after* the packages add 42 | _add_nvpro_core_lib() 43 | 44 | ##################################################################################### 45 | # Source files for this project 46 | # 47 | file(GLOB SOURCE_FILES *.cpp *.hpp *.inl *.h *.c mt/*.cpp mt/*.h VK_nvidia/*.c VK_nvidia/*.h) 48 | 49 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/VK_nvidia) 50 | 51 | ##################################################################################### 52 | # download model for this demo to run. Avoids using GitHub for this 53 | # TODO: we need to put the models online. Should happen next week (>Dec.22) 54 | # 55 | unset(FILELISTOUT) 56 | if(1) 57 | set(FILELIST 58 | "SubMarine_134.bk3d.gz" 59 | ) 60 | else() 61 | set(FILELIST 62 | "SubMarine_134.bk3d.gz" 63 | "Jet_134.bk3d.gz" 64 | "Driveline_v134.bk3d.gz" 65 | "Body_v134.bk3d.gz" 66 | "Camera_134.bk3d.gz" 67 | "ConceptCar_134.bk3d.gz" 68 | "Eiffel_133.bk3d.gz" 69 | "Smobby_134.bk3d.gz" 70 | ) 71 | endif() 72 | 73 | download_files(FILENAMES ${FILELIST}) 74 | ##################################################################################### 75 | # GLSL to SPIR-V custom build 76 | # 77 | #more than one file can be given: _compile_GLSL("GLSL_mesh.vert;GLSL_mesh.frag" "GLSL_mesh.spv" GLSL_SOURCES) 78 | # the SpirV validator is fine as long as files are for different pipeline stages (entry points still need to be main()) 79 | #_compile_GLSL( ) 80 | UNSET(GLSL_SOURCES) 81 | UNSET(SPV_OUTPUT) 82 | _compile_GLSL("GLSL/GLSL_mesh.vert" "GLSL/GLSL_mesh_vert.spv" GLSL_SOURCES SPV_OUTPUT) 83 | _compile_GLSL("GLSL/GLSL_mesh.frag" "GLSL/GLSL_mesh_frag.spv" GLSL_SOURCES SPV_OUTPUT) 84 | _compile_GLSL("GLSL/GLSL_mesh_lines.frag" "GLSL/GLSL_mesh_lines_frag.spv" GLSL_SOURCES SPV_OUTPUT) 85 | _compile_GLSL("GLSL/GLSL_mesh_lines.vert" "GLSL/GLSL_mesh_lines_vert.spv" GLSL_SOURCES SPV_OUTPUT) 86 | _compile_GLSL("GLSL/GLSL_grid.vert" "GLSL/GLSL_grid_vert.spv" GLSL_SOURCES SPV_OUTPUT) 87 | _compile_GLSL("GLSL/GLSL_grid.frag" "GLSL/GLSL_grid_frag.spv" GLSL_SOURCES SPV_OUTPUT) 88 | source_group(GLSL_Files FILES ${GLSL_SOURCES}) 89 | 90 | ##################################################################################### 91 | # additional files from helpers 92 | # 93 | #LIST(APPEND COMMON_SOURCE_FILES 94 | # ${BASE_DIRECTORY}/nvpro_core/nvgl/WindowInertiaCamera.h 95 | # ${BASE_DIRECTORY}/nvpro_core/nvh/TimeSampler.h 96 | # ${BASE_DIRECTORY}/nvpro_core/nvh/InertiaCamera.h 97 | # ${BASE_DIRECTORY}/nvpro_core/nvmath/nvmath.inl 98 | # 
${BASE_DIRECTORY}/nvpro_core/nvmath/nvmath.h 99 | # ${BASE_DIRECTORY}/nvpro_core/nvmath/nvmath_types.h 100 | # ${BASE_DIRECTORY}/nvpro_core/nvh/profiler.hpp 101 | # ${BASE_DIRECTORY}/nvpro_core/nvh/profiler.cpp 102 | #) 103 | ##################################################################################### 104 | # Executable 105 | # 106 | if(WIN32 AND NOT GLUT_FOUND) 107 | add_definitions(/wd4267) #remove size_t to int warning 108 | add_definitions(/wd4996) #remove printf warning 109 | add_definitions(/wd4244) #remove double to float conversion warning 110 | add_definitions(/wd4305) #remove double to float truncation warning 111 | else() 112 | # allow gcc to be tolerant on some issues. TODO:should remove this option 113 | add_definitions(-fpermissive) 114 | endif() 115 | add_executable(${PROJNAME} ${SOURCE_FILES} ${COMMON_SOURCE_FILES} ${PACKAGE_SOURCE_FILES} ${GLSL_SOURCES}) 116 | set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT ${PROJNAME}) 117 | 118 | ##################################################################################### 119 | # common source code needed for this sample 120 | # 121 | source_group(common FILES 122 | ${COMMON_SOURCE_FILES} 123 | ${PACKAGE_SOURCE_FILES} 124 | ) 125 | ##################################################################################### 126 | # Linkage 127 | # 128 | target_link_libraries(${PROJNAME} optimized 129 | ${LIBRARIES_OPTIMIZED} 130 | ${PLATFORM_LIBRARIES} 131 | nvpro_core 132 | ) 133 | target_link_libraries(${PROJNAME} debug 134 | ${LIBRARIES_DEBUG} 135 | ${PLATFORM_LIBRARIES} 136 | nvpro_core 137 | ) 138 | 139 | ##################################################################################### 140 | # copies binaries that need to be put next to the exe files (ZLib, etc.) 141 | # 142 | _finalize_target( ${PROJNAME} ) 143 | 144 | # additional copies for standalone run from install folder 145 | install(FILES ${SPV_OUTPUT} CONFIGURATIONS Release DESTINATION "bin_${ARCH}/SPV_${PROJNAME}") 146 | install(FILES ${SPV_OUTPUT} CONFIGURATIONS Debug DESTINATION "bin_${ARCH}_debug/SPV_${PROJNAME}") 147 | -------------------------------------------------------------------------------- /CONTRIBUTING: -------------------------------------------------------------------------------- 1 | https://developercertificate.org/ 2 | 3 | Developer Certificate of Origin 4 | Version 1.1 5 | 6 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 7 | 8 | Everyone is permitted to copy and distribute verbatim copies of this 9 | license document, but changing it is not allowed. 10 | 11 | 12 | Developer's Certificate of Origin 1.1 13 | 14 | By making a contribution to this project, I certify that: 15 | 16 | (a) The contribution was created in whole or in part by me and I 17 | have the right to submit it under the open source license 18 | indicated in the file; or 19 | 20 | (b) The contribution is based upon previous work that, to the best 21 | of my knowledge, is covered under an appropriate open source 22 | license and I have the right under that license to submit that 23 | work with modifications, whether created in whole or in part 24 | by me, under the same open source license (unless I am 25 | permitted to submit under a different license), as indicated 26 | in the file; or 27 | 28 | (c) The contribution was provided directly to me by some other 29 | person who certified (a), (b) or (c) and I have not modified 30 | it. 
31 | 32 | (d) I understand and agree that this project and the contribution 33 | are public and that a record of the contribution (including all 34 | personal information I submit with it, including my sign-off) is 35 | maintained indefinitely and may be redistributed consistent with 36 | this project or the open source license(s) involved. -------------------------------------------------------------------------------- /GLSL/GLSL_grid.frag: -------------------------------------------------------------------------------- 1 | #version 440 core 2 | #extension GL_ARB_separate_shader_objects : enable 3 | 4 | #define DSET_GLOBAL 0 5 | # define BINDING_MATRIX 0 6 | # define BINDING_LIGHT 1 7 | 8 | #define DSET_OBJECT 1 9 | # define BINDING_MATRIXOBJ 0 10 | # define BINDING_MATERIAL 1 11 | //////////////////////////////////////////////////////////////////////////////// 12 | //////////////////////////////////////////////////////////////////////////////// 13 | layout(location=0,index=0) out vec4 out_Color; 14 | void main() 15 | { 16 | out_Color = vec4(0.5,0.7,0.5,1); 17 | } 18 | 19 | /* 20 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 21 | * 22 | * Licensed under the Apache License, Version 2.0 (the "License"); 23 | * you may not use this file except in compliance with the License. 24 | * You may obtain a copy of the License at 25 | * 26 | * http://www.apache.org/licenses/LICENSE-2.0 27 | * 28 | * Unless required by applicable law or agreed to in writing, software 29 | * distributed under the License is distributed on an "AS IS" BASIS, 30 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 31 | * See the License for the specific language governing permissions and 32 | * limitations under the License. 33 | * 34 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 35 | * SPDX-License-Identifier: Apache-2.0 36 | */ -------------------------------------------------------------------------------- /GLSL/GLSL_grid.vert: -------------------------------------------------------------------------------- 1 | #version 440 core 2 | #extension GL_ARB_separate_shader_objects : enable 3 | 4 | #define DSET_GLOBAL 0 5 | # define BINDING_MATRIX 0 6 | # define BINDING_LIGHT 1 7 | 8 | #define DSET_OBJECT 1 9 | # define BINDING_MATRIXOBJ 0 10 | # define BINDING_MATERIAL 1 11 | //////////////////////////////////////////////////////////////////////////////// 12 | //////////////////////////////////////////////////////////////////////////////// 13 | layout(std140, set= DSET_GLOBAL , binding= BINDING_MATRIX ) uniform matrixBuffer { 14 | mat4 mW; 15 | mat4 mVP; 16 | } matrix; 17 | in layout(location=0) vec3 pos; 18 | out gl_PerVertex { 19 | vec4 gl_Position; 20 | }; 21 | void main() 22 | { 23 | vec4 wPos = /*matrix.mW **/ ( vec4(pos,0.51) ); 24 | gl_Position = matrix.mVP * wPos; 25 | } 26 | 27 | /* 28 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 29 | * 30 | * Licensed under the Apache License, Version 2.0 (the "License"); 31 | * you may not use this file except in compliance with the License. 32 | * You may obtain a copy of the License at 33 | * 34 | * http://www.apache.org/licenses/LICENSE-2.0 35 | * 36 | * Unless required by applicable law or agreed to in writing, software 37 | * distributed under the License is distributed on an "AS IS" BASIS, 38 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 39 | * See the License for the specific language governing permissions and 40 | * limitations under the License. 
41 | * 42 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 43 | * SPDX-License-Identifier: Apache-2.0 44 | */ -------------------------------------------------------------------------------- /GLSL/GLSL_mesh.frag: -------------------------------------------------------------------------------- 1 | // 2 | // License Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License. 3 | // https://creativecommons.org/licenses/by-nc-sa/3.0/us/ 4 | // Voronoi part taken from Ben Weston: https://www.shadertoy.com/view/ldsGzl 5 | // few changes made for the purpose of Vulkan code 6 | // 7 | 8 | #version 440 core 9 | #extension GL_ARB_separate_shader_objects : enable 10 | 11 | #define DSET_GLOBAL 0 12 | # define BINDING_MATRIX 0 13 | # define BINDING_LIGHT 1 14 | # define BINDING_NOISE 2 15 | 16 | #define DSET_OBJECT 1 17 | # define BINDING_MATRIXOBJ 0 18 | # define BINDING_MATERIAL 1 19 | //////////////////////////////////////////////////////////////////////////////// 20 | //////////////////////////////////////////////////////////////////////////////// 21 | layout(set= DSET_GLOBAL, binding= BINDING_NOISE ) uniform sampler3D iChannel0; 22 | 23 | layout(std140, set= DSET_OBJECT , binding= BINDING_MATERIAL ) uniform materialBuffer { 24 | vec3 diffuse; 25 | float a; 26 | } material; 27 | 28 | layout(location=1) in vec3 N; 29 | layout(location=2) in vec3 inWPos; 30 | layout(location=3) in vec3 inEyePos; 31 | 32 | layout(location=0,index=0) out vec4 outColor; 33 | 34 | vec3 Sky( vec3 ray ) 35 | { 36 | return mix( vec3(.8), vec3(0), exp2(-(1.0/max(ray.y,.01))*vec3(.4,.6,1.0)) ); 37 | } 38 | 39 | mat2 mm2(in float a){float c = cos(a), s = sin(a);return mat2(c,-s,s,c);} 40 | 41 | vec3 Voronoi( vec3 pos ) 42 | { 43 | vec3 d[8]; 44 | d[0] = vec3(0,0,0); 45 | d[1] = vec3(1,0,0); 46 | d[2] = vec3(0,1,0); 47 | d[3] = vec3(1,1,0); 48 | d[4] = vec3(0,0,1); 49 | d[5] = vec3(1,0,1); 50 | d[6] = vec3(0,1,1); 51 | d[7] = vec3(1,1,1); 52 | 53 | const float maxDisplacement = .7; //tweak this to hide grid artefacts 54 | 55 | vec3 pf = floor(pos); 56 | 57 | const float phi = 1.61803398875; 58 | 59 | float closest = 12.0; 60 | vec3 result; 61 | for ( int i=0; i < 8; i++ ) 62 | { 63 | vec3 v = (pf+d[i]); 64 | vec3 r = fract(phi*v.yzx+17.*fract(v.zxy*phi)+v*v*.03);//Noise(ivec3(floor(pos+d[i]))); 65 | vec3 p = d[i] + maxDisplacement*(r.xyz-.5); 66 | p -= fract(pos); 67 | float lsq = dot(p,p); 68 | if ( lsq < closest ) 69 | { 70 | closest = lsq; 71 | result = r; 72 | } 73 | } 74 | return fract(result.xyz);//+result.www); // random colour 75 | } 76 | 77 | vec3 shade( vec3 pos, vec3 norm, vec3 rayDir, vec3 lightDir ) 78 | { 79 | vec3 paint = material.diffuse; 80 | 81 | vec3 norm2 = normalize(norm+.02*(Voronoi(pos*800.0)*2.0-1.0)); 82 | 83 | if ( dot(norm2,rayDir) > 0.0 ) // we shouldn't see flecks that point away from us 84 | norm2 -= 2.0*dot(norm2,rayDir)*rayDir; 85 | 86 | 87 | // diffuse layer, reduce overall contrast 88 | vec3 result = paint*.6*(pow(max(0.0,dot(norm,lightDir)),2.0)+.2); 89 | 90 | vec3 h = normalize( lightDir-rayDir ); 91 | vec3 s = pow(max(0.0,dot(h,norm2)),50.0)*10.0*vec3(1); 92 | 93 | float rdotn = dot(rayDir,norm2); 94 | vec3 reflection = rayDir-2.0*rdotn*norm; 95 | s += Sky( reflection ); 96 | 97 | float f = pow(1.0+rdotn,5.0); 98 | f = mix( .2, 1.0, f ); 99 | 100 | result = mix(result,paint*s,f); 101 | 102 | // gloss layer 103 | s = pow(max(0.0,dot(h,norm)),1000.0)*32.0*vec3(1); 104 | 105 | rdotn = dot(rayDir,norm); 106 | reflection = rayDir-2.0*rdotn*norm; 107 | 108 | 
return result; 109 | } 110 | 111 | void main() 112 | { 113 | vec3 lightDir = vec3(0, 0.707, 0.707); 114 | vec3 ray = inWPos; 115 | ray -= inEyePos; 116 | ray = normalize(ray); 117 | vec3 shaded = shade( inWPos, N, ray, lightDir ); 118 | outColor = vec4(shaded, 1); 119 | } 120 | -------------------------------------------------------------------------------- /GLSL/GLSL_mesh.vert: -------------------------------------------------------------------------------- 1 | #version 440 core 2 | #extension GL_ARB_separate_shader_objects : enable 3 | 4 | #define DSET_GLOBAL 0 5 | # define BINDING_MATRIX 0 6 | # define BINDING_LIGHT 1 7 | 8 | #define DSET_OBJECT 1 9 | # define BINDING_MATRIXOBJ 0 10 | # define BINDING_MATERIAL 1 11 | //////////////////////////////////////////////////////////////////////////////// 12 | //////////////////////////////////////////////////////////////////////////////// 13 | layout(std140, set= DSET_GLOBAL , binding= BINDING_MATRIX ) uniform matrixBuffer { 14 | mat4 mW; 15 | mat4 mVP; 16 | vec3 eyePos; 17 | } matrix; 18 | layout(std140, set= DSET_OBJECT , binding= BINDING_MATRIXOBJ ) uniform matrixObjBuffer { 19 | mat4 mO; 20 | } object; 21 | layout(location=0) in vec3 pos; 22 | layout(location=1) in vec3 N; 23 | 24 | layout(location=1) out vec3 outN; 25 | layout(location=2) out vec3 outWPos; 26 | layout(location=3) out vec3 outEyePos; 27 | out gl_PerVertex { 28 | vec4 gl_Position; 29 | }; 30 | void main() 31 | { 32 | outN = N.xzy; 33 | vec4 wpos = matrix.mW * (object.mO * vec4(pos,1)); 34 | gl_Position = matrix.mVP * wpos; 35 | outWPos = wpos.xyz; 36 | outEyePos = matrix.eyePos; 37 | } 38 | 39 | /* 40 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 41 | * 42 | * Licensed under the Apache License, Version 2.0 (the "License"); 43 | * you may not use this file except in compliance with the License. 44 | * You may obtain a copy of the License at 45 | * 46 | * http://www.apache.org/licenses/LICENSE-2.0 47 | * 48 | * Unless required by applicable law or agreed to in writing, software 49 | * distributed under the License is distributed on an "AS IS" BASIS, 50 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 51 | * See the License for the specific language governing permissions and 52 | * limitations under the License. 53 | * 54 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 55 | * SPDX-License-Identifier: Apache-2.0 56 | */ -------------------------------------------------------------------------------- /GLSL/GLSL_mesh_lines.frag: -------------------------------------------------------------------------------- 1 | #version 440 core 2 | #extension GL_ARB_separate_shader_objects : enable 3 | 4 | #define DSET_GLOBAL 0 5 | # define BINDING_MATRIX 0 6 | # define BINDING_LIGHT 1 7 | 8 | #define DSET_OBJECT 1 9 | # define BINDING_MATRIXOBJ 0 10 | # define BINDING_MATERIAL 1 11 | //////////////////////////////////////////////////////////////////////////////// 12 | //////////////////////////////////////////////////////////////////////////////// 13 | layout(std140, set= DSET_OBJECT , binding= BINDING_MATERIAL ) uniform materialBuffer { 14 | uniform vec3 diffuse; 15 | } material; 16 | //layout(std140, set= DSET_GLOBAL , binding= BINDING_LIGHT ) uniform lightBuffer { 17 | // uniform vec3 dir; 18 | //} light; 19 | layout(location=0) out vec4 outColor; 20 | void main() { 21 | 22 | outColor = vec4(0.5,0.5,0.2,1); 23 | } 24 | 25 | /* 26 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 
27 | * 28 | * Licensed under the Apache License, Version 2.0 (the "License"); 29 | * you may not use this file except in compliance with the License. 30 | * You may obtain a copy of the License at 31 | * 32 | * http://www.apache.org/licenses/LICENSE-2.0 33 | * 34 | * Unless required by applicable law or agreed to in writing, software 35 | * distributed under the License is distributed on an "AS IS" BASIS, 36 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 37 | * See the License for the specific language governing permissions and 38 | * limitations under the License. 39 | * 40 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 41 | * SPDX-License-Identifier: Apache-2.0 42 | */ -------------------------------------------------------------------------------- /GLSL/GLSL_mesh_lines.vert: -------------------------------------------------------------------------------- 1 | #version 440 core 2 | #extension GL_ARB_separate_shader_objects : enable 3 | 4 | #define DSET_GLOBAL 0 5 | # define BINDING_MATRIX 0 6 | # define BINDING_LIGHT 1 7 | 8 | #define DSET_OBJECT 1 9 | # define BINDING_MATRIXOBJ 0 10 | # define BINDING_MATERIAL 1 11 | //////////////////////////////////////////////////////////////////////////////// 12 | //////////////////////////////////////////////////////////////////////////////// 13 | layout(std140, set= DSET_GLOBAL , binding= BINDING_MATRIX ) uniform matrixBuffer { 14 | mat4 mW; 15 | mat4 mVP; 16 | vec3 eyePos; 17 | } matrix; 18 | layout(std140, set= DSET_OBJECT , binding= BINDING_MATRIXOBJ ) uniform matrixObjBuffer { 19 | mat4 mO; 20 | } object; 21 | layout(location=0) in vec3 pos; 22 | 23 | layout(location=2) out vec3 outWPos; 24 | layout(location=3) out vec3 outEyePos; 25 | out gl_PerVertex { 26 | vec4 gl_Position; 27 | }; 28 | void main() 29 | { 30 | vec4 wpos = matrix.mW * (object.mO * vec4(pos,1)); 31 | gl_Position = matrix.mVP * wpos; 32 | outWPos = wpos.xyz; 33 | outEyePos = matrix.eyePos; 34 | } 35 | 36 | /* 37 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 38 | * 39 | * Licensed under the Apache License, Version 2.0 (the "License"); 40 | * you may not use this file except in compliance with the License. 41 | * You may obtain a copy of the License at 42 | * 43 | * http://www.apache.org/licenses/LICENSE-2.0 44 | * 45 | * Unless required by applicable law or agreed to in writing, software 46 | * distributed under the License is distributed on an "AS IS" BASIS, 47 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 48 | * See the License for the specific language governing permissions and 49 | * limitations under the License. 50 | * 51 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 52 | * SPDX-License-Identifier: Apache-2.0 53 | */ -------------------------------------------------------------------------------- /GLSL/noise64x64_RGB.dds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/GLSL/noise64x64_RGB.dds -------------------------------------------------------------------------------- /GLSLShader.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #include "GLSLShader.h" 21 | #include <fstream> 22 | #include <string> 23 | #include <cstring> 24 | #include "nvh/nvprint.hpp" 25 | 26 | GLSLShader::GLSLShader() 27 | { 28 | m_linkNeeded = false; 29 | m_program = 0; 30 | } 31 | 32 | //*NVTL* 33 | GLSLShader::~GLSLShader() 34 | { 35 | cleanup(); 36 | } 37 | //*NVTL* 38 | void GLSLShader::cleanup() 39 | { 40 | m_fragFiles.clear(); 41 | m_vertFiles.clear(); 42 | m_fragSrc.clear(); 43 | m_vertSrc.clear(); 44 | if(m_program) glDeleteProgram(m_program); 45 | m_program = 0; 46 | } 47 | bool GLSLShader::compileShaderFromString(const char *shader, GLenum type) 48 | { 49 | bool bRes = true; 50 | if(!shader) 51 | return false; 52 | if(0 == m_program) 53 | m_program = glCreateProgram(); 54 | GLuint obj = glCreateShader(type); 55 | 56 | // set source 57 | GLint size = (GLint)strlen(shader); 58 | const GLchar* progString = (const GLchar*)shader; 59 | glShaderSource(obj, 1, &progString, &size); 60 | glCompileShader(obj); 61 | bRes = outputShaderLog(obj); 62 | 63 | glAttachShader(m_program, obj); 64 | glDeleteShader(obj); 65 | 66 | m_linkNeeded = true; 67 | return bRes; 68 | } 69 | 70 | bool GLSLShader::compileShader(const char *filename, GLenum type) 71 | { 72 | bool bRes; 73 | if(0 == m_program) 74 | m_program = glCreateProgram(); 75 | 76 | std::ifstream ifs; 77 | ifs.open(filename); 78 | if(ifs.bad()) 79 | return false; 80 | 81 | std::string file((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>()); 82 | //*NVTL* added the checking here because ifs.bad() doesn't fail when the file doesn't exist... 83 | if(file.size()==0) 84 | { 85 | //*NVTL* : added the message. Because I lost quite some time because of this missing feature ;-P 86 | if(filename) LOGE("\nGLSL ERROR: loading file %s \n", filename ?
filename : "NULL"); 87 | fflush(stdout); 88 | //# ifdef _DEBUG 89 | // _asm {int 3 } 90 | //# endif 91 | return false; 92 | } 93 | 94 | //std::string file; 95 | //while(!ifs.eof()) file += ifs.get(); 96 | 97 | 98 | GLuint obj = glCreateShader(type); 99 | 100 | // set source 101 | GLint size = (GLint)file.size(); 102 | const GLchar* progString = (const GLchar*)file.c_str(); 103 | glShaderSource(obj, 1, &progString, &size); 104 | glCompileShader(obj); 105 | bRes = outputShaderLog(obj); 106 | 107 | glAttachShader(m_program, obj); 108 | glDeleteShader(obj); 109 | 110 | ifs.close(); 111 | m_linkNeeded = true; 112 | return bRes; 113 | } 114 | 115 | bool GLSLShader::addFragmentShader(const char *filename, bool isNew) 116 | { 117 | bool bRes; 118 | 119 | bRes = compileShader(filename, GL_FRAGMENT_SHADER); 120 | 121 | if(isNew) m_fragFiles.push_back(filename); 122 | return bRes; 123 | } 124 | 125 | 126 | bool GLSLShader::addVertexShader(const char *filename, bool isNew) 127 | { 128 | bool bRes; 129 | 130 | bRes = compileShader(filename, GL_VERTEX_SHADER); 131 | 132 | if(isNew) m_vertFiles.push_back(filename); 133 | return bRes; 134 | } 135 | 136 | //----> *NVTL* 137 | bool GLSLShader::addFragmentShaderFromString(const char *shader) 138 | { 139 | bool bRes; 140 | 141 | bRes = compileShaderFromString(shader, GL_FRAGMENT_SHADER); 142 | 143 | m_fragSrc.push_back(shader); 144 | return bRes; 145 | } 146 | 147 | 148 | bool GLSLShader::addVertexShaderFromString(const char *shader) 149 | { 150 | bool bRes; 151 | 152 | bRes = compileShaderFromString(shader, GL_VERTEX_SHADER); 153 | 154 | m_vertSrc.push_back(shader); 155 | return bRes; 156 | } 157 | // <---- *NVTL* 158 | bool GLSLShader::link() 159 | { 160 | bool bRes = true; 161 | if(m_linkNeeded) 162 | { 163 | glLinkProgram(m_program); 164 | bRes = outputProgramLog(m_program); 165 | m_linkNeeded = false; 166 | } 167 | return bRes; 168 | } 169 | 170 | bool GLSLShader::bindShader() 171 | { 172 | bool bRes = true; 173 | 174 | if(m_linkNeeded) 175 | bRes = link(); 176 | 177 | glUseProgram(m_program); 178 | 179 | //GL_FLOAT_RGBA32_NV; 180 | //GL_RGBA_FLOAT32_ATI; 181 | return bRes; 182 | } 183 | 184 | void GLSLShader::unbindShader() 185 | { 186 | glUseProgram(0); 187 | } 188 | 189 | bool GLSLShader::outputProgramLog(GLuint obj) 190 | { 191 | char buf[1024]; 192 | int len; 193 | glGetProgramInfoLog(obj, 1024, &len, buf); 194 | if(len) 195 | { 196 | LOGW("Log for %d:\n%s\n\n", obj, buf); 197 | if(strstr(buf, "error") != nullptr) 198 | return false; 199 | # ifdef _DEBUG 200 | //if(strstr(buf, "error") > 0) 201 | //{ 202 | // _asm {int 3 } 203 | //} 204 | # endif 205 | } 206 | return true; 207 | } 208 | 209 | bool GLSLShader::outputShaderLog(GLuint obj) 210 | { 211 | char buf[1024]; 212 | int len; 213 | glGetShaderInfoLog(obj, 1024, &len, buf); 214 | if(len) 215 | { 216 | LOGW("Log for %d:\n%s\n\n", obj, buf); 217 | if(strstr(buf, "error") != nullptr) 218 | return false; 219 | # ifdef _DEBUG 220 | //if(strstr(buf, "error") > 0) 221 | //{ 222 | // _asm {int 3 } 223 | //} 224 | # endif 225 | } 226 | return true; 227 | } 228 | 229 | void GLSLShader::setUniformFloat(const char *name, float val) 230 | { 231 | glUniform1f(glGetUniformLocation(m_program, name), val); 232 | } 233 | 234 | void GLSLShader::setUniformInt(const char *name, int val) 235 | { 236 | glUniform1i(glGetUniformLocation(m_program, name), val); 237 | } 238 | 239 | //----> *NVTL* 240 | void GLSLShader::setUniformVector(const char * name, float* val, int count) 241 | { 242 | GLint id = 
glGetUniformLocation(m_program, name); 243 | if (id == -1) { 244 | return; 245 | } 246 | switch (count) { 247 | case 1: 248 | glUniform1fv(id, 1, val); 249 | break; 250 | case 2: 251 | glUniform2fv(id, 1, val); 252 | break; 253 | case 3: 254 | glUniform3fv(id, 1, val); 255 | break; 256 | case 4: 257 | glUniform4fv(id, 1, val); 258 | break; 259 | } 260 | } 261 | 262 | void GLSLShader::setTextureUnit(const char * texname, int texunit) 263 | { 264 | GLint linked; 265 | glGetProgramiv(m_program, GL_LINK_STATUS, &linked); 266 | if (linked != GL_TRUE) { 267 | return; 268 | } 269 | GLint id = glGetUniformLocation(m_program, texname); 270 | if (id == -1) { 271 | return; 272 | } 273 | glUniform1i(id, texunit); 274 | } 275 | 276 | void GLSLShader::bindTexture(GLenum target, const char * texname, GLuint texid, int texunit) 277 | { 278 | glActiveTexture(GL_TEXTURE0 + texunit); 279 | glBindTexture(target, texid); 280 | setTextureUnit(texname, texunit); 281 | glActiveTexture(GL_TEXTURE0); 282 | } 283 | //<---- *NVTL* 284 | 285 | void GLSLShader::reloadShader() 286 | { 287 | glDeleteProgram(m_program); 288 | 289 | //We should really also detach the old fragment and vertex shaders 290 | //and delete them as well... 291 | 292 | m_program = 0; 293 | 294 | for(unsigned int i = 0; i < m_vertFiles.size(); ++i) 295 | addVertexShader(m_vertFiles[i].c_str(), false); 296 | for(unsigned int i = 0; i < m_vertSrc.size(); ++i) 297 | addVertexShader(m_vertSrc[i].c_str(), false); 298 | 299 | for(unsigned int i = 0; i < m_fragFiles.size(); ++i) 300 | addFragmentShader(m_fragFiles[i].c_str(), false); 301 | for(unsigned int i = 0; i < m_fragSrc.size(); ++i) 302 | addFragmentShader(m_fragSrc[i].c_str(), false); 303 | } 304 | -------------------------------------------------------------------------------- /GLSLShader.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #pragma once 21 | #ifdef WIN32 22 | # include <windows.h> 23 | #endif 24 | #include <nvgl/extensions_gl.hpp> // assumed GL header (the original include target was lost) 25 | #include <string> 26 | #include <vector> 27 | 28 | class GLSLShader 29 | { 30 | public: 31 | GLSLShader(); 32 | ~GLSLShader(); 33 | 34 | void cleanup(); 35 | 36 | bool addFragmentShader(const char* filename, bool isNew=true); 37 | bool addVertexShader(const char* filename, bool isNew=true); 38 | bool addFragmentShaderFromString(const char* shader); 39 | bool addVertexShaderFromString(const char* shader); 40 | bool link(); 41 | 42 | bool bindShader(); 43 | void unbindShader(); 44 | 45 | void setUniformFloat(const char* name, float val); 46 | void setUniformInt(const char* name, int val); 47 | void setUniformVector(const char * name, float* val, int count); 48 | void setTextureUnit(const char * texname, int texunit); 49 | void bindTexture(GLenum target, const char * texname, GLuint texid, int texunit); 50 | 51 | void reloadShader(); 52 | 53 | inline GLuint getProgram() {return m_program;} 54 | 55 | inline int getUniformLocation(const char* name) { return glGetUniformLocation(m_program, name); } 56 | 57 | private: 58 | 59 | bool compileShader(const char* filename, GLenum type); 60 | bool compileShaderFromString(const char *shader, GLenum type); 61 | bool outputProgramLog(GLuint obj); 62 | bool outputShaderLog(GLuint obj); 63 | 64 | bool m_bound; 65 | bool m_linkNeeded; 66 | 67 | std::vector<std::string> m_vertFiles; 68 | std::vector<std::string> m_fragFiles; 69 | std::vector<std::string> m_vertSrc; 70 | std::vector<std::string> m_fragSrc; 71 | 72 | GLuint m_program; 73 | 74 | }; -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types.
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DEPRECATED 2 | 3 | This sample is from earlier versions of the Vulkan API and we do not recommend it anymore. 4 | 5 | # Vulkan & OpenGL & Command-list Sample using "Thread-Workers" 6 | 7 | With the official release of Vulkan, NVIDIA and the "Devtech-Proviz" team released new samples in the [professional graphics repository](https://github.com/nvpro-samples). 8 | 9 | The purpose of this blog post is to give more details on what is happening in the sample called `gl_vk_bk3dthreaded` [(available here)](https://github.com/nvpro-samples/gl_vk_bk3dthreaded). 10 | 11 | ![Example](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/sample.jpg) 12 | 13 | ## How to build the sample 14 | For now, I am sorry to say that the sample may only run on Windows; I haven't consolidated it for Linux yet. 15 | 16 | This sample requires the following: 17 | 18 | - LunarG SDK v1.0.3.1: just install it from https://vulkan.lunarg.com; CMake should be able to locate it 19 | - the submarine model: when you configure the project with CMake, the CMake script will perform a *wget* to fetch the model and store it locally (keep `MODEL_DOWNLOAD_SUBMARINE` checked). The model is 32 MB and will be stored in a shared folder called `downloaded_resources` 20 | 21 | Optionally, other *bk3d* models can be used in this sample, but to avoid heavy downloads only the submarine is fetched by default. Turn `MODEL_DOWNLOAD_MORE` on for more models... 22 | 23 | ## How does the sample work 24 | 25 | The sample will run by default with the *submarine* model *and some camera animation*.
So if you want to freely move the camera, don't forget to stop the animation (UI or the 'a' key). 26 | 27 | If you pass another model (*.bk3d.gz or *.bk3d) as a cmd-line argument, the sample should be able to render it, but the animation will be turned off, and it is possible that the camera won't focus exactly on the new model... 28 | 29 | The Vulkan renderer is the default one at startup. You can switch between: 30 | 31 | - **OpenGL & Command-lists**: an example of how to feed the token-buffers 32 | - **OpenGL**: a basic implementation of how you would render 3D with OpenGL 33 | - **Vulkan**: the default renderer 34 | 35 | ![toggles](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/toggles.JPG) 36 | 37 | > **Note**: toggles are preceded by a character between quotes: when the viewport has the focus, you can use the keyboard instead. 38 | 39 | - **Use Workers**: when checked, multi-threading is used; when unchecked, only the main thread updates the draw commands (cmd-buffers) 40 | - **command-buffer amount**: by default, 16 secondary command-buffers will be created to render everything. In the multi-threading case, thread-workers will get spawned and will work on building them when the **'c'** toggle is checked (*command-buffer continuous refresh*) 41 | - **Cmd-buf-style**: this model came from a CAD application. It turns out that at the time this model was created, primitives were issued depending on their 'parts', rather than on their primitive type and/or materials (hence shaders). **"sort on primitive type"** renders triangles first, then strips, then lines... 42 | - **MSAA**: multisampling mode 43 | - **'c'**: toggles command-buffer continuous refresh 44 | - **space**: toggles continuous rendering 45 | - toggles from 'o' to '5' are self-explanatory options... just give them a try 46 | 47 | ### cmd-line arguments 48 | 49 | - -v (VBO max Size) 50 | - -m (bk3d model) 51 | - -c 0 or 1 : use command-lists 52 | - -o 0 or 1 : display meshes 53 | - -g 0 or 1 : display grid 54 | - -s 0 or 1 : stats 55 | - -a 0 or 1 : animate camera 56 | - -d 0 or 1 : debug stuff (ui) 57 | - -m (bk3d file) : load a specific model 58 | - (bk3d file name) : load a specific model 59 | - -q (msaa) : MSAA 60 | 61 | ### mouse 62 | Special keys combined with the mouse allow you to move around the model. The camera always targets a focus point and essentially works in "polar coordinates" (**TODO**: I need to display the focus point with a cross...) 63 | 64 | - **mouse wheel**: zoom in/out from the focus point 65 | - **left mouse button**: rotate around the focus point 66 | - **right mouse button**: rotate around the Ox axis and zoom in/out from the focus point 67 | - **right mouse button + Ctrl**: push the focus point forward/backward 68 | - **middle mouse button**: pan the focus point left/right and up/down along the camera axes 69 | - **arrows**: rotate around the focus point 70 | - **Pg-up/Pg-down**: zoom in/out 71 | - **Pg-up/Pg-down + Ctrl**: push the focus point forward/backward along the camera axis 72 | 73 | ## 3D model(s) 74 | The 3D model comes from a *pre-baked* format (see [here](https://github.com/tlorach/Bak3d)). There is no need to understand how it works: the main interest is that it loads fast (baked format... saving us parsing time) and that I managed to 'capture' some models as they were issued by various applications. 75 | 76 | The sample will load the model, then attach it to the renderers; a minimal, hypothetical sketch of this pattern is shown right below.
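To make that concrete, here is a hypothetical sketch of the "load once, attach to each renderer" idea. The class and function names are illustrative only and do not match the sample's actual code:

```cpp
// Hypothetical sketch only: load the bk3d model once, then let each renderer
// backend create its own API-specific resources from it. These names do not
// match the sample's real classes.
#include <cstdio>
#include <memory>
#include <vector>

struct Bk3dModel { };  // placeholder for the loaded bk3d data

class Renderer {       // one implementation per graphics API
public:
  virtual ~Renderer() = default;
  virtual void attachModel(const Bk3dModel& model) = 0;  // build VBOs/buffers/cmd-buffers here
};

class RendererGL : public Renderer {
public:
  void attachModel(const Bk3dModel&) override { std::printf("OpenGL resources created\n"); }
};

class RendererVk : public Renderer {
public:
  void attachModel(const Bk3dModel&) override { std::printf("Vulkan resources created\n"); }
};

int main()
{
  Bk3dModel model;  // imagine this holding the data of SubMarine_134.bk3d.gz
  std::vector<std::unique_ptr<Renderer>> renderers;
  renderers.emplace_back(new RendererGL);
  renderers.emplace_back(new RendererVk);
  for(auto& r : renderers)   // each backend builds its own GPU resources
    r->attachModel(model);
  return 0;
}
```

In the real sample the active renderer is switched at runtime from the UI; the point is simply that the bk3d data is loaded once, while each backend creates and owns its own GPU resources.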
The resource creation will thus depend on which graphics API is being used. 77 | 78 | ## More technical details 79 | 80 | Here are more details in separate sub-sections: 81 | 82 | * [Rendering Modes](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/Rendering_Modes.md) : details on what this sample renders and how 83 | * [Rendering with Vulkan](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/Vulkan_Renderer.md) 84 | : key steps to make the Vulkan API work for this sample, including the `GL_NV_draw_vulkan_image` extension 85 | * [Vulkan Code Style](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/Vulkan_Code_Style.md) 86 | : a helper file that allows writing Vulkan code in a more compact fashion 87 | * [Multithreading](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/Multithreading.md "Multithreading") 88 | : based on "Thread-workers", and how to use Vulkan in this case 89 | * [Results / performances](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/Results.md "Results") 90 | * [NSight captures](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/NSight_Captures.md "NSight captures") : some NSight captures showing what is happening 91 | 92 | -------------------------------------------------------------------------------- /bk3dDefs.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #ifndef __BK3DDEFS__ 21 | #define __BK3DDEFS__ 22 | 23 | //------------------------------------------------- 24 | // 25 | // D3D9 definitions 26 | // ---------------- 27 | // This part is needed when OpenGL/DX9-10 is not used : 28 | // some enums & defines are needed anyways.
29 | // Instead of including OpenGL only for that, 30 | // this section will define them 31 | // Furthermore : this can be used by any exporter/converter 32 | // 33 | //------------------------------------------------- 34 | #ifndef _d3d9TYPES_H_ 35 | #ifdef BK3DVERBOSE 36 | #pragma message("defining D3DPRIMITIVETYPE here...") 37 | #endif 38 | enum D3DPRIMITIVETYPE 39 | { 40 | D3DPT_UNDEFINED = 0, 41 | D3DPT_POINTLIST = 1, 42 | D3DPT_LINELIST = 2, 43 | D3DPT_LINESTRIP = 3, 44 | D3DPT_TRIANGLELIST = 4, 45 | D3DPT_TRIANGLESTRIP = 5, 46 | D3DPT_TRIANGLEFAN = 6, 47 | D3DPT_END = -1 48 | }; 49 | #ifdef BK3DVERBOSE 50 | #pragma message("defining D3DFORMAT here...") 51 | #endif 52 | enum D3DFORMAT 53 | { 54 | D3DFMT_INDEX16 = 101, 55 | D3DFMT_INDEX32 = 102, 56 | D3DFMT_END = -1 57 | }; 58 | #ifdef BK3DVERBOSE 59 | #pragma message("defining D3DDECLTYPE here...") 60 | #endif 61 | enum D3DDECLTYPE 62 | { 63 | D3DDECLTYPE_FLOAT1 = 0, 64 | D3DDECLTYPE_FLOAT2 = 1, 65 | D3DDECLTYPE_FLOAT3 = 2, 66 | D3DDECLTYPE_FLOAT4 = 3, 67 | D3DDECLTYPE_D3DCOLOR = 4, 68 | D3DDECLTYPE_UBYTE4 = 5, 69 | D3DDECLTYPE_SHORT2 = 6, 70 | D3DDECLTYPE_SHORT4 = 7, 71 | D3DDECLTYPE_UBYTE4N = 8, 72 | D3DDECLTYPE_SHORT2N = 9, 73 | D3DDECLTYPE_SHORT4N = 10, 74 | D3DDECLTYPE_USHORT2N = 11, 75 | D3DDECLTYPE_USHORT4N = 12, 76 | D3DDECLTYPE_UDEC3 = 13, 77 | D3DDECLTYPE_DEC3N = 14, 78 | D3DDECLTYPE_FLOAT16_2 = 15, 79 | D3DDECLTYPE_FLOAT16_4 = 16, 80 | D3DDECLTYPE_UNUSED = 17, 81 | 82 | D3DDECLTYPE_UNDEF = -1, 83 | }; 84 | #endif 85 | 86 | //------------------------------------------------- 87 | // 88 | // D3D10 definitions 89 | // ---------------- 90 | // This part is needed when OpenGL is not used : 91 | // some enums & defines are needed anyways. 92 | // Instead of including OpenGL only for that, 93 | // this section will define them 94 | // Furthermore : this can be used by any exporter/converter 95 | // 96 | //------------------------------------------------- 97 | #ifndef __d3d10_h__ 98 | #ifdef BK3DVERBOSE 99 | #pragma message("defining D3D10_PRIMITIVE_TOPOLOGY enum...") 100 | #endif 101 | enum D3D10_PRIMITIVE_TOPOLOGY 102 | { 103 | D3D10_PRIMITIVE_TOPOLOGY_UNDEFINED = 0, 104 | D3D10_PRIMITIVE_TOPOLOGY_POINTLIST = 1, 105 | D3D10_PRIMITIVE_TOPOLOGY_LINELIST = 2, 106 | D3D10_PRIMITIVE_TOPOLOGY_LINESTRIP = 3, 107 | D3D10_PRIMITIVE_TOPOLOGY_TRIANGLELIST = 4, 108 | D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP = 5, 109 | D3D10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ = 10, 110 | D3D10_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ = 11, 111 | D3D10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ = 12, 112 | D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ = 13, 113 | //D3D10_PRIMITIVE_TOPOLOGY_FAN = 14 // Doesn't exist in DXGI... 
114 | D3D10_PT_END = -1 115 | }; 116 | #ifdef BK3DVERBOSE 117 | #pragma message("defining DXGI_FORMAT enum...") 118 | #endif 119 | enum DXGI_FORMAT // stick to DXGI values 120 | { 121 | DXGI_FORMAT_UNKNOWN = 0, 122 | DXGI_FORMAT_R32G32B32A32_TYPELESS = 1, 123 | DXGI_FORMAT_R32G32B32A32_FLOAT = 2, 124 | DXGI_FORMAT_R32G32B32A32_UINT = 3, 125 | DXGI_FORMAT_R32G32B32A32_SINT = 4, 126 | DXGI_FORMAT_R32G32B32_TYPELESS = 5, 127 | DXGI_FORMAT_R32G32B32_FLOAT = 6, 128 | DXGI_FORMAT_R32G32B32_UINT = 7, 129 | DXGI_FORMAT_R32G32B32_SINT = 8, 130 | DXGI_FORMAT_R16G16B16A16_TYPELESS = 9, 131 | DXGI_FORMAT_R16G16B16A16_FLOAT = 10, 132 | DXGI_FORMAT_R16G16B16A16_UNORM = 11, 133 | DXGI_FORMAT_R16G16B16A16_UINT = 12, 134 | DXGI_FORMAT_R16G16B16A16_SNORM = 13, 135 | DXGI_FORMAT_R16G16B16A16_SINT = 14, 136 | DXGI_FORMAT_R32G32_TYPELESS = 15, 137 | DXGI_FORMAT_R32G32_FLOAT = 16, 138 | DXGI_FORMAT_R32G32_UINT = 17, 139 | DXGI_FORMAT_R32G32_SINT = 18, 140 | DXGI_FORMAT_R32G8X24_TYPELESS = 19, 141 | DXGI_FORMAT_D32_FLOAT_S8X24_UINT = 20, 142 | DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS = 21, 143 | DXGI_FORMAT_X32_TYPELESS_G8X24_UINT = 22, 144 | DXGI_FORMAT_R10G10B10A2_TYPELESS = 23, 145 | DXGI_FORMAT_R10G10B10A2_UNORM = 24, 146 | DXGI_FORMAT_R10G10B10A2_UINT = 25, 147 | DXGI_FORMAT_R11G11B10_FLOAT = 26, 148 | DXGI_FORMAT_R8G8B8A8_TYPELESS = 27, 149 | DXGI_FORMAT_R8G8B8A8_UNORM = 28, 150 | DXGI_FORMAT_R8G8B8A8_UNORM_SRGB = 29, 151 | DXGI_FORMAT_R8G8B8A8_UINT = 30, 152 | DXGI_FORMAT_R8G8B8A8_SNORM = 31, 153 | DXGI_FORMAT_R8G8B8A8_SINT = 32, 154 | DXGI_FORMAT_R16G16_TYPELESS = 33, 155 | DXGI_FORMAT_R16G16_FLOAT = 34, 156 | DXGI_FORMAT_R16G16_UNORM = 35, 157 | DXGI_FORMAT_R16G16_UINT = 36, 158 | DXGI_FORMAT_R16G16_SNORM = 37, 159 | DXGI_FORMAT_R16G16_SINT = 38, 160 | DXGI_FORMAT_R32_TYPELESS = 39, 161 | DXGI_FORMAT_D32_FLOAT = 40, 162 | DXGI_FORMAT_R32_FLOAT = 41, 163 | DXGI_FORMAT_R32_UINT = 42, 164 | DXGI_FORMAT_R32_SINT = 43, 165 | DXGI_FORMAT_R24G8_TYPELESS = 44, 166 | DXGI_FORMAT_D24_UNORM_S8_UINT = 45, 167 | DXGI_FORMAT_R24_UNORM_X8_TYPELESS = 46, 168 | DXGI_FORMAT_X24_TYPELESS_G8_UINT = 47, 169 | DXGI_FORMAT_R8G8_TYPELESS = 48, 170 | DXGI_FORMAT_R8G8_UNORM = 49, 171 | DXGI_FORMAT_R8G8_UINT = 50, 172 | DXGI_FORMAT_R8G8_SNORM = 51, 173 | DXGI_FORMAT_R8G8_SINT = 52, 174 | DXGI_FORMAT_R16_TYPELESS = 53, 175 | DXGI_FORMAT_R16_FLOAT = 54, 176 | DXGI_FORMAT_D16_UNORM = 55, 177 | DXGI_FORMAT_R16_UNORM = 56, 178 | DXGI_FORMAT_R16_UINT = 57, 179 | DXGI_FORMAT_R16_SNORM = 58, 180 | DXGI_FORMAT_R16_SINT = 59, 181 | DXGI_FORMAT_R8_TYPELESS = 60, 182 | DXGI_FORMAT_R8_UNORM = 61, 183 | DXGI_FORMAT_R8_UINT = 62, 184 | DXGI_FORMAT_R8_SNORM = 63, 185 | DXGI_FORMAT_R8_SINT = 64, 186 | DXGI_FORMAT_A8_UNORM = 65, 187 | DXGI_FORMAT_R1_UNORM = 66, 188 | DXGI_FORMAT_R9G9B9E5_SHAREDEXP = 67, 189 | DXGI_FORMAT_R8G8_B8G8_UNORM = 68, 190 | DXGI_FORMAT_G8R8_G8B8_UNORM = 69, 191 | DXGI_FORMAT_BC1_TYPELESS = 70, 192 | DXGI_FORMAT_BC1_UNORM = 71, 193 | DXGI_FORMAT_BC1_UNORM_SRGB = 72, 194 | DXGI_FORMAT_BC2_TYPELESS = 73, 195 | DXGI_FORMAT_BC2_UNORM = 74, 196 | DXGI_FORMAT_BC2_UNORM_SRGB = 75, 197 | DXGI_FORMAT_BC3_TYPELESS = 76, 198 | DXGI_FORMAT_BC3_UNORM = 77, 199 | DXGI_FORMAT_BC3_UNORM_SRGB = 78, 200 | DXGI_FORMAT_BC4_TYPELESS = 79, 201 | DXGI_FORMAT_BC4_UNORM = 80, 202 | DXGI_FORMAT_BC4_SNORM = 81, 203 | DXGI_FORMAT_BC5_TYPELESS = 82, 204 | DXGI_FORMAT_BC5_UNORM = 83, 205 | DXGI_FORMAT_BC5_SNORM = 84, 206 | DXGI_FORMAT_B5G6R5_UNORM = 85, 207 | DXGI_FORMAT_B5G5R5A1_UNORM = 86, 208 | DXGI_FORMAT_B8G8R8A8_UNORM = 87, 209 | 
DXGI_FORMAT_B8G8R8X8_UNORM = 88, 210 | DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM = 89, 211 | DXGI_FORMAT_B8G8R8A8_TYPELESS = 90, 212 | DXGI_FORMAT_B8G8R8A8_UNORM_SRGB = 91, 213 | DXGI_FORMAT_B8G8R8X8_TYPELESS = 92, 214 | DXGI_FORMAT_B8G8R8X8_UNORM_SRGB = 93, 215 | DXGI_FORMAT_BC6H_TYPELESS = 94, 216 | DXGI_FORMAT_BC6H_UF16 = 95, 217 | DXGI_FORMAT_BC6H_SF16 = 96, 218 | DXGI_FORMAT_BC7_TYPELESS = 97, 219 | DXGI_FORMAT_BC7_UNORM = 98, 220 | DXGI_FORMAT_BC7_UNORM_SRGB = 99, 221 | DXGI_FORMAT_END = -1 222 | }; 223 | enum D3D10_INPUT_CLASSIFICATION 224 | { D3D10_INPUT_PER_VERTEX_DATA = 0, 225 | D3D10_INPUT_PER_INSTANCE_DATA = 1 226 | }; 227 | #endif 228 | 229 | #if !defined( __d3d11_h__ ) 230 | #if defined __d3dcommon_h__ 231 | #define D3D11_PRIMITIVE_TOPOLOGY D3D_PRIMITIVE_TOPOLOGY 232 | #else 233 | typedef 234 | enum D3D11_PRIMITIVE_TOPOLOGY 235 | { 236 | D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED = 0, 237 | D3D11_PRIMITIVE_TOPOLOGY_POINTLIST = 1, 238 | D3D11_PRIMITIVE_TOPOLOGY_LINELIST = 2, 239 | D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP = 3, 240 | D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST = 4, 241 | D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP = 5, 242 | D3D11_PRIMITIVE_TOPOLOGY_LINELIST_ADJ = 10, 243 | D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ = 11, 244 | D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ = 12, 245 | D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ = 13, 246 | D3D11_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST = 33, 247 | D3D11_PRIMITIVE_TOPOLOGY_2_CONTROL_POINT_PATCHLIST = 34, 248 | D3D11_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST = 35, 249 | D3D11_PRIMITIVE_TOPOLOGY_4_CONTROL_POINT_PATCHLIST = 36, 250 | D3D11_PRIMITIVE_TOPOLOGY_5_CONTROL_POINT_PATCHLIST = 37, 251 | D3D11_PRIMITIVE_TOPOLOGY_6_CONTROL_POINT_PATCHLIST = 38, 252 | D3D11_PRIMITIVE_TOPOLOGY_7_CONTROL_POINT_PATCHLIST = 39, 253 | D3D11_PRIMITIVE_TOPOLOGY_8_CONTROL_POINT_PATCHLIST = 40, 254 | D3D11_PRIMITIVE_TOPOLOGY_9_CONTROL_POINT_PATCHLIST = 41, 255 | D3D11_PRIMITIVE_TOPOLOGY_10_CONTROL_POINT_PATCHLIST = 42, 256 | D3D11_PRIMITIVE_TOPOLOGY_11_CONTROL_POINT_PATCHLIST = 43, 257 | D3D11_PRIMITIVE_TOPOLOGY_12_CONTROL_POINT_PATCHLIST = 44, 258 | D3D11_PRIMITIVE_TOPOLOGY_13_CONTROL_POINT_PATCHLIST = 45, 259 | D3D11_PRIMITIVE_TOPOLOGY_14_CONTROL_POINT_PATCHLIST = 46, 260 | D3D11_PRIMITIVE_TOPOLOGY_15_CONTROL_POINT_PATCHLIST = 47, 261 | D3D11_PRIMITIVE_TOPOLOGY_16_CONTROL_POINT_PATCHLIST = 48, 262 | D3D11_PRIMITIVE_TOPOLOGY_17_CONTROL_POINT_PATCHLIST = 49, 263 | D3D11_PRIMITIVE_TOPOLOGY_18_CONTROL_POINT_PATCHLIST = 50, 264 | D3D11_PRIMITIVE_TOPOLOGY_19_CONTROL_POINT_PATCHLIST = 51, 265 | D3D11_PRIMITIVE_TOPOLOGY_20_CONTROL_POINT_PATCHLIST = 52, 266 | D3D11_PRIMITIVE_TOPOLOGY_21_CONTROL_POINT_PATCHLIST = 53, 267 | D3D11_PRIMITIVE_TOPOLOGY_22_CONTROL_POINT_PATCHLIST = 54, 268 | D3D11_PRIMITIVE_TOPOLOGY_23_CONTROL_POINT_PATCHLIST = 55, 269 | D3D11_PRIMITIVE_TOPOLOGY_24_CONTROL_POINT_PATCHLIST = 56, 270 | D3D11_PRIMITIVE_TOPOLOGY_25_CONTROL_POINT_PATCHLIST = 57, 271 | D3D11_PRIMITIVE_TOPOLOGY_26_CONTROL_POINT_PATCHLIST = 58, 272 | D3D11_PRIMITIVE_TOPOLOGY_27_CONTROL_POINT_PATCHLIST = 59, 273 | D3D11_PRIMITIVE_TOPOLOGY_28_CONTROL_POINT_PATCHLIST = 60, 274 | D3D11_PRIMITIVE_TOPOLOGY_29_CONTROL_POINT_PATCHLIST = 61, 275 | D3D11_PRIMITIVE_TOPOLOGY_30_CONTROL_POINT_PATCHLIST = 62, 276 | D3D11_PRIMITIVE_TOPOLOGY_31_CONTROL_POINT_PATCHLIST = 63, 277 | D3D11_PRIMITIVE_TOPOLOGY_32_CONTROL_POINT_PATCHLIST = 64 278 | } D3D11_PRIMITIVE_TOPOLOGY; 279 | #endif // common 280 | #endif //d3d11 281 | 282 | //------------------------------------------------- 283 | // 284 | // OpenGL 
enums... 285 | // This part is needed when OpenGL is not used : 286 | // some enums & defines are needed anyways. 287 | // Instead of including OpenGL only for that, 288 | // this section will define them 289 | // Furthermore : this can be used by any exporter/converter 290 | // 291 | //------------------------------------------------- 292 | #ifndef __gl_h_ 293 | typedef unsigned int GLenum; 294 | typedef GLenum GLType; 295 | typedef GLenum GLTopology; 296 | //enum GLTopology // turn GL enums in real enums ? 297 | //{ 298 | #define GL_POINTS 0x0000 299 | #define GL_LINES 0x0001 300 | #define GL_LINE_LOOP 0x0002 301 | #define GL_LINE_STRIP 0x0003 302 | #define GL_TRIANGLES 0x0004 303 | #define GL_TRIANGLE_STRIP 0x0005 304 | #define GL_TRIANGLE_FAN 0x0006 305 | #define GL_QUADS 0x0007 306 | #define GL_QUAD_STRIP 0x0008 307 | #define GL_PATCHES 0x000E 308 | //}; 309 | //enum GLType 310 | //{ 311 | // enums from OpenGL so that we are directly ready 312 | #define GL_BYTE 0x1400 313 | #define GL_UNSIGNED_BYTE 0x1401 314 | #define GL_SHORT 0x1402 315 | #define GL_UNSIGNED_SHORT 0x1403 316 | #define GL_INT 0x1404 317 | #define GL_UNSIGNED_INT 0x1405 318 | #define GL_FLOAT 0x1406 319 | #define GL_2_BYTES 0x1407 320 | #define GL_3_BYTES 0x1408 321 | #define GL_4_BYTES 0x1409 322 | #define GL_DOUBLE 0x140A 323 | //}; 324 | #else 325 | typedef GLenum GLType; 326 | typedef GLenum GLTopology; 327 | #endif 328 | enum OGL_PATCH_VERTICES 329 | { 330 | GL_PATCH_VERTICES_0 = 32, 331 | GL_PATCH_VERTICES_1 = 33, 332 | GL_PATCH_VERTICES_2 = 34, 333 | GL_PATCH_VERTICES_3 = 35, 334 | GL_PATCH_VERTICES_4 = 36, 335 | GL_PATCH_VERTICES_5 = 37, 336 | GL_PATCH_VERTICES_6 = 38, 337 | GL_PATCH_VERTICES_7 = 39, 338 | GL_PATCH_VERTICES_8 = 40, 339 | GL_PATCH_VERTICES_9 = 41, 340 | GL_PATCH_VERTICES_10 = 42, 341 | GL_PATCH_VERTICES_11 = 43, 342 | GL_PATCH_VERTICES_12 = 44, 343 | GL_PATCH_VERTICES_13 = 45, 344 | GL_PATCH_VERTICES_14 = 46, 345 | GL_PATCH_VERTICES_15 = 47, 346 | GL_PATCH_VERTICES_16 = 48, 347 | GL_PATCH_VERTICES_17 = 49, 348 | GL_PATCH_VERTICES_18 = 50, 349 | GL_PATCH_VERTICES_19 = 51, 350 | GL_PATCH_VERTICES_20 = 52, 351 | GL_PATCH_VERTICES_21 = 53, 352 | GL_PATCH_VERTICES_22 = 54, 353 | GL_PATCH_VERTICES_23 = 55, 354 | GL_PATCH_VERTICES_24 = 56, 355 | GL_PATCH_VERTICES_25 = 57, 356 | GL_PATCH_VERTICES_26 = 58, 357 | GL_PATCH_VERTICES_27 = 59, 358 | GL_PATCH_VERTICES_28 = 60, 359 | GL_PATCH_VERTICES_29 = 61, 360 | GL_PATCH_VERTICES_30 = 62, 361 | GL_PATCH_VERTICES_31 = 63, 362 | GL_PATCH_VERTICES_32 = 64 363 | }; 364 | 365 | 366 | /// 367 | /// \brief These are the typical names of attributes that could be in the bk3d baked file 368 | /// @{ 369 | #define MESH_POSITION "position" 370 | #define MESH_VERTEXID "vertexid" 371 | #define MESH_COLOR "color" 372 | #define MESH_FACENORMAL "facenormal" 373 | #define MESH_TANGENT "tangent" 374 | #define MESH_BINORMAL "binormal" 375 | #define MESH_NORMAL "normal" 376 | #define MESH_TEXCOORD0 "texcoord0" 377 | #define MESH_TEXCOORD1 "texcoord1" 378 | #define MESH_TEXCOORD2 "texcoord2" 379 | #define MESH_TEXCOORD3 "texcoord3" 380 | #define MESH_BLIND0 "blind0" 381 | #define MESH_BLIND1 "blind1" 382 | #define MESH_BLIND2 "blind2" 383 | #define MESH_BLIND3 "blind3" 384 | #define MESH_BONESOFFSETS "bonesoffsets" 385 | #define MESH_BONESWEIGHTS "bonesweights" 386 | #define MESH_2BONES2WEIGHTS "2Bones2Weights" 387 | /// @} 388 | 389 | #endif //__BK3DDEFS__ 390 | 391 | -------------------------------------------------------------------------------- /dedicated_image.cpp: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #include "dedicated_image.h" 22 | #include "nvvk/commands_vk.hpp" 23 | #include "nvvk/images_vk.hpp" 24 | #include 25 | 26 | namespace nvvk { 27 | 28 | ////////////////////////////////////////////////////////////////////////// 29 | 30 | void DedicatedImage::init(VkDevice device, 31 | VkPhysicalDevice physical, 32 | const VkImageCreateInfo& imageInfo, 33 | VkMemoryPropertyFlags memoryPropertyFlags, 34 | const void* pNextMemory /*= nullptr*/) 35 | { 36 | 37 | m_device = device; 38 | 39 | if(vkCreateImage(device, &imageInfo, nullptr, &m_image) != VK_SUCCESS) 40 | { 41 | assert(0 && "image create failed"); 42 | } 43 | 44 | VkMemoryRequirements2 memReqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; 45 | VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS}; 46 | VkImageMemoryRequirementsInfo2 imageReqs = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2}; 47 | 48 | imageReqs.image = m_image; 49 | memReqs.pNext = &dedicatedRegs; 50 | vkGetImageMemoryRequirements2(device, &imageReqs, &memReqs); 51 | 52 | VkMemoryDedicatedAllocateInfo dedicatedInfo = {VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV}; 53 | dedicatedInfo.image = m_image; 54 | dedicatedInfo.pNext = pNextMemory; 55 | 56 | VkMemoryAllocateInfo allocInfo{VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; 57 | allocInfo.pNext = &dedicatedInfo; 58 | allocInfo.allocationSize = memReqs.memoryRequirements.size; 59 | 60 | VkPhysicalDeviceMemoryProperties memoryProperties; 61 | vkGetPhysicalDeviceMemoryProperties(physical, &memoryProperties); 62 | 63 | // Find an available memory type that satisfies the requested properties. 
64 | for(uint32_t memoryTypeIndex = 0; memoryTypeIndex < memoryProperties.memoryTypeCount; ++memoryTypeIndex) 65 | { 66 | if((memReqs.memoryRequirements.memoryTypeBits & (1 << memoryTypeIndex)) 67 | && (memoryProperties.memoryTypes[memoryTypeIndex].propertyFlags & memoryPropertyFlags) == memoryPropertyFlags) 68 | { 69 | allocInfo.memoryTypeIndex = memoryTypeIndex; 70 | break; 71 | } 72 | } 73 | assert(allocInfo.memoryTypeIndex != ~0); 74 | 75 | if(vkAllocateMemory(device, &allocInfo, nullptr, &m_memory) != VK_SUCCESS) 76 | { 77 | assert(0 && "failed to allocate image memory!"); 78 | } 79 | 80 | vkBindImageMemory(device, m_image, m_memory, 0); 81 | } 82 | 83 | void DedicatedImage::initWithView(VkDevice device, 84 | VkPhysicalDevice physical, 85 | uint32_t width, 86 | uint32_t height, 87 | uint32_t layers, 88 | VkFormat format, 89 | VkImageUsageFlags usage /*= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT*/, 90 | VkImageTiling tiling /*= VK_IMAGE_TILING_OPTIMAL*/, 91 | VkMemoryPropertyFlags memoryPropertyFlags /*= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT*/, 92 | VkSampleCountFlagBits samples /*= VK_SAMPLE_COUNT_1_BIT*/, 93 | VkImageAspectFlags aspect /*= VK_IMAGE_ASPECT_COLOR_BIT*/, 94 | const void* pNextImage /*= nullptr*/, 95 | const void* pNextMemory /*= nullptr*/, 96 | const void* pNextImageView /*= nullptr*/) 97 | { 98 | VkImageCreateInfo imageInfo{VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO}; 99 | imageInfo.pNext = pNextImage; 100 | imageInfo.imageType = VK_IMAGE_TYPE_2D; 101 | imageInfo.extent.width = width; 102 | imageInfo.extent.height = height; 103 | imageInfo.extent.depth = 1; 104 | imageInfo.mipLevels = 1; 105 | imageInfo.arrayLayers = layers; 106 | imageInfo.format = format; 107 | imageInfo.tiling = tiling; 108 | imageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; 109 | imageInfo.usage = usage; 110 | imageInfo.samples = samples; 111 | imageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; 112 | 113 | init(device, physical, imageInfo, memoryPropertyFlags, pNextMemory); 114 | initView(imageInfo, aspect, layers > 1 ? 
VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D, pNextImageView); 115 | } 116 | 117 | void DedicatedImage::initView(const VkImageCreateInfo& imageInfo, VkImageAspectFlags aspect, VkImageViewType viewType, const void* pNextImageView /*= nullptr*/) 118 | { 119 | VkImageViewCreateInfo createInfo = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO}; 120 | createInfo.pNext = pNextImageView; 121 | createInfo.components.r = VK_COMPONENT_SWIZZLE_R; 122 | createInfo.components.g = VK_COMPONENT_SWIZZLE_G; 123 | createInfo.components.b = VK_COMPONENT_SWIZZLE_B; 124 | createInfo.components.a = VK_COMPONENT_SWIZZLE_A; 125 | createInfo.subresourceRange.aspectMask = aspect; 126 | createInfo.subresourceRange.baseArrayLayer = 0; 127 | createInfo.subresourceRange.baseMipLevel = 0; 128 | createInfo.subresourceRange.layerCount = imageInfo.arrayLayers; 129 | createInfo.subresourceRange.levelCount = imageInfo.mipLevels; 130 | createInfo.format = imageInfo.format; 131 | createInfo.viewType = viewType; 132 | createInfo.image = m_image; 133 | 134 | VkResult result = vkCreateImageView(m_device, &createInfo, nullptr, &m_imageView); 135 | assert(result == VK_SUCCESS); 136 | } 137 | 138 | void DedicatedImage::deinit() 139 | { 140 | if(m_image != nullptr) 141 | vkDestroyImage(m_device, m_image, nullptr); 142 | if(m_imageView != nullptr) 143 | vkDestroyImageView(m_device, m_imageView, nullptr); 144 | if(m_memory != nullptr) 145 | vkFreeMemory(m_device, m_memory, nullptr); 146 | *this = {}; 147 | } 148 | 149 | void DedicatedImage::cmdInitialTransition(VkCommandBuffer cmd, VkImageLayout layout, VkAccessFlags access) 150 | { 151 | VkPipelineStageFlags srcPipe = nvvk::makeAccessMaskPipelineStageFlags(0); 152 | VkPipelineStageFlags dstPipe = nvvk::makeAccessMaskPipelineStageFlags(access); 153 | 154 | VkImageMemoryBarrier memBarrier = nvvk::makeImageMemoryBarrier(m_image, 0, access, VK_IMAGE_LAYOUT_UNDEFINED, layout); 155 | 156 | vkCmdPipelineBarrier(cmd, srcPipe, dstPipe, VK_FALSE, 0, NULL, 0, NULL, 1, &memBarrier); 157 | } 158 | 159 | } // namespace nvvk 160 | -------------------------------------------------------------------------------- /dedicated_image.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #pragma once 22 | #include 23 | 24 | namespace nvvk { 25 | ////////////////////////////////////////////////////////////////////////// 26 | /** 27 | # class DedicatedImage 28 | 29 | DedicatedImages have their own dedicated device memory allocation. 30 | This can be beneficial for render pass attachments. 31 | 32 | Also provides utility function setup the initial image layout. 
33 | */ 34 | class DedicatedImage 35 | { 36 | public: 37 | VkDevice m_device = {}; // Logical device, help for many operations 38 | VkImage m_image = {}; // Vulkan image representation (handle) 39 | VkImageView m_imageView = {}; // view of the image (optional) 40 | VkDeviceMemory m_memory = {}; // Device allocation of the image 41 | VkFormat m_format = {}; // Format when created 42 | 43 | operator VkImage() const { return m_image; } 44 | operator VkImageView() const { return m_imageView; } 45 | 46 | void init(VkDevice device, 47 | VkPhysicalDevice physical, 48 | const VkImageCreateInfo& createInfo, 49 | VkMemoryPropertyFlags memoryPropertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 50 | const void* pNextMemory = nullptr); 51 | 52 | void initWithView(VkDevice device, 53 | VkPhysicalDevice physical, 54 | uint32_t width, 55 | uint32_t height, 56 | uint32_t layers, 57 | VkFormat format, 58 | VkImageUsageFlags usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, 59 | VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL, 60 | VkMemoryPropertyFlags memoryPropertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 61 | VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT, 62 | VkImageAspectFlags aspect = VK_IMAGE_ASPECT_COLOR_BIT, 63 | const void* pNextImage = nullptr, 64 | const void* pNextMemory = nullptr, 65 | const void* pNextImageView = nullptr); 66 | 67 | void initView(const VkImageCreateInfo& imageInfo, VkImageAspectFlags aspect, VkImageViewType viewType, const void* pNextImageView = nullptr); 68 | void deinit(); 69 | 70 | void cmdInitialTransition(VkCommandBuffer cmd, VkImageLayout layout, VkAccessFlags access); 71 | }; 72 | 73 | } // namespace nvvk 74 | -------------------------------------------------------------------------------- /doc/Fences.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/doc/Fences.JPG -------------------------------------------------------------------------------- /doc/Memory_chunks.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/doc/Memory_chunks.JPG -------------------------------------------------------------------------------- /doc/Multithreading.md: -------------------------------------------------------------------------------- 1 | # Multithreading: Thread-Workers 2 | 3 | The Vulkan and OpenGL source code of this sample is **not** especially designed to be multi-threaded. 4 | 5 | To be more specific, the *only* declaration that suggests multi-threading is the `NThreadLocalVar` template, which allows TLS (Thread Local Storage) to happen... 6 | 7 | All the rest of the multi-threading happens in the main sample file `gl_vk_bk3dthreaded.cpp`. 8 | 9 | In other words, some methods of the OpenGL or Vulkan renderers become multi-threaded because *they get wrapped by a dedicated class*. 10 | 11 | ## Thread-Workers job assignment 12 | To assign a job to a worker, you must declare a specific class, where: 13 | 14 | - the *constructor* will become the receiver for **the function arguments** 15 | - the worker will start its job through a specific method: `Invoke()` 16 | 17 | This approach allows preparing the function arguments so that they are ready for later use, when the thread is finally kicked off by the thread-worker manager.
18 | Generic example: 19 | 20 | class TskXXX : public TaskBase 21 | { 22 | private: 23 | int arg1; 24 | int arg2; 25 | public: 26 | TskXXX(int _arg1, int _arg2) 27 | { 28 | arg1 = _arg1; arg2 = _arg2; 29 | } 30 | virtual void Invoke() 31 | { 32 | s_pCurRenderer->SomeMethod(arg1, arg2); 33 | } 34 | }; 35 | 36 | To execute this job, we can queue workers for TskXXX as follows: 37 | 38 | for(int n=0; n<100; n++) 39 | { 40 | // worker will be deleted by the default method Done() 41 | TskXXX *tskXXX = new TskXXX(10, 2); 42 | g_mainThreadPool->pushTask(tskXXX); 43 | } 44 | 45 | `g_mainThreadPool` is the main thread-pool manager that got initialized as follows: 46 | 47 | g_mainThreadPool = new ThreadWorkerPool(NUMTHREADS, false, false, NWTPS_ROUND_ROBIN, std::string("Main Worker Pool")); 48 | 49 | ![ThreadWorkers](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/Thread_workers.JPG) 50 | 51 | ## Workers for command-buffer creation 52 | 53 | For more details: one of the most important parts of multi-threading in this sample is in `refreshCmdBuffers()` 54 | 55 | Here is what multi-threaded command-buffer updates do (see the sketch after this list): 56 | 57 | - walk through the 3D model and split it in (almost...) equal parts 58 | - push a Worker for the command-buffer creation of this part ( `g_mainThreadPool->pushTask(tskUpdateCommandBuffer)` ) 59 | - workers will execute in a specific thread: whichever the worker-manager (`g_mainThreadPool`) chooses for you 60 | - each worker will signal an *event* object when it has finished the command-buffer creation 61 | - the main thread in the meantime will have to wait for all of them to be done: looping over all the *event objects* 62 | - once the secondary command-buffers are ready, the main thread will put them together in the primary command-buffer. This task is not supposed to take much time 63 |
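A minimal sketch of that flow, assuming hypothetical event-object helpers (`m_events[]`, `Reset()`, `WaitOnEvent()`) and a hypothetical `assemblePrimaryCommandBuffer()`; only `g_mainThreadPool->pushTask()` and the task class come from the sample itself:

    // one worker per (roughly equal) section of the 3D model
    for(int i = 0; i < numSections; i++)
    {
        m_events[i]->Reset();                        // hypothetical event object
        g_mainThreadPool->pushTask(new TskUpdateCommandBuffer(i, m_events[i]));
    }
    // meanwhile, the main thread waits until every worker signaled completion
    for(int i = 0; i < numSections; i++)
        m_events[i]->WaitOnEvent();                  // hypothetical wait
    // finally, stitch the secondary command-buffers into the primary one
    assemblePrimaryCommandBuffer();                  // hypothetical helper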
-------------------------------------------------------------------------------- /doc/NSight_Captures.md: -------------------------------------------------------------------------------- 1 | ### NSight captures 2 | 3 | Here is an image of NSight Custom-markers when using OpenGL 4 | 5 | ![OpenGL](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/OpenGL.JPG) 6 | 7 | You can see the expected "display" function where 8 | 9 | - at the bottom: the cascade of GPU commands pipelined through the GPU 10 | - at the top, the brown line: a very dense series of OpenGL commands for state changes, buffer-binding and drawcalls. It shows how busy the display() function is issuing commands to OpenGL, and how much the CPU (including the driver) is involved in this task 11 | 12 | 13 | ---------- 14 | 15 | Here is an image of NSight Custom-markers when Vulkan is using thread-workers. 16 | 17 | ![VulkanMT](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/Vulkan_MT.JPG) 18 | 19 | You can see how much room is available for anything else on the CPU: the 8 threads finished the command-buffer update very quickly. Not only could the main thread do something while waiting for the other threads to build command-buffers, but more thread-workers could be allocated onto the 8 available threads. 20 | 21 | ---------- 22 | 23 | And here is an NSight capture when Vulkan is only using the main thread. 24 | 25 | ![VulkanMT](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/Vulkan.JPG) 26 | 27 | Despite the fact that the framerate is the same, there is not much room for more CPU processing. Now, one could argue that 8 additional threads could be used in parallel for other tasks, too. 28 | 29 | This is true: multi-threading can offer a wide range of possibilities. It all depends on what kind of design is needed... 30 | 31 | And what is really exciting with Vulkan is exactly this kind of flexibility, which many engineers have been patiently waiting for. Now the challenge is to make good use of this strength... it may not always be easy. 32 | 33 | ```` 34 | Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 35 | 36 | Licensed under the Apache License, Version 2.0 (the "License"); 37 | you may not use this file except in compliance with the License. 38 | You may obtain a copy of the License at 39 | 40 | http://www.apache.org/licenses/LICENSE-2.0 41 | 42 | Unless required by applicable law or agreed to in writing, software 43 | distributed under the License is distributed on an "AS IS" BASIS, 44 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 45 | See the License for the specific language governing permissions and 46 | limitations under the License. 47 | 48 | SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 49 | SPDX-License-Identifier: Apache-2.0 50 | ```` 51 | 52 | 53 | -------------------------------------------------------------------------------- /doc/OpenGL.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/doc/OpenGL.JPG -------------------------------------------------------------------------------- /doc/Rendering_Modes.md: -------------------------------------------------------------------------------- 1 | ## Rendering modes 2 | This is a simple sample, so I took the liberty to make the shader-system extremely simple: *only 3 fragment shaders* are involved: one for the grid; one for lines; the other for filled primitives. So I cannot claim to showcase a complex use-case made of tons of shaders, as often happens. On the other hand, it might help the sample stay simple... 3 | 4 | In any renderer, we are trying to be efficient: the model contains lots of transformations as well as lots of materials. Rather than updating them on the fly (by updating uniforms, for example), we will generate *arrays of materials* and *arrays of transformations*. Then we will bind the right buffer offsets (see the short sketch at the end of this page) thanks to: 5 | 6 | * `glBindBufferRange` for OpenGL 7 | * *Bindless pointers* for Command-lists 8 | * `vkCmdBindDescriptorSets` offset argument for Vulkan 9 | 10 | In many cases, especially for OpenGL, 'bucketing' Primitives and/or grouping them according to their shaders allows greater performance than taking primitives as they come. Although Vulkan & Command-lists add lots of tolerance over the amount of state transitions in their command buffers, it is better practice to avoid overloading them too much. 11 |
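A minimal sketch of this idea for the Vulkan and OpenGL paths (the buffer, layout and offset names are illustrative, not taken from the sample):

    // Vulkan: select the per-object transform/material with dynamic offsets,
    // instead of updating a uniform between drawcalls
    uint32_t dynamicOffsets[2] = { objIndex * matrixAlignedSize, matIndex * materialAlignedSize };
    vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout,
                            1 /*firstSet*/, 1, &descSetObject, 2, dynamicOffsets);

    // OpenGL: same idea, binding a range of the big uniform buffer per object
    glBindBufferRange(GL_UNIFORM_BUFFER, UBO_MATRIX_BINDING, uboMatrices,
                      objIndex * matrixAlignedSize, sizeof(MatrixData));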
-------------------------------------------------------------------------------- /doc/Results.md: -------------------------------------------------------------------------------- 1 | # Results 2 | 3 | - **Vulkan static**: means we render with Vulkan, but the command-buffers for the geometry are never updated. The scene is made of **static meshes** 4 | - **Vulkan dynamic 16 workers**: command-buffers are all rebuilt during each frame, as if they required constant updates or changes. Typical of dynamic scenes; 16 workers are involved 5 | - **Vulkan dynamic 1 worker**: same as above, but as if no multi-threading were involved 6 | - **OpenGL**: regular OpenGL. It pretty much corresponds to a dynamic scene, because OpenGL *requires you to update render-states and drawcalls each frame* (except for Display-Lists) 7 | - Cmd-Lists static: assuming we created all the **token-buffers** once. The scene is static 8 | - Cmd-Lists dynamic 16 workers: re-building the token buffers each frame in multiple threads 9 | - Cmd-Lists dynamic 1 worker: re-building the token buffers each frame with one thread 10 | 11 | 12 | rendering mode | GPU time [ms]| CPU time [ms]| 13 | --------------------------- | -------- | -------- | 14 | Vulkan static | 5.7 | 0.688 | 15 | Vulkan dynamic 16 workers | 5.7 | 3.0 | 16 | Vulkan dynamic 1 worker | 5.7 | 5.2 | 17 | | | | 18 | OpenGL | 9.9 | 9.4 | 19 | | | | 20 | Cmd-Lists static | 5.0 | 0.097 | 21 | Cmd-Lists dynamic 16 workers| 40.0 | 40.0 | 22 | Cmd-Lists dynamic 1 worker | 20.0 | 20.0 | 23 | 24 | Vulkan shows, as expected, that it performs very well in multi-threaded mode. This model may not be the best use-case for multi-threading, but we can already see that the workers allow parallel processing, almost halving the amount of CPU time required. 25 | 26 | **OpenGL is driver limited**: the fact that a lot of state changes and drawcalls are required for each frame doesn't leave much room (none, in fact) for *more CPU processing*. So if the engine had to perform some *physics simulation over the scene*, the performance would *drop even more*. On the other hand, Vulkan leaves some room for the CPU to process additional tasks: the frame-rate could stay the same with more processing! 27 | 28 | Command-lists are the best for static scenes. It makes sense because the token-buffers are really very close to the GPU front-end. So the *driver has nearly nothing to do*. 29 | 30 | On the other hand: as soon as we want to make command-lists dynamic, things get complicated and way less efficient. However: 31 | 32 | - there must be a **bug** (sorry) in the multi-threaded command-list approach. Even though it may not be as efficient as Vulkan, it shouldn't be as bad... to be continued in upcoming github updates :-). But in a way, this bug shows one thing: it *shows that using Command-lists in multi-threaded mode is not as straightforward as with the Vulkan API*. Even if it might be possible to get good performance, the source code could become hard to maintain. 33 | - Command-lists are part of the OpenGL API, and OpenGL is really bad at multi-threading. In this sample, the token-buffer creation in the thread-workers is absolutely not dealing with OpenGL. This means that the stitching of the token-buffers is postponed and done later by the main thread.
34 | -------------------------------------------------------------------------------- /doc/Thread_workers.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/doc/Thread_workers.JPG -------------------------------------------------------------------------------- /doc/Vulkan.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/doc/Vulkan.JPG -------------------------------------------------------------------------------- /doc/Vulkan_Code_Style.md: -------------------------------------------------------------------------------- 1 | # Vulkan code style 2 | The previous source code snippet reveals a weird syntax that is not native to Vulkan... 3 | 4 | `NVK.h` and `NVK.cpp` contain an experimental overlay that turns many (ideally, all) structures of Vulkan into simple classes made of **constructors** and occasionally **functors**. 5 | 6 | My purpose was to find a way to lower the amount of C code required to fill all these Vulkan structures: to be honest, I was quite scared the first time I saw the Vulkan include file! 7 | 8 | > 9 | **Note**: I don't claim this is an ideal solution. I am not even sure that it makes the code more readable. But I wanted to try it through a few samples and stress the idea. Feedback or better ideas are most welcome. 10 | 11 | The best examples are in the source code of `bk3d_vk.cpp`. But here is a simple example: 12 | 13 | When creating a **Vertex Input State**, there are a bunch of nested structures to put together in order to finalize the description. 14 | 15 | Constructors and functors are interesting because they can turn C/C++ code into some sort of *functional* programming, where declarations are nested into one another and don't require *explicit temporary storage*. 16 | 17 | Besides, they need less space in the code and can even have default argument values. 18 | 19 | NVK::VkPipelineVertexInputStateCreateInfo vkPipelineVertexInputStateCreateInfo( 20 | NVK::VkVertexInputBindingDescription (0/*binding*/, 2*sizeof(vec3f)/*stride*/, VK_VERTEX_INPUT_RATE_VERTEX), 21 | NVK::VkVertexInputAttributeDescription (0/*location*/, 0/*binding*/, VK_FORMAT_R32G32B32_SFLOAT, 0 /*offset*/) // pos 22 | (1/*location*/, 0/*binding*/, VK_FORMAT_R32G32B32_SFLOAT, sizeof(vec3f)/*offset*/) // normal 23 | ); 24 | 25 | > **Notes**: `VkVertexInputBindingDescription` pretty much corresponds to **D3D10 Slots**: a way to group interleaved attributes together in one buffer. You can have many of these 'Slots' 26 | > `VkVertexInputAttributeDescription` corresponds to the attribute that lives in one of these slots, hence the reference to the binding 27 | 28 | In this example, the structure `VkPipelineVertexInputStateCreateInfo` is filled with parameters without the need to declare any temporary intermediate structure and then pass its pointer: 29 | 30 | - the `NVK::VkVertexInputBindingDescription` constructor directly creates a local instance of the structure, which will obviously be destroyed along with `vkPipelineVertexInputStateCreateInfo` 31 | - if there were more than one Input-binding, a functor with the same arguments as the constructor would be added right afterward.
This is what happens with the next class: 32 | - `NVK::VkVertexInputAttributeDescription` is needed for more than one attribute: position and normal 33 | - the first tuple `(0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0)` is its **constructor** 34 | - the second tuple `(1, 0, VK_FORMAT_R32G32B32_SFLOAT, sizeof(vec3f))` is its **functor** 35 | - if there were more than 2 attributes, *another functor* would follow, etc. 36 | 37 | >**Note**: I tried to *avoid 'shortcuts'* and keep the *original names* and structures so there is less confusion when translating Vulkan structures to this kind of writing. 38 | 39 | Another example I find particularly nice to read is the **RenderPass** creation: 40 | 41 | NVK::VkRenderPassCreateInfo rpinfo = NVK::VkRenderPassCreateInfo( 42 | NVK::VkAttachmentDescription 43 | ( VK_FORMAT_R8G8B8A8_UNORM, (VkSampleCountFlagBits)MSAA, //format, samples 44 | VK_ATTACHMENT_LOAD_OP_CLEAR, VK_ATTACHMENT_STORE_OP_STORE, //loadOp, storeOp 45 | VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_DONT_CARE, //stencilLoadOp, stencilStoreOp 46 | VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL //initialLayout, finalLayout 47 | ) 48 | ( VK_FORMAT_D24_UNORM_S8_UINT, (VkSampleCountFlagBits)MSAA, 49 | VK_ATTACHMENT_LOAD_OP_CLEAR, VK_ATTACHMENT_STORE_OP_STORE, 50 | VK_ATTACHMENT_LOAD_OP_CLEAR, VK_ATTACHMENT_STORE_OP_STORE, 51 | VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL 52 | ) 53 | ( VK_FORMAT_R8G8B8A8_UNORM, (VkSampleCountFlagBits)1, //format, samples 54 | VK_ATTACHMENT_LOAD_OP_CLEAR, VK_ATTACHMENT_STORE_OP_STORE, //loadOp, storeOp 55 | VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_DONT_CARE, //stencilLoadOp, stencilStoreOp 56 | VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL //initialLayout, finalLayout 57 | ), 58 | // many sub-passes could be put after one another 59 | NVK::VkSubpassDescription 60 | ( VK_PIPELINE_BIND_POINT_GRAPHICS, //pipelineBindPoint 61 | NVK::VkAttachmentReference(), //inputAttachments 62 | NVK::VkAttachmentReference(0/*attachment*/, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL/*layout*/), //colorAttachments 63 | NVK::VkAttachmentReference(2/*attachment*/, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL/*layout*/), //resolveAttachments 64 | NVK::VkAttachmentReference(1/*attachment*/, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL/*layout*/),//depthStencilAttachment 65 | NVK::Uint32Array(), //preserveAttachments 66 | 0 //flags 67 | ), 68 | 69 | 70 | Of course there is no magic, and what you don't do yourself is done behind the scenes (check `class VkPipelineVertexInputStateCreateInfo` for example; a minimal sketch of the pattern is given at the end of this page). One could argue that it would be even more expensive than using regular Vulkan structures... But let's not forget that this part of the code is happening at **initialization time**... so does it really matter? 71 | 72 | Now, more attention should be paid when dealing with the *main rendering loop*...
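For reference, here is a minimal sketch of how such a constructor + functor pair can be built; this is a simplified illustration (assuming `<vector>` and the Vulkan headers are included), not the actual `NVK` implementation:

    // simplified illustration of the constructor/functor idiom used by the NVK wrappers
    class VertexInputAttributes
    {
    public:
        // constructor: receives the first attribute
        VertexInputAttributes(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset)
        {
            (*this)(location, binding, format, offset);
        }
        // functor: each call appends one more attribute and returns *this for chaining
        VertexInputAttributes& operator()(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset)
        {
            VkVertexInputAttributeDescription a = { location, binding, format, offset };
            m_attrs.push_back(a);
            return *this;
        }
        const VkVertexInputAttributeDescription* data() const { return m_attrs.data(); }
        uint32_t count() const { return (uint32_t)m_attrs.size(); }
    private:
        std::vector<VkVertexInputAttributeDescription> m_attrs;
    };

    // usage: VertexInputAttributes(0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0)
    //                              (1, 0, VK_FORMAT_R32G32B32_SFLOAT, sizeof(vec3f));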
73 | -------------------------------------------------------------------------------- /doc/Vulkan_MT.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/doc/Vulkan_MT.JPG -------------------------------------------------------------------------------- /doc/Vulkan_Renderer.md: -------------------------------------------------------------------------------- 1 | # Vulkan renderer 2 | The Vulkan renderer is located in the `bk3d_vk.cpp` file: 3 | 4 | - `RendererVk` is the class for the renderer 5 | - `Bk3dModelVk` is the class for the model being rendered 6 | 7 | ## Initialization of resources 8 | 9 | `RendererVk::initGraphics` will set up most of the Vulkan objects and related memory 10 | 11 | Vulkan requires you to manage memory as much as possible. Of course you can simply rely on driver memory allocation (`vkAllocateMemory`), but it is better practice to allocate memory in larger chunks with `vkAllocateMemory` and later take care of partitioning what is inside. 12 | 13 | There are a few possibilities to reach the right resources: 14 | 15 | 1. bind many VkBuffers or images at various offsets of the device memory chunk (`vkBindBufferMemory`...) 16 | 2. or use the binding offsets available in `vkCmdBindVertexBuffers` or `vkCmdBindIndexBuffer` or `vkCmdBindDescriptorSets` to reach the right section in the current buffer 17 | 3. Or a mix of both! 18 | 19 | Note that in a real situation, more chunks of memory would be allocated: when the previous ones are full, the engine might create a new one. The application should also have a cleverer heap management of what gets allocated and what gets freed within the chunks of memory. 20 | 21 | Ideally, the memory areas are a mix of buffers bound at various offsets, while drawcalls also use offsets within the buffers that are active: 22 | 23 | ![memory chunks](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/Memory_chunks.JPG) 24 | 25 | This sample doesn't implement this general case, but implements both of the 'extreme' cases: 26 | 27 | The default one (see `#define USE_VKCMDBINDVERTEXBUFFERS_OFFSET`) will allocate *one VkBuffer for one chunk of Device Memory*; offsets will then be maintained for the 3D parts to find their vertices/indices back 28 | ![offsets](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/offsets.JPG) 29 | 30 | Another approach will 'forget' about offsets in buffers and naively create a VkBuffer for each required VBO/IBO. A basic allocator will bind these buffers to the right offsets in the device memory chunk: 31 | ![offsets](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/vkbuffers.JPG) 32 | 33 | It turns out that, aside from demonstrating how to bind buffers at different areas of a device memory chunk, **this latter approach could rapidly reach the limits of available objects** (here, VkBuffer). This is precisely what happened to me once, with a big model from a CAD application... 34 | 35 | In other words, it is **not a good idea to blindly use object handles only**: there are good reasons why the *offset* parameters in command-binding exist. The renderer should be clever enough to aggregate small buffers together thanks to the offset-binding in the command-buffer creation. The best solution would be *to mix both, depending on the requirements of the engine*.
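A minimal sketch of these two 'extreme' cases; `chunkMemory`, `bigBuffer` and the `part` fields are illustrative names, not taken from the sample:

    // (a) default case: one big VkBuffer bound once to the device memory chunk,
    //     per-part offsets are applied at command-buffer creation time
    vkBindBufferMemory(device, bigBuffer, chunkMemory, 0);
    VkDeviceSize vboOffset = part.vboOffset;   // offset maintained by the application
    vkCmdBindVertexBuffers(cmd, 0, 1, &bigBuffer, &vboOffset);
    vkCmdBindIndexBuffer(cmd, bigBuffer, part.iboOffset, VK_INDEX_TYPE_UINT32);

    // (b) other case: one VkBuffer per VBO/IBO, each bound by a basic allocator
    //     at its own offset inside the device memory chunk
    vkBindBufferMemory(device, part.vbo, chunkMemory, part.vboChunkOffset);
    VkDeviceSize zero = 0;
    vkCmdBindVertexBuffers(cmd, 0, 1, &part.vbo, &zero);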
36 | 37 | ## Initialization of Vulkan components 38 | This section should be self-explanatory. Essentially, the idea is to prepare things up-front as much as possible (in fact: everything except command-buffers), so that the *rendering loop doesn't involve any sort of expensive validation*. 39 | 40 | Validation is what made OpenGL so tricky: the state-machine of OpenGL tends to transform the driver into a *paranoid state* where it can never really know or guess what exactly the application is doing: everything is possible at any time and the driver must be ready for it! 41 | 42 | Vulkan, on the other hand, expects the opposite: the developer must know exactly what will be used, and must prepare things so that the Vulkan driver doesn't have to worry about unexpected situations anymore. 43 | 44 | This section will set up the following components: 45 | 46 | - **Spir-V shaders** (*.spv) 47 | - **semaphores** for glDrawVkImageNV synchronization 48 | - a combination of various **Graphics-Pipelines**: one for 'lines' primitives; one for triangle-fans; another for triangle lists... 49 | - **Sampler(s) and Texture(s)** (Note that I do load a Noise DDS texture, but the latest shaders ended up not using it...) 50 | - a **general Uniform buffer**: needed for projection/view-dependent matrices, for example 51 | - **descriptor-set layouts**: how the descriptor-sets are put together for various situations. You can see the Descriptor-Set layout as a way to reduce the scope of resource addressing: a layout that allows the driver to identify which memory pointers need to be set for a given situation. 52 | - **pipeline-layout**: created from the *descriptor-set layouts* 53 | - a list of states we want to **keep dynamic** (meaning they can be modified from within a command-buffer): viewport and scissors, etc. 54 | - a **Descriptor-Pool** and some **Descriptor-Sets**: we will associate some resources with some descriptor-sets 55 | - **Fences** for command-buffer update (later below) 56 | - **Render-Pass** and its sub-pass(es) 57 | - **Frame-buffer** to associate with the Render-Pass 58 | - Vulkan **timer** initialization 59 | 60 | ## Initialization of Command-Buffer Pools 61 | *Command-buffer Pools must be created per thread*: the allocation/deallocation of command-buffers can only be performed in a concurrent manner if each thread owns its own allocation pool. In our sample, we will use the **TLS** (Thread Local Storage) for each thread to refer to its own pool. 62 | 63 | The main initialization function will issue a series of calls to each thread in order to have them store their command-buffer pool in their own TLS (see `initThreadLocalVars`) 64 |
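A minimal sketch of the idea, using C++ `thread_local` storage as a stand-in for the sample's `NThreadLocalVar` wrapper (the pool-creation details are illustrative):

    // each thread creates and keeps its own VkCommandPool in thread-local storage,
    // so command-buffer allocation/deallocation can happen concurrently without locks
    thread_local VkCommandPool s_cmdPool = VK_NULL_HANDLE;

    void initThreadCommandPool(VkDevice device, uint32_t queueFamilyIndex)
    {
        VkCommandPoolCreateInfo info = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO };
        info.queueFamilyIndex = queueFamilyIndex;
        info.flags            = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
        vkCreateCommandPool(device, &info, nullptr, &s_cmdPool);
    }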
65 | ## Command-Buffers 66 | Vulkan introduced the concept of **primary** and **secondary** command-buffers. The idea behind it is to allow a more generic primary command-buffer to call secondary ones that contain more details about the scene. Note that Vulkan restricted the hierarchy to only 2 levels. 67 | 68 | Command-buffer usage is rather flexible. In our case, we will use various command-buffers with the idea that: 69 | 70 | - for every frame, we will re-create the *primary command-buffer* 71 | - *secondary command-buffers* might be created every frame or recycled: it is optional (see 'c' option in the UI) 72 | - secondary command-buffers are used for specific purposes: 73 | - one for *memory barrier* and *viewport setup*: this buffer will be created/updated *only when the viewport size changes* 74 | - another one for the grid of the floor: this command-buffer can also be very static and can be created once and for all... 75 | - finally, other secondary command-buffers are used to render the geometry of the scene 76 | 77 | ![cmd-buffers](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/cmd-buffers.JPG) 78 | 79 | I mentioned earlier in the "initialization section" the creation of **Fences**. 80 | 81 | As a reminder, the GPU is a co-processor that we want to fill with tasks in parallel with what the CPU does on its side. Because we really want both of them to work in parallel, it is bad to 'serialize' the CPU with the GPU. Nevertheless, it is still necessary to synchronize them at various critical steps. 82 | 83 | The update of command-buffers is one of them: after we have generated a bunch of command-buffers, we will enqueue them for the GPU to consume. But it is possible that the CPU has already looped back to the next frame for command-buffer creation, *before* the GPU is finished with the previous batch of command-buffers. 84 | 85 | One naive solution is to wait for the GPU to be done and finally recycle the command-buffers for the next iteration. But waiting for the GPU would be a waste... 86 | 87 | The *ideal solution* would be to allocate new command-buffers for the next frame so that we don't wait for GPU completion. Later, the consumed command-buffers should be identified and put back to the pool (garbage collection). 88 | 89 | This sample is doing a bit of the latter: using 2 caches of command-buffers and doing a **"ping-pong"** transaction (a short sketch is given further below): 90 | 91 | - while the GPU is dealing with cache #1, we will check the completion of cache #2 thanks to Fence #2; 92 | - most of the time it might be ready; worst case: a bit of wait. 93 | - when ready, we will free the command-buffers from cache #2 94 | - then we will allocate new command-buffers in this cache #2, while the GPU finishes consuming what is in cache #1 95 | - then we will enqueue the new cmd-buffers from cache #2 to the GPU, tagged with Fence #2 96 | - next frame, the CPU will check Fence #1 to see if the GPU is done with it (normally it should be... the GPU would have already started to consume the cmd-buffers from cache #2) 97 | - etc. 98 | 99 | ![Fences](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/Fences.JPG) 100 | 101 | This approach doesn't allow building more than 1 frame ahead (in fact, many games artificially limit the # of frames ahead to prevent lagging game controls. Frames ahead are good for benchmarks... not so good for the gaming experience ;-). 102 | 103 | I suppose that a more generic approach would be to use a **ring-buffer** or a **command-buffer 'garbage' collector**, rather than limiting ourselves to 2 slots (ping-pong). The next revision of the sample might have a better approach... 104 |
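A minimal sketch of one iteration of this ping-pong, reusing the sample's `m_cmdSceneIdx` / `m_sceneFence` / `m_cmdScene` names but with otherwise illustrative helpers (`buildSceneCommandBuffer`, `cmdPool`, `submitInfo`):

    // 'idx' is the cache we want to rebuild this frame; the GPU may still be
    // consuming the *other* cache, which is exactly what we want
    int idx = m_cmdSceneIdx;
    vkWaitForFences(device, 1, &m_sceneFence[idx], VK_TRUE, UINT64_MAX); // usually returns immediately
    vkResetFences(device, 1, &m_sceneFence[idx]);
    vkFreeCommandBuffers(device, cmdPool, 1, &m_cmdScene[idx]);          // recycle the consumed one
    m_cmdScene[idx] = buildSceneCommandBuffer();                         // rebuild (possibly with thread-workers)
    vkQueueSubmit(queue, 1, &submitInfo, m_sceneFence[idx]);             // tag the submission with the same fence
    m_cmdSceneIdx = 1 - m_cmdSceneIdx;                                   // ping-pong for the next frame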
105 | ## Blit to OpenGL back-buffer: `GL_NV_draw_vulkan_image` 106 | 107 | The driver team introduced a convenient way to mix Vulkan rendering with an existing OpenGL context associated with the window. 108 | 109 | Normally, **WSI** should be the way to work with Vulkan: dealing with a swapchain; associating it with the window surface, etc. 110 | 111 | The interesting part of GL_NV_draw_vulkan_image is that it can spare you the work of setting up WSI; but more importantly, it allows you to **mix Vulkan with OpenGL**. As an example: most of our samples are currently running Vulkan with an overlay in OpenGL: **AntTweakBar** or any other UI overlay is still in OpenGL. If we didn't have this feature, no overlay would have worked right away, and it would have required quite some time to port... 112 | 113 | GL_NV_draw_vulkan_image requires 2 **semaphores**: 114 | 115 | - one that will be signaled as soon as the blit of the Vulkan image to the backbuffer is done (`m_semOpenGLReadDone` in the sample) 116 | - the other one to be signaled by the Queue (`vkQueueSubmit`) when the GPU finally finishes the rendering (`m_semVKRenderingDone` in the sample) 117 | 118 | nvk.vkQueueSubmit( NVK::VkSubmitInfo( 119 | 1, &m_semOpenGLReadDone, // <== might make the queue wait to be signaled 120 | 1, &m_cmdScene[m_cmdSceneIdx], 121 | 1, &m_semVKRenderingDone), // <== might make the copy to OpenGL wait 122 | m_sceneFence[m_cmdSceneIdx] ); 123 | 124 | The sample will call `RendererVk::blitToBackbuffer()` at the end for the final copy to the OpenGL backbuffer: 125 | 126 | glWaitVkSemaphoreNV((GLuint64)m_semVKRenderingDone); 127 | glDrawVkImageNV((GLuint64)m_colorImage.img, 0, 0,0,w,h, 0, 0,1,1,0); 128 | glSignalVkSemaphoreNV((GLuint64)m_semOpenGLReadDone); 129 | -------------------------------------------------------------------------------- /doc/cmd-buffers.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/doc/cmd-buffers.JPG -------------------------------------------------------------------------------- /doc/offsets.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/doc/offsets.JPG -------------------------------------------------------------------------------- /doc/sample.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/doc/sample.jpg -------------------------------------------------------------------------------- /doc/toggles.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/doc/toggles.JPG -------------------------------------------------------------------------------- /doc/vkbuffers.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/doc/vkbuffers.JPG -------------------------------------------------------------------------------- /gl_nv_command_list.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #ifndef NV_COMMANDLIST_H__ 22 | #define NV_COMMANDLIST_H__ 23 | 24 | #include 25 | 26 | 27 | # if defined(__MINGW32__) || defined(__CYGWIN__) 28 | # define GLEXT_APIENTRY __stdcall 29 | # elif (_MSC_VER >= 800) || defined(_STDCALL_SUPPORTED) || defined(__BORLANDC__) 30 | # define GLEXT_APIENTRY __stdcall 31 | # else 32 | # define GLEXT_APIENTRY 33 | # endif 34 | 35 | /* 36 | #pragma pack(push,1) 37 | 38 | typedef struct { 39 | GLuint header; 40 | } TerminateSequenceCommandNV; 41 | 42 | typedef struct { 43 | GLuint header; 44 | } NOPCommandNV; 45 | 46 | typedef struct { 47 | GLuint header; 48 | GLuint count; 49 | GLuint firstIndex; 50 | GLuint baseVertex; 51 | } DrawElementsCommandNV; 52 | 53 | typedef struct { 54 | GLuint header; 55 | GLuint count; 56 | GLuint first; 57 | } DrawArraysCommandNV; 58 | 59 | typedef struct { 60 | GLuint header; 61 | GLenum mode; 62 | GLuint count; 63 | GLuint instanceCount; 64 | GLuint firstIndex; 65 | GLuint baseVertex; 66 | GLuint baseInstance; 67 | } DrawElementsInstancedCommandNV; 68 | 69 | typedef struct { 70 | GLuint header; 71 | GLenum mode; 72 | GLuint count; 73 | GLuint instanceCount; 74 | GLuint first; 75 | GLuint baseInstance; 76 | } DrawArraysInstancedCommandNV; 77 | 78 | typedef struct { 79 | GLuint header; 80 | GLuint addressLo; 81 | GLuint addressHi; 82 | GLuint typeSizeInByte; 83 | } ElementAddressCommandNV; 84 | 85 | typedef struct { 86 | GLuint header; 87 | GLuint index; 88 | GLuint addressLo; 89 | GLuint addressHi; 90 | } AttributeAddressCommandNV; 91 | 92 | typedef struct { 93 | GLuint header; 94 | GLushort index; 95 | GLushort stage; 96 | GLuint addressLo; 97 | GLuint addressHi; 98 | } UniformAddressCommandNV; 99 | 100 | typedef struct { 101 | GLuint header; 102 | float red; 103 | float green; 104 | float blue; 105 | float alpha; 106 | } BlendColorCommandNV; 107 | 108 | typedef struct { 109 | GLuint header; 110 | GLuint frontStencilRef; 111 | GLuint backStencilRef; 112 | } StencilRefCommandNV; 113 | 114 | typedef struct { 115 | GLuint header; 116 | float lineWidth; 117 | } LineWidthCommandNV; 118 | 119 | typedef struct { 120 | GLuint header; 121 | float scale; 122 | float bias; 123 | } PolygonOffsetCommandNV; 124 | 125 | typedef struct { 126 | GLuint header; 127 | float alphaRef; 128 | } AlphaRefCommandNV; 129 | 130 | typedef struct { 131 | GLuint header; 132 | GLuint x; 133 | GLuint y; 134 | GLuint width; 135 | GLuint height; 136 | } ViewportCommandNV; // only ViewportIndex 0 137 | 138 | typedef struct { 139 | GLuint header; 140 | GLuint x; 141 | GLuint y; 142 | GLuint width; 143 | GLuint height; 144 | } ScissorCommandNV; // only ViewportIndex 0 145 | 146 | typedef struct { 147 | GLuint header; 148 | GLuint frontFace; // 0 for CW, 1 for CCW 149 | } FrontFaceCommandNV; 150 | 151 | #pragma pack(pop) 152 | */ 153 | 154 | #endif 155 | 156 | -------------------------------------------------------------------------------- /gl_nv_commandlist_helpers.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | // 22 | // Shader stages for command-list 23 | // 24 | enum ShaderStages { 25 | STAGE_VERTEX, 26 | STAGE_TESS_CONTROL, 27 | STAGE_TESS_EVALUATION, 28 | STAGE_GEOMETRY, 29 | STAGE_FRAGMENT, 30 | STAGES, 31 | }; 32 | 33 | // 34 | // Put together all what is needed to give to the extension function 35 | // for a token buffer 36 | // 37 | struct TokenBuffer 38 | { 39 | GLuint bufferID; // buffer containing all 40 | GLuint64EXT bufferAddr; // buffer GPU-pointer 41 | std::string data; // bytes of data containing the structures to send to the driver 42 | void release() { 43 | glDeleteBuffers(1, &bufferID); 44 | bufferAddr = 0; 45 | bufferID = 0; 46 | data.clear(); 47 | } 48 | }; 49 | // 50 | // Grouping together what is needed to issue a single command made of many states, fbos and Token Buffer pointers 51 | // 52 | struct CommandStatesBatch 53 | { 54 | void release() 55 | { 56 | dataGPUPtrs.clear(); 57 | dataPtrs.clear(); 58 | sizes.clear(); 59 | stateGroups.clear(); 60 | fbos.clear(); 61 | numItems = 0; 62 | } 63 | void pushBatch(GLuint stateGroup_, GLuint fbo_, GLuint64EXT dataGPUPtr_, const GLvoid* dataPtr_, GLsizei size_) 64 | { 65 | dataGPUPtrs.push_back(dataGPUPtr_); 66 | dataPtrs.push_back(dataPtr_); 67 | sizes.push_back(size_); 68 | stateGroups.push_back(stateGroup_); 69 | fbos.push_back(fbo_); 70 | numItems = fbos.size(); 71 | } 72 | CommandStatesBatch& operator+= (CommandStatesBatch &cb) 73 | { 74 | // TODO: do better than that... 75 | size_t sz = cb.fbos.size(); 76 | for(int i=0; i dataGPUPtrs; // pointer in data where to locate each separate groups (for glListDrawCommandsStatesClientNV) 88 | std::vector dataPtrs; // pointer in data where to locate each separate groups (for glListDrawCommandsStatesClientNV) 89 | std::vector sizes; // sizes of each groups 90 | std::vector stateGroups;// state-group IDs used for each groups 91 | std::vector fbos; // FBOs being used for each groups 92 | size_t numItems; // == fbos.size() or sizes.size()... 93 | 94 | //CommandStatesBatch& operator+= (CommandStatesBatch &cb) 95 | //{ 96 | // dataGPUPtrs += cb.dataGPUPtrs; 97 | // dataPtrs += cb.dataPtrs; 98 | // sizes += cb.sizes; 99 | // stateGroups += cb.stateGroups; 100 | // fbos += cb.fbos; 101 | // numItems += fbos.size(); 102 | // return *this; 103 | //} 104 | //std::basic_string, std::allocator > dataGPUPtrs; // pointer in data where to locate each separate groups (for glListDrawCommandsStatesClientNV) 105 | //std::basic_string, std::allocator > dataPtrs; // pointer in data where to locate each separate groups (for glListDrawCommandsStatesClientNV) 106 | //std::basic_string, std::allocator > sizes; // sizes of each groups 107 | //std::basic_string, std::allocator > stateGroups;// state-group IDs used for each groups 108 | //std::basic_string, std::allocator > fbos; // FBOs being used for each groups 109 | //size_t numItems; // == fbos.size() or sizes.size()... 
110 | }; 111 | 112 | //----------------------------------------------------------------------------- 113 | // Useful stuff for Command-list 114 | //----------------------------------------------------------------------------- 115 | static GLuint s_header[GL_FRONT_FACE_COMMAND_NV+1] = { 0 }; 116 | static GLuint s_headerSizes[GL_FRONT_FACE_COMMAND_NV+1] = { 0 }; 117 | 118 | static GLushort s_stages[STAGES]; 119 | 120 | struct Token_Nop { 121 | static const GLenum ID = GL_NOP_COMMAND_NV; 122 | NOPCommandNV cmd; 123 | Token_Nop() { 124 | cmd.header = s_header[ID]; 125 | } 126 | }; 127 | 128 | struct Token_TerminateSequence { 129 | static const GLenum ID = GL_TERMINATE_SEQUENCE_COMMAND_NV; 130 | 131 | TerminateSequenceCommandNV cmd; 132 | 133 | Token_TerminateSequence() { 134 | cmd.header = s_header[ID]; 135 | } 136 | }; 137 | 138 | struct Token_DrawElemsInstanced { 139 | static const GLenum ID = GL_DRAW_ELEMENTS_INSTANCED_COMMAND_NV; 140 | 141 | DrawElementsInstancedCommandNV cmd; 142 | 143 | Token_DrawElemsInstanced() { 144 | cmd.baseInstance = 0; 145 | cmd.baseVertex = 0; 146 | cmd.firstIndex = 0; 147 | cmd.count = 0; 148 | cmd.instanceCount = 1; 149 | 150 | cmd.header = s_header[ID]; 151 | } 152 | }; 153 | 154 | struct Token_DrawArraysInstanced { 155 | static const GLenum ID = GL_DRAW_ARRAYS_INSTANCED_COMMAND_NV; 156 | 157 | DrawArraysInstancedCommandNV cmd; 158 | 159 | Token_DrawArraysInstanced() { 160 | cmd.baseInstance = 0; 161 | cmd.first = 0; 162 | cmd.count = 0; 163 | cmd.instanceCount = 1; 164 | 165 | cmd.header = s_header[ID]; 166 | } 167 | }; 168 | 169 | struct Token_DrawElements { 170 | static const GLenum ID = GL_DRAW_ELEMENTS_COMMAND_NV; 171 | 172 | DrawElementsCommandNV cmd; 173 | 174 | Token_DrawElements() { 175 | cmd.baseVertex = 0; 176 | cmd.firstIndex = 0; 177 | cmd.count = 0; 178 | 179 | cmd.header = s_header[ID]; 180 | } 181 | }; 182 | 183 | struct Token_DrawArrays { 184 | static const GLenum ID = GL_DRAW_ARRAYS_COMMAND_NV; 185 | 186 | DrawArraysCommandNV cmd; 187 | 188 | Token_DrawArrays() { 189 | cmd.first = 0; 190 | cmd.count = 0; 191 | 192 | cmd.header = s_header[ID]; 193 | } 194 | }; 195 | 196 | struct Token_DrawElementsStrip { 197 | static const GLenum ID = GL_DRAW_ELEMENTS_STRIP_COMMAND_NV; 198 | 199 | DrawElementsCommandNV cmd; 200 | 201 | Token_DrawElementsStrip() { 202 | cmd.baseVertex = 0; 203 | cmd.firstIndex = 0; 204 | cmd.count = 0; 205 | 206 | cmd.header = s_header[ID]; 207 | } 208 | }; 209 | 210 | struct Token_DrawArraysStrip { 211 | static const GLenum ID = GL_DRAW_ARRAYS_STRIP_COMMAND_NV; 212 | 213 | DrawArraysCommandNV cmd; 214 | 215 | Token_DrawArraysStrip() { 216 | cmd.first = 0; 217 | cmd.count = 0; 218 | 219 | cmd.header = s_header[ID]; 220 | } 221 | }; 222 | 223 | struct Token_AttributeAddress { 224 | static const GLenum ID = GL_ATTRIBUTE_ADDRESS_COMMAND_NV; 225 | 226 | AttributeAddressCommandNV cmd; 227 | 228 | Token_AttributeAddress() { 229 | cmd.header = s_header[ID]; 230 | } 231 | }; 232 | 233 | struct Token_ElementAddress { 234 | static const GLenum ID = GL_ELEMENT_ADDRESS_COMMAND_NV; 235 | 236 | ElementAddressCommandNV cmd; 237 | 238 | Token_ElementAddress() { 239 | cmd.header = s_header[ID]; 240 | } 241 | }; 242 | 243 | struct Token_UniformAddress { 244 | static const GLenum ID = GL_UNIFORM_ADDRESS_COMMAND_NV; 245 | 246 | UniformAddressCommandNV cmd; 247 | 248 | Token_UniformAddress() { 249 | cmd.header = s_header[ID]; 250 | } 251 | }; 252 | 253 | struct Token_BlendColor{ 254 | static const GLenum ID = GL_BLEND_COLOR_COMMAND_NV; 255 | 256 | 
BlendColorCommandNV cmd; 257 | 258 | Token_BlendColor() { 259 | cmd.header = s_header[ID]; 260 | } 261 | }; 262 | 263 | struct Token_StencilRef{ 264 | static const GLenum ID = GL_STENCIL_REF_COMMAND_NV; 265 | 266 | StencilRefCommandNV cmd; 267 | 268 | Token_StencilRef() { 269 | cmd.header = s_header[ID]; 270 | } 271 | }; 272 | 273 | struct Token_LineWidth{ 274 | static const GLenum ID = GL_LINE_WIDTH_COMMAND_NV; 275 | 276 | LineWidthCommandNV cmd; 277 | 278 | Token_LineWidth() { 279 | cmd.header = s_header[ID]; 280 | } 281 | }; 282 | 283 | struct Token_PolygonOffset{ 284 | static const GLenum ID = GL_POLYGON_OFFSET_COMMAND_NV; 285 | 286 | PolygonOffsetCommandNV cmd; 287 | 288 | Token_PolygonOffset() { 289 | cmd.header = s_header[ID]; 290 | } 291 | }; 292 | 293 | struct Token_AlphaRef{ 294 | static const GLenum ID = GL_ALPHA_REF_COMMAND_NV; 295 | 296 | AlphaRefCommandNV cmd; 297 | 298 | Token_AlphaRef() { 299 | cmd.header = s_header[ID]; 300 | } 301 | }; 302 | 303 | struct Token_Viewport{ 304 | static const GLenum ID = GL_VIEWPORT_COMMAND_NV; 305 | ViewportCommandNV cmd; 306 | Token_Viewport() { 307 | cmd.header = s_header[ID]; 308 | } 309 | }; 310 | 311 | struct Token_Scissor { 312 | static const GLenum ID = GL_SCISSOR_COMMAND_NV; 313 | ScissorCommandNV cmd; 314 | Token_Scissor() { 315 | cmd.header = s_header[ID]; 316 | } 317 | }; 318 | 319 | struct Token_FrontFace { 320 | static const GLenum ID = GL_FRONT_FACE_COMMAND_NV; 321 | FrontFaceCommandNV cmd; 322 | Token_FrontFace() { 323 | cmd.header = s_header[ID]; 324 | } 325 | }; 326 | 327 | //----------------------------------------------------------------------------- 328 | // 329 | //----------------------------------------------------------------------------- 330 | template <class T> 331 | void registerSize() 332 | { 333 | s_headerSizes[T::ID] = sizeof(T); 334 | } 335 | 336 | 337 | //----------------------------------------------------------------------------- 338 | // 339 | //----------------------------------------------------------------------------- 340 | void initTokenInternals() 341 | { 342 | registerSize<Token_Nop>(); 343 | registerSize<Token_TerminateSequence>(); 344 | registerSize<Token_DrawElemsInstanced>(); 345 | registerSize<Token_DrawArraysInstanced>(); 346 | registerSize<Token_DrawElements>(); 347 | registerSize<Token_DrawArrays>(); 348 | registerSize<Token_DrawElementsStrip>(); 349 | registerSize<Token_DrawArraysStrip>(); 350 | registerSize<Token_AttributeAddress>(); 351 | registerSize<Token_ElementAddress>(); 352 | registerSize<Token_UniformAddress>(); 353 | registerSize<Token_BlendColor>(); 354 | registerSize<Token_StencilRef>(); 355 | registerSize<Token_LineWidth>(); 356 | registerSize<Token_PolygonOffset>(); 357 | registerSize<Token_AlphaRef>(); 358 | registerSize<Token_Viewport>(); 359 | registerSize<Token_Scissor>(); 360 | registerSize<Token_FrontFace>(); 361 | 362 | for (int i = 0; i < (GL_FRONT_FACE_COMMAND_NV+1); i++){ 363 | // using i instead of a table of token IDs because they are arranged in the same order as i increments. 364 | // shortcut for the source code. 
See gl_nv_command_list.h 365 | s_header[i] = glGetCommandHeaderNV(i/*==Token enum*/, s_headerSizes[i]); 366 | } 367 | s_stages[STAGE_VERTEX] = glGetStageIndexNV(GL_VERTEX_SHADER); 368 | s_stages[STAGE_TESS_CONTROL] = glGetStageIndexNV(GL_TESS_CONTROL_SHADER); 369 | s_stages[STAGE_TESS_EVALUATION] = glGetStageIndexNV(GL_TESS_EVALUATION_SHADER); 370 | s_stages[STAGE_GEOMETRY] = glGetStageIndexNV(GL_GEOMETRY_SHADER); 371 | s_stages[STAGE_FRAGMENT] = glGetStageIndexNV(GL_FRAGMENT_SHADER); 372 | } 373 | 374 | //------------------------------------------------------------------------------ 375 | // build 376 | //------------------------------------------------------------------------------ 377 | std::string buildLineWidthCommand(float w) 378 | { 379 | std::string cmd; 380 | Token_LineWidth lw; 381 | lw.cmd.lineWidth = w; 382 | cmd = std::string((const char*)&lw, sizeof(Token_LineWidth)); 383 | 384 | return cmd; 385 | } 386 | //------------------------------------------------------------------------------ 387 | // build 388 | //------------------------------------------------------------------------------ 389 | std::string buildUniformAddressCommand(int idx, GLuint64 p, GLsizeiptr sizeBytes, ShaderStages stage) 390 | { 391 | std::string cmd; 392 | Token_UniformAddress attr; 393 | attr.cmd.stage = s_stages[stage]; 394 | attr.cmd.index = idx; 395 | ((GLuint64EXT*)&attr.cmd.addressLo)[0] = p; 396 | cmd = std::string((const char*)&attr, sizeof(Token_UniformAddress)); 397 | 398 | return cmd; 399 | } 400 | //------------------------------------------------------------------------------ 401 | // build 402 | //------------------------------------------------------------------------------ 403 | std::string buildAttributeAddressCommand(int idx, GLuint64 p, GLsizeiptr sizeBytes) 404 | { 405 | std::string cmd; 406 | Token_AttributeAddress attr; 407 | attr.cmd.index = idx; 408 | ((GLuint64EXT*)&attr.cmd.addressLo)[0] = p; 409 | cmd = std::string((const char*)&attr, sizeof(Token_AttributeAddress)); 410 | 411 | return cmd; 412 | } 413 | //------------------------------------------------------------------------------ 414 | // build 415 | //------------------------------------------------------------------------------ 416 | std::string buildElementAddressCommand(GLuint64 ptr, GLenum indexFormatGL) 417 | { 418 | std::string cmd; 419 | Token_ElementAddress attr; 420 | ((GLuint64EXT*)&attr.cmd.addressLo)[0] = ptr; 421 | switch (indexFormatGL) 422 | { 423 | case GL_UNSIGNED_INT: 424 | attr.cmd.typeSizeInByte = 4; 425 | break; 426 | case GL_UNSIGNED_SHORT: 427 | attr.cmd.typeSizeInByte = 2; 428 | break; 429 | } 430 | cmd = std::string((const char*)&attr, sizeof(Token_AttributeAddress)); 431 | 432 | return cmd; 433 | } 434 | //------------------------------------------------------------------------------ 435 | // 436 | //------------------------------------------------------------------------------ 437 | std::string buildDrawElementsCommand(GLenum topologyGL, GLuint indexCount) 438 | { 439 | std::string cmd; 440 | Token_DrawElements dc; 441 | Token_DrawElementsStrip dcstrip; 442 | switch (topologyGL) 443 | { 444 | case GL_TRIANGLE_STRIP: 445 | case GL_QUAD_STRIP: 446 | case GL_LINE_STRIP: 447 | dcstrip.cmd.baseVertex = 0; 448 | dcstrip.cmd.firstIndex = 0; 449 | dcstrip.cmd.count = indexCount; 450 | cmd = std::string((const char*)&dcstrip, sizeof(Token_DrawElementsStrip)); 451 | break; 452 | default: 453 | dc.cmd.baseVertex = 0; 454 | dc.cmd.firstIndex = 0; 455 | dc.cmd.count = indexCount; 456 | cmd = std::string((const 
char*)&dc, sizeof(Token_DrawElements)); 457 | break; 458 | } 459 | return cmd; 460 | } 461 | //------------------------------------------------------------------------------ 462 | // 463 | //------------------------------------------------------------------------------ 464 | std::string buildDrawArraysCommand(GLenum topologyGL, GLuint indexCount) 465 | { 466 | std::string cmd; 467 | Token_DrawArrays dc; 468 | Token_DrawArraysStrip dcstrip; 469 | switch (topologyGL) 470 | { 471 | case GL_TRIANGLE_STRIP: 472 | case GL_QUAD_STRIP: 473 | case GL_LINE_STRIP: 474 | dcstrip.cmd.first = 0; 475 | dcstrip.cmd.count = indexCount; 476 | cmd = std::string((const char*)&dcstrip, sizeof(Token_DrawArraysStrip)); 477 | break; 478 | default: 479 | dc.cmd.first = 0; 480 | dc.cmd.count = indexCount; 481 | cmd = std::string((const char*)&dc, sizeof(Token_DrawArrays)); 482 | break; 483 | } 484 | return cmd; 485 | } 486 | 487 | //------------------------------------------------------------------------------ 488 | // 489 | //------------------------------------------------------------------------------ 490 | std::string buildViewportCommand(GLint x, GLint y, GLsizei width, GLsizei height) 491 | { 492 | std::string cmd; 493 | Token_Viewport dc; 494 | dc.cmd.x = x; 495 | dc.cmd.y = y; 496 | dc.cmd.width = width; 497 | dc.cmd.height = height; 498 | cmd = std::string((const char*)&dc, sizeof(Token_Viewport)); 499 | return cmd; 500 | } 501 | 502 | //------------------------------------------------------------------------------ 503 | // 504 | //------------------------------------------------------------------------------ 505 | std::string buildBlendColorCommand(GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha) 506 | { 507 | std::string cmd; 508 | Token_BlendColor dc; 509 | dc.cmd.red = red; 510 | dc.cmd.green = green; 511 | dc.cmd.blue = blue; 512 | dc.cmd.alpha = alpha; 513 | cmd = std::string((const char*)&dc, sizeof(Token_BlendColor)); 514 | return cmd; 515 | } 516 | 517 | //------------------------------------------------------------------------------ 518 | // 519 | //------------------------------------------------------------------------------ 520 | std::string buildStencilRefCommand(GLuint frontStencilRef, GLuint backStencilRef) 521 | { 522 | std::string cmd; 523 | Token_StencilRef dc; 524 | dc.cmd.frontStencilRef = frontStencilRef; 525 | dc.cmd.backStencilRef = backStencilRef; 526 | cmd = std::string((const char*)&dc, sizeof(Token_StencilRef)); 527 | return cmd; 528 | } 529 | 530 | //------------------------------------------------------------------------------ 531 | // 532 | //------------------------------------------------------------------------------ 533 | std::string buildPolygonOffsetCommand(GLfloat scale, GLfloat bias) 534 | { 535 | std::string cmd; 536 | Token_PolygonOffset dc; 537 | dc.cmd.bias = bias; 538 | dc.cmd.scale = scale; 539 | cmd = std::string((const char*)&dc, sizeof(Token_PolygonOffset)); 540 | return cmd; 541 | } 542 | 543 | //------------------------------------------------------------------------------ 544 | // 545 | //------------------------------------------------------------------------------ 546 | std::string buildScissorCommand(GLint x, GLint y, GLsizei width, GLsizei height) 547 | { 548 | std::string cmd; 549 | Token_Scissor dc; 550 | dc.cmd.x = x; 551 | dc.cmd.y = y; 552 | dc.cmd.width = width; 553 | dc.cmd.height = height; 554 | cmd = std::string((const char*)&dc, sizeof(Token_Scissor)); 555 | return cmd; 556 | } 557 | 
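//------------------------------------------------------------------------------
// NOTE: illustrative sketch, not part of the original helper file. It shows how
// the build*Command() helpers above are typically combined: token structures are
// concatenated into one byte string, uploaded into a buffer, and the buffer is
// made resident so its GPU address can be referenced by NV_command_list.
// The function name, GL_STATIC_DRAW usage and the particular token selection are
// assumptions made for this example only; it also assumes initTokenInternals()
// has already been called so that the token headers are valid.
//------------------------------------------------------------------------------
inline void exampleFillTokenBuffer(TokenBuffer& tb, GLuint64 vboAddr, GLuint64 iboAddr, GLuint indexCount)
{
    // concatenate a few state + draw tokens into the raw byte stream
    tb.data.clear();
    tb.data += buildLineWidthCommand(1.0f);
    tb.data += buildAttributeAddressCommand(0, vboAddr, 0);
    tb.data += buildElementAddressCommand(iboAddr, GL_UNSIGNED_INT);
    tb.data += buildDrawElementsCommand(GL_TRIANGLES, indexCount);

    // upload the token stream and make the buffer resident to obtain its GPU address
    glCreateBuffers(1, &tb.bufferID);
    glNamedBufferData(tb.bufferID, tb.data.size(), tb.data.data(), GL_STATIC_DRAW);
    glGetNamedBufferParameterui64vNV(tb.bufferID, GL_BUFFER_GPU_ADDRESS_NV, &tb.bufferAddr);
    glMakeNamedBufferResidentNV(tb.bufferID, GL_READ_ONLY);

    // the resulting buffer address, client pointers and sizes can then be grouped per
    // state object and FBO in a CommandStatesBatch and submitted with
    // glDrawCommandsStatesNV() or glListDrawCommandsStatesClientNV().
}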
-------------------------------------------------------------------------------- /gl_vk_bk3dthreaded.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #define USEWORKERS 21 | #define MAXCMDBUFFERS 100 22 | #ifdef USEWORKERS 23 | #define CCRITICALSECTIONHOLDER(c) CCriticalSectionHolder _cs(c); 24 | #else 25 | #define CCRITICALSECTIONHOLDER(c) 26 | #endif 27 | 28 | #include 29 | #include "nvpwindow.hpp" 30 | 31 | #include 32 | using namespace glm; 33 | 34 | #include "GLSLShader.h" 35 | #include "gl_nv_command_list.h" 36 | #include 37 | 38 | #include 39 | 40 | #include "helper_fbo.h" 41 | 42 | #ifdef NVP_SUPPORTS_GZLIB 43 | #include "zlib.h" 44 | #endif 45 | #include "bk3dEx.h" // a baked binary format for few models 46 | 47 | #define PROFILE_SECTION(name) nvh::Profiler::Section _tempTimer(g_profiler, name) 48 | 49 | // 50 | // For the case where we work with Descriptor Sets (Vulkan) 51 | // 52 | #define DSET_GLOBAL 0 53 | #define BINDING_MATRIX 0 54 | #define BINDING_LIGHT 1 55 | #define BINDING_NOISE 2 56 | 57 | #define DSET_OBJECT 1 58 | #define BINDING_MATRIXOBJ 0 59 | #define BINDING_MATERIAL 1 60 | 61 | #define DSET_TOTALAMOUNT 2 62 | // 63 | // For the case where we just assign UBO bindings (cmd-list) 64 | // 65 | #define UBO_MATRIX 0 66 | #define UBO_MATRIXOBJ 1 67 | #define UBO_MATERIAL 2 68 | #define UBO_LIGHT 3 69 | #define NUM_UBOS 4 70 | 71 | #define TOSTR_(x) #x 72 | #define TOSTR(x) TOSTR_(x) 73 | 74 | // 75 | // Let's assume we would put any matrix that don't get impacted by the local object transformation 76 | // 77 | NV_ALIGN( 78 | 256, 79 | struct MatrixBufferGlobal { 80 | mat4 mW; 81 | mat4 mVP; 82 | vec3 eyePos; 83 | }); 84 | // 85 | // Let's assume these are the ones that can change for each object 86 | // will used at an array of MatrixBufferObject 87 | // 88 | NV_ALIGN( 89 | 256, 90 | struct MatrixBufferObject { mat4 mO; }); 91 | // 92 | // if we create arrays with a structure, we must be aligned according to 93 | // GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT (to query) 94 | // 95 | NV_ALIGN( 96 | 256, 97 | struct MaterialBuffer { 98 | vec3 diffuse; 99 | float a; 100 | }); 101 | 102 | NV_ALIGN( 103 | 256, 104 | struct LightBuffer { vec3 dir; }); 105 | 106 | // 107 | // Externs 108 | // 109 | extern nvh::Profiler g_profiler; 110 | 111 | extern bool g_bDisplayObject; 112 | extern GLuint g_MaxBOSz; 113 | extern bool g_bDisplayGrid; 114 | 115 | extern MatrixBufferGlobal g_globalMatrices; 116 | 117 | //------------------------------------------------------------------------------ 118 | class Bk3dModel; 119 | //------------------------------------------------------------------------------ 120 | // Renderer: can be OpenGL or other 121 | 
//------------------------------------------------------------------------------ 122 | class Renderer 123 | { 124 | public: 125 | Renderer() {} 126 | virtual ~Renderer() {} 127 | virtual const char* getName() = 0; 128 | virtual bool valid() = 0; 129 | virtual bool initGraphics(int w, int h, int MSAA) = 0; 130 | virtual bool terminateGraphics() = 0; 131 | virtual bool initThreadLocalVars(int threadId) = 0; 132 | virtual void releaseThreadLocalVars() = 0; 133 | virtual void destroyCommandBuffers(bool bAll) = 0; 134 | virtual void resetCommandBuffersPool() {} 135 | virtual void waitForGPUIdle() = 0; 136 | 137 | virtual bool attachModel(Bk3dModel* pModel) = 0; 138 | virtual bool detachModels() = 0; 139 | 140 | virtual bool initResourcesModel(Bk3dModel* pModel) = 0; 141 | 142 | virtual bool buildPrimaryCmdBuffer() = 0; 143 | // bufIdx: index of cmdBuffer to create, containing mesh mstart to mend-1 (for testing concurrent cmd buffer creation) 144 | virtual bool buildCmdBufferModel(Bk3dModel* pModelcmd, int bufIdx = 0, int mstart = 0, int mend = -1) = 0; 145 | virtual void consolidateCmdBuffersModel(Bk3dModel* pModelcmd, int numCmdBuffers) = 0; 146 | virtual bool deleteCmdBufferModel(Bk3dModel* pModel) = 0; 147 | 148 | virtual bool updateForChangedRenderTarget(Bk3dModel* pModel) = 0; 149 | 150 | 151 | virtual void displayStart(const mat4& world, const InertiaCamera& camera, const mat4& projection, bool bTimingGlitch) = 0; 152 | virtual void displayEnd() {} 153 | virtual void displayGrid(const InertiaCamera& camera, const mat4 projection) = 0; 154 | // topologies: bits for each primitive type (Lines:1, linestrip:2, triangles:4, tristrips:8, trifans:16) 155 | virtual void displayBk3dModel(Bk3dModel* pModel, const mat4& cameraView, const mat4 projection, unsigned char topologies = 0xFF) = 0; 156 | virtual void blitToBackbuffer() = 0; 157 | 158 | virtual void updateViewport(GLint x, GLint y, GLsizei width, GLsizei height) = 0; 159 | 160 | virtual bool bFlipViewport() { return false; } 161 | }; 162 | extern Renderer* g_renderers[10]; 163 | extern int g_numRenderers; 164 | 165 | //------------------------------------------------------------------------------ 166 | // Class for Object (made of 1 to N meshes) 167 | // This class is agnostic to any renderer: just contains the data of geometry 168 | //------------------------------------------------------------------------------ 169 | class Bk3dModel 170 | { 171 | public: 172 | Bk3dModel(const char* name, vec3* pPos = NULL, float* pScale = NULL); 173 | ~Bk3dModel(); 174 | 175 | vec3 m_posOffset; 176 | float m_scale; 177 | std::string m_name; 178 | struct Stats 179 | { 180 | unsigned int primitives; 181 | unsigned int drawcalls; 182 | unsigned int attr_update; 183 | unsigned int uniform_update; 184 | }; 185 | 186 | MatrixBufferObject* m_objectMatrices; 187 | int m_objectMatricesNItems; 188 | 189 | MaterialBuffer* m_material; 190 | int m_materialNItems; 191 | 192 | bk3d::FileHeader* m_meshFile; 193 | 194 | Stats m_stats; 195 | 196 | Renderer* m_pRenderer; 197 | void* m_pRendererData; 198 | 199 | bool updateForChangedRenderTarget(); 200 | bool loadModel(); 201 | void printPosition(); 202 | void addStats(Stats& stats); 203 | }; //Class Bk3dModel 204 | 205 | 206 | extern std::vector g_bk3dModels; 207 | 208 | #define FOREACHMODEL(cmd) \ 209 | { \ 210 | for(int m = 0; m < g_bk3dModels.size(); m++) \ 211 | { \ 212 | g_bk3dModels[m]->cmd; \ 213 | } \ 214 | } 215 | -------------------------------------------------------------------------------- /helper_fbo.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #ifndef __HELPERFBO__ 21 | #define __HELPERFBO__ 22 | #include 23 | #include 24 | namespace fbo { 25 | inline bool CheckStatus() 26 | { 27 | GLenum status; 28 | status = (GLenum)glCheckFramebufferStatus(GL_FRAMEBUFFER); 29 | switch(status) 30 | { 31 | case GL_FRAMEBUFFER_COMPLETE: 32 | return true; 33 | case GL_FRAMEBUFFER_UNSUPPORTED: 34 | LOGE("Unsupported framebuffer format\n"); 35 | assert(!"Unsupported framebuffer format"); 36 | break; 37 | case GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT: 38 | LOGE("Framebuffer incomplete, missing attachment\n"); 39 | assert(!"Framebuffer incomplete, missing attachment"); 40 | break; 41 | //case GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS: 42 | // PRINTF(("Framebuffer incomplete, attached images must have same dimensions\n")); 43 | // assert(!"Framebuffer incomplete, attached images must have same dimensions"); 44 | // break; 45 | //case GL_FRAMEBUFFER_INCOMPLETE_FORMATS: 46 | // PRINTF(("Framebuffer incomplete, attached images must have same format\n")); 47 | // assert(!"Framebuffer incomplete, attached images must have same format"); 48 | // break; 49 | case GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER: 50 | LOGE("Framebuffer incomplete, missing draw buffer\n"); 51 | assert(!"Framebuffer incomplete, missing draw buffer"); 52 | break; 53 | case GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER: 54 | LOGE("Framebuffer incomplete, missing read buffer\n"); 55 | assert(!"Framebuffer incomplete, missing read buffer"); 56 | break; 57 | case GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT: 58 | LOGE("Framebuffer incomplete attachment\n"); 59 | assert(!"Framebuffer incomplete attachment"); 60 | break; 61 | case GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE: 62 | LOGE("Framebuffer incomplete multisample\n"); 63 | assert(!"Framebuffer incomplete multisample"); 64 | break; 65 | default: 66 | LOGE("Error %x\n", status); 67 | assert(!"unknown FBO Error"); 68 | break; 69 | } 70 | return false; 71 | } 72 | //------------------------------------------------------------------------------ 73 | // 74 | //------------------------------------------------------------------------------ 75 | inline GLuint create() 76 | { 77 | GLuint fb; 78 | glGenFramebuffers(1, &fb); 79 | return fb; 80 | } 81 | 82 | //------------------------------------------------------------------------------ 83 | // 84 | //------------------------------------------------------------------------------ 85 | inline void bind(GLuint framebuffer) 86 | { 87 | glBindFramebuffer(GL_FRAMEBUFFER, framebuffer); 88 | } 89 | 90 | //------------------------------------------------------------------------------ 91 | // 92 | 
//------------------------------------------------------------------------------ 93 | inline bool attachTexture2DTarget(GLuint framebuffer, GLuint textureID, int colorAttachment, GLenum target = GL_TEXTURE_2D) 94 | { 95 | glBindFramebuffer(GL_FRAMEBUFFER, framebuffer); 96 | glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + colorAttachment, target, textureID, 0); 97 | return true; //CheckStatus(); 98 | } 99 | 100 | //------------------------------------------------------------------------------ 101 | // 102 | //------------------------------------------------------------------------------ 103 | inline bool attachTexture2D(GLuint framebuffer, GLuint textureID, int colorAttachment, int samples) 104 | { 105 | return attachTexture2DTarget(framebuffer, textureID, colorAttachment, samples > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D); 106 | } 107 | //------------------------------------------------------------------------------ 108 | // 109 | //------------------------------------------------------------------------------ 110 | inline bool detachColorTexture(GLuint framebuffer, int colorAttachment, int samples) 111 | { 112 | glBindFramebuffer(GL_FRAMEBUFFER, framebuffer); 113 | glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + colorAttachment, 114 | samples > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D, 0, 0); 115 | return true; //CheckStatus(); 116 | } 117 | //------------------------------------------------------------------------------ 118 | // 119 | //------------------------------------------------------------------------------ 120 | #ifdef USE_RENDERBUFFERS 121 | inline bool attachRenderbuffer(GLuint framebuffer, GLuint rb, int colorAttachment) 122 | { 123 | glBindFramebuffer(GL_FRAMEBUFFER, framebuffer); 124 | glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + colorAttachment, GL_RENDERBUFFER, rb); 125 | return true; //CheckStatus(); 126 | } 127 | //------------------------------------------------------------------------------ 128 | // 129 | //------------------------------------------------------------------------------ 130 | inline bool attachDSTRenderbuffer(GLuint framebuffer, GLuint dstrb) 131 | { 132 | bool bRes; 133 | glBindFramebuffer(GL_FRAMEBUFFER, framebuffer); 134 | //glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, dstrb); 135 | glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_RENDERBUFFER, dstrb); 136 | return true; //CheckStatus() ; 137 | } 138 | #endif 139 | //------------------------------------------------------------------------------ 140 | // 141 | //------------------------------------------------------------------------------ 142 | inline bool attachDSTTexture2DTarget(GLuint framebuffer, GLuint textureDepthID, GLenum target = GL_TEXTURE_2D) 143 | { 144 | glBindFramebuffer(GL_FRAMEBUFFER, framebuffer); 145 | glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, target, textureDepthID, 0); 146 | glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, target, textureDepthID, 0); 147 | return true; //CheckStatus(); 148 | } 149 | 150 | //------------------------------------------------------------------------------ 151 | // 152 | //------------------------------------------------------------------------------ 153 | inline bool attachDSTTexture2D(GLuint framebuffer, GLuint textureDepthID, int msaaRaster) 154 | { 155 | return attachDSTTexture2DTarget(framebuffer, textureDepthID, (msaaRaster > 1) ? 
GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D); 156 | } 157 | //------------------------------------------------------------------------------ 158 | // 159 | //------------------------------------------------------------------------------ 160 | inline bool detachDSTTexture(GLuint framebuffer, int msaaRaster) 161 | { 162 | glBindFramebuffer(GL_FRAMEBUFFER, framebuffer); 163 | GLenum target = (msaaRaster > 1) ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; 164 | glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, target, 0, 0); 165 | glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, target, 0, 0); 166 | 167 | return true; //CheckStatus(); 168 | } 169 | 170 | //------------------------------------------------------------------------------ 171 | // 172 | //------------------------------------------------------------------------------ 173 | inline void deleteFBO(GLuint fbo) 174 | { 175 | glDeleteFramebuffers(1, &fbo); 176 | } 177 | 178 | //------------------------------------------------------------------------------ 179 | // 180 | //------------------------------------------------------------------------------ 181 | inline void blitFBO(GLuint srcFBO, GLuint dstFBO, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLenum filtering) 182 | { 183 | glBindFramebuffer(GL_READ_FRAMEBUFFER, srcFBO); 184 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dstFBO); 185 | // GL_NEAREST is needed when Stencil/depth are involved 186 | glBlitFramebuffer(srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, 187 | GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT, filtering); 188 | glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); 189 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); 190 | } 191 | //------------------------------------------------------------------------------ 192 | // 193 | //------------------------------------------------------------------------------ 194 | inline void blitFBONearest(GLuint srcFBO, GLuint dstFBO, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1) 195 | { 196 | blitFBO(srcFBO, dstFBO, srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, GL_NEAREST); 197 | } 198 | //------------------------------------------------------------------------------ 199 | // 200 | //------------------------------------------------------------------------------ 201 | inline void blitFBOLinear(GLuint srcFBO, GLuint dstFBO, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1) 202 | { 203 | blitFBO(srcFBO, dstFBO, srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, GL_LINEAR); 204 | } 205 | 206 | } // namespace fbo 207 | //------------------------------------------------------------------------------ 208 | // 209 | //------------------------------------------------------------------------------ 210 | namespace texture { 211 | inline GLuint create(int w, int h, int samples, int coverageSamples, GLenum intfmt, GLenum fmt, GLuint textureID = 0) 212 | { 213 | if(samples <= 1) 214 | { 215 | if(textureID == 0) 216 | glCreateTextures(GL_TEXTURE_2D, 1, &textureID); 217 | glTextureStorage2D(textureID, 1, intfmt, w, h); 218 | glTextureParameterf(textureID, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 219 | glTextureParameterf(textureID, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 220 | glTextureParameterf(textureID, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 221 | glTextureParameterf(textureID, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); 222 | } 223 | else 224 | { 225 | 
if(textureID == 0) 226 | glCreateTextures(GL_TEXTURE_2D_MULTISAMPLE, 1, &textureID); 227 | // Note: fixed-samples set to GL_TRUE, otherwise it could fail when attaching to FBO having render-buffer !! 228 | if(coverageSamples > 1) 229 | { 230 | glTextureImage2DMultisampleCoverageNV(textureID, GL_TEXTURE_2D_MULTISAMPLE, coverageSamples, samples, intfmt, w, h, GL_TRUE); 231 | } 232 | else 233 | { 234 | glTextureStorage2DMultisample(textureID, samples, intfmt, w, h, GL_TRUE); 235 | } 236 | } 237 | return textureID; 238 | } 239 | //------------------------------------------------------------------------------ 240 | // 241 | //------------------------------------------------------------------------------ 242 | inline GLuint createRGBA8(int w, int h, int samples, int coverageSamples = 0, GLuint textureID = 0) 243 | { 244 | return create(w, h, samples, coverageSamples, GL_RGBA8, GL_RGBA, textureID); 245 | } 246 | 247 | //------------------------------------------------------------------------------ 248 | // 249 | //------------------------------------------------------------------------------ 250 | inline GLuint createDST(int w, int h, int samples, int coverageSamples = 0, GLuint textureID = 0) 251 | { 252 | return create(w, h, samples, coverageSamples, GL_DEPTH24_STENCIL8, GL_DEPTH24_STENCIL8, textureID); 253 | } 254 | //------------------------------------------------------------------------------ 255 | // 256 | //------------------------------------------------------------------------------ 257 | inline void deleteTexture(GLuint texture) 258 | { 259 | glDeleteTextures(1, &texture); 260 | } 261 | } // namespace texture 262 | //------------------------------------------------------------------------------ 263 | // Render-buffers should be forgotten. Thing of the past 264 | //------------------------------------------------------------------------------ 265 | #ifdef USE_RENDERBUFFERS 266 | namespace renderbuffer { 267 | 268 | inline GLuint createRenderBuffer(int w, int h, int samples, int coverageSamples, GLenum fmt) 269 | { 270 | int query; 271 | GLuint rb; 272 | glGenRenderbuffers(1, &rb); 273 | glBindRenderbuffer(GL_RENDERBUFFER, rb); 274 | if(coverageSamples) 275 | { 276 | glRenderbufferStorageMultisampleCoverageNV(GL_RENDERBUFFER, coverageSamples, samples, fmt, w, h); 277 | glGetRenderbufferParameteriv(GL_RENDERBUFFER, GL_RENDERBUFFER_COVERAGE_SAMPLES_NV, &query); 278 | if(query < coverageSamples) 279 | rb = 0; 280 | else if(query > coverageSamples) 281 | { 282 | // report back the actual number 283 | coverageSamples = query; 284 | LOGW("Warning: coverage samples is now %d\n", coverageSamples); 285 | } 286 | glGetRenderbufferParameteriv(GL_RENDERBUFFER, GL_RENDERBUFFER_COLOR_SAMPLES_NV, &query); 287 | if(query < samples) 288 | rb = 0; 289 | else if(query > samples) 290 | { 291 | // report back the actual number 292 | samples = query; 293 | LOGW("Warning: depth-samples is now %d\n", samples); 294 | } 295 | } 296 | else 297 | { 298 | // create a regular MSAA color buffer 299 | glRenderbufferStorageMultisample(GL_RENDERBUFFER, samples, fmt, w, h); 300 | // check the number of samples 301 | glGetRenderbufferParameteriv(GL_RENDERBUFFER, GL_RENDERBUFFER_SAMPLES, &query); 302 | 303 | if(query < samples) 304 | rb = 0; 305 | else if(query > samples) 306 | { 307 | samples = query; 308 | LOGW("Warning: depth-samples is now %d\n", samples); 309 | } 310 | } 311 | glBindRenderbuffer(GL_RENDERBUFFER, 0); 312 | return rb; 313 | } 314 | 315 | 
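//------------------------------------------------------------------------------
// NOTE: illustrative sketch, not part of the original helper. It shows how
// createRenderBuffer() above combines with the fbo:: helpers to assemble a
// complete MSAA framebuffer. The function name and the choice of formats are
// assumptions made for this example only.
//------------------------------------------------------------------------------
inline GLuint exampleCreateMSAAFramebuffer(int w, int h, int samples)
{
    GLuint fb    = fbo::create();
    GLuint color = createRenderBuffer(w, h, samples, 0, GL_RGBA8);            // MSAA color buffer
    GLuint dst   = createRenderBuffer(w, h, samples, 0, GL_DEPTH24_STENCIL8); // MSAA depth-stencil buffer

    fbo::attachRenderbuffer(fb, color, 0); // color attachment #0
    fbo::attachDSTRenderbuffer(fb, dst);   // depth/stencil attachment

    fbo::bind(fb);
    if(!fbo::CheckStatus()) // validate completeness before rendering into it
        return 0;
    fbo::bind(0);
    return fb;
}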
//------------------------------------------------------------------------------ 316 | // 317 | //------------------------------------------------------------------------------ 318 | inline GLuint createRenderBufferRGBA8(int w, int h, int samples, int coverageSamples) 319 | { 320 | return createRenderBuffer(w, h, samples, coverageSamples, GL_RGBA8); 321 | } 322 | 323 | //------------------------------------------------------------------------------ 324 | // 325 | //------------------------------------------------------------------------------ 326 | inline GLuint createRenderBufferD24S8(int w, int h, int samples, int coverageSamples) 327 | { 328 | return createRenderBuffer(w, h, samples, coverageSamples, GL_DEPTH24_STENCIL8); 329 | } 330 | //------------------------------------------------------------------------------ 331 | // 332 | //------------------------------------------------------------------------------ 333 | inline GLuint createRenderBufferS8(int w, int h, int samples, int coverageSamples) 334 | { 335 | return createRenderBuffer(w, h, samples, coverageSamples, GL_STENCIL_INDEX8); 336 | } 337 | 338 | //------------------------------------------------------------------------------ 339 | // 340 | //------------------------------------------------------------------------------ 341 | #ifdef USE_RENDERBUFFERS 342 | inline void deleteRenderBuffer(GLuint rb) 343 | { 344 | glDeleteRenderbuffers(1, &rb); 345 | } 346 | #endif 347 | } // namespace renderbuffer 348 | #endif 349 | #endif //#ifndef __HELPERFBO__ 350 | -------------------------------------------------------------------------------- /mt/CThread.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | //#include "comms.h" 20 | #if defined WIN32 21 | #include 22 | #else 23 | #include 24 | #include 25 | #ifdef IOS 26 | # include 27 | #endif 28 | #include 29 | #include 30 | #include 31 | #endif 32 | 33 | #include 34 | #include 35 | #include "CThread.h" 36 | 37 | //---------------------------------------------------------------------------------- 38 | // This Function is used as the main callback for all. 
Then the argument passed will 39 | // be used to jump at the right derived class 40 | //---------------------------------------------------------------------------------- 41 | void thread_function(void *pData) 42 | { 43 | NXPROFILEFUNCCOL(__FUNCTION__, 0xFF800000); 44 | CThread* pthread = static_cast(pData); 45 | pthread->CThreadProc(); 46 | } 47 | 48 | #if defined WIN32 49 | ///////////////////////////////////////////////////////////////////////////////////// 50 | ///////////////////////////////////////////////////////////////////////////////////// 51 | // WINDOWS WINDOWS WINDOWS WINDOWS WINDOWS WINDOWS WINDOWS WINDOWS WINDOWS WINDOWS // 52 | ///////////////////////////////////////////////////////////////////////////////////// 53 | ///////////////////////////////////////////////////////////////////////////////////// 54 | 55 | ///////////////////////////////////////////////////////////////////////////////////// 56 | // THREAD THREAD THREAD THREAD THREAD THREAD THREAD THREAD THREAD THREAD THREAD // 57 | ///////////////////////////////////////////////////////////////////////////////////// 58 | CThread::CThread(const bool startNow, const bool Critical) 59 | { 60 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 61 | m_thread = CreateThread(thread_function, static_cast(this), startNow, Critical); 62 | } 63 | 64 | CThread::~CThread() 65 | { 66 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 67 | CancelThread(); 68 | DeleteThread(); 69 | } 70 | 71 | // CpuCount 72 | int CThread::CpuCount() 73 | { 74 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 75 | static int Cpus = -1; 76 | if(-1 == Cpus) { 77 | SYSTEM_INFO si; 78 | GetSystemInfo(&si); 79 | Cpus = (int)si.dwNumberOfProcessors > 1 ? (int)si.dwNumberOfProcessors : 1; 80 | //if(Cpus>4)Cpus=4; 81 | } 82 | return Cpus; 83 | } 84 | //int CThread::CpuCount0() 85 | //{ 86 | // static int Cpus = -1; 87 | // if(-1 == Cpus) { 88 | // SYSTEM_INFO si; 89 | // GetSystemInfo(&si); 90 | // Cpus = (int)si.dwNumberOfProcessors > 1 ? 
(int)si.dwNumberOfProcessors : 1; 91 | // } 92 | // return Cpus; 93 | //} 94 | 95 | // Sleep 96 | void CThread::Sleep(const unsigned long Milliseconds) 97 | { 98 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 99 | ::Sleep(Milliseconds); 100 | } 101 | 102 | // CreateThread 103 | NThreadHandle CThread::CreateThread(ThreadProc Proc, void *Param, bool startNow, const bool Critical) 104 | { 105 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 106 | NThreadHandle hThread = ::CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)Proc, Param, CREATE_SUSPENDED, NULL); 107 | if(Critical) { 108 | SetThreadPriority(hThread, THREAD_PRIORITY_TIME_CRITICAL); 109 | } 110 | if(startNow) 111 | ::ResumeThread(hThread); 112 | return hThread; 113 | } 114 | 115 | // CancelThread 116 | void CThread::CancelThread() 117 | { 118 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 119 | TerminateThread(m_thread, 0); 120 | } 121 | 122 | // DeleteThread 123 | void CThread::DeleteThread() 124 | { 125 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 126 | CloseHandle(m_thread); 127 | m_thread = NULL; 128 | } 129 | 130 | // WaitThread 131 | void CThread::WaitThread() 132 | { 133 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 134 | WaitForSingleObject(m_thread, INFINITE); 135 | } 136 | 137 | // WaitThreads 138 | void CThread::WaitThreads(const NThreadHandle *Threads, const int Count) 139 | { 140 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 141 | WaitForMultipleObjects(Count, Threads, TRUE, INFINITE); 142 | } 143 | 144 | // SuspendThread 145 | bool CThread::SuspendThread() 146 | { 147 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 148 | return ::SuspendThread(m_thread) == -1 ? false : true; 149 | } 150 | 151 | // ResumeThread 152 | bool CThread::ResumeThread() 153 | { 154 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 155 | return ::ResumeThread(m_thread) == -1 ? false : true; 156 | } 157 | 158 | //SetThreadAffinity 159 | void CThread::SetThreadAffinity(unsigned int mask) 160 | { 161 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 162 | SetThreadAffinityMask(m_thread, mask); 163 | } 164 | ///////////////////////////////////////////////////////////////////////////////////// 165 | // MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX // 166 | ///////////////////////////////////////////////////////////////////////////////////// 167 | CMutex::CMutex() 168 | { 169 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 170 | CMutex::CreateMutex(m_mutex); 171 | } 172 | 173 | CMutex::~CMutex() 174 | { 175 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 176 | CMutex::DeleteMutex(); 177 | } 178 | 179 | // CreateMutex 180 | void CMutex::CreateMutex(NMutexHandle &Mutex) 181 | { 182 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 183 | Mutex = ::CreateMutex(NULL, FALSE, NULL); 184 | } 185 | 186 | // DeleteMutex 187 | void CMutex::DeleteMutex() 188 | { 189 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 190 | CloseHandle(m_mutex); 191 | m_mutex = NULL; 192 | } 193 | 194 | // LockMutex 195 | bool CMutex::LockMutex(int ms, long *dbg) 196 | { 197 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 198 | DWORD res = WaitForSingleObjectEx(m_mutex, ms == -1 ? INFINITE : ms, FALSE); 199 | if(dbg) *dbg = res; 200 | return res == WAIT_OBJECT_0 ? 
true : false; 201 | } 202 | 203 | // UnlockMutex 204 | void CMutex::UnlockMutex() 205 | { 206 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 207 | BOOL bRes = ReleaseMutex(m_mutex); 208 | assert(bRes); 209 | } 210 | 211 | ///////////////////////////////////////////////////////////////////////////////////// 212 | // SEMAPHORE SEMAPHORE SEMAPHORE SEMAPHORE SEMAPHORE SEMAPHORE SEMAPHORE SEMAPHORE // 213 | ///////////////////////////////////////////////////////////////////////////////////// 214 | CSemaphore::CSemaphore() 215 | { 216 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 217 | CSemaphore::CreateSemaphore(m_semaphore, 0, 0xFFFF); 218 | } 219 | 220 | CSemaphore::CSemaphore(long initialCnt, long maxCnt) 221 | { 222 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 223 | CSemaphore::CreateSemaphore(m_semaphore, initialCnt, maxCnt); 224 | } 225 | 226 | CSemaphore::~CSemaphore() 227 | { 228 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 229 | CSemaphore::DeleteSemaphore(); 230 | } 231 | 232 | int CSemaphore::num_Semaphores = 0; 233 | // CreateSemaphore 234 | void CSemaphore::CreateSemaphore(NSemaphoreHandle &Semaphore, long initialCnt, long maxCnt) 235 | { 236 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 237 | num_Semaphores++; 238 | Semaphore = ::CreateSemaphoreA(NULL, initialCnt, maxCnt, NULL); 239 | } 240 | 241 | // DeleteSemaphore 242 | void CSemaphore::DeleteSemaphore() 243 | { 244 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 245 | CloseHandle(m_semaphore); 246 | m_semaphore = NULL; 247 | num_Semaphores--; 248 | } 249 | 250 | // AcquireSemaphore 251 | bool CSemaphore::AcquireSemaphore(int ms) 252 | { 253 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 254 | return WaitForSingleObject(m_semaphore, ms == -1 ? INFINITE : ms) == WAIT_TIMEOUT ? false : true; 255 | } 256 | 257 | // ReleaseSemaphore 258 | void CSemaphore::ReleaseSemaphore(long cnt) 259 | { 260 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 261 | BOOL bRes = ::ReleaseSemaphore(m_semaphore, cnt, NULL); 262 | } 263 | 264 | ///////////////////////////////////////////////////////////////////////////////////// 265 | // EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT // 266 | ///////////////////////////////////////////////////////////////////////////////////// 267 | int CEvent::num_events = 0; 268 | CEvent::CEvent(bool manualReset, bool initialState) 269 | { 270 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 271 | CEvent::CreateEvent(m_event, manualReset, initialState); 272 | } 273 | CEvent::~CEvent() 274 | { 275 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 276 | CEvent::DeleteEvent(); 277 | } 278 | 279 | void CEvent::CreateEvent(NEventHandle &Event, bool manualReset, bool initialState) 280 | { 281 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 282 | num_events++; 283 | Event = ::CreateEventA(NULL, manualReset, initialState, NULL); 284 | } 285 | void CEvent::DeleteEvent() 286 | { 287 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 288 | CloseHandle(m_event); 289 | m_event = NULL; 290 | num_events--; 291 | } 292 | bool CEvent::Set() 293 | { 294 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 295 | return SetEvent(m_event) ? true : false; 296 | } 297 | bool CEvent::Pulse() 298 | { 299 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 300 | return PulseEvent(m_event) ? true : false; 301 | } 302 | bool CEvent::Reset() 303 | { 304 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 305 | return ResetEvent(m_event) ? 
true : false; 306 | } 307 | bool CEvent::WaitOnEvent(int ms) 308 | { 309 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 310 | return WaitForSingleObject(m_event, ms == -1 ? INFINITE : ms) == WAIT_TIMEOUT ? false : true; 311 | } 312 | 313 | 314 | #endif // WINDOWS 315 | 316 | ///////////////////////////////////////////////////////////////////////////////////// 317 | ///////////////////////////////////////////////////////////////////////////////////// 318 | // UNIX UNIX UNIX UNIX UNIX UNIX UNIX UNIX UNIX UNIX UNIX UNIX UNIX UNIX UNIX UNIX // 319 | ///////////////////////////////////////////////////////////////////////////////////// 320 | ///////////////////////////////////////////////////////////////////////////////////// 321 | 322 | #if defined IOS || defined ANDROID || defined LINUX 323 | 324 | ///////////////////////////////////////////////////////////////////////////////////// 325 | // CThread CThread CThread CThread CThread CThread CThread CThread CThread CThread // 326 | ///////////////////////////////////////////////////////////////////////////////////// 327 | CThread::CThread(const bool startNow, const bool Critical) 328 | { 329 | m_thread = CreateThread(thread_function, static_cast(this), startNow, Critical); 330 | } 331 | 332 | CThread::~CThread() 333 | { 334 | CancelThread(); 335 | DeleteThread(); 336 | } 337 | 338 | 339 | // CpuCount 340 | int CThread::CpuCount() { 341 | static int Cpus = -1; 342 | if(-1 == Cpus) { 343 | #ifdef IOS 344 | size_t s = sizeof(Cpus); 345 | sysctlbyname("hw.logicalcpu", &Cpus, &s, NULL, 0); 346 | #endif // IOS 347 | #ifdef ANDROID 348 | Cpus = sysconf(_SC_NPROCESSORS_ONLN); 349 | #endif // ANDROID 350 | Cpus = Cpus > 1 ? Cpus : 1; 351 | } 352 | return Cpus; 353 | } 354 | 355 | // Sleep 356 | void CThread::Sleep(const unsigned long Milliseconds) { 357 | usleep(1000 * (useconds_t)Milliseconds); 358 | } 359 | 360 | // CreateThread 361 | NThreadHandle CThread::CreateThread(ThreadProc Proc, void *Param, bool startNow, const bool Critical) 362 | { 363 | pthread_t th; 364 | //bool startNow, ? 
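    // Note: unlike the Win32 path above, pthreads has no direct equivalent of
    // CREATE_SUSPENDED or THREAD_PRIORITY_TIME_CRITICAL, so the startNow and
    // Critical arguments are ignored here and the thread starts running as soon
    // as pthread_create() returns. A possible extension (not part of this sample)
    // would be to gate the thread body on a condition variable for deferred start
    // and to raise the priority through pthread_attr_setschedparam().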
365 | pthread_create(&th, NULL, (void *(*)(void *))Proc, Param); 366 | return th; 367 | } 368 | 369 | // CancelThread 370 | void CThread::CancelThread() { 371 | #if defined ANDROID 372 | pthread_kill(m_thread, SIGUSR1); 373 | #else 374 | pthread_cancel(m_thread); 375 | #endif 376 | } 377 | 378 | // DeleteThread 379 | void CThread::DeleteThread() { 380 | pthread_detach(m_thread); 381 | } 382 | 383 | // WaitThread 384 | void CThread::WaitThread() { 385 | pthread_join(m_thread, NULL); 386 | } 387 | 388 | // WaitThreads 389 | void CThread::WaitThreads(const NThreadHandle *Threads, const int Count) { 390 | int i; 391 | for(i = 0; i < Count; i++) { 392 | pthread_join(Threads[i], NULL); 393 | } 394 | } 395 | 396 | /////////////////////////////////////////////////////////////////////////////////// 397 | // MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX // 398 | /////////////////////////////////////////////////////////////////////////////////// 399 | CMutex::CMutex() 400 | { 401 | CMutex::CreateMutex(m_mutex); 402 | } 403 | 404 | CMutex::~CMutex() 405 | { 406 | CMutex::DeleteMutex(); 407 | } 408 | // CreateMutex 409 | void CMutex::CreateMutex(NMutexHandle &Mutex) 410 | { 411 | pthread_mutex_init(&Mutex, NULL); 412 | } 413 | 414 | // NMutexHandle 415 | void CMutex::DeleteMutex() 416 | { 417 | pthread_mutex_destroy(&m_mutex); 418 | } 419 | 420 | // LockMutex 421 | bool CMutex::LockMutex(int ms, long *dbg) { 422 | pthread_mutex_lock(&m_mutex); 423 | return true; 424 | } 425 | 426 | // UnlockMutex 427 | void CMutex::UnlockMutex() { 428 | pthread_mutex_unlock(&m_mutex); 429 | } 430 | 431 | ///////////////////////////////////////////////////////////////////////////////////// 432 | // SEMAPHORE SEMAPHORE SEMAPHORE SEMAPHORE SEMAPHORE SEMAPHORE SEMAPHORE SEMAPHORE // 433 | ///////////////////////////////////////////////////////////////////////////////////// 434 | CSemaphore::CSemaphore(long initialCnt, long maxCnt) 435 | { 436 | CSemaphore::CreateSemaphore(m_semaphore, initialCnt, maxCnt); 437 | } 438 | 439 | CSemaphore::~CSemaphore() 440 | { 441 | CSemaphore::DeleteSemaphore(); 442 | } 443 | 444 | int CSemaphore::num_Semaphores = 0; 445 | // CreateSemaphore 446 | void CSemaphore::CreateSemaphore(NSemaphoreHandle &Semaphore, long initialCnt, long maxCnt) 447 | { 448 | num_Semaphores++; 449 | sem_init(&Semaphore, 0, (unsigned int)initialCnt); 450 | } 451 | 452 | // DeleteSemaphore 453 | void CSemaphore::DeleteSemaphore() 454 | { 455 | sem_destroy(&m_semaphore); 456 | //!@$!#$@#$m_semaphore = NULL; 457 | num_Semaphores--; 458 | } 459 | 460 | // AcquireSemaphore 461 | bool CSemaphore::AcquireSemaphore(int msTimeOut) 462 | { 463 | //if(msTimeOut == 0) 464 | sem_wait(&m_semaphore); 465 | /*else { 466 | //convert timeout to a timespec, pthreads wants the final time not the length 467 | #if 0 468 | timespec ts; 469 | clock_gettime(CLOCK_REALTIME, &ts); 470 | ts.tv_sec += time_t(msTimeOut) / time_t(1000); 471 | ts.tv_nsec += (long(msTimeOut) % long(1000)) * long(1000*1000); 472 | #else 473 | struct timeval tv; 474 | struct timespec ts; 475 | gettimeofday(&tv, NULL); 476 | ts.tv_sec = tv.tv_sec + time_t(msTimeOut) / time_t(1000); 477 | ts.tv_nsec = tv.tv_usec*1000 + (long(msTimeOut) % long(1000)) * long(1000*1000); 478 | #endif 479 | 480 | if (sem_timedwait(&m_semaphore, &ts))// WTF ?!?!? 
== ETIMEDOUT) 481 | { 482 | //timed out 483 | return false; 484 | } 485 | return true; 486 | }*/ 487 | return true; 488 | } 489 | 490 | //void NCountSemaphore::Post() const 491 | // ReleaseSemaphore 492 | void CSemaphore::ReleaseSemaphore(long cnt) 493 | { 494 | for(;cnt > 0; cnt--) 495 | sem_post(&m_semaphore); 496 | 497 | } 498 | 499 | 500 | ///////////////////////////////////////////////////////////////////////////////////// 501 | // EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT // 502 | ///////////////////////////////////////////////////////////////////////////////////// 503 | int CEvent::num_events = 0; 504 | CEvent::CEvent(bool manualReset, bool initialState) 505 | { 506 | CEvent::CreateEvent(m_event, manualReset, initialState); 507 | m_signaled = initialState; 508 | m_manualReset = manualReset; 509 | // TODO: put it in CreateEvent 510 | pthread_mutex_init(&m_mutex, NULL); // do we need non-default attrs (second arg)? 511 | } 512 | CEvent::~CEvent() 513 | { 514 | CEvent::DeleteEvent(); 515 | } 516 | 517 | void CEvent::CreateEvent(NEventHandle &event, bool manualReset, bool initialState) 518 | { 519 | num_events++; 520 | pthread_cond_init(&event, NULL); 521 | } 522 | void CEvent::DeleteEvent() 523 | { 524 | //m_event = NULL; 525 | num_events--; 526 | pthread_cond_destroy(&m_event); 527 | pthread_mutex_destroy(&m_mutex); 528 | } 529 | bool CEvent::Set() 530 | { 531 | int r = 0; 532 | pthread_mutex_lock(&m_mutex); 533 | 534 | if (m_signaled == false) 535 | { 536 | m_signaled = true; 537 | r = pthread_cond_broadcast(&m_event); 538 | } 539 | 540 | pthread_mutex_unlock(&m_mutex); 541 | return r ? false : true; 542 | } 543 | bool CEvent::Pulse() 544 | { 545 | pthread_mutex_lock(&m_mutex); 546 | 547 | int r = pthread_cond_broadcast(&m_event); 548 | 549 | pthread_mutex_unlock(&m_mutex); 550 | return r ? false : true; 551 | } 552 | bool CEvent::Reset() 553 | { 554 | pthread_mutex_lock(&m_mutex); 555 | m_signaled = false; 556 | pthread_mutex_unlock(&m_mutex); 557 | return m_signaled; 558 | } 559 | bool CEvent::WaitOnEvent(int msTimeOut) 560 | { 561 | pthread_mutex_lock(&m_mutex); 562 | 563 | //convert timeout to a timespec, pthreads wants the final time not the length 564 | struct timeval tv; 565 | struct timespec ts; 566 | if(msTimeOut >= 0) 567 | { 568 | gettimeofday(&tv, NULL); 569 | //timespec ts; 570 | //clock_gettime(CLOCK_REALTIME, &ts); 571 | ts.tv_sec = tv.tv_sec + time_t(msTimeOut) / time_t(1000); 572 | ts.tv_nsec = tv.tv_usec*1000 + (long(msTimeOut) % long(1000)) * long(1000*1000); 573 | } 574 | bool ret = true; 575 | while(m_signaled == false) 576 | { 577 | if(msTimeOut >= 0) 578 | { 579 | if (::pthread_cond_timedwait(&m_event, &m_mutex, &ts)) // WTF?!?! 
== ETIMEDOUT) 580 | { 581 | //timed out 582 | ret = false; 583 | break; 584 | } 585 | } else { 586 | if (::pthread_cond_wait(&m_event, &m_mutex)) 587 | { 588 | //must be an error, then 589 | ret = false; 590 | break; 591 | } 592 | } 593 | } 594 | 595 | if (ret && !m_manualReset) 596 | { 597 | m_signaled = false; 598 | } 599 | 600 | pthread_mutex_unlock(&m_mutex); 601 | return ret; 602 | } 603 | #endif // IOS || ANDROID 604 | 605 | /////////////////////////////////////////////////////////////////////////////////////////////////////// 606 | /////////////////////////////////////////////////////////////////////////////////////////////////////// 607 | /////////////////////////////////////////////////////////////////////////////////////////////////////// 608 | // NThreadLocalNonPODBase NThreadLocalNonPODBase NThreadLocalNonPODBase NThreadLocalNonPODBase 609 | // NThreadLocalNonPODBase NThreadLocalNonPODBase NThreadLocalNonPODBase NThreadLocalNonPODBase 610 | /////////////////////////////////////////////////////////////////////////////////////////////////////// 611 | /////////////////////////////////////////////////////////////////////////////////////////////////////// 612 | /////////////////////////////////////////////////////////////////////////////////////////////////////// 613 | 614 | 615 | CCriticalSection& NThreadLocalNonPODBase::s_listLock() 616 | { 617 | static CCriticalSection cs; 618 | return cs; 619 | } 620 | 621 | std::vector& NThreadLocalNonPODBase::s_tlsList() 622 | { 623 | static std::vector v; 624 | return v; 625 | } 626 | 627 | void NThreadLocalNonPODBase::DeleteAllTLSDataInThisThread() 628 | { 629 | CCriticalSectionHolder h(s_listLock()); 630 | std::vector& l = s_tlsList(); 631 | for (size_t i = 0; i < l.size(); i++) 632 | { 633 | l[i]->DeleteMyTLSData(); 634 | } 635 | } 636 | 637 | 638 | -------------------------------------------------------------------------------- /mt/CThreadWork.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | #pragma once 20 | //#define NOWIN32BUILTIN 21 | //#define DBGTHREAD 22 | #ifdef DBGTHREAD 23 | # define LOGDBG LOGI 24 | #else 25 | # define LOGDBG(...) 
26 | #endif 27 | 28 | #include 29 | #include 30 | #include "CThread.h" 31 | 32 | #include "RingBuffer.h" 33 | 34 | //#define CB_CALL_CONV 35 | #ifdef WIN32 36 | #define CB_CONV CALLBACK 37 | #define CALL_CONV __stdcall 38 | #else 39 | #define CB_CONV 40 | #define CALL_CONV 41 | #endif 42 | 43 | 44 | 45 | #if defined IOS || defined ANDROID 46 | #endif 47 | 48 | #ifdef WIN32 49 | #endif 50 | 51 | class TaskQueue; 52 | class ThreadWorker; 53 | 54 | //#pragma mark - Globals // MacOSX thing 55 | #ifdef USEGLOBALS 56 | /************************************************************************************/ 57 | /** 58 | ** \brief return the main thread 59 | **/ 60 | NThreadHandle GetMainThread(); 61 | /** 62 | ** \brief 63 | **/ 64 | void DeclareMainThread(); 65 | /** 66 | ** \brief 67 | **/ 68 | bool IsMainThread(); 69 | /** 70 | ** \brief 71 | **/ 72 | TaskQueue* GetMainTaskQueue(); 73 | /** 74 | ** \brief 75 | **/ 76 | uint GetNumCPUCores(); 77 | /** 78 | ** \brief 79 | **/ 80 | NThreadID GetCurrentThreadID(); 81 | /** 82 | ** \brief 83 | **/ 84 | TaskQueue* GetCurrentTaskQueue(); 85 | #endif //USEGLOBALS 86 | 87 | //#pragma mark - Events, Mutex, Semaphores with altertable feature // MacOSX thing 88 | 89 | //pthreads doesn't support real alertable waiting, so we need to wake up and poll for it 90 | #define NV_FAKE_WAIT_ALERTABLE_SLICES_MS 5 91 | 92 | class CEventAlertable : public CEvent 93 | { 94 | public: 95 | bool WaitOnEventAltertable(); 96 | bool WaitOnEventAltertable(::uint msTimeOut); 97 | }; 98 | 99 | //#pragma mark - TaskBase // MacOSX thing 100 | /************************************************************************************/ 101 | /** 102 | ** \brief Base for a Task, with the common way to invoke the task through Invoke() 103 | ** 104 | ** this is the very base for any task to be part of the game: derive a class from this 105 | ** one and create a dedicated constructor that will contain the function arguments 106 | ** (arguments that normally you would pass to the method for executing what you want 107 | ** then the thread pool will call "Invoke()": the overloaded implementation will allow 108 | ** you to find back the arguments that you passed in the constructor 109 | ** This allows to normalize the invokation of a task, while allowing you to pre-declare 110 | ** arguments via the constructor 111 | **/ 112 | class TaskBase 113 | { 114 | private: 115 | /// \brief pointer to TaskQueue::m_taskCount 116 | NInterlockedValue* m_queueCountRef; 117 | protected: 118 | TaskBase() /*: m_queueCountRef(NULL)*/ {} 119 | virtual ~TaskBase(); 120 | public: 121 | /// \brief the main entry point for Task execution : this method is the one called to exectute the Task 122 | virtual void Invoke() = 0; 123 | /// \brief when the Task got accomplished, Done() gets called. 124 | virtual void Done(); 125 | #ifdef DBGTHREAD 126 | virtual const char *getDbgString() { return "NONAME"; }; 127 | #endif 128 | friend class TaskQueue; 129 | }; 130 | 131 | // ?? Shall we create a bas class for tasks that we want to be able to invoke other Tasks 132 | 133 | //#pragma mark - Cross Thread // MacOSX thing 134 | //************************************************************************************ 135 | // 136 | // TaskSyncCall is a class for function call(s) that would require waiting for the result 137 | // prior moving forward: method "Call()" is what needs to be called. The caller will wait 138 | // for the method "Call" to be completed. 
This method might execute on another thread 139 | // This privately inherits from TaskBase so you can't pass it to the task invoker directly 140 | // 141 | class TaskSyncCall : private TaskBase 142 | { 143 | private: 144 | CEventAlertable m_doneEvent; 145 | virtual void Done(); 146 | protected: 147 | TaskSyncCall(); 148 | 149 | // Implement this 150 | virtual void Invoke() = 0; 151 | public: 152 | /// \brief performs a synchonous call : wait for the result 153 | void Call(TaskQueue* destThread = NULL, bool waitAltertable = false);//GetMainTaskQueue()); 154 | }; 155 | 156 | //#pragma mark - Task Queue // MacOSX thing 157 | 158 | 159 | /************************************************************************************/ 160 | /** 161 | ** \brief thread-local variable of the current Queue list of task 162 | **/ 163 | extern TaskQueue* getCurrentTaskQueue(); 164 | /** 165 | ** \brief sets thread-local variable of the current Queue list of task 166 | **/ 167 | extern void setCurrentTaskQueue(TaskQueue * tb); 168 | extern int getThreadNumber(); 169 | extern void setThreadNumber(int n); 170 | 171 | /************************************************************************************/ 172 | /** 173 | ** \brief Queue list of task to execute 174 | ** 175 | ** each thread-worker owns a TaskQueue in which tasks are put ( pushTask() ) 176 | ** Thread workers will get a TaskQueue assigned by default. 177 | ** 178 | ** Special and optional case: the MAIN thread might also have such a TaskQueue: you can instanciate 179 | ** it and do while(g_mainTB->pollTask()) {} to execute possibly queued tasks that other 180 | ** workers might have pushed to the main thread. 181 | **/ 182 | class TaskQueue 183 | { 184 | private: 185 | //CThread *m_thread; 186 | NThreadHandle m_thread; 187 | #ifdef WIN32 188 | NThreadID m_threadID; 189 | #endif 190 | 191 | /// \brief prototype for the function that is invoked for the task execution 192 | /// In many cases this function is TaskQueue::taskThreadFunc() with a TaskBase as the argument 193 | typedef void (CB_CONV *ThreadFunc)(void* params); 194 | #if !defined WIN32 || defined NOWIN32BUILTIN 195 | /// \name non Win32 queue implementation 196 | /// @{ 197 | typedef NRingBuffer > Ring; 198 | // Seems like we could ask Windows to do this work for us... 199 | CEvent* m_dataReadyEvent; 200 | CCriticalSection m_taskQueueLock; 201 | Ring m_taskQueue; 202 | /// @} 203 | #endif 204 | 205 | /// \brief entry point to execute the task 206 | static void CB_CONV taskThreadFunc(void* params); 207 | /// \name Constructors/Destructors 208 | /// @{ 209 | #if !defined WIN32 || defined NOWIN32BUILTIN 210 | TaskQueue(CEvent* dataReadyEvent); 211 | #endif 212 | TaskQueue(const TaskQueue&); //these are purposely not implemented 213 | TaskQueue& operator= (const TaskQueue&); 214 | public: 215 | TaskQueue(); 216 | TaskQueue(/*CThread **/NThreadHandle thread, CEvent* dataReadyEvent=NULL); 217 | #ifdef WIN32 218 | TaskQueue(NThreadID id, CEvent* dataReadyEvent=NULL); 219 | #endif 220 | ~TaskQueue(); 221 | private: 222 | /// @} 223 | 224 | /// \brief maintains the total amount of tasks 225 | NInterlockedValue m_taskCount; 226 | 227 | /// \brief push a function in the list ring of functions to call 228 | void pushTaskFunc(ThreadFunc call, void* params); 229 | public: 230 | inline int GetQueuedTaskCount() { return (int)m_taskCount; } 231 | /// \brief push a task into the execution buffer. Using taskThreadFunc. 
232 | void pushTask(TaskBase * task); 233 | /// \brief poll a Task's function from the execution buffer and execute it 234 | bool pollTask(int timeout=0); 235 | void FlushTasks(bool waitAlertable = false); 236 | 237 | inline NThreadHandle GetDestinationThread() { return m_thread; } 238 | #ifdef WIN32 239 | inline NThreadID GetDestinationThreadID() { return m_threadID; } 240 | #else 241 | /// \brief pthreads doesn't differentiate between handles and IDs 242 | inline NThreadID GetDestinationThreadID() { return GetDestinationThread(); } 243 | #endif 244 | 245 | friend class ThreadWorker; 246 | }; 247 | 248 | 249 | //#pragma mark - Task Worker // MacOSX thing 250 | /************************************************************************************/ 251 | /** 252 | ** \brief Pool of workers 253 | **/ 254 | class ThreadWorker 255 | { 256 | private: 257 | /// \name contructors/destructor 258 | /// @{ 259 | ThreadWorker(const ThreadWorker&); //these are purposely not implemented 260 | ThreadWorker& operator= (const ThreadWorker&); 261 | public: 262 | /// \brief this starts the thread 263 | ThreadWorker(const std::string& threadName = std::string(), bool discardQueuedOnExit = false, bool waitAleratableOnExit = false); 264 | // \brief this blocks until all queued work is done unless you had told it to discard 265 | ~ThreadWorker(); 266 | /// @} 267 | private: 268 | unsigned long m_workerID; 269 | std::string m_threadName; 270 | CCriticalSection m_threadNameSec; 271 | CEvent m_doneEvent; 272 | TaskQueue m_invoker; 273 | volatile bool m_discardQueuedOnExit; 274 | //volatile bool m_alertableOnExit; 275 | 276 | #ifdef WIN32 277 | /// \brief the real function that the thread will invoke - Win32 version 278 | static ::uint CALL_CONV threadFunc(void* p); 279 | #else 280 | /// \brief the real function that the thread will invoke - Unix version 281 | static void* CALL_CONV threadFunc(void* p); 282 | #endif 283 | /// whenever the work is done, signals data are ready to pickup 284 | #if !defined WIN32 || defined NOWIN32BUILTIN 285 | CEvent m_dataReadyEvent; 286 | #endif 287 | public: 288 | /// \name getters/setters 289 | /// @{ 290 | inline bool GetDiscardQueuedOnExit() const { return m_discardQueuedOnExit; } 291 | inline void SetDiscardQueuedOnExit(bool b) { m_discardQueuedOnExit = b; } 292 | 293 | //inline bool GetWaitAlertableOnExit() const { return m_alertableOnExit; } 294 | //inline void SetWaitAleratbleOnExit(bool b) { m_alertableOnExit = b; } 295 | 296 | inline TaskQueue& GetTaskQueue() { return m_invoker; } 297 | inline int GetWorkerID() { return m_workerID; } 298 | 299 | const std::string& GetThreadName()/* const*/; 300 | void SetThreadName(const std::string& n); 301 | /// @} 302 | void SetBackgroundMode(bool b); 303 | }; 304 | 305 | //#pragma mark - Pool of workers // MacOSX thing 306 | /************************************************************************************/ 307 | /** 308 | ** \brief various cases to schedule work 309 | **/ 310 | enum NWORKER_THREADPOOL_SCHEDULE 311 | { 312 | //the task is assigned to whichever thread has the least total queued tasks 313 | NWTPS_LEAST_QUEUED_TASKS, 314 | //the tasks are assigned sequentially to threads wrapping around 315 | NWTPS_ROUND_ROBIN, 316 | //the threads read from a central queue of tasks. 
317 | //this one is higher overhead, but it might be worth if you have very variable task completion times 318 | NWTPS_SHARED_QUEUE, 319 | }; 320 | 321 | /************************************************************************************/ 322 | /** 323 | ** \brief Pool of thread workers 324 | ** 325 | ** this class contains all the thread workers available for any jobs/tasks 326 | ** 327 | **/ 328 | class ThreadWorkerPool 329 | { 330 | private: 331 | ThreadWorkerPool(const ThreadWorkerPool&); //these are purposely not implemented 332 | ThreadWorkerPool& operator= (const ThreadWorkerPool&); 333 | 334 | ::uint m_threadCount; 335 | ThreadWorker* m_threads; 336 | NWORKER_THREADPOOL_SCHEDULE m_schedule; 337 | ::uint m_invokedTaskCount; 338 | 339 | /// \brief this task is invoked on multiple threads at once 340 | struct QueuedWorkProcessorTask : public TaskBase 341 | { 342 | const bool m_discardOnExit; 343 | CEvent m_doneEvent; 344 | CSemaphore m_dataReadySem, // Threads (QueuedWorkProcessorTask::Invoke()) are stuck in this semaphore. Pushing a Task will Release one so one of them can wake-the-F@#K-up and work 345 | m_dataProcessedSem; 346 | NRingBuffer m_taskQueue; 347 | CCriticalSection m_taskQueueLock; 348 | 349 | QueuedWorkProcessorTask(bool discardOnExit); 350 | virtual void Invoke(); 351 | virtual void Done(); 352 | #ifdef DBGTHREAD 353 | const char *getDbgString() { return __FUNCTION__; }; 354 | #endif 355 | }; 356 | //this is only non-null if you are using NWTPS_SHARED_QUEUE 357 | QueuedWorkProcessorTask* m_queueTask; 358 | 359 | public: 360 | /// \brief constructor 361 | ThreadWorkerPool(::uint numThreads, bool discardQueuedOnExit = false, bool waitAleratableOnExit = false, NWORKER_THREADPOOL_SCHEDULE sched = NWTPS_ROUND_ROBIN, const std::string& threadName = std::string()); 362 | /// this will block until all the threads are done 363 | ~ThreadWorkerPool(); 364 | 365 | ::uint getThreadCount() { return m_threadCount; } 366 | ThreadWorker * getThreadWorker(int n); 367 | /// this destroys the task when it's done 368 | void pushTask(TaskBase* task); 369 | 370 | void SetBackgroundMode(bool b); 371 | void FlushTasks(bool waitAlertable = false); 372 | void Terminate(); 373 | }; 374 | 375 | 376 | -------------------------------------------------------------------------------- /mt/CThreadWork.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/mt/CThreadWork.pptx -------------------------------------------------------------------------------- /mt/RingBuffer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2016-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #ifndef ThreadTest_RingBuffer_h 21 | #define ThreadTest_RingBuffer_h 22 | 23 | //#pragma mark - Ring Buffer // MacOSX thing 24 | #include 25 | #include 26 | 27 | /******************************************************************************/ 28 | /** 29 | ** \brief Ring buffer implementation "a la STL"... 30 | **/ 31 | template > 32 | class NRingBuffer 33 | { 34 | public: 35 | template 36 | class iterator_impl : public std::iterator 37 | { 38 | friend class NRingBuffer; 39 | V* m_buffer; 40 | size_t m_capacity; 41 | V* m_ptr; 42 | //if the buffer is full end (one after the last) will == begin which means empty...we need to fake this 43 | mutable bool m_isFreshBeginAndIsFull; 44 | mutable bool m_isFreshEndAndIsFull; 45 | iterator_impl(V* b, size_t c, V* curPtr, bool freshB, bool freshE) 46 | : m_buffer(b) 47 | , m_capacity(c) 48 | , m_ptr(curPtr) 49 | , m_isFreshBeginAndIsFull(freshB) 50 | , m_isFreshEndAndIsFull(freshE){}; 51 | 52 | public: 53 | iterator_impl(const iterator_impl& i) 54 | : m_buffer(i.m_buffer) 55 | , m_capacity(i.m_capacity) 56 | , m_ptr(i.m_ptr) 57 | , m_isFreshBeginAndIsFull(i.m_isFreshBeginAndIsFull) 58 | , m_isFreshEndAndIsFull(i.m_isFreshEndAndIsFull){}; 59 | iterator_impl& operator++() 60 | { 61 | m_isFreshBeginAndIsFull = false; //once you move them they are not fresh 62 | m_isFreshEndAndIsFull = false; 63 | m_ptr++; 64 | assert(m_ptr <= (m_buffer + m_capacity)); 65 | if(m_ptr == (m_buffer + m_capacity)) 66 | m_ptr = m_buffer; 67 | 68 | return *this; 69 | } 70 | iterator_impl operator++(int) 71 | { 72 | iterator_impl tmp(*this); 73 | operator++(); 74 | return tmp; 75 | } 76 | 77 | iterator_impl& operator--() 78 | { 79 | assert(m_ptr >= m_buffer); 80 | m_isFreshBeginAndIsFull = false; 81 | m_isFreshEndAndIsFull = false; 82 | m_ptr--; 83 | if(m_ptr == (m_buffer - 1)) 84 | m_ptr += m_capacity; 85 | 86 | return *this; 87 | } 88 | iterator_impl operator--(int) 89 | { 90 | iterator_impl tmp(*this); 91 | operator--(); 92 | return tmp; 93 | } 94 | 95 | bool operator==(const iterator_impl& rhs) const 96 | { 97 | if(m_ptr == rhs.m_ptr) 98 | { 99 | if((m_isFreshBeginAndIsFull && rhs.m_isFreshEndAndIsFull) || (m_isFreshEndAndIsFull && rhs.m_isFreshBeginAndIsFull)) 100 | return false; //if we are full then initally ptr begin == ptr end and that violates how iterators work 101 | else 102 | return true; 103 | } 104 | else 105 | { 106 | return false; 107 | } 108 | } 109 | bool operator!=(const iterator_impl& rhs) const { return !(operator==(rhs)); } 110 | V& operator*() const { return *m_ptr; } 111 | 112 | //convert to non-const 113 | operator iterator_impl() 114 | { 115 | return iterator_impl(m_buffer, m_capacity, m_ptr, m_isFull, m_isFreshBeginAndIsFull, m_isFreshEndAndIsFull); 116 | } 117 | }; 118 | 119 | template 120 | friend class iterator_impl; 121 | 122 | //typedefs to make our class stl-compliant 123 | typedef iterator_impl const_iterator; 124 | typedef iterator_impl iterator; 125 | typedef size_t size_type; 126 | typedef ptrdiff_t difference_type; 127 | typedef T value_type; 128 | typedef Alloc allocator_type; 129 | typedef typename std::allocator_traits::pointer pointer; 130 | typedef typename std::allocator_traits::const_pointer const_pointer; 131 | 132 | 133 | iterator begin() { return iterator(m_buffer, m_capacity, m_readPtr, m_isFull, false); } 134 | 135 | iterator end() 136 | { 137 | if(m_capacity > 0) 138 | return 
iterator(m_buffer, m_capacity, (((m_readPtr - m_buffer) + GetStoredSize()) % m_capacity) + m_buffer, false, m_isFull); 139 | else 140 | return begin(); 141 | } 142 | 143 | const_iterator begin() const { return const_iterator(m_buffer, m_capacity, m_readPtr, m_isFull, false); } 144 | 145 | const_iterator end() const 146 | { 147 | if(m_capacity > 0) 148 | return const_iterator(m_buffer, m_capacity, (((m_readPtr - m_buffer) + GetStoredSize()) % m_capacity) + m_buffer, 149 | false, m_isFull); 150 | else 151 | return begin(); 152 | } 153 | 154 | enum OVERFLOW_BEHAVIOR 155 | { 156 | OF_FAIL, //return failure and do nothing 157 | OF_DISCARD, //overwrite the oldest data 158 | OF_EXPAND, //resize 159 | }; 160 | 161 | NRingBuffer(size_t capacity, OVERFLOW_BEHAVIOR overflow = OF_EXPAND, const Alloc& alloc = Alloc()) 162 | : m_overflowBehavoir(overflow) 163 | , m_capacity(capacity) 164 | , m_isFull(false) 165 | , m_allocator(alloc) 166 | { 167 | m_buffer = m_capacity ? m_allocator.allocate(m_capacity) : NULL; 168 | m_readPtr = m_writePtr = m_buffer; 169 | } 170 | 171 | NRingBuffer(const NRingBuffer& src) 172 | : m_overflowBehavoir(src.m_overflowBehavoir) 173 | , m_capacity(src.m_capacity) 174 | , m_isFull(src.m_isFull) 175 | , m_allocator(src.m_allocator) 176 | { 177 | CopyFrom(src); 178 | } 179 | 180 | NRingBuffer& operator=(const NRingBuffer& rhs) 181 | { 182 | if(this == &rhs) 183 | return *this; 184 | 185 | KillBuffer(); 186 | CopyFrom(rhs); 187 | return *this; 188 | } 189 | 190 | 191 | ~NRingBuffer() { KillBuffer(); } 192 | 193 | size_t GetCapacity() const { return m_capacity; } 194 | 195 | void Reset(size_t newCapacity) 196 | { 197 | if(newCapacity != m_capacity) 198 | { 199 | KillBuffer(); 200 | m_capacity = newCapacity; 201 | m_buffer = m_capacity ? m_allocator.allocate(m_capacity) : NULL; 202 | } 203 | m_readPtr = m_writePtr = m_buffer; 204 | m_isFull = false; 205 | } 206 | 207 | void Reset() { Reset(GetCapacity()); } 208 | 209 | size_t GetFreeSize() const { return m_capacity - GetStoredSize(); } 210 | 211 | size_t GetStoredSize() const 212 | { 213 | if(m_writePtr < m_readPtr) //it wraps around 214 | { 215 | size_t rightStored = (m_buffer + m_capacity) - m_readPtr; //stuff after the read pointer 216 | size_t leftStored = m_writePtr - m_buffer; //stuff before the end read pointer 217 | return rightStored + leftStored; 218 | } 219 | else 220 | { 221 | return m_isFull ? 
m_capacity : (m_writePtr - m_readPtr); 222 | } 223 | } 224 | 225 | bool WriteData(const T* src, size_t len) 226 | { 227 | size_t freeSize = GetFreeSize(); 228 | if(len > freeSize) 229 | { 230 | switch(m_overflowBehavoir) 231 | { 232 | case OF_FAIL: { 233 | return false; //just fail 234 | } 235 | case OF_DISCARD: { 236 | if(len > m_capacity) //if its larget than our total size or would over 237 | return false; 238 | ConsumeData(len - freeSize); 239 | break; 240 | } 241 | case OF_EXPAND: { 242 | size_t storedSize = GetStoredSize(); 243 | size_t newCap = m_capacity * 2; 244 | while((len + storedSize) > newCap) 245 | { 246 | newCap *= 2; 247 | } 248 | T* newBuffer = m_allocator.allocate(newCap); 249 | ReadDataImpl(newBuffer, storedSize, true); //copy over data calling constructors 250 | T* newRead = newBuffer; 251 | T* newWrite = newBuffer + storedSize; 252 | 253 | KillBuffer(); 254 | 255 | m_buffer = newBuffer; 256 | m_writePtr = newWrite; 257 | m_readPtr = newRead; 258 | m_capacity = newCap; 259 | //NOutputString("Expanded\n"); 260 | } 261 | break; 262 | } 263 | } 264 | 265 | size_t rightFree = (m_writePtr >= m_readPtr && !m_isFull) ? (m_buffer + m_capacity) - m_writePtr : 266 | m_readPtr - m_writePtr; //stuff after the write pointer 267 | size_t leftFree = (m_writePtr >= m_readPtr && !m_isFull) ? m_readPtr - m_buffer : 0; //stuff before the read pointer 268 | 269 | size_t writeAmt = rightFree < len ? rightFree : len; 270 | for(size_t i = 0; i < writeAmt; i++) 271 | { 272 | std::allocator_traits::construct(m_allocator, &m_writePtr[i], src[i]); //copy construct the objects 273 | } 274 | m_writePtr += writeAmt; 275 | 276 | assert(m_writePtr <= (m_buffer + m_capacity)); 277 | if(m_writePtr == (m_buffer + m_capacity)) 278 | m_writePtr = m_buffer; 279 | 280 | if(len > writeAmt) 281 | { 282 | assert((len - writeAmt) <= leftFree); 283 | for(size_t i = 0; i < (len - writeAmt); i++) 284 | { 285 | std::allocator_traits::construct(m_allocator, &m_writePtr[i], src[i + writeAmt]); //copy construct the objects 286 | } 287 | m_writePtr += (len - writeAmt); 288 | } 289 | 290 | m_isFull = (m_writePtr == m_readPtr && (len || m_isFull)); 291 | return true; 292 | } 293 | 294 | bool WriteData(const T& d) { return WriteData(&d, 1); } 295 | 296 | size_t ReadData(T* dest, size_t destSize) { return ReadDataImpl(dest, destSize, false); } 297 | 298 | bool ReadData(T& dest) { return ReadData(&dest, 1) > 0; } 299 | 300 | size_t ConsumeData(size_t count) { return ReadData(NULL, count); } 301 | 302 | //reads data but doesn't remove it 303 | size_t PeekData(T* dest, size_t destSize, size_t offset = 0) const 304 | { 305 | //spoof the offset as a largeer buffer 306 | destSize += offset; 307 | 308 | size_t rightStored = (m_writePtr < m_readPtr || m_isFull) ? (m_buffer + m_capacity) - m_readPtr : 309 | m_writePtr - m_readPtr; //stuff after the read pointer 310 | size_t leftStored = (m_writePtr < m_readPtr || m_isFull) ? 
m_writePtr - m_buffer : 0; //stuff before the end read pointer 311 | 312 | size_t readFromRightSize = std::min(rightStored, destSize); 313 | if(dest && offset <= readFromRightSize) 314 | dest = std::copy(m_readPtr + offset, m_readPtr + readFromRightSize, dest); //readFromRightSize has offset baked in 315 | 316 | T* nextPtr = m_readPtr + readFromRightSize; 317 | 318 | destSize -= readFromRightSize; 319 | size_t readFromLeftSize = std::min(leftStored, destSize); 320 | 321 | assert(nextPtr <= (m_buffer + m_capacity)); 322 | if(nextPtr == (m_buffer + m_capacity)) 323 | nextPtr = m_buffer; 324 | 325 | if(dest) 326 | std::copy(nextPtr, nextPtr + readFromLeftSize, dest); 327 | 328 | size_t readAmt = readFromRightSize + readFromLeftSize - offset; 329 | return readAmt; 330 | } 331 | 332 | bool PeekData(T& dest, size_t offset = 0) const { return PeekData(&dest, 1, offset) > 0; } 333 | 334 | private: 335 | T* m_buffer; 336 | T* m_writePtr; 337 | T* m_readPtr; 338 | 339 | size_t m_capacity; 340 | bool m_isFull; 341 | 342 | OVERFLOW_BEHAVIOR m_overflowBehavoir; 343 | Alloc m_allocator; 344 | 345 | size_t ReadDataImpl(T* dest, size_t destSize, bool construct) 346 | { 347 | size_t rightStored = (m_writePtr < m_readPtr || m_isFull) ? (m_buffer + m_capacity) - m_readPtr : 348 | m_writePtr - m_readPtr; //stuff after the read pointer 349 | size_t leftStored = (m_writePtr < m_readPtr || m_isFull) ? m_writePtr - m_buffer : 0; //stuff before the end read pointer 350 | 351 | size_t readFromRightSize = (rightStored < destSize) ? rightStored : destSize; 352 | 353 | for(size_t i = 0; i < readFromRightSize; i++) 354 | { 355 | if(dest) 356 | { 357 | if(construct) 358 | std::allocator_traits::construct(m_allocator, &dest[i], m_readPtr[i]); 359 | else 360 | dest[i] = m_readPtr[i]; 361 | } 362 | 363 | std::allocator_traits::destroy(m_allocator, &m_readPtr[i]); 364 | } 365 | 366 | 367 | m_readPtr += readFromRightSize; 368 | 369 | destSize -= readFromRightSize; 370 | size_t readFromLeftSize = (leftStored < destSize) ? leftStored : destSize; 371 | 372 | assert(m_readPtr <= (m_buffer + m_capacity)); 373 | if(m_readPtr == (m_buffer + m_capacity)) 374 | m_readPtr = m_buffer; 375 | 376 | if(readFromLeftSize > 0) 377 | { 378 | for(size_t i = 0; i < readFromLeftSize; i++) 379 | { 380 | if(dest) 381 | { 382 | if(construct) 383 | std::allocator_traits::construct(m_allocator, &dest[i + readFromRightSize], m_readPtr[i]); 384 | else 385 | dest[i + readFromRightSize] = m_readPtr[i]; 386 | } 387 | 388 | std::allocator_traits::destroy(m_allocator, &m_readPtr[i]); 389 | } 390 | 391 | m_readPtr += readFromLeftSize; 392 | } 393 | 394 | size_t readAmt = readFromRightSize + readFromLeftSize; 395 | m_isFull = (readAmt == 0 && m_isFull); 396 | return readAmt; 397 | } 398 | 399 | void KillBuffer() 400 | { 401 | iterator e = end(); 402 | for(iterator i = begin(); i != e; i++) 403 | std::allocator_traits::destroy(m_allocator, &(*i)); 404 | 405 | m_allocator.deallocate(m_buffer, m_capacity); 406 | } 407 | 408 | void CopyFrom(const NRingBuffer& src) 409 | { 410 | m_overflowBehavoir = src.m_overflowBehavoir; 411 | m_capacity = src.m_capacity; 412 | m_isFull = src.m_isFull; 413 | m_allocator = src.m_allocator; 414 | 415 | m_buffer = m_capacity ? 
m_allocator.allocate(m_capacity) : NULL; 416 | m_readPtr = (src.m_readPtr - src.m_buffer) + m_buffer; 417 | m_writePtr = (src.m_writePtr - src.m_buffer) + m_buffer; 418 | const_iterator citb = src.begin(); 419 | const_iterator cite = src.end(); 420 | 421 | iterator itb = begin(); 422 | iterator ite = end(); 423 | while(citb != cite) 424 | { 425 | m_allocator.construct(&(*itb), *citb); 426 | itb++; 427 | citb++; 428 | } 429 | } 430 | }; 431 | 432 | 433 | #endif 434 | -------------------------------------------------------------------------------- /window_surface_vk.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #ifndef NV_VK_DEFAULTWINDOWSURFACE_INCLUDED 22 | #define NV_VK_DEFAULTWINDOWSURFACE_INCLUDED 23 | 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | #define VK_MAX_QUEUED_FRAMES 4 34 | #define MAX_POSSIBLE_BACK_BUFFERS 16 35 | 36 | 37 | /* 38 | WindowSurface is a basic implementation of whatever is required to have a regular color+Depthstencil setup realted to a window 39 | This class is *not mandatory* for a sample to run. It's just a convenient way to have something put together for quick 40 | rendering in a window 41 | - a render-pass associated with the framebuffer(s) 42 | - buffers/framebuffers associated with the views of the window 43 | - command-buffers to match the current swapchain index 44 | typical use : 45 | 0) ... 46 | m_WindowSurface.acquire() 47 | ... 48 | 1) m_WindowSurface.setClearValue(); 49 | VkCommandBuffer command_buffer = m_windowSurface.beginCommandBuffer(); 50 | 2) m_windowSurface.beginRenderPass(); 51 | vkCmd...() 52 | ... 53 | 3) //for MSAA case: advances in the sub-pass to render *after* the resolve of AA 54 | m_windowSurface.nextSubPassForOverlay(); 55 | ... draw some non MSAA stuff (UI...) 
56 | 4) m_windowSurface.endRPassCBufferSubmitAndPresent(); 57 | */ 58 | class WindowSurface { 59 | public: 60 | nvvk::SwapChain m_swapChain; 61 | private: 62 | nvvk::Context* m_pContext; 63 | VkSurfaceKHR m_surface; 64 | // framebuffer size and # of samples 65 | int fb_width = 0, fb_height = 0; 66 | VkSampleCountFlagBits m_samples = VK_SAMPLE_COUNT_1_BIT; 67 | bool m_swapVsync; 68 | 69 | VkClearColorValue m_clearColor; 70 | VkClearDepthStencilValue m_clearDST; 71 | 72 | VkRenderPass m_renderPass = VK_NULL_HANDLE; 73 | 74 | VkCommandPool m_commandPool[VK_MAX_QUEUED_FRAMES]; 75 | VkCommandBuffer m_curCommandBuffer = VK_NULL_HANDLE; 76 | VkCommandBuffer m_commandBuffer[VK_MAX_QUEUED_FRAMES]; 77 | VkFence m_curFence = VK_NULL_HANDLE; 78 | VkFence m_fence[VK_MAX_QUEUED_FRAMES]; 79 | 80 | VkFramebuffer m_framebuffer[MAX_POSSIBLE_BACK_BUFFERS] = {}; 81 | 82 | VkImage m_depthImage = {}; 83 | VkImage m_msaaColorImage = {}; 84 | VkDeviceMemory m_depthImageMemory = {}; 85 | VkDeviceMemory m_msaaColorImageMemory = {}; 86 | VkImageView m_depthImageView = {}; 87 | VkImageView m_msaaColorImageView = {}; 88 | 89 | VkAllocationCallbacks *m_allocator = VK_NULL_HANDLE; 90 | 91 | bool hasStencilComponent(VkFormat format); 92 | 93 | public: 94 | bool init(nvvk::Context* pContext, NVPWindow* pWin, int MSAA); 95 | void deinit(); 96 | bool resize(int w, int h); 97 | void createFrameBuffer(); 98 | //void createImageViews(); 99 | void createRenderPass(); 100 | 101 | void acquire(); 102 | VkCommandBuffer beginCommandBuffer(VkCommandBufferUsageFlags flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); 103 | void beginRenderPass(VkSubpassContents contents = VK_SUBPASS_CONTENTS_INLINE); // could be VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS 104 | void nextSubPassForOverlay(VkSubpassContents contents = VK_SUBPASS_CONTENTS_INLINE); 105 | void endRenderPass(); 106 | void endCommandBuffer(); 107 | void submit(); 108 | void present() 109 | { 110 | m_swapChain.present(m_pContext->m_queueGCT);//m_presentQueue.queue); 111 | } 112 | void endCBufferSubmitAndPresent() // does the 3 methods in 1 call 113 | { 114 | endCommandBuffer(); 115 | submit(); 116 | present(); 117 | } 118 | void endRPassCBufferSubmitAndPresent() // does the 4 methods in 1 call 119 | { 120 | endRenderPass(); 121 | endCommandBuffer(); 122 | submit(); 123 | present(); 124 | } 125 | void createDepthResources(); 126 | void createMSAAColorResources(); 127 | void swapVsync(bool state) 128 | { 129 | if (m_swapVsync != state) 130 | { 131 | m_swapChain.update(fb_width, fb_height, state); 132 | m_swapVsync = state; 133 | } 134 | } 135 | // 136 | // Setters 137 | // 138 | void setClearValue(VkClearColorValue clearColor, VkClearDepthStencilValue clearDST = { 1.0f, 0 }) 139 | { 140 | m_clearColor = clearColor; 141 | m_clearDST = clearDST; 142 | } 143 | void setClearValue(VkClearValue clearColor, VkClearValue clearDST = { 1.0f, 0 }) 144 | { 145 | m_clearColor = clearColor.color; 146 | m_clearDST = clearDST.depthStencil; 147 | } 148 | // 149 | // getters 150 | // 151 | uint32_t getHeight() { return fb_height; } 152 | uint32_t getWidth() { return fb_width; } 153 | uint32_t getFrameIndex() { return m_swapChain.getActiveImageIndex(); } 154 | const VkRenderPass &getRenderPass() { return m_renderPass; } 155 | VkFormat getSurfaceFormat() const { return m_swapChain.getFormat(); } 156 | VkImage getCurrentBackBuffer() const { return m_swapChain.getActiveImage(); } 157 | VkImageView getCurrentBackBufferView() const { return m_swapChain.getActiveImageView(); } 158 | 
VkCommandBuffer getCurrentCommandBuffer() { return m_curCommandBuffer; } 159 | VkFramebuffer getCurrentFramebuffer() { return m_framebuffer[m_swapChain.getActiveImageIndex()]; } 160 | nvvk::Context* getContext() { return m_pContext; } 161 | }; 162 | 163 | #endif --------------------------------------------------------------------------------
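
A few illustrative usage sketches follow; they are not files from the repository, only hedged examples of how the pieces dumped above appear intended to be used. First, the pthread-backed CEvent implemented in mt/CThread.cpp (declared in mt/CThread.h): one thread blocks in WaitOnEvent() until another calls Set(). The producer()/consumer() functions and the 1000 ms timeout below are made up for illustration.

#include "CThread.h"

// auto-reset event (manualReset = false), initially unsignaled
CEvent g_workReady(false, false);

void producer()
{
    // ... publish some work for the consumer ...
    g_workReady.Set();                 // broadcasts the condition; with auto-reset, the first waiter
                                       // through the mutex consumes the signal and clears it
}

void consumer()
{
    if(g_workReady.WaitOnEvent(1000))  // absolute deadline built internally from gettimeofday(); false on timeout
    {
        // signaled: pick up the work
    }
}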
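Next, the worker-pool pattern described by the TaskBase and ThreadWorkerPool comments in mt/CThreadWork.h: derive a task, capture its arguments in the constructor, and let a worker call Invoke(). ProcessMeshTask, the mesh count and the thread count are hypothetical placeholders.

#include "CThreadWork.h"

class ProcessMeshTask : public TaskBase
{
    int m_meshIndex;                              // arguments are captured at construction time...
public:
    ProcessMeshTask(int meshIndex) : m_meshIndex(meshIndex) {}
    virtual void Invoke()                         // ...and consumed when a worker runs the task
    {
        // per-mesh work would go here (hypothetical)
    }
};

void buildAllMeshes()
{
    ThreadWorkerPool pool(4, false, false, NWTPS_LEAST_QUEUED_TASKS, "worker");
    for(int m = 0; m < 128; m++)
        pool.pushTask(new ProcessMeshTask(m));    // the pool deletes each task once it is done
    pool.FlushTasks();                            // block until every queued task has completed
}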
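The same header relies on NRingBuffer from mt/RingBuffer.h. The template parameter list (element type plus an optional allocator) did not survive in the dump above, so the NRingBuffer<int> spelling here is inferred; the member functions are used as declared.

#include "RingBuffer.h"

void ringBufferDemo()
{
    NRingBuffer<int> ring(4);            // default overflow behavior is OF_EXPAND: grow instead of failing

    int in[6] = { 0, 1, 2, 3, 4, 5 };
    ring.WriteData(in, 6);               // 6 > capacity 4, so the storage doubles to 8

    int oldest = -1;
    ring.PeekData(oldest);               // non-destructive read of the oldest element (0)

    int out[6] = {};
    size_t n = ring.ReadData(out, 6);    // destructive read; returns how many elements were popped (6 here)
    (void)n;                             // GetStoredSize() is back to 0 at this point
}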
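Finally, a per-frame sketch following the numbered "typical use" comment at the top of window_surface_vk.hpp, assuming init() has already been called on the WindowSurface; the commented-out drawScene()/drawUI() calls stand in for real vkCmd* recording and are not part of the sample.

#include "window_surface_vk.hpp"

void renderOneFrame(WindowSurface& surface)
{
    surface.acquire();                               // 0) acquire the next swapchain image

    VkClearColorValue clearColor = {{ 0.1f, 0.1f, 0.15f, 1.0f }};
    surface.setClearValue(clearColor);               // 1) set clear values, then start this frame's command buffer
    VkCommandBuffer cmd = surface.beginCommandBuffer();
    (void)cmd;                                       //    vkCmd* recording would target 'cmd'

    surface.beginRenderPass();                       // 2) color + depth-stencil render pass
    // drawScene(cmd);                               //    MSAA scene rendering (hypothetical)

    surface.nextSubPassForOverlay();                 // 3) sub-pass that runs after the MSAA resolve
    // drawUI(cmd);                                  //    non-MSAA overlay such as UI (hypothetical)

    surface.endRPassCBufferSubmitAndPresent();       // 4) end render pass + end command buffer + submit + present
}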