├── .gitattributes ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── CONTRIBUTING ├── GLSL ├── GLSL_grid.frag ├── GLSL_grid.vert ├── GLSL_mesh.frag ├── GLSL_mesh.vert ├── GLSL_mesh_lines.frag ├── GLSL_mesh_lines.vert └── noise64x64_RGB.dds ├── GLSLShader.cpp ├── GLSLShader.h ├── LICENSE ├── NVK.cpp ├── NVK.h ├── README.md ├── bk3dBase.h ├── bk3dDefs.h ├── bk3dEx.h ├── bk3d_glcommandlist.cpp ├── bk3d_glstandard.cpp ├── bk3d_vk.cpp ├── dedicated_image.cpp ├── dedicated_image.h ├── doc ├── Fences.JPG ├── Memory_chunks.JPG ├── Multithreading.md ├── NSight_Captures.md ├── OpenGL.JPG ├── Rendering_Modes.md ├── Results.md ├── Thread_workers.JPG ├── Vulkan.JPG ├── Vulkan_Code_Style.md ├── Vulkan_MT.JPG ├── Vulkan_Renderer.md ├── cmd-buffers.JPG ├── offsets.JPG ├── sample.jpg ├── toggles.JPG ├── vkbuffers.JPG └── vulkan_bk3dthreaded.md ├── gl_nv_command_list.h ├── gl_nv_commandlist_helpers.h ├── gl_vk_bk3dthreaded.cpp ├── gl_vk_bk3dthreaded.h ├── helper_fbo.h ├── mt ├── CThread.cpp ├── CThread.h ├── CThreadWork.cpp ├── CThreadWork.h ├── CThreadWork.pptx └── RingBuffer.h ├── window_surface_vk.cpp └── window_surface_vk.hpp /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ############################# 2 | #Spirv 3 | ############################# 4 | *.spv 5 | 6 | ################# 7 | ## Eclipse 8 | ################# 9 | 10 | *.pydevproject 11 | .project 12 | .metadata 13 | bin/ 14 | tmp/ 15 | *.tmp 16 | *.bak 17 | *.swp 18 | *~.nib 19 | local.properties 20 | .classpath 21 | .settings/ 22 | .loadpath 23 | 24 | # External tool builders 25 | .externalToolBuilders/ 26 | 27 | # Locally stored "Eclipse launch configurations" 28 | *.launch 29 | 30 | # CDT-specific 31 | .cproject 32 | 33 | # PDT-specific 34 | .buildpath 35 | 36 | ################# 37 | ## KDev / Linux 38 | ################# 39 | .kdev4 40 | *.kdev4 41 | *.*~ 42 | ################# 43 | ## Visual Studio 44 | ################# 45 | 46 | ## Ignore Visual Studio temporary files, build results, and 47 | ## files generated by popular Visual Studio add-ons. 
48 | 49 | # User-specific files 50 | *.vcxproj 51 | *.filters 52 | *.sln 53 | *.user 54 | *.suo 55 | *.user 56 | *.sln.docstates 57 | 58 | # Build results 59 | [Dd]ebug/ 60 | [Rr]elease/ 61 | *_i.c 62 | *_p.c 63 | *.ilk 64 | *.meta 65 | *.obj 66 | *.pch 67 | *.pdb 68 | *.pgc 69 | *.pgd 70 | *.rsp 71 | *.sbr 72 | *.tlb 73 | *.tli 74 | *.tlh 75 | *.tmp 76 | *.vspscc 77 | .builds 78 | *.dotCover 79 | 80 | ## TODO: If you have NuGet Package Restore enabled, uncomment this 81 | #packages/ 82 | 83 | # Visual C++ cache files 84 | ipch/ 85 | *.aps 86 | *.ncb 87 | *.opensdf 88 | *.sdf 89 | 90 | # Visual Studio profiler 91 | *.psess 92 | *.vsp 93 | 94 | # ReSharper is a .NET coding add-in 95 | _ReSharper* 96 | 97 | # Installshield output folder 98 | [Ee]xpress 99 | 100 | # DocProject is a documentation generator add-in 101 | DocProject/buildhelp/ 102 | DocProject/Help/*.HxT 103 | DocProject/Help/*.HxC 104 | DocProject/Help/*.hhc 105 | DocProject/Help/*.hhk 106 | DocProject/Help/*.hhp 107 | DocProject/Help/Html2 108 | DocProject/Help/html 109 | 110 | # Click-Once directory 111 | publish 112 | 113 | # Others 114 | [Bb]in 115 | [Oo]bj 116 | sql 117 | TestResults 118 | *.Cache 119 | ClientBin 120 | stylecop.* 121 | ~$* 122 | *.dbmdl 123 | Generated_Code #added for RIA/Silverlight projects 124 | 125 | # Backup & report files from converting an old project file to a newer 126 | # Visual Studio version. Backup files are not needed, because we have git ;-) 127 | _UpgradeReport_Files/ 128 | Backup*/ 129 | UpgradeLog*.XML 130 | 131 | 132 | 133 | ############ 134 | ## Windows 135 | ############ 136 | 137 | # Windows image file caches 138 | Thumbs.db 139 | 140 | # Folder config file 141 | Desktop.ini 142 | 143 | 144 | ############# 145 | ## Python 146 | ############# 147 | 148 | *.py[co] 149 | 150 | # Packages 151 | *.egg 152 | *.egg-info 153 | dist 154 | build 155 | eggs 156 | parts 157 | bin 158 | var 159 | sdist 160 | develop-eggs 161 | .installed.cfg 162 | 163 | # Installer logs 164 | pip-log.txt 165 | 166 | # Unit test / coverage reports 167 | .coverage 168 | .tox 169 | 170 | #Translations 171 | *.mo 172 | 173 | #Mr Developer 174 | .mr.developer.cfg 175 | 176 | # Mac crap 177 | .DS_Store 178 | 179 | ############################# 180 | #specific to the project 181 | ############################# 182 | cmake_built 183 | cmake_build 184 | build_kd6 185 | build 186 | 187 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "nvpro_core"] 2 | path = nvpro_core 3 | url = https://github.com/nvpro-samples/nvpro_core.git 4 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | set(PROJNAME gl_vk_bk3dthreaded) 3 | Project(${PROJNAME}) 4 | Message(STATUS "-------------------------------") 5 | Message(STATUS "Processing Project ${PROJNAME}:") 6 | 7 | ##################################################################################### 8 | # offer the choice of having nvpro_core as a sub-folder... good for packaging a sample 9 | # 10 | if(NOT BASE_DIRECTORY) 11 | 12 | find_path(BASE_DIRECTORY 13 | NAMES nvpro_core/cmake/setup.cmake 14 | PATHS ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/../.. 
15 | REQUIRED 16 | DOC "Directory containing nvpro_core" 17 | ) 18 | endif() 19 | if(EXISTS ${BASE_DIRECTORY}/nvpro_core/cmake/setup.cmake) 20 | include(${BASE_DIRECTORY}/nvpro_core/cmake/setup.cmake) 21 | include(${BASE_DIRECTORY}/nvpro_core/cmake/utilities.cmake) 22 | else() 23 | message(FATAL_ERROR "could not find base directory, please set BASE_DIRECTORY to folder containing nvpro_core") 24 | endif() 25 | 26 | _add_project_definitions(${PROJNAME}) 27 | 28 | ##################################################################################### 29 | # additions from packages needed for this sample 30 | # add refs in LIBRARIES_OPTIMIZED 31 | # add refs in LIBRARIES_DEBUG 32 | # add files in PACKAGE_SOURCE_FILES 33 | # 34 | #set(USING_OPENGL "YES") 35 | _add_package_OpenGL() 36 | _add_package_VulkanSDK() 37 | _add_package_ImGUI() 38 | _add_package_ZLIB() 39 | 40 | ##################################################################################### 41 | # process the rest of some cmake code that needs to be done *after* the packages add 42 | _add_nvpro_core_lib() 43 | 44 | ##################################################################################### 45 | # Source files for this project 46 | # 47 | file(GLOB SOURCE_FILES *.cpp *.hpp *.inl *.h *.c mt/*.cpp mt/*.h VK_nvidia/*.c VK_nvidia/*.h) 48 | 49 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/VK_nvidia) 50 | 51 | ##################################################################################### 52 | # download model for this demo to run. Avoids using GitHub for this 53 | # TODO: we need to put the models online. Should happen next week (>Dec.22) 54 | # 55 | unset(FILELISTOUT) 56 | if(1) 57 | set(FILELIST 58 | "SubMarine_134.bk3d.gz" 59 | ) 60 | else() 61 | set(FILELIST 62 | "SubMarine_134.bk3d.gz" 63 | "Jet_134.bk3d.gz" 64 | "Driveline_v134.bk3d.gz" 65 | "Body_v134.bk3d.gz" 66 | "Camera_134.bk3d.gz" 67 | "ConceptCar_134.bk3d.gz" 68 | "Eiffel_133.bk3d.gz" 69 | "Smobby_134.bk3d.gz" 70 | ) 71 | endif() 72 | 73 | download_files(FILENAMES ${FILELIST}) 74 | ##################################################################################### 75 | # GLSL to SPIR-V custom build 76 | # 77 | #more than one file can be given: _compile_GLSL("GLSL_mesh.vert;GLSL_mesh.frag" "GLSL_mesh.spv" GLSL_SOURCES) 78 | # the SpirV validator is fine as long as files are for different pipeline stages (entry points still need to be main()) 79 | #_compile_GLSL( ) 80 | UNSET(GLSL_SOURCES) 81 | UNSET(SPV_OUTPUT) 82 | _compile_GLSL("GLSL/GLSL_mesh.vert" "GLSL/GLSL_mesh_vert.spv" GLSL_SOURCES SPV_OUTPUT) 83 | _compile_GLSL("GLSL/GLSL_mesh.frag" "GLSL/GLSL_mesh_frag.spv" GLSL_SOURCES SPV_OUTPUT) 84 | _compile_GLSL("GLSL/GLSL_mesh_lines.frag" "GLSL/GLSL_mesh_lines_frag.spv" GLSL_SOURCES SPV_OUTPUT) 85 | _compile_GLSL("GLSL/GLSL_mesh_lines.vert" "GLSL/GLSL_mesh_lines_vert.spv" GLSL_SOURCES SPV_OUTPUT) 86 | _compile_GLSL("GLSL/GLSL_grid.vert" "GLSL/GLSL_grid_vert.spv" GLSL_SOURCES SPV_OUTPUT) 87 | _compile_GLSL("GLSL/GLSL_grid.frag" "GLSL/GLSL_grid_frag.spv" GLSL_SOURCES SPV_OUTPUT) 88 | source_group(GLSL_Files FILES ${GLSL_SOURCES}) 89 | 90 | ##################################################################################### 91 | # additional files from helpers 92 | # 93 | #LIST(APPEND COMMON_SOURCE_FILES 94 | # ${BASE_DIRECTORY}/nvpro_core/nvgl/WindowInertiaCamera.h 95 | # ${BASE_DIRECTORY}/nvpro_core/nvh/TimeSampler.h 96 | # ${BASE_DIRECTORY}/nvpro_core/nvh/InertiaCamera.h 97 | # ${BASE_DIRECTORY}/nvpro_core/nvmath/nvmath.inl 98 | # 
${BASE_DIRECTORY}/nvpro_core/nvmath/nvmath.h 99 | # ${BASE_DIRECTORY}/nvpro_core/nvmath/nvmath_types.h 100 | # ${BASE_DIRECTORY}/nvpro_core/nvh/profiler.hpp 101 | # ${BASE_DIRECTORY}/nvpro_core/nvh/profiler.cpp 102 | #) 103 | ##################################################################################### 104 | # Executable 105 | # 106 | if(WIN32 AND NOT GLUT_FOUND) 107 | add_definitions(/wd4267) #remove size_t to int warning 108 | add_definitions(/wd4996) #remove printf warning 109 | add_definitions(/wd4244) #remove double to float conversion warning 110 | add_definitions(/wd4305) #remove double to float truncation warning 111 | else() 112 | # allow gcc to be tolerant on some issues. TODO:should remove this option 113 | add_definitions(-fpermissive) 114 | endif() 115 | add_executable(${PROJNAME} ${SOURCE_FILES} ${COMMON_SOURCE_FILES} ${PACKAGE_SOURCE_FILES} ${GLSL_SOURCES}) 116 | set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT ${PROJNAME}) 117 | 118 | ##################################################################################### 119 | # common source code needed for this sample 120 | # 121 | source_group(common FILES 122 | ${COMMON_SOURCE_FILES} 123 | ${PACKAGE_SOURCE_FILES} 124 | ) 125 | ##################################################################################### 126 | # Linkage 127 | # 128 | target_link_libraries(${PROJNAME} optimized 129 | ${LIBRARIES_OPTIMIZED} 130 | ${PLATFORM_LIBRARIES} 131 | nvpro_core 132 | ) 133 | target_link_libraries(${PROJNAME} debug 134 | ${LIBRARIES_DEBUG} 135 | ${PLATFORM_LIBRARIES} 136 | nvpro_core 137 | ) 138 | 139 | ##################################################################################### 140 | # copies binaries that need to be put next to the exe files (ZLib, etc.) 141 | # 142 | _finalize_target( ${PROJNAME} ) 143 | 144 | # additional copies for standalone run from install folder 145 | install(FILES ${SPV_OUTPUT} CONFIGURATIONS Release DESTINATION "bin_${ARCH}/SPV_${PROJNAME}") 146 | install(FILES ${SPV_OUTPUT} CONFIGURATIONS Debug DESTINATION "bin_${ARCH}_debug/SPV_${PROJNAME}") 147 | -------------------------------------------------------------------------------- /CONTRIBUTING: -------------------------------------------------------------------------------- 1 | https://developercertificate.org/ 2 | 3 | Developer Certificate of Origin 4 | Version 1.1 5 | 6 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 7 | 8 | Everyone is permitted to copy and distribute verbatim copies of this 9 | license document, but changing it is not allowed. 10 | 11 | 12 | Developer's Certificate of Origin 1.1 13 | 14 | By making a contribution to this project, I certify that: 15 | 16 | (a) The contribution was created in whole or in part by me and I 17 | have the right to submit it under the open source license 18 | indicated in the file; or 19 | 20 | (b) The contribution is based upon previous work that, to the best 21 | of my knowledge, is covered under an appropriate open source 22 | license and I have the right under that license to submit that 23 | work with modifications, whether created in whole or in part 24 | by me, under the same open source license (unless I am 25 | permitted to submit under a different license), as indicated 26 | in the file; or 27 | 28 | (c) The contribution was provided directly to me by some other 29 | person who certified (a), (b) or (c) and I have not modified 30 | it. 
31 | 32 | (d) I understand and agree that this project and the contribution 33 | are public and that a record of the contribution (including all 34 | personal information I submit with it, including my sign-off) is 35 | maintained indefinitely and may be redistributed consistent with 36 | this project or the open source license(s) involved. -------------------------------------------------------------------------------- /GLSL/GLSL_grid.frag: -------------------------------------------------------------------------------- 1 | #version 440 core 2 | #extension GL_ARB_separate_shader_objects : enable 3 | 4 | #define DSET_GLOBAL 0 5 | # define BINDING_MATRIX 0 6 | # define BINDING_LIGHT 1 7 | 8 | #define DSET_OBJECT 1 9 | # define BINDING_MATRIXOBJ 0 10 | # define BINDING_MATERIAL 1 11 | //////////////////////////////////////////////////////////////////////////////// 12 | //////////////////////////////////////////////////////////////////////////////// 13 | layout(location=0,index=0) out vec4 out_Color; 14 | void main() 15 | { 16 | out_Color = vec4(0.5,0.7,0.5,1); 17 | } 18 | 19 | /* 20 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 21 | * 22 | * Licensed under the Apache License, Version 2.0 (the "License"); 23 | * you may not use this file except in compliance with the License. 24 | * You may obtain a copy of the License at 25 | * 26 | * http://www.apache.org/licenses/LICENSE-2.0 27 | * 28 | * Unless required by applicable law or agreed to in writing, software 29 | * distributed under the License is distributed on an "AS IS" BASIS, 30 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 31 | * See the License for the specific language governing permissions and 32 | * limitations under the License. 33 | * 34 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 35 | * SPDX-License-Identifier: Apache-2.0 36 | */ -------------------------------------------------------------------------------- /GLSL/GLSL_grid.vert: -------------------------------------------------------------------------------- 1 | #version 440 core 2 | #extension GL_ARB_separate_shader_objects : enable 3 | 4 | #define DSET_GLOBAL 0 5 | # define BINDING_MATRIX 0 6 | # define BINDING_LIGHT 1 7 | 8 | #define DSET_OBJECT 1 9 | # define BINDING_MATRIXOBJ 0 10 | # define BINDING_MATERIAL 1 11 | //////////////////////////////////////////////////////////////////////////////// 12 | //////////////////////////////////////////////////////////////////////////////// 13 | layout(std140, set= DSET_GLOBAL , binding= BINDING_MATRIX ) uniform matrixBuffer { 14 | mat4 mW; 15 | mat4 mVP; 16 | } matrix; 17 | in layout(location=0) vec3 pos; 18 | out gl_PerVertex { 19 | vec4 gl_Position; 20 | }; 21 | void main() 22 | { 23 | vec4 wPos = /*matrix.mW **/ ( vec4(pos,0.51) ); 24 | gl_Position = matrix.mVP * wPos; 25 | } 26 | 27 | /* 28 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 29 | * 30 | * Licensed under the Apache License, Version 2.0 (the "License"); 31 | * you may not use this file except in compliance with the License. 32 | * You may obtain a copy of the License at 33 | * 34 | * http://www.apache.org/licenses/LICENSE-2.0 35 | * 36 | * Unless required by applicable law or agreed to in writing, software 37 | * distributed under the License is distributed on an "AS IS" BASIS, 38 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 39 | * See the License for the specific language governing permissions and 40 | * limitations under the License. 
41 | * 42 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 43 | * SPDX-License-Identifier: Apache-2.0 44 | */ -------------------------------------------------------------------------------- /GLSL/GLSL_mesh.frag: -------------------------------------------------------------------------------- 1 | // 2 | // License Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License. 3 | // https://creativecommons.org/licenses/by-nc-sa/3.0/us/ 4 | // Voronoi part taken from Ben Weston: https://www.shadertoy.com/view/ldsGzl 5 | // few changes made for the purpose of Vulkan code 6 | // 7 | 8 | #version 440 core 9 | #extension GL_ARB_separate_shader_objects : enable 10 | 11 | #define DSET_GLOBAL 0 12 | # define BINDING_MATRIX 0 13 | # define BINDING_LIGHT 1 14 | # define BINDING_NOISE 2 15 | 16 | #define DSET_OBJECT 1 17 | # define BINDING_MATRIXOBJ 0 18 | # define BINDING_MATERIAL 1 19 | //////////////////////////////////////////////////////////////////////////////// 20 | //////////////////////////////////////////////////////////////////////////////// 21 | layout(set= DSET_GLOBAL, binding= BINDING_NOISE ) uniform sampler3D iChannel0; 22 | 23 | layout(std140, set= DSET_OBJECT , binding= BINDING_MATERIAL ) uniform materialBuffer { 24 | vec3 diffuse; 25 | float a; 26 | } material; 27 | 28 | layout(location=1) in vec3 N; 29 | layout(location=2) in vec3 inWPos; 30 | layout(location=3) in vec3 inEyePos; 31 | 32 | layout(location=0,index=0) out vec4 outColor; 33 | 34 | vec3 Sky( vec3 ray ) 35 | { 36 | return mix( vec3(.8), vec3(0), exp2(-(1.0/max(ray.y,.01))*vec3(.4,.6,1.0)) ); 37 | } 38 | 39 | mat2 mm2(in float a){float c = cos(a), s = sin(a);return mat2(c,-s,s,c);} 40 | 41 | vec3 Voronoi( vec3 pos ) 42 | { 43 | vec3 d[8]; 44 | d[0] = vec3(0,0,0); 45 | d[1] = vec3(1,0,0); 46 | d[2] = vec3(0,1,0); 47 | d[3] = vec3(1,1,0); 48 | d[4] = vec3(0,0,1); 49 | d[5] = vec3(1,0,1); 50 | d[6] = vec3(0,1,1); 51 | d[7] = vec3(1,1,1); 52 | 53 | const float maxDisplacement = .7; //tweak this to hide grid artefacts 54 | 55 | vec3 pf = floor(pos); 56 | 57 | const float phi = 1.61803398875; 58 | 59 | float closest = 12.0; 60 | vec3 result; 61 | for ( int i=0; i < 8; i++ ) 62 | { 63 | vec3 v = (pf+d[i]); 64 | vec3 r = fract(phi*v.yzx+17.*fract(v.zxy*phi)+v*v*.03);//Noise(ivec3(floor(pos+d[i]))); 65 | vec3 p = d[i] + maxDisplacement*(r.xyz-.5); 66 | p -= fract(pos); 67 | float lsq = dot(p,p); 68 | if ( lsq < closest ) 69 | { 70 | closest = lsq; 71 | result = r; 72 | } 73 | } 74 | return fract(result.xyz);//+result.www); // random colour 75 | } 76 | 77 | vec3 shade( vec3 pos, vec3 norm, vec3 rayDir, vec3 lightDir ) 78 | { 79 | vec3 paint = material.diffuse; 80 | 81 | vec3 norm2 = normalize(norm+.02*(Voronoi(pos*800.0)*2.0-1.0)); 82 | 83 | if ( dot(norm2,rayDir) > 0.0 ) // we shouldn't see flecks that point away from us 84 | norm2 -= 2.0*dot(norm2,rayDir)*rayDir; 85 | 86 | 87 | // diffuse layer, reduce overall contrast 88 | vec3 result = paint*.6*(pow(max(0.0,dot(norm,lightDir)),2.0)+.2); 89 | 90 | vec3 h = normalize( lightDir-rayDir ); 91 | vec3 s = pow(max(0.0,dot(h,norm2)),50.0)*10.0*vec3(1); 92 | 93 | float rdotn = dot(rayDir,norm2); 94 | vec3 reflection = rayDir-2.0*rdotn*norm; 95 | s += Sky( reflection ); 96 | 97 | float f = pow(1.0+rdotn,5.0); 98 | f = mix( .2, 1.0, f ); 99 | 100 | result = mix(result,paint*s,f); 101 | 102 | // gloss layer 103 | s = pow(max(0.0,dot(h,norm)),1000.0)*32.0*vec3(1); 104 | 105 | rdotn = dot(rayDir,norm); 106 | reflection = rayDir-2.0*rdotn*norm; 107 | 108 | 
return result; 109 | } 110 | 111 | void main() 112 | { 113 | vec3 lightDir = vec3(0, 0.707, 0.707); 114 | vec3 ray = inWPos; 115 | ray -= inEyePos; 116 | ray = normalize(ray); 117 | vec3 shaded = shade( inWPos, N, ray, lightDir ); 118 | outColor = vec4(shaded, 1); 119 | } 120 | -------------------------------------------------------------------------------- /GLSL/GLSL_mesh.vert: -------------------------------------------------------------------------------- 1 | #version 440 core 2 | #extension GL_ARB_separate_shader_objects : enable 3 | 4 | #define DSET_GLOBAL 0 5 | # define BINDING_MATRIX 0 6 | # define BINDING_LIGHT 1 7 | 8 | #define DSET_OBJECT 1 9 | # define BINDING_MATRIXOBJ 0 10 | # define BINDING_MATERIAL 1 11 | //////////////////////////////////////////////////////////////////////////////// 12 | //////////////////////////////////////////////////////////////////////////////// 13 | layout(std140, set= DSET_GLOBAL , binding= BINDING_MATRIX ) uniform matrixBuffer { 14 | mat4 mW; 15 | mat4 mVP; 16 | vec3 eyePos; 17 | } matrix; 18 | layout(std140, set= DSET_OBJECT , binding= BINDING_MATRIXOBJ ) uniform matrixObjBuffer { 19 | mat4 mO; 20 | } object; 21 | layout(location=0) in vec3 pos; 22 | layout(location=1) in vec3 N; 23 | 24 | layout(location=1) out vec3 outN; 25 | layout(location=2) out vec3 outWPos; 26 | layout(location=3) out vec3 outEyePos; 27 | out gl_PerVertex { 28 | vec4 gl_Position; 29 | }; 30 | void main() 31 | { 32 | outN = N.xzy; 33 | vec4 wpos = matrix.mW * (object.mO * vec4(pos,1)); 34 | gl_Position = matrix.mVP * wpos; 35 | outWPos = wpos.xyz; 36 | outEyePos = matrix.eyePos; 37 | } 38 | 39 | /* 40 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 41 | * 42 | * Licensed under the Apache License, Version 2.0 (the "License"); 43 | * you may not use this file except in compliance with the License. 44 | * You may obtain a copy of the License at 45 | * 46 | * http://www.apache.org/licenses/LICENSE-2.0 47 | * 48 | * Unless required by applicable law or agreed to in writing, software 49 | * distributed under the License is distributed on an "AS IS" BASIS, 50 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 51 | * See the License for the specific language governing permissions and 52 | * limitations under the License. 53 | * 54 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 55 | * SPDX-License-Identifier: Apache-2.0 56 | */ -------------------------------------------------------------------------------- /GLSL/GLSL_mesh_lines.frag: -------------------------------------------------------------------------------- 1 | #version 440 core 2 | #extension GL_ARB_separate_shader_objects : enable 3 | 4 | #define DSET_GLOBAL 0 5 | # define BINDING_MATRIX 0 6 | # define BINDING_LIGHT 1 7 | 8 | #define DSET_OBJECT 1 9 | # define BINDING_MATRIXOBJ 0 10 | # define BINDING_MATERIAL 1 11 | //////////////////////////////////////////////////////////////////////////////// 12 | //////////////////////////////////////////////////////////////////////////////// 13 | layout(std140, set= DSET_OBJECT , binding= BINDING_MATERIAL ) uniform materialBuffer { 14 | uniform vec3 diffuse; 15 | } material; 16 | //layout(std140, set= DSET_GLOBAL , binding= BINDING_LIGHT ) uniform lightBuffer { 17 | // uniform vec3 dir; 18 | //} light; 19 | layout(location=0) out vec4 outColor; 20 | void main() { 21 | 22 | outColor = vec4(0.5,0.5,0.2,1); 23 | } 24 | 25 | /* 26 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 
27 | * 28 | * Licensed under the Apache License, Version 2.0 (the "License"); 29 | * you may not use this file except in compliance with the License. 30 | * You may obtain a copy of the License at 31 | * 32 | * http://www.apache.org/licenses/LICENSE-2.0 33 | * 34 | * Unless required by applicable law or agreed to in writing, software 35 | * distributed under the License is distributed on an "AS IS" BASIS, 36 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 37 | * See the License for the specific language governing permissions and 38 | * limitations under the License. 39 | * 40 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 41 | * SPDX-License-Identifier: Apache-2.0 42 | */ -------------------------------------------------------------------------------- /GLSL/GLSL_mesh_lines.vert: -------------------------------------------------------------------------------- 1 | #version 440 core 2 | #extension GL_ARB_separate_shader_objects : enable 3 | 4 | #define DSET_GLOBAL 0 5 | # define BINDING_MATRIX 0 6 | # define BINDING_LIGHT 1 7 | 8 | #define DSET_OBJECT 1 9 | # define BINDING_MATRIXOBJ 0 10 | # define BINDING_MATERIAL 1 11 | //////////////////////////////////////////////////////////////////////////////// 12 | //////////////////////////////////////////////////////////////////////////////// 13 | layout(std140, set= DSET_GLOBAL , binding= BINDING_MATRIX ) uniform matrixBuffer { 14 | mat4 mW; 15 | mat4 mVP; 16 | vec3 eyePos; 17 | } matrix; 18 | layout(std140, set= DSET_OBJECT , binding= BINDING_MATRIXOBJ ) uniform matrixObjBuffer { 19 | mat4 mO; 20 | } object; 21 | layout(location=0) in vec3 pos; 22 | 23 | layout(location=2) out vec3 outWPos; 24 | layout(location=3) out vec3 outEyePos; 25 | out gl_PerVertex { 26 | vec4 gl_Position; 27 | }; 28 | void main() 29 | { 30 | vec4 wpos = matrix.mW * (object.mO * vec4(pos,1)); 31 | gl_Position = matrix.mVP * wpos; 32 | outWPos = wpos.xyz; 33 | outEyePos = matrix.eyePos; 34 | } 35 | 36 | /* 37 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 38 | * 39 | * Licensed under the Apache License, Version 2.0 (the "License"); 40 | * you may not use this file except in compliance with the License. 41 | * You may obtain a copy of the License at 42 | * 43 | * http://www.apache.org/licenses/LICENSE-2.0 44 | * 45 | * Unless required by applicable law or agreed to in writing, software 46 | * distributed under the License is distributed on an "AS IS" BASIS, 47 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 48 | * See the License for the specific language governing permissions and 49 | * limitations under the License. 50 | * 51 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 52 | * SPDX-License-Identifier: Apache-2.0 53 | */ -------------------------------------------------------------------------------- /GLSL/noise64x64_RGB.dds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/GLSL/noise64x64_RGB.dds -------------------------------------------------------------------------------- /GLSLShader.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #include "GLSLShader.h" 21 | #include <fstream> 22 | #include <string> 23 | #include <cstring> 24 | #include "nvh/nvprint.hpp" 25 | 26 | GLSLShader::GLSLShader() 27 | { 28 | m_linkNeeded = false; 29 | m_program = 0; 30 | } 31 | 32 | //*NVTL* 33 | GLSLShader::~GLSLShader() 34 | { 35 | cleanup(); 36 | } 37 | //*NVTL* 38 | void GLSLShader::cleanup() 39 | { 40 | m_fragFiles.clear(); 41 | m_vertFiles.clear(); 42 | m_fragSrc.clear(); 43 | m_vertSrc.clear(); 44 | if(m_program) glDeleteProgram(m_program); 45 | m_program = 0; 46 | } 47 | bool GLSLShader::compileShaderFromString(const char *shader, GLenum type) 48 | { 49 | bool bRes = true; 50 | if(!shader) 51 | return false; 52 | if(0 == m_program) 53 | m_program = glCreateProgram(); 54 | GLuint obj = glCreateShader(type); 55 | 56 | // set source 57 | GLint size = (GLint)strlen(shader); 58 | const GLchar* progString = (const GLchar*)shader; 59 | glShaderSource(obj, 1, &progString, &size); 60 | glCompileShader(obj); 61 | bRes = outputShaderLog(obj); 62 | 63 | glAttachShader(m_program, obj); 64 | glDeleteShader(obj); 65 | 66 | m_linkNeeded = true; 67 | return bRes; 68 | } 69 | 70 | bool GLSLShader::compileShader(const char *filename, GLenum type) 71 | { 72 | bool bRes; 73 | if(0 == m_program) 74 | m_program = glCreateProgram(); 75 | 76 | std::ifstream ifs; 77 | ifs.open(filename); 78 | if(ifs.bad()) 79 | return false; 80 | 81 | std::string file((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>()); 82 | //*NVTL* added the checking here because ifs.bad() doesn't fail when the file doesn't exist... 83 | if(file.size()==0) 84 | { 85 | //*NVTL* : added the message. Because I lost quite some time because of this missing feature ;-P 86 | if(filename) LOGE("\nGLSL ERROR: loading file %s \n", filename ?
filename : "NULL"); 87 | fflush(stdout); 88 | //# ifdef _DEBUG 89 | // _asm {int 3 } 90 | //# endif 91 | return false; 92 | } 93 | 94 | //std::string file; 95 | //while(!ifs.eof()) file += ifs.get(); 96 | 97 | 98 | GLuint obj = glCreateShader(type); 99 | 100 | // set source 101 | GLint size = (GLint)file.size(); 102 | const GLchar* progString = (const GLchar*)file.c_str(); 103 | glShaderSource(obj, 1, &progString, &size); 104 | glCompileShader(obj); 105 | bRes = outputShaderLog(obj); 106 | 107 | glAttachShader(m_program, obj); 108 | glDeleteShader(obj); 109 | 110 | ifs.close(); 111 | m_linkNeeded = true; 112 | return bRes; 113 | } 114 | 115 | bool GLSLShader::addFragmentShader(const char *filename, bool isNew) 116 | { 117 | bool bRes; 118 | 119 | bRes = compileShader(filename, GL_FRAGMENT_SHADER); 120 | 121 | if(isNew) m_fragFiles.push_back(filename); 122 | return bRes; 123 | } 124 | 125 | 126 | bool GLSLShader::addVertexShader(const char *filename, bool isNew) 127 | { 128 | bool bRes; 129 | 130 | bRes = compileShader(filename, GL_VERTEX_SHADER); 131 | 132 | if(isNew) m_vertFiles.push_back(filename); 133 | return bRes; 134 | } 135 | 136 | //----> *NVTL* 137 | bool GLSLShader::addFragmentShaderFromString(const char *shader) 138 | { 139 | bool bRes; 140 | 141 | bRes = compileShaderFromString(shader, GL_FRAGMENT_SHADER); 142 | 143 | m_fragSrc.push_back(shader); 144 | return bRes; 145 | } 146 | 147 | 148 | bool GLSLShader::addVertexShaderFromString(const char *shader) 149 | { 150 | bool bRes; 151 | 152 | bRes = compileShaderFromString(shader, GL_VERTEX_SHADER); 153 | 154 | m_vertSrc.push_back(shader); 155 | return bRes; 156 | } 157 | // <---- *NVTL* 158 | bool GLSLShader::link() 159 | { 160 | bool bRes = true; 161 | if(m_linkNeeded) 162 | { 163 | glLinkProgram(m_program); 164 | bRes = outputProgramLog(m_program); 165 | m_linkNeeded = false; 166 | } 167 | return bRes; 168 | } 169 | 170 | bool GLSLShader::bindShader() 171 | { 172 | bool bRes = true; 173 | 174 | if(m_linkNeeded) 175 | bRes = link(); 176 | 177 | glUseProgram(m_program); 178 | 179 | //GL_FLOAT_RGBA32_NV; 180 | //GL_RGBA_FLOAT32_ATI; 181 | return bRes; 182 | } 183 | 184 | void GLSLShader::unbindShader() 185 | { 186 | glUseProgram(0); 187 | } 188 | 189 | bool GLSLShader::outputProgramLog(GLuint obj) 190 | { 191 | char buf[1024]; 192 | int len; 193 | glGetProgramInfoLog(obj, 1024, &len, buf); 194 | if(len) 195 | { 196 | LOGW("Log for %d:\n%s\n\n", obj, buf); 197 | if(strstr(buf, "error") != nullptr) 198 | return false; 199 | # ifdef _DEBUG 200 | //if(strstr(buf, "error") > 0) 201 | //{ 202 | // _asm {int 3 } 203 | //} 204 | # endif 205 | } 206 | return true; 207 | } 208 | 209 | bool GLSLShader::outputShaderLog(GLuint obj) 210 | { 211 | char buf[1024]; 212 | int len; 213 | glGetShaderInfoLog(obj, 1024, &len, buf); 214 | if(len) 215 | { 216 | LOGW("Log for %d:\n%s\n\n", obj, buf); 217 | if(strstr(buf, "error") != nullptr) 218 | return false; 219 | # ifdef _DEBUG 220 | //if(strstr(buf, "error") > 0) 221 | //{ 222 | // _asm {int 3 } 223 | //} 224 | # endif 225 | } 226 | return true; 227 | } 228 | 229 | void GLSLShader::setUniformFloat(const char *name, float val) 230 | { 231 | glUniform1f(glGetUniformLocation(m_program, name), val); 232 | } 233 | 234 | void GLSLShader::setUniformInt(const char *name, int val) 235 | { 236 | glUniform1i(glGetUniformLocation(m_program, name), val); 237 | } 238 | 239 | //----> *NVTL* 240 | void GLSLShader::setUniformVector(const char * name, float* val, int count) 241 | { 242 | GLint id = 
glGetUniformLocation(m_program, name); 243 | if (id == -1) { 244 | return; 245 | } 246 | switch (count) { 247 | case 1: 248 | glUniform1fv(id, 1, val); 249 | break; 250 | case 2: 251 | glUniform2fv(id, 1, val); 252 | break; 253 | case 3: 254 | glUniform3fv(id, 1, val); 255 | break; 256 | case 4: 257 | glUniform4fv(id, 1, val); 258 | break; 259 | } 260 | } 261 | 262 | void GLSLShader::setTextureUnit(const char * texname, int texunit) 263 | { 264 | GLint linked; 265 | glGetProgramiv(m_program, GL_LINK_STATUS, &linked); 266 | if (linked != GL_TRUE) { 267 | return; 268 | } 269 | GLint id = glGetUniformLocation(m_program, texname); 270 | if (id == -1) { 271 | return; 272 | } 273 | glUniform1i(id, texunit); 274 | } 275 | 276 | void GLSLShader::bindTexture(GLenum target, const char * texname, GLuint texid, int texunit) 277 | { 278 | glActiveTexture(GL_TEXTURE0 + texunit); 279 | glBindTexture(target, texid); 280 | setTextureUnit(texname, texunit); 281 | glActiveTexture(GL_TEXTURE0); 282 | } 283 | //<---- *NVTL* 284 | 285 | void GLSLShader::reloadShader() 286 | { 287 | glDeleteProgram(m_program); 288 | 289 | //We should really also detach the old fragment and vertex shaders 290 | //and delete them as well... 291 | 292 | m_program = 0; 293 | 294 | for(unsigned int i = 0; i < m_vertFiles.size(); ++i) 295 | addVertexShader(m_vertFiles[i].c_str(), false); 296 | for(unsigned int i = 0; i < m_vertSrc.size(); ++i) 297 | addVertexShader(m_vertSrc[i].c_str(), false); 298 | 299 | for(unsigned int i = 0; i < m_fragFiles.size(); ++i) 300 | addFragmentShader(m_fragFiles[i].c_str(), false); 301 | for(unsigned int i = 0; i < m_fragSrc.size(); ++i) 302 | addFragmentShader(m_fragSrc[i].c_str(), false); 303 | } 304 | -------------------------------------------------------------------------------- /GLSLShader.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #pragma once 21 | #ifdef WIN32 22 | # include <windows.h> 23 | #endif 24 | #include <nvgl/extensions_gl.hpp> // assumed GL header (the original include target was lost) 25 | #include <string> 26 | #include <vector> 27 | 28 | class GLSLShader 29 | { 30 | public: 31 | GLSLShader(); 32 | ~GLSLShader(); 33 | 34 | void cleanup(); 35 | 36 | bool addFragmentShader(const char* filename, bool isNew=true); 37 | bool addVertexShader(const char* filename, bool isNew=true); 38 | bool addFragmentShaderFromString(const char* shader); 39 | bool addVertexShaderFromString(const char* shader); 40 | bool link(); 41 | 42 | bool bindShader(); 43 | void unbindShader(); 44 | 45 | void setUniformFloat(const char* name, float val); 46 | void setUniformInt(const char* name, int val); 47 | void setUniformVector(const char * name, float* val, int count); 48 | void setTextureUnit(const char * texname, int texunit); 49 | void bindTexture(GLenum target, const char * texname, GLuint texid, int texunit); 50 | 51 | void reloadShader(); 52 | 53 | inline GLuint getProgram() {return m_program;} 54 | 55 | inline int getUniformLocation(const char* name) { return glGetUniformLocation(m_program, name); } 56 | 57 | private: 58 | 59 | bool compileShader(const char* filename, GLenum type); 60 | bool compileShaderFromString(const char *shader, GLenum type); 61 | bool outputProgramLog(GLuint obj); 62 | bool outputShaderLog(GLuint obj); 63 | 64 | bool m_bound; 65 | bool m_linkNeeded; 66 | 67 | std::vector<std::string> m_vertFiles; 68 | std::vector<std::string> m_fragFiles; 69 | std::vector<std::string> m_vertSrc; 70 | std::vector<std::string> m_fragSrc; 71 | 72 | GLuint m_program; 73 | 74 | }; -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types.
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DEPRECATED 2 | 3 | This sample is from earlier versions of the Vulkan API and we do not recommend it anymore. 4 | 5 | # Vulkan & OpenGL & Command-list Sample using "Thread-Workers" 6 | 7 | With the official release of Vulkan, NVIDIA and the "Devtech-Proviz" team released new samples in the [professional graphics repository](https://github.com/nvpro-samples). 8 | 9 | The purpose of this blog post is to give more details on what is happening in the sample called `gl_vk_bk3dthreaded` [(available here)](https://github.com/nvpro-samples/gl_vk_bk3dthreaded). 10 | 11 | ![Example](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/sample.jpg) 12 | 13 | ## How to build the sample 14 | For now, I am sorry to say that the sample may only run on Windows; I haven't consolidated it for Linux yet. 15 | 16 | This sample requires the following: 17 | 18 | - LunarG SDK v1.0.3.1: just install it from https://vulkan.lunarg.com; CMake should be able to locate it 19 | - the submarine model: when you configure the project with CMake, the CMake script will perform a *wget* to fetch the model and store it locally (keep `MODEL_DOWNLOAD_SUBMARINE` checked). The model is 32 MB and will be stored in a shared folder called `downloaded_resources` 20 | 21 | Optionally, other *bk3d* models can be used in this sample, but to avoid heavy downloads only the submarine is fetched by default. Turn `MODEL_DOWNLOAD_MORE` on for more models... 22 | 23 | ## How does the sample work 24 | 25 | The sample will run by default with the *submarine* model *and some camera animation*.
So if you want to freely move the camera, don't forget to stop the animation (UI or the 'a' key). 26 | 27 | If you pass another model (*.bk3d.gz or *.bk3d) as a cmd-line argument, the sample should be able to render it, but the animation will be turned off, and it is possible that the camera won't focus exactly on the new model... 28 | 29 | The Vulkan renderer is the default one at startup. You can switch between: 30 | 31 | - **OpenGL & Command-lists**: an example of how to feed the token-buffers 32 | - **OpenGL**: a basic implementation of how you would render 3D with OpenGL 33 | - **Vulkan**: the default renderer 34 | 35 | ![toggles](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/toggles.JPG) 36 | 37 | > **Note**: toggles are preceded by a character between quotes: when the viewport has the focus, you can use the keyboard instead. 38 | 39 | - **Use Workers**: when checked, multi-threading is used; when unchecked, only the main thread updates the draw commands (cmd-buffers) 40 | - **command-buffer amount**: by default, 16 secondary command-buffers will be created to render everything. In the multi-threading case, thread-workers will get spawned and will work on building them when the **'c'** toggle is checked (*command-buffer continuous refresh*) 41 | - **Cmd-buf-style**: this model came from a CAD application. It turns out that at the time this model was created, primitives were issued depending on their 'parts', rather than on their primitive type and/or materials (hence shaders). **"sort on primitive type"** renders triangles first, then strips, then lines... 42 | - **MSAA**: multisampling mode 43 | - **'c'**: toggles command-buffer continuous refresh 44 | - **space**: toggles continuous rendering 45 | - toggles from 'o' to '5' are self-explanatory options... just give them a try 46 | 47 | ### cmd-line arguments 48 | 49 | - -v (VBO max Size) 50 | - -m (bk3d model) 51 | - -c 0 or 1 : use command-lists 52 | - -o 0 or 1 : display meshes 53 | - -g 0 or 1 : display grid 54 | - -s 0 or 1 : stats 55 | - -a 0 or 1 : animate camera 56 | - -d 0 or 1 : debug stuff (ui) 57 | - -m (bk3d file) : load a specific model 58 | - (bk3d file name) : load a specific model 59 | - -q (msaa) : MSAA 60 | 61 | ### mouse 62 | Special keys combined with the mouse allow you to move around the model. The camera always targets a focus point and essentially works in "polar coordinates" (**TODO**: I need to display the focus point with a cross...) 63 | 64 | - **mouse wheel**: zoom in/out from the focus point 65 | - **left mouse button**: rotate around the focus point 66 | - **right mouse button**: rotate around the Ox axis and zoom in/out from the focus point 67 | - **right mouse button + Ctrl**: push the focus point forward/backward 68 | - **middle mouse button**: pan the focus point left/right and up/down along the camera axes 69 | - **arrows**: rotate around the focus point 70 | - **Pg-up/Pg-down**: zoom in/out 71 | - **Pg-up/Pg-down + Ctrl**: push the focus point forward/backward along the camera axis 72 | 73 | ## 3D model(s) 74 | The 3D model comes from a *pre-baked* format (see [here](https://github.com/tlorach/Bak3d)). There is no need to understand how it works: the main interest is that it loads fast (baked format... saving us parsing time) and that I managed to 'capture' some models as they were issued by various applications. 75 | 76 | The sample will load the model, then attach it to the renderers; a minimal, hypothetical sketch of this pattern is shown right below.
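To make that concrete, here is a hypothetical sketch of the "load once, attach to each renderer" idea. The class and function names are illustrative only and do not match the sample's actual code:

```cpp
// Hypothetical sketch only: load the bk3d model once, then let each renderer
// backend create its own API-specific resources from it. These names do not
// match the sample's real classes.
#include <cstdio>
#include <memory>
#include <vector>

struct Bk3dModel { };  // placeholder for the loaded bk3d data

class Renderer {       // one implementation per graphics API
public:
  virtual ~Renderer() = default;
  virtual void attachModel(const Bk3dModel& model) = 0;  // build VBOs/buffers/cmd-buffers here
};

class RendererGL : public Renderer {
public:
  void attachModel(const Bk3dModel&) override { std::printf("OpenGL resources created\n"); }
};

class RendererVk : public Renderer {
public:
  void attachModel(const Bk3dModel&) override { std::printf("Vulkan resources created\n"); }
};

int main()
{
  Bk3dModel model;  // imagine this holding the data of SubMarine_134.bk3d.gz
  std::vector<std::unique_ptr<Renderer>> renderers;
  renderers.emplace_back(new RendererGL);
  renderers.emplace_back(new RendererVk);
  for(auto& r : renderers)   // each backend builds its own GPU resources
    r->attachModel(model);
  return 0;
}
```

In the real sample the active renderer is switched at runtime from the UI; the point is simply that the bk3d data is loaded once, while each backend creates and owns its own GPU resources.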
The resource creation will thus depend on which graphics API is being used. 77 | 78 | ## More technical details 79 | 80 | Here are more details in separate sub-sections: 81 | 82 | * [Rendering Modes](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/Rendering_Modes.md) : details on what this sample renders and how 83 | * [Rendering with Vulkan](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/Vulkan_Renderer.md) 84 | : key steps to make the Vulkan API work for this sample, including the `GL_NV_draw_vulkan_image` extension 85 | * [Vulkan Code Style](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/Vulkan_Code_Style.md) 86 | : a helper file that allows writing Vulkan code in a more compact fashion 87 | * [Multithreading](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/Multithreading.md "Multithreading") 88 | : based on "Thread-workers", and how to use Vulkan in this case 89 | * [Results / performances](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/Results.md "Results") 90 | * [NSight captures](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/NSight_Captures.md "NSight captures") : some NSight captures showing what is happening 91 | 92 | -------------------------------------------------------------------------------- /bk3dDefs.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #ifndef __BK3DDEFS__ 21 | #define __BK3DDEFS__ 22 | 23 | //------------------------------------------------- 24 | // 25 | // D3D9 definitions 26 | // ---------------- 27 | // This part is needed when OpenGL/DX9-10 is not used : 28 | // some enums & defines are needed anyways.
29 | // Instead of including OpenGL only for that, 30 | // this section will define them 31 | // Furthermore : this can be used by any exporter/converter 32 | // 33 | //------------------------------------------------- 34 | #ifndef _d3d9TYPES_H_ 35 | #ifdef BK3DVERBOSE 36 | #pragma message("defining D3DPRIMITIVETYPE here...") 37 | #endif 38 | enum D3DPRIMITIVETYPE 39 | { 40 | D3DPT_UNDEFINED = 0, 41 | D3DPT_POINTLIST = 1, 42 | D3DPT_LINELIST = 2, 43 | D3DPT_LINESTRIP = 3, 44 | D3DPT_TRIANGLELIST = 4, 45 | D3DPT_TRIANGLESTRIP = 5, 46 | D3DPT_TRIANGLEFAN = 6, 47 | D3DPT_END = -1 48 | }; 49 | #ifdef BK3DVERBOSE 50 | #pragma message("defining D3DFORMAT here...") 51 | #endif 52 | enum D3DFORMAT 53 | { 54 | D3DFMT_INDEX16 = 101, 55 | D3DFMT_INDEX32 = 102, 56 | D3DFMT_END = -1 57 | }; 58 | #ifdef BK3DVERBOSE 59 | #pragma message("defining D3DDECLTYPE here...") 60 | #endif 61 | enum D3DDECLTYPE 62 | { 63 | D3DDECLTYPE_FLOAT1 = 0, 64 | D3DDECLTYPE_FLOAT2 = 1, 65 | D3DDECLTYPE_FLOAT3 = 2, 66 | D3DDECLTYPE_FLOAT4 = 3, 67 | D3DDECLTYPE_D3DCOLOR = 4, 68 | D3DDECLTYPE_UBYTE4 = 5, 69 | D3DDECLTYPE_SHORT2 = 6, 70 | D3DDECLTYPE_SHORT4 = 7, 71 | D3DDECLTYPE_UBYTE4N = 8, 72 | D3DDECLTYPE_SHORT2N = 9, 73 | D3DDECLTYPE_SHORT4N = 10, 74 | D3DDECLTYPE_USHORT2N = 11, 75 | D3DDECLTYPE_USHORT4N = 12, 76 | D3DDECLTYPE_UDEC3 = 13, 77 | D3DDECLTYPE_DEC3N = 14, 78 | D3DDECLTYPE_FLOAT16_2 = 15, 79 | D3DDECLTYPE_FLOAT16_4 = 16, 80 | D3DDECLTYPE_UNUSED = 17, 81 | 82 | D3DDECLTYPE_UNDEF = -1, 83 | }; 84 | #endif 85 | 86 | //------------------------------------------------- 87 | // 88 | // D3D10 definitions 89 | // ---------------- 90 | // This part is needed when OpenGL is not used : 91 | // some enums & defines are needed anyways. 92 | // Instead of including OpenGL only for that, 93 | // this section will define them 94 | // Furthermore : this can be used by any exporter/converter 95 | // 96 | //------------------------------------------------- 97 | #ifndef __d3d10_h__ 98 | #ifdef BK3DVERBOSE 99 | #pragma message("defining D3D10_PRIMITIVE_TOPOLOGY enum...") 100 | #endif 101 | enum D3D10_PRIMITIVE_TOPOLOGY 102 | { 103 | D3D10_PRIMITIVE_TOPOLOGY_UNDEFINED = 0, 104 | D3D10_PRIMITIVE_TOPOLOGY_POINTLIST = 1, 105 | D3D10_PRIMITIVE_TOPOLOGY_LINELIST = 2, 106 | D3D10_PRIMITIVE_TOPOLOGY_LINESTRIP = 3, 107 | D3D10_PRIMITIVE_TOPOLOGY_TRIANGLELIST = 4, 108 | D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP = 5, 109 | D3D10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ = 10, 110 | D3D10_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ = 11, 111 | D3D10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ = 12, 112 | D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ = 13, 113 | //D3D10_PRIMITIVE_TOPOLOGY_FAN = 14 // Doesn't exist in DXGI... 
114 | D3D10_PT_END = -1 115 | }; 116 | #ifdef BK3DVERBOSE 117 | #pragma message("defining DXGI_FORMAT enum...") 118 | #endif 119 | enum DXGI_FORMAT // stick to DXGI values 120 | { 121 | DXGI_FORMAT_UNKNOWN = 0, 122 | DXGI_FORMAT_R32G32B32A32_TYPELESS = 1, 123 | DXGI_FORMAT_R32G32B32A32_FLOAT = 2, 124 | DXGI_FORMAT_R32G32B32A32_UINT = 3, 125 | DXGI_FORMAT_R32G32B32A32_SINT = 4, 126 | DXGI_FORMAT_R32G32B32_TYPELESS = 5, 127 | DXGI_FORMAT_R32G32B32_FLOAT = 6, 128 | DXGI_FORMAT_R32G32B32_UINT = 7, 129 | DXGI_FORMAT_R32G32B32_SINT = 8, 130 | DXGI_FORMAT_R16G16B16A16_TYPELESS = 9, 131 | DXGI_FORMAT_R16G16B16A16_FLOAT = 10, 132 | DXGI_FORMAT_R16G16B16A16_UNORM = 11, 133 | DXGI_FORMAT_R16G16B16A16_UINT = 12, 134 | DXGI_FORMAT_R16G16B16A16_SNORM = 13, 135 | DXGI_FORMAT_R16G16B16A16_SINT = 14, 136 | DXGI_FORMAT_R32G32_TYPELESS = 15, 137 | DXGI_FORMAT_R32G32_FLOAT = 16, 138 | DXGI_FORMAT_R32G32_UINT = 17, 139 | DXGI_FORMAT_R32G32_SINT = 18, 140 | DXGI_FORMAT_R32G8X24_TYPELESS = 19, 141 | DXGI_FORMAT_D32_FLOAT_S8X24_UINT = 20, 142 | DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS = 21, 143 | DXGI_FORMAT_X32_TYPELESS_G8X24_UINT = 22, 144 | DXGI_FORMAT_R10G10B10A2_TYPELESS = 23, 145 | DXGI_FORMAT_R10G10B10A2_UNORM = 24, 146 | DXGI_FORMAT_R10G10B10A2_UINT = 25, 147 | DXGI_FORMAT_R11G11B10_FLOAT = 26, 148 | DXGI_FORMAT_R8G8B8A8_TYPELESS = 27, 149 | DXGI_FORMAT_R8G8B8A8_UNORM = 28, 150 | DXGI_FORMAT_R8G8B8A8_UNORM_SRGB = 29, 151 | DXGI_FORMAT_R8G8B8A8_UINT = 30, 152 | DXGI_FORMAT_R8G8B8A8_SNORM = 31, 153 | DXGI_FORMAT_R8G8B8A8_SINT = 32, 154 | DXGI_FORMAT_R16G16_TYPELESS = 33, 155 | DXGI_FORMAT_R16G16_FLOAT = 34, 156 | DXGI_FORMAT_R16G16_UNORM = 35, 157 | DXGI_FORMAT_R16G16_UINT = 36, 158 | DXGI_FORMAT_R16G16_SNORM = 37, 159 | DXGI_FORMAT_R16G16_SINT = 38, 160 | DXGI_FORMAT_R32_TYPELESS = 39, 161 | DXGI_FORMAT_D32_FLOAT = 40, 162 | DXGI_FORMAT_R32_FLOAT = 41, 163 | DXGI_FORMAT_R32_UINT = 42, 164 | DXGI_FORMAT_R32_SINT = 43, 165 | DXGI_FORMAT_R24G8_TYPELESS = 44, 166 | DXGI_FORMAT_D24_UNORM_S8_UINT = 45, 167 | DXGI_FORMAT_R24_UNORM_X8_TYPELESS = 46, 168 | DXGI_FORMAT_X24_TYPELESS_G8_UINT = 47, 169 | DXGI_FORMAT_R8G8_TYPELESS = 48, 170 | DXGI_FORMAT_R8G8_UNORM = 49, 171 | DXGI_FORMAT_R8G8_UINT = 50, 172 | DXGI_FORMAT_R8G8_SNORM = 51, 173 | DXGI_FORMAT_R8G8_SINT = 52, 174 | DXGI_FORMAT_R16_TYPELESS = 53, 175 | DXGI_FORMAT_R16_FLOAT = 54, 176 | DXGI_FORMAT_D16_UNORM = 55, 177 | DXGI_FORMAT_R16_UNORM = 56, 178 | DXGI_FORMAT_R16_UINT = 57, 179 | DXGI_FORMAT_R16_SNORM = 58, 180 | DXGI_FORMAT_R16_SINT = 59, 181 | DXGI_FORMAT_R8_TYPELESS = 60, 182 | DXGI_FORMAT_R8_UNORM = 61, 183 | DXGI_FORMAT_R8_UINT = 62, 184 | DXGI_FORMAT_R8_SNORM = 63, 185 | DXGI_FORMAT_R8_SINT = 64, 186 | DXGI_FORMAT_A8_UNORM = 65, 187 | DXGI_FORMAT_R1_UNORM = 66, 188 | DXGI_FORMAT_R9G9B9E5_SHAREDEXP = 67, 189 | DXGI_FORMAT_R8G8_B8G8_UNORM = 68, 190 | DXGI_FORMAT_G8R8_G8B8_UNORM = 69, 191 | DXGI_FORMAT_BC1_TYPELESS = 70, 192 | DXGI_FORMAT_BC1_UNORM = 71, 193 | DXGI_FORMAT_BC1_UNORM_SRGB = 72, 194 | DXGI_FORMAT_BC2_TYPELESS = 73, 195 | DXGI_FORMAT_BC2_UNORM = 74, 196 | DXGI_FORMAT_BC2_UNORM_SRGB = 75, 197 | DXGI_FORMAT_BC3_TYPELESS = 76, 198 | DXGI_FORMAT_BC3_UNORM = 77, 199 | DXGI_FORMAT_BC3_UNORM_SRGB = 78, 200 | DXGI_FORMAT_BC4_TYPELESS = 79, 201 | DXGI_FORMAT_BC4_UNORM = 80, 202 | DXGI_FORMAT_BC4_SNORM = 81, 203 | DXGI_FORMAT_BC5_TYPELESS = 82, 204 | DXGI_FORMAT_BC5_UNORM = 83, 205 | DXGI_FORMAT_BC5_SNORM = 84, 206 | DXGI_FORMAT_B5G6R5_UNORM = 85, 207 | DXGI_FORMAT_B5G5R5A1_UNORM = 86, 208 | DXGI_FORMAT_B8G8R8A8_UNORM = 87, 209 | 
DXGI_FORMAT_B8G8R8X8_UNORM = 88, 210 | DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM = 89, 211 | DXGI_FORMAT_B8G8R8A8_TYPELESS = 90, 212 | DXGI_FORMAT_B8G8R8A8_UNORM_SRGB = 91, 213 | DXGI_FORMAT_B8G8R8X8_TYPELESS = 92, 214 | DXGI_FORMAT_B8G8R8X8_UNORM_SRGB = 93, 215 | DXGI_FORMAT_BC6H_TYPELESS = 94, 216 | DXGI_FORMAT_BC6H_UF16 = 95, 217 | DXGI_FORMAT_BC6H_SF16 = 96, 218 | DXGI_FORMAT_BC7_TYPELESS = 97, 219 | DXGI_FORMAT_BC7_UNORM = 98, 220 | DXGI_FORMAT_BC7_UNORM_SRGB = 99, 221 | DXGI_FORMAT_END = -1 222 | }; 223 | enum D3D10_INPUT_CLASSIFICATION 224 | { D3D10_INPUT_PER_VERTEX_DATA = 0, 225 | D3D10_INPUT_PER_INSTANCE_DATA = 1 226 | }; 227 | #endif 228 | 229 | #if !defined( __d3d11_h__ ) 230 | #if defined __d3dcommon_h__ 231 | #define D3D11_PRIMITIVE_TOPOLOGY D3D_PRIMITIVE_TOPOLOGY 232 | #else 233 | typedef 234 | enum D3D11_PRIMITIVE_TOPOLOGY 235 | { 236 | D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED = 0, 237 | D3D11_PRIMITIVE_TOPOLOGY_POINTLIST = 1, 238 | D3D11_PRIMITIVE_TOPOLOGY_LINELIST = 2, 239 | D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP = 3, 240 | D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST = 4, 241 | D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP = 5, 242 | D3D11_PRIMITIVE_TOPOLOGY_LINELIST_ADJ = 10, 243 | D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ = 11, 244 | D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ = 12, 245 | D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ = 13, 246 | D3D11_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST = 33, 247 | D3D11_PRIMITIVE_TOPOLOGY_2_CONTROL_POINT_PATCHLIST = 34, 248 | D3D11_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST = 35, 249 | D3D11_PRIMITIVE_TOPOLOGY_4_CONTROL_POINT_PATCHLIST = 36, 250 | D3D11_PRIMITIVE_TOPOLOGY_5_CONTROL_POINT_PATCHLIST = 37, 251 | D3D11_PRIMITIVE_TOPOLOGY_6_CONTROL_POINT_PATCHLIST = 38, 252 | D3D11_PRIMITIVE_TOPOLOGY_7_CONTROL_POINT_PATCHLIST = 39, 253 | D3D11_PRIMITIVE_TOPOLOGY_8_CONTROL_POINT_PATCHLIST = 40, 254 | D3D11_PRIMITIVE_TOPOLOGY_9_CONTROL_POINT_PATCHLIST = 41, 255 | D3D11_PRIMITIVE_TOPOLOGY_10_CONTROL_POINT_PATCHLIST = 42, 256 | D3D11_PRIMITIVE_TOPOLOGY_11_CONTROL_POINT_PATCHLIST = 43, 257 | D3D11_PRIMITIVE_TOPOLOGY_12_CONTROL_POINT_PATCHLIST = 44, 258 | D3D11_PRIMITIVE_TOPOLOGY_13_CONTROL_POINT_PATCHLIST = 45, 259 | D3D11_PRIMITIVE_TOPOLOGY_14_CONTROL_POINT_PATCHLIST = 46, 260 | D3D11_PRIMITIVE_TOPOLOGY_15_CONTROL_POINT_PATCHLIST = 47, 261 | D3D11_PRIMITIVE_TOPOLOGY_16_CONTROL_POINT_PATCHLIST = 48, 262 | D3D11_PRIMITIVE_TOPOLOGY_17_CONTROL_POINT_PATCHLIST = 49, 263 | D3D11_PRIMITIVE_TOPOLOGY_18_CONTROL_POINT_PATCHLIST = 50, 264 | D3D11_PRIMITIVE_TOPOLOGY_19_CONTROL_POINT_PATCHLIST = 51, 265 | D3D11_PRIMITIVE_TOPOLOGY_20_CONTROL_POINT_PATCHLIST = 52, 266 | D3D11_PRIMITIVE_TOPOLOGY_21_CONTROL_POINT_PATCHLIST = 53, 267 | D3D11_PRIMITIVE_TOPOLOGY_22_CONTROL_POINT_PATCHLIST = 54, 268 | D3D11_PRIMITIVE_TOPOLOGY_23_CONTROL_POINT_PATCHLIST = 55, 269 | D3D11_PRIMITIVE_TOPOLOGY_24_CONTROL_POINT_PATCHLIST = 56, 270 | D3D11_PRIMITIVE_TOPOLOGY_25_CONTROL_POINT_PATCHLIST = 57, 271 | D3D11_PRIMITIVE_TOPOLOGY_26_CONTROL_POINT_PATCHLIST = 58, 272 | D3D11_PRIMITIVE_TOPOLOGY_27_CONTROL_POINT_PATCHLIST = 59, 273 | D3D11_PRIMITIVE_TOPOLOGY_28_CONTROL_POINT_PATCHLIST = 60, 274 | D3D11_PRIMITIVE_TOPOLOGY_29_CONTROL_POINT_PATCHLIST = 61, 275 | D3D11_PRIMITIVE_TOPOLOGY_30_CONTROL_POINT_PATCHLIST = 62, 276 | D3D11_PRIMITIVE_TOPOLOGY_31_CONTROL_POINT_PATCHLIST = 63, 277 | D3D11_PRIMITIVE_TOPOLOGY_32_CONTROL_POINT_PATCHLIST = 64 278 | } D3D11_PRIMITIVE_TOPOLOGY; 279 | #endif // common 280 | #endif //d3d11 281 | 282 | //------------------------------------------------- 283 | // 284 | // OpenGL 
enums... 285 | // This part is needed when OpenGL is not used : 286 | // some enums & defines are needed anyways. 287 | // Instead of including OpenGL only for that, 288 | // this section will define them 289 | // Furthermore : this can be used by any exporter/converter 290 | // 291 | //------------------------------------------------- 292 | #ifndef __gl_h_ 293 | typedef unsigned int GLenum; 294 | typedef GLenum GLType; 295 | typedef GLenum GLTopology; 296 | //enum GLTopology // turn GL enums in real enums ? 297 | //{ 298 | #define GL_POINTS 0x0000 299 | #define GL_LINES 0x0001 300 | #define GL_LINE_LOOP 0x0002 301 | #define GL_LINE_STRIP 0x0003 302 | #define GL_TRIANGLES 0x0004 303 | #define GL_TRIANGLE_STRIP 0x0005 304 | #define GL_TRIANGLE_FAN 0x0006 305 | #define GL_QUADS 0x0007 306 | #define GL_QUAD_STRIP 0x0008 307 | #define GL_PATCHES 0x000E 308 | //}; 309 | //enum GLType 310 | //{ 311 | // enums from OpenGL so that we are directly ready 312 | #define GL_BYTE 0x1400 313 | #define GL_UNSIGNED_BYTE 0x1401 314 | #define GL_SHORT 0x1402 315 | #define GL_UNSIGNED_SHORT 0x1403 316 | #define GL_INT 0x1404 317 | #define GL_UNSIGNED_INT 0x1405 318 | #define GL_FLOAT 0x1406 319 | #define GL_2_BYTES 0x1407 320 | #define GL_3_BYTES 0x1408 321 | #define GL_4_BYTES 0x1409 322 | #define GL_DOUBLE 0x140A 323 | //}; 324 | #else 325 | typedef GLenum GLType; 326 | typedef GLenum GLTopology; 327 | #endif 328 | enum OGL_PATCH_VERTICES 329 | { 330 | GL_PATCH_VERTICES_0 = 32, 331 | GL_PATCH_VERTICES_1 = 33, 332 | GL_PATCH_VERTICES_2 = 34, 333 | GL_PATCH_VERTICES_3 = 35, 334 | GL_PATCH_VERTICES_4 = 36, 335 | GL_PATCH_VERTICES_5 = 37, 336 | GL_PATCH_VERTICES_6 = 38, 337 | GL_PATCH_VERTICES_7 = 39, 338 | GL_PATCH_VERTICES_8 = 40, 339 | GL_PATCH_VERTICES_9 = 41, 340 | GL_PATCH_VERTICES_10 = 42, 341 | GL_PATCH_VERTICES_11 = 43, 342 | GL_PATCH_VERTICES_12 = 44, 343 | GL_PATCH_VERTICES_13 = 45, 344 | GL_PATCH_VERTICES_14 = 46, 345 | GL_PATCH_VERTICES_15 = 47, 346 | GL_PATCH_VERTICES_16 = 48, 347 | GL_PATCH_VERTICES_17 = 49, 348 | GL_PATCH_VERTICES_18 = 50, 349 | GL_PATCH_VERTICES_19 = 51, 350 | GL_PATCH_VERTICES_20 = 52, 351 | GL_PATCH_VERTICES_21 = 53, 352 | GL_PATCH_VERTICES_22 = 54, 353 | GL_PATCH_VERTICES_23 = 55, 354 | GL_PATCH_VERTICES_24 = 56, 355 | GL_PATCH_VERTICES_25 = 57, 356 | GL_PATCH_VERTICES_26 = 58, 357 | GL_PATCH_VERTICES_27 = 59, 358 | GL_PATCH_VERTICES_28 = 60, 359 | GL_PATCH_VERTICES_29 = 61, 360 | GL_PATCH_VERTICES_30 = 62, 361 | GL_PATCH_VERTICES_31 = 63, 362 | GL_PATCH_VERTICES_32 = 64 363 | }; 364 | 365 | 366 | /// 367 | /// \brief These are the typical names of attributes that could be in the bk3d baked file 368 | /// @{ 369 | #define MESH_POSITION "position" 370 | #define MESH_VERTEXID "vertexid" 371 | #define MESH_COLOR "color" 372 | #define MESH_FACENORMAL "facenormal" 373 | #define MESH_TANGENT "tangent" 374 | #define MESH_BINORMAL "binormal" 375 | #define MESH_NORMAL "normal" 376 | #define MESH_TEXCOORD0 "texcoord0" 377 | #define MESH_TEXCOORD1 "texcoord1" 378 | #define MESH_TEXCOORD2 "texcoord2" 379 | #define MESH_TEXCOORD3 "texcoord3" 380 | #define MESH_BLIND0 "blind0" 381 | #define MESH_BLIND1 "blind1" 382 | #define MESH_BLIND2 "blind2" 383 | #define MESH_BLIND3 "blind3" 384 | #define MESH_BONESOFFSETS "bonesoffsets" 385 | #define MESH_BONESWEIGHTS "bonesweights" 386 | #define MESH_2BONES2WEIGHTS "2Bones2Weights" 387 | /// @} 388 | 389 | #endif //__BK3DDEFS__ 390 | 391 | -------------------------------------------------------------------------------- /dedicated_image.cpp: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #include "dedicated_image.h" 22 | #include "nvvk/commands_vk.hpp" 23 | #include "nvvk/images_vk.hpp" 24 | #include 25 | 26 | namespace nvvk { 27 | 28 | ////////////////////////////////////////////////////////////////////////// 29 | 30 | void DedicatedImage::init(VkDevice device, 31 | VkPhysicalDevice physical, 32 | const VkImageCreateInfo& imageInfo, 33 | VkMemoryPropertyFlags memoryPropertyFlags, 34 | const void* pNextMemory /*= nullptr*/) 35 | { 36 | 37 | m_device = device; 38 | 39 | if(vkCreateImage(device, &imageInfo, nullptr, &m_image) != VK_SUCCESS) 40 | { 41 | assert(0 && "image create failed"); 42 | } 43 | 44 | VkMemoryRequirements2 memReqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; 45 | VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS}; 46 | VkImageMemoryRequirementsInfo2 imageReqs = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2}; 47 | 48 | imageReqs.image = m_image; 49 | memReqs.pNext = &dedicatedRegs; 50 | vkGetImageMemoryRequirements2(device, &imageReqs, &memReqs); 51 | 52 | VkMemoryDedicatedAllocateInfo dedicatedInfo = {VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV}; 53 | dedicatedInfo.image = m_image; 54 | dedicatedInfo.pNext = pNextMemory; 55 | 56 | VkMemoryAllocateInfo allocInfo{VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; 57 | allocInfo.pNext = &dedicatedInfo; 58 | allocInfo.allocationSize = memReqs.memoryRequirements.size; 59 | 60 | VkPhysicalDeviceMemoryProperties memoryProperties; 61 | vkGetPhysicalDeviceMemoryProperties(physical, &memoryProperties); 62 | 63 | // Find an available memory type that satisfies the requested properties. 
64 | for(uint32_t memoryTypeIndex = 0; memoryTypeIndex < memoryProperties.memoryTypeCount; ++memoryTypeIndex) 65 | { 66 | if((memReqs.memoryRequirements.memoryTypeBits & (1 << memoryTypeIndex)) 67 | && (memoryProperties.memoryTypes[memoryTypeIndex].propertyFlags & memoryPropertyFlags) == memoryPropertyFlags) 68 | { 69 | allocInfo.memoryTypeIndex = memoryTypeIndex; 70 | break; 71 | } 72 | } 73 | assert(allocInfo.memoryTypeIndex != ~0); 74 | 75 | if(vkAllocateMemory(device, &allocInfo, nullptr, &m_memory) != VK_SUCCESS) 76 | { 77 | assert(0 && "failed to allocate image memory!"); 78 | } 79 | 80 | vkBindImageMemory(device, m_image, m_memory, 0); 81 | } 82 | 83 | void DedicatedImage::initWithView(VkDevice device, 84 | VkPhysicalDevice physical, 85 | uint32_t width, 86 | uint32_t height, 87 | uint32_t layers, 88 | VkFormat format, 89 | VkImageUsageFlags usage /*= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT*/, 90 | VkImageTiling tiling /*= VK_IMAGE_TILING_OPTIMAL*/, 91 | VkMemoryPropertyFlags memoryPropertyFlags /*= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT*/, 92 | VkSampleCountFlagBits samples /*= VK_SAMPLE_COUNT_1_BIT*/, 93 | VkImageAspectFlags aspect /*= VK_IMAGE_ASPECT_COLOR_BIT*/, 94 | const void* pNextImage /*= nullptr*/, 95 | const void* pNextMemory /*= nullptr*/, 96 | const void* pNextImageView /*= nullptr*/) 97 | { 98 | VkImageCreateInfo imageInfo{VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO}; 99 | imageInfo.pNext = pNextImage; 100 | imageInfo.imageType = VK_IMAGE_TYPE_2D; 101 | imageInfo.extent.width = width; 102 | imageInfo.extent.height = height; 103 | imageInfo.extent.depth = 1; 104 | imageInfo.mipLevels = 1; 105 | imageInfo.arrayLayers = layers; 106 | imageInfo.format = format; 107 | imageInfo.tiling = tiling; 108 | imageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; 109 | imageInfo.usage = usage; 110 | imageInfo.samples = samples; 111 | imageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; 112 | 113 | init(device, physical, imageInfo, memoryPropertyFlags, pNextMemory); 114 | initView(imageInfo, aspect, layers > 1 ? 
VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D, pNextImageView); 115 | } 116 | 117 | void DedicatedImage::initView(const VkImageCreateInfo& imageInfo, VkImageAspectFlags aspect, VkImageViewType viewType, const void* pNextImageView /*= nullptr*/) 118 | { 119 | VkImageViewCreateInfo createInfo = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO}; 120 | createInfo.pNext = pNextImageView; 121 | createInfo.components.r = VK_COMPONENT_SWIZZLE_R; 122 | createInfo.components.g = VK_COMPONENT_SWIZZLE_G; 123 | createInfo.components.b = VK_COMPONENT_SWIZZLE_B; 124 | createInfo.components.a = VK_COMPONENT_SWIZZLE_A; 125 | createInfo.subresourceRange.aspectMask = aspect; 126 | createInfo.subresourceRange.baseArrayLayer = 0; 127 | createInfo.subresourceRange.baseMipLevel = 0; 128 | createInfo.subresourceRange.layerCount = imageInfo.arrayLayers; 129 | createInfo.subresourceRange.levelCount = imageInfo.mipLevels; 130 | createInfo.format = imageInfo.format; 131 | createInfo.viewType = viewType; 132 | createInfo.image = m_image; 133 | 134 | VkResult result = vkCreateImageView(m_device, &createInfo, nullptr, &m_imageView); 135 | assert(result == VK_SUCCESS); 136 | } 137 | 138 | void DedicatedImage::deinit() 139 | { 140 | if(m_image != nullptr) 141 | vkDestroyImage(m_device, m_image, nullptr); 142 | if(m_imageView != nullptr) 143 | vkDestroyImageView(m_device, m_imageView, nullptr); 144 | if(m_memory != nullptr) 145 | vkFreeMemory(m_device, m_memory, nullptr); 146 | *this = {}; 147 | } 148 | 149 | void DedicatedImage::cmdInitialTransition(VkCommandBuffer cmd, VkImageLayout layout, VkAccessFlags access) 150 | { 151 | VkPipelineStageFlags srcPipe = nvvk::makeAccessMaskPipelineStageFlags(0); 152 | VkPipelineStageFlags dstPipe = nvvk::makeAccessMaskPipelineStageFlags(access); 153 | 154 | VkImageMemoryBarrier memBarrier = nvvk::makeImageMemoryBarrier(m_image, 0, access, VK_IMAGE_LAYOUT_UNDEFINED, layout); 155 | 156 | vkCmdPipelineBarrier(cmd, srcPipe, dstPipe, VK_FALSE, 0, NULL, 0, NULL, 1, &memBarrier); 157 | } 158 | 159 | } // namespace nvvk 160 | -------------------------------------------------------------------------------- /dedicated_image.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #pragma once 22 | #include 23 | 24 | namespace nvvk { 25 | ////////////////////////////////////////////////////////////////////////// 26 | /** 27 | # class DedicatedImage 28 | 29 | DedicatedImages have their own dedicated device memory allocation. 30 | This can be beneficial for render pass attachments. 31 | 32 | Also provides utility function setup the initial image layout. 
33 | */ 34 | class DedicatedImage 35 | { 36 | public: 37 | VkDevice m_device = {}; // Logical device, help for many operations 38 | VkImage m_image = {}; // Vulkan image representation (handle) 39 | VkImageView m_imageView = {}; // view of the image (optional) 40 | VkDeviceMemory m_memory = {}; // Device allocation of the image 41 | VkFormat m_format = {}; // Format when created 42 | 43 | operator VkImage() const { return m_image; } 44 | operator VkImageView() const { return m_imageView; } 45 | 46 | void init(VkDevice device, 47 | VkPhysicalDevice physical, 48 | const VkImageCreateInfo& createInfo, 49 | VkMemoryPropertyFlags memoryPropertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 50 | const void* pNextMemory = nullptr); 51 | 52 | void initWithView(VkDevice device, 53 | VkPhysicalDevice physical, 54 | uint32_t width, 55 | uint32_t height, 56 | uint32_t layers, 57 | VkFormat format, 58 | VkImageUsageFlags usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, 59 | VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL, 60 | VkMemoryPropertyFlags memoryPropertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 61 | VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT, 62 | VkImageAspectFlags aspect = VK_IMAGE_ASPECT_COLOR_BIT, 63 | const void* pNextImage = nullptr, 64 | const void* pNextMemory = nullptr, 65 | const void* pNextImageView = nullptr); 66 | 67 | void initView(const VkImageCreateInfo& imageInfo, VkImageAspectFlags aspect, VkImageViewType viewType, const void* pNextImageView = nullptr); 68 | void deinit(); 69 | 70 | void cmdInitialTransition(VkCommandBuffer cmd, VkImageLayout layout, VkAccessFlags access); 71 | }; 72 | 73 | } // namespace nvvk 74 | -------------------------------------------------------------------------------- /doc/Fences.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/doc/Fences.JPG -------------------------------------------------------------------------------- /doc/Memory_chunks.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/doc/Memory_chunks.JPG -------------------------------------------------------------------------------- /doc/Multithreading.md: -------------------------------------------------------------------------------- 1 | # Multithreading: Thread-Workers 2 | 3 | The Vulkan and OpenGL source code of this sample is **not** especially designed to be multi-threaded. 4 | 5 | To be more specific, the *only* declaration that suggests multi-threading is the `NThreadLocalVar` template, which allows TLS (Thread Local Storage) to happen... 6 | 7 | All the rest of the multi-threading happens in the main sample file `gl_vk_bk3dthreaded.cpp`. 8 | 9 | In other words, some methods of the OpenGL or Vulkan renderers become multi-threaded because *they get wrapped by a dedicated class*. 10 | 11 | ## Thread-Workers job assignment 12 | To assign a job to a worker, you must declare a specific class, where: 13 | 14 | - the *constructor* will become the receiver for **the function arguments** 15 | - the worker will start its job through a specific method: `Invoke()` 16 | 17 | This approach allows preparing the function arguments so that they are ready for later use, when the thread is finally kicked off by the thread-worker manager.
18 | Generic example: 19 | 20 | class TskXXX : public TaskBase 21 | { 22 | private: 23 | int arg1; 24 | int arg2; 25 | public: 26 | TskXXX(int _arg1, int _arg2) 27 | { 28 | arg1 = _arg1; arg2 = _arg2; 29 | } 30 | virtual void Invoke() 31 | { 32 | s_pCurRenderer->SomeMethod(arg1, arg2); 33 | } 34 | }; 35 | 36 | To execute this job, we can queue workers for TskXXX as follows: 37 | 38 | for(int n=0; n<100; n++) 39 | { 40 | // worker will be deleted by the default method Done() 41 | TskXXX *tskXXX = new TskXXX(10, 2); 42 | g_mainThreadPool->pushTask(tskXXX); 43 | } 44 | 45 | `g_mainThreadPool` is the main thread-pool manager that got initialized as follows: 46 | 47 | g_mainThreadPool = new ThreadWorkerPool(NUMTHREADS, false, false, NWTPS_ROUND_ROBIN, std::string("Main Worker Pool")); 48 | 49 | ![ThreadWorkers](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/Thread_workers.JPG) 50 | 51 | ## Workers for command-buffer creation 52 | 53 | For more details: one of the most important parts of multi-threading in this sample is in `refreshCmdBuffers()` 54 | 55 | Here is what multi-threaded command-buffer updates do (see the sketch after this list): 56 | 57 | - walk through the 3D model and split it in (almost...) equal parts 58 | - push a Worker for the command-buffer creation of this part ( `g_mainThreadPool->pushTask(tskUpdateCommandBuffer)` ) 59 | - workers will execute in a specific thread: whichever the worker-manager (`g_mainThreadPool`) chooses for you 60 | - each worker will signal an *event* object when it has finished the command-buffer creation 61 | - the main thread in the meantime will have to wait for all of them to be done: looping over all the *event objects* 62 | - once the secondary command-buffers are ready, the main thread will put them together in the primary command-buffer. This task is not supposed to take much time 63 |
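A minimal sketch of that flow, assuming hypothetical event-object helpers (`m_events[]`, `Reset()`, `WaitOnEvent()`) and a hypothetical `assemblePrimaryCommandBuffer()`; only `g_mainThreadPool->pushTask()` and the task class come from the sample itself:

    // one worker per (roughly equal) section of the 3D model
    for(int i = 0; i < numSections; i++)
    {
        m_events[i]->Reset();                        // hypothetical event object
        g_mainThreadPool->pushTask(new TskUpdateCommandBuffer(i, m_events[i]));
    }
    // meanwhile, the main thread waits until every worker signaled completion
    for(int i = 0; i < numSections; i++)
        m_events[i]->WaitOnEvent();                  // hypothetical wait
    // finally, stitch the secondary command-buffers into the primary one
    assemblePrimaryCommandBuffer();                  // hypothetical helper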
-------------------------------------------------------------------------------- /doc/NSight_Captures.md: -------------------------------------------------------------------------------- 1 | ### NSight captures 2 | 3 | Here is an image of NSight Custom-markers when using OpenGL 4 | 5 | ![OpenGL](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/OpenGL.JPG) 6 | 7 | You can see the expected "display" function where 8 | 9 | - at the bottom: the cascade of GPU commands pipelined through the GPU 10 | - at the top, the brown line: a very dense series of OpenGL commands for state changes, buffer-binding and drawcalls. It shows how busy the display() function is issuing commands to OpenGL, and how much the CPU (including the driver) is involved in this task 11 | 12 | 13 | ---------- 14 | 15 | Here is an image of NSight Custom-markers when Vulkan is using thread-workers. 16 | 17 | ![VulkanMT](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/Vulkan_MT.JPG) 18 | 19 | You can see how much room is available for anything else on the CPU: the 8 threads finished the command-buffer update very quickly. Not only could the main thread do something while waiting for the other threads to build command-buffers, but more thread-workers could be allocated onto the 8 available threads. 20 | 21 | ---------- 22 | 23 | And here is an NSight capture when Vulkan is only using the main thread. 24 | 25 | ![VulkanMT](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/Vulkan.JPG) 26 | 27 | Despite the fact that the framerate is the same, there is not much room for more CPU processing. Now, one could argue that 8 additional threads could be used in parallel for other tasks, too. 28 | 29 | This is true: multi-threading can offer a wide range of possibilities. It all depends on what kind of design is needed... 30 | 31 | And what is really exciting with Vulkan is exactly this kind of flexibility, which many engineers have been patiently waiting for. Now the challenge is to make good use of this strength... it may not always be easy. 32 | 33 | ```` 34 | Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 35 | 36 | Licensed under the Apache License, Version 2.0 (the "License"); 37 | you may not use this file except in compliance with the License. 38 | You may obtain a copy of the License at 39 | 40 | http://www.apache.org/licenses/LICENSE-2.0 41 | 42 | Unless required by applicable law or agreed to in writing, software 43 | distributed under the License is distributed on an "AS IS" BASIS, 44 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 45 | See the License for the specific language governing permissions and 46 | limitations under the License. 47 | 48 | SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 49 | SPDX-License-Identifier: Apache-2.0 50 | ```` 51 | 52 | 53 | -------------------------------------------------------------------------------- /doc/OpenGL.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/doc/OpenGL.JPG -------------------------------------------------------------------------------- /doc/Rendering_Modes.md: -------------------------------------------------------------------------------- 1 | ## Rendering modes 2 | This is a simple sample, so I took the liberty to make the shader-system extremely simple: *only 3 fragment shaders* are involved: one for the grid; one for lines; the other for filled primitives. So I cannot claim to showcase a complex use-case made of tons of shaders, as often happens. On the other hand, it might help the sample stay simple... 3 | 4 | In any renderer, we are trying to be efficient: the model contains lots of transformations as well as lots of materials. Rather than updating them on the fly (by updating uniforms, for example), we will generate *arrays of materials* and *arrays of transformations*. Then we will bind the right buffer offsets (see the short sketch at the end of this page) thanks to: 5 | 6 | * `glBindBufferRange` for OpenGL 7 | * *Bindless pointers* for Command-lists 8 | * `vkCmdBindDescriptorSets` offset argument for Vulkan 9 | 10 | In many cases, especially for OpenGL, 'bucketing' Primitives and/or grouping them according to their shaders allows greater performance than taking primitives as they come. Although Vulkan & Command-lists add lots of tolerance over the amount of state transitions in their command buffers, it is better practice to avoid overloading them too much. 11 |
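A minimal sketch of this idea for the Vulkan and OpenGL paths (the buffer, layout and offset names are illustrative, not taken from the sample):

    // Vulkan: select the per-object transform/material with dynamic offsets,
    // instead of updating a uniform between drawcalls
    uint32_t dynamicOffsets[2] = { objIndex * matrixAlignedSize, matIndex * materialAlignedSize };
    vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout,
                            1 /*firstSet*/, 1, &descSetObject, 2, dynamicOffsets);

    // OpenGL: same idea, binding a range of the big uniform buffer per object
    glBindBufferRange(GL_UNIFORM_BUFFER, UBO_MATRIX_BINDING, uboMatrices,
                      objIndex * matrixAlignedSize, sizeof(MatrixData));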
-------------------------------------------------------------------------------- /doc/Results.md: -------------------------------------------------------------------------------- 1 | # Results 2 | 3 | - **Vulkan static**: means we render with Vulkan, but the command-buffers for the geometry are never updated. The scene is made of **static meshes** 4 | - **Vulkan dynamic 16 workers**: command-buffers are all rebuilt during each frame, as if they required constant updates or changes. Typical of dynamic scenes; 16 workers are involved 5 | - **Vulkan dynamic 1 worker**: same as above, but as if no multi-threading were involved 6 | - **OpenGL**: regular OpenGL. It pretty much corresponds to a dynamic scene, because OpenGL *requires you to update render-states and drawcalls each frame* (except for Display-Lists) 7 | - Cmd-Lists static: assuming we created all the **token-buffers** once. The scene is static 8 | - Cmd-Lists dynamic 16 workers: re-building the token buffers each frame in multiple threads 9 | - Cmd-Lists dynamic 1 worker: re-building the token buffers each frame with one thread 10 | 11 | 12 | rendering mode | GPU time [ms]| CPU time [ms]| 13 | --------------------------- | -------- | -------- | 14 | Vulkan static | 5.7 | 0.688 | 15 | Vulkan dynamic 16 workers | 5.7 | 3.0 | 16 | Vulkan dynamic 1 worker | 5.7 | 5.2 | 17 | | | | 18 | OpenGL | 9.9 | 9.4 | 19 | | | | 20 | Cmd-Lists static | 5.0 | 0.097 | 21 | Cmd-Lists dynamic 16 workers| 40.0 | 40.0 | 22 | Cmd-Lists dynamic 1 worker | 20.0 | 20.0 | 23 | 24 | Vulkan shows, as expected, that it performs very well in multi-threaded mode. This model may not be the best use-case for multi-threading, but we can already see that the workers allow parallel processing, almost halving the amount of CPU time required. 25 | 26 | **OpenGL is driver limited**: the fact that a lot of state changes and drawcalls are required for each frame doesn't leave much room (none, in fact) for *more CPU processing*. So if the engine had to perform some *physics simulation over the scene*, the performance would *drop even more*. On the other hand, Vulkan leaves some room for the CPU to process additional tasks: the frame-rate could stay the same with more processing! 27 | 28 | Command-lists are the best for static scenes. It makes sense because the token-buffers are really very close to the GPU front-end. So the *driver has nearly nothing to do*. 29 | 30 | On the other hand: as soon as we want to make command-lists dynamic, things get complicated and way less efficient. However: 31 | 32 | - there must be a **bug** (sorry) in the multi-threaded command-list approach. Even though it may not be as efficient as Vulkan, it shouldn't be as bad... to be continued in upcoming github updates :-). But in a way, this bug shows one thing: it *shows that using Command-lists in multi-threaded mode is not as straightforward as with the Vulkan API*. Even if it might be possible to get good performance, the source code could become hard to maintain. 33 | - Command-lists are part of the OpenGL API, and OpenGL is really bad at multi-threading. In this sample, the token-buffer creation in the thread-workers is absolutely not dealing with OpenGL. This means that the stitching of the token-buffers is postponed and done later by the main thread.
34 | -------------------------------------------------------------------------------- /doc/Thread_workers.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/doc/Thread_workers.JPG -------------------------------------------------------------------------------- /doc/Vulkan.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/doc/Vulkan.JPG -------------------------------------------------------------------------------- /doc/Vulkan_Code_Style.md: -------------------------------------------------------------------------------- 1 | # Vulkan code style 2 | The previous source code snippet reveals a weird syntax that is not native to Vulkan... 3 | 4 | `NVK.h` and `NVK.cpp` contain an experimental overlay that turns many (ideally, all) structures of Vulkan into simple classes made of **constructors** and occasionally **functors**. 5 | 6 | My purpose was to find a way to lower the amount of C code required to fill all these Vulkan structures: to be honest, I was quite scared the first time I saw the Vulkan include file! 7 | 8 | > 9 | **Note**: I don't claim this is an ideal solution. I am not even sure that it makes the code more readable. But I wanted to try it through a few samples and stress the idea. Feedback or better ideas are most welcome. 10 | 11 | The best examples are in the source code of `bk3d_vk.cpp`. But here is a simple example: 12 | 13 | When creating a **Vertex Input State**, there are a bunch of nested structures to put together in order to finalize the description. 14 | 15 | Constructors and functors are interesting because they can turn C/C++ code into some sort of *functional* programming, where declarations are nested into one another and don't require *explicit temporary storage*. 16 | 17 | Besides, they need less space in the code and can even have default argument values. 18 | 19 | NVK::VkPipelineVertexInputStateCreateInfo vkPipelineVertexInputStateCreateInfo( 20 | NVK::VkVertexInputBindingDescription (0/*binding*/, 2*sizeof(vec3f)/*stride*/, VK_VERTEX_INPUT_RATE_VERTEX), 21 | NVK::VkVertexInputAttributeDescription (0/*location*/, 0/*binding*/, VK_FORMAT_R32G32B32_SFLOAT, 0 /*offset*/) // pos 22 | (1/*location*/, 0/*binding*/, VK_FORMAT_R32G32B32_SFLOAT, sizeof(vec3f)/*offset*/) // normal 23 | ); 24 | 25 | > **Notes**: `VkVertexInputBindingDescription` pretty much corresponds to **D3D10 Slots**: a way to group interleaved attributes together in one buffer. You can have many of these 'Slots' 26 | > `VkVertexInputAttributeDescription` corresponds to the attribute that lives in one of these slots, hence the reference to the binding 27 | 28 | In this example, the structure `VkPipelineVertexInputStateCreateInfo` is filled with parameters without the need to declare any temporary intermediate structure and then pass its pointer: 29 | 30 | - the `NVK::VkVertexInputBindingDescription` constructor directly creates a local instance of the structure, which will obviously be destroyed along with `vkPipelineVertexInputStateCreateInfo` 31 | - if there were more than one Input-binding, a functor with the same arguments as the constructor would be added right afterward.
This is what happens with the next class: 32 | - `NVK::VkVertexInputAttributeDescription` is needed for more than one attribute: position and normal 33 | - the first tuple `(0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0)` is its **constructor** 34 | - the second tuple `(1, 0, VK_FORMAT_R32G32B32_SFLOAT, sizeof(vec3f))` is its **functor** 35 | - if there were more than 2 attributes, *another functor* would follow, etc. 36 | 37 | >**Note**: I tried to *avoid 'shortcuts'* and keep the *original names* and structures so there is less confusion when translating Vulkan structures to this kind of writing. 38 | 39 | Another example I find particularly nice to read is the **RenderPass** creation: 40 | 41 | NVK::VkRenderPassCreateInfo rpinfo = NVK::VkRenderPassCreateInfo( 42 | NVK::VkAttachmentDescription 43 | ( VK_FORMAT_R8G8B8A8_UNORM, (VkSampleCountFlagBits)MSAA, //format, samples 44 | VK_ATTACHMENT_LOAD_OP_CLEAR, VK_ATTACHMENT_STORE_OP_STORE, //loadOp, storeOp 45 | VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_DONT_CARE, //stencilLoadOp, stencilStoreOp 46 | VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL //initialLayout, finalLayout 47 | ) 48 | ( VK_FORMAT_D24_UNORM_S8_UINT, (VkSampleCountFlagBits)MSAA, 49 | VK_ATTACHMENT_LOAD_OP_CLEAR, VK_ATTACHMENT_STORE_OP_STORE, 50 | VK_ATTACHMENT_LOAD_OP_CLEAR, VK_ATTACHMENT_STORE_OP_STORE, 51 | VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL 52 | ) 53 | ( VK_FORMAT_R8G8B8A8_UNORM, (VkSampleCountFlagBits)1, //format, samples 54 | VK_ATTACHMENT_LOAD_OP_CLEAR, VK_ATTACHMENT_STORE_OP_STORE, //loadOp, storeOp 55 | VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_DONT_CARE, //stencilLoadOp, stencilStoreOp 56 | VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL //initialLayout, finalLayout 57 | ), 58 | // many sub-passes could be put after one another 59 | NVK::VkSubpassDescription 60 | ( VK_PIPELINE_BIND_POINT_GRAPHICS, //pipelineBindPoint 61 | NVK::VkAttachmentReference(), //inputAttachments 62 | NVK::VkAttachmentReference(0/*attachment*/, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL/*layout*/), //colorAttachments 63 | NVK::VkAttachmentReference(2/*attachment*/, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL/*layout*/), //resolveAttachments 64 | NVK::VkAttachmentReference(1/*attachment*/, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL/*layout*/),//depthStencilAttachment 65 | NVK::Uint32Array(), //preserveAttachments 66 | 0 //flags 67 | ), 68 | 69 | 70 | Of course there is no magic, and what you don't do yourself is done behind the scenes (check `class VkPipelineVertexInputStateCreateInfo` for example; a minimal sketch of the pattern is given at the end of this page). One could argue that it would be even more expensive than using regular Vulkan structures... But let's not forget that this part of the code is happening at **initialization time**... so does it really matter? 71 | 72 | Now, more attention should be paid when dealing with the *main rendering loop*...
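For reference, here is a minimal sketch of how such a constructor + functor pair can be built; this is a simplified illustration (assuming `<vector>` and the Vulkan headers are included), not the actual `NVK` implementation:

    // simplified illustration of the constructor/functor idiom used by the NVK wrappers
    class VertexInputAttributes
    {
    public:
        // constructor: receives the first attribute
        VertexInputAttributes(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset)
        {
            (*this)(location, binding, format, offset);
        }
        // functor: each call appends one more attribute and returns *this for chaining
        VertexInputAttributes& operator()(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset)
        {
            VkVertexInputAttributeDescription a = { location, binding, format, offset };
            m_attrs.push_back(a);
            return *this;
        }
        const VkVertexInputAttributeDescription* data() const { return m_attrs.data(); }
        uint32_t count() const { return (uint32_t)m_attrs.size(); }
    private:
        std::vector<VkVertexInputAttributeDescription> m_attrs;
    };

    // usage: VertexInputAttributes(0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0)
    //                              (1, 0, VK_FORMAT_R32G32B32_SFLOAT, sizeof(vec3f));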
73 | -------------------------------------------------------------------------------- /doc/Vulkan_MT.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/doc/Vulkan_MT.JPG -------------------------------------------------------------------------------- /doc/Vulkan_Renderer.md: -------------------------------------------------------------------------------- 1 | # Vulkan renderer 2 | The Vulkan renderer is located in the `bk3d_vk.cpp` file: 3 | 4 | - `RendererVk` is the class for the renderer 5 | - `Bk3dModelVk` is the class for the model being rendered 6 | 7 | ## Initialization of resources 8 | 9 | `RendererVk::initGraphics` will set up most of the Vulkan objects and related memory 10 | 11 | Vulkan requires you to manage memory as much as possible. Of course you can simply rely on driver memory allocation (`vkAllocateMemory`), but it is better practice to allocate memory in larger chunks with `vkAllocateMemory` and later take care of partitioning what is inside. 12 | 13 | There are a few possibilities to reach the right resources: 14 | 15 | 1. bind many VkBuffers or images at various offsets of the device memory chunk (`vkBindBufferMemory`...) 16 | 2. or use the binding offsets available in `vkCmdBindVertexBuffers` or `vkCmdBindIndexBuffer` or `vkCmdBindDescriptorSets` to reach the right section in the current buffer 17 | 3. Or a mix of both! 18 | 19 | Note that in a real situation, more chunks of memory would be allocated: when the previous ones are full, the engine might create a new one. The application should also have a cleverer heap management of what gets allocated and what gets freed within the chunks of memory. 20 | 21 | Ideally, the memory areas are a mix of buffers bound at various offsets, while drawcalls also use offsets within the buffers that are active: 22 | 23 | ![memory chunks](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/Memory_chunks.JPG) 24 | 25 | This sample doesn't implement this general case, but implements both of the 'extreme' cases: 26 | 27 | The default one (see `#define USE_VKCMDBINDVERTEXBUFFERS_OFFSET`) will allocate *one VkBuffer for one chunk of Device Memory*; offsets will then be maintained for the 3D parts to find their vertices/indices back 28 | ![offsets](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/offsets.JPG) 29 | 30 | Another approach will 'forget' about offsets in buffers and naively create a VkBuffer for each required VBO/IBO. A basic allocator will bind these buffers to the right offsets in the device memory chunk: 31 | ![offsets](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/vkbuffers.JPG) 32 | 33 | It turns out that, aside from demonstrating how to bind buffers at different areas of a device memory chunk, **this latter approach could rapidly reach the limits of available objects** (here, VkBuffer). This is precisely what happened to me once, with a big model from a CAD application... 34 | 35 | In other words, it is **not a good idea to blindly use object handles only**: there are good reasons why the *offset* parameters in command-binding exist. The renderer should be clever enough to aggregate small buffers together thanks to the offset-binding in the command-buffer creation. The best solution would be *to mix both, depending on the requirements of the engine*.
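A minimal sketch of these two 'extreme' cases; `chunkMemory`, `bigBuffer` and the `part` fields are illustrative names, not taken from the sample:

    // (a) default case: one big VkBuffer bound once to the device memory chunk,
    //     per-part offsets are applied at command-buffer creation time
    vkBindBufferMemory(device, bigBuffer, chunkMemory, 0);
    VkDeviceSize vboOffset = part.vboOffset;   // offset maintained by the application
    vkCmdBindVertexBuffers(cmd, 0, 1, &bigBuffer, &vboOffset);
    vkCmdBindIndexBuffer(cmd, bigBuffer, part.iboOffset, VK_INDEX_TYPE_UINT32);

    // (b) other case: one VkBuffer per VBO/IBO, each bound by a basic allocator
    //     at its own offset inside the device memory chunk
    vkBindBufferMemory(device, part.vbo, chunkMemory, part.vboChunkOffset);
    VkDeviceSize zero = 0;
    vkCmdBindVertexBuffers(cmd, 0, 1, &part.vbo, &zero);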
36 | 37 | ## Initialization of Vulkan components 38 | This section should be self-explanatory. Essentially, the idea is to prepare things up-front as much as possible (in fact: everything except command-buffers), so that the *rendering loop doesn't involve any sort of expensive validation*. 39 | 40 | Validation is what made OpenGL so tricky: the state-machine of OpenGL tends to transform the driver into a *paranoid state* where it can never really know or guess what exactly the application is doing: everything is possible at any time and the driver must be ready for it! 41 | 42 | Vulkan, on the other hand, expects the opposite: the developer must know exactly what will be used, and must prepare things so that the Vulkan driver doesn't have to worry about unexpected situations anymore. 43 | 44 | This section will set up the following components: 45 | 46 | - **Spir-V shaders** (*.spv) 47 | - **semaphores** for glDrawVkImageNV synchronization 48 | - a combination of various **Graphics-Pipelines**: one for 'lines' primitives; one for triangle-fans; another for triangle lists... 49 | - **Sampler(s) and Texture(s)** (Note that I do load a Noise DDS texture, but the latest shaders ended up not using it...) 50 | - a **general Uniform buffer**: needed for projection/view-dependent matrices, for example 51 | - **descriptor-set layouts**: how the descriptor-sets are put together for various situations. You can see the Descriptor-Set layout as a way to reduce the scope of resource addressing: a layout that allows the driver to identify which memory pointers need to be set for a given situation. 52 | - **pipeline-layout**: created from the *descriptor-set layouts* 53 | - a list of states we want to **keep dynamic** (meaning they can be modified from within a command-buffer): viewport and scissors, etc. 54 | - a **Descriptor-Pool** and some **Descriptor-Sets**: we will associate some resources with some descriptor-sets 55 | - **Fences** for command-buffer update (later below) 56 | - **Render-Pass** and its sub-pass(es) 57 | - **Frame-buffer** to associate with the Render-Pass 58 | - Vulkan **timer** initialization 59 | 60 | ## Initialization of Command-Buffer Pools 61 | *Command-buffer Pools must be created per thread*: the allocation/deallocation of command-buffers can only be performed in a concurrent manner if each thread owns its own allocation pool. In our sample, we will use the **TLS** (Thread Local Storage) for each thread to refer to its own pool. 62 | 63 | The main initialization function will issue a series of calls to each thread in order to have them store their command-buffer pool in their own TLS (see `initThreadLocalVars`) 64 |
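A minimal sketch of the idea, using C++ `thread_local` storage as a stand-in for the sample's `NThreadLocalVar` wrapper (the pool-creation details are illustrative):

    // each thread creates and keeps its own VkCommandPool in thread-local storage,
    // so command-buffer allocation/deallocation can happen concurrently without locks
    thread_local VkCommandPool s_cmdPool = VK_NULL_HANDLE;

    void initThreadCommandPool(VkDevice device, uint32_t queueFamilyIndex)
    {
        VkCommandPoolCreateInfo info = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO };
        info.queueFamilyIndex = queueFamilyIndex;
        info.flags            = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
        vkCreateCommandPool(device, &info, nullptr, &s_cmdPool);
    }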
65 | ## Command-Buffers 66 | Vulkan introduced the concept of **primary** and **secondary** command-buffers. The idea behind it is to allow a more generic primary command-buffer to call secondary ones that contain more details about the scene. Note that Vulkan restricted the hierarchy to only 2 levels. 67 | 68 | Command-buffer usage is rather flexible. In our case, we will use various command-buffers with the idea that: 69 | 70 | - for every frame, we will re-create the *primary command-buffer* 71 | - *secondary command-buffers* might be created every frame or recycled: it is optional (see 'c' option in the UI) 72 | - secondary command-buffers are used for specific purposes: 73 | - one for *memory barrier* and *viewport setup*: this buffer will be created/updated *only when the viewport size changes* 74 | - another one for the grid of the floor: this command-buffer can also be very static and can be created once and for all... 75 | - finally, other secondary command-buffers are used to render the geometry of the scene 76 | 77 | ![cmd-buffers](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/cmd-buffers.JPG) 78 | 79 | I mentioned earlier in the "initialization section" the creation of **Fences**. 80 | 81 | As a reminder, the GPU is a co-processor that we want to fill with tasks in parallel with what the CPU does on its side. Because we really want both of them to work in parallel, it is bad to 'serialize' the CPU with the GPU. Nevertheless, it is still necessary to synchronize them at various critical steps. 82 | 83 | The update of command-buffers is one of them: after we have generated a bunch of command-buffers, we will enqueue them for the GPU to consume. But it is possible that the CPU has already looped back to the next frame for command-buffer creation, *before* the GPU is finished with the previous batch of command-buffers. 84 | 85 | One naive solution is to wait for the GPU to be done and finally recycle the command-buffers for the next iteration. But waiting for the GPU would be a waste... 86 | 87 | The *ideal solution* would be to allocate new command-buffers for the next frame so that we don't wait for GPU completion. Later, the consumed command-buffers should be identified and put back to the pool (garbage collection). 88 | 89 | This sample is doing a bit of the latter: using 2 caches of command-buffers and doing a **"ping-pong"** transaction (a short sketch is given further below): 90 | 91 | - while the GPU is dealing with cache #1, we will check the completion of cache #2 thanks to Fence #2; 92 | - most of the time it might be ready; worst case: a bit of wait. 93 | - when ready, we will free the command-buffers from cache #2 94 | - then we will allocate new command-buffers in this cache #2, while the GPU finishes consuming what is in cache #1 95 | - then we will enqueue the new cmd-buffers from cache #2 to the GPU, tagged with Fence #2 96 | - next frame, the CPU will check Fence #1 to see if the GPU is done with it (normally it should be... the GPU would have already started to consume the cmd-buffers from cache #2) 97 | - etc. 98 | 99 | ![Fences](https://github.com/nvpro-samples/gl_vk_bk3dthreaded/blob/master/doc/Fences.JPG) 100 | 101 | This approach doesn't allow building more than 1 frame ahead (in fact, many games artificially limit the # of frames ahead to prevent lagging game controls. Frames ahead are good for benchmarks... not so good for the gaming experience ;-). 102 | 103 | I suppose that a more generic approach would be to use a **ring-buffer** or a **command-buffer 'garbage' collector**, rather than limiting ourselves to 2 slots (ping-pong). The next revision of the sample might have a better approach... 104 |
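A minimal sketch of one iteration of this ping-pong, reusing the sample's `m_cmdSceneIdx` / `m_sceneFence` / `m_cmdScene` names but with otherwise illustrative helpers (`buildSceneCommandBuffer`, `cmdPool`, `submitInfo`):

    // 'idx' is the cache we want to rebuild this frame; the GPU may still be
    // consuming the *other* cache, which is exactly what we want
    int idx = m_cmdSceneIdx;
    vkWaitForFences(device, 1, &m_sceneFence[idx], VK_TRUE, UINT64_MAX); // usually returns immediately
    vkResetFences(device, 1, &m_sceneFence[idx]);
    vkFreeCommandBuffers(device, cmdPool, 1, &m_cmdScene[idx]);          // recycle the consumed one
    m_cmdScene[idx] = buildSceneCommandBuffer();                         // rebuild (possibly with thread-workers)
    vkQueueSubmit(queue, 1, &submitInfo, m_sceneFence[idx]);             // tag the submission with the same fence
    m_cmdSceneIdx = 1 - m_cmdSceneIdx;                                   // ping-pong for the next frame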
105 | ## Blit to OpenGL back-buffer: `GL_NV_draw_vulkan_image` 106 | 107 | The driver team introduced a convenient way to mix Vulkan rendering with an existing OpenGL context associated with the window. 108 | 109 | Normally, **WSI** should be the way to work with Vulkan: dealing with a swapchain; associating it with the window surface, etc. 110 | 111 | The interesting part of GL_NV_draw_vulkan_image is that it can spare you the work of setting up WSI; but more importantly, it allows you to **mix Vulkan with OpenGL**. As an example: most of our samples are currently running Vulkan with an overlay in OpenGL: **AntTweakBar** or any other UI overlay is still in OpenGL. If we didn't have this feature, no overlay would have worked right away, and it would have required quite some time to port... 112 | 113 | GL_NV_draw_vulkan_image requires 2 **semaphores**: 114 | 115 | - one that will be signaled as soon as the blit of the Vulkan image to the backbuffer is done (`m_semOpenGLReadDone` in the sample) 116 | - the other one to be signaled by the Queue (`vkQueueSubmit`) when the GPU finally finishes the rendering (`m_semVKRenderingDone` in the sample) 117 | 118 | nvk.vkQueueSubmit( NVK::VkSubmitInfo( 119 | 1, &m_semOpenGLReadDone, // <== might make the queue wait to be signaled 120 | 1, &m_cmdScene[m_cmdSceneIdx], 121 | 1, &m_semVKRenderingDone), // <== might make the copy to OpenGL wait 122 | m_sceneFence[m_cmdSceneIdx] ); 123 | 124 | The sample will call `RendererVk::blitToBackbuffer()` at the end for the final copy to the OpenGL backbuffer: 125 | 126 | glWaitVkSemaphoreNV((GLuint64)m_semVKRenderingDone); 127 | glDrawVkImageNV((GLuint64)m_colorImage.img, 0, 0,0,w,h, 0, 0,1,1,0); 128 | glSignalVkSemaphoreNV((GLuint64)m_semOpenGLReadDone); 129 | -------------------------------------------------------------------------------- /doc/cmd-buffers.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/doc/cmd-buffers.JPG -------------------------------------------------------------------------------- /doc/offsets.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/doc/offsets.JPG -------------------------------------------------------------------------------- /doc/sample.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/doc/sample.jpg -------------------------------------------------------------------------------- /doc/toggles.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/doc/toggles.JPG -------------------------------------------------------------------------------- /doc/vkbuffers.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/doc/vkbuffers.JPG -------------------------------------------------------------------------------- /gl_nv_command_list.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #ifndef NV_COMMANDLIST_H__ 22 | #define NV_COMMANDLIST_H__ 23 | 24 | #include 25 | 26 | 27 | # if defined(__MINGW32__) || defined(__CYGWIN__) 28 | # define GLEXT_APIENTRY __stdcall 29 | # elif (_MSC_VER >= 800) || defined(_STDCALL_SUPPORTED) || defined(__BORLANDC__) 30 | # define GLEXT_APIENTRY __stdcall 31 | # else 32 | # define GLEXT_APIENTRY 33 | # endif 34 | 35 | /* 36 | #pragma pack(push,1) 37 | 38 | typedef struct { 39 | GLuint header; 40 | } TerminateSequenceCommandNV; 41 | 42 | typedef struct { 43 | GLuint header; 44 | } NOPCommandNV; 45 | 46 | typedef struct { 47 | GLuint header; 48 | GLuint count; 49 | GLuint firstIndex; 50 | GLuint baseVertex; 51 | } DrawElementsCommandNV; 52 | 53 | typedef struct { 54 | GLuint header; 55 | GLuint count; 56 | GLuint first; 57 | } DrawArraysCommandNV; 58 | 59 | typedef struct { 60 | GLuint header; 61 | GLenum mode; 62 | GLuint count; 63 | GLuint instanceCount; 64 | GLuint firstIndex; 65 | GLuint baseVertex; 66 | GLuint baseInstance; 67 | } DrawElementsInstancedCommandNV; 68 | 69 | typedef struct { 70 | GLuint header; 71 | GLenum mode; 72 | GLuint count; 73 | GLuint instanceCount; 74 | GLuint first; 75 | GLuint baseInstance; 76 | } DrawArraysInstancedCommandNV; 77 | 78 | typedef struct { 79 | GLuint header; 80 | GLuint addressLo; 81 | GLuint addressHi; 82 | GLuint typeSizeInByte; 83 | } ElementAddressCommandNV; 84 | 85 | typedef struct { 86 | GLuint header; 87 | GLuint index; 88 | GLuint addressLo; 89 | GLuint addressHi; 90 | } AttributeAddressCommandNV; 91 | 92 | typedef struct { 93 | GLuint header; 94 | GLushort index; 95 | GLushort stage; 96 | GLuint addressLo; 97 | GLuint addressHi; 98 | } UniformAddressCommandNV; 99 | 100 | typedef struct { 101 | GLuint header; 102 | float red; 103 | float green; 104 | float blue; 105 | float alpha; 106 | } BlendColorCommandNV; 107 | 108 | typedef struct { 109 | GLuint header; 110 | GLuint frontStencilRef; 111 | GLuint backStencilRef; 112 | } StencilRefCommandNV; 113 | 114 | typedef struct { 115 | GLuint header; 116 | float lineWidth; 117 | } LineWidthCommandNV; 118 | 119 | typedef struct { 120 | GLuint header; 121 | float scale; 122 | float bias; 123 | } PolygonOffsetCommandNV; 124 | 125 | typedef struct { 126 | GLuint header; 127 | float alphaRef; 128 | } AlphaRefCommandNV; 129 | 130 | typedef struct { 131 | GLuint header; 132 | GLuint x; 133 | GLuint y; 134 | GLuint width; 135 | GLuint height; 136 | } ViewportCommandNV; // only ViewportIndex 0 137 | 138 | typedef struct { 139 | GLuint header; 140 | GLuint x; 141 | GLuint y; 142 | GLuint width; 143 | GLuint height; 144 | } ScissorCommandNV; // only ViewportIndex 0 145 | 146 | typedef struct { 147 | GLuint header; 148 | GLuint frontFace; // 0 for CW, 1 for CCW 149 | } FrontFaceCommandNV; 150 | 151 | #pragma pack(pop) 152 | */ 153 | 154 | #endif 155 | 156 | -------------------------------------------------------------------------------- /gl_nv_commandlist_helpers.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | // 22 | // Shader stages for command-list 23 | // 24 | enum ShaderStages { 25 | STAGE_VERTEX, 26 | STAGE_TESS_CONTROL, 27 | STAGE_TESS_EVALUATION, 28 | STAGE_GEOMETRY, 29 | STAGE_FRAGMENT, 30 | STAGES, 31 | }; 32 | 33 | // 34 | // Put together all what is needed to give to the extension function 35 | // for a token buffer 36 | // 37 | struct TokenBuffer 38 | { 39 | GLuint bufferID; // buffer containing all 40 | GLuint64EXT bufferAddr; // buffer GPU-pointer 41 | std::string data; // bytes of data containing the structures to send to the driver 42 | void release() { 43 | glDeleteBuffers(1, &bufferID); 44 | bufferAddr = 0; 45 | bufferID = 0; 46 | data.clear(); 47 | } 48 | }; 49 | // 50 | // Grouping together what is needed to issue a single command made of many states, fbos and Token Buffer pointers 51 | // 52 | struct CommandStatesBatch 53 | { 54 | void release() 55 | { 56 | dataGPUPtrs.clear(); 57 | dataPtrs.clear(); 58 | sizes.clear(); 59 | stateGroups.clear(); 60 | fbos.clear(); 61 | numItems = 0; 62 | } 63 | void pushBatch(GLuint stateGroup_, GLuint fbo_, GLuint64EXT dataGPUPtr_, const GLvoid* dataPtr_, GLsizei size_) 64 | { 65 | dataGPUPtrs.push_back(dataGPUPtr_); 66 | dataPtrs.push_back(dataPtr_); 67 | sizes.push_back(size_); 68 | stateGroups.push_back(stateGroup_); 69 | fbos.push_back(fbo_); 70 | numItems = fbos.size(); 71 | } 72 | CommandStatesBatch& operator+= (CommandStatesBatch &cb) 73 | { 74 | // TODO: do better than that... 75 | size_t sz = cb.fbos.size(); 76 | for(int i=0; i dataGPUPtrs; // pointer in data where to locate each separate groups (for glListDrawCommandsStatesClientNV) 88 | std::vector dataPtrs; // pointer in data where to locate each separate groups (for glListDrawCommandsStatesClientNV) 89 | std::vector sizes; // sizes of each groups 90 | std::vector stateGroups;// state-group IDs used for each groups 91 | std::vector fbos; // FBOs being used for each groups 92 | size_t numItems; // == fbos.size() or sizes.size()... 93 | 94 | //CommandStatesBatch& operator+= (CommandStatesBatch &cb) 95 | //{ 96 | // dataGPUPtrs += cb.dataGPUPtrs; 97 | // dataPtrs += cb.dataPtrs; 98 | // sizes += cb.sizes; 99 | // stateGroups += cb.stateGroups; 100 | // fbos += cb.fbos; 101 | // numItems += fbos.size(); 102 | // return *this; 103 | //} 104 | //std::basic_string, std::allocator > dataGPUPtrs; // pointer in data where to locate each separate groups (for glListDrawCommandsStatesClientNV) 105 | //std::basic_string, std::allocator > dataPtrs; // pointer in data where to locate each separate groups (for glListDrawCommandsStatesClientNV) 106 | //std::basic_string, std::allocator > sizes; // sizes of each groups 107 | //std::basic_string, std::allocator > stateGroups;// state-group IDs used for each groups 108 | //std::basic_string, std::allocator > fbos; // FBOs being used for each groups 109 | //size_t numItems; // == fbos.size() or sizes.size()... 
110 | }; 111 | 112 | //----------------------------------------------------------------------------- 113 | // Useful stuff for Command-list 114 | //----------------------------------------------------------------------------- 115 | static GLuint s_header[GL_FRONT_FACE_COMMAND_NV+1] = { 0 }; 116 | static GLuint s_headerSizes[GL_FRONT_FACE_COMMAND_NV+1] = { 0 }; 117 | 118 | static GLushort s_stages[STAGES]; 119 | 120 | struct Token_Nop { 121 | static const GLenum ID = GL_NOP_COMMAND_NV; 122 | NOPCommandNV cmd; 123 | Token_Nop() { 124 | cmd.header = s_header[ID]; 125 | } 126 | }; 127 | 128 | struct Token_TerminateSequence { 129 | static const GLenum ID = GL_TERMINATE_SEQUENCE_COMMAND_NV; 130 | 131 | TerminateSequenceCommandNV cmd; 132 | 133 | Token_TerminateSequence() { 134 | cmd.header = s_header[ID]; 135 | } 136 | }; 137 | 138 | struct Token_DrawElemsInstanced { 139 | static const GLenum ID = GL_DRAW_ELEMENTS_INSTANCED_COMMAND_NV; 140 | 141 | DrawElementsInstancedCommandNV cmd; 142 | 143 | Token_DrawElemsInstanced() { 144 | cmd.baseInstance = 0; 145 | cmd.baseVertex = 0; 146 | cmd.firstIndex = 0; 147 | cmd.count = 0; 148 | cmd.instanceCount = 1; 149 | 150 | cmd.header = s_header[ID]; 151 | } 152 | }; 153 | 154 | struct Token_DrawArraysInstanced { 155 | static const GLenum ID = GL_DRAW_ARRAYS_INSTANCED_COMMAND_NV; 156 | 157 | DrawArraysInstancedCommandNV cmd; 158 | 159 | Token_DrawArraysInstanced() { 160 | cmd.baseInstance = 0; 161 | cmd.first = 0; 162 | cmd.count = 0; 163 | cmd.instanceCount = 1; 164 | 165 | cmd.header = s_header[ID]; 166 | } 167 | }; 168 | 169 | struct Token_DrawElements { 170 | static const GLenum ID = GL_DRAW_ELEMENTS_COMMAND_NV; 171 | 172 | DrawElementsCommandNV cmd; 173 | 174 | Token_DrawElements() { 175 | cmd.baseVertex = 0; 176 | cmd.firstIndex = 0; 177 | cmd.count = 0; 178 | 179 | cmd.header = s_header[ID]; 180 | } 181 | }; 182 | 183 | struct Token_DrawArrays { 184 | static const GLenum ID = GL_DRAW_ARRAYS_COMMAND_NV; 185 | 186 | DrawArraysCommandNV cmd; 187 | 188 | Token_DrawArrays() { 189 | cmd.first = 0; 190 | cmd.count = 0; 191 | 192 | cmd.header = s_header[ID]; 193 | } 194 | }; 195 | 196 | struct Token_DrawElementsStrip { 197 | static const GLenum ID = GL_DRAW_ELEMENTS_STRIP_COMMAND_NV; 198 | 199 | DrawElementsCommandNV cmd; 200 | 201 | Token_DrawElementsStrip() { 202 | cmd.baseVertex = 0; 203 | cmd.firstIndex = 0; 204 | cmd.count = 0; 205 | 206 | cmd.header = s_header[ID]; 207 | } 208 | }; 209 | 210 | struct Token_DrawArraysStrip { 211 | static const GLenum ID = GL_DRAW_ARRAYS_STRIP_COMMAND_NV; 212 | 213 | DrawArraysCommandNV cmd; 214 | 215 | Token_DrawArraysStrip() { 216 | cmd.first = 0; 217 | cmd.count = 0; 218 | 219 | cmd.header = s_header[ID]; 220 | } 221 | }; 222 | 223 | struct Token_AttributeAddress { 224 | static const GLenum ID = GL_ATTRIBUTE_ADDRESS_COMMAND_NV; 225 | 226 | AttributeAddressCommandNV cmd; 227 | 228 | Token_AttributeAddress() { 229 | cmd.header = s_header[ID]; 230 | } 231 | }; 232 | 233 | struct Token_ElementAddress { 234 | static const GLenum ID = GL_ELEMENT_ADDRESS_COMMAND_NV; 235 | 236 | ElementAddressCommandNV cmd; 237 | 238 | Token_ElementAddress() { 239 | cmd.header = s_header[ID]; 240 | } 241 | }; 242 | 243 | struct Token_UniformAddress { 244 | static const GLenum ID = GL_UNIFORM_ADDRESS_COMMAND_NV; 245 | 246 | UniformAddressCommandNV cmd; 247 | 248 | Token_UniformAddress() { 249 | cmd.header = s_header[ID]; 250 | } 251 | }; 252 | 253 | struct Token_BlendColor{ 254 | static const GLenum ID = GL_BLEND_COLOR_COMMAND_NV; 255 | 256 | 
BlendColorCommandNV cmd; 257 | 258 | Token_BlendColor() { 259 | cmd.header = s_header[ID]; 260 | } 261 | }; 262 | 263 | struct Token_StencilRef{ 264 | static const GLenum ID = GL_STENCIL_REF_COMMAND_NV; 265 | 266 | StencilRefCommandNV cmd; 267 | 268 | Token_StencilRef() { 269 | cmd.header = s_header[ID]; 270 | } 271 | }; 272 | 273 | struct Token_LineWidth{ 274 | static const GLenum ID = GL_LINE_WIDTH_COMMAND_NV; 275 | 276 | LineWidthCommandNV cmd; 277 | 278 | Token_LineWidth() { 279 | cmd.header = s_header[ID]; 280 | } 281 | }; 282 | 283 | struct Token_PolygonOffset{ 284 | static const GLenum ID = GL_POLYGON_OFFSET_COMMAND_NV; 285 | 286 | PolygonOffsetCommandNV cmd; 287 | 288 | Token_PolygonOffset() { 289 | cmd.header = s_header[ID]; 290 | } 291 | }; 292 | 293 | struct Token_AlphaRef{ 294 | static const GLenum ID = GL_ALPHA_REF_COMMAND_NV; 295 | 296 | AlphaRefCommandNV cmd; 297 | 298 | Token_AlphaRef() { 299 | cmd.header = s_header[ID]; 300 | } 301 | }; 302 | 303 | struct Token_Viewport{ 304 | static const GLenum ID = GL_VIEWPORT_COMMAND_NV; 305 | ViewportCommandNV cmd; 306 | Token_Viewport() { 307 | cmd.header = s_header[ID]; 308 | } 309 | }; 310 | 311 | struct Token_Scissor { 312 | static const GLenum ID = GL_SCISSOR_COMMAND_NV; 313 | ScissorCommandNV cmd; 314 | Token_Scissor() { 315 | cmd.header = s_header[ID]; 316 | } 317 | }; 318 | 319 | struct Token_FrontFace { 320 | static const GLenum ID = GL_FRONT_FACE_COMMAND_NV; 321 | FrontFaceCommandNV cmd; 322 | Token_FrontFace() { 323 | cmd.header = s_header[ID]; 324 | } 325 | }; 326 | 327 | //----------------------------------------------------------------------------- 328 | // 329 | //----------------------------------------------------------------------------- 330 | template <class T> 331 | void registerSize() 332 | { 333 | s_headerSizes[T::ID] = sizeof(T); 334 | } 335 | 336 | 337 | //----------------------------------------------------------------------------- 338 | // 339 | //----------------------------------------------------------------------------- 340 | void initTokenInternals() 341 | { 342 | registerSize<Token_Nop>(); 343 | registerSize<Token_TerminateSequence>(); 344 | registerSize<Token_DrawElemsInstanced>(); 345 | registerSize<Token_DrawArraysInstanced>(); 346 | registerSize<Token_DrawElements>(); 347 | registerSize<Token_DrawArrays>(); 348 | registerSize<Token_DrawElementsStrip>(); 349 | registerSize<Token_DrawArraysStrip>(); 350 | registerSize<Token_AttributeAddress>(); 351 | registerSize<Token_ElementAddress>(); 352 | registerSize<Token_UniformAddress>(); 353 | registerSize<Token_BlendColor>(); 354 | registerSize<Token_StencilRef>(); 355 | registerSize<Token_LineWidth>(); 356 | registerSize<Token_PolygonOffset>(); 357 | registerSize<Token_AlphaRef>(); 358 | registerSize<Token_Viewport>(); 359 | registerSize<Token_Scissor>(); 360 | registerSize<Token_FrontFace>(); 361 | 362 | for (int i = 0; i < (GL_FRONT_FACE_COMMAND_NV+1); i++){ 363 | // using i instead of a table of token IDs because they are arranged in the same order as i increments. 364 | // shortcut for the source code. 
See gl_nv_command_list.h 365 | s_header[i] = glGetCommandHeaderNV(i/*==Token enum*/, s_headerSizes[i]); 366 | } 367 | s_stages[STAGE_VERTEX] = glGetStageIndexNV(GL_VERTEX_SHADER); 368 | s_stages[STAGE_TESS_CONTROL] = glGetStageIndexNV(GL_TESS_CONTROL_SHADER); 369 | s_stages[STAGE_TESS_EVALUATION] = glGetStageIndexNV(GL_TESS_EVALUATION_SHADER); 370 | s_stages[STAGE_GEOMETRY] = glGetStageIndexNV(GL_GEOMETRY_SHADER); 371 | s_stages[STAGE_FRAGMENT] = glGetStageIndexNV(GL_FRAGMENT_SHADER); 372 | } 373 | 374 | //------------------------------------------------------------------------------ 375 | // build 376 | //------------------------------------------------------------------------------ 377 | std::string buildLineWidthCommand(float w) 378 | { 379 | std::string cmd; 380 | Token_LineWidth lw; 381 | lw.cmd.lineWidth = w; 382 | cmd = std::string((const char*)&lw, sizeof(Token_LineWidth)); 383 | 384 | return cmd; 385 | } 386 | //------------------------------------------------------------------------------ 387 | // build 388 | //------------------------------------------------------------------------------ 389 | std::string buildUniformAddressCommand(int idx, GLuint64 p, GLsizeiptr sizeBytes, ShaderStages stage) 390 | { 391 | std::string cmd; 392 | Token_UniformAddress attr; 393 | attr.cmd.stage = s_stages[stage]; 394 | attr.cmd.index = idx; 395 | ((GLuint64EXT*)&attr.cmd.addressLo)[0] = p; 396 | cmd = std::string((const char*)&attr, sizeof(Token_UniformAddress)); 397 | 398 | return cmd; 399 | } 400 | //------------------------------------------------------------------------------ 401 | // build 402 | //------------------------------------------------------------------------------ 403 | std::string buildAttributeAddressCommand(int idx, GLuint64 p, GLsizeiptr sizeBytes) 404 | { 405 | std::string cmd; 406 | Token_AttributeAddress attr; 407 | attr.cmd.index = idx; 408 | ((GLuint64EXT*)&attr.cmd.addressLo)[0] = p; 409 | cmd = std::string((const char*)&attr, sizeof(Token_AttributeAddress)); 410 | 411 | return cmd; 412 | } 413 | //------------------------------------------------------------------------------ 414 | // build 415 | //------------------------------------------------------------------------------ 416 | std::string buildElementAddressCommand(GLuint64 ptr, GLenum indexFormatGL) 417 | { 418 | std::string cmd; 419 | Token_ElementAddress attr; 420 | ((GLuint64EXT*)&attr.cmd.addressLo)[0] = ptr; 421 | switch (indexFormatGL) 422 | { 423 | case GL_UNSIGNED_INT: 424 | attr.cmd.typeSizeInByte = 4; 425 | break; 426 | case GL_UNSIGNED_SHORT: 427 | attr.cmd.typeSizeInByte = 2; 428 | break; 429 | } 430 | cmd = std::string((const char*)&attr, sizeof(Token_AttributeAddress)); 431 | 432 | return cmd; 433 | } 434 | //------------------------------------------------------------------------------ 435 | // 436 | //------------------------------------------------------------------------------ 437 | std::string buildDrawElementsCommand(GLenum topologyGL, GLuint indexCount) 438 | { 439 | std::string cmd; 440 | Token_DrawElements dc; 441 | Token_DrawElementsStrip dcstrip; 442 | switch (topologyGL) 443 | { 444 | case GL_TRIANGLE_STRIP: 445 | case GL_QUAD_STRIP: 446 | case GL_LINE_STRIP: 447 | dcstrip.cmd.baseVertex = 0; 448 | dcstrip.cmd.firstIndex = 0; 449 | dcstrip.cmd.count = indexCount; 450 | cmd = std::string((const char*)&dcstrip, sizeof(Token_DrawElementsStrip)); 451 | break; 452 | default: 453 | dc.cmd.baseVertex = 0; 454 | dc.cmd.firstIndex = 0; 455 | dc.cmd.count = indexCount; 456 | cmd = std::string((const 
char*)&dc, sizeof(Token_DrawElements)); 457 | break; 458 | } 459 | return cmd; 460 | } 461 | //------------------------------------------------------------------------------ 462 | // 463 | //------------------------------------------------------------------------------ 464 | std::string buildDrawArraysCommand(GLenum topologyGL, GLuint indexCount) 465 | { 466 | std::string cmd; 467 | Token_DrawArrays dc; 468 | Token_DrawArraysStrip dcstrip; 469 | switch (topologyGL) 470 | { 471 | case GL_TRIANGLE_STRIP: 472 | case GL_QUAD_STRIP: 473 | case GL_LINE_STRIP: 474 | dcstrip.cmd.first = 0; 475 | dcstrip.cmd.count = indexCount; 476 | cmd = std::string((const char*)&dcstrip, sizeof(Token_DrawArraysStrip)); 477 | break; 478 | default: 479 | dc.cmd.first = 0; 480 | dc.cmd.count = indexCount; 481 | cmd = std::string((const char*)&dc, sizeof(Token_DrawArrays)); 482 | break; 483 | } 484 | return cmd; 485 | } 486 | 487 | //------------------------------------------------------------------------------ 488 | // 489 | //------------------------------------------------------------------------------ 490 | std::string buildViewportCommand(GLint x, GLint y, GLsizei width, GLsizei height) 491 | { 492 | std::string cmd; 493 | Token_Viewport dc; 494 | dc.cmd.x = x; 495 | dc.cmd.y = y; 496 | dc.cmd.width = width; 497 | dc.cmd.height = height; 498 | cmd = std::string((const char*)&dc, sizeof(Token_Viewport)); 499 | return cmd; 500 | } 501 | 502 | //------------------------------------------------------------------------------ 503 | // 504 | //------------------------------------------------------------------------------ 505 | std::string buildBlendColorCommand(GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha) 506 | { 507 | std::string cmd; 508 | Token_BlendColor dc; 509 | dc.cmd.red = red; 510 | dc.cmd.green = green; 511 | dc.cmd.blue = blue; 512 | dc.cmd.alpha = alpha; 513 | cmd = std::string((const char*)&dc, sizeof(Token_BlendColor)); 514 | return cmd; 515 | } 516 | 517 | //------------------------------------------------------------------------------ 518 | // 519 | //------------------------------------------------------------------------------ 520 | std::string buildStencilRefCommand(GLuint frontStencilRef, GLuint backStencilRef) 521 | { 522 | std::string cmd; 523 | Token_StencilRef dc; 524 | dc.cmd.frontStencilRef = frontStencilRef; 525 | dc.cmd.backStencilRef = backStencilRef; 526 | cmd = std::string((const char*)&dc, sizeof(Token_StencilRef)); 527 | return cmd; 528 | } 529 | 530 | //------------------------------------------------------------------------------ 531 | // 532 | //------------------------------------------------------------------------------ 533 | std::string buildPolygonOffsetCommand(GLfloat scale, GLfloat bias) 534 | { 535 | std::string cmd; 536 | Token_PolygonOffset dc; 537 | dc.cmd.bias = bias; 538 | dc.cmd.scale = scale; 539 | cmd = std::string((const char*)&dc, sizeof(Token_PolygonOffset)); 540 | return cmd; 541 | } 542 | 543 | //------------------------------------------------------------------------------ 544 | // 545 | //------------------------------------------------------------------------------ 546 | std::string buildScissorCommand(GLint x, GLint y, GLsizei width, GLsizei height) 547 | { 548 | std::string cmd; 549 | Token_Scissor dc; 550 | dc.cmd.x = x; 551 | dc.cmd.y = y; 552 | dc.cmd.width = width; 553 | dc.cmd.height = height; 554 | cmd = std::string((const char*)&dc, sizeof(Token_Scissor)); 555 | return cmd; 556 | } 557 | 
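//------------------------------------------------------------------------------
// NOTE: illustrative sketch, not part of the original helper file. It shows how
// the build*Command() helpers above are typically combined: token structures are
// concatenated into one byte string, uploaded into a buffer, and the buffer is
// made resident so its GPU address can be referenced by NV_command_list.
// The function name, GL_STATIC_DRAW usage and the particular token selection are
// assumptions made for this example only; it also assumes initTokenInternals()
// has already been called so that the token headers are valid.
//------------------------------------------------------------------------------
inline void exampleFillTokenBuffer(TokenBuffer& tb, GLuint64 vboAddr, GLuint64 iboAddr, GLuint indexCount)
{
    // concatenate a few state + draw tokens into the raw byte stream
    tb.data.clear();
    tb.data += buildLineWidthCommand(1.0f);
    tb.data += buildAttributeAddressCommand(0, vboAddr, 0);
    tb.data += buildElementAddressCommand(iboAddr, GL_UNSIGNED_INT);
    tb.data += buildDrawElementsCommand(GL_TRIANGLES, indexCount);

    // upload the token stream and make the buffer resident to obtain its GPU address
    glCreateBuffers(1, &tb.bufferID);
    glNamedBufferData(tb.bufferID, tb.data.size(), tb.data.data(), GL_STATIC_DRAW);
    glGetNamedBufferParameterui64vNV(tb.bufferID, GL_BUFFER_GPU_ADDRESS_NV, &tb.bufferAddr);
    glMakeNamedBufferResidentNV(tb.bufferID, GL_READ_ONLY);

    // the resulting buffer address, client pointers and sizes can then be grouped per
    // state object and FBO in a CommandStatesBatch and submitted with
    // glDrawCommandsStatesNV() or glListDrawCommandsStatesClientNV().
}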
-------------------------------------------------------------------------------- /gl_vk_bk3dthreaded.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #define USEWORKERS 21 | #define MAXCMDBUFFERS 100 22 | #ifdef USEWORKERS 23 | #define CCRITICALSECTIONHOLDER(c) CCriticalSectionHolder _cs(c); 24 | #else 25 | #define CCRITICALSECTIONHOLDER(c) 26 | #endif 27 | 28 | #include 29 | #include "nvpwindow.hpp" 30 | 31 | #include 32 | using namespace glm; 33 | 34 | #include "GLSLShader.h" 35 | #include "gl_nv_command_list.h" 36 | #include 37 | 38 | #include 39 | 40 | #include "helper_fbo.h" 41 | 42 | #ifdef NVP_SUPPORTS_GZLIB 43 | #include "zlib.h" 44 | #endif 45 | #include "bk3dEx.h" // a baked binary format for few models 46 | 47 | #define PROFILE_SECTION(name) nvh::Profiler::Section _tempTimer(g_profiler, name) 48 | 49 | // 50 | // For the case where we work with Descriptor Sets (Vulkan) 51 | // 52 | #define DSET_GLOBAL 0 53 | #define BINDING_MATRIX 0 54 | #define BINDING_LIGHT 1 55 | #define BINDING_NOISE 2 56 | 57 | #define DSET_OBJECT 1 58 | #define BINDING_MATRIXOBJ 0 59 | #define BINDING_MATERIAL 1 60 | 61 | #define DSET_TOTALAMOUNT 2 62 | // 63 | // For the case where we just assign UBO bindings (cmd-list) 64 | // 65 | #define UBO_MATRIX 0 66 | #define UBO_MATRIXOBJ 1 67 | #define UBO_MATERIAL 2 68 | #define UBO_LIGHT 3 69 | #define NUM_UBOS 4 70 | 71 | #define TOSTR_(x) #x 72 | #define TOSTR(x) TOSTR_(x) 73 | 74 | // 75 | // Let's assume we would put any matrix that don't get impacted by the local object transformation 76 | // 77 | NV_ALIGN( 78 | 256, 79 | struct MatrixBufferGlobal { 80 | mat4 mW; 81 | mat4 mVP; 82 | vec3 eyePos; 83 | }); 84 | // 85 | // Let's assume these are the ones that can change for each object 86 | // will used at an array of MatrixBufferObject 87 | // 88 | NV_ALIGN( 89 | 256, 90 | struct MatrixBufferObject { mat4 mO; }); 91 | // 92 | // if we create arrays with a structure, we must be aligned according to 93 | // GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT (to query) 94 | // 95 | NV_ALIGN( 96 | 256, 97 | struct MaterialBuffer { 98 | vec3 diffuse; 99 | float a; 100 | }); 101 | 102 | NV_ALIGN( 103 | 256, 104 | struct LightBuffer { vec3 dir; }); 105 | 106 | // 107 | // Externs 108 | // 109 | extern nvh::Profiler g_profiler; 110 | 111 | extern bool g_bDisplayObject; 112 | extern GLuint g_MaxBOSz; 113 | extern bool g_bDisplayGrid; 114 | 115 | extern MatrixBufferGlobal g_globalMatrices; 116 | 117 | //------------------------------------------------------------------------------ 118 | class Bk3dModel; 119 | //------------------------------------------------------------------------------ 120 | // Renderer: can be OpenGL or other 121 | 
//------------------------------------------------------------------------------ 122 | class Renderer 123 | { 124 | public: 125 | Renderer() {} 126 | virtual ~Renderer() {} 127 | virtual const char* getName() = 0; 128 | virtual bool valid() = 0; 129 | virtual bool initGraphics(int w, int h, int MSAA) = 0; 130 | virtual bool terminateGraphics() = 0; 131 | virtual bool initThreadLocalVars(int threadId) = 0; 132 | virtual void releaseThreadLocalVars() = 0; 133 | virtual void destroyCommandBuffers(bool bAll) = 0; 134 | virtual void resetCommandBuffersPool() {} 135 | virtual void waitForGPUIdle() = 0; 136 | 137 | virtual bool attachModel(Bk3dModel* pModel) = 0; 138 | virtual bool detachModels() = 0; 139 | 140 | virtual bool initResourcesModel(Bk3dModel* pModel) = 0; 141 | 142 | virtual bool buildPrimaryCmdBuffer() = 0; 143 | // bufIdx: index of cmdBuffer to create, containing mesh mstart to mend-1 (for testing concurrent cmd buffer creation) 144 | virtual bool buildCmdBufferModel(Bk3dModel* pModelcmd, int bufIdx = 0, int mstart = 0, int mend = -1) = 0; 145 | virtual void consolidateCmdBuffersModel(Bk3dModel* pModelcmd, int numCmdBuffers) = 0; 146 | virtual bool deleteCmdBufferModel(Bk3dModel* pModel) = 0; 147 | 148 | virtual bool updateForChangedRenderTarget(Bk3dModel* pModel) = 0; 149 | 150 | 151 | virtual void displayStart(const mat4& world, const InertiaCamera& camera, const mat4& projection, bool bTimingGlitch) = 0; 152 | virtual void displayEnd() {} 153 | virtual void displayGrid(const InertiaCamera& camera, const mat4 projection) = 0; 154 | // topologies: bits for each primitive type (Lines:1, linestrip:2, triangles:4, tristrips:8, trifans:16) 155 | virtual void displayBk3dModel(Bk3dModel* pModel, const mat4& cameraView, const mat4 projection, unsigned char topologies = 0xFF) = 0; 156 | virtual void blitToBackbuffer() = 0; 157 | 158 | virtual void updateViewport(GLint x, GLint y, GLsizei width, GLsizei height) = 0; 159 | 160 | virtual bool bFlipViewport() { return false; } 161 | }; 162 | extern Renderer* g_renderers[10]; 163 | extern int g_numRenderers; 164 | 165 | //------------------------------------------------------------------------------ 166 | // Class for Object (made of 1 to N meshes) 167 | // This class is agnostic to any renderer: just contains the data of geometry 168 | //------------------------------------------------------------------------------ 169 | class Bk3dModel 170 | { 171 | public: 172 | Bk3dModel(const char* name, vec3* pPos = NULL, float* pScale = NULL); 173 | ~Bk3dModel(); 174 | 175 | vec3 m_posOffset; 176 | float m_scale; 177 | std::string m_name; 178 | struct Stats 179 | { 180 | unsigned int primitives; 181 | unsigned int drawcalls; 182 | unsigned int attr_update; 183 | unsigned int uniform_update; 184 | }; 185 | 186 | MatrixBufferObject* m_objectMatrices; 187 | int m_objectMatricesNItems; 188 | 189 | MaterialBuffer* m_material; 190 | int m_materialNItems; 191 | 192 | bk3d::FileHeader* m_meshFile; 193 | 194 | Stats m_stats; 195 | 196 | Renderer* m_pRenderer; 197 | void* m_pRendererData; 198 | 199 | bool updateForChangedRenderTarget(); 200 | bool loadModel(); 201 | void printPosition(); 202 | void addStats(Stats& stats); 203 | }; //Class Bk3dModel 204 | 205 | 206 | extern std::vector g_bk3dModels; 207 | 208 | #define FOREACHMODEL(cmd) \ 209 | { \ 210 | for(int m = 0; m < g_bk3dModels.size(); m++) \ 211 | { \ 212 | g_bk3dModels[m]->cmd; \ 213 | } \ 214 | } 215 | -------------------------------------------------------------------------------- /helper_fbo.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #ifndef __HELPERFBO__ 21 | #define __HELPERFBO__ 22 | #include 23 | #include 24 | namespace fbo { 25 | inline bool CheckStatus() 26 | { 27 | GLenum status; 28 | status = (GLenum)glCheckFramebufferStatus(GL_FRAMEBUFFER); 29 | switch(status) 30 | { 31 | case GL_FRAMEBUFFER_COMPLETE: 32 | return true; 33 | case GL_FRAMEBUFFER_UNSUPPORTED: 34 | LOGE("Unsupported framebuffer format\n"); 35 | assert(!"Unsupported framebuffer format"); 36 | break; 37 | case GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT: 38 | LOGE("Framebuffer incomplete, missing attachment\n"); 39 | assert(!"Framebuffer incomplete, missing attachment"); 40 | break; 41 | //case GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS: 42 | // PRINTF(("Framebuffer incomplete, attached images must have same dimensions\n")); 43 | // assert(!"Framebuffer incomplete, attached images must have same dimensions"); 44 | // break; 45 | //case GL_FRAMEBUFFER_INCOMPLETE_FORMATS: 46 | // PRINTF(("Framebuffer incomplete, attached images must have same format\n")); 47 | // assert(!"Framebuffer incomplete, attached images must have same format"); 48 | // break; 49 | case GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER: 50 | LOGE("Framebuffer incomplete, missing draw buffer\n"); 51 | assert(!"Framebuffer incomplete, missing draw buffer"); 52 | break; 53 | case GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER: 54 | LOGE("Framebuffer incomplete, missing read buffer\n"); 55 | assert(!"Framebuffer incomplete, missing read buffer"); 56 | break; 57 | case GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT: 58 | LOGE("Framebuffer incomplete attachment\n"); 59 | assert(!"Framebuffer incomplete attachment"); 60 | break; 61 | case GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE: 62 | LOGE("Framebuffer incomplete multisample\n"); 63 | assert(!"Framebuffer incomplete multisample"); 64 | break; 65 | default: 66 | LOGE("Error %x\n", status); 67 | assert(!"unknown FBO Error"); 68 | break; 69 | } 70 | return false; 71 | } 72 | //------------------------------------------------------------------------------ 73 | // 74 | //------------------------------------------------------------------------------ 75 | inline GLuint create() 76 | { 77 | GLuint fb; 78 | glGenFramebuffers(1, &fb); 79 | return fb; 80 | } 81 | 82 | //------------------------------------------------------------------------------ 83 | // 84 | //------------------------------------------------------------------------------ 85 | inline void bind(GLuint framebuffer) 86 | { 87 | glBindFramebuffer(GL_FRAMEBUFFER, framebuffer); 88 | } 89 | 90 | //------------------------------------------------------------------------------ 91 | // 92 | 
//------------------------------------------------------------------------------ 93 | inline bool attachTexture2DTarget(GLuint framebuffer, GLuint textureID, int colorAttachment, GLenum target = GL_TEXTURE_2D) 94 | { 95 | glBindFramebuffer(GL_FRAMEBUFFER, framebuffer); 96 | glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + colorAttachment, target, textureID, 0); 97 | return true; //CheckStatus(); 98 | } 99 | 100 | //------------------------------------------------------------------------------ 101 | // 102 | //------------------------------------------------------------------------------ 103 | inline bool attachTexture2D(GLuint framebuffer, GLuint textureID, int colorAttachment, int samples) 104 | { 105 | return attachTexture2DTarget(framebuffer, textureID, colorAttachment, samples > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D); 106 | } 107 | //------------------------------------------------------------------------------ 108 | // 109 | //------------------------------------------------------------------------------ 110 | inline bool detachColorTexture(GLuint framebuffer, int colorAttachment, int samples) 111 | { 112 | glBindFramebuffer(GL_FRAMEBUFFER, framebuffer); 113 | glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + colorAttachment, 114 | samples > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D, 0, 0); 115 | return true; //CheckStatus(); 116 | } 117 | //------------------------------------------------------------------------------ 118 | // 119 | //------------------------------------------------------------------------------ 120 | #ifdef USE_RENDERBUFFERS 121 | inline bool attachRenderbuffer(GLuint framebuffer, GLuint rb, int colorAttachment) 122 | { 123 | glBindFramebuffer(GL_FRAMEBUFFER, framebuffer); 124 | glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + colorAttachment, GL_RENDERBUFFER, rb); 125 | return true; //CheckStatus(); 126 | } 127 | //------------------------------------------------------------------------------ 128 | // 129 | //------------------------------------------------------------------------------ 130 | inline bool attachDSTRenderbuffer(GLuint framebuffer, GLuint dstrb) 131 | { 132 | bool bRes; 133 | glBindFramebuffer(GL_FRAMEBUFFER, framebuffer); 134 | //glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, dstrb); 135 | glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_RENDERBUFFER, dstrb); 136 | return true; //CheckStatus() ; 137 | } 138 | #endif 139 | //------------------------------------------------------------------------------ 140 | // 141 | //------------------------------------------------------------------------------ 142 | inline bool attachDSTTexture2DTarget(GLuint framebuffer, GLuint textureDepthID, GLenum target = GL_TEXTURE_2D) 143 | { 144 | glBindFramebuffer(GL_FRAMEBUFFER, framebuffer); 145 | glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, target, textureDepthID, 0); 146 | glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, target, textureDepthID, 0); 147 | return true; //CheckStatus(); 148 | } 149 | 150 | //------------------------------------------------------------------------------ 151 | // 152 | //------------------------------------------------------------------------------ 153 | inline bool attachDSTTexture2D(GLuint framebuffer, GLuint textureDepthID, int msaaRaster) 154 | { 155 | return attachDSTTexture2DTarget(framebuffer, textureDepthID, (msaaRaster > 1) ? 
GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D); 156 | } 157 | //------------------------------------------------------------------------------ 158 | // 159 | //------------------------------------------------------------------------------ 160 | inline bool detachDSTTexture(GLuint framebuffer, int msaaRaster) 161 | { 162 | glBindFramebuffer(GL_FRAMEBUFFER, framebuffer); 163 | GLenum target = (msaaRaster > 1) ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; 164 | glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, target, 0, 0); 165 | glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, target, 0, 0); 166 | 167 | return true; //CheckStatus(); 168 | } 169 | 170 | //------------------------------------------------------------------------------ 171 | // 172 | //------------------------------------------------------------------------------ 173 | inline void deleteFBO(GLuint fbo) 174 | { 175 | glDeleteFramebuffers(1, &fbo); 176 | } 177 | 178 | //------------------------------------------------------------------------------ 179 | // 180 | //------------------------------------------------------------------------------ 181 | inline void blitFBO(GLuint srcFBO, GLuint dstFBO, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLenum filtering) 182 | { 183 | glBindFramebuffer(GL_READ_FRAMEBUFFER, srcFBO); 184 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dstFBO); 185 | // GL_NEAREST is needed when Stencil/depth are involved 186 | glBlitFramebuffer(srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, 187 | GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT, filtering); 188 | glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); 189 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); 190 | } 191 | //------------------------------------------------------------------------------ 192 | // 193 | //------------------------------------------------------------------------------ 194 | inline void blitFBONearest(GLuint srcFBO, GLuint dstFBO, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1) 195 | { 196 | blitFBO(srcFBO, dstFBO, srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, GL_NEAREST); 197 | } 198 | //------------------------------------------------------------------------------ 199 | // 200 | //------------------------------------------------------------------------------ 201 | inline void blitFBOLinear(GLuint srcFBO, GLuint dstFBO, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1) 202 | { 203 | blitFBO(srcFBO, dstFBO, srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, GL_LINEAR); 204 | } 205 | 206 | } // namespace fbo 207 | //------------------------------------------------------------------------------ 208 | // 209 | //------------------------------------------------------------------------------ 210 | namespace texture { 211 | inline GLuint create(int w, int h, int samples, int coverageSamples, GLenum intfmt, GLenum fmt, GLuint textureID = 0) 212 | { 213 | if(samples <= 1) 214 | { 215 | if(textureID == 0) 216 | glCreateTextures(GL_TEXTURE_2D, 1, &textureID); 217 | glTextureStorage2D(textureID, 1, intfmt, w, h); 218 | glTextureParameterf(textureID, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 219 | glTextureParameterf(textureID, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 220 | glTextureParameterf(textureID, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 221 | glTextureParameterf(textureID, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); 222 | } 223 | else 224 | { 225 | 
if(textureID == 0) 226 | glCreateTextures(GL_TEXTURE_2D_MULTISAMPLE, 1, &textureID); 227 | // Note: fixed-samples set to GL_TRUE, otherwise it could fail when attaching to FBO having render-buffer !! 228 | if(coverageSamples > 1) 229 | { 230 | glTextureImage2DMultisampleCoverageNV(textureID, GL_TEXTURE_2D_MULTISAMPLE, coverageSamples, samples, intfmt, w, h, GL_TRUE); 231 | } 232 | else 233 | { 234 | glTextureStorage2DMultisample(textureID, samples, intfmt, w, h, GL_TRUE); 235 | } 236 | } 237 | return textureID; 238 | } 239 | //------------------------------------------------------------------------------ 240 | // 241 | //------------------------------------------------------------------------------ 242 | inline GLuint createRGBA8(int w, int h, int samples, int coverageSamples = 0, GLuint textureID = 0) 243 | { 244 | return create(w, h, samples, coverageSamples, GL_RGBA8, GL_RGBA, textureID); 245 | } 246 | 247 | //------------------------------------------------------------------------------ 248 | // 249 | //------------------------------------------------------------------------------ 250 | inline GLuint createDST(int w, int h, int samples, int coverageSamples = 0, GLuint textureID = 0) 251 | { 252 | return create(w, h, samples, coverageSamples, GL_DEPTH24_STENCIL8, GL_DEPTH24_STENCIL8, textureID); 253 | } 254 | //------------------------------------------------------------------------------ 255 | // 256 | //------------------------------------------------------------------------------ 257 | inline void deleteTexture(GLuint texture) 258 | { 259 | glDeleteTextures(1, &texture); 260 | } 261 | } // namespace texture 262 | //------------------------------------------------------------------------------ 263 | // Render-buffers should be forgotten. Thing of the past 264 | //------------------------------------------------------------------------------ 265 | #ifdef USE_RENDERBUFFERS 266 | namespace renderbuffer { 267 | 268 | inline GLuint createRenderBuffer(int w, int h, int samples, int coverageSamples, GLenum fmt) 269 | { 270 | int query; 271 | GLuint rb; 272 | glGenRenderbuffers(1, &rb); 273 | glBindRenderbuffer(GL_RENDERBUFFER, rb); 274 | if(coverageSamples) 275 | { 276 | glRenderbufferStorageMultisampleCoverageNV(GL_RENDERBUFFER, coverageSamples, samples, fmt, w, h); 277 | glGetRenderbufferParameteriv(GL_RENDERBUFFER, GL_RENDERBUFFER_COVERAGE_SAMPLES_NV, &query); 278 | if(query < coverageSamples) 279 | rb = 0; 280 | else if(query > coverageSamples) 281 | { 282 | // report back the actual number 283 | coverageSamples = query; 284 | LOGW("Warning: coverage samples is now %d\n", coverageSamples); 285 | } 286 | glGetRenderbufferParameteriv(GL_RENDERBUFFER, GL_RENDERBUFFER_COLOR_SAMPLES_NV, &query); 287 | if(query < samples) 288 | rb = 0; 289 | else if(query > samples) 290 | { 291 | // report back the actual number 292 | samples = query; 293 | LOGW("Warning: depth-samples is now %d\n", samples); 294 | } 295 | } 296 | else 297 | { 298 | // create a regular MSAA color buffer 299 | glRenderbufferStorageMultisample(GL_RENDERBUFFER, samples, fmt, w, h); 300 | // check the number of samples 301 | glGetRenderbufferParameteriv(GL_RENDERBUFFER, GL_RENDERBUFFER_SAMPLES, &query); 302 | 303 | if(query < samples) 304 | rb = 0; 305 | else if(query > samples) 306 | { 307 | samples = query; 308 | LOGW("Warning: depth-samples is now %d\n", samples); 309 | } 310 | } 311 | glBindRenderbuffer(GL_RENDERBUFFER, 0); 312 | return rb; 313 | } 314 | 315 | 
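//------------------------------------------------------------------------------
// NOTE: illustrative sketch, not part of the original helper. It shows how
// createRenderBuffer() above combines with the fbo:: helpers to assemble a
// complete MSAA framebuffer. The function name and the choice of formats are
// assumptions made for this example only.
//------------------------------------------------------------------------------
inline GLuint exampleCreateMSAAFramebuffer(int w, int h, int samples)
{
    GLuint fb    = fbo::create();
    GLuint color = createRenderBuffer(w, h, samples, 0, GL_RGBA8);            // MSAA color buffer
    GLuint dst   = createRenderBuffer(w, h, samples, 0, GL_DEPTH24_STENCIL8); // MSAA depth-stencil buffer

    fbo::attachRenderbuffer(fb, color, 0); // color attachment #0
    fbo::attachDSTRenderbuffer(fb, dst);   // depth/stencil attachment

    fbo::bind(fb);
    if(!fbo::CheckStatus()) // validate completeness before rendering into it
        return 0;
    fbo::bind(0);
    return fb;
}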
//------------------------------------------------------------------------------ 316 | // 317 | //------------------------------------------------------------------------------ 318 | inline GLuint createRenderBufferRGBA8(int w, int h, int samples, int coverageSamples) 319 | { 320 | return createRenderBuffer(w, h, samples, coverageSamples, GL_RGBA8); 321 | } 322 | 323 | //------------------------------------------------------------------------------ 324 | // 325 | //------------------------------------------------------------------------------ 326 | inline GLuint createRenderBufferD24S8(int w, int h, int samples, int coverageSamples) 327 | { 328 | return createRenderBuffer(w, h, samples, coverageSamples, GL_DEPTH24_STENCIL8); 329 | } 330 | //------------------------------------------------------------------------------ 331 | // 332 | //------------------------------------------------------------------------------ 333 | inline GLuint createRenderBufferS8(int w, int h, int samples, int coverageSamples) 334 | { 335 | return createRenderBuffer(w, h, samples, coverageSamples, GL_STENCIL_INDEX8); 336 | } 337 | 338 | //------------------------------------------------------------------------------ 339 | // 340 | //------------------------------------------------------------------------------ 341 | #ifdef USE_RENDERBUFFERS 342 | inline void deleteRenderBuffer(GLuint rb) 343 | { 344 | glDeleteRenderbuffers(1, &rb); 345 | } 346 | #endif 347 | } // namespace renderbuffer 348 | #endif 349 | #endif //#ifndef __HELPERFBO__ 350 | -------------------------------------------------------------------------------- /mt/CThread.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | //#include "comms.h" 20 | #if defined WIN32 21 | #include 22 | #else 23 | #include 24 | #include 25 | #ifdef IOS 26 | # include 27 | #endif 28 | #include 29 | #include 30 | #include 31 | #endif 32 | 33 | #include 34 | #include 35 | #include "CThread.h" 36 | 37 | //---------------------------------------------------------------------------------- 38 | // This Function is used as the main callback for all. 
Then the argument passed will 39 | // be used to jump at the right derived class 40 | //---------------------------------------------------------------------------------- 41 | void thread_function(void *pData) 42 | { 43 | NXPROFILEFUNCCOL(__FUNCTION__, 0xFF800000); 44 | CThread* pthread = static_cast(pData); 45 | pthread->CThreadProc(); 46 | } 47 | 48 | #if defined WIN32 49 | ///////////////////////////////////////////////////////////////////////////////////// 50 | ///////////////////////////////////////////////////////////////////////////////////// 51 | // WINDOWS WINDOWS WINDOWS WINDOWS WINDOWS WINDOWS WINDOWS WINDOWS WINDOWS WINDOWS // 52 | ///////////////////////////////////////////////////////////////////////////////////// 53 | ///////////////////////////////////////////////////////////////////////////////////// 54 | 55 | ///////////////////////////////////////////////////////////////////////////////////// 56 | // THREAD THREAD THREAD THREAD THREAD THREAD THREAD THREAD THREAD THREAD THREAD // 57 | ///////////////////////////////////////////////////////////////////////////////////// 58 | CThread::CThread(const bool startNow, const bool Critical) 59 | { 60 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 61 | m_thread = CreateThread(thread_function, static_cast(this), startNow, Critical); 62 | } 63 | 64 | CThread::~CThread() 65 | { 66 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 67 | CancelThread(); 68 | DeleteThread(); 69 | } 70 | 71 | // CpuCount 72 | int CThread::CpuCount() 73 | { 74 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 75 | static int Cpus = -1; 76 | if(-1 == Cpus) { 77 | SYSTEM_INFO si; 78 | GetSystemInfo(&si); 79 | Cpus = (int)si.dwNumberOfProcessors > 1 ? (int)si.dwNumberOfProcessors : 1; 80 | //if(Cpus>4)Cpus=4; 81 | } 82 | return Cpus; 83 | } 84 | //int CThread::CpuCount0() 85 | //{ 86 | // static int Cpus = -1; 87 | // if(-1 == Cpus) { 88 | // SYSTEM_INFO si; 89 | // GetSystemInfo(&si); 90 | // Cpus = (int)si.dwNumberOfProcessors > 1 ? 
(int)si.dwNumberOfProcessors : 1; 91 | // } 92 | // return Cpus; 93 | //} 94 | 95 | // Sleep 96 | void CThread::Sleep(const unsigned long Milliseconds) 97 | { 98 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 99 | ::Sleep(Milliseconds); 100 | } 101 | 102 | // CreateThread 103 | NThreadHandle CThread::CreateThread(ThreadProc Proc, void *Param, bool startNow, const bool Critical) 104 | { 105 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 106 | NThreadHandle hThread = ::CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)Proc, Param, CREATE_SUSPENDED, NULL); 107 | if(Critical) { 108 | SetThreadPriority(hThread, THREAD_PRIORITY_TIME_CRITICAL); 109 | } 110 | if(startNow) 111 | ::ResumeThread(hThread); 112 | return hThread; 113 | } 114 | 115 | // CancelThread 116 | void CThread::CancelThread() 117 | { 118 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 119 | TerminateThread(m_thread, 0); 120 | } 121 | 122 | // DeleteThread 123 | void CThread::DeleteThread() 124 | { 125 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 126 | CloseHandle(m_thread); 127 | m_thread = NULL; 128 | } 129 | 130 | // WaitThread 131 | void CThread::WaitThread() 132 | { 133 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 134 | WaitForSingleObject(m_thread, INFINITE); 135 | } 136 | 137 | // WaitThreads 138 | void CThread::WaitThreads(const NThreadHandle *Threads, const int Count) 139 | { 140 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 141 | WaitForMultipleObjects(Count, Threads, TRUE, INFINITE); 142 | } 143 | 144 | // SuspendThread 145 | bool CThread::SuspendThread() 146 | { 147 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 148 | return ::SuspendThread(m_thread) == -1 ? false : true; 149 | } 150 | 151 | // ResumeThread 152 | bool CThread::ResumeThread() 153 | { 154 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 155 | return ::ResumeThread(m_thread) == -1 ? false : true; 156 | } 157 | 158 | //SetThreadAffinity 159 | void CThread::SetThreadAffinity(unsigned int mask) 160 | { 161 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 162 | SetThreadAffinityMask(m_thread, mask); 163 | } 164 | ///////////////////////////////////////////////////////////////////////////////////// 165 | // MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX // 166 | ///////////////////////////////////////////////////////////////////////////////////// 167 | CMutex::CMutex() 168 | { 169 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 170 | CMutex::CreateMutex(m_mutex); 171 | } 172 | 173 | CMutex::~CMutex() 174 | { 175 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 176 | CMutex::DeleteMutex(); 177 | } 178 | 179 | // CreateMutex 180 | void CMutex::CreateMutex(NMutexHandle &Mutex) 181 | { 182 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 183 | Mutex = ::CreateMutex(NULL, FALSE, NULL); 184 | } 185 | 186 | // DeleteMutex 187 | void CMutex::DeleteMutex() 188 | { 189 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 190 | CloseHandle(m_mutex); 191 | m_mutex = NULL; 192 | } 193 | 194 | // LockMutex 195 | bool CMutex::LockMutex(int ms, long *dbg) 196 | { 197 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 198 | DWORD res = WaitForSingleObjectEx(m_mutex, ms == -1 ? INFINITE : ms, FALSE); 199 | if(dbg) *dbg = res; 200 | return res == WAIT_OBJECT_0 ? 
true : false; 201 | } 202 | 203 | // UnlockMutex 204 | void CMutex::UnlockMutex() 205 | { 206 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 207 | BOOL bRes = ReleaseMutex(m_mutex); 208 | assert(bRes); 209 | } 210 | 211 | ///////////////////////////////////////////////////////////////////////////////////// 212 | // SEMAPHORE SEMAPHORE SEMAPHORE SEMAPHORE SEMAPHORE SEMAPHORE SEMAPHORE SEMAPHORE // 213 | ///////////////////////////////////////////////////////////////////////////////////// 214 | CSemaphore::CSemaphore() 215 | { 216 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 217 | CSemaphore::CreateSemaphore(m_semaphore, 0, 0xFFFF); 218 | } 219 | 220 | CSemaphore::CSemaphore(long initialCnt, long maxCnt) 221 | { 222 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 223 | CSemaphore::CreateSemaphore(m_semaphore, initialCnt, maxCnt); 224 | } 225 | 226 | CSemaphore::~CSemaphore() 227 | { 228 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 229 | CSemaphore::DeleteSemaphore(); 230 | } 231 | 232 | int CSemaphore::num_Semaphores = 0; 233 | // CreateSemaphore 234 | void CSemaphore::CreateSemaphore(NSemaphoreHandle &Semaphore, long initialCnt, long maxCnt) 235 | { 236 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 237 | num_Semaphores++; 238 | Semaphore = ::CreateSemaphoreA(NULL, initialCnt, maxCnt, NULL); 239 | } 240 | 241 | // DeleteSemaphore 242 | void CSemaphore::DeleteSemaphore() 243 | { 244 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 245 | CloseHandle(m_semaphore); 246 | m_semaphore = NULL; 247 | num_Semaphores--; 248 | } 249 | 250 | // AcquireSemaphore 251 | bool CSemaphore::AcquireSemaphore(int ms) 252 | { 253 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 254 | return WaitForSingleObject(m_semaphore, ms == -1 ? INFINITE : ms) == WAIT_TIMEOUT ? false : true; 255 | } 256 | 257 | // ReleaseSemaphore 258 | void CSemaphore::ReleaseSemaphore(long cnt) 259 | { 260 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 261 | BOOL bRes = ::ReleaseSemaphore(m_semaphore, cnt, NULL); 262 | } 263 | 264 | ///////////////////////////////////////////////////////////////////////////////////// 265 | // EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT // 266 | ///////////////////////////////////////////////////////////////////////////////////// 267 | int CEvent::num_events = 0; 268 | CEvent::CEvent(bool manualReset, bool initialState) 269 | { 270 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 271 | CEvent::CreateEvent(m_event, manualReset, initialState); 272 | } 273 | CEvent::~CEvent() 274 | { 275 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 276 | CEvent::DeleteEvent(); 277 | } 278 | 279 | void CEvent::CreateEvent(NEventHandle &Event, bool manualReset, bool initialState) 280 | { 281 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 282 | num_events++; 283 | Event = ::CreateEventA(NULL, manualReset, initialState, NULL); 284 | } 285 | void CEvent::DeleteEvent() 286 | { 287 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 288 | CloseHandle(m_event); 289 | m_event = NULL; 290 | num_events--; 291 | } 292 | bool CEvent::Set() 293 | { 294 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 295 | return SetEvent(m_event) ? true : false; 296 | } 297 | bool CEvent::Pulse() 298 | { 299 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 300 | return PulseEvent(m_event) ? true : false; 301 | } 302 | bool CEvent::Reset() 303 | { 304 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 305 | return ResetEvent(m_event) ? 
true : false; 306 | } 307 | bool CEvent::WaitOnEvent(int ms) 308 | { 309 | NXPROFILEFUNCCOL(__FUNCTION__, COLOR_RED); 310 | return WaitForSingleObject(m_event, ms == -1 ? INFINITE : ms) == WAIT_TIMEOUT ? false : true; 311 | } 312 | 313 | 314 | #endif // WINDOWS 315 | 316 | ///////////////////////////////////////////////////////////////////////////////////// 317 | ///////////////////////////////////////////////////////////////////////////////////// 318 | // UNIX UNIX UNIX UNIX UNIX UNIX UNIX UNIX UNIX UNIX UNIX UNIX UNIX UNIX UNIX UNIX // 319 | ///////////////////////////////////////////////////////////////////////////////////// 320 | ///////////////////////////////////////////////////////////////////////////////////// 321 | 322 | #if defined IOS || defined ANDROID || defined LINUX 323 | 324 | ///////////////////////////////////////////////////////////////////////////////////// 325 | // CThread CThread CThread CThread CThread CThread CThread CThread CThread CThread // 326 | ///////////////////////////////////////////////////////////////////////////////////// 327 | CThread::CThread(const bool startNow, const bool Critical) 328 | { 329 | m_thread = CreateThread(thread_function, static_cast(this), startNow, Critical); 330 | } 331 | 332 | CThread::~CThread() 333 | { 334 | CancelThread(); 335 | DeleteThread(); 336 | } 337 | 338 | 339 | // CpuCount 340 | int CThread::CpuCount() { 341 | static int Cpus = -1; 342 | if(-1 == Cpus) { 343 | #ifdef IOS 344 | size_t s = sizeof(Cpus); 345 | sysctlbyname("hw.logicalcpu", &Cpus, &s, NULL, 0); 346 | #endif // IOS 347 | #ifdef ANDROID 348 | Cpus = sysconf(_SC_NPROCESSORS_ONLN); 349 | #endif // ANDROID 350 | Cpus = Cpus > 1 ? Cpus : 1; 351 | } 352 | return Cpus; 353 | } 354 | 355 | // Sleep 356 | void CThread::Sleep(const unsigned long Milliseconds) { 357 | usleep(1000 * (useconds_t)Milliseconds); 358 | } 359 | 360 | // CreateThread 361 | NThreadHandle CThread::CreateThread(ThreadProc Proc, void *Param, bool startNow, const bool Critical) 362 | { 363 | pthread_t th; 364 | //bool startNow, ? 
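    // Note: unlike the Win32 path above, pthreads has no direct equivalent of
    // CREATE_SUSPENDED or THREAD_PRIORITY_TIME_CRITICAL, so the startNow and
    // Critical arguments are ignored here and the thread starts running as soon
    // as pthread_create() returns. A possible extension (not part of this sample)
    // would be to gate the thread body on a condition variable for deferred start
    // and to raise the priority through pthread_attr_setschedparam().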
365 | pthread_create(&th, NULL, (void *(*)(void *))Proc, Param); 366 | return th; 367 | } 368 | 369 | // CancelThread 370 | void CThread::CancelThread() { 371 | #if defined ANDROID 372 | pthread_kill(m_thread, SIGUSR1); 373 | #else 374 | pthread_cancel(m_thread); 375 | #endif 376 | } 377 | 378 | // DeleteThread 379 | void CThread::DeleteThread() { 380 | pthread_detach(m_thread); 381 | } 382 | 383 | // WaitThread 384 | void CThread::WaitThread() { 385 | pthread_join(m_thread, NULL); 386 | } 387 | 388 | // WaitThreads 389 | void CThread::WaitThreads(const NThreadHandle *Threads, const int Count) { 390 | int i; 391 | for(i = 0; i < Count; i++) { 392 | pthread_join(Threads[i], NULL); 393 | } 394 | } 395 | 396 | /////////////////////////////////////////////////////////////////////////////////// 397 | // MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX MUTEX // 398 | /////////////////////////////////////////////////////////////////////////////////// 399 | CMutex::CMutex() 400 | { 401 | CMutex::CreateMutex(m_mutex); 402 | } 403 | 404 | CMutex::~CMutex() 405 | { 406 | CMutex::DeleteMutex(); 407 | } 408 | // CreateMutex 409 | void CMutex::CreateMutex(NMutexHandle &Mutex) 410 | { 411 | pthread_mutex_init(&Mutex, NULL); 412 | } 413 | 414 | // NMutexHandle 415 | void CMutex::DeleteMutex() 416 | { 417 | pthread_mutex_destroy(&m_mutex); 418 | } 419 | 420 | // LockMutex 421 | bool CMutex::LockMutex(int ms, long *dbg) { 422 | pthread_mutex_lock(&m_mutex); 423 | return true; 424 | } 425 | 426 | // UnlockMutex 427 | void CMutex::UnlockMutex() { 428 | pthread_mutex_unlock(&m_mutex); 429 | } 430 | 431 | ///////////////////////////////////////////////////////////////////////////////////// 432 | // SEMAPHORE SEMAPHORE SEMAPHORE SEMAPHORE SEMAPHORE SEMAPHORE SEMAPHORE SEMAPHORE // 433 | ///////////////////////////////////////////////////////////////////////////////////// 434 | CSemaphore::CSemaphore(long initialCnt, long maxCnt) 435 | { 436 | CSemaphore::CreateSemaphore(m_semaphore, initialCnt, maxCnt); 437 | } 438 | 439 | CSemaphore::~CSemaphore() 440 | { 441 | CSemaphore::DeleteSemaphore(); 442 | } 443 | 444 | int CSemaphore::num_Semaphores = 0; 445 | // CreateSemaphore 446 | void CSemaphore::CreateSemaphore(NSemaphoreHandle &Semaphore, long initialCnt, long maxCnt) 447 | { 448 | num_Semaphores++; 449 | sem_init(&Semaphore, 0, (unsigned int)initialCnt); 450 | } 451 | 452 | // DeleteSemaphore 453 | void CSemaphore::DeleteSemaphore() 454 | { 455 | sem_destroy(&m_semaphore); 456 | //!@$!#$@#$m_semaphore = NULL; 457 | num_Semaphores--; 458 | } 459 | 460 | // AcquireSemaphore 461 | bool CSemaphore::AcquireSemaphore(int msTimeOut) 462 | { 463 | //if(msTimeOut == 0) 464 | sem_wait(&m_semaphore); 465 | /*else { 466 | //convert timeout to a timespec, pthreads wants the final time not the length 467 | #if 0 468 | timespec ts; 469 | clock_gettime(CLOCK_REALTIME, &ts); 470 | ts.tv_sec += time_t(msTimeOut) / time_t(1000); 471 | ts.tv_nsec += (long(msTimeOut) % long(1000)) * long(1000*1000); 472 | #else 473 | struct timeval tv; 474 | struct timespec ts; 475 | gettimeofday(&tv, NULL); 476 | ts.tv_sec = tv.tv_sec + time_t(msTimeOut) / time_t(1000); 477 | ts.tv_nsec = tv.tv_usec*1000 + (long(msTimeOut) % long(1000)) * long(1000*1000); 478 | #endif 479 | 480 | if (sem_timedwait(&m_semaphore, &ts))// WTF ?!?!? 
== ETIMEDOUT) 481 | { 482 | //timed out 483 | return false; 484 | } 485 | return true; 486 | }*/ 487 | return true; 488 | } 489 | 490 | //void NCountSemaphore::Post() const 491 | // ReleaseSemaphore 492 | void CSemaphore::ReleaseSemaphore(long cnt) 493 | { 494 | for(;cnt > 0; cnt--) 495 | sem_post(&m_semaphore); 496 | 497 | } 498 | 499 | 500 | ///////////////////////////////////////////////////////////////////////////////////// 501 | // EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT EVENT // 502 | ///////////////////////////////////////////////////////////////////////////////////// 503 | int CEvent::num_events = 0; 504 | CEvent::CEvent(bool manualReset, bool initialState) 505 | { 506 | CEvent::CreateEvent(m_event, manualReset, initialState); 507 | m_signaled = initialState; 508 | m_manualReset = manualReset; 509 | // TODO: put it in CreateEvent 510 | pthread_mutex_init(&m_mutex, NULL); // do we need non-default attrs (second arg)? 511 | } 512 | CEvent::~CEvent() 513 | { 514 | CEvent::DeleteEvent(); 515 | } 516 | 517 | void CEvent::CreateEvent(NEventHandle &event, bool manualReset, bool initialState) 518 | { 519 | num_events++; 520 | pthread_cond_init(&event, NULL); 521 | } 522 | void CEvent::DeleteEvent() 523 | { 524 | //m_event = NULL; 525 | num_events--; 526 | pthread_cond_destroy(&m_event); 527 | pthread_mutex_destroy(&m_mutex); 528 | } 529 | bool CEvent::Set() 530 | { 531 | int r = 0; 532 | pthread_mutex_lock(&m_mutex); 533 | 534 | if (m_signaled == false) 535 | { 536 | m_signaled = true; 537 | r = pthread_cond_broadcast(&m_event); 538 | } 539 | 540 | pthread_mutex_unlock(&m_mutex); 541 | return r ? false : true; 542 | } 543 | bool CEvent::Pulse() 544 | { 545 | pthread_mutex_lock(&m_mutex); 546 | 547 | int r = pthread_cond_broadcast(&m_event); 548 | 549 | pthread_mutex_unlock(&m_mutex); 550 | return r ? false : true; 551 | } 552 | bool CEvent::Reset() 553 | { 554 | pthread_mutex_lock(&m_mutex); 555 | m_signaled = false; 556 | pthread_mutex_unlock(&m_mutex); 557 | return m_signaled; 558 | } 559 | bool CEvent::WaitOnEvent(int msTimeOut) 560 | { 561 | pthread_mutex_lock(&m_mutex); 562 | 563 | //convert timeout to a timespec, pthreads wants the final time not the length 564 | struct timeval tv; 565 | struct timespec ts; 566 | if(msTimeOut >= 0) 567 | { 568 | gettimeofday(&tv, NULL); 569 | //timespec ts; 570 | //clock_gettime(CLOCK_REALTIME, &ts); 571 | ts.tv_sec = tv.tv_sec + time_t(msTimeOut) / time_t(1000); 572 | ts.tv_nsec = tv.tv_usec*1000 + (long(msTimeOut) % long(1000)) * long(1000*1000); 573 | } 574 | bool ret = true; 575 | while(m_signaled == false) 576 | { 577 | if(msTimeOut >= 0) 578 | { 579 | if (::pthread_cond_timedwait(&m_event, &m_mutex, &ts)) // WTF?!?! 
== ETIMEDOUT) 580 | { 581 | //timed out 582 | ret = false; 583 | break; 584 | } 585 | } else { 586 | if (::pthread_cond_wait(&m_event, &m_mutex)) 587 | { 588 | //must be an error, then 589 | ret = false; 590 | break; 591 | } 592 | } 593 | } 594 | 595 | if (ret && !m_manualReset) 596 | { 597 | m_signaled = false; 598 | } 599 | 600 | pthread_mutex_unlock(&m_mutex); 601 | return ret; 602 | } 603 | #endif // IOS || ANDROID 604 | 605 | /////////////////////////////////////////////////////////////////////////////////////////////////////// 606 | /////////////////////////////////////////////////////////////////////////////////////////////////////// 607 | /////////////////////////////////////////////////////////////////////////////////////////////////////// 608 | // NThreadLocalNonPODBase NThreadLocalNonPODBase NThreadLocalNonPODBase NThreadLocalNonPODBase 609 | // NThreadLocalNonPODBase NThreadLocalNonPODBase NThreadLocalNonPODBase NThreadLocalNonPODBase 610 | /////////////////////////////////////////////////////////////////////////////////////////////////////// 611 | /////////////////////////////////////////////////////////////////////////////////////////////////////// 612 | /////////////////////////////////////////////////////////////////////////////////////////////////////// 613 | 614 | 615 | CCriticalSection& NThreadLocalNonPODBase::s_listLock() 616 | { 617 | static CCriticalSection cs; 618 | return cs; 619 | } 620 | 621 | std::vector& NThreadLocalNonPODBase::s_tlsList() 622 | { 623 | static std::vector v; 624 | return v; 625 | } 626 | 627 | void NThreadLocalNonPODBase::DeleteAllTLSDataInThisThread() 628 | { 629 | CCriticalSectionHolder h(s_listLock()); 630 | std::vector& l = s_tlsList(); 631 | for (size_t i = 0; i < l.size(); i++) 632 | { 633 | l[i]->DeleteMyTLSData(); 634 | } 635 | } 636 | 637 | 638 | -------------------------------------------------------------------------------- /mt/CThreadWork.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | #pragma once 20 | //#define NOWIN32BUILTIN 21 | //#define DBGTHREAD 22 | #ifdef DBGTHREAD 23 | # define LOGDBG LOGI 24 | #else 25 | # define LOGDBG(...) 
26 | #endif 27 | 28 | #include 29 | #include 30 | #include "CThread.h" 31 | 32 | #include "RingBuffer.h" 33 | 34 | //#define CB_CALL_CONV 35 | #ifdef WIN32 36 | #define CB_CONV CALLBACK 37 | #define CALL_CONV __stdcall 38 | #else 39 | #define CB_CONV 40 | #define CALL_CONV 41 | #endif 42 | 43 | 44 | 45 | #if defined IOS || defined ANDROID 46 | #endif 47 | 48 | #ifdef WIN32 49 | #endif 50 | 51 | class TaskQueue; 52 | class ThreadWorker; 53 | 54 | //#pragma mark - Globals // MacOSX thing 55 | #ifdef USEGLOBALS 56 | /************************************************************************************/ 57 | /** 58 | ** \brief return the main thread 59 | **/ 60 | NThreadHandle GetMainThread(); 61 | /** 62 | ** \brief 63 | **/ 64 | void DeclareMainThread(); 65 | /** 66 | ** \brief 67 | **/ 68 | bool IsMainThread(); 69 | /** 70 | ** \brief 71 | **/ 72 | TaskQueue* GetMainTaskQueue(); 73 | /** 74 | ** \brief 75 | **/ 76 | uint GetNumCPUCores(); 77 | /** 78 | ** \brief 79 | **/ 80 | NThreadID GetCurrentThreadID(); 81 | /** 82 | ** \brief 83 | **/ 84 | TaskQueue* GetCurrentTaskQueue(); 85 | #endif //USEGLOBALS 86 | 87 | //#pragma mark - Events, Mutex, Semaphores with altertable feature // MacOSX thing 88 | 89 | //pthreads doesn't support real alertable waiting, so we need to wake up and poll for it 90 | #define NV_FAKE_WAIT_ALERTABLE_SLICES_MS 5 91 | 92 | class CEventAlertable : public CEvent 93 | { 94 | public: 95 | bool WaitOnEventAltertable(); 96 | bool WaitOnEventAltertable(::uint msTimeOut); 97 | }; 98 | 99 | //#pragma mark - TaskBase // MacOSX thing 100 | /************************************************************************************/ 101 | /** 102 | ** \brief Base for a Task, with the common way to invoke the task through Invoke() 103 | ** 104 | ** this is the very base for any task to be part of the game: derive a class from this 105 | ** one and create a dedicated constructor that will contain the function arguments 106 | ** (arguments that normally you would pass to the method for executing what you want 107 | ** then the thread pool will call "Invoke()": the overloaded implementation will allow 108 | ** you to find back the arguments that you passed in the constructor 109 | ** This allows to normalize the invokation of a task, while allowing you to pre-declare 110 | ** arguments via the constructor 111 | **/ 112 | class TaskBase 113 | { 114 | private: 115 | /// \brief pointer to TaskQueue::m_taskCount 116 | NInterlockedValue* m_queueCountRef; 117 | protected: 118 | TaskBase() /*: m_queueCountRef(NULL)*/ {} 119 | virtual ~TaskBase(); 120 | public: 121 | /// \brief the main entry point for Task execution : this method is the one called to exectute the Task 122 | virtual void Invoke() = 0; 123 | /// \brief when the Task got accomplished, Done() gets called. 124 | virtual void Done(); 125 | #ifdef DBGTHREAD 126 | virtual const char *getDbgString() { return "NONAME"; }; 127 | #endif 128 | friend class TaskQueue; 129 | }; 130 | 131 | // ?? Shall we create a bas class for tasks that we want to be able to invoke other Tasks 132 | 133 | //#pragma mark - Cross Thread // MacOSX thing 134 | //************************************************************************************ 135 | // 136 | // TaskSyncCall is a class for function call(s) that would require waiting for the result 137 | // prior moving forward: method "Call()" is what needs to be called. The caller will wait 138 | // for the method "Call" to be completed. 
This method might execute on another thread 139 | // This privately inherits from TaskBase so you can't pass it to the task invoker directly 140 | // 141 | class TaskSyncCall : private TaskBase 142 | { 143 | private: 144 | CEventAlertable m_doneEvent; 145 | virtual void Done(); 146 | protected: 147 | TaskSyncCall(); 148 | 149 | // Implement this 150 | virtual void Invoke() = 0; 151 | public: 152 | /// \brief performs a synchonous call : wait for the result 153 | void Call(TaskQueue* destThread = NULL, bool waitAltertable = false);//GetMainTaskQueue()); 154 | }; 155 | 156 | //#pragma mark - Task Queue // MacOSX thing 157 | 158 | 159 | /************************************************************************************/ 160 | /** 161 | ** \brief thread-local variable of the current Queue list of task 162 | **/ 163 | extern TaskQueue* getCurrentTaskQueue(); 164 | /** 165 | ** \brief sets thread-local variable of the current Queue list of task 166 | **/ 167 | extern void setCurrentTaskQueue(TaskQueue * tb); 168 | extern int getThreadNumber(); 169 | extern void setThreadNumber(int n); 170 | 171 | /************************************************************************************/ 172 | /** 173 | ** \brief Queue list of task to execute 174 | ** 175 | ** each thread-worker owns a TaskQueue in which tasks are put ( pushTask() ) 176 | ** Thread workers will get a TaskQueue assigned by default. 177 | ** 178 | ** Special and optional case: the MAIN thread might also have such a TaskQueue: you can instanciate 179 | ** it and do while(g_mainTB->pollTask()) {} to execute possibly queued tasks that other 180 | ** workers might have pushed to the main thread. 181 | **/ 182 | class TaskQueue 183 | { 184 | private: 185 | //CThread *m_thread; 186 | NThreadHandle m_thread; 187 | #ifdef WIN32 188 | NThreadID m_threadID; 189 | #endif 190 | 191 | /// \brief prototype for the function that is invoked for the task execution 192 | /// In many cases this function is TaskQueue::taskThreadFunc() with a TaskBase as the argument 193 | typedef void (CB_CONV *ThreadFunc)(void* params); 194 | #if !defined WIN32 || defined NOWIN32BUILTIN 195 | /// \name non Win32 queue implementation 196 | /// @{ 197 | typedef NRingBuffer > Ring; 198 | // Seems like we could ask Windows to do this work for us... 199 | CEvent* m_dataReadyEvent; 200 | CCriticalSection m_taskQueueLock; 201 | Ring m_taskQueue; 202 | /// @} 203 | #endif 204 | 205 | /// \brief entry point to execute the task 206 | static void CB_CONV taskThreadFunc(void* params); 207 | /// \name Constructors/Destructors 208 | /// @{ 209 | #if !defined WIN32 || defined NOWIN32BUILTIN 210 | TaskQueue(CEvent* dataReadyEvent); 211 | #endif 212 | TaskQueue(const TaskQueue&); //these are purposely not implemented 213 | TaskQueue& operator= (const TaskQueue&); 214 | public: 215 | TaskQueue(); 216 | TaskQueue(/*CThread **/NThreadHandle thread, CEvent* dataReadyEvent=NULL); 217 | #ifdef WIN32 218 | TaskQueue(NThreadID id, CEvent* dataReadyEvent=NULL); 219 | #endif 220 | ~TaskQueue(); 221 | private: 222 | /// @} 223 | 224 | /// \brief maintains the total amount of tasks 225 | NInterlockedValue m_taskCount; 226 | 227 | /// \brief push a function in the list ring of functions to call 228 | void pushTaskFunc(ThreadFunc call, void* params); 229 | public: 230 | inline int GetQueuedTaskCount() { return (int)m_taskCount; } 231 | /// \brief push a task into the execution buffer. Using taskThreadFunc. 
232 | void pushTask(TaskBase * task); 233 | /// \brief poll a Task's function from the execution buffer and execute it 234 | bool pollTask(int timeout=0); 235 | void FlushTasks(bool waitAlertable = false); 236 | 237 | inline NThreadHandle GetDestinationThread() { return m_thread; } 238 | #ifdef WIN32 239 | inline NThreadID GetDestinationThreadID() { return m_threadID; } 240 | #else 241 | /// \brief pthreads doesn't differentiate between handles and IDs 242 | inline NThreadID GetDestinationThreadID() { return GetDestinationThread(); } 243 | #endif 244 | 245 | friend class ThreadWorker; 246 | }; 247 | 248 | 249 | //#pragma mark - Task Worker // MacOSX thing 250 | /************************************************************************************/ 251 | /** 252 | ** \brief Pool of workers 253 | **/ 254 | class ThreadWorker 255 | { 256 | private: 257 | /// \name contructors/destructor 258 | /// @{ 259 | ThreadWorker(const ThreadWorker&); //these are purposely not implemented 260 | ThreadWorker& operator= (const ThreadWorker&); 261 | public: 262 | /// \brief this starts the thread 263 | ThreadWorker(const std::string& threadName = std::string(), bool discardQueuedOnExit = false, bool waitAleratableOnExit = false); 264 | // \brief this blocks until all queued work is done unless you had told it to discard 265 | ~ThreadWorker(); 266 | /// @} 267 | private: 268 | unsigned long m_workerID; 269 | std::string m_threadName; 270 | CCriticalSection m_threadNameSec; 271 | CEvent m_doneEvent; 272 | TaskQueue m_invoker; 273 | volatile bool m_discardQueuedOnExit; 274 | //volatile bool m_alertableOnExit; 275 | 276 | #ifdef WIN32 277 | /// \brief the real function that the thread will invoke - Win32 version 278 | static ::uint CALL_CONV threadFunc(void* p); 279 | #else 280 | /// \brief the real function that the thread will invoke - Unix version 281 | static void* CALL_CONV threadFunc(void* p); 282 | #endif 283 | /// whenever the work is done, signals data are ready to pickup 284 | #if !defined WIN32 || defined NOWIN32BUILTIN 285 | CEvent m_dataReadyEvent; 286 | #endif 287 | public: 288 | /// \name getters/setters 289 | /// @{ 290 | inline bool GetDiscardQueuedOnExit() const { return m_discardQueuedOnExit; } 291 | inline void SetDiscardQueuedOnExit(bool b) { m_discardQueuedOnExit = b; } 292 | 293 | //inline bool GetWaitAlertableOnExit() const { return m_alertableOnExit; } 294 | //inline void SetWaitAleratbleOnExit(bool b) { m_alertableOnExit = b; } 295 | 296 | inline TaskQueue& GetTaskQueue() { return m_invoker; } 297 | inline int GetWorkerID() { return m_workerID; } 298 | 299 | const std::string& GetThreadName()/* const*/; 300 | void SetThreadName(const std::string& n); 301 | /// @} 302 | void SetBackgroundMode(bool b); 303 | }; 304 | 305 | //#pragma mark - Pool of workers // MacOSX thing 306 | /************************************************************************************/ 307 | /** 308 | ** \brief various cases to schedule work 309 | **/ 310 | enum NWORKER_THREADPOOL_SCHEDULE 311 | { 312 | //the task is assigned to whichever thread has the least total queued tasks 313 | NWTPS_LEAST_QUEUED_TASKS, 314 | //the tasks are assigned sequentially to threads wrapping around 315 | NWTPS_ROUND_ROBIN, 316 | //the threads read from a central queue of tasks. 
317 | //this one is higher overhead, but it might be worth if you have very variable task completion times 318 | NWTPS_SHARED_QUEUE, 319 | }; 320 | 321 | /************************************************************************************/ 322 | /** 323 | ** \brief Pool of thread workers 324 | ** 325 | ** this class contains all the thread workers available for any jobs/tasks 326 | ** 327 | **/ 328 | class ThreadWorkerPool 329 | { 330 | private: 331 | ThreadWorkerPool(const ThreadWorkerPool&); //these are purposely not implemented 332 | ThreadWorkerPool& operator= (const ThreadWorkerPool&); 333 | 334 | ::uint m_threadCount; 335 | ThreadWorker* m_threads; 336 | NWORKER_THREADPOOL_SCHEDULE m_schedule; 337 | ::uint m_invokedTaskCount; 338 | 339 | /// \brief this task is invoked on multiple threads at once 340 | struct QueuedWorkProcessorTask : public TaskBase 341 | { 342 | const bool m_discardOnExit; 343 | CEvent m_doneEvent; 344 | CSemaphore m_dataReadySem, // Threads (QueuedWorkProcessorTask::Invoke()) are stuck in this semaphore. Pushing a Task will Release one so one of them can wake-the-F@#K-up and work 345 | m_dataProcessedSem; 346 | NRingBuffer m_taskQueue; 347 | CCriticalSection m_taskQueueLock; 348 | 349 | QueuedWorkProcessorTask(bool discardOnExit); 350 | virtual void Invoke(); 351 | virtual void Done(); 352 | #ifdef DBGTHREAD 353 | const char *getDbgString() { return __FUNCTION__; }; 354 | #endif 355 | }; 356 | //this is only non-null if you are using NWTPS_SHARED_QUEUE 357 | QueuedWorkProcessorTask* m_queueTask; 358 | 359 | public: 360 | /// \brief constructor 361 | ThreadWorkerPool(::uint numThreads, bool discardQueuedOnExit = false, bool waitAleratableOnExit = false, NWORKER_THREADPOOL_SCHEDULE sched = NWTPS_ROUND_ROBIN, const std::string& threadName = std::string()); 362 | /// this will block until all the threads are done 363 | ~ThreadWorkerPool(); 364 | 365 | ::uint getThreadCount() { return m_threadCount; } 366 | ThreadWorker * getThreadWorker(int n); 367 | /// this destroys the task when it's done 368 | void pushTask(TaskBase* task); 369 | 370 | void SetBackgroundMode(bool b); 371 | void FlushTasks(bool waitAlertable = false); 372 | void Terminate(); 373 | }; 374 | 375 | 376 | -------------------------------------------------------------------------------- /mt/CThreadWork.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/gl_vk_bk3dthreaded/9b4211566d7e315644b8e9f3e86a26f13b78242c/mt/CThreadWork.pptx -------------------------------------------------------------------------------- /mt/RingBuffer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2016-2024 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #ifndef ThreadTest_RingBuffer_h 21 | #define ThreadTest_RingBuffer_h 22 | 23 | //#pragma mark - Ring Buffer // MacOSX thing 24 | #include 25 | #include 26 | 27 | /******************************************************************************/ 28 | /** 29 | ** \brief Ring buffer implementation "a la STL"... 30 | **/ 31 | template > 32 | class NRingBuffer 33 | { 34 | public: 35 | template 36 | class iterator_impl : public std::iterator 37 | { 38 | friend class NRingBuffer; 39 | V* m_buffer; 40 | size_t m_capacity; 41 | V* m_ptr; 42 | //if the buffer is full end (one after the last) will == begin which means empty...we need to fake this 43 | mutable bool m_isFreshBeginAndIsFull; 44 | mutable bool m_isFreshEndAndIsFull; 45 | iterator_impl(V* b, size_t c, V* curPtr, bool freshB, bool freshE) 46 | : m_buffer(b) 47 | , m_capacity(c) 48 | , m_ptr(curPtr) 49 | , m_isFreshBeginAndIsFull(freshB) 50 | , m_isFreshEndAndIsFull(freshE){}; 51 | 52 | public: 53 | iterator_impl(const iterator_impl& i) 54 | : m_buffer(i.m_buffer) 55 | , m_capacity(i.m_capacity) 56 | , m_ptr(i.m_ptr) 57 | , m_isFreshBeginAndIsFull(i.m_isFreshBeginAndIsFull) 58 | , m_isFreshEndAndIsFull(i.m_isFreshEndAndIsFull){}; 59 | iterator_impl& operator++() 60 | { 61 | m_isFreshBeginAndIsFull = false; //once you move them they are not fresh 62 | m_isFreshEndAndIsFull = false; 63 | m_ptr++; 64 | assert(m_ptr <= (m_buffer + m_capacity)); 65 | if(m_ptr == (m_buffer + m_capacity)) 66 | m_ptr = m_buffer; 67 | 68 | return *this; 69 | } 70 | iterator_impl operator++(int) 71 | { 72 | iterator_impl tmp(*this); 73 | operator++(); 74 | return tmp; 75 | } 76 | 77 | iterator_impl& operator--() 78 | { 79 | assert(m_ptr >= m_buffer); 80 | m_isFreshBeginAndIsFull = false; 81 | m_isFreshEndAndIsFull = false; 82 | m_ptr--; 83 | if(m_ptr == (m_buffer - 1)) 84 | m_ptr += m_capacity; 85 | 86 | return *this; 87 | } 88 | iterator_impl operator--(int) 89 | { 90 | iterator_impl tmp(*this); 91 | operator--(); 92 | return tmp; 93 | } 94 | 95 | bool operator==(const iterator_impl& rhs) const 96 | { 97 | if(m_ptr == rhs.m_ptr) 98 | { 99 | if((m_isFreshBeginAndIsFull && rhs.m_isFreshEndAndIsFull) || (m_isFreshEndAndIsFull && rhs.m_isFreshBeginAndIsFull)) 100 | return false; //if we are full then initally ptr begin == ptr end and that violates how iterators work 101 | else 102 | return true; 103 | } 104 | else 105 | { 106 | return false; 107 | } 108 | } 109 | bool operator!=(const iterator_impl& rhs) const { return !(operator==(rhs)); } 110 | V& operator*() const { return *m_ptr; } 111 | 112 | //convert to non-const 113 | operator iterator_impl() 114 | { 115 | return iterator_impl(m_buffer, m_capacity, m_ptr, m_isFull, m_isFreshBeginAndIsFull, m_isFreshEndAndIsFull); 116 | } 117 | }; 118 | 119 | template 120 | friend class iterator_impl; 121 | 122 | //typedefs to make our class stl-compliant 123 | typedef iterator_impl const_iterator; 124 | typedef iterator_impl iterator; 125 | typedef size_t size_type; 126 | typedef ptrdiff_t difference_type; 127 | typedef T value_type; 128 | typedef Alloc allocator_type; 129 | typedef typename std::allocator_traits::pointer pointer; 130 | typedef typename std::allocator_traits::const_pointer const_pointer; 131 | 132 | 133 | iterator begin() { return iterator(m_buffer, m_capacity, m_readPtr, m_isFull, false); } 134 | 135 | iterator end() 136 | { 137 | if(m_capacity > 0) 138 | return 
iterator(m_buffer, m_capacity, (((m_readPtr - m_buffer) + GetStoredSize()) % m_capacity) + m_buffer, false, m_isFull); 139 | else 140 | return begin(); 141 | } 142 | 143 | const_iterator begin() const { return const_iterator(m_buffer, m_capacity, m_readPtr, m_isFull, false); } 144 | 145 | const_iterator end() const 146 | { 147 | if(m_capacity > 0) 148 | return const_iterator(m_buffer, m_capacity, (((m_readPtr - m_buffer) + GetStoredSize()) % m_capacity) + m_buffer, 149 | false, m_isFull); 150 | else 151 | return begin(); 152 | } 153 | 154 | enum OVERFLOW_BEHAVIOR 155 | { 156 | OF_FAIL, //return failure and do nothing 157 | OF_DISCARD, //overwrite the oldest data 158 | OF_EXPAND, //resize 159 | }; 160 | 161 | NRingBuffer(size_t capacity, OVERFLOW_BEHAVIOR overflow = OF_EXPAND, const Alloc& alloc = Alloc()) 162 | : m_overflowBehavoir(overflow) 163 | , m_capacity(capacity) 164 | , m_isFull(false) 165 | , m_allocator(alloc) 166 | { 167 | m_buffer = m_capacity ? m_allocator.allocate(m_capacity) : NULL; 168 | m_readPtr = m_writePtr = m_buffer; 169 | } 170 | 171 | NRingBuffer(const NRingBuffer& src) 172 | : m_overflowBehavoir(src.m_overflowBehavoir) 173 | , m_capacity(src.m_capacity) 174 | , m_isFull(src.m_isFull) 175 | , m_allocator(src.m_allocator) 176 | { 177 | CopyFrom(src); 178 | } 179 | 180 | NRingBuffer& operator=(const NRingBuffer& rhs) 181 | { 182 | if(this == &rhs) 183 | return *this; 184 | 185 | KillBuffer(); 186 | CopyFrom(rhs); 187 | return *this; 188 | } 189 | 190 | 191 | ~NRingBuffer() { KillBuffer(); } 192 | 193 | size_t GetCapacity() const { return m_capacity; } 194 | 195 | void Reset(size_t newCapacity) 196 | { 197 | if(newCapacity != m_capacity) 198 | { 199 | KillBuffer(); 200 | m_capacity = newCapacity; 201 | m_buffer = m_capacity ? m_allocator.allocate(m_capacity) : NULL; 202 | } 203 | m_readPtr = m_writePtr = m_buffer; 204 | m_isFull = false; 205 | } 206 | 207 | void Reset() { Reset(GetCapacity()); } 208 | 209 | size_t GetFreeSize() const { return m_capacity - GetStoredSize(); } 210 | 211 | size_t GetStoredSize() const 212 | { 213 | if(m_writePtr < m_readPtr) //it wraps around 214 | { 215 | size_t rightStored = (m_buffer + m_capacity) - m_readPtr; //stuff after the read pointer 216 | size_t leftStored = m_writePtr - m_buffer; //stuff before the end read pointer 217 | return rightStored + leftStored; 218 | } 219 | else 220 | { 221 | return m_isFull ? 
m_capacity : (m_writePtr - m_readPtr); 222 | } 223 | } 224 | 225 | bool WriteData(const T* src, size_t len) 226 | { 227 | size_t freeSize = GetFreeSize(); 228 | if(len > freeSize) 229 | { 230 | switch(m_overflowBehavoir) 231 | { 232 | case OF_FAIL: { 233 | return false; //just fail 234 | } 235 | case OF_DISCARD: { 236 | if(len > m_capacity) //if its larget than our total size or would over 237 | return false; 238 | ConsumeData(len - freeSize); 239 | break; 240 | } 241 | case OF_EXPAND: { 242 | size_t storedSize = GetStoredSize(); 243 | size_t newCap = m_capacity * 2; 244 | while((len + storedSize) > newCap) 245 | { 246 | newCap *= 2; 247 | } 248 | T* newBuffer = m_allocator.allocate(newCap); 249 | ReadDataImpl(newBuffer, storedSize, true); //copy over data calling constructors 250 | T* newRead = newBuffer; 251 | T* newWrite = newBuffer + storedSize; 252 | 253 | KillBuffer(); 254 | 255 | m_buffer = newBuffer; 256 | m_writePtr = newWrite; 257 | m_readPtr = newRead; 258 | m_capacity = newCap; 259 | //NOutputString("Expanded\n"); 260 | } 261 | break; 262 | } 263 | } 264 | 265 | size_t rightFree = (m_writePtr >= m_readPtr && !m_isFull) ? (m_buffer + m_capacity) - m_writePtr : 266 | m_readPtr - m_writePtr; //stuff after the write pointer 267 | size_t leftFree = (m_writePtr >= m_readPtr && !m_isFull) ? m_readPtr - m_buffer : 0; //stuff before the read pointer 268 | 269 | size_t writeAmt = rightFree < len ? rightFree : len; 270 | for(size_t i = 0; i < writeAmt; i++) 271 | { 272 | std::allocator_traits::construct(m_allocator, &m_writePtr[i], src[i]); //copy construct the objects 273 | } 274 | m_writePtr += writeAmt; 275 | 276 | assert(m_writePtr <= (m_buffer + m_capacity)); 277 | if(m_writePtr == (m_buffer + m_capacity)) 278 | m_writePtr = m_buffer; 279 | 280 | if(len > writeAmt) 281 | { 282 | assert((len - writeAmt) <= leftFree); 283 | for(size_t i = 0; i < (len - writeAmt); i++) 284 | { 285 | std::allocator_traits::construct(m_allocator, &m_writePtr[i], src[i + writeAmt]); //copy construct the objects 286 | } 287 | m_writePtr += (len - writeAmt); 288 | } 289 | 290 | m_isFull = (m_writePtr == m_readPtr && (len || m_isFull)); 291 | return true; 292 | } 293 | 294 | bool WriteData(const T& d) { return WriteData(&d, 1); } 295 | 296 | size_t ReadData(T* dest, size_t destSize) { return ReadDataImpl(dest, destSize, false); } 297 | 298 | bool ReadData(T& dest) { return ReadData(&dest, 1) > 0; } 299 | 300 | size_t ConsumeData(size_t count) { return ReadData(NULL, count); } 301 | 302 | //reads data but doesn't remove it 303 | size_t PeekData(T* dest, size_t destSize, size_t offset = 0) const 304 | { 305 | //spoof the offset as a largeer buffer 306 | destSize += offset; 307 | 308 | size_t rightStored = (m_writePtr < m_readPtr || m_isFull) ? (m_buffer + m_capacity) - m_readPtr : 309 | m_writePtr - m_readPtr; //stuff after the read pointer 310 | size_t leftStored = (m_writePtr < m_readPtr || m_isFull) ? 
m_writePtr - m_buffer : 0; //stuff before the end read pointer 311 | 312 | size_t readFromRightSize = std::min(rightStored, destSize); 313 | if(dest && offset <= readFromRightSize) 314 | dest = std::copy(m_readPtr + offset, m_readPtr + readFromRightSize, dest); //readFromRightSize has offset baked in 315 | 316 | T* nextPtr = m_readPtr + readFromRightSize; 317 | 318 | destSize -= readFromRightSize; 319 | size_t readFromLeftSize = std::min(leftStored, destSize); 320 | 321 | assert(nextPtr <= (m_buffer + m_capacity)); 322 | if(nextPtr == (m_buffer + m_capacity)) 323 | nextPtr = m_buffer; 324 | 325 | if(dest) 326 | std::copy(nextPtr, nextPtr + readFromLeftSize, dest); 327 | 328 | size_t readAmt = readFromRightSize + readFromLeftSize - offset; 329 | return readAmt; 330 | } 331 | 332 | bool PeekData(T& dest, size_t offset = 0) const { return PeekData(&dest, 1, offset) > 0; } 333 | 334 | private: 335 | T* m_buffer; 336 | T* m_writePtr; 337 | T* m_readPtr; 338 | 339 | size_t m_capacity; 340 | bool m_isFull; 341 | 342 | OVERFLOW_BEHAVIOR m_overflowBehavoir; 343 | Alloc m_allocator; 344 | 345 | size_t ReadDataImpl(T* dest, size_t destSize, bool construct) 346 | { 347 | size_t rightStored = (m_writePtr < m_readPtr || m_isFull) ? (m_buffer + m_capacity) - m_readPtr : 348 | m_writePtr - m_readPtr; //stuff after the read pointer 349 | size_t leftStored = (m_writePtr < m_readPtr || m_isFull) ? m_writePtr - m_buffer : 0; //stuff before the end read pointer 350 | 351 | size_t readFromRightSize = (rightStored < destSize) ? rightStored : destSize; 352 | 353 | for(size_t i = 0; i < readFromRightSize; i++) 354 | { 355 | if(dest) 356 | { 357 | if(construct) 358 | std::allocator_traits::construct(m_allocator, &dest[i], m_readPtr[i]); 359 | else 360 | dest[i] = m_readPtr[i]; 361 | } 362 | 363 | std::allocator_traits::destroy(m_allocator, &m_readPtr[i]); 364 | } 365 | 366 | 367 | m_readPtr += readFromRightSize; 368 | 369 | destSize -= readFromRightSize; 370 | size_t readFromLeftSize = (leftStored < destSize) ? leftStored : destSize; 371 | 372 | assert(m_readPtr <= (m_buffer + m_capacity)); 373 | if(m_readPtr == (m_buffer + m_capacity)) 374 | m_readPtr = m_buffer; 375 | 376 | if(readFromLeftSize > 0) 377 | { 378 | for(size_t i = 0; i < readFromLeftSize; i++) 379 | { 380 | if(dest) 381 | { 382 | if(construct) 383 | std::allocator_traits::construct(m_allocator, &dest[i + readFromRightSize], m_readPtr[i]); 384 | else 385 | dest[i + readFromRightSize] = m_readPtr[i]; 386 | } 387 | 388 | std::allocator_traits::destroy(m_allocator, &m_readPtr[i]); 389 | } 390 | 391 | m_readPtr += readFromLeftSize; 392 | } 393 | 394 | size_t readAmt = readFromRightSize + readFromLeftSize; 395 | m_isFull = (readAmt == 0 && m_isFull); 396 | return readAmt; 397 | } 398 | 399 | void KillBuffer() 400 | { 401 | iterator e = end(); 402 | for(iterator i = begin(); i != e; i++) 403 | std::allocator_traits::destroy(m_allocator, &(*i)); 404 | 405 | m_allocator.deallocate(m_buffer, m_capacity); 406 | } 407 | 408 | void CopyFrom(const NRingBuffer& src) 409 | { 410 | m_overflowBehavoir = src.m_overflowBehavoir; 411 | m_capacity = src.m_capacity; 412 | m_isFull = src.m_isFull; 413 | m_allocator = src.m_allocator; 414 | 415 | m_buffer = m_capacity ? 
m_allocator.allocate(m_capacity) : NULL; 416 | m_readPtr = (src.m_readPtr - src.m_buffer) + m_buffer; 417 | m_writePtr = (src.m_writePtr - src.m_buffer) + m_buffer; 418 | const_iterator citb = src.begin(); 419 | const_iterator cite = src.end(); 420 | 421 | iterator itb = begin(); 422 | iterator ite = end(); 423 | while(citb != cite) 424 | { 425 | m_allocator.construct(&(*itb), *citb); 426 | itb++; 427 | citb++; 428 | } 429 | } 430 | }; 431 | 432 | 433 | #endif 434 | -------------------------------------------------------------------------------- /window_surface_vk.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #ifndef NV_VK_DEFAULTWINDOWSURFACE_INCLUDED 22 | #define NV_VK_DEFAULTWINDOWSURFACE_INCLUDED 23 | 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | #define VK_MAX_QUEUED_FRAMES 4 34 | #define MAX_POSSIBLE_BACK_BUFFERS 16 35 | 36 | 37 | /* 38 | WindowSurface is a basic implementation of whatever is required to have a regular color+Depthstencil setup realted to a window 39 | This class is *not mandatory* for a sample to run. It's just a convenient way to have something put together for quick 40 | rendering in a window 41 | - a render-pass associated with the framebuffer(s) 42 | - buffers/framebuffers associated with the views of the window 43 | - command-buffers to match the current swapchain index 44 | typical use : 45 | 0) ... 46 | m_WindowSurface.acquire() 47 | ... 48 | 1) m_WindowSurface.setClearValue(); 49 | VkCommandBuffer command_buffer = m_windowSurface.beginCommandBuffer(); 50 | 2) m_windowSurface.beginRenderPass(); 51 | vkCmd...() 52 | ... 53 | 3) //for MSAA case: advances in the sub-pass to render *after* the resolve of AA 54 | m_windowSurface.nextSubPassForOverlay(); 55 | ... draw some non MSAA stuff (UI...) 
56 | 4) m_windowSurface.endRPassCBufferSubmitAndPresent(); 57 | */ 58 | class WindowSurface { 59 | public: 60 | nvvk::SwapChain m_swapChain; 61 | private: 62 | nvvk::Context* m_pContext; 63 | VkSurfaceKHR m_surface; 64 | // framebuffer size and # of samples 65 | int fb_width = 0, fb_height = 0; 66 | VkSampleCountFlagBits m_samples = VK_SAMPLE_COUNT_1_BIT; 67 | bool m_swapVsync; 68 | 69 | VkClearColorValue m_clearColor; 70 | VkClearDepthStencilValue m_clearDST; 71 | 72 | VkRenderPass m_renderPass = VK_NULL_HANDLE; 73 | 74 | VkCommandPool m_commandPool[VK_MAX_QUEUED_FRAMES]; 75 | VkCommandBuffer m_curCommandBuffer = VK_NULL_HANDLE; 76 | VkCommandBuffer m_commandBuffer[VK_MAX_QUEUED_FRAMES]; 77 | VkFence m_curFence = VK_NULL_HANDLE; 78 | VkFence m_fence[VK_MAX_QUEUED_FRAMES]; 79 | 80 | VkFramebuffer m_framebuffer[MAX_POSSIBLE_BACK_BUFFERS] = {}; 81 | 82 | VkImage m_depthImage = {}; 83 | VkImage m_msaaColorImage = {}; 84 | VkDeviceMemory m_depthImageMemory = {}; 85 | VkDeviceMemory m_msaaColorImageMemory = {}; 86 | VkImageView m_depthImageView = {}; 87 | VkImageView m_msaaColorImageView = {}; 88 | 89 | VkAllocationCallbacks *m_allocator = VK_NULL_HANDLE; 90 | 91 | bool hasStencilComponent(VkFormat format); 92 | 93 | public: 94 | bool init(nvvk::Context* pContext, NVPWindow* pWin, int MSAA); 95 | void deinit(); 96 | bool resize(int w, int h); 97 | void createFrameBuffer(); 98 | //void createImageViews(); 99 | void createRenderPass(); 100 | 101 | void acquire(); 102 | VkCommandBuffer beginCommandBuffer(VkCommandBufferUsageFlags flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); 103 | void beginRenderPass(VkSubpassContents contents = VK_SUBPASS_CONTENTS_INLINE); // could be VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS 104 | void nextSubPassForOverlay(VkSubpassContents contents = VK_SUBPASS_CONTENTS_INLINE); 105 | void endRenderPass(); 106 | void endCommandBuffer(); 107 | void submit(); 108 | void present() 109 | { 110 | m_swapChain.present(m_pContext->m_queueGCT);//m_presentQueue.queue); 111 | } 112 | void endCBufferSubmitAndPresent() // does the 3 methods in 1 call 113 | { 114 | endCommandBuffer(); 115 | submit(); 116 | present(); 117 | } 118 | void endRPassCBufferSubmitAndPresent() // does the 4 methods in 1 call 119 | { 120 | endRenderPass(); 121 | endCommandBuffer(); 122 | submit(); 123 | present(); 124 | } 125 | void createDepthResources(); 126 | void createMSAAColorResources(); 127 | void swapVsync(bool state) 128 | { 129 | if (m_swapVsync != state) 130 | { 131 | m_swapChain.update(fb_width, fb_height, state); 132 | m_swapVsync = state; 133 | } 134 | } 135 | // 136 | // Setters 137 | // 138 | void setClearValue(VkClearColorValue clearColor, VkClearDepthStencilValue clearDST = { 1.0f, 0 }) 139 | { 140 | m_clearColor = clearColor; 141 | m_clearDST = clearDST; 142 | } 143 | void setClearValue(VkClearValue clearColor, VkClearValue clearDST = { 1.0f, 0 }) 144 | { 145 | m_clearColor = clearColor.color; 146 | m_clearDST = clearDST.depthStencil; 147 | } 148 | // 149 | // getters 150 | // 151 | uint32_t getHeight() { return fb_height; } 152 | uint32_t getWidth() { return fb_width; } 153 | uint32_t getFrameIndex() { return m_swapChain.getActiveImageIndex(); } 154 | const VkRenderPass &getRenderPass() { return m_renderPass; } 155 | VkFormat getSurfaceFormat() const { return m_swapChain.getFormat(); } 156 | VkImage getCurrentBackBuffer() const { return m_swapChain.getActiveImage(); } 157 | VkImageView getCurrentBackBufferView() const { return m_swapChain.getActiveImageView(); } 158 | 
VkCommandBuffer getCurrentCommandBuffer() { return m_curCommandBuffer; } 159 | VkFramebuffer getCurrentFramebuffer() { return m_framebuffer[m_swapChain.getActiveImageIndex()]; } 160 | nvvk::Context* getContext() { return m_pContext; } 161 | }; 162 | 163 | #endif --------------------------------------------------------------------------------
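
A few illustrative usage sketches follow; they are not files from the repository, only hedged examples of how the pieces dumped above appear intended to be used. First, the pthread-backed CEvent implemented in mt/CThread.cpp (declared in mt/CThread.h): one thread blocks in WaitOnEvent() until another calls Set(). The producer()/consumer() functions and the 1000 ms timeout below are made up for illustration.

#include "CThread.h"

// auto-reset event (manualReset = false), initially unsignaled
CEvent g_workReady(false, false);

void producer()
{
    // ... publish some work for the consumer ...
    g_workReady.Set();                 // broadcasts the condition; with auto-reset, the first waiter
                                       // through the mutex consumes the signal and clears it
}

void consumer()
{
    if(g_workReady.WaitOnEvent(1000))  // absolute deadline built internally from gettimeofday(); false on timeout
    {
        // signaled: pick up the work
    }
}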
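Next, the worker-pool pattern described by the TaskBase and ThreadWorkerPool comments in mt/CThreadWork.h: derive a task, capture its arguments in the constructor, and let a worker call Invoke(). ProcessMeshTask, the mesh count and the thread count are hypothetical placeholders.

#include "CThreadWork.h"

class ProcessMeshTask : public TaskBase
{
    int m_meshIndex;                              // arguments are captured at construction time...
public:
    ProcessMeshTask(int meshIndex) : m_meshIndex(meshIndex) {}
    virtual void Invoke()                         // ...and consumed when a worker runs the task
    {
        // per-mesh work would go here (hypothetical)
    }
};

void buildAllMeshes()
{
    ThreadWorkerPool pool(4, false, false, NWTPS_LEAST_QUEUED_TASKS, "worker");
    for(int m = 0; m < 128; m++)
        pool.pushTask(new ProcessMeshTask(m));    // the pool deletes each task once it is done
    pool.FlushTasks();                            // block until every queued task has completed
}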
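The same header relies on NRingBuffer from mt/RingBuffer.h. The template parameter list (element type plus an optional allocator) did not survive in the dump above, so the NRingBuffer<int> spelling here is inferred; the member functions are used as declared.

#include "RingBuffer.h"

void ringBufferDemo()
{
    NRingBuffer<int> ring(4);            // default overflow behavior is OF_EXPAND: grow instead of failing

    int in[6] = { 0, 1, 2, 3, 4, 5 };
    ring.WriteData(in, 6);               // 6 > capacity 4, so the storage doubles to 8

    int oldest = -1;
    ring.PeekData(oldest);               // non-destructive read of the oldest element (0)

    int out[6] = {};
    size_t n = ring.ReadData(out, 6);    // destructive read; returns how many elements were popped (6 here)
    (void)n;                             // GetStoredSize() is back to 0 at this point
}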
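Finally, a per-frame sketch following the numbered "typical use" comment at the top of window_surface_vk.hpp, assuming init() has already been called on the WindowSurface; the commented-out drawScene()/drawUI() calls stand in for real vkCmd* recording and are not part of the sample.

#include "window_surface_vk.hpp"

void renderOneFrame(WindowSurface& surface)
{
    surface.acquire();                               // 0) acquire the next swapchain image

    VkClearColorValue clearColor = {{ 0.1f, 0.1f, 0.15f, 1.0f }};
    surface.setClearValue(clearColor);               // 1) set clear values, then start this frame's command buffer
    VkCommandBuffer cmd = surface.beginCommandBuffer();
    (void)cmd;                                       //    vkCmd* recording would target 'cmd'

    surface.beginRenderPass();                       // 2) color + depth-stencil render pass
    // drawScene(cmd);                               //    MSAA scene rendering (hypothetical)

    surface.nextSubPassForOverlay();                 // 3) sub-pass that runs after the MSAA resolve
    // drawUI(cmd);                                  //    non-MSAA overlay such as UI (hypothetical)

    surface.endRPassCBufferSubmitAndPresent();       // 4) end render pass + end command buffer + submit + present
}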