├── .clang-format ├── .gitattributes ├── .gitignore ├── 3rdparty ├── GLSL.std.450.h ├── decompress.c ├── decompress.h ├── microprofile.cpp ├── microprofile.h ├── microprofile_html.h ├── spirv.hpp ├── vk_icd.h ├── vk_platform.h └── vulkan.h ├── LICENSE.md ├── README.md ├── buffers.cpp ├── cmd_alloc.cpp ├── cmd_exec.cpp ├── cmd_record.cpp ├── commands.h ├── descriptors.cpp ├── gpu.h ├── icd_interface.cpp ├── icd_stubs.cpp ├── images.cpp ├── memory.cpp ├── precompiled.cpp ├── precompiled.h ├── query.cpp ├── rasterizer.cpp ├── renderpass.cpp ├── shaders.cpp ├── spirv_compile.cpp ├── spirv_compile.h ├── stats.cpp ├── stats.h ├── texture_sampling.cpp ├── visor.json ├── visor.sln ├── visor.vcxproj ├── visor.vcxproj.filters └── wsi.cpp /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | # BasedOnStyle: Chromium 4 | AccessModifierOffset: -2 5 | AlignAfterOpenBracket: Align 6 | AlignConsecutiveAssignments: false 7 | AlignConsecutiveDeclarations: false 8 | AlignEscapedNewlinesLeft: true 9 | AlignOperands: true 10 | AlignTrailingComments: true 11 | AllowAllParametersOfDeclarationOnNextLine: false 12 | AllowShortBlocksOnASingleLine: false 13 | AllowShortCaseLabelsOnASingleLine: true 14 | AllowShortFunctionsOnASingleLine: Inline 15 | AllowShortIfStatementsOnASingleLine: false 16 | AllowShortLoopsOnASingleLine: false 17 | AlwaysBreakAfterDefinitionReturnType: None 18 | AlwaysBreakAfterReturnType: None 19 | AlwaysBreakBeforeMultilineStrings: true 20 | AlwaysBreakTemplateDeclarations: true 21 | BinPackArguments: true 22 | BinPackParameters: true 23 | BraceWrapping: 24 | AfterClass: true 25 | AfterControlStatement: true 26 | AfterEnum: true 27 | AfterFunction: true 28 | AfterNamespace: true 29 | AfterObjCDeclaration: true 30 | AfterStruct: true 31 | AfterUnion: true 32 | BeforeCatch: true 33 | BeforeElse: true 34 | IndentBraces: false 35 | BreakBeforeBinaryOperators: None 36 | BreakBeforeBraces: Custom 37 | 
BreakBeforeTernaryOperators: true 38 | BreakConstructorInitializersBeforeComma: false 39 | ColumnLimit: 100 40 | CommentPragmas: '^ IWYU pragma:' 41 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 42 | ConstructorInitializerIndentWidth: 4 43 | ContinuationIndentWidth: 4 44 | Cpp11BracedListStyle: true 45 | DerivePointerAlignment: false 46 | DisableFormat: false 47 | ExperimentalAutoDetectBinPacking: false 48 | ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] 49 | IncludeCategories: 50 | - Regex: 'precompiled.h' 51 | Priority: -1 52 | - Regex: '<.*\.h>' 53 | Priority: 1 54 | - Regex: '^<[^.]*>' 55 | Priority: 2 56 | - Regex: '.*/.*' 57 | Priority: 3 58 | - Regex: '.*' 59 | Priority: 4 60 | IndentCaseLabels: true 61 | IndentWidth: 2 62 | IndentWrappedFunctionNames: false 63 | KeepEmptyLinesAtTheStartOfBlocks: false 64 | MacroBlockBegin: '' 65 | MacroBlockEnd: '' 66 | MaxEmptyLinesToKeep: 1 67 | NamespaceIndentation: None 68 | ObjCBlockIndentWidth: 2 69 | ObjCSpaceAfterProperty: false 70 | ObjCSpaceBeforeProtocolList: false 71 | PenaltyBreakBeforeFirstCallParameter: 1 72 | PenaltyBreakComment: 300 73 | PenaltyBreakFirstLessLess: 120 74 | PenaltyBreakString: 1000 75 | PenaltyExcessCharacter: 10 76 | PenaltyReturnTypeOnItsOwnLine: 2000 77 | PointerAlignment: Right 78 | ReflowComments: true 79 | SortIncludes: true 80 | SpaceAfterCStyleCast: false 81 | SpaceBeforeAssignmentOperators: true 82 | SpaceBeforeParens: Never 83 | SpaceInEmptyParentheses: false 84 | SpacesBeforeTrailingComments: 4 85 | SpacesInAngles: false 86 | SpacesInContainerLiterals: true 87 | SpacesInCStyleCastParentheses: false 88 | SpacesInParentheses: false 89 | SpacesInSquareBrackets: false 90 | Standard: Auto 91 | TabWidth: 2 92 | UseTab: Never 93 | ... 
94 | 95 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Based on .gitattributes template from https://help.github.com/articles/dealing-with-line-endings 2 | 3 | # Set default behaviour, in case users don't have core.autocrlf set. 4 | * text=auto 5 | 6 | # Explicitly declare text files we want to always be normalized and converted 7 | # to native line endings on checkout. 8 | *.c text 9 | *.cc text 10 | *.cpp text 11 | *.h text 12 | *.hlsl text 13 | *.htm text 14 | *.md text 15 | *.xml text 16 | *.txt text 17 | 18 | # Declare files that will always have CRLF line endings on checkout. 19 | *.sln text eol=crlf 20 | *.csproj text eol=crlf 21 | *.vcxproj text eol=crlf 22 | *.shfbproj text eol=crlf 23 | 24 | *.resx text eol=crlf 25 | *.aml text eol=crlf 26 | 27 | *.cs text eol=crlf diff=csharp 28 | 29 | # Denote all files that are truly binary and should not be modified. 30 | *.png binary 31 | *.jpg binary 32 | *.bmp binary 33 | *.ico binary 34 | 35 | *.exe binary 36 | *.dll binary 37 | *.lib binary 38 | *.pdb binary 39 | 40 | *.rtf binary 41 | *.snk binary 42 | *.rc binary 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Win32/ 2 | x64/ 3 | .vs/ 4 | ipch/ 5 | *.VC.* 6 | *.user 7 | -------------------------------------------------------------------------------- /3rdparty/GLSL.std.450.h: -------------------------------------------------------------------------------- 1 | /* 2 | ** Copyright (c) 2014-2016 The Khronos Group Inc. 
3 | ** 4 | ** Permission is hereby granted, free of charge, to any person obtaining a copy 5 | ** of this software and/or associated documentation files (the "Materials"), 6 | ** to deal in the Materials without restriction, including without limitation 7 | ** the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | ** and/or sell copies of the Materials, and to permit persons to whom the 9 | ** Materials are furnished to do so, subject to the following conditions: 10 | ** 11 | ** The above copyright notice and this permission notice shall be included in 12 | ** all copies or substantial portions of the Materials. 13 | ** 14 | ** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS 15 | ** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND 16 | ** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ 17 | ** 18 | ** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 | ** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | ** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 | ** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | ** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | ** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS 24 | ** IN THE MATERIALS. 
25 | */ 26 | 27 | #ifndef GLSLstd450_H 28 | #define GLSLstd450_H 29 | 30 | static const int GLSLstd450Version = 100; 31 | static const int GLSLstd450Revision = 3; 32 | 33 | enum GLSLstd450 { 34 | GLSLstd450Bad = 0, // Don't use 35 | 36 | GLSLstd450Round = 1, 37 | GLSLstd450RoundEven = 2, 38 | GLSLstd450Trunc = 3, 39 | GLSLstd450FAbs = 4, 40 | GLSLstd450SAbs = 5, 41 | GLSLstd450FSign = 6, 42 | GLSLstd450SSign = 7, 43 | GLSLstd450Floor = 8, 44 | GLSLstd450Ceil = 9, 45 | GLSLstd450Fract = 10, 46 | 47 | GLSLstd450Radians = 11, 48 | GLSLstd450Degrees = 12, 49 | GLSLstd450Sin = 13, 50 | GLSLstd450Cos = 14, 51 | GLSLstd450Tan = 15, 52 | GLSLstd450Asin = 16, 53 | GLSLstd450Acos = 17, 54 | GLSLstd450Atan = 18, 55 | GLSLstd450Sinh = 19, 56 | GLSLstd450Cosh = 20, 57 | GLSLstd450Tanh = 21, 58 | GLSLstd450Asinh = 22, 59 | GLSLstd450Acosh = 23, 60 | GLSLstd450Atanh = 24, 61 | GLSLstd450Atan2 = 25, 62 | 63 | GLSLstd450Pow = 26, 64 | GLSLstd450Exp = 27, 65 | GLSLstd450Log = 28, 66 | GLSLstd450Exp2 = 29, 67 | GLSLstd450Log2 = 30, 68 | GLSLstd450Sqrt = 31, 69 | GLSLstd450InverseSqrt = 32, 70 | 71 | GLSLstd450Determinant = 33, 72 | GLSLstd450MatrixInverse = 34, 73 | 74 | GLSLstd450Modf = 35, // second operand needs an OpVariable to write to 75 | GLSLstd450ModfStruct = 36, // no OpVariable operand 76 | GLSLstd450FMin = 37, 77 | GLSLstd450UMin = 38, 78 | GLSLstd450SMin = 39, 79 | GLSLstd450FMax = 40, 80 | GLSLstd450UMax = 41, 81 | GLSLstd450SMax = 42, 82 | GLSLstd450FClamp = 43, 83 | GLSLstd450UClamp = 44, 84 | GLSLstd450SClamp = 45, 85 | GLSLstd450FMix = 46, 86 | GLSLstd450IMix = 47, // Reserved 87 | GLSLstd450Step = 48, 88 | GLSLstd450SmoothStep = 49, 89 | 90 | GLSLstd450Fma = 50, 91 | GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to 92 | GLSLstd450FrexpStruct = 52, // no OpVariable operand 93 | GLSLstd450Ldexp = 53, 94 | 95 | GLSLstd450PackSnorm4x8 = 54, 96 | GLSLstd450PackUnorm4x8 = 55, 97 | GLSLstd450PackSnorm2x16 = 56, 98 | GLSLstd450PackUnorm2x16 = 57, 99 
| GLSLstd450PackHalf2x16 = 58, 100 | GLSLstd450PackDouble2x32 = 59, 101 | GLSLstd450UnpackSnorm2x16 = 60, 102 | GLSLstd450UnpackUnorm2x16 = 61, 103 | GLSLstd450UnpackHalf2x16 = 62, 104 | GLSLstd450UnpackSnorm4x8 = 63, 105 | GLSLstd450UnpackUnorm4x8 = 64, 106 | GLSLstd450UnpackDouble2x32 = 65, 107 | 108 | GLSLstd450Length = 66, 109 | GLSLstd450Distance = 67, 110 | GLSLstd450Cross = 68, 111 | GLSLstd450Normalize = 69, 112 | GLSLstd450FaceForward = 70, 113 | GLSLstd450Reflect = 71, 114 | GLSLstd450Refract = 72, 115 | 116 | GLSLstd450FindILsb = 73, 117 | GLSLstd450FindSMsb = 74, 118 | GLSLstd450FindUMsb = 75, 119 | 120 | GLSLstd450InterpolateAtCentroid = 76, 121 | GLSLstd450InterpolateAtSample = 77, 122 | GLSLstd450InterpolateAtOffset = 78, 123 | 124 | GLSLstd450NMin = 79, 125 | GLSLstd450NMax = 80, 126 | GLSLstd450NClamp = 81, 127 | 128 | GLSLstd450Count 129 | }; 130 | 131 | #endif // #ifndef GLSLstd450_H 132 | -------------------------------------------------------------------------------- /3rdparty/decompress.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | DXT1/DXT3/DXT5 texture decompression 7 | 8 | The original code is from Benjamin Dobell, see below for details. Compared to 9 | the original the code is now valid C89, has support for 64-bit architectures 10 | and has been refactored. It also has support for additional formats and uses 11 | a different PackRGBA order. 12 | 13 | --- 14 | 15 | Copyright (c) 2012 - 2015 Matthäus G. 
"Anteru" Chajdas (http://anteru.net) 16 | 17 | Permission is hereby granted, free of charge, to any person obtaining a copy of 18 | this software and associated documentation files (the "Software"), to deal in 19 | the Software without restriction, including without limitation the rights to 20 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 21 | of the Software, and to permit persons to whom the Software is furnished to do 22 | so, subject to the following conditions: 23 | 24 | The above copyright notice and this permission notice shall be included in all 25 | copies or substantial portions of the Software. 26 | 27 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 28 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 29 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 30 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 31 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 32 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 | SOFTWARE. 34 | 35 | --- 36 | 37 | Copyright (C) 2009 Benjamin Dobell, Glass Echidna 38 | 39 | Permission is hereby granted, free of charge, to any person obtaining a copy of 40 | this software and associated documentation files (the "Software"), to deal in 41 | the Software without restriction, including without limitation the rights to 42 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 43 | of the Software, and to permit persons to whom the Software is furnished to do 44 | so, subject to the following conditions: 45 | 46 | The above copyright notice and this permission notice shall be included in all 47 | copies or substantial portions of the Software. 
48 | 49 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 50 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 51 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 52 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 53 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 54 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 55 | SOFTWARE. 56 | 57 | --- 58 | */ 59 | /* Packs four 8-bit channels into one 32-bit value with r in the lowest byte and a in the highest. */ static uint32_t PackRGBA (uint8_t r, uint8_t g, uint8_t b, uint8_t a) 60 | { 61 | return (uint32_t)r | ((uint32_t)g << 8) | ((uint32_t)b << 16) | ((uint32_t)a << 24); /* widen first: shifting a promoted int by 24 overflows when a >= 128 */ 62 | } 63 | 64 | /* Reinterprets the byte as a signed 8-bit value and maps it to a float in [-1, 1]. */ static float Int8ToFloat_SNORM (const uint8_t input) 65 | { 66 | return (input == 0x80) ? -1.0f : (float)((int8_t)input) / 127.0f; /* -128 and -127 both decode to -1.0 */ 67 | } 68 | 69 | /* Maps an unsigned byte to a float in [0, 1]. */ static float Int8ToFloat_UNORM (const uint8_t input) 70 | { 71 | return (float)input / 255.0f; 72 | } 73 | 74 | /** 75 | Decompress a BC 16x3 index block stored as 76 | h g f e 77 | d c b a 78 | p o n m 79 | l k j i 80 | 81 | Bits packed as 82 | 83 | | h | g | f | e | d | c | b | a | // Entry 84 | |765 432 107 654 321 076 543 210| // Bit 85 | |0000000000111111111112222222222| // Byte 86 | 87 | into 16 8-bit indices. 
88 | */ 89 | static void Decompress16x3bitIndices (const uint8_t* packed, uint8_t* unpacked) 90 | { 91 | uint32_t tmp, block, i; 92 | 93 | for (block = 0; block < 2; ++block) { 94 | tmp = 0; 95 | 96 | // Read three bytes 97 | for (i = 0; i < 3; ++i) { 98 | tmp |= ((uint32_t)packed [i]) << (i * 8); 99 | } 100 | 101 | // Unpack 8x3 bit from last 3 byte block 102 | for (i = 0; i < 8; ++i) { 103 | unpacked [i] = (tmp >> (i*3)) & 0x7; 104 | } 105 | 106 | packed += 3; 107 | unpacked += 8; 108 | } 109 | } 110 | 111 | static void DecompressBlockBC1Internal (const uint8_t* block, 112 | unsigned char* output, uint32_t outputStride, const uint8_t* alphaValues) 113 | { 114 | uint32_t temp, code; 115 | 116 | uint16_t color0, color1; 117 | uint8_t r0, g0, b0, r1, g1, b1; 118 | 119 | int i, j; 120 | 121 | color0 = *(const uint16_t*)(block); 122 | color1 = *(const uint16_t*)(block + 2); 123 | 124 | temp = (color0 >> 11) * 255 + 16; 125 | r0 = (uint8_t)((temp/32 + temp)/32); 126 | temp = ((color0 & 0x07E0) >> 5) * 255 + 32; 127 | g0 = (uint8_t)((temp/64 + temp)/64); 128 | temp = (color0 & 0x001F) * 255 + 16; 129 | b0 = (uint8_t)((temp/32 + temp)/32); 130 | 131 | temp = (color1 >> 11) * 255 + 16; 132 | r1 = (uint8_t)((temp/32 + temp)/32); 133 | temp = ((color1 & 0x07E0) >> 5) * 255 + 32; 134 | g1 = (uint8_t)((temp/64 + temp)/64); 135 | temp = (color1 & 0x001F) * 255 + 16; 136 | b1 = (uint8_t)((temp/32 + temp)/32); 137 | 138 | code = *(const uint32_t*)(block + 4); 139 | 140 | if (color0 > color1) { 141 | for (j = 0; j < 4; ++j) { 142 | for (i = 0; i < 4; ++i) { 143 | uint32_t finalColor, positionCode; 144 | uint8_t alpha; 145 | 146 | alpha = alphaValues [j*4+i]; 147 | 148 | finalColor = 0; 149 | positionCode = (code >> 2*(4*j+i)) & 0x03; 150 | 151 | switch (positionCode) { 152 | case 0: 153 | finalColor = PackRGBA(r0, g0, b0, alpha); 154 | break; 155 | case 1: 156 | finalColor = PackRGBA(r1, g1, b1, alpha); 157 | break; 158 | case 2: 159 | finalColor = PackRGBA((2*r0+r1)/3, 
(2*g0+g1)/3, (2*b0+b1)/3, alpha); 160 | break; 161 | case 3: 162 | finalColor = PackRGBA((r0+2*r1)/3, (g0+2*g1)/3, (b0+2*b1)/3, alpha); 163 | break; 164 | } 165 | 166 | *(uint32_t*)(output + j*outputStride + i * sizeof (uint32_t)) = finalColor; 167 | } 168 | } 169 | } else { 170 | for (j = 0; j < 4; ++j) { 171 | for (i = 0; i < 4; ++i) { 172 | uint32_t finalColor, positionCode; 173 | uint8_t alpha; 174 | 175 | alpha = alphaValues [j*4+i]; 176 | 177 | finalColor = 0; 178 | positionCode = (code >> 2*(4*j+i)) & 0x03; 179 | 180 | switch (positionCode) { 181 | case 0: 182 | finalColor = PackRGBA(r0, g0, b0, alpha); 183 | break; 184 | case 1: 185 | finalColor = PackRGBA(r1, g1, b1, alpha); 186 | break; 187 | case 2: 188 | finalColor = PackRGBA((r0+r1)/2, (g0+g1)/2, (b0+b1)/2, alpha); 189 | break; 190 | case 3: 191 | finalColor = PackRGBA(0, 0, 0, alpha); 192 | break; 193 | } 194 | 195 | *(uint32_t*)(output + j*outputStride + i * sizeof (uint32_t)) = finalColor; 196 | } 197 | } 198 | } 199 | } 200 | 201 | /* 202 | Decompresses one block of a BC1 (DXT1) texture and stores the resulting pixels at the appropriate offset in 'image'. 203 | 204 | uint32_t x: x-coordinate of the first pixel in the block. 205 | uint32_t y: y-coordinate of the first pixel in the block. 206 | uint32_t stride: stride of a scanline in bytes. 207 | const uint8_t* blockStorage: pointer to the block to decompress. 208 | uint32_t* image: pointer to image where the decompressed pixel data should be stored. 
209 | */ 210 | void DecompressBlockBC1 (uint32_t x, uint32_t y, uint32_t stride, 211 | const uint8_t* blockStorage, unsigned char* image) 212 | { 213 | static const uint8_t const_alpha [] = { 214 | 255, 255, 255, 255, 215 | 255, 255, 255, 255, 216 | 255, 255, 255, 255, 217 | 255, 255, 255, 255 218 | }; 219 | 220 | DecompressBlockBC1Internal (blockStorage, 221 | image + x * sizeof (uint32_t) + (y * stride), stride, const_alpha); 222 | } 223 | 224 | /* 225 | Decompresses one block of a BC3 (DXT5) texture and stores the resulting pixels at the appropriate offset in 'image'. 226 | 227 | uint32_t x: x-coordinate of the first pixel in the block. 228 | uint32_t y: y-coordinate of the first pixel in the block. 229 | uint32_t stride: stride of a scanline in bytes. 230 | const uint8_t *blockStorage: pointer to the block to decompress. 231 | uint32_t *image: pointer to image where the decompressed pixel data should be stored. 232 | */ 233 | void DecompressBlockBC3 (uint32_t x, uint32_t y, uint32_t stride, 234 | const uint8_t* blockStorage, unsigned char* image) 235 | { 236 | uint8_t alpha0, alpha1; 237 | uint8_t alphaIndices [16]; 238 | 239 | uint16_t color0, color1; 240 | uint8_t r0, g0, b0, r1, g1, b1; 241 | 242 | int i, j; 243 | 244 | uint32_t temp, code; 245 | 246 | alpha0 = *(blockStorage); 247 | alpha1 = *(blockStorage + 1); 248 | 249 | Decompress16x3bitIndices (blockStorage + 2, alphaIndices); 250 | 251 | color0 = *(const uint16_t*)(blockStorage + 8); 252 | color1 = *(const uint16_t*)(blockStorage + 10); 253 | 254 | temp = (color0 >> 11) * 255 + 16; 255 | r0 = (uint8_t)((temp / 32 + temp) / 32); 256 | temp = ((color0 & 0x07E0) >> 5) * 255 + 32; 257 | g0 = (uint8_t)((temp / 64 + temp) / 64); 258 | temp = (color0 & 0x001F) * 255 + 16; 259 | b0 = (uint8_t)((temp / 32 + temp) / 32); 260 | 261 | temp = (color1 >> 11) * 255 + 16; 262 | r1 = (uint8_t)((temp / 32 + temp) / 32); 263 | temp = ((color1 & 0x07E0) >> 5) * 255 + 32; 264 | g1 = (uint8_t)((temp / 64 + temp) / 64); 265 
| temp = (color1 & 0x001F) * 255 + 16; 266 | b1 = (uint8_t)((temp / 32 + temp) / 32); 267 | 268 | code = *(const uint32_t*)(blockStorage + 12); 269 | 270 | for (j = 0; j < 4; j++) { 271 | for (i = 0; i < 4; i++) { 272 | uint8_t finalAlpha; 273 | int alphaCode; 274 | uint8_t colorCode; 275 | uint32_t finalColor; 276 | 277 | alphaCode = alphaIndices [4 * j + i]; 278 | 279 | if (alphaCode == 0) { 280 | finalAlpha = alpha0; 281 | } else if (alphaCode == 1) { 282 | finalAlpha = alpha1; 283 | } else { 284 | if (alpha0 > alpha1) { 285 | finalAlpha = (uint8_t)(((8 - alphaCode)*alpha0 + (alphaCode - 1)*alpha1) / 7); 286 | } else { 287 | if (alphaCode == 6) { 288 | finalAlpha = 0; 289 | } else if (alphaCode == 7) { 290 | finalAlpha = 255; 291 | } else { 292 | finalAlpha = (uint8_t)(((6 - alphaCode)*alpha0 + (alphaCode - 1)*alpha1) / 5); 293 | } 294 | } 295 | } 296 | 297 | colorCode = (code >> 2 * (4 * j + i)) & 0x03; 298 | finalColor = 0; 299 | 300 | switch (colorCode) { 301 | case 0: 302 | finalColor = PackRGBA (r0, g0, b0, finalAlpha); 303 | break; 304 | case 1: 305 | finalColor = PackRGBA (r1, g1, b1, finalAlpha); 306 | break; 307 | case 2: 308 | finalColor = PackRGBA ((2 * r0 + r1) / 3, (2 * g0 + g1) / 3, (2 * b0 + b1) / 3, finalAlpha); 309 | break; 310 | case 3: 311 | finalColor = PackRGBA ((r0 + 2 * r1) / 3, (g0 + 2 * g1) / 3, (b0 + 2 * b1) / 3, finalAlpha); 312 | break; 313 | } 314 | 315 | 316 | *(uint32_t*)(image + sizeof (uint32_t) * (i + x) + (stride * (y + j))) = finalColor; 317 | } 318 | } 319 | } 320 | 321 | /* 322 | Decompresses one block of a BC2 (DXT3) texture and stores the resulting pixels at the appropriate offset in 'image'. 323 | 324 | uint32_t x: x-coordinate of the first pixel in the block. 325 | uint32_t y: y-coordinate of the first pixel in the block. 326 | uint32_t stride: stride of a scanline in bytes. 327 | const uint8_t *blockStorage: pointer to the block to decompress. 
328 | uint32_t *image: pointer to image where the decompressed pixel data should be stored. 329 | */ 330 | void DecompressBlockBC2 (uint32_t x, uint32_t y, uint32_t stride, 331 | const uint8_t* blockStorage, unsigned char* image) 332 | { 333 | int i; 334 | 335 | uint8_t alphaValues [16] = { 0 }; 336 | 337 | for (i = 0; i < 4; ++i) { 338 | const uint16_t* alphaData = (const uint16_t*)(blockStorage); 339 | 340 | alphaValues [i * 4 + 0] = (((*alphaData) >> 0) & 0xF) * 17; 341 | alphaValues [i * 4 + 1] = (((*alphaData) >> 4) & 0xF) * 17; 342 | alphaValues [i * 4 + 2] = (((*alphaData) >> 8) & 0xF) * 17; 343 | alphaValues [i * 4 + 3] = (((*alphaData) >> 12) & 0xF) * 17; 344 | 345 | blockStorage += 2; 346 | } 347 | 348 | DecompressBlockBC1Internal (blockStorage, 349 | image + x * sizeof (uint32_t) + (y * stride), stride, alphaValues); 350 | } 351 | 352 | enum BC4Mode 353 | { 354 | BC4_UNORM = 0, 355 | BC4_SNORM = 1 356 | }; 357 | 358 | enum BC5Mode 359 | { 360 | BC5_UNORM = 0, 361 | BC5_SNORM = 1 362 | }; 363 | 364 | static void DecompressBlockBC4Internal ( 365 | const uint8_t* block, unsigned char* output, 366 | uint32_t outputStride, const float* colorTable) 367 | { 368 | uint8_t indices [16]; 369 | int x, y; 370 | 371 | Decompress16x3bitIndices (block + 2, indices); 372 | 373 | for (y = 0; y < 4; ++y) { 374 | for (x = 0; x < 4; ++x) { 375 | *(float*)(output + x * sizeof (float)) = colorTable [indices [y*4 + x]]; 376 | } 377 | 378 | output += outputStride; 379 | } 380 | } 381 | 382 | /* 383 | Decompresses one block of a BC4 texture and stores the resulting pixels at the appropriate offset in 'image'. 384 | 385 | uint32_t x: x-coordinate of the first pixel in the block. 386 | uint32_t y: y-coordinate of the first pixel in the block. 387 | uint32_t stride: stride of a scanline in bytes. 388 | const uint8_t* blockStorage: pointer to the block to decompress. 389 | float* image: pointer to image where the decompressed pixel data should be stored. 
390 | */ 391 | void DecompressBlockBC4 (uint32_t x, uint32_t y, uint32_t stride, enum BC4Mode mode, 392 | const uint8_t* blockStorage, unsigned char* image) 393 | { 394 | float colorTable [8]; 395 | float r0, r1; 396 | 397 | if (mode == BC4_UNORM) { 398 | r0 = Int8ToFloat_UNORM (blockStorage [0]); 399 | r1 = Int8ToFloat_UNORM (blockStorage [1]); 400 | 401 | colorTable [0] = r0; 402 | colorTable [1] = r1; 403 | 404 | if (r0 > r1) { 405 | // 6 interpolated color values 406 | colorTable [2] = (6*r0 + 1*r1)/7.0f; // bit code 010 407 | colorTable [3] = (5*r0 + 2*r1)/7.0f; // bit code 011 408 | colorTable [4] = (4*r0 + 3*r1)/7.0f; // bit code 100 409 | colorTable [5] = (3*r0 + 4*r1)/7.0f; // bit code 101 410 | colorTable [6] = (2*r0 + 5*r1)/7.0f; // bit code 110 411 | colorTable [7] = (1*r0 + 6*r1)/7.0f; // bit code 111 412 | } else { 413 | // 4 interpolated color values 414 | colorTable [2] = (4*r0 + 1*r1)/5.0f; // bit code 010 415 | colorTable [3] = (3*r0 + 2*r1)/5.0f; // bit code 011 416 | colorTable [4] = (2*r0 + 3*r1)/5.0f; // bit code 100 417 | colorTable [5] = (1*r0 + 4*r1)/5.0f; // bit code 101 418 | colorTable [6] = 0.0f; // bit code 110 419 | colorTable [7] = 1.0f; // bit code 111 420 | } 421 | } else if (mode == BC4_SNORM) { 422 | r0 = Int8ToFloat_SNORM (blockStorage [0]); 423 | r1 = Int8ToFloat_SNORM (blockStorage [1]); 424 | 425 | colorTable [0] = r0; 426 | colorTable [1] = r1; 427 | 428 | if (r0 > r1) { 429 | // 6 interpolated color values 430 | colorTable [2] = (6*r0 + 1*r1)/7.0f; // bit code 010 431 | colorTable [3] = (5*r0 + 2*r1)/7.0f; // bit code 011 432 | colorTable [4] = (4*r0 + 3*r1)/7.0f; // bit code 100 433 | colorTable [5] = (3*r0 + 4*r1)/7.0f; // bit code 101 434 | colorTable [6] = (2*r0 + 5*r1)/7.0f; // bit code 110 435 | colorTable [7] = (1*r0 + 6*r1)/7.0f; // bit code 111 436 | } else { 437 | // 4 interpolated color values 438 | colorTable [2] = (4*r0 + 1*r1)/5.0f; // bit code 010 439 | colorTable [3] = (3*r0 + 2*r1)/5.0f; // bit code 011 440 
| colorTable [4] = (2*r0 + 3*r1)/5.0f; // bit code 100 441 | colorTable [5] = (1*r0 + 4*r1)/5.0f; // bit code 101 442 | colorTable [6] = -1.0f; // bit code 110 443 | colorTable [7] = 1.0f; // bit code 111 444 | } 445 | } 446 | 447 | DecompressBlockBC4Internal (blockStorage, 448 | image + x * sizeof (float) + (y * stride), stride, colorTable); 449 | } 450 | 451 | 452 | /* 453 | Decompresses one block of a BC5 texture and stores the resulting pixels at the appropriate offset in 'image'. 454 | 455 | uint32_t x: x-coordinate of the first pixel in the block. 456 | uint32_t y: y-coordinate of the first pixel in the block. 457 | uint32_t stride: stride of a scanline in bytes. 458 | const uint8_t* blockStorage: pointer to the block to decompress (first channel at offset 0, second channel at offset 8). 459 | unsigned char* image: pointer to image where the decompressed pixel data should be stored; each pixel is written as two consecutive floats (first channel, then second channel). 460 | */ 461 | void DecompressBlockBC5 (uint32_t x, uint32_t y, uint32_t stride, enum BC5Mode mode, 462 | const uint8_t* blockStorage, unsigned char* image) 463 | { 464 | // We decompress the two channels separately and interleave them when 465 | // writing to the output 466 | float c0 [16]; 467 | float c1 [16]; 468 | 469 | int dx, dy; 470 | 471 | DecompressBlockBC4 (0, 0, 4 * sizeof (float), (enum BC4Mode)mode, 472 | blockStorage, (unsigned char*)c0); 473 | DecompressBlockBC4 (0, 0, 4 * sizeof (float), (enum BC4Mode)mode, 474 | blockStorage + 8, (unsigned char*)c1); 475 | 476 | for (dy = 0; dy < 4; ++dy) { 477 | for (dx = 0; dx < 4; ++dx) { 478 | *(float*)(image + stride * (y + dy) + (x + dx) * sizeof (float) * 2) = c0 [dy * 4 + dx]; /* row y + dy, first float of pixel x + dx */ 479 | *(float*)(image + stride * (y + dy) + (x + dx) * sizeof (float) * 2 + sizeof (float)) = c1 [dy * 4 + dx]; /* second float of the same pixel */ 480 | } 481 | } 482 | } 483 | -------------------------------------------------------------------------------- /3rdparty/decompress.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | extern "C" { 4 | void DecompressBlockBC1(uint32_t x, uint32_t y, uint32_t stride, 
const uint8_t *blockStorage, 5 | unsigned char *image); 6 | void DecompressBlockBC3(uint32_t x, uint32_t y, uint32_t stride, const uint8_t *blockStorage, 7 | unsigned char *image); 8 | void DecompressBlockBC2(uint32_t x, uint32_t y, uint32_t stride, const uint8_t *blockStorage, 9 | unsigned char *image); 10 | void DecompressBlockBC4(uint32_t x, uint32_t y, uint32_t stride, enum BC4Mode mode, 11 | const uint8_t *blockStorage, unsigned char *image); 12 | void DecompressBlockBC5(uint32_t x, uint32_t y, uint32_t stride, enum BC5Mode mode, 13 | const uint8_t *blockStorage, unsigned char *image); 14 | }; -------------------------------------------------------------------------------- /3rdparty/spirv.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014-2018 The Khronos Group Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and/or associated documentation files (the "Materials"), 5 | // to deal in the Materials without restriction, including without limitation 6 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | // and/or sell copies of the Materials, and to permit persons to whom the 8 | // Materials are furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Materials. 12 | // 13 | // MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS 14 | // STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND 15 | // HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ 16 | // 17 | // THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 20 | // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | // FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS 23 | // IN THE MATERIALS. 24 | 25 | // This header is automatically generated by the same tool that creates 26 | // the Binary Section of the SPIR-V specification. 27 | 28 | // Enumeration tokens for SPIR-V, in various styles: 29 | // C, C++, C++11, JSON, Lua, Python 30 | // 31 | // - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL 32 | // - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL 33 | // - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL 34 | // - Lua will use tables, e.g.: spv.SourceLanguage.GLSL 35 | // - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] 36 | // 37 | // Some tokens act like mask values, which can be OR'd together, 38 | // while others are mutually exclusive. The mask-like ones have 39 | // "Mask" in their name, and a parallel enum that has the shift 40 | // amount (1 << x) for each corresponding enumerant. 
41 | 42 | #ifndef spirv_HPP 43 | #define spirv_HPP 44 | 45 | namespace spv { 46 | 47 | typedef unsigned int Id; 48 | 49 | #define SPV_VERSION 0x10100 50 | #define SPV_REVISION 8 51 | 52 | static const unsigned int MagicNumber = 0x07230203; 53 | static const unsigned int Version = 0x00010100; 54 | static const unsigned int Revision = 8; 55 | static const unsigned int OpCodeMask = 0xffff; 56 | static const unsigned int WordCountShift = 16; 57 | 58 | enum SourceLanguage { 59 | SourceLanguageUnknown = 0, 60 | SourceLanguageESSL = 1, 61 | SourceLanguageGLSL = 2, 62 | SourceLanguageOpenCL_C = 3, 63 | SourceLanguageOpenCL_CPP = 4, 64 | SourceLanguageHLSL = 5, 65 | SourceLanguageMax = 0x7fffffff, 66 | }; 67 | 68 | enum ExecutionModel { 69 | ExecutionModelVertex = 0, 70 | ExecutionModelTessellationControl = 1, 71 | ExecutionModelTessellationEvaluation = 2, 72 | ExecutionModelGeometry = 3, 73 | ExecutionModelFragment = 4, 74 | ExecutionModelGLCompute = 5, 75 | ExecutionModelKernel = 6, 76 | ExecutionModelMax = 0x7fffffff, 77 | }; 78 | 79 | enum AddressingModel { 80 | AddressingModelLogical = 0, 81 | AddressingModelPhysical32 = 1, 82 | AddressingModelPhysical64 = 2, 83 | AddressingModelMax = 0x7fffffff, 84 | }; 85 | 86 | enum MemoryModel { 87 | MemoryModelSimple = 0, 88 | MemoryModelGLSL450 = 1, 89 | MemoryModelOpenCL = 2, 90 | MemoryModelMax = 0x7fffffff, 91 | }; 92 | 93 | enum ExecutionMode { 94 | ExecutionModeInvocations = 0, 95 | ExecutionModeSpacingEqual = 1, 96 | ExecutionModeSpacingFractionalEven = 2, 97 | ExecutionModeSpacingFractionalOdd = 3, 98 | ExecutionModeVertexOrderCw = 4, 99 | ExecutionModeVertexOrderCcw = 5, 100 | ExecutionModePixelCenterInteger = 6, 101 | ExecutionModeOriginUpperLeft = 7, 102 | ExecutionModeOriginLowerLeft = 8, 103 | ExecutionModeEarlyFragmentTests = 9, 104 | ExecutionModePointMode = 10, 105 | ExecutionModeXfb = 11, 106 | ExecutionModeDepthReplacing = 12, 107 | ExecutionModeDepthGreater = 14, 108 | ExecutionModeDepthLess = 15, 109 | 
ExecutionModeDepthUnchanged = 16, 110 | ExecutionModeLocalSize = 17, 111 | ExecutionModeLocalSizeHint = 18, 112 | ExecutionModeInputPoints = 19, 113 | ExecutionModeInputLines = 20, 114 | ExecutionModeInputLinesAdjacency = 21, 115 | ExecutionModeTriangles = 22, 116 | ExecutionModeInputTrianglesAdjacency = 23, 117 | ExecutionModeQuads = 24, 118 | ExecutionModeIsolines = 25, 119 | ExecutionModeOutputVertices = 26, 120 | ExecutionModeOutputPoints = 27, 121 | ExecutionModeOutputLineStrip = 28, 122 | ExecutionModeOutputTriangleStrip = 29, 123 | ExecutionModeVecTypeHint = 30, 124 | ExecutionModeContractionOff = 31, 125 | ExecutionModeInitializer = 33, 126 | ExecutionModeFinalizer = 34, 127 | ExecutionModeSubgroupSize = 35, 128 | ExecutionModeSubgroupsPerWorkgroup = 36, 129 | ExecutionModePostDepthCoverage = 4446, 130 | ExecutionModeStencilRefReplacingEXT = 5027, 131 | ExecutionModeMax = 0x7fffffff, 132 | }; 133 | 134 | enum StorageClass { 135 | StorageClassUniformConstant = 0, 136 | StorageClassInput = 1, 137 | StorageClassUniform = 2, 138 | StorageClassOutput = 3, 139 | StorageClassWorkgroup = 4, 140 | StorageClassCrossWorkgroup = 5, 141 | StorageClassPrivate = 6, 142 | StorageClassFunction = 7, 143 | StorageClassGeneric = 8, 144 | StorageClassPushConstant = 9, 145 | StorageClassAtomicCounter = 10, 146 | StorageClassImage = 11, 147 | StorageClassStorageBuffer = 12, 148 | StorageClassMax = 0x7fffffff, 149 | }; 150 | 151 | enum Dim { 152 | Dim1D = 0, 153 | Dim2D = 1, 154 | Dim3D = 2, 155 | DimCube = 3, 156 | DimRect = 4, 157 | DimBuffer = 5, 158 | DimSubpassData = 6, 159 | DimMax = 0x7fffffff, 160 | }; 161 | 162 | enum SamplerAddressingMode { 163 | SamplerAddressingModeNone = 0, 164 | SamplerAddressingModeClampToEdge = 1, 165 | SamplerAddressingModeClamp = 2, 166 | SamplerAddressingModeRepeat = 3, 167 | SamplerAddressingModeRepeatMirrored = 4, 168 | SamplerAddressingModeMax = 0x7fffffff, 169 | }; 170 | 171 | enum SamplerFilterMode { 172 | SamplerFilterModeNearest = 0, 173 
| SamplerFilterModeLinear = 1, 174 | SamplerFilterModeMax = 0x7fffffff, 175 | }; 176 | 177 | enum ImageFormat { 178 | ImageFormatUnknown = 0, 179 | ImageFormatRgba32f = 1, 180 | ImageFormatRgba16f = 2, 181 | ImageFormatR32f = 3, 182 | ImageFormatRgba8 = 4, 183 | ImageFormatRgba8Snorm = 5, 184 | ImageFormatRg32f = 6, 185 | ImageFormatRg16f = 7, 186 | ImageFormatR11fG11fB10f = 8, 187 | ImageFormatR16f = 9, 188 | ImageFormatRgba16 = 10, 189 | ImageFormatRgb10A2 = 11, 190 | ImageFormatRg16 = 12, 191 | ImageFormatRg8 = 13, 192 | ImageFormatR16 = 14, 193 | ImageFormatR8 = 15, 194 | ImageFormatRgba16Snorm = 16, 195 | ImageFormatRg16Snorm = 17, 196 | ImageFormatRg8Snorm = 18, 197 | ImageFormatR16Snorm = 19, 198 | ImageFormatR8Snorm = 20, 199 | ImageFormatRgba32i = 21, 200 | ImageFormatRgba16i = 22, 201 | ImageFormatRgba8i = 23, 202 | ImageFormatR32i = 24, 203 | ImageFormatRg32i = 25, 204 | ImageFormatRg16i = 26, 205 | ImageFormatRg8i = 27, 206 | ImageFormatR16i = 28, 207 | ImageFormatR8i = 29, 208 | ImageFormatRgba32ui = 30, 209 | ImageFormatRgba16ui = 31, 210 | ImageFormatRgba8ui = 32, 211 | ImageFormatR32ui = 33, 212 | ImageFormatRgb10a2ui = 34, 213 | ImageFormatRg32ui = 35, 214 | ImageFormatRg16ui = 36, 215 | ImageFormatRg8ui = 37, 216 | ImageFormatR16ui = 38, 217 | ImageFormatR8ui = 39, 218 | ImageFormatMax = 0x7fffffff, 219 | }; 220 | 221 | enum ImageChannelOrder { 222 | ImageChannelOrderR = 0, 223 | ImageChannelOrderA = 1, 224 | ImageChannelOrderRG = 2, 225 | ImageChannelOrderRA = 3, 226 | ImageChannelOrderRGB = 4, 227 | ImageChannelOrderRGBA = 5, 228 | ImageChannelOrderBGRA = 6, 229 | ImageChannelOrderARGB = 7, 230 | ImageChannelOrderIntensity = 8, 231 | ImageChannelOrderLuminance = 9, 232 | ImageChannelOrderRx = 10, 233 | ImageChannelOrderRGx = 11, 234 | ImageChannelOrderRGBx = 12, 235 | ImageChannelOrderDepth = 13, 236 | ImageChannelOrderDepthStencil = 14, 237 | ImageChannelOrdersRGB = 15, 238 | ImageChannelOrdersRGBx = 16, 239 | ImageChannelOrdersRGBA = 17, 240 | 
ImageChannelOrdersBGRA = 18, 241 | ImageChannelOrderABGR = 19, 242 | ImageChannelOrderMax = 0x7fffffff, 243 | }; 244 | 245 | enum ImageChannelDataType { 246 | ImageChannelDataTypeSnormInt8 = 0, 247 | ImageChannelDataTypeSnormInt16 = 1, 248 | ImageChannelDataTypeUnormInt8 = 2, 249 | ImageChannelDataTypeUnormInt16 = 3, 250 | ImageChannelDataTypeUnormShort565 = 4, 251 | ImageChannelDataTypeUnormShort555 = 5, 252 | ImageChannelDataTypeUnormInt101010 = 6, 253 | ImageChannelDataTypeSignedInt8 = 7, 254 | ImageChannelDataTypeSignedInt16 = 8, 255 | ImageChannelDataTypeSignedInt32 = 9, 256 | ImageChannelDataTypeUnsignedInt8 = 10, 257 | ImageChannelDataTypeUnsignedInt16 = 11, 258 | ImageChannelDataTypeUnsignedInt32 = 12, 259 | ImageChannelDataTypeHalfFloat = 13, 260 | ImageChannelDataTypeFloat = 14, 261 | ImageChannelDataTypeUnormInt24 = 15, 262 | ImageChannelDataTypeUnormInt101010_2 = 16, 263 | ImageChannelDataTypeMax = 0x7fffffff, 264 | }; 265 | 266 | enum ImageOperandsShift { 267 | ImageOperandsBiasShift = 0, 268 | ImageOperandsLodShift = 1, 269 | ImageOperandsGradShift = 2, 270 | ImageOperandsConstOffsetShift = 3, 271 | ImageOperandsOffsetShift = 4, 272 | ImageOperandsConstOffsetsShift = 5, 273 | ImageOperandsSampleShift = 6, 274 | ImageOperandsMinLodShift = 7, 275 | ImageOperandsMax = 0x7fffffff, 276 | }; 277 | 278 | enum ImageOperandsMask { 279 | ImageOperandsMaskNone = 0, 280 | ImageOperandsBiasMask = 0x00000001, 281 | ImageOperandsLodMask = 0x00000002, 282 | ImageOperandsGradMask = 0x00000004, 283 | ImageOperandsConstOffsetMask = 0x00000008, 284 | ImageOperandsOffsetMask = 0x00000010, 285 | ImageOperandsConstOffsetsMask = 0x00000020, 286 | ImageOperandsSampleMask = 0x00000040, 287 | ImageOperandsMinLodMask = 0x00000080, 288 | }; 289 | 290 | enum FPFastMathModeShift { 291 | FPFastMathModeNotNaNShift = 0, 292 | FPFastMathModeNotInfShift = 1, 293 | FPFastMathModeNSZShift = 2, 294 | FPFastMathModeAllowRecipShift = 3, 295 | FPFastMathModeFastShift = 4, 296 | 
FPFastMathModeMax = 0x7fffffff, 297 | }; 298 | 299 | enum FPFastMathModeMask { 300 | FPFastMathModeMaskNone = 0, 301 | FPFastMathModeNotNaNMask = 0x00000001, 302 | FPFastMathModeNotInfMask = 0x00000002, 303 | FPFastMathModeNSZMask = 0x00000004, 304 | FPFastMathModeAllowRecipMask = 0x00000008, 305 | FPFastMathModeFastMask = 0x00000010, 306 | }; 307 | 308 | enum FPRoundingMode { 309 | FPRoundingModeRTE = 0, 310 | FPRoundingModeRTZ = 1, 311 | FPRoundingModeRTP = 2, 312 | FPRoundingModeRTN = 3, 313 | FPRoundingModeMax = 0x7fffffff, 314 | }; 315 | 316 | enum LinkageType { 317 | LinkageTypeExport = 0, 318 | LinkageTypeImport = 1, 319 | LinkageTypeMax = 0x7fffffff, 320 | }; 321 | 322 | enum AccessQualifier { 323 | AccessQualifierReadOnly = 0, 324 | AccessQualifierWriteOnly = 1, 325 | AccessQualifierReadWrite = 2, 326 | AccessQualifierMax = 0x7fffffff, 327 | }; 328 | 329 | enum FunctionParameterAttribute { 330 | FunctionParameterAttributeZext = 0, 331 | FunctionParameterAttributeSext = 1, 332 | FunctionParameterAttributeByVal = 2, 333 | FunctionParameterAttributeSret = 3, 334 | FunctionParameterAttributeNoAlias = 4, 335 | FunctionParameterAttributeNoCapture = 5, 336 | FunctionParameterAttributeNoWrite = 6, 337 | FunctionParameterAttributeNoReadWrite = 7, 338 | FunctionParameterAttributeMax = 0x7fffffff, 339 | }; 340 | 341 | enum Decoration { 342 | DecorationRelaxedPrecision = 0, 343 | DecorationSpecId = 1, 344 | DecorationBlock = 2, 345 | DecorationBufferBlock = 3, 346 | DecorationRowMajor = 4, 347 | DecorationColMajor = 5, 348 | DecorationArrayStride = 6, 349 | DecorationMatrixStride = 7, 350 | DecorationGLSLShared = 8, 351 | DecorationGLSLPacked = 9, 352 | DecorationCPacked = 10, 353 | DecorationBuiltIn = 11, 354 | DecorationNoPerspective = 13, 355 | DecorationFlat = 14, 356 | DecorationPatch = 15, 357 | DecorationCentroid = 16, 358 | DecorationSample = 17, 359 | DecorationInvariant = 18, 360 | DecorationRestrict = 19, 361 | DecorationAliased = 20, 362 | 
DecorationVolatile = 21, 363 | DecorationConstant = 22, 364 | DecorationCoherent = 23, 365 | DecorationNonWritable = 24, 366 | DecorationNonReadable = 25, 367 | DecorationUniform = 26, 368 | DecorationSaturatedConversion = 28, 369 | DecorationStream = 29, 370 | DecorationLocation = 30, 371 | DecorationComponent = 31, 372 | DecorationIndex = 32, 373 | DecorationBinding = 33, 374 | DecorationDescriptorSet = 34, 375 | DecorationOffset = 35, 376 | DecorationXfbBuffer = 36, 377 | DecorationXfbStride = 37, 378 | DecorationFuncParamAttr = 38, 379 | DecorationFPRoundingMode = 39, 380 | DecorationFPFastMathMode = 40, 381 | DecorationLinkageAttributes = 41, 382 | DecorationNoContraction = 42, 383 | DecorationInputAttachmentIndex = 43, 384 | DecorationAlignment = 44, 385 | DecorationMaxByteOffset = 45, 386 | DecorationExplicitInterpAMD = 4999, 387 | DecorationOverrideCoverageNV = 5248, 388 | DecorationPassthroughNV = 5250, 389 | DecorationViewportRelativeNV = 5252, 390 | DecorationSecondaryViewportRelativeNV = 5256, 391 | DecorationHlslCounterBufferGOOGLE = 5634, 392 | DecorationHlslSemanticGOOGLE = 5635, 393 | DecorationMax = 0x7fffffff, 394 | }; 395 | 396 | enum BuiltIn { 397 | BuiltInPosition = 0, 398 | BuiltInPointSize = 1, 399 | BuiltInClipDistance = 3, 400 | BuiltInCullDistance = 4, 401 | BuiltInVertexId = 5, 402 | BuiltInInstanceId = 6, 403 | BuiltInPrimitiveId = 7, 404 | BuiltInInvocationId = 8, 405 | BuiltInLayer = 9, 406 | BuiltInViewportIndex = 10, 407 | BuiltInTessLevelOuter = 11, 408 | BuiltInTessLevelInner = 12, 409 | BuiltInTessCoord = 13, 410 | BuiltInPatchVertices = 14, 411 | BuiltInFragCoord = 15, 412 | BuiltInPointCoord = 16, 413 | BuiltInFrontFacing = 17, 414 | BuiltInSampleId = 18, 415 | BuiltInSamplePosition = 19, 416 | BuiltInSampleMask = 20, 417 | BuiltInFragDepth = 22, 418 | BuiltInHelperInvocation = 23, 419 | BuiltInNumWorkgroups = 24, 420 | BuiltInWorkgroupSize = 25, 421 | BuiltInWorkgroupId = 26, 422 | BuiltInLocalInvocationId = 27, 423 | 
BuiltInGlobalInvocationId = 28, 424 | BuiltInLocalInvocationIndex = 29, 425 | BuiltInWorkDim = 30, 426 | BuiltInGlobalSize = 31, 427 | BuiltInEnqueuedWorkgroupSize = 32, 428 | BuiltInGlobalOffset = 33, 429 | BuiltInGlobalLinearId = 34, 430 | BuiltInSubgroupSize = 36, 431 | BuiltInSubgroupMaxSize = 37, 432 | BuiltInNumSubgroups = 38, 433 | BuiltInNumEnqueuedSubgroups = 39, 434 | BuiltInSubgroupId = 40, 435 | BuiltInSubgroupLocalInvocationId = 41, 436 | BuiltInVertexIndex = 42, 437 | BuiltInInstanceIndex = 43, 438 | BuiltInSubgroupEqMaskKHR = 4416, 439 | BuiltInSubgroupGeMaskKHR = 4417, 440 | BuiltInSubgroupGtMaskKHR = 4418, 441 | BuiltInSubgroupLeMaskKHR = 4419, 442 | BuiltInSubgroupLtMaskKHR = 4420, 443 | BuiltInBaseVertex = 4424, 444 | BuiltInBaseInstance = 4425, 445 | BuiltInDrawIndex = 4426, 446 | BuiltInDeviceIndex = 4438, 447 | BuiltInViewIndex = 4440, 448 | BuiltInBaryCoordNoPerspAMD = 4992, 449 | BuiltInBaryCoordNoPerspCentroidAMD = 4993, 450 | BuiltInBaryCoordNoPerspSampleAMD = 4994, 451 | BuiltInBaryCoordSmoothAMD = 4995, 452 | BuiltInBaryCoordSmoothCentroidAMD = 4996, 453 | BuiltInBaryCoordSmoothSampleAMD = 4997, 454 | BuiltInBaryCoordPullModelAMD = 4998, 455 | BuiltInFragStencilRefEXT = 5014, 456 | BuiltInViewportMaskNV = 5253, 457 | BuiltInSecondaryPositionNV = 5257, 458 | BuiltInSecondaryViewportMaskNV = 5258, 459 | BuiltInPositionPerViewNV = 5261, 460 | BuiltInViewportMaskPerViewNV = 5262, 461 | BuiltInMax = 0x7fffffff, 462 | }; 463 | 464 | enum SelectionControlShift { 465 | SelectionControlFlattenShift = 0, 466 | SelectionControlDontFlattenShift = 1, 467 | SelectionControlMax = 0x7fffffff, 468 | }; 469 | 470 | enum SelectionControlMask { 471 | SelectionControlMaskNone = 0, 472 | SelectionControlFlattenMask = 0x00000001, 473 | SelectionControlDontFlattenMask = 0x00000002, 474 | }; 475 | 476 | enum LoopControlShift { 477 | LoopControlUnrollShift = 0, 478 | LoopControlDontUnrollShift = 1, 479 | LoopControlDependencyInfiniteShift = 2, 480 | 
LoopControlDependencyLengthShift = 3, 481 | LoopControlMax = 0x7fffffff, 482 | }; 483 | 484 | enum LoopControlMask { 485 | LoopControlMaskNone = 0, 486 | LoopControlUnrollMask = 0x00000001, 487 | LoopControlDontUnrollMask = 0x00000002, 488 | LoopControlDependencyInfiniteMask = 0x00000004, 489 | LoopControlDependencyLengthMask = 0x00000008, 490 | }; 491 | 492 | enum FunctionControlShift { 493 | FunctionControlInlineShift = 0, 494 | FunctionControlDontInlineShift = 1, 495 | FunctionControlPureShift = 2, 496 | FunctionControlConstShift = 3, 497 | FunctionControlMax = 0x7fffffff, 498 | }; 499 | 500 | enum FunctionControlMask { 501 | FunctionControlMaskNone = 0, 502 | FunctionControlInlineMask = 0x00000001, 503 | FunctionControlDontInlineMask = 0x00000002, 504 | FunctionControlPureMask = 0x00000004, 505 | FunctionControlConstMask = 0x00000008, 506 | }; 507 | 508 | enum MemorySemanticsShift { 509 | MemorySemanticsAcquireShift = 1, 510 | MemorySemanticsReleaseShift = 2, 511 | MemorySemanticsAcquireReleaseShift = 3, 512 | MemorySemanticsSequentiallyConsistentShift = 4, 513 | MemorySemanticsUniformMemoryShift = 6, 514 | MemorySemanticsSubgroupMemoryShift = 7, 515 | MemorySemanticsWorkgroupMemoryShift = 8, 516 | MemorySemanticsCrossWorkgroupMemoryShift = 9, 517 | MemorySemanticsAtomicCounterMemoryShift = 10, 518 | MemorySemanticsImageMemoryShift = 11, 519 | MemorySemanticsMax = 0x7fffffff, 520 | }; 521 | 522 | enum MemorySemanticsMask { 523 | MemorySemanticsMaskNone = 0, 524 | MemorySemanticsAcquireMask = 0x00000002, 525 | MemorySemanticsReleaseMask = 0x00000004, 526 | MemorySemanticsAcquireReleaseMask = 0x00000008, 527 | MemorySemanticsSequentiallyConsistentMask = 0x00000010, 528 | MemorySemanticsUniformMemoryMask = 0x00000040, 529 | MemorySemanticsSubgroupMemoryMask = 0x00000080, 530 | MemorySemanticsWorkgroupMemoryMask = 0x00000100, 531 | MemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, 532 | MemorySemanticsAtomicCounterMemoryMask = 0x00000400, 533 | 
MemorySemanticsImageMemoryMask = 0x00000800, 534 | }; 535 | 536 | enum MemoryAccessShift { 537 | MemoryAccessVolatileShift = 0, 538 | MemoryAccessAlignedShift = 1, 539 | MemoryAccessNontemporalShift = 2, 540 | MemoryAccessMax = 0x7fffffff, 541 | }; 542 | 543 | enum MemoryAccessMask { 544 | MemoryAccessMaskNone = 0, 545 | MemoryAccessVolatileMask = 0x00000001, 546 | MemoryAccessAlignedMask = 0x00000002, 547 | MemoryAccessNontemporalMask = 0x00000004, 548 | }; 549 | 550 | enum Scope { 551 | ScopeCrossDevice = 0, 552 | ScopeDevice = 1, 553 | ScopeWorkgroup = 2, 554 | ScopeSubgroup = 3, 555 | ScopeInvocation = 4, 556 | ScopeMax = 0x7fffffff, 557 | }; 558 | 559 | enum GroupOperation { 560 | GroupOperationReduce = 0, 561 | GroupOperationInclusiveScan = 1, 562 | GroupOperationExclusiveScan = 2, 563 | GroupOperationMax = 0x7fffffff, 564 | }; 565 | 566 | enum KernelEnqueueFlags { 567 | KernelEnqueueFlagsNoWait = 0, 568 | KernelEnqueueFlagsWaitKernel = 1, 569 | KernelEnqueueFlagsWaitWorkGroup = 2, 570 | KernelEnqueueFlagsMax = 0x7fffffff, 571 | }; 572 | 573 | enum KernelProfilingInfoShift { 574 | KernelProfilingInfoCmdExecTimeShift = 0, 575 | KernelProfilingInfoMax = 0x7fffffff, 576 | }; 577 | 578 | enum KernelProfilingInfoMask { 579 | KernelProfilingInfoMaskNone = 0, 580 | KernelProfilingInfoCmdExecTimeMask = 0x00000001, 581 | }; 582 | 583 | enum Capability { 584 | CapabilityMatrix = 0, 585 | CapabilityShader = 1, 586 | CapabilityGeometry = 2, 587 | CapabilityTessellation = 3, 588 | CapabilityAddresses = 4, 589 | CapabilityLinkage = 5, 590 | CapabilityKernel = 6, 591 | CapabilityVector16 = 7, 592 | CapabilityFloat16Buffer = 8, 593 | CapabilityFloat16 = 9, 594 | CapabilityFloat64 = 10, 595 | CapabilityInt64 = 11, 596 | CapabilityInt64Atomics = 12, 597 | CapabilityImageBasic = 13, 598 | CapabilityImageReadWrite = 14, 599 | CapabilityImageMipmap = 15, 600 | CapabilityPipes = 17, 601 | CapabilityGroups = 18, 602 | CapabilityDeviceEnqueue = 19, 603 | CapabilityLiteralSampler = 
20, 604 | CapabilityAtomicStorage = 21, 605 | CapabilityInt16 = 22, 606 | CapabilityTessellationPointSize = 23, 607 | CapabilityGeometryPointSize = 24, 608 | CapabilityImageGatherExtended = 25, 609 | CapabilityStorageImageMultisample = 27, 610 | CapabilityUniformBufferArrayDynamicIndexing = 28, 611 | CapabilitySampledImageArrayDynamicIndexing = 29, 612 | CapabilityStorageBufferArrayDynamicIndexing = 30, 613 | CapabilityStorageImageArrayDynamicIndexing = 31, 614 | CapabilityClipDistance = 32, 615 | CapabilityCullDistance = 33, 616 | CapabilityImageCubeArray = 34, 617 | CapabilitySampleRateShading = 35, 618 | CapabilityImageRect = 36, 619 | CapabilitySampledRect = 37, 620 | CapabilityGenericPointer = 38, 621 | CapabilityInt8 = 39, 622 | CapabilityInputAttachment = 40, 623 | CapabilitySparseResidency = 41, 624 | CapabilityMinLod = 42, 625 | CapabilitySampled1D = 43, 626 | CapabilityImage1D = 44, 627 | CapabilitySampledCubeArray = 45, 628 | CapabilitySampledBuffer = 46, 629 | CapabilityImageBuffer = 47, 630 | CapabilityImageMSArray = 48, 631 | CapabilityStorageImageExtendedFormats = 49, 632 | CapabilityImageQuery = 50, 633 | CapabilityDerivativeControl = 51, 634 | CapabilityInterpolationFunction = 52, 635 | CapabilityTransformFeedback = 53, 636 | CapabilityGeometryStreams = 54, 637 | CapabilityStorageImageReadWithoutFormat = 55, 638 | CapabilityStorageImageWriteWithoutFormat = 56, 639 | CapabilityMultiViewport = 57, 640 | CapabilitySubgroupDispatch = 58, 641 | CapabilityNamedBarrier = 59, 642 | CapabilityPipeStorage = 60, 643 | CapabilitySubgroupBallotKHR = 4423, 644 | CapabilityDrawParameters = 4427, 645 | CapabilitySubgroupVoteKHR = 4431, 646 | CapabilityStorageBuffer16BitAccess = 4433, 647 | CapabilityStorageUniformBufferBlock16 = 4433, 648 | CapabilityStorageUniform16 = 4434, 649 | CapabilityUniformAndStorageBuffer16BitAccess = 4434, 650 | CapabilityStoragePushConstant16 = 4435, 651 | CapabilityStorageInputOutput16 = 4436, 652 | CapabilityDeviceGroup = 4437, 653 | 
CapabilityMultiView = 4439, 654 | CapabilityVariablePointersStorageBuffer = 4441, 655 | CapabilityVariablePointers = 4442, 656 | CapabilityAtomicStorageOps = 4445, 657 | CapabilitySampleMaskPostDepthCoverage = 4447, 658 | CapabilityImageGatherBiasLodAMD = 5009, 659 | CapabilityFragmentMaskAMD = 5010, 660 | CapabilityStencilExportEXT = 5013, 661 | CapabilityImageReadWriteLodAMD = 5015, 662 | CapabilitySampleMaskOverrideCoverageNV = 5249, 663 | CapabilityGeometryShaderPassthroughNV = 5251, 664 | CapabilityShaderViewportIndexLayerEXT = 5254, 665 | CapabilityShaderViewportIndexLayerNV = 5254, 666 | CapabilityShaderViewportMaskNV = 5255, 667 | CapabilityShaderStereoViewNV = 5259, 668 | CapabilityPerViewAttributesNV = 5260, 669 | CapabilitySubgroupShuffleINTEL = 5568, 670 | CapabilitySubgroupBufferBlockIOINTEL = 5569, 671 | CapabilitySubgroupImageBlockIOINTEL = 5570, 672 | CapabilityMax = 0x7fffffff, 673 | }; 674 | 675 | enum Op { 676 | OpNop = 0, 677 | OpUndef = 1, 678 | OpSourceContinued = 2, 679 | OpSource = 3, 680 | OpSourceExtension = 4, 681 | OpName = 5, 682 | OpMemberName = 6, 683 | OpString = 7, 684 | OpLine = 8, 685 | OpExtension = 10, 686 | OpExtInstImport = 11, 687 | OpExtInst = 12, 688 | OpMemoryModel = 14, 689 | OpEntryPoint = 15, 690 | OpExecutionMode = 16, 691 | OpCapability = 17, 692 | OpTypeVoid = 19, 693 | OpTypeBool = 20, 694 | OpTypeInt = 21, 695 | OpTypeFloat = 22, 696 | OpTypeVector = 23, 697 | OpTypeMatrix = 24, 698 | OpTypeImage = 25, 699 | OpTypeSampler = 26, 700 | OpTypeSampledImage = 27, 701 | OpTypeArray = 28, 702 | OpTypeRuntimeArray = 29, 703 | OpTypeStruct = 30, 704 | OpTypeOpaque = 31, 705 | OpTypePointer = 32, 706 | OpTypeFunction = 33, 707 | OpTypeEvent = 34, 708 | OpTypeDeviceEvent = 35, 709 | OpTypeReserveId = 36, 710 | OpTypeQueue = 37, 711 | OpTypePipe = 38, 712 | OpTypeForwardPointer = 39, 713 | OpConstantTrue = 41, 714 | OpConstantFalse = 42, 715 | OpConstant = 43, 716 | OpConstantComposite = 44, 717 | OpConstantSampler = 45, 718 | 
OpConstantNull = 46, 719 | OpSpecConstantTrue = 48, 720 | OpSpecConstantFalse = 49, 721 | OpSpecConstant = 50, 722 | OpSpecConstantComposite = 51, 723 | OpSpecConstantOp = 52, 724 | OpFunction = 54, 725 | OpFunctionParameter = 55, 726 | OpFunctionEnd = 56, 727 | OpFunctionCall = 57, 728 | OpVariable = 59, 729 | OpImageTexelPointer = 60, 730 | OpLoad = 61, 731 | OpStore = 62, 732 | OpCopyMemory = 63, 733 | OpCopyMemorySized = 64, 734 | OpAccessChain = 65, 735 | OpInBoundsAccessChain = 66, 736 | OpPtrAccessChain = 67, 737 | OpArrayLength = 68, 738 | OpGenericPtrMemSemantics = 69, 739 | OpInBoundsPtrAccessChain = 70, 740 | OpDecorate = 71, 741 | OpMemberDecorate = 72, 742 | OpDecorationGroup = 73, 743 | OpGroupDecorate = 74, 744 | OpGroupMemberDecorate = 75, 745 | OpVectorExtractDynamic = 77, 746 | OpVectorInsertDynamic = 78, 747 | OpVectorShuffle = 79, 748 | OpCompositeConstruct = 80, 749 | OpCompositeExtract = 81, 750 | OpCompositeInsert = 82, 751 | OpCopyObject = 83, 752 | OpTranspose = 84, 753 | OpSampledImage = 86, 754 | OpImageSampleImplicitLod = 87, 755 | OpImageSampleExplicitLod = 88, 756 | OpImageSampleDrefImplicitLod = 89, 757 | OpImageSampleDrefExplicitLod = 90, 758 | OpImageSampleProjImplicitLod = 91, 759 | OpImageSampleProjExplicitLod = 92, 760 | OpImageSampleProjDrefImplicitLod = 93, 761 | OpImageSampleProjDrefExplicitLod = 94, 762 | OpImageFetch = 95, 763 | OpImageGather = 96, 764 | OpImageDrefGather = 97, 765 | OpImageRead = 98, 766 | OpImageWrite = 99, 767 | OpImage = 100, 768 | OpImageQueryFormat = 101, 769 | OpImageQueryOrder = 102, 770 | OpImageQuerySizeLod = 103, 771 | OpImageQuerySize = 104, 772 | OpImageQueryLod = 105, 773 | OpImageQueryLevels = 106, 774 | OpImageQuerySamples = 107, 775 | OpConvertFToU = 109, 776 | OpConvertFToS = 110, 777 | OpConvertSToF = 111, 778 | OpConvertUToF = 112, 779 | OpUConvert = 113, 780 | OpSConvert = 114, 781 | OpFConvert = 115, 782 | OpQuantizeToF16 = 116, 783 | OpConvertPtrToU = 117, 784 | OpSatConvertSToU = 118, 
785 | OpSatConvertUToS = 119, 786 | OpConvertUToPtr = 120, 787 | OpPtrCastToGeneric = 121, 788 | OpGenericCastToPtr = 122, 789 | OpGenericCastToPtrExplicit = 123, 790 | OpBitcast = 124, 791 | OpSNegate = 126, 792 | OpFNegate = 127, 793 | OpIAdd = 128, 794 | OpFAdd = 129, 795 | OpISub = 130, 796 | OpFSub = 131, 797 | OpIMul = 132, 798 | OpFMul = 133, 799 | OpUDiv = 134, 800 | OpSDiv = 135, 801 | OpFDiv = 136, 802 | OpUMod = 137, 803 | OpSRem = 138, 804 | OpSMod = 139, 805 | OpFRem = 140, 806 | OpFMod = 141, 807 | OpVectorTimesScalar = 142, 808 | OpMatrixTimesScalar = 143, 809 | OpVectorTimesMatrix = 144, 810 | OpMatrixTimesVector = 145, 811 | OpMatrixTimesMatrix = 146, 812 | OpOuterProduct = 147, 813 | OpDot = 148, 814 | OpIAddCarry = 149, 815 | OpISubBorrow = 150, 816 | OpUMulExtended = 151, 817 | OpSMulExtended = 152, 818 | OpAny = 154, 819 | OpAll = 155, 820 | OpIsNan = 156, 821 | OpIsInf = 157, 822 | OpIsFinite = 158, 823 | OpIsNormal = 159, 824 | OpSignBitSet = 160, 825 | OpLessOrGreater = 161, 826 | OpOrdered = 162, 827 | OpUnordered = 163, 828 | OpLogicalEqual = 164, 829 | OpLogicalNotEqual = 165, 830 | OpLogicalOr = 166, 831 | OpLogicalAnd = 167, 832 | OpLogicalNot = 168, 833 | OpSelect = 169, 834 | OpIEqual = 170, 835 | OpINotEqual = 171, 836 | OpUGreaterThan = 172, 837 | OpSGreaterThan = 173, 838 | OpUGreaterThanEqual = 174, 839 | OpSGreaterThanEqual = 175, 840 | OpULessThan = 176, 841 | OpSLessThan = 177, 842 | OpULessThanEqual = 178, 843 | OpSLessThanEqual = 179, 844 | OpFOrdEqual = 180, 845 | OpFUnordEqual = 181, 846 | OpFOrdNotEqual = 182, 847 | OpFUnordNotEqual = 183, 848 | OpFOrdLessThan = 184, 849 | OpFUnordLessThan = 185, 850 | OpFOrdGreaterThan = 186, 851 | OpFUnordGreaterThan = 187, 852 | OpFOrdLessThanEqual = 188, 853 | OpFUnordLessThanEqual = 189, 854 | OpFOrdGreaterThanEqual = 190, 855 | OpFUnordGreaterThanEqual = 191, 856 | OpShiftRightLogical = 194, 857 | OpShiftRightArithmetic = 195, 858 | OpShiftLeftLogical = 196, 859 | OpBitwiseOr = 197, 
860 | OpBitwiseXor = 198, 861 | OpBitwiseAnd = 199, 862 | OpNot = 200, 863 | OpBitFieldInsert = 201, 864 | OpBitFieldSExtract = 202, 865 | OpBitFieldUExtract = 203, 866 | OpBitReverse = 204, 867 | OpBitCount = 205, 868 | OpDPdx = 207, 869 | OpDPdy = 208, 870 | OpFwidth = 209, 871 | OpDPdxFine = 210, 872 | OpDPdyFine = 211, 873 | OpFwidthFine = 212, 874 | OpDPdxCoarse = 213, 875 | OpDPdyCoarse = 214, 876 | OpFwidthCoarse = 215, 877 | OpEmitVertex = 218, 878 | OpEndPrimitive = 219, 879 | OpEmitStreamVertex = 220, 880 | OpEndStreamPrimitive = 221, 881 | OpControlBarrier = 224, 882 | OpMemoryBarrier = 225, 883 | OpAtomicLoad = 227, 884 | OpAtomicStore = 228, 885 | OpAtomicExchange = 229, 886 | OpAtomicCompareExchange = 230, 887 | OpAtomicCompareExchangeWeak = 231, 888 | OpAtomicIIncrement = 232, 889 | OpAtomicIDecrement = 233, 890 | OpAtomicIAdd = 234, 891 | OpAtomicISub = 235, 892 | OpAtomicSMin = 236, 893 | OpAtomicUMin = 237, 894 | OpAtomicSMax = 238, 895 | OpAtomicUMax = 239, 896 | OpAtomicAnd = 240, 897 | OpAtomicOr = 241, 898 | OpAtomicXor = 242, 899 | OpPhi = 245, 900 | OpLoopMerge = 246, 901 | OpSelectionMerge = 247, 902 | OpLabel = 248, 903 | OpBranch = 249, 904 | OpBranchConditional = 250, 905 | OpSwitch = 251, 906 | OpKill = 252, 907 | OpReturn = 253, 908 | OpReturnValue = 254, 909 | OpUnreachable = 255, 910 | OpLifetimeStart = 256, 911 | OpLifetimeStop = 257, 912 | OpGroupAsyncCopy = 259, 913 | OpGroupWaitEvents = 260, 914 | OpGroupAll = 261, 915 | OpGroupAny = 262, 916 | OpGroupBroadcast = 263, 917 | OpGroupIAdd = 264, 918 | OpGroupFAdd = 265, 919 | OpGroupFMin = 266, 920 | OpGroupUMin = 267, 921 | OpGroupSMin = 268, 922 | OpGroupFMax = 269, 923 | OpGroupUMax = 270, 924 | OpGroupSMax = 271, 925 | OpReadPipe = 274, 926 | OpWritePipe = 275, 927 | OpReservedReadPipe = 276, 928 | OpReservedWritePipe = 277, 929 | OpReserveReadPipePackets = 278, 930 | OpReserveWritePipePackets = 279, 931 | OpCommitReadPipe = 280, 932 | OpCommitWritePipe = 281, 933 | 
OpIsValidReserveId = 282, 934 | OpGetNumPipePackets = 283, 935 | OpGetMaxPipePackets = 284, 936 | OpGroupReserveReadPipePackets = 285, 937 | OpGroupReserveWritePipePackets = 286, 938 | OpGroupCommitReadPipe = 287, 939 | OpGroupCommitWritePipe = 288, 940 | OpEnqueueMarker = 291, 941 | OpEnqueueKernel = 292, 942 | OpGetKernelNDrangeSubGroupCount = 293, 943 | OpGetKernelNDrangeMaxSubGroupSize = 294, 944 | OpGetKernelWorkGroupSize = 295, 945 | OpGetKernelPreferredWorkGroupSizeMultiple = 296, 946 | OpRetainEvent = 297, 947 | OpReleaseEvent = 298, 948 | OpCreateUserEvent = 299, 949 | OpIsValidEvent = 300, 950 | OpSetUserEventStatus = 301, 951 | OpCaptureEventProfilingInfo = 302, 952 | OpGetDefaultQueue = 303, 953 | OpBuildNDRange = 304, 954 | OpImageSparseSampleImplicitLod = 305, 955 | OpImageSparseSampleExplicitLod = 306, 956 | OpImageSparseSampleDrefImplicitLod = 307, 957 | OpImageSparseSampleDrefExplicitLod = 308, 958 | OpImageSparseSampleProjImplicitLod = 309, 959 | OpImageSparseSampleProjExplicitLod = 310, 960 | OpImageSparseSampleProjDrefImplicitLod = 311, 961 | OpImageSparseSampleProjDrefExplicitLod = 312, 962 | OpImageSparseFetch = 313, 963 | OpImageSparseGather = 314, 964 | OpImageSparseDrefGather = 315, 965 | OpImageSparseTexelsResident = 316, 966 | OpNoLine = 317, 967 | OpAtomicFlagTestAndSet = 318, 968 | OpAtomicFlagClear = 319, 969 | OpImageSparseRead = 320, 970 | OpSizeOf = 321, 971 | OpTypePipeStorage = 322, 972 | OpConstantPipeStorage = 323, 973 | OpCreatePipeFromPipeStorage = 324, 974 | OpGetKernelLocalSizeForSubgroupCount = 325, 975 | OpGetKernelMaxNumSubgroups = 326, 976 | OpTypeNamedBarrier = 327, 977 | OpNamedBarrierInitialize = 328, 978 | OpMemoryNamedBarrier = 329, 979 | OpModuleProcessed = 330, 980 | OpDecorateId = 332, 981 | OpSubgroupBallotKHR = 4421, 982 | OpSubgroupFirstInvocationKHR = 4422, 983 | OpSubgroupAllKHR = 4428, 984 | OpSubgroupAnyKHR = 4429, 985 | OpSubgroupAllEqualKHR = 4430, 986 | OpSubgroupReadInvocationKHR = 4432, 987 | 
OpGroupIAddNonUniformAMD = 5000, 988 | OpGroupFAddNonUniformAMD = 5001, 989 | OpGroupFMinNonUniformAMD = 5002, 990 | OpGroupUMinNonUniformAMD = 5003, 991 | OpGroupSMinNonUniformAMD = 5004, 992 | OpGroupFMaxNonUniformAMD = 5005, 993 | OpGroupUMaxNonUniformAMD = 5006, 994 | OpGroupSMaxNonUniformAMD = 5007, 995 | OpFragmentMaskFetchAMD = 5011, 996 | OpFragmentFetchAMD = 5012, 997 | OpSubgroupShuffleINTEL = 5571, 998 | OpSubgroupShuffleDownINTEL = 5572, 999 | OpSubgroupShuffleUpINTEL = 5573, 1000 | OpSubgroupShuffleXorINTEL = 5574, 1001 | OpSubgroupBlockReadINTEL = 5575, 1002 | OpSubgroupBlockWriteINTEL = 5576, 1003 | OpSubgroupImageBlockReadINTEL = 5577, 1004 | OpSubgroupImageBlockWriteINTEL = 5578, 1005 | OpDecorateStringGOOGLE = 5632, 1006 | OpMemberDecorateStringGOOGLE = 5633, 1007 | OpMax = 0x7fffffff, 1008 | }; 1009 | 1010 | // Overload operator| for mask bit combining 1011 | 1012 | inline ImageOperandsMask operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | unsigned(b)); } 1013 | inline FPFastMathModeMask operator|(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) | unsigned(b)); } 1014 | inline SelectionControlMask operator|(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) | unsigned(b)); } 1015 | inline LoopControlMask operator|(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) | unsigned(b)); } 1016 | inline FunctionControlMask operator|(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) | unsigned(b)); } 1017 | inline MemorySemanticsMask operator|(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) | unsigned(b)); } 1018 | inline MemoryAccessMask operator|(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) | unsigned(b)); } 1019 | inline KernelProfilingInfoMask operator|(KernelProfilingInfoMask a, KernelProfilingInfoMask b) 
{ return KernelProfilingInfoMask(unsigned(a) | unsigned(b)); } 1020 | 1021 | } // end namespace spv 1022 | 1023 | #endif // #ifndef spirv_HPP 1024 | 1025 | -------------------------------------------------------------------------------- /3rdparty/vk_icd.h: -------------------------------------------------------------------------------- 1 | // 2 | // File: vk_icd.h 3 | // 4 | /* 5 | * Copyright (c) 2015-2016 The Khronos Group Inc. 6 | * Copyright (c) 2015-2016 Valve Corporation 7 | * Copyright (c) 2015-2016 LunarG, Inc. 8 | * 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * 21 | */ 22 | 23 | #ifndef VKICD_H 24 | #define VKICD_H 25 | 26 | #include "vulkan.h" 27 | 28 | // Loader-ICD version negotiation API. Versions add the following features: 29 | // Version 0 - Initial. Doesn't support vk_icdGetInstanceProcAddr 30 | // or vk_icdNegotiateLoaderICDInterfaceVersion. 31 | // Version 1 - Add support for vk_icdGetInstanceProcAddr. 32 | // Version 2 - Add Loader/ICD Interface version negotiation 33 | // via vk_icdNegotiateLoaderICDInterfaceVersion. 34 | // Version 3 - Add ICD creation/destruction of KHR_surface objects. 35 | // Version 4 - Add unknown physical device extension querying via 36 | // vk_icdGetPhysicalDeviceProcAddr. 
37 | #define CURRENT_LOADER_ICD_INTERFACE_VERSION 4 38 | #define MIN_SUPPORTED_LOADER_ICD_INTERFACE_VERSION 0 39 | #define MIN_PHYS_DEV_EXTENSION_ICD_INTERFACE_VERSION 4 40 | typedef VkResult (VKAPI_PTR *PFN_vkNegotiateLoaderICDInterfaceVersion)(uint32_t *pVersion); 41 | 42 | // This is defined in vk_layer.h which will be found by the loader, but if an ICD is building against this 43 | // file directly, it won't be found. 44 | #ifndef PFN_GetPhysicalDeviceProcAddr 45 | typedef PFN_vkVoidFunction (VKAPI_PTR *PFN_GetPhysicalDeviceProcAddr)(VkInstance instance, const char* pName); 46 | #endif 47 | 48 | /* 49 | * The ICD must reserve space for a pointer for the loader's dispatch 50 | * table, at the start of <each object>. 51 | * The ICD must initialize this variable using the SET_LOADER_MAGIC_VALUE macro. 52 | */ 53 | 54 | #define ICD_LOADER_MAGIC 0x01CDC0DE 55 | 56 | typedef union { 57 | uintptr_t loaderMagic; 58 | void *loaderData; 59 | } VK_LOADER_DATA; 60 | 61 | static inline void set_loader_magic_value(void *pNewObject) { 62 | VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *)pNewObject; 63 | loader_info->loaderMagic = ICD_LOADER_MAGIC; 64 | } 65 | 66 | static inline bool valid_loader_magic_value(void *pNewObject) { 67 | const VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *)pNewObject; 68 | return (loader_info->loaderMagic & 0xffffffff) == ICD_LOADER_MAGIC; 69 | } 70 | 71 | /* 72 | * Windows and Linux ICDs will treat VkSurfaceKHR as a pointer to a struct that 73 | * contains the platform-specific connection and surface information. 
74 | */ 75 | typedef enum { 76 | VK_ICD_WSI_PLATFORM_MIR, 77 | VK_ICD_WSI_PLATFORM_WAYLAND, 78 | VK_ICD_WSI_PLATFORM_WIN32, 79 | VK_ICD_WSI_PLATFORM_XCB, 80 | VK_ICD_WSI_PLATFORM_XLIB, 81 | VK_ICD_WSI_PLATFORM_DISPLAY 82 | } VkIcdWsiPlatform; 83 | 84 | typedef struct { 85 | VkIcdWsiPlatform platform; 86 | } VkIcdSurfaceBase; 87 | 88 | #ifdef VK_USE_PLATFORM_MIR_KHR 89 | typedef struct { 90 | VkIcdSurfaceBase base; 91 | MirConnection *connection; 92 | MirSurface *mirSurface; 93 | } VkIcdSurfaceMir; 94 | #endif // VK_USE_PLATFORM_MIR_KHR 95 | 96 | #ifdef VK_USE_PLATFORM_WAYLAND_KHR 97 | typedef struct { 98 | VkIcdSurfaceBase base; 99 | struct wl_display *display; 100 | struct wl_surface *surface; 101 | } VkIcdSurfaceWayland; 102 | #endif // VK_USE_PLATFORM_WAYLAND_KHR 103 | 104 | #ifdef VK_USE_PLATFORM_WIN32_KHR 105 | typedef struct { 106 | VkIcdSurfaceBase base; 107 | HINSTANCE hinstance; 108 | HWND hwnd; 109 | } VkIcdSurfaceWin32; 110 | #endif // VK_USE_PLATFORM_WIN32_KHR 111 | 112 | #ifdef VK_USE_PLATFORM_XCB_KHR 113 | typedef struct { 114 | VkIcdSurfaceBase base; 115 | xcb_connection_t *connection; 116 | xcb_window_t window; 117 | } VkIcdSurfaceXcb; 118 | #endif // VK_USE_PLATFORM_XCB_KHR 119 | 120 | #ifdef VK_USE_PLATFORM_XLIB_KHR 121 | typedef struct { 122 | VkIcdSurfaceBase base; 123 | Display *dpy; 124 | Window window; 125 | } VkIcdSurfaceXlib; 126 | #endif // VK_USE_PLATFORM_XLIB_KHR 127 | 128 | #ifdef VK_USE_PLATFORM_ANDROID_KHR 129 | typedef struct { 130 | ANativeWindow* window; 131 | } VkIcdSurfaceAndroid; 132 | #endif //VK_USE_PLATFORM_ANDROID_KHR 133 | 134 | typedef struct { 135 | VkIcdSurfaceBase base; 136 | VkDisplayModeKHR displayMode; 137 | uint32_t planeIndex; 138 | uint32_t planeStackIndex; 139 | VkSurfaceTransformFlagBitsKHR transform; 140 | float globalAlpha; 141 | VkDisplayPlaneAlphaFlagBitsKHR alphaMode; 142 | VkExtent2D imageExtent; 143 | } VkIcdSurfaceDisplay; 144 | 145 | #endif // VKICD_H 146 | 
-------------------------------------------------------------------------------- /3rdparty/vk_platform.h: -------------------------------------------------------------------------------- 1 | // 2 | // File: vk_platform.h 3 | // 4 | /* 5 | ** Copyright (c) 2014-2017 The Khronos Group Inc. 6 | ** 7 | ** Licensed under the Apache License, Version 2.0 (the "License"); 8 | ** you may not use this file except in compliance with the License. 9 | ** You may obtain a copy of the License at 10 | ** 11 | ** http://www.apache.org/licenses/LICENSE-2.0 12 | ** 13 | ** Unless required by applicable law or agreed to in writing, software 14 | ** distributed under the License is distributed on an "AS IS" BASIS, 15 | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | ** See the License for the specific language governing permissions and 17 | ** limitations under the License. 18 | */ 19 | 20 | 21 | #ifndef VK_PLATFORM_H_ 22 | #define VK_PLATFORM_H_ 23 | 24 | #ifdef __cplusplus 25 | extern "C" 26 | { 27 | #endif // __cplusplus 28 | 29 | /* 30 | *************************************************************************************************** 31 | * Platform-specific directives and type declarations 32 | *************************************************************************************************** 33 | */ 34 | 35 | /* Platform-specific calling convention macros. 36 | * 37 | * Platforms should define these so that Vulkan clients call Vulkan commands 38 | * with the same calling conventions that the Vulkan implementation expects. 39 | * 40 | * VKAPI_ATTR - Placed before the return type in function declarations. 41 | * Useful for C++11 and GCC/Clang-style function attribute syntax. 42 | * VKAPI_CALL - Placed after the return type in function declarations. 43 | * Useful for MSVC-style calling convention syntax. 44 | * VKAPI_PTR - Placed between the '(' and '*' in function pointer types. 
45 | * 46 | * Function declaration: VKAPI_ATTR void VKAPI_CALL vkCommand(void); 47 | * Function pointer type: typedef void (VKAPI_PTR *PFN_vkCommand)(void); 48 | */ 49 | #if defined(_WIN32) 50 | // On Windows, Vulkan commands use the stdcall convention 51 | #define VKAPI_ATTR 52 | #define VKAPI_CALL __stdcall 53 | #define VKAPI_PTR VKAPI_CALL 54 | #elif defined(__ANDROID__) && defined(__ARM_ARCH) && __ARM_ARCH < 7 55 | #error "Vulkan isn't supported for the 'armeabi' NDK ABI" 56 | #elif defined(__ANDROID__) && defined(__ARM_ARCH) && __ARM_ARCH >= 7 && defined(__ARM_32BIT_STATE) 57 | // On Android 32-bit ARM targets, Vulkan functions use the "hardfloat" 58 | // calling convention, i.e. float parameters are passed in registers. This 59 | // is true even if the rest of the application passes floats on the stack, 60 | // as it does by default when compiling for the armeabi-v7a NDK ABI. 61 | #define VKAPI_ATTR __attribute__((pcs("aapcs-vfp"))) 62 | #define VKAPI_CALL 63 | #define VKAPI_PTR VKAPI_ATTR 64 | #else 65 | // On other platforms, use the default calling convention 66 | #define VKAPI_ATTR 67 | #define VKAPI_CALL 68 | #define VKAPI_PTR 69 | #endif 70 | 71 | #include <stddef.h> 72 | 73 | #if !defined(VK_NO_STDINT_H) 74 | #if defined(_MSC_VER) && (_MSC_VER < 1600) 75 | typedef signed __int8 int8_t; 76 | typedef unsigned __int8 uint8_t; 77 | typedef signed __int16 int16_t; 78 | typedef unsigned __int16 uint16_t; 79 | typedef signed __int32 int32_t; 80 | typedef unsigned __int32 uint32_t; 81 | typedef signed __int64 int64_t; 82 | typedef unsigned __int64 uint64_t; 83 | #else 84 | #include <stdint.h> 85 | #endif 86 | #endif // !defined(VK_NO_STDINT_H) 87 | 88 | #ifdef __cplusplus 89 | } // extern "C" 90 | #endif // __cplusplus 91 | 92 | // Platform-specific headers required by platform window system extensions. 93 | // These are enabled prior to #including "vulkan.h". The same enable then 94 | // controls inclusion of the extension interfaces in vulkan.h. 
95 | 96 | #ifdef VK_USE_PLATFORM_ANDROID_KHR 97 | #include <android/native_window.h> 98 | #endif 99 | 100 | #ifdef VK_USE_PLATFORM_MIR_KHR 101 | #include <mir_toolkit/client_types.h> 102 | #endif 103 | 104 | #ifdef VK_USE_PLATFORM_WAYLAND_KHR 105 | #include <wayland-client.h> 106 | #endif 107 | 108 | #ifdef VK_USE_PLATFORM_WIN32_KHR 109 | #include <windows.h> 110 | #endif 111 | 112 | #ifdef VK_USE_PLATFORM_XLIB_KHR 113 | #include <X11/Xlib.h> 114 | #endif 115 | 116 | #ifdef VK_USE_PLATFORM_XCB_KHR 117 | #include <xcb/xcb.h> 118 | #endif 119 | 120 | #endif 121 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # The MIT License (MIT) 2 | 3 | Copyright (c) 2017 Baldur Karlsson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Visor 2 | 3 | The Vulkan Ignoble Software Rasterizer. 4 | 5 | Just a little for-fun side project to experiment with making a full software ICD for Vulkan. No grand plans for a fully compliant or optimised ICD. 6 | -------------------------------------------------------------------------------- /buffers.cpp: -------------------------------------------------------------------------------- 1 | #include "precompiled.h" 2 | 3 | VKAPI_ATTR VkResult VKAPI_CALL vkCreateBuffer(VkDevice device, const VkBufferCreateInfo *pCreateInfo, 4 | const VkAllocationCallbacks *pAllocator, 5 | VkBuffer *pBuffer) 6 | { 7 | VkBuffer ret = new VkBuffer_T; 8 | ret->size = pCreateInfo->size; 9 | ret->bytes = NULL; // no memory bound 10 | *pBuffer = ret; 11 | return VK_SUCCESS; 12 | } 13 | 14 | VKAPI_ATTR void VKAPI_CALL vkDestroyBuffer(VkDevice device, VkBuffer buffer, 15 | const VkAllocationCallbacks *pAllocator) 16 | { 17 | delete buffer; 18 | } 19 | 20 | VKAPI_ATTR void VKAPI_CALL vkGetBufferMemoryRequirements(VkDevice device, VkBuffer buffer, 21 | VkMemoryRequirements *pMemoryRequirements) 22 | { 23 | // TODO 24 | pMemoryRequirements->alignment = 1; 25 | pMemoryRequirements->memoryTypeBits = 0x3; 26 | pMemoryRequirements->size = buffer->size; 27 | } 28 | 29 | VKAPI_ATTR VkResult VKAPI_CALL vkBindBufferMemory(VkDevice device, VkBuffer buffer, 30 | VkDeviceMemory memory, VkDeviceSize memoryOffset) 31 | { 32 | buffer->bytes = memory->bytes + memoryOffset; 33 | return VK_SUCCESS; 34 | } 35 | -------------------------------------------------------------------------------- /cmd_alloc.cpp: -------------------------------------------------------------------------------- 1 | #include "precompiled.h" 2 | 3 | VkCommandBuffer VkCommandPool_T::alloc() 4 | { 5 | // brain dead algorithm 6 | for(VkCommandBuffer c : 
buffers) 7 | { 8 | if(!c->live) 9 | { 10 | c->live = true; 11 | return c; 12 | } 13 | } 14 | 15 | VkCommandBuffer ret = new VkCommandBuffer_T; 16 | ret->live = true; 17 | buffers.push_back(ret); 18 | return ret; 19 | } 20 | 21 | VKAPI_ATTR VkResult VKAPI_CALL vkCreateCommandPool(VkDevice device, 22 | const VkCommandPoolCreateInfo *pCreateInfo, 23 | const VkAllocationCallbacks *pAllocator, 24 | VkCommandPool *pCommandPool) 25 | { 26 | *pCommandPool = new VkCommandPool_T; 27 | return VK_SUCCESS; 28 | } 29 | 30 | VKAPI_ATTR void VKAPI_CALL vkDestroyCommandPool(VkDevice device, VkCommandPool commandPool, 31 | const VkAllocationCallbacks *pAllocator) 32 | { 33 | for(VkCommandBuffer c : commandPool->buffers) 34 | delete c; 35 | delete commandPool; 36 | } 37 | 38 | VKAPI_ATTR VkResult VKAPI_CALL vkAllocateCommandBuffers(VkDevice device, 39 | const VkCommandBufferAllocateInfo *pAllocateInfo, 40 | VkCommandBuffer *pCommandBuffers) 41 | { 42 | for(uint32_t i = 0; i < pAllocateInfo->commandBufferCount; i++) 43 | { 44 | VkCommandBuffer cmd = pAllocateInfo->commandPool->alloc(); 45 | set_loader_magic_value(cmd); 46 | pCommandBuffers[i] = cmd; 47 | } 48 | return VK_SUCCESS; 49 | } 50 | 51 | VKAPI_ATTR void VKAPI_CALL vkFreeCommandBuffers(VkDevice device, VkCommandPool commandPool, 52 | uint32_t commandBufferCount, 53 | const VkCommandBuffer *pCommandBuffers) 54 | { 55 | for(uint32_t i = 0; i < commandBufferCount; i++) 56 | pCommandBuffers[i]->live = false; 57 | } 58 | 59 | byte *VkCommandBuffer_T::pushbytes(size_t sz) 60 | { 61 | size_t spare = commandStream.capacity() - commandStream.size(); 62 | // if there's no spare capacity, allocate more 63 | if(sz > spare) 64 | commandStream.reserve(commandStream.capacity() * 2 + sz); 65 | 66 | // resize up to the newly used bytes, then return 67 | byte *ret = commandStream.data() + commandStream.size(); 68 | commandStream.resize(commandStream.size() + sz); 69 | return ret; 70 | } 
-------------------------------------------------------------------------------- /cmd_exec.cpp: -------------------------------------------------------------------------------- 1 | #include "precompiled.h" 2 | #include "commands.h" 3 | #include "gpu.h" 4 | 5 | template 6 | const T &pull(const byte **offs) 7 | { 8 | const T *ret = (const T *)*offs; 9 | 10 | *offs += sizeof(T); 11 | 12 | return *ret; 13 | } 14 | 15 | void VkCommandBuffer_T::execute() const 16 | { 17 | const byte *cur = commandStream.data(); 18 | const byte *end = cur + commandStream.size(); 19 | 20 | GPUState state = {0}; 21 | 22 | while(cur < end) 23 | { 24 | const Command &cmd = pull(&cur); 25 | 26 | switch(cmd) 27 | { 28 | case Command::PipelineBarrier: 29 | { 30 | const cmd::PipelineBarrier &data = pull(&cur); 31 | 32 | (void)data; 33 | break; 34 | } 35 | case Command::BeginRenderPass: 36 | { 37 | const cmd::BeginRenderPass &data = pull(&cur); 38 | 39 | VkRenderPass_T::Subpass &subpass = data.renderPass->subpasses[0]; 40 | VkRenderPass_T::Attachment &col0 = subpass.colAttachments[0]; 41 | 42 | state.col[0] = data.framebuffer->attachments[col0.idx]->image; 43 | 44 | int clearIdx = 0; 45 | 46 | if(col0.clear) 47 | { 48 | ClearTarget(state.col[0], data.clearval[clearIdx++].color); 49 | } 50 | 51 | VkRenderPass_T::Attachment &depth = subpass.depthAttachment; 52 | 53 | if(depth.idx >= 0) 54 | { 55 | state.depth = data.framebuffer->attachments[depth.idx]->image; 56 | 57 | if(depth.clear) 58 | { 59 | ClearTarget(state.depth, data.clearval[clearIdx++].depthStencil); 60 | } 61 | } 62 | 63 | break; 64 | } 65 | case Command::EndRenderPass: 66 | { 67 | const cmd::EndRenderPass &data = pull(&cur); 68 | 69 | state.col[0] = VK_NULL_HANDLE; 70 | 71 | break; 72 | } 73 | case Command::BindPipeline: 74 | { 75 | const cmd::BindPipeline &data = pull(&cur); 76 | 77 | state.pipeline = data.pipeline; 78 | 79 | break; 80 | } 81 | case Command::BindDescriptorSets: 82 | { 83 | const cmd::BindDescriptorSets &data = 
pull(&cur); 84 | 85 | state.sets[data.idx] = data.set; 86 | 87 | break; 88 | } 89 | case Command::BindIB: 90 | { 91 | const cmd::BindIB &data = pull(&cur); 92 | 93 | state.ib.buffer = data.buffer; 94 | state.ib.offset = data.offset; 95 | state.ib.indexType = data.indexType; 96 | 97 | break; 98 | }; 99 | case Command::BindVB: 100 | { 101 | const cmd::BindVB &data = pull(&cur); 102 | 103 | state.vbs[data.slot].buffer = data.buffer; 104 | state.vbs[data.slot].offset = data.offset; 105 | 106 | break; 107 | }; 108 | case Command::SetViewport: 109 | { 110 | const cmd::SetViewport &data = pull(&cur); 111 | 112 | state.view = data.view; 113 | break; 114 | } 115 | case Command::SetScissors: 116 | { 117 | const cmd::SetScissors &data = pull(&cur); 118 | 119 | (void)data; 120 | break; 121 | } 122 | case Command::PushConstants: 123 | { 124 | const cmd::PushConstants &data = pull(&cur); 125 | 126 | memcpy(state.pushconsts + data.offset, data.values, data.size); 127 | break; 128 | } 129 | case Command::Draw: 130 | { 131 | const cmd::Draw &data = pull(&cur); 132 | 133 | DrawTriangles(state, data.vertexCount, data.firstVertex, false); 134 | break; 135 | } 136 | case Command::DrawIndexed: 137 | { 138 | const cmd::DrawIndexed &data = pull(&cur); 139 | 140 | DrawTriangles(state, data.indexCount, data.firstIndex, true); 141 | break; 142 | } 143 | case Command::CopyBuf2Img: 144 | { 145 | const cmd::CopyBuf2Img &data = pull(&cur); 146 | 147 | // only support tight packed copies right now 148 | assert(data.region.bufferRowLength == 0 && data.region.bufferImageHeight == 0); 149 | 150 | // only support non-offseted copies 151 | assert(data.region.imageOffset.x == 0 && data.region.imageOffset.y == 0 && 152 | data.region.imageOffset.z == 0); 153 | 154 | uint32_t mip = data.region.imageSubresource.mipLevel; 155 | 156 | const uint32_t w = std::max(1U, data.dstImage->extent.width >> mip); 157 | const uint32_t h = std::max(1U, data.dstImage->extent.height >> mip); 158 | const uint32_t bpp = 
data.dstImage->bytesPerPixel; 159 | 160 | // only support copying the whole mip 161 | assert(w == data.region.imageExtent.width && h == data.region.imageExtent.height); 162 | 163 | // only support copying one layer at a time 164 | assert(data.region.imageSubresource.layerCount == 1); 165 | 166 | VkDeviceSize offs = CalcSubresourceByteOffset(data.dstImage, mip, 167 | data.region.imageSubresource.baseArrayLayer); 168 | 169 | memcpy(data.dstImage->pixels + offs, data.srcBuffer->bytes + data.region.bufferOffset, 170 | w * h * bpp); 171 | 172 | break; 173 | } 174 | case Command::CopyBuf: 175 | { 176 | const cmd::CopyBuf &data = pull(&cur); 177 | 178 | memcpy(data.dstBuffer->bytes + data.region.dstOffset, 179 | data.srcBuffer->bytes + data.region.srcOffset, data.region.size); 180 | 181 | break; 182 | } 183 | } 184 | } 185 | } 186 | 187 | VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit(VkQueue queue, uint32_t submitCount, 188 | const VkSubmitInfo *pSubmits, VkFence fence) 189 | { 190 | MICROPROFILE_SCOPE(vkQueueSubmit); 191 | 192 | for(uint32_t i = 0; i < submitCount; i++) 193 | { 194 | for(uint32_t c = 0; c < pSubmits[i].commandBufferCount; c++) 195 | { 196 | pSubmits[i].pCommandBuffers[c]->execute(); 197 | } 198 | } 199 | 200 | return VK_SUCCESS; 201 | } 202 | -------------------------------------------------------------------------------- /cmd_record.cpp: -------------------------------------------------------------------------------- 1 | #include "precompiled.h" 2 | #include "commands.h" 3 | 4 | VKAPI_ATTR VkResult VKAPI_CALL vkBeginCommandBuffer(VkCommandBuffer commandBuffer, 5 | const VkCommandBufferBeginInfo *pBeginInfo) 6 | { 7 | commandBuffer->commandStream.clear(); 8 | return VK_SUCCESS; 9 | } 10 | 11 | VKAPI_ATTR VkResult VKAPI_CALL vkEndCommandBuffer(VkCommandBuffer commandBuffer) 12 | { 13 | // TODO 14 | return VK_SUCCESS; 15 | } 16 | 17 | VKAPI_ATTR void VKAPI_CALL vkCmdPipelineBarrier( 18 | VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, 19 | 
VkPipelineStageFlags dstStageMask, VkDependencyFlags dependencyFlags, 20 | uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers, 21 | uint32_t bufferMemoryBarrierCount, const VkBufferMemoryBarrier *pBufferMemoryBarriers, 22 | uint32_t imageMemoryBarrierCount, const VkImageMemoryBarrier *pImageMemoryBarriers) 23 | { 24 | cmd::PipelineBarrier *cmd = commandBuffer->push(); 25 | } 26 | 27 | VKAPI_ATTR void VKAPI_CALL vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, 28 | const VkRenderPassBeginInfo *pRenderPassBegin, 29 | VkSubpassContents contents) 30 | { 31 | cmd::BeginRenderPass *cmd = commandBuffer->push(); 32 | cmd->renderPass = pRenderPassBegin->renderPass; 33 | cmd->framebuffer = pRenderPassBegin->framebuffer; 34 | 35 | memcpy(cmd->clearval, pRenderPassBegin->pClearValues, 36 | sizeof(VkClearValue) * std::min(pRenderPassBegin->clearValueCount, 8U)); 37 | } 38 | 39 | VKAPI_ATTR void VKAPI_CALL vkCmdEndRenderPass(VkCommandBuffer commandBuffer) 40 | { 41 | cmd::EndRenderPass *cmd = commandBuffer->push(); 42 | } 43 | 44 | VKAPI_ATTR void VKAPI_CALL vkCmdBindPipeline(VkCommandBuffer commandBuffer, 45 | VkPipelineBindPoint pipelineBindPoint, 46 | VkPipeline pipeline) 47 | { 48 | cmd::BindPipeline *cmd = commandBuffer->push(); 49 | cmd->pipeline = pipeline; 50 | } 51 | 52 | VKAPI_ATTR void VKAPI_CALL vkCmdBindDescriptorSets( 53 | VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, 54 | uint32_t firstSet, uint32_t descriptorSetCount, const VkDescriptorSet *pDescriptorSets, 55 | uint32_t dynamicOffsetCount, const uint32_t *pDynamicOffsets) 56 | { 57 | assert(dynamicOffsetCount == 0); 58 | 59 | for(uint32_t i = 0; i < descriptorSetCount; i++) 60 | { 61 | cmd::BindDescriptorSets *cmd = commandBuffer->push(); 62 | 63 | cmd->idx = firstSet + i; 64 | cmd->set = pDescriptorSets[i]; 65 | } 66 | } 67 | 68 | VKAPI_ATTR void VKAPI_CALL vkCmdBindVertexBuffers(VkCommandBuffer commandBuffer, 69 | uint32_t firstBinding, 
uint32_t bindingCount, 70 | const VkBuffer *pBuffers, 71 | const VkDeviceSize *pOffsets) 72 | { 73 | for(uint32_t i = 0; i < bindingCount; i++) 74 | { 75 | cmd::BindVB *cmd = commandBuffer->push(); 76 | cmd->slot = firstBinding + i; 77 | cmd->buffer = pBuffers[i]; 78 | cmd->offset = pOffsets[i]; 79 | } 80 | } 81 | 82 | VKAPI_ATTR void VKAPI_CALL vkCmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, 83 | VkDeviceSize offset, VkIndexType indexType) 84 | { 85 | cmd::BindIB *cmd = commandBuffer->push(); 86 | cmd->buffer = buffer; 87 | cmd->offset = offset; 88 | cmd->indexType = indexType; 89 | } 90 | 91 | VKAPI_ATTR void VKAPI_CALL vkCmdSetViewport(VkCommandBuffer commandBuffer, uint32_t firstViewport, 92 | uint32_t viewportCount, const VkViewport *pViewports) 93 | { 94 | cmd::SetViewport *cmd = commandBuffer->push(); 95 | cmd->view = pViewports[0]; 96 | } 97 | 98 | VKAPI_ATTR void VKAPI_CALL vkCmdSetScissor(VkCommandBuffer commandBuffer, uint32_t firstScissor, 99 | uint32_t scissorCount, const VkRect2D *pScissors) 100 | { 101 | cmd::SetScissors *cmd = commandBuffer->push(); 102 | } 103 | 104 | VKAPI_ATTR void VKAPI_CALL vkCmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout, 105 | VkShaderStageFlags stageFlags, uint32_t offset, 106 | uint32_t size, const void *pValues) 107 | { 108 | cmd::PushConstants *cmd = commandBuffer->push(); 109 | cmd->offset = offset; 110 | cmd->size = size; 111 | memcpy(cmd->values, pValues, size); 112 | } 113 | 114 | VKAPI_ATTR void VKAPI_CALL vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, 115 | uint32_t instanceCount, uint32_t firstVertex, 116 | uint32_t firstInstance) 117 | { 118 | cmd::Draw *cmd = commandBuffer->push(); 119 | cmd->vertexCount = vertexCount; 120 | cmd->instanceCount = instanceCount; 121 | cmd->firstVertex = firstVertex; 122 | cmd->firstInstance = firstInstance; 123 | } 124 | 125 | VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, 
126 | uint32_t instanceCount, uint32_t firstIndex, 127 | int32_t vertexOffset, uint32_t firstInstance) 128 | { 129 | cmd::DrawIndexed *cmd = commandBuffer->push(); 130 | cmd->indexCount = indexCount; 131 | cmd->instanceCount = instanceCount; 132 | cmd->firstIndex = firstIndex; 133 | cmd->vertexOffset = vertexOffset; 134 | cmd->firstInstance = firstInstance; 135 | } 136 | 137 | VKAPI_ATTR void VKAPI_CALL vkCmdCopyBufferToImage(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, 138 | VkImage dstImage, VkImageLayout dstImageLayout, 139 | uint32_t regionCount, 140 | const VkBufferImageCopy *pRegions) 141 | { 142 | for(uint32_t r = 0; r < regionCount; r++) 143 | { 144 | cmd::CopyBuf2Img *cmd = commandBuffer->push(); 145 | cmd->srcBuffer = srcBuffer; 146 | cmd->dstImage = dstImage; 147 | cmd->region = pRegions[r]; 148 | } 149 | } 150 | 151 | VKAPI_ATTR void VKAPI_CALL vkCmdCopyBuffer(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, 152 | VkBuffer dstBuffer, uint32_t regionCount, 153 | const VkBufferCopy *pRegions) 154 | { 155 | for(uint32_t r = 0; r < regionCount; r++) 156 | { 157 | cmd::CopyBuf *cmd = commandBuffer->push(); 158 | cmd->srcBuffer = srcBuffer; 159 | cmd->dstBuffer = dstBuffer; 160 | cmd->region = pRegions[r]; 161 | } 162 | } -------------------------------------------------------------------------------- /commands.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace cmd 4 | { 5 | struct PipelineBarrier 6 | { 7 | static const Command CommandID = Command::PipelineBarrier; 8 | }; 9 | 10 | struct BeginRenderPass 11 | { 12 | static const Command CommandID = Command::BeginRenderPass; 13 | VkRenderPass renderPass; 14 | VkFramebuffer framebuffer; 15 | VkClearValue clearval[8]; 16 | }; 17 | 18 | struct EndRenderPass 19 | { 20 | static const Command CommandID = Command::EndRenderPass; 21 | }; 22 | 23 | struct BindPipeline 24 | { 25 | static const Command CommandID = Command::BindPipeline; 26 | 
VkPipeline pipeline; 27 | }; 28 | 29 | struct BindDescriptorSets 30 | { 31 | static const Command CommandID = Command::BindDescriptorSets; 32 | uint32_t idx; 33 | VkDescriptorSet set; 34 | }; 35 | 36 | struct BindVB 37 | { 38 | static const Command CommandID = Command::BindVB; 39 | uint32_t slot; 40 | VkBuffer buffer; 41 | VkDeviceSize offset; 42 | }; 43 | 44 | struct BindIB 45 | { 46 | static const Command CommandID = Command::BindIB; 47 | VkBuffer buffer; 48 | VkDeviceSize offset; 49 | VkIndexType indexType; 50 | }; 51 | 52 | struct SetViewport 53 | { 54 | static const Command CommandID = Command::SetViewport; 55 | VkViewport view; 56 | }; 57 | 58 | struct SetScissors 59 | { 60 | static const Command CommandID = Command::SetScissors; 61 | }; 62 | 63 | struct PushConstants 64 | { 65 | static const Command CommandID = Command::PushConstants; 66 | uint32_t offset, size; 67 | byte values[128]; 68 | }; 69 | 70 | struct Draw 71 | { 72 | static const Command CommandID = Command::Draw; 73 | uint32_t vertexCount, instanceCount, firstVertex, firstInstance; 74 | }; 75 | 76 | struct DrawIndexed 77 | { 78 | static const Command CommandID = Command::DrawIndexed; 79 | uint32_t indexCount, instanceCount, firstIndex, firstInstance; 80 | int32_t vertexOffset; 81 | }; 82 | 83 | struct CopyBuf2Img 84 | { 85 | static const Command CommandID = Command::CopyBuf2Img; 86 | VkBuffer srcBuffer; 87 | VkImage dstImage; 88 | VkBufferImageCopy region; 89 | }; 90 | 91 | struct CopyBuf 92 | { 93 | static const Command CommandID = Command::CopyBuf; 94 | VkBuffer srcBuffer; 95 | VkBuffer dstBuffer; 96 | VkBufferCopy region; 97 | }; 98 | }; 99 | -------------------------------------------------------------------------------- /descriptors.cpp: -------------------------------------------------------------------------------- 1 | #include "precompiled.h" 2 | 3 | VKAPI_ATTR VkResult VKAPI_CALL vkCreateDescriptorSetLayout( 4 | VkDevice device, const VkDescriptorSetLayoutCreateInfo *pCreateInfo, 5 | const 
VkAllocationCallbacks *pAllocator, VkDescriptorSetLayout *pDescriptorSetLayout) 6 | { 7 | VkDescriptorSetLayout ret = new VkDescriptorSetLayout_T; 8 | ret->bindingCount = pCreateInfo->bindingCount; 9 | *pDescriptorSetLayout = ret; 10 | return VK_SUCCESS; 11 | } 12 | 13 | VKAPI_ATTR void VKAPI_CALL vkDestroyDescriptorSetLayout(VkDevice device, 14 | VkDescriptorSetLayout descriptorSetLayout, 15 | const VkAllocationCallbacks *pAllocator) 16 | { 17 | // nothing to do 18 | } 19 | 20 | VKAPI_ATTR VkResult VKAPI_CALL vkCreatePipelineLayout(VkDevice device, 21 | const VkPipelineLayoutCreateInfo *pCreateInfo, 22 | const VkAllocationCallbacks *pAllocator, 23 | VkPipelineLayout *pPipelineLayout) 24 | { 25 | // TODO but for now return unique values 26 | static uint64_t nextPipelineLayout = 1; 27 | *pPipelineLayout = (VkPipelineLayout)(nextPipelineLayout++); 28 | return VK_SUCCESS; 29 | } 30 | 31 | VKAPI_ATTR void VKAPI_CALL vkDestroyPipelineLayout(VkDevice device, VkPipelineLayout pipelineLayout, 32 | const VkAllocationCallbacks *pAllocator) 33 | { 34 | // nothing to do 35 | } 36 | 37 | VKAPI_ATTR VkResult VKAPI_CALL vkCreateDescriptorPool(VkDevice device, 38 | const VkDescriptorPoolCreateInfo *pCreateInfo, 39 | const VkAllocationCallbacks *pAllocator, 40 | VkDescriptorPool *pDescriptorPool) 41 | { 42 | // TODO but for now return unique values 43 | static uint64_t nextDescriptorPool = 1; 44 | *pDescriptorPool = (VkDescriptorPool)(nextDescriptorPool++); 45 | return VK_SUCCESS; 46 | } 47 | 48 | VKAPI_ATTR void VKAPI_CALL vkDestroyDescriptorPool(VkDevice device, VkDescriptorPool sampler, 49 | const VkAllocationCallbacks *pAllocator) 50 | { 51 | // nothing to do 52 | } 53 | // Allocates one VkDescriptorSet_T per requested layout and writes it to the matching output slot. 54 | VKAPI_ATTR VkResult VKAPI_CALL vkAllocateDescriptorSets(VkDevice device, 55 | const VkDescriptorSetAllocateInfo *pAllocateInfo, 56 | VkDescriptorSet *pDescriptorSets) 57 | { 58 | for(uint32_t i = 0; i < pAllocateInfo->descriptorSetCount; i++) 59 | { 60 | pDescriptorSets[i] = new VkDescriptorSet_T;    // slot i, not slot 0: every requested set gets its own object 61 |
pDescriptorSets[i]->binds = 62 | new VkDescriptorSet_T::Bind[pAllocateInfo->pSetLayouts[i]->bindingCount];    // one Bind per binding of layout i 63 | } 64 | return VK_SUCCESS; 65 | } 66 | 67 | VKAPI_ATTR VkResult VKAPI_CALL vkFreeDescriptorSets(VkDevice device, VkDescriptorPool descriptorPool, 68 | uint32_t descriptorSetCount, 69 | const VkDescriptorSet *pDescriptorSets) 70 | { 71 | for(uint32_t i = 0; i < descriptorSetCount; i++) 72 | { 73 | delete[] pDescriptorSets[i]->binds; 74 | delete pDescriptorSets[i]; 75 | } 76 | return VK_SUCCESS; 77 | } 78 | 79 | VKAPI_ATTR void VKAPI_CALL vkUpdateDescriptorSets(VkDevice device, uint32_t descriptorWriteCount, 80 | const VkWriteDescriptorSet *pDescriptorWrites, 81 | uint32_t descriptorCopyCount, 82 | const VkCopyDescriptorSet *pDescriptorCopies) 83 | { 84 | for(uint32_t i = 0; i < descriptorWriteCount; i++) 85 | { 86 | const VkWriteDescriptorSet &w = pDescriptorWrites[i]; 87 | VkDescriptorSet_T::Bind &bind = w.dstSet->binds[w.dstBinding]; 88 | bind.type = w.descriptorType; 89 | 90 | switch(w.descriptorType) 91 | { 92 | case VK_DESCRIPTOR_TYPE_SAMPLER: 93 | case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: 94 | case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: 95 | case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: 96 | case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: bind.data.imageInfo = w.pImageInfo[0]; break; 97 | case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: 98 | case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: 99 | bind.data.texelBufferView = w.pTexelBufferView[0]; 100 | break; 101 | case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: 102 | case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: 103 | case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: 104 | case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: 105 | bind.data.bufferInfo = w.pBufferInfo[0]; 106 | break; 107 | } 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /gpu.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | struct GPUState 4 | { 5 | struct 6 |
{ 7 | VkBuffer buffer; 8 | VkDeviceSize offset; 9 | VkIndexType indexType; 10 | } ib; 11 | struct 12 | { 13 | VkBuffer buffer; 14 | VkDeviceSize offset; 15 | VkDeviceSize stride; 16 | } vbs[4]; 17 | VkViewport view; 18 | VkImage col[8]; 19 | VkImage depth; 20 | VkPipeline pipeline; 21 | VkDescriptorSet sets[8]; 22 | byte pushconsts[128]; 23 | }; 24 | 25 | __declspec(align(16)) struct int4 26 | { 27 | int4() {} 28 | int4(int X, int Y, int Z, int W) : x(X), y(Y), z(Z), w(W) {} 29 | union 30 | { 31 | struct 32 | { 33 | int x, y, z, w; 34 | }; 35 | int v[4]; 36 | }; 37 | }; 38 | 39 | __declspec(align(16)) struct float4 40 | { 41 | float4() {} 42 | float4(float X, float Y, float Z, float W) : x(X), y(Y), z(Z), w(W) {} 43 | union 44 | { 45 | struct 46 | { 47 | float x, y, z, w; 48 | }; 49 | float v[4]; 50 | }; 51 | }; 52 | 53 | struct VertexCacheEntry 54 | { 55 | float4 position; 56 | float4 interps[10]; 57 | }; 58 | 59 | void ClearTarget(VkImage target, const VkClearColorValue &col); 60 | void ClearTarget(VkImage target, const VkClearDepthStencilValue &col); 61 | void DrawTriangles(const GPUState &state, int numVerts, uint32_t first, bool indexed); 62 | 63 | void InitTextureCache(); 64 | extern "C" __declspec(dllexport) void sample_tex_wrapped(float u, float v, VkImage tex, 65 | VkDeviceSize byteOffs, float4 &out); 66 | extern "C" __declspec(dllexport) void sample_cube_wrapped(float x, float y, float z, VkImage tex, 67 | float4 &out); 68 | 69 | void InitRasterThreads(); 70 | void ShutdownRasterThreads(); -------------------------------------------------------------------------------- /icd_interface.cpp: -------------------------------------------------------------------------------- 1 | #include "precompiled.h" 2 | 3 | #define ICD_FUNC(name) \ 4 | if(!strcmp(pName, #name)) \ 5 | return (PFN_vkVoidFunction)&name; 6 | 7 | #define CHECK_INST_FUNCS() \ 8 | ICD_FUNC(vkEnumerateInstanceExtensionProperties); \ 9 | ICD_FUNC(vkEnumerateDeviceExtensionProperties); \ 10 | 
ICD_FUNC(vkGetDeviceProcAddr); \ 11 | ICD_FUNC(vkCreateInstance); \ 12 | ICD_FUNC(vkDestroyInstance); \ 13 | ICD_FUNC(vkEnumeratePhysicalDevices); \ 14 | ICD_FUNC(vkCreateDevice); \ 15 | ICD_FUNC(vkEnumerateInstanceVersion); 16 | 17 | #define CHECK_PHYS_FUNCS() \ 18 | ICD_FUNC(vkGetPhysicalDeviceFeatures); \ 19 | ICD_FUNC(vkGetPhysicalDeviceProperties); \ 20 | ICD_FUNC(vkGetPhysicalDeviceQueueFamilyProperties); \ 21 | ICD_FUNC(vkGetPhysicalDeviceMemoryProperties); \ 22 | ICD_FUNC(vkGetPhysicalDeviceFormatProperties); \ 23 | ICD_FUNC(vkGetPhysicalDeviceSurfaceSupportKHR); \ 24 | ICD_FUNC(vkGetPhysicalDeviceSurfaceFormatsKHR); \ 25 | ICD_FUNC(vkGetPhysicalDeviceSurfaceCapabilitiesKHR); \ 26 | ICD_FUNC(vkGetPhysicalDeviceSurfacePresentModesKHR); 27 | 28 | #define CHECK_DEV_FUNCS() \ 29 | ICD_FUNC(vkDestroyDevice); \ 30 | ICD_FUNC(vkGetDeviceQueue); \ 31 | ICD_FUNC(vkCreateFence); \ 32 | ICD_FUNC(vkDestroyFence); \ 33 | ICD_FUNC(vkCreateSemaphore); \ 34 | ICD_FUNC(vkDestroySemaphore); \ 35 | ICD_FUNC(vkCreateCommandPool); \ 36 | ICD_FUNC(vkDestroyCommandPool); \ 37 | ICD_FUNC(vkAllocateCommandBuffers); \ 38 | ICD_FUNC(vkFreeCommandBuffers); \ 39 | ICD_FUNC(vkBeginCommandBuffer); \ 40 | ICD_FUNC(vkEndCommandBuffer); \ 41 | ICD_FUNC(vkCreateSwapchainKHR); \ 42 | ICD_FUNC(vkDestroySwapchainKHR); \ 43 | ICD_FUNC(vkGetSwapchainImagesKHR); \ 44 | ICD_FUNC(vkAcquireNextImageKHR); \ 45 | ICD_FUNC(vkQueuePresentKHR); \ 46 | ICD_FUNC(vkCreateImageView); \ 47 | ICD_FUNC(vkDestroyImageView); \ 48 | ICD_FUNC(vkCreateImage); \ 49 | ICD_FUNC(vkDestroyImage); \ 50 | ICD_FUNC(vkCreateBuffer); \ 51 | ICD_FUNC(vkDestroyBuffer); \ 52 | ICD_FUNC(vkGetBufferMemoryRequirements); \ 53 | ICD_FUNC(vkGetImageMemoryRequirements); \ 54 | ICD_FUNC(vkBindBufferMemory); \ 55 | ICD_FUNC(vkBindImageMemory); \ 56 | ICD_FUNC(vkAllocateMemory); \ 57 | ICD_FUNC(vkFreeMemory); \ 58 | ICD_FUNC(vkMapMemory); \ 59 | ICD_FUNC(vkUnmapMemory); \ 60 | ICD_FUNC(vkFlushMappedMemoryRanges); \ 61 | 
ICD_FUNC(vkGetImageSubresourceLayout); \ 62 | ICD_FUNC(vkCreateSampler); \ 63 | ICD_FUNC(vkDestroySampler); \ 64 | ICD_FUNC(vkCreateDescriptorSetLayout); \ 65 | ICD_FUNC(vkDestroyDescriptorSetLayout); \ 66 | ICD_FUNC(vkCreatePipelineLayout); \ 67 | ICD_FUNC(vkDestroyPipelineLayout); \ 68 | ICD_FUNC(vkCreateFramebuffer); \ 69 | ICD_FUNC(vkDestroyFramebuffer); \ 70 | ICD_FUNC(vkCreateRenderPass); \ 71 | ICD_FUNC(vkDestroyRenderPass); \ 72 | ICD_FUNC(vkCreateShaderModule); \ 73 | ICD_FUNC(vkDestroyShaderModule); \ 74 | ICD_FUNC(vkCreatePipelineCache); \ 75 | ICD_FUNC(vkDestroyPipelineCache); \ 76 | ICD_FUNC(vkCreateGraphicsPipelines); \ 77 | ICD_FUNC(vkCreateComputePipelines); \ 78 | ICD_FUNC(vkDestroyPipeline); \ 79 | ICD_FUNC(vkCreateDescriptorPool); \ 80 | ICD_FUNC(vkDestroyDescriptorPool); \ 81 | ICD_FUNC(vkAllocateDescriptorSets); \ 82 | ICD_FUNC(vkFreeDescriptorSets); \ 83 | ICD_FUNC(vkUpdateDescriptorSets); \ 84 | ICD_FUNC(vkCmdPipelineBarrier); \ 85 | ICD_FUNC(vkCmdBeginRenderPass); \ 86 | ICD_FUNC(vkCmdEndRenderPass); \ 87 | ICD_FUNC(vkCmdBindIndexBuffer); \ 88 | ICD_FUNC(vkCmdBindVertexBuffers); \ 89 | ICD_FUNC(vkCmdBindPipeline); \ 90 | ICD_FUNC(vkCmdBindDescriptorSets); \ 91 | ICD_FUNC(vkCmdSetViewport); \ 92 | ICD_FUNC(vkCmdSetScissor); \ 93 | ICD_FUNC(vkCmdPushConstants); \ 94 | ICD_FUNC(vkCmdDraw); \ 95 | ICD_FUNC(vkCmdDrawIndexed); \ 96 | ICD_FUNC(vkCmdCopyBufferToImage); \ 97 | ICD_FUNC(vkCmdCopyBuffer); \ 98 | ICD_FUNC(vkQueueSubmit); \ 99 | ICD_FUNC(vkWaitForFences); \ 100 | ICD_FUNC(vkResetFences); \ 101 | ICD_FUNC(vkDeviceWaitIdle); \ 102 | ICD_FUNC(vkQueueWaitIdle); 103 | 104 | VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateInstanceVersion(uint32_t *pApiVersion) 105 | { 106 | if(pApiVersion) 107 | { 108 | *pApiVersion = VK_MAKE_VERSION(1, 0, 0); 109 | return VK_SUCCESS; 110 | } 111 | 112 | return VK_ERROR_INITIALIZATION_FAILED; 113 | } 114 | 115 | VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetDeviceProcAddr(VkDevice device, const char *pName) 116 | { 
117 | CHECK_DEV_FUNCS(); 118 | 119 | // we want to return non-NULL for all entry points otherwise our ICD will be discarded. We've 120 | // implemented all the functions needed for vkcube so if any other function gets called it will 121 | // execute a string and we can see what function was missing. 122 | return (PFN_vkVoidFunction)strdup(pName); 123 | } 124 | 125 | extern "C" { 126 | 127 | #if defined(_WIN32) 128 | #undef VKAPI_ATTR 129 | #define VKAPI_ATTR __declspec(dllexport) 130 | #endif 131 | 132 | VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(VkInstance instance, 133 | const char *pName) 134 | { 135 | CHECK_INST_FUNCS(); 136 | CHECK_PHYS_FUNCS(); 137 | CHECK_DEV_FUNCS(); 138 | 139 | if(!strcmp(pName, "vkCreateDebugReportCallbackEXT")) 140 | return NULL; 141 | if(!strcmp(pName, "vkCreateWin32SurfaceKHR")) 142 | return NULL; 143 | 144 | // we want to return non-NULL for all entry points otherwise our ICD will be discarded. We've 145 | // implemented all the functions needed for vkcube so if any other function gets called it will 146 | // execute a string and we can see what function was missing. 147 | return (PFN_vkVoidFunction)strdup(pName); 148 | } 149 | 150 | VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(VkInstance instance, 151 | const char *pName) 152 | { 153 | CHECK_PHYS_FUNCS(); 154 | 155 | // we want to return non-NULL for all entry points otherwise our ICD will be discarded. We've 156 | // implemented all the functions needed for vkcube so if any other function gets called it will 157 | // execute a string and we can see what function was missing. 
158 | return (PFN_vkVoidFunction)strdup(pName); 159 | } 160 | 161 | VKAPI_ATTR VkResult VKAPI_CALL vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion) 162 | { 163 | if(!pSupportedVersion) 164 | return VK_ERROR_INITIALIZATION_FAILED; 165 | 166 | *pSupportedVersion = std::min(CURRENT_LOADER_ICD_INTERFACE_VERSION, *pSupportedVersion); 167 | 168 | return VK_SUCCESS; 169 | } 170 | 171 | }; // extern "C" -------------------------------------------------------------------------------- /icd_stubs.cpp: -------------------------------------------------------------------------------- 1 | #include "precompiled.h" 2 | #include "gpu.h" 3 | #include "spirv_compile.h" 4 | 5 | VKAPI_ATTR VkResult VKAPI_CALL vkCreateInstance(const VkInstanceCreateInfo *pCreateInfo, 6 | const VkAllocationCallbacks *pAllocator, 7 | VkInstance *pInstance) 8 | { 9 | *pInstance = (VkInstance) new VK_LOADER_DATA; 10 | set_loader_magic_value(*pInstance); 11 | InitFrameStats(); 12 | InitRasterThreads(); 13 | InitLLVM(); 14 | return VK_SUCCESS; 15 | } 16 | 17 | VKAPI_ATTR void VKAPI_CALL vkDestroyInstance(VkInstance instance, 18 | const VkAllocationCallbacks *pAllocator) 19 | { 20 | ShutdownRasterThreads(); 21 | ShutdownFrameStats(); 22 | delete(VK_LOADER_DATA *)instance; 23 | } 24 | 25 | VKAPI_ATTR VkResult VKAPI_CALL vkCreateDevice(VkPhysicalDevice physicalDevice, 26 | const VkDeviceCreateInfo *pCreateInfo, 27 | const VkAllocationCallbacks *pAllocator, 28 | VkDevice *pDevice) 29 | { 30 | VkDevice dev = new VkDevice_T; 31 | assert((void *)dev == &dev->loaderMagic); 32 | set_loader_magic_value(dev); 33 | 34 | // we only have 1 queue family, there should only be 1 info 35 | assert(pCreateInfo->queueCreateInfoCount == 1); 36 | dev->queues.resize(pCreateInfo->pQueueCreateInfos[0].queueCount); 37 | 38 | for(uint32_t i = 0; i < pCreateInfo->pQueueCreateInfos[0].queueCount; i++) 39 | { 40 | dev->queues[i] = new VK_LOADER_DATA; 41 | set_loader_magic_value(dev->queues[i]); 42 | } 43 | 44 | *pDevice 
= dev; 45 | return VK_SUCCESS; 46 | } 47 | 48 | VKAPI_ATTR void VKAPI_CALL vkDestroyDevice(VkDevice device, const VkAllocationCallbacks *pAllocator) 49 | { 50 | for(VK_LOADER_DATA *q : device->queues) 51 | delete q; 52 | 53 | delete device; 54 | } 55 | 56 | VKAPI_ATTR void VKAPI_CALL vkGetDeviceQueue(VkDevice device, uint32_t queueFamilyIndex, 57 | uint32_t queueIndex, VkQueue *pQueue) 58 | { 59 | assert(queueFamilyIndex == 0); 60 | 61 | *pQueue = (VkQueue)device->queues[queueIndex]; 62 | } 63 | 64 | VKAPI_ATTR VkResult VKAPI_CALL vkCreateFence(VkDevice device, const VkFenceCreateInfo *pCreateInfo, 65 | const VkAllocationCallbacks *pAllocator, VkFence *pFence) 66 | { 67 | // TODO but for now return unique values 68 | static uint64_t nextFence = 1; 69 | *pFence = (VkFence)(nextFence++); 70 | return VK_SUCCESS; 71 | } 72 | 73 | VKAPI_ATTR void VKAPI_CALL vkDestroyFence(VkDevice device, VkFence fence, 74 | const VkAllocationCallbacks *pAllocator) 75 | { 76 | // nothing to do 77 | } 78 | 79 | VKAPI_ATTR VkResult VKAPI_CALL vkCreateSemaphore(VkDevice device, 80 | const VkSemaphoreCreateInfo *pCreateInfo, 81 | const VkAllocationCallbacks *pAllocator, 82 | VkSemaphore *pSemaphore) 83 | { 84 | // TODO but for now return unique values 85 | static uint64_t nextSemaphore = 1; 86 | *pSemaphore = (VkSemaphore)(nextSemaphore++); 87 | return VK_SUCCESS; 88 | } 89 | 90 | VKAPI_ATTR void VKAPI_CALL vkDestroySemaphore(VkDevice device, VkSemaphore semaphore, 91 | const VkAllocationCallbacks *pAllocator) 92 | { 93 | // nothing to do 94 | } 95 | 96 | VKAPI_ATTR VkResult VKAPI_CALL vkCreateSampler(VkDevice device, 97 | const VkSamplerCreateInfo *pCreateInfo, 98 | const VkAllocationCallbacks *pAllocator, 99 | VkSampler *pSampler) 100 | { 101 | // TODO but for now return unique values 102 | static uint64_t nextSampler = 1; 103 | *pSampler = (VkSampler)(nextSampler++); 104 | return VK_SUCCESS; 105 | } 106 | 107 | VKAPI_ATTR void VKAPI_CALL vkDestroySampler(VkDevice device, VkSampler 
sampler, 108 | const VkAllocationCallbacks *pAllocator) 109 | { 110 | // nothing to do 111 | } 112 | 113 | VKAPI_ATTR VkResult VKAPI_CALL vkCreatePipelineCache(VkDevice device, 114 | const VkPipelineCacheCreateInfo *pCreateInfo, 115 | const VkAllocationCallbacks *pAllocator, 116 | VkPipelineCache *pPipelineCache) 117 | { 118 | // TODO but for now return unique values 119 | static uint64_t nextPipelineCache = 1; 120 | *pPipelineCache = (VkPipelineCache)(nextPipelineCache++); 121 | return VK_SUCCESS; 122 | } 123 | 124 | VKAPI_ATTR void VKAPI_CALL vkDestroyPipelineCache(VkDevice device, VkPipelineCache pipelineCache, 125 | const VkAllocationCallbacks *pAllocator) 126 | { 127 | // nothing to do 128 | } 129 | 130 | VKAPI_ATTR VkResult VKAPI_CALL vkWaitForFences(VkDevice device, uint32_t fenceCount, 131 | const VkFence *pFences, VkBool32 waitAll, 132 | uint64_t timeout) 133 | { 134 | // TODO 135 | return VK_SUCCESS; 136 | } 137 | 138 | VKAPI_ATTR VkResult VKAPI_CALL vkResetFences(VkDevice device, uint32_t fenceCount, 139 | const VkFence *pFences) 140 | { 141 | // TODO 142 | return VK_SUCCESS; 143 | } 144 | 145 | VKAPI_ATTR VkResult VKAPI_CALL vkDeviceWaitIdle(VkDevice device) 146 | { 147 | // TODO 148 | return VK_SUCCESS; 149 | } 150 | 151 | VKAPI_ATTR VkResult VKAPI_CALL vkQueueWaitIdle(VkQueue queue) 152 | { 153 | // TODO 154 | return VK_SUCCESS; 155 | } 156 | -------------------------------------------------------------------------------- /images.cpp: -------------------------------------------------------------------------------- 1 | #include "precompiled.h" 2 | 3 | VKAPI_ATTR VkResult VKAPI_CALL vkCreateImageView(VkDevice device, 4 | const VkImageViewCreateInfo *pCreateInfo, 5 | const VkAllocationCallbacks *pAllocator, 6 | VkImageView *pImageView) 7 | { 8 | VkImageView ret = new VkImageView_T; 9 | ret->image = pCreateInfo->image; 10 | *pImageView = ret; 11 | return VK_SUCCESS; 12 | } 13 | 14 | VKAPI_ATTR void VKAPI_CALL vkDestroyImageView(VkDevice device, 
VkImageView imageView, 15 | const VkAllocationCallbacks *pAllocator) 16 | { 17 | delete imageView; 18 | } 19 | 20 | VKAPI_ATTR VkResult VKAPI_CALL vkCreateImage(VkDevice device, const VkImageCreateInfo *pCreateInfo, 21 | const VkAllocationCallbacks *pAllocator, VkImage *pImage) 22 | { 23 | VkImage ret = new VkImage_T; 24 | ret->pixels = NULL; // no memory bound 25 | ret->extent = pCreateInfo->extent; 26 | ret->bytesPerPixel = 4; 27 | ret->imageType = pCreateInfo->imageType; 28 | ret->format = pCreateInfo->format; 29 | ret->arrayLayers = pCreateInfo->arrayLayers; 30 | ret->mipLevels = pCreateInfo->mipLevels; 31 | if(pCreateInfo->format == VK_FORMAT_R8_UNORM || pCreateInfo->format == VK_FORMAT_BC2_UNORM_BLOCK || 32 | pCreateInfo->format == VK_FORMAT_BC3_UNORM_BLOCK) 33 | ret->bytesPerPixel = 1; 34 | *pImage = ret; 35 | return VK_SUCCESS; 36 | } 37 | 38 | VKAPI_ATTR void VKAPI_CALL vkDestroyImage(VkDevice device, VkImage image, 39 | const VkAllocationCallbacks *pAllocator) 40 | { 41 | delete image; 42 | } 43 | 44 | VKAPI_ATTR VkResult VKAPI_CALL vkBindImageMemory(VkDevice device, VkImage image, 45 | VkDeviceMemory memory, VkDeviceSize memoryOffset) 46 | { 47 | image->pixels = memory->bytes + memoryOffset; 48 | return VK_SUCCESS; 49 | } 50 | 51 | VKAPI_ATTR void VKAPI_CALL vkGetImageMemoryRequirements(VkDevice device, VkImage image, 52 | VkMemoryRequirements *pMemoryRequirements) 53 | { 54 | // TODO 55 | pMemoryRequirements->alignment = 1; 56 | pMemoryRequirements->memoryTypeBits = 0x3; 57 | pMemoryRequirements->size = 58 | image->extent.width * image->extent.height * image->arrayLayers * image->bytesPerPixel; 59 | if(image->imageType == VK_IMAGE_TYPE_3D) 60 | pMemoryRequirements->size *= image->extent.depth; 61 | 62 | // allocate a bunch more space for mips 63 | if(image->mipLevels > 1) 64 | pMemoryRequirements->size *= 2; 65 | } 66 | 67 | VKAPI_ATTR void VKAPI_CALL vkGetImageSubresourceLayout(VkDevice device, VkImage image, 68 | const VkImageSubresource 
*pSubresource, 69 | VkSubresourceLayout *pLayout) 70 | { 71 | assert(pSubresource->arrayLayer == 0 && pSubresource->mipLevel == 0 && 72 | pSubresource->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); 73 | 74 | pLayout->offset = 0; 75 | pLayout->rowPitch = image->extent.width * image->bytesPerPixel; 76 | pLayout->arrayPitch = pLayout->depthPitch = pLayout->size = 77 | image->extent.width * image->extent.height * image->bytesPerPixel; 78 | } 79 | -------------------------------------------------------------------------------- /memory.cpp: -------------------------------------------------------------------------------- 1 | #include "precompiled.h" 2 | 3 | VKAPI_ATTR VkResult VKAPI_CALL vkAllocateMemory(VkDevice device, 4 | const VkMemoryAllocateInfo *pAllocateInfo, 5 | const VkAllocationCallbacks *pAllocator, 6 | VkDeviceMemory *pMemory) 7 | { 8 | VkDeviceMemory ret = new VkDeviceMemory_T; 9 | ret->size = pAllocateInfo->allocationSize; 10 | ret->bytes = new byte[pAllocateInfo->allocationSize]; 11 | *pMemory = ret; 12 | return VK_SUCCESS; 13 | } 14 | 15 | VKAPI_ATTR void VKAPI_CALL vkFreeMemory(VkDevice device, VkDeviceMemory memory, 16 | const VkAllocationCallbacks *pAllocator) 17 | { 18 | delete[] memory->bytes; 19 | delete memory; 20 | } 21 | 22 | VKAPI_ATTR VkResult VKAPI_CALL vkMapMemory(VkDevice device, VkDeviceMemory memory, 23 | VkDeviceSize offset, VkDeviceSize size, 24 | VkMemoryMapFlags flags, void **ppData) 25 | { 26 | byte *data = memory->bytes; 27 | data += offset; 28 | *ppData = (void *)data; 29 | return VK_SUCCESS; 30 | } 31 | 32 | VKAPI_ATTR void VKAPI_CALL vkUnmapMemory(VkDevice device, VkDeviceMemory memory) 33 | { 34 | } 35 | 36 | VKAPI_ATTR VkResult VKAPI_CALL vkFlushMappedMemoryRanges(VkDevice device, uint32_t memoryRangeCount, 37 | const VkMappedMemoryRange *pMemoryRanges) 38 | { 39 | // no caching, no flushing! 
40 | return VK_SUCCESS; 41 | } -------------------------------------------------------------------------------- /precompiled.cpp: -------------------------------------------------------------------------------- 1 | #include "precompiled.h" 2 | 3 | VkDeviceSize CalcSubresourceByteOffset(VkImage img, uint32_t mip, uint32_t layer) 4 | { 5 | VkDeviceSize offs = 0; 6 | 7 | const uint32_t w = img->extent.width; 8 | const uint32_t h = img->extent.height; 9 | const uint32_t bpp = img->bytesPerPixel; 10 | 11 | for(uint32_t m = 0; m < mip; m++) 12 | { 13 | const uint32_t mw = std::max(1U, w >> m); 14 | const uint32_t mh = std::max(1U, h >> m); 15 | offs += mw * mh * bpp; 16 | } 17 | 18 | if(layer > 0) 19 | { 20 | uint32_t mw = w; 21 | uint32_t mh = h; 22 | 23 | VkDeviceSize sliceSize = 0; 24 | 25 | for(uint32_t m = 0; m < img->mipLevels; m++) 26 | { 27 | sliceSize += mw * mh * bpp; 28 | mw = std::max(1U, mw >> 1); 29 | mh = std::max(1U, mh >> 1); 30 | } 31 | 32 | offs += sliceSize * layer; 33 | } 34 | 35 | return offs; 36 | } 37 | -------------------------------------------------------------------------------- /precompiled.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #if defined(_WIN32) 4 | #define VK_USE_PLATFORM_WIN32_KHR 1 5 | #define WIN32_LEAN_AND_MEAN 6 | #define NOMINMAX 7 | 8 | #include 9 | #endif 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include "3rdparty/microprofile.h" 16 | #include "3rdparty/vk_icd.h" 17 | #include "3rdparty/vulkan.h" 18 | #include "stats.h" 19 | 20 | typedef unsigned char byte; 21 | 22 | struct VkDevice_T 23 | { 24 | uintptr_t loaderMagic; 25 | std::vector queues; 26 | }; 27 | 28 | // see commands.h 29 | enum class Command : uint16_t 30 | { 31 | PipelineBarrier, 32 | BeginRenderPass, 33 | EndRenderPass, 34 | BindPipeline, 35 | BindDescriptorSets, 36 | BindVB, 37 | BindIB, 38 | SetViewport, 39 | SetScissors, 40 | PushConstants, 41 | Draw, 42 | DrawIndexed, 43 
| CopyBuf2Img, 44 | CopyBuf, 45 | }; 46 | 47 | struct GPUState; 48 | struct VertexCacheEntry; 49 | struct LLVMFunction; 50 | struct float4; 51 | 52 | typedef void (*Shader)(); 53 | typedef void (*VertexShader)(const GPUState &state, uint32_t vertexIndex, VertexCacheEntry &out); 54 | typedef void (*FragmentShader)(const GPUState &state, float pixdepth, const float4 &bary, 55 | const VertexCacheEntry tri[3], float4 &out); 56 | 57 | struct VkCommandBuffer_T 58 | { 59 | uintptr_t loaderMagic; 60 | bool live = false; 61 | std::vector commandStream; 62 | 63 | template 64 | T *push() 65 | { 66 | Command *id = (Command *)pushbytes(sizeof(Command)); 67 | *id = T::CommandID; 68 | return (T *)pushbytes(sizeof(T)); 69 | } 70 | 71 | void execute() const; 72 | 73 | private: 74 | byte *pushbytes(size_t sz); 75 | }; 76 | 77 | struct VkCommandPool_T 78 | { 79 | std::vector buffers; 80 | VkCommandBuffer alloc(); 81 | }; 82 | 83 | struct VkDeviceMemory_T 84 | { 85 | VkDeviceSize size = 0; 86 | byte *bytes = NULL; 87 | }; 88 | 89 | struct VkImage_T 90 | { 91 | VkExtent3D extent = {0, 0, 0}; 92 | VkImageType imageType = VK_IMAGE_TYPE_MAX_ENUM; 93 | VkFormat format = VK_FORMAT_UNDEFINED; 94 | uint32_t arrayLayers = 1; 95 | uint32_t mipLevels = 1; 96 | uint32_t bytesPerPixel = 4; 97 | byte *pixels = NULL; 98 | }; 99 | 100 | struct VkImageView_T 101 | { 102 | VkImage_T *image = NULL; 103 | }; 104 | 105 | struct VkBuffer_T 106 | { 107 | VkDeviceSize size = 0; 108 | byte *bytes = NULL; 109 | }; 110 | 111 | struct VkShaderModule_T 112 | { 113 | LLVMFunction *handle = NULL; 114 | }; 115 | 116 | struct VkPipeline_T 117 | { 118 | struct 119 | { 120 | VkFormat format; 121 | uint32_t stride; 122 | uint32_t offset; 123 | uint32_t vb; 124 | } vattrs[16]; 125 | VkPrimitiveTopology topology = VK_PRIMITIVE_TOPOLOGY_MAX_ENUM; 126 | VkFrontFace frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; 127 | VkCullModeFlags cullMode = VK_CULL_MODE_FLAG_BITS_MAX_ENUM; 128 | VkCompareOp depthCompareOp = 
VK_COMPARE_OP_ALWAYS; 129 | bool depthWriteEnable = false; 130 | VkPipelineColorBlendAttachmentState blend; 131 | VertexShader vs = NULL; 132 | FragmentShader fs = NULL; 133 | }; 134 | 135 | struct VkDescriptorSetLayout_T 136 | { 137 | uint32_t bindingCount; 138 | }; 139 | 140 | struct VkDescriptorSet_T 141 | { 142 | struct Bind 143 | { 144 | Bind() { memset(&data, 0, sizeof(data)); } 145 | VkDescriptorType type = VK_DESCRIPTOR_TYPE_MAX_ENUM; 146 | 147 | union 148 | { 149 | VkDescriptorImageInfo imageInfo; 150 | VkDescriptorBufferInfo bufferInfo; 151 | VkBufferView texelBufferView; 152 | } data; 153 | }; 154 | 155 | Bind *binds; 156 | }; 157 | 158 | struct VkRenderPass_T 159 | { 160 | struct Attachment 161 | { 162 | int32_t idx; 163 | bool clear; 164 | }; 165 | 166 | struct Subpass 167 | { 168 | std::vector colAttachments; 169 | Attachment depthAttachment; 170 | }; 171 | 172 | std::vector subpasses; 173 | }; 174 | 175 | struct VkFramebuffer_T 176 | { 177 | std::vector attachments; 178 | }; 179 | 180 | struct VkSwapchainKHR_T 181 | { 182 | VkExtent2D extent = {0, 0}; 183 | 184 | struct Backbuffer 185 | { 186 | VkImage im = VK_NULL_HANDLE; 187 | VkDeviceMemory mem = VK_NULL_HANDLE; 188 | 189 | #if defined(_WIN32) 190 | HDC dc = NULL; 191 | HBITMAP bmp = NULL; 192 | #endif 193 | }; 194 | 195 | std::vector backbuffers; 196 | 197 | #if defined(_WIN32) 198 | HWND wnd = NULL; 199 | HDC dc = NULL; 200 | #endif 201 | 202 | uint32_t current = 0; 203 | }; 204 | 205 | VkDeviceSize CalcSubresourceByteOffset(VkImage img, uint32_t mip, uint32_t layer); 206 | -------------------------------------------------------------------------------- /query.cpp: -------------------------------------------------------------------------------- 1 | #include "precompiled.h" 2 | 3 | static VkResult FillPropertyCountAndList(const VkExtensionProperties *src, uint32_t numExts, 4 | uint32_t *dstCount, VkExtensionProperties *dstProps) 5 | { 6 | if(dstCount && !dstProps) 7 | { 8 | // just returning the 
number of extensions 9 | *dstCount = numExts; 10 | return VK_SUCCESS; 11 | } 12 | else if(dstCount && dstProps) 13 | { 14 | uint32_t dstSpace = *dstCount; 15 | 16 | // return the number of extensions. 17 | *dstCount = std::min(numExts, dstSpace); 18 | 19 | // copy as much as there's space for, up to how many there are 20 | memcpy(dstProps, src, sizeof(VkExtensionProperties) * std::min(numExts, dstSpace)); 21 | 22 | // if there was enough space, return success, else incomplete 23 | if(dstSpace >= numExts) 24 | return VK_SUCCESS; 25 | else 26 | return VK_INCOMPLETE; 27 | } 28 | 29 | // both parameters were NULL, return incomplete 30 | return VK_INCOMPLETE; 31 | } 32 | 33 | VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateInstanceExtensionProperties( 34 | const char *pLayerName, uint32_t *pPropertyCount, VkExtensionProperties *pProperties) 35 | { 36 | assert(pLayerName == NULL); 37 | 38 | static const VkExtensionProperties exts[] = { 39 | {VK_KHR_SURFACE_EXTENSION_NAME, VK_KHR_SURFACE_SPEC_VERSION}, 40 | #if defined(_WIN32) 41 | {VK_KHR_WIN32_SURFACE_EXTENSION_NAME, VK_KHR_WIN32_SURFACE_SPEC_VERSION}, 42 | #endif 43 | }; 44 | 45 | return FillPropertyCountAndList(exts, (uint32_t)sizeof(exts) / sizeof(exts[0]), pPropertyCount, 46 | pProperties); 47 | } 48 | 49 | VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateDeviceExtensionProperties(VkPhysicalDevice physicalDevice, 50 | const char *pLayerName, 51 | uint32_t *pPropertyCount, 52 | VkExtensionProperties *pProperties) 53 | { 54 | assert(pLayerName == NULL); 55 | 56 | static const VkExtensionProperties exts[] = { 57 | {VK_KHR_SWAPCHAIN_EXTENSION_NAME, VK_KHR_SWAPCHAIN_SPEC_VERSION}, 58 | }; 59 | 60 | return FillPropertyCountAndList(exts, (uint32_t)sizeof(exts) / sizeof(exts[0]), pPropertyCount, 61 | pProperties); 62 | } 63 | 64 | VKAPI_ATTR VkResult VKAPI_CALL vkEnumeratePhysicalDevices(VkInstance instance, 65 | uint32_t *pPhysicalDeviceCount, 66 | VkPhysicalDevice *pPhysicalDevices) 67 | { 68 | // one physical device. 
In theory we could expose a dozen though and it wouldn't matter 69 | if(pPhysicalDeviceCount && !pPhysicalDevices) 70 | { 71 | *pPhysicalDeviceCount = 1; 72 | return VK_SUCCESS; 73 | } 74 | static VK_LOADER_DATA physDev = {ICD_LOADER_MAGIC};    // one persistent handle; a fresh `new` per call was never freed and changed the handle between enumerations 75 | *pPhysicalDevices = (VkPhysicalDevice)&physDev; 76 | if(pPhysicalDeviceCount) *pPhysicalDeviceCount = 1;    // report how many handles were written 77 | return VK_SUCCESS; 78 | } 79 | 80 | VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, 81 | VkPhysicalDeviceFeatures *pFeatures) 82 | { 83 | memset(pFeatures, 0, sizeof(VkPhysicalDeviceFeatures)); 84 | pFeatures->fullDrawIndexUint32 = VK_TRUE; 85 | pFeatures->fillModeNonSolid = VK_TRUE; 86 | pFeatures->textureCompressionBC = VK_TRUE; 87 | } 88 | 89 | VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, 90 | VkPhysicalDeviceProperties *pProperties) 91 | { 92 | memset(pProperties, 0, sizeof(VkPhysicalDeviceProperties)); 93 | 94 | pProperties->apiVersion = VK_MAKE_VERSION(1, 0, 47); 95 | pProperties->driverVersion = VK_MAKE_VERSION(0, 1, 0); 96 | pProperties->vendorID = 0x10003; 97 | pProperties->deviceID = 0x01234; 98 | pProperties->deviceType = VK_PHYSICAL_DEVICE_TYPE_CPU; 99 | char devName[] = "Visor Software Renderer"; 100 | memcpy(pProperties->deviceName, devName, sizeof(devName)); 101 | for(int i = 0; i < VK_UUID_SIZE; i++) 102 | pProperties->pipelineCacheUUID[i] = uint8_t(i); 103 | 104 | // minimum set of limits. We can increase this in future when we know what we can support above 105 | // this minimum.
106 | VkSampleCountFlags minSampleCounts = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT; 107 | pProperties->limits = { 108 | /* uint32_t maxImageDimension1D = */ 4096, 109 | /* uint32_t maxImageDimension2D = */ 4096, 110 | /* uint32_t maxImageDimension3D = */ 256, 111 | /* uint32_t maxImageDimensionCube = */ 4096, 112 | /* uint32_t maxImageArrayLayers = */ 256, 113 | /* uint32_t maxTexelBufferElements = */ 65536, 114 | /* uint32_t maxUniformBufferRange = */ 16384, 115 | /* uint32_t maxStorageBufferRange = */ 1U << 27, 116 | /* uint32_t maxPushConstantsSize = */ 128, 117 | /* uint32_t maxMemoryAllocationCount = */ 4096, 118 | /* uint32_t maxSamplerAllocationCount = */ 4000, 119 | /* VkDeviceSize bufferImageGranularity = */ 131072, 120 | /* VkDeviceSize sparseAddressSpaceSize = */ 0, 121 | /* uint32_t maxBoundDescriptorSets = */ 4, 122 | /* uint32_t maxPerStageDescriptorSamplers = */ 16, 123 | /* uint32_t maxPerStageDescriptorUniformBuffers = */ 12, 124 | /* uint32_t maxPerStageDescriptorStorageBuffers = */ 4, 125 | /* uint32_t maxPerStageDescriptorSampledImages = */ 16, 126 | /* uint32_t maxPerStageDescriptorStorageImages = */ 4, 127 | /* uint32_t maxPerStageDescriptorInputAttachments = */ 4, 128 | /* uint32_t maxPerStageResources = */ 128, 129 | /* uint32_t maxDescriptorSetSamplers = */ 96, 130 | /* uint32_t maxDescriptorSetUniformBuffers = */ 72, 131 | /* uint32_t maxDescriptorSetUniformBuffersDynamic = */ 8, 132 | /* uint32_t maxDescriptorSetStorageBuffers = */ 24, 133 | /* uint32_t maxDescriptorSetStorageBuffersDynamic = */ 4, 134 | /* uint32_t maxDescriptorSetSampledImages = */ 96, 135 | /* uint32_t maxDescriptorSetStorageImages = */ 24, 136 | /* uint32_t maxDescriptorSetInputAttachments = */ 4, 137 | /* uint32_t maxVertexInputAttributes = */ 16, 138 | /* uint32_t maxVertexInputBindings = */ 16, 139 | /* uint32_t maxVertexInputAttributeOffset = */ 2047, 140 | /* uint32_t maxVertexInputBindingStride = */ 2048, 141 | /* uint32_t maxVertexOutputComponents = */ 
64, 142 | /* uint32_t maxTessellationGenerationLevel = */ 0, 143 | /* uint32_t maxTessellationPatchSize = */ 0, 144 | /* uint32_t maxTessellationControlPerVertexInputComponents = */ 0, 145 | /* uint32_t maxTessellationControlPerVertexOutputComponents = */ 0, 146 | /* uint32_t maxTessellationControlPerPatchOutputComponents = */ 0, 147 | /* uint32_t maxTessellationControlTotalOutputComponents = */ 0, 148 | /* uint32_t maxTessellationEvaluationInputComponents = */ 0, 149 | /* uint32_t maxTessellationEvaluationOutputComponents = */ 0, 150 | /* uint32_t maxGeometryShaderInvocations = */ 0, 151 | /* uint32_t maxGeometryInputComponents = */ 0, 152 | /* uint32_t maxGeometryOutputComponents = */ 0, 153 | /* uint32_t maxGeometryOutputVertices = */ 0, 154 | /* uint32_t maxGeometryTotalOutputComponents = */ 0, 155 | /* uint32_t maxFragmentInputComponents = */ 64, 156 | /* uint32_t maxFragmentOutputAttachments = */ 4, 157 | /* uint32_t maxFragmentDualSrcAttachments = */ 0, 158 | /* uint32_t maxFragmentCombinedOutputResources = */ 4, 159 | /* uint32_t maxComputeSharedMemorySize = */ 16384, 160 | /* uint32_t maxComputeWorkGroupCount[3] = */ {65536, 65536, 65536}, 161 | /* uint32_t maxComputeWorkGroupInvocations = */ 128, 162 | /* uint32_t maxComputeWorkGroupSize[3] = */ {128, 128, 64}, 163 | /* uint32_t subPixelPrecisionBits = */ 4, 164 | /* uint32_t subTexelPrecisionBits = */ 4, 165 | /* uint32_t mipmapPrecisionBits = */ 4, 166 | /* uint32_t maxDrawIndexedIndexValue = */ UINT32_MAX - 1, 167 | /* uint32_t maxDrawIndirectCount = */ 1, 168 | /* float maxSamplerLodBias = */ 2, 169 | /* float maxSamplerAnisotropy = */ 1, 170 | /* uint32_t maxViewports = */ 1, 171 | /* uint32_t maxViewportDimensions[2] = */ {4096, 4096}, 172 | /* float viewportBoundsRange[2] = */ {-8192.0f, 8191.0f}, 173 | /* uint32_t viewportSubPixelBits = */ 0, 174 | /* size_t minMemoryMapAlignment = */ 64, 175 | /* VkDeviceSize minTexelBufferOffsetAlignment = */ 256, 176 | /* VkDeviceSize 
minUniformBufferOffsetAlignment = */ 256, 177 | /* VkDeviceSize minStorageBufferOffsetAlignment = */ 256, 178 | /* int32_t minTexelOffset = */ -8, 179 | /* uint32_t maxTexelOffset = */ 7, 180 | /* int32_t minTexelGatherOffset = */ 0, 181 | /* uint32_t maxTexelGatherOffset = */ 0, 182 | /* float minInterpolationOffset = */ 0.0f, 183 | /* float maxInterpolationOffset = */ 0.0f, 184 | /* uint32_t subPixelInterpolationOffsetBits = */ 0, 185 | /* uint32_t maxFramebufferWidth = */ 4096, 186 | /* uint32_t maxFramebufferHeight = */ 4096, 187 | /* uint32_t maxFramebufferLayers = */ 256, 188 | /* VkSampleCountFlags framebufferColorSampleCounts = */ minSampleCounts, 189 | /* VkSampleCountFlags framebufferDepthSampleCounts = */ minSampleCounts, 190 | /* VkSampleCountFlags framebufferStencilSampleCounts = */ minSampleCounts, 191 | /* VkSampleCountFlags framebufferNoAttachmentsSampleCounts = */ minSampleCounts, 192 | /* uint32_t maxColorAttachments = */ 4, 193 | /* VkSampleCountFlags sampledImageColorSampleCounts = */ minSampleCounts, 194 | /* VkSampleCountFlags sampledImageIntegerSampleCounts = */ VK_SAMPLE_COUNT_1_BIT, 195 | /* VkSampleCountFlags sampledImageDepthSampleCounts = */ minSampleCounts, 196 | /* VkSampleCountFlags sampledImageStencilSampleCounts = */ minSampleCounts, 197 | /* VkSampleCountFlags storageImageSampleCounts = */ VK_SAMPLE_COUNT_1_BIT, 198 | /* uint32_t maxSampleMaskWords = */ 1, 199 | /* VkBool32 timestampComputeAndGraphics = */ VK_TRUE, 200 | /* float timestampPeriod = */ 1, 201 | /* uint32_t maxClipDistances = */ 0, 202 | /* uint32_t maxCullDistances = */ 0, 203 | /* uint32_t maxCombinedClipAndCullDistances = */ 0, 204 | /* uint32_t discreteQueuePriorities = */ 2, 205 | /* float pointSizeRange[2] = */ {1.0f, 1.0f}, 206 | /* float lineWidthRange[2] = */ {1.0f, 1.0f}, 207 | /* float pointSizeGranularity = */ 0.0f, 208 | /* float lineWidthGranularity = */ 0.0f, 209 | /* VkBool32 strictLines = */ VK_FALSE, 210 | /* VkBool32 standardSampleLocations = */ 
VK_TRUE, 211 | /* VkDeviceSize optimalBufferCopyOffsetAlignment = */ 1, 212 | /* VkDeviceSize optimalBufferCopyRowPitchAlignment = */ 1, 213 | /* VkDeviceSize nonCoherentAtomSize = */ 1, 214 | }; 215 | 216 | // sparse properties are all false 217 | pProperties->sparseProperties = {VK_FALSE, VK_FALSE, VK_FALSE, VK_FALSE, VK_FALSE}; 218 | } 219 | 220 | VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceQueueFamilyProperties( 221 | VkPhysicalDevice physicalDevice, uint32_t *pQueueFamilyPropertyCount, 222 | VkQueueFamilyProperties *pQueueFamilyProperties) 223 | { 224 | // one do-it-all queue family 225 | if(pQueueFamilyPropertyCount && !pQueueFamilyProperties) 226 | { 227 | *pQueueFamilyPropertyCount = 1; 228 | return; 229 | } 230 | 231 | memset(pQueueFamilyProperties, 0, sizeof(VkQueueFamilyProperties)); 232 | 233 | // we can do byte-granularity copies 234 | pQueueFamilyProperties->minImageTransferGranularity.width = 1; 235 | pQueueFamilyProperties->minImageTransferGranularity.height = 1; 236 | pQueueFamilyProperties->minImageTransferGranularity.depth = 1; 237 | 238 | // one do-it-all queue 239 | pQueueFamilyProperties->queueCount = 1; 240 | pQueueFamilyProperties->queueFlags = 241 | VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT; 242 | 243 | pQueueFamilyProperties->timestampValidBits = 64; 244 | } 245 | 246 | VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceMemoryProperties( 247 | VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties *pMemoryProperties) 248 | { 249 | memset(pMemoryProperties, 0, sizeof(VkPhysicalDeviceMemoryProperties)); 250 | 251 | // we could get away with one heap, but let's have a separate 'GPU' and 'CPU' heap 252 | // of 1GB each 253 | pMemoryProperties->memoryHeapCount = 2; 254 | pMemoryProperties->memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT; 255 | pMemoryProperties->memoryHeaps[0].size = 1ULL << 30; 256 | pMemoryProperties->memoryHeaps[1].flags = 0; 257 | pMemoryProperties->memoryHeaps[1].size = 1ULL << 30; 
258 | 259 | // keeping things simple, one type per heap 260 | pMemoryProperties->memoryTypeCount = 2; 261 | pMemoryProperties->memoryTypes[0].propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; 262 | pMemoryProperties->memoryTypes[0].heapIndex = 0; 263 | pMemoryProperties->memoryTypes[1].propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | 264 | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | 265 | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; 266 | pMemoryProperties->memoryTypes[1].heapIndex = 1; 267 | } 268 | 269 | VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceFormatProperties(VkPhysicalDevice physicalDevice, 270 | VkFormat format, 271 | VkFormatProperties *pFormatProperties) 272 | { 273 | // placeholder, just allow enough stuff without looking at the format 274 | pFormatProperties->bufferFeatures = 275 | VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT | 276 | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR | VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR; 277 | pFormatProperties->linearTilingFeatures = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | 278 | VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | 279 | VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; 280 | 281 | if(format == VK_FORMAT_D32_SFLOAT || format == VK_FORMAT_D32_SFLOAT_S8_UINT) 282 | { 283 | pFormatProperties->bufferFeatures = 0; 284 | pFormatProperties->linearTilingFeatures = 285 | VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; 286 | } 287 | 288 | pFormatProperties->optimalTilingFeatures = pFormatProperties->linearTilingFeatures; 289 | } 290 | -------------------------------------------------------------------------------- /rasterizer.cpp: -------------------------------------------------------------------------------- 1 | #include "precompiled.h" 2 | #include 3 | #include 4 | #include 5 | #include "gpu.h" 6 | 7 | // # cores - 1 (main rast thread steals work) 8 | #define NUM_THREADS 7 9 | 10 | // should main rasterizer thread start stealing work if it's spinning waiting for 
threads to finish 11 | #define WORK_STEAL 1 12 | 13 | struct TriangleWork 14 | { 15 | const GPUState *state; 16 | 17 | int ABx; 18 | int ABy; 19 | int ACx; 20 | int ACy; 21 | 22 | const VertexCacheEntry *vsout; 23 | const int4 *tri; 24 | 25 | int barymul; 26 | int area2; 27 | 28 | float invarea; 29 | 30 | float4 invw; 31 | float4 depth; 32 | 33 | int4 minwin, maxwin; 34 | }; 35 | 36 | struct Rasterizer 37 | { 38 | std::thread threads[NUM_THREADS]; 39 | std::condition_variable wake; 40 | std::mutex mutex; 41 | bool kill = false; 42 | std::queue triwork; 43 | 44 | std::atomic pending; 45 | } rast; 46 | 47 | void ProcessTriangles(const TriangleWork &work); 48 | 49 | void RasterLoop() 50 | { 51 | InitTextureCache(); 52 | 53 | for(;;) 54 | { 55 | TriangleWork triwork; 56 | 57 | { 58 | std::unique_lock lk(rast.mutex); 59 | rast.wake.wait(lk, [] { return rast.kill || !rast.triwork.empty(); }); 60 | 61 | if(rast.kill) 62 | return; 63 | 64 | triwork = rast.triwork.front(); 65 | rast.triwork.pop(); 66 | } 67 | 68 | ProcessTriangles(triwork); 69 | 70 | rast.pending--; 71 | } 72 | } 73 | 74 | void InitRasterThreads() 75 | { 76 | InitTextureCache(); 77 | for(int i = 0; i < NUM_THREADS; i++) 78 | rast.threads[i] = std::thread([i] { 79 | char buf[32]; 80 | sprintf_s(buf, "Raster%i", i); 81 | MicroProfileOnThreadCreate(buf); 82 | RasterLoop(); 83 | }); 84 | } 85 | 86 | void ShutdownRasterThreads() 87 | { 88 | { 89 | std::unique_lock lk(rast.mutex); 90 | rast.kill = true; 91 | } 92 | 93 | rast.wake.notify_all(); 94 | 95 | for(int i = 0; i < NUM_THREADS; i++) 96 | if(rast.threads[i].joinable()) 97 | rast.threads[i].join(); 98 | } 99 | 100 | uint32_t GetIndex(const GPUState &state, uint32_t vertexIndex, bool indexed) 101 | { 102 | if(!indexed) 103 | return vertexIndex; 104 | 105 | const byte *ib = state.ib.buffer->bytes + state.ib.offset; 106 | 107 | if(state.ib.indexType == VK_INDEX_TYPE_UINT16) 108 | { 109 | uint16_t *i16 = (uint16_t *)ib; 110 | i16 += vertexIndex; 111 | return 
*i16; 112 | } 113 | else 114 | { 115 | uint32_t *i32 = (uint32_t *)ib; 116 | i32 += vertexIndex; 117 | return *i32; 118 | } 119 | } 120 | 121 | static void ShadeVerts(const GPUState &state, int numVerts, uint32_t first, bool indexed, 122 | std::vector &out) 123 | { 124 | MICROPROFILE_SCOPE(rasterizer_ShadeVerts); 125 | 126 | VertexCacheEntry tri[4]; 127 | 128 | if(state.pipeline->topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST) 129 | { 130 | VertexCacheEntry vert; 131 | 132 | // only handle whole triangles 133 | int lastVert = numVerts - 3; 134 | uint32_t vertexIndex = first; 135 | 136 | for(int v = 0; v <= lastVert; v += 3) 137 | { 138 | state.pipeline->vs(state, GetIndex(state, vertexIndex, indexed), tri[0]); 139 | vertexIndex++; 140 | state.pipeline->vs(state, GetIndex(state, vertexIndex, indexed), tri[1]); 141 | vertexIndex++; 142 | state.pipeline->vs(state, GetIndex(state, vertexIndex, indexed), tri[2]); 143 | vertexIndex++; 144 | 145 | out.push_back(tri[0]); 146 | out.push_back(tri[1]); 147 | out.push_back(tri[2]); 148 | } 149 | } 150 | else if(state.pipeline->topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP) 151 | { 152 | assert(numVerts >= 3); 153 | 154 | // strip order to preserve winding order is: 155 | // N+0, N+1, N+2 156 | // N+2, N+1, N+3 157 | // N+2, N+3, N+4 158 | // N+4, N+5, N+6 159 | // ... 160 | // 161 | // So each pair of triangles forms the same pattern, we alternate between one and the other. 
162 | // Bear in mind that the strip might end after the first half of a pair so we can't just shade 163 | // all 4 verts: 164 | // 165 | // N+0, N+1, N+2 166 | // N+2, N+1, N+3 167 | // M = N+2 168 | // M+0, M+1, M+2 169 | // M+2, M+1, M+3 170 | // S = M+2 171 | // S+0, S+1, S+2 172 | // S+2, S+1, S+3 173 | 174 | // do the first one separately when we have to emit a whole triangle 175 | uint32_t vertexIndex = first; 176 | 177 | state.pipeline->vs(state, GetIndex(state, vertexIndex, indexed), tri[0]); 178 | vertexIndex++; 179 | state.pipeline->vs(state, GetIndex(state, vertexIndex, indexed), tri[1]); 180 | vertexIndex++; 181 | state.pipeline->vs(state, GetIndex(state, vertexIndex, indexed), tri[2]); 182 | vertexIndex++; 183 | 184 | out.push_back(tri[0]); 185 | out.push_back(tri[1]); 186 | out.push_back(tri[2]); 187 | 188 | numVerts -= 3; 189 | 190 | if(numVerts > 0) 191 | { 192 | state.pipeline->vs(state, GetIndex(state, vertexIndex, indexed), tri[3]); 193 | vertexIndex++; 194 | numVerts--; 195 | 196 | out.push_back(tri[2]); 197 | out.push_back(tri[1]); 198 | out.push_back(tri[3]); 199 | } 200 | 201 | while(numVerts > 0) 202 | { 203 | // pull in two re-used verts from previous run. 
204 | // See above: 205 | // 206 | // M = N+2 207 | // M+0, M+1, M+2 208 | // 209 | // so M+0 = N+2, M+1 = N+3 210 | tri[0] = tri[2]; 211 | tri[1] = tri[3]; 212 | 213 | state.pipeline->vs(state, GetIndex(state, vertexIndex, indexed), tri[2]); 214 | vertexIndex++; 215 | numVerts--; 216 | 217 | out.push_back(tri[0]); 218 | out.push_back(tri[1]); 219 | out.push_back(tri[2]); 220 | 221 | if(numVerts > 0) 222 | { 223 | state.pipeline->vs(state, GetIndex(state, vertexIndex, indexed), tri[3]); 224 | vertexIndex++; 225 | numVerts--; 226 | 227 | out.push_back(tri[2]); 228 | out.push_back(tri[1]); 229 | out.push_back(tri[3]); 230 | } 231 | } 232 | } 233 | else 234 | { 235 | printf("Unsupported primitive topology!\n"); 236 | } 237 | } 238 | 239 | static void ToWindow(uint32_t w, uint32_t h, const std::vector &pos, 240 | std::vector &out) 241 | { 242 | MICROPROFILE_SCOPE(rasterizer_ToWindow); 243 | 244 | for(const VertexCacheEntry &v : pos) 245 | { 246 | int4 win(0, 0, 0, 0); 247 | 248 | win.x = int((v.position.x / v.position.w + 1.0f) * 0.5f * w); 249 | win.y = int((v.position.y * -1.0f / v.position.w + 1.0f) * 0.5f * h); 250 | 251 | out.push_back(win); 252 | } 253 | } 254 | 255 | static void MinMax(const int4 *coords, int4 &minwin, int4 &maxwin) 256 | { 257 | MICROPROFILE_SCOPE(rasterizer_MinMax); 258 | 259 | minwin = {INT_MAX, INT_MAX, INT_MAX, INT_MAX}; 260 | maxwin = {INT_MIN, INT_MIN, INT_MIN, INT_MIN}; 261 | 262 | for(int i = 0; i < 3; i++) 263 | { 264 | for(int c = 0; c < 4; c++) 265 | { 266 | minwin.v[c] = std::min(minwin.v[c], coords[i].v[c]); 267 | maxwin.v[c] = std::max(maxwin.v[c], coords[i].v[c]); 268 | } 269 | } 270 | } 271 | 272 | static int double_triarea(const int4 &a, const int4 &b, const int4 &c) 273 | { 274 | return (b.x - a.x) * (c.y - a.y) - (b.y - a.y) * (c.x - a.x); 275 | } 276 | 277 | static float clamp01(float in) 278 | { 279 | return in > 1.0f ? 1.0f : (in < 0.0f ? 
0.0f : in); 280 | } 281 | 282 | static inline int4 barycentric(const int ABx, const int ABy, const int ACx, const int ACy, 283 | const int area2, const int4 *verts, const int4 &pixel) 284 | { 285 | /* 286 | 287 | static int4 cross(int4 a, int4 b) 288 | { 289 | return int4((a.y * b.z) - (b.y * a.z), (a.z * b.x) - (b.z * a.x), (a.x * b.y) - (b.x * a.y), 290 | 1); 291 | } 292 | 293 | int4 u = cross(int4(verts[1].x - verts[0].x, verts[2].x - verts[0].x, verts[0].x - pixel.x, 1), 294 | int4(verts[1].y - verts[0].y, verts[2].y - verts[0].y, verts[0].y - pixel.y, 1)); 295 | 296 | if(u.z == 0) 297 | return int4(-1, -1, -1, -1); 298 | 299 | return int4(u.z - (u.x + u.y), u.x, u.y, u.z); 300 | 301 | */ 302 | 303 | const int PAx = verts[0].x - pixel.x; 304 | const int PAy = verts[0].y - pixel.y; 305 | 306 | const int ux = (ACx * PAy) - (ACy * PAx); 307 | const int uy = (PAx * ABy) - (PAy * ABx); 308 | 309 | return int4(area2 - (ux + uy), ux, uy, 0); 310 | } 311 | 312 | void ClearTarget(VkImage target, const VkClearDepthStencilValue &col) 313 | { 314 | MICROPROFILE_SCOPE(rasterizer_ClearTarget); 315 | 316 | byte *bits = target->pixels; 317 | const uint32_t w = target->extent.width; 318 | const uint32_t h = target->extent.height; 319 | const uint32_t bpp = target->bytesPerPixel; 320 | 321 | assert(bpp == 4); 322 | 323 | for(uint32_t y = 0; y < h; y++) 324 | { 325 | for(uint32_t x = 0; x < w; x++) 326 | { 327 | memcpy(&bits[(y * w + x) * 4], &col.depth, 4); 328 | } 329 | } 330 | } 331 | 332 | void ClearTarget(VkImage target, const VkClearColorValue &col) 333 | { 334 | MICROPROFILE_SCOPE(rasterizer_ClearTarget); 335 | 336 | byte *bits = target->pixels; 337 | const uint32_t w = target->extent.width; 338 | const uint32_t h = target->extent.height; 339 | const uint32_t bpp = target->bytesPerPixel; 340 | 341 | byte eval[4]; 342 | eval[2] = byte(col.float32[0] * 255.0f); 343 | eval[1] = byte(col.float32[1] * 255.0f); 344 | eval[0] = byte(col.float32[2] * 255.0f); 345 | eval[3] = 
byte(col.float32[3] * 255.0f); 346 | 347 | if(bpp == 1) 348 | { 349 | memset(bits, eval[2], w * h); 350 | } 351 | else if(bpp == 4) 352 | { 353 | for(uint32_t y = 0; y < h; y++) 354 | { 355 | for(uint32_t x = 0; x < w; x++) 356 | { 357 | memcpy(&bits[(y * w + x) * bpp], eval, 4); 358 | } 359 | } 360 | } 361 | } 362 | 363 | void DrawTriangles(const GPUState &state, int numVerts, uint32_t first, bool indexed) 364 | { 365 | MICROPROFILE_SCOPE(rasterizer_DrawTriangles); 366 | 367 | const uint32_t w = state.col[0]->extent.width; 368 | const uint32_t h = state.col[0]->extent.height; 369 | 370 | static std::vector shadedVerts; 371 | shadedVerts.clear(); 372 | ShadeVerts(state, numVerts, first, indexed, shadedVerts); 373 | 374 | static std::vector winCoords; 375 | winCoords.clear(); 376 | ToWindow(w, h, shadedVerts, winCoords); 377 | 378 | int tris_in = 0, tris_out = 0; 379 | 380 | const int4 *curTriangle = winCoords.data(); 381 | const VertexCacheEntry *curVSOut = shadedVerts.data(); 382 | 383 | assert(winCoords.size() % 3 == 0); 384 | 385 | for(int i = 0; i < winCoords.size(); i += 3) 386 | { 387 | const int4 *tri = curTriangle; 388 | const VertexCacheEntry *vsout = curVSOut; 389 | 390 | curTriangle += 3; 391 | curVSOut += 3; 392 | 393 | tris_in++; 394 | 395 | int area2 = double_triarea(tri[0], tri[1], tri[2]); 396 | 397 | // skip zero-area triangles 398 | if(area2 == 0) 399 | continue; 400 | 401 | int area2_flipped = area2; 402 | 403 | int barymul = 1; 404 | // if clockwise winding is front-facing, invert barycentrics and area before backface test 405 | if(state.pipeline->frontFace == VK_FRONT_FACE_CLOCKWISE) 406 | { 407 | barymul *= -1; 408 | area2_flipped *= -1; 409 | } 410 | 411 | // cull front-faces if desired 412 | if(area2_flipped > 0 && (state.pipeline->cullMode & VK_CULL_MODE_FRONT_BIT)) 413 | continue; 414 | 415 | if(area2_flipped < 0) 416 | { 417 | // cull back-faces if desired 418 | if(state.pipeline->cullMode & VK_CULL_MODE_BACK_BIT) 419 | continue; 420 | 
421 | // otherwise flip barycentrics again to ensure they'll be positive 422 | barymul *= -1; 423 | area2_flipped *= -1; 424 | } 425 | 426 | tris_out++; 427 | 428 | int4 minwin, maxwin; 429 | MinMax(tri, minwin, maxwin); 430 | 431 | // clamp to screen, assume guard band is enough! 432 | minwin.x = std::max(0, minwin.x); 433 | minwin.y = std::max(0, minwin.y); 434 | maxwin.x = std::min(int(w - 1), maxwin.x); 435 | maxwin.y = std::min(int(h - 1), maxwin.y); 436 | 437 | TriangleWork work; 438 | 439 | work.state = &state; 440 | work.ABx = tri[1].x - tri[0].x; 441 | work.ABy = tri[1].y - tri[0].y; 442 | work.ACx = tri[2].x - tri[0].x; 443 | work.ACy = tri[2].y - tri[0].y; 444 | work.vsout = vsout; 445 | work.tri = tri; 446 | work.barymul = barymul; 447 | work.area2 = area2; 448 | work.invarea = 1.0f / float(area2_flipped); 449 | work.invw = float4(1.0f / vsout[0].position.w, 1.0f / vsout[1].position.w, 450 | 1.0f / vsout[2].position.w, 0.0f); 451 | work.depth = float4(vsout[0].position.z * work.invw.x, vsout[1].position.z * work.invw.y, 452 | vsout[2].position.z * work.invw.z, 0.0f); 453 | 454 | const int blockSize = 32; 455 | 456 | int xblocks = 1 + (maxwin.x - minwin.x) / blockSize; 457 | int yblocks = 1 + (maxwin.y - minwin.y) / blockSize; 458 | 459 | { 460 | MICROPROFILE_SCOPEI("rasterizer", "submit_work", MP_GREEN); 461 | 462 | for(int x = 0; x < xblocks; x++) 463 | { 464 | for(int y = 0; y < yblocks; y++) 465 | { 466 | work.minwin = minwin; 467 | work.minwin.x += blockSize * x; 468 | work.minwin.y += blockSize * y; 469 | 470 | work.maxwin.x = std::min(maxwin.x, work.minwin.x + blockSize); 471 | work.maxwin.y = std::min(maxwin.y, work.minwin.y + blockSize); 472 | 473 | { 474 | std::unique_lock lk(rast.mutex); 475 | rast.triwork.push(work); 476 | rast.pending++; 477 | } 478 | } 479 | } 480 | } 481 | 482 | { 483 | MICROPROFILE_SCOPEI("rasterizer", "notify_all", MP_RED); 484 | rast.wake.notify_all(); 485 | } 486 | } 487 | 488 | { 489 | 
MICROPROFILE_SCOPEI("rasterizer", "pending_flush", MP_BLUE); 490 | while(rast.pending) 491 | { 492 | TriangleWork triwork; 493 | bool work = false; 494 | 495 | #if WORK_STEAL 496 | { 497 | std::unique_lock lk(rast.mutex); 498 | 499 | if(!rast.triwork.empty()) 500 | { 501 | triwork = rast.triwork.front(); 502 | rast.triwork.pop(); 503 | work = true; 504 | } 505 | } 506 | 507 | if(work) 508 | { 509 | ProcessTriangles(triwork); 510 | 511 | rast.pending--; 512 | } 513 | #endif 514 | } 515 | } 516 | 517 | MICROPROFILE_COUNTER_ADD("rasterizer/triangles/in", tris_in); 518 | MICROPROFILE_COUNTER_ADD("rasterizer/triangles/out", tris_out); 519 | MICROPROFILE_COUNTER_ADD("rasterizer/draws/in", 1); 520 | } 521 | 522 | void ProcessTriangles(const TriangleWork &work) 523 | { 524 | MICROPROFILE_SCOPE(rasterizer_ProcessTriangles); 525 | 526 | MICROPROFILE_COUNTER_ADD("rasterizer/blocks/processed", 1); 527 | 528 | const GPUState &state = *work.state; 529 | const uint32_t w = state.col[0]->extent.width; 530 | const uint32_t h = state.col[0]->extent.height; 531 | const uint32_t bpp = state.col[0]->bytesPerPixel; 532 | 533 | byte *bits = state.col[0]->pixels; 534 | float *depthbits = state.depth ? 
(float *)state.depth->pixels : NULL; 535 | 536 | int pixels_written = 0, pixels_tested = 0, depth_passed = 0; 537 | 538 | for(int y = work.minwin.y; y < work.maxwin.y; y++) 539 | { 540 | for(int x = work.minwin.x; x < work.maxwin.x; x++) 541 | { 542 | int4 b = barycentric(work.ABx, work.ABy, work.ACx, work.ACy, work.area2, work.tri, 543 | int4(x, y, 0, 0)); 544 | 545 | b.x *= work.barymul; 546 | b.y *= work.barymul; 547 | b.z *= work.barymul; 548 | 549 | if(b.x >= 0 && b.y >= 0 && b.z >= 0) 550 | { 551 | // normalise the barycentrics 552 | float4 n = float4(float(b.x), float(b.y), float(b.z), 0.0f); 553 | n.x *= work.invarea; 554 | n.y *= work.invarea; 555 | n.z *= work.invarea; 556 | 557 | // calculate pixel depth 558 | float pixdepth = n.x * work.depth.x + n.y * work.depth.y + n.z * work.depth.z; 559 | 560 | bool passed = true; 561 | 562 | if(state.pipeline->depthCompareOp != VK_COMPARE_OP_ALWAYS && depthbits) 563 | { 564 | float curdepth = depthbits[y * w + x]; 565 | 566 | switch(state.pipeline->depthCompareOp) 567 | { 568 | case VK_COMPARE_OP_NEVER: passed = false; break; 569 | case VK_COMPARE_OP_LESS: passed = pixdepth < curdepth; break; 570 | case VK_COMPARE_OP_EQUAL: passed = pixdepth == curdepth; break; 571 | case VK_COMPARE_OP_LESS_OR_EQUAL: passed = pixdepth <= curdepth; break; 572 | case VK_COMPARE_OP_GREATER: passed = pixdepth > curdepth; break; 573 | case VK_COMPARE_OP_NOT_EQUAL: passed = pixdepth != curdepth; break; 574 | case VK_COMPARE_OP_GREATER_OR_EQUAL: passed = pixdepth >= curdepth; break; 575 | } 576 | } 577 | 578 | if(passed) 579 | { 580 | // perspective correct with W 581 | n.x *= work.invw.x; 582 | n.y *= work.invw.y; 583 | n.z *= work.invw.z; 584 | 585 | float invlen = 1.0f / (n.x + n.y + n.z); 586 | n.x *= invlen; 587 | n.y *= invlen; 588 | n.z *= invlen; 589 | 590 | float4 pix; 591 | state.pipeline->fs(state, pixdepth, n, work.vsout, pix); 592 | 593 | if(state.pipeline->blend.blendEnable) 594 | { 595 | float4 existing = float4(bits[(y * w 
+ x) * bpp + 2], bits[(y * w + x) * bpp + 1], 596 | bits[(y * w + x) * bpp + 0], 1.0f); 597 | existing.x /= 255.0f; 598 | existing.y /= 255.0f; 599 | existing.z /= 255.0f; 600 | 601 | float srcFactor = 1.0f; 602 | 603 | switch(state.pipeline->blend.srcColorBlendFactor) 604 | { 605 | case VK_BLEND_FACTOR_ZERO: srcFactor = 0.0f; break; 606 | case VK_BLEND_FACTOR_ONE: srcFactor = 1.0f; break; 607 | case VK_BLEND_FACTOR_SRC_ALPHA: srcFactor = pix.w; break; 608 | case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: srcFactor = 1.0f - pix.w; break; 609 | case VK_BLEND_FACTOR_SRC_COLOR: 610 | case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: 611 | case VK_BLEND_FACTOR_DST_COLOR: 612 | case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: 613 | case VK_BLEND_FACTOR_DST_ALPHA: 614 | case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: 615 | case VK_BLEND_FACTOR_CONSTANT_COLOR: 616 | case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: 617 | case VK_BLEND_FACTOR_CONSTANT_ALPHA: 618 | case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: 619 | case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: 620 | case VK_BLEND_FACTOR_SRC1_COLOR: 621 | case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: 622 | case VK_BLEND_FACTOR_SRC1_ALPHA: 623 | case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: 624 | printf("Unsupported blend factor\n"); 625 | break; 626 | } 627 | 628 | float dstFactor = 1.0f; 629 | 630 | switch(state.pipeline->blend.dstColorBlendFactor) 631 | { 632 | case VK_BLEND_FACTOR_ZERO: dstFactor = 0.0f; break; 633 | case VK_BLEND_FACTOR_ONE: dstFactor = 1.0f; break; 634 | case VK_BLEND_FACTOR_SRC_ALPHA: dstFactor = pix.w; break; 635 | case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: dstFactor = 1.0f - pix.w; break; 636 | case VK_BLEND_FACTOR_SRC_COLOR: 637 | case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: 638 | case VK_BLEND_FACTOR_DST_COLOR: 639 | case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: 640 | case VK_BLEND_FACTOR_DST_ALPHA: 641 | case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: 642 | case VK_BLEND_FACTOR_CONSTANT_COLOR: 643 | case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: 644 
| case VK_BLEND_FACTOR_CONSTANT_ALPHA: 645 | case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: 646 | case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: 647 | case VK_BLEND_FACTOR_SRC1_COLOR: 648 | case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: 649 | case VK_BLEND_FACTOR_SRC1_ALPHA: 650 | case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: 651 | printf("Unsupported blend factor\n"); 652 | break; 653 | } 654 | 655 | float4 blended; 656 | 657 | switch(state.pipeline->blend.colorBlendOp) 658 | { 659 | case VK_BLEND_OP_ADD: 660 | blended.x = srcFactor * pix.x + dstFactor * existing.x; 661 | blended.y = srcFactor * pix.y + dstFactor * existing.y; 662 | blended.z = srcFactor * pix.z + dstFactor * existing.z; 663 | blended.w = srcFactor * pix.w + dstFactor * existing.w; 664 | break; 665 | case VK_BLEND_OP_SUBTRACT: 666 | case VK_BLEND_OP_REVERSE_SUBTRACT: 667 | case VK_BLEND_OP_MIN: 668 | case VK_BLEND_OP_MAX: printf("Unsupported blend op\n"); break; 669 | } 670 | 671 | pix = blended; 672 | } 673 | 674 | bits[(y * w + x) * bpp + 2] = byte(clamp01(pix.x) * 255.0f); 675 | bits[(y * w + x) * bpp + 1] = byte(clamp01(pix.y) * 255.0f); 676 | bits[(y * w + x) * bpp + 0] = byte(clamp01(pix.z) * 255.0f); 677 | 678 | depth_passed++; 679 | 680 | if(state.pipeline->depthWriteEnable && depthbits) 681 | { 682 | depthbits[y * w + x] = pixdepth; 683 | } 684 | } 685 | 686 | pixels_written++; 687 | } 688 | 689 | pixels_tested++; 690 | } 691 | } 692 | 693 | MICROPROFILE_COUNTER_ADD("rasterizer/pixels/tested", pixels_tested); 694 | MICROPROFILE_COUNTER_ADD("rasterizer/pixels/written", pixels_written); 695 | MICROPROFILE_COUNTER_ADD("rasterizer/depth/passed", depth_passed); 696 | } -------------------------------------------------------------------------------- /renderpass.cpp: -------------------------------------------------------------------------------- 1 | #include "precompiled.h" 2 | 3 | VKAPI_ATTR VkResult VKAPI_CALL vkCreateFramebuffer(VkDevice device, 4 | const VkFramebufferCreateInfo *pCreateInfo, 5 | const 
VkAllocationCallbacks *pAllocator, 6 | VkFramebuffer *pFramebuffer) 7 | { 8 | VkFramebuffer ret = new VkFramebuffer_T; 9 | if(pCreateInfo->attachmentCount > 0) 10 | ret->attachments.insert(ret->attachments.begin(), pCreateInfo->pAttachments, 11 | pCreateInfo->pAttachments + pCreateInfo->attachmentCount); 12 | *pFramebuffer = ret; 13 | return VK_SUCCESS; 14 | } 15 | 16 | VKAPI_ATTR void VKAPI_CALL vkDestroyFramebuffer(VkDevice device, VkFramebuffer framebuffer, 17 | const VkAllocationCallbacks *pAllocator) 18 | { 19 | delete framebuffer; 20 | } 21 | 22 | VKAPI_ATTR VkResult VKAPI_CALL vkCreateRenderPass(VkDevice device, 23 | const VkRenderPassCreateInfo *pCreateInfo, 24 | const VkAllocationCallbacks *pAllocator, 25 | VkRenderPass *pRenderPass) 26 | { 27 | VkRenderPass ret = new VkRenderPass_T; 28 | 29 | ret->subpasses.reserve(pCreateInfo->subpassCount); 30 | for(uint32_t i = 0; i < pCreateInfo->subpassCount; i++) 31 | { 32 | VkRenderPass_T::Subpass sub; 33 | sub.colAttachments.resize(pCreateInfo->pSubpasses[i].colorAttachmentCount); 34 | for(uint32_t a = 0; a < pCreateInfo->pSubpasses[i].colorAttachmentCount; a++) 35 | { 36 | sub.colAttachments[a].idx = (int32_t)pCreateInfo->pSubpasses[i].pColorAttachments[a].attachment; 37 | sub.colAttachments[a].clear = (pCreateInfo->pAttachments[sub.colAttachments[a].idx].loadOp == 38 | VK_ATTACHMENT_LOAD_OP_CLEAR); 39 | 40 | sub.depthAttachment.idx = -1; 41 | if(pCreateInfo->pSubpasses[i].pDepthStencilAttachment) 42 | sub.depthAttachment.idx = pCreateInfo->pSubpasses[i].pDepthStencilAttachment->attachment; 43 | if(sub.depthAttachment.idx >= 0) 44 | sub.depthAttachment.clear = (pCreateInfo->pAttachments[sub.depthAttachment.idx].loadOp == 45 | VK_ATTACHMENT_LOAD_OP_CLEAR); 46 | } 47 | ret->subpasses.emplace_back(sub); 48 | } 49 | 50 | *pRenderPass = ret; 51 | return VK_SUCCESS; 52 | } 53 | 54 | VKAPI_ATTR void VKAPI_CALL vkDestroyRenderPass(VkDevice device, VkRenderPass renderPass, 55 | const VkAllocationCallbacks *pAllocator) 56 
| { 57 | delete renderPass; 58 | } 59 | -------------------------------------------------------------------------------- /shaders.cpp: -------------------------------------------------------------------------------- 1 | #include "precompiled.h" 2 | #include "spirv_compile.h" 3 | 4 | VKAPI_ATTR VkResult VKAPI_CALL vkCreateShaderModule(VkDevice device, 5 | const VkShaderModuleCreateInfo *pCreateInfo, 6 | const VkAllocationCallbacks *pAllocator, 7 | VkShaderModule *pShaderModule) 8 | { 9 | VkShaderModule ret = new VkShaderModule_T; 10 | 11 | ret->handle = CompileFunction(pCreateInfo->pCode, pCreateInfo->codeSize / sizeof(uint32_t)); 12 | 13 | if(ret->handle == NULL) 14 | return VK_ERROR_DEVICE_LOST; 15 | 16 | *pShaderModule = ret; 17 | return VK_SUCCESS; 18 | } 19 | 20 | VKAPI_ATTR void VKAPI_CALL vkDestroyShaderModule(VkDevice device, VkShaderModule shaderModule, 21 | const VkAllocationCallbacks *pAllocator) 22 | { 23 | DestroyFunction(shaderModule->handle); 24 | delete shaderModule; 25 | } 26 | 27 | VKAPI_ATTR VkResult VKAPI_CALL 28 | vkCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, 29 | const VkGraphicsPipelineCreateInfo *pCreateInfos, 30 | const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines) 31 | { 32 | for(uint32_t i = 0; i < createInfoCount; i++) 33 | { 34 | VkPipeline ret = new VkPipeline_T; 35 | 36 | uint32_t strides[16] = {0}; 37 | 38 | for(uint32_t vb = 0; vb < pCreateInfos[i].pVertexInputState->vertexBindingDescriptionCount; vb++) 39 | { 40 | const VkVertexInputBindingDescription &bind = 41 | pCreateInfos[i].pVertexInputState->pVertexBindingDescriptions[vb]; 42 | 43 | strides[bind.binding] = bind.stride; 44 | } 45 | 46 | for(uint32_t va = 0; va < pCreateInfos[i].pVertexInputState->vertexAttributeDescriptionCount; va++) 47 | { 48 | const VkVertexInputAttributeDescription &attr = 49 | pCreateInfos[i].pVertexInputState->pVertexAttributeDescriptions[va]; 50 | 51 | ret->vattrs[attr.location].vb = 
attr.binding; 52 | ret->vattrs[attr.location].format = attr.format; 53 | ret->vattrs[attr.location].offset = attr.offset; 54 | ret->vattrs[attr.location].stride = strides[attr.binding]; 55 | } 56 | 57 | ret->topology = pCreateInfos[i].pInputAssemblyState->topology; 58 | ret->frontFace = pCreateInfos[i].pRasterizationState->frontFace; 59 | ret->cullMode = pCreateInfos[i].pRasterizationState->cullMode; 60 | 61 | if(pCreateInfos[i].pDepthStencilState) 62 | { 63 | ret->depthCompareOp = pCreateInfos[i].pDepthStencilState->depthCompareOp; 64 | ret->depthWriteEnable = pCreateInfos[i].pDepthStencilState->depthWriteEnable == VK_TRUE; 65 | } 66 | 67 | if(pCreateInfos[i].pColorBlendState && pCreateInfos[i].pColorBlendState->attachmentCount > 0) 68 | { 69 | ret->blend = pCreateInfos[i].pColorBlendState->pAttachments[0]; 70 | } 71 | else 72 | { 73 | memset(&ret->blend, 0, sizeof(ret->blend)); 74 | } 75 | 76 | for(uint32_t s = 0; s < pCreateInfos[i].stageCount; s++) 77 | { 78 | VkShaderModule mod = pCreateInfos[i].pStages[s].module; 79 | if(pCreateInfos[i].pStages[s].stage == VK_SHADER_STAGE_VERTEX_BIT) 80 | { 81 | ret->vs = (VertexShader)GetFuncPointer(mod->handle, pCreateInfos[i].pStages[s].pName); 82 | } 83 | else if(pCreateInfos[i].pStages[s].stage == VK_SHADER_STAGE_FRAGMENT_BIT) 84 | { 85 | ret->fs = (FragmentShader)GetFuncPointer(mod->handle, pCreateInfos[i].pStages[s].pName); 86 | } 87 | } 88 | pPipelines[i] = ret; 89 | } 90 | return VK_SUCCESS; 91 | } 92 | 93 | VKAPI_ATTR VkResult VKAPI_CALL 94 | vkCreateComputePipelines(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, 95 | const VkComputePipelineCreateInfo *pCreateInfos, 96 | const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines) 97 | { 98 | for(uint32_t i = 0; i < createInfoCount; i++) 99 | { 100 | VkPipeline ret = new VkPipeline_T; 101 | pPipelines[i] = ret; 102 | } 103 | return VK_SUCCESS; 104 | } 105 | 106 | VKAPI_ATTR void VKAPI_CALL vkDestroyPipeline(VkDevice device, VkPipeline 
pipeline, 107 | const VkAllocationCallbacks *pAllocator) 108 | { 109 | delete pipeline; 110 | } 111 | -------------------------------------------------------------------------------- /spirv_compile.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | void InitLLVM(); 4 | void ShutdownLLVM(); 5 | 6 | struct LLVMFunction; 7 | 8 | LLVMFunction *CompileFunction(const uint32_t *pCode, size_t codeSize); 9 | Shader GetFuncPointer(LLVMFunction *func, const char *name); 10 | void DestroyFunction(LLVMFunction *func); -------------------------------------------------------------------------------- /stats.cpp: -------------------------------------------------------------------------------- 1 | #include "precompiled.h" 2 | 3 | MICROPROFILE_DECLARE(rasterizer_ShadeVerts); 4 | MICROPROFILE_DECLARE(rasterizer_ProcessTriangles); 5 | MICROPROFILE_DECLARE(rasterizer_ToWindow); 6 | MICROPROFILE_DECLARE(rasterizer_MinMax); 7 | MICROPROFILE_DECLARE(rasterizer_ClearTarget); 8 | MICROPROFILE_DECLARE(rasterizer_DrawTriangles); 9 | MICROPROFILE_DECLARE(vkQueueSubmit); 10 | MICROPROFILE_DECLARE(vkQueuePresentKHR); 11 | 12 | MICROPROFILE_DEFINE(rasterizer_ShadeVerts, "rasterizer", "ShadeVerts", MP_KHAKI); 13 | MICROPROFILE_DEFINE(rasterizer_ProcessTriangles, "rasterizer", "ProcessTriangles", MP_CHOCOLATE); 14 | MICROPROFILE_DEFINE(rasterizer_ToWindow, "rasterizer", "ToWindow", MP_MAROON); 15 | MICROPROFILE_DEFINE(rasterizer_MinMax, "rasterizer", "MinMax", MP_FIREBRICK); 16 | MICROPROFILE_DEFINE(rasterizer_ClearTarget, "rasterizer", "ClearTarget", MP_GAINSBORO); 17 | MICROPROFILE_DEFINE(rasterizer_DrawTriangles, "rasterizer", "DrawTriangles", MP_THISTLE); 18 | MICROPROFILE_DEFINE(vkQueueSubmit, "vulkan", "vkQueueSubmit", MP_RED); 19 | MICROPROFILE_DEFINE(vkQueuePresentKHR, "vulkan", "vkQueuePresentKHR", MP_BLUE); 20 | 21 | void InitFrameStats() 22 | { 23 | } 24 | 25 | void BeginFrameStats() 26 | { 27 | 
MICROPROFILE_COUNTER_SET("rasterizer/blocks/processed", 0); 28 | MICROPROFILE_COUNTER_SET("rasterizer/pixels/tested", 0); 29 | MICROPROFILE_COUNTER_SET("rasterizer/pixels/written", 0); 30 | MICROPROFILE_COUNTER_SET("rasterizer/depth/passed", 0); 31 | MICROPROFILE_COUNTER_SET("rasterizer/triangles/in", 0); 32 | MICROPROFILE_COUNTER_SET("rasterizer/triangles/out", 0); 33 | MICROPROFILE_COUNTER_SET("rasterizer/draws/in", 0); 34 | MICROPROFILE_COUNTER_SET("tcache/misses", 0); 35 | MICROPROFILE_COUNTER_SET("tcache/hits", 0); 36 | } 37 | 38 | void EndFrameStats() 39 | { 40 | MicroProfileFlip(NULL); 41 | } 42 | 43 | void ShutdownFrameStats() 44 | { 45 | MicroProfileShutdown(); 46 | } -------------------------------------------------------------------------------- /stats.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | void InitFrameStats(); 4 | void BeginFrameStats(); 5 | void EndFrameStats(); 6 | void ShutdownFrameStats(); 7 | 8 | MICROPROFILE_DECLARE(rasterizer_ShadeVerts); 9 | MICROPROFILE_DECLARE(rasterizer_ProcessTriangles); 10 | MICROPROFILE_DECLARE(rasterizer_ToWindow); 11 | MICROPROFILE_DECLARE(rasterizer_MinMax); 12 | MICROPROFILE_DECLARE(rasterizer_ClearTarget); 13 | MICROPROFILE_DECLARE(rasterizer_DrawTriangles); 14 | MICROPROFILE_DECLARE(vkQueueSubmit); 15 | MICROPROFILE_DECLARE(vkQueuePresentKHR); -------------------------------------------------------------------------------- /texture_sampling.cpp: -------------------------------------------------------------------------------- 1 | #include "precompiled.h" 2 | #include "3rdparty/decompress.h" 3 | #include "gpu.h" 4 | 5 | struct TextureCacheEntry 6 | { 7 | TextureCacheEntry *prev = NULL; 8 | TextureCacheEntry *next = NULL; 9 | int TLx = -1, TLy = -1; 10 | VkDeviceSize byteOffs = 0; 11 | VkImage tex = VK_NULL_HANDLE; 12 | float4 pixels[4][4]; 13 | }; 14 | 15 | __declspec(thread) TextureCacheEntry tcache_data[8]; 16 | 17 | __declspec(thread) 
TextureCacheEntry *tcache_head = NULL; 18 | 19 | void InitTextureCache() 20 | { 21 | tcache_head = &tcache_data[0]; 22 | 23 | tcache_data[0].next = &tcache_data[1]; 24 | 25 | for(int i = 1; i < 7; i++) 26 | { 27 | tcache_data[i].prev = &tcache_data[i - 1]; 28 | tcache_data[i].next = &tcache_data[i + 1]; 29 | } 30 | 31 | tcache_data[7].prev = &tcache_data[6]; 32 | } 33 | 34 | float4 &CacheCoord(VkImage tex, VkDeviceSize byteOffs, int x, int y) 35 | { 36 | // get the top-left of the 4x4 quad containin (x,y) 37 | int TLx = x & ~0x3; 38 | int TLy = y & ~0x3; 39 | 40 | TextureCacheEntry *tcache = tcache_head; 41 | while(tcache) 42 | { 43 | if(tcache->tex == tex && tcache->byteOffs == byteOffs && tcache->TLx == TLx && tcache->TLy == TLy) 44 | { 45 | MICROPROFILE_COUNTER_ADD("tcache/hits", 1); 46 | 47 | // move to the head of the cache list if it's not there already 48 | if(tcache->prev) 49 | { 50 | // remove from list 51 | tcache->prev->next = tcache->next; 52 | if(tcache->next) 53 | tcache->next->prev = tcache->prev; 54 | 55 | // move to head 56 | tcache_head->prev = tcache; 57 | tcache->prev = NULL; 58 | tcache->next = tcache_head; 59 | tcache_head = tcache; 60 | } 61 | 62 | return tcache->pixels[y & 0x3][x & 0x3]; 63 | } 64 | 65 | // if we hit the last item, break 66 | if(!tcache->next) 67 | break; 68 | 69 | tcache = tcache->next; 70 | } 71 | 72 | MICROPROFILE_COUNTER_ADD("tcache/misses", 1); 73 | 74 | // tcache points to the last item in the list since we just iterated over all of it above. 
75 | 76 | // remove from the end of the list 77 | tcache->prev->next = NULL; 78 | 79 | // move to head 80 | tcache_head->prev = tcache; 81 | tcache->prev = NULL; 82 | tcache->next = tcache_head; 83 | tcache_head = tcache; 84 | 85 | // pull in contents from texture 86 | tcache->tex = tex; 87 | tcache->byteOffs = byteOffs; 88 | tcache->TLx = TLx; 89 | tcache->TLy = TLy; 90 | 91 | byte *base = tex->pixels + byteOffs; 92 | 93 | if(tex->format == VK_FORMAT_BC2_UNORM_BLOCK || tex->format == VK_FORMAT_BC3_UNORM_BLOCK) 94 | { 95 | byte decoded[16 * 4]; 96 | 97 | const int blockX = TLx >> 2; 98 | const int blockY = TLy >> 2; 99 | const uint32_t widthInBlocks = tex->extent.width >> 2; 100 | 101 | byte *blockbase = base + (blockY * widthInBlocks + blockX) * 16; 102 | 103 | if(tex->format == VK_FORMAT_BC2_UNORM_BLOCK) 104 | DecompressBlockBC2(0, 0, 4 * sizeof(uint32_t), blockbase, decoded); 105 | else if(tex->format == VK_FORMAT_BC3_UNORM_BLOCK) 106 | DecompressBlockBC3(0, 0, 4 * sizeof(uint32_t), blockbase, decoded); 107 | 108 | for(int row = 0; row < 4; row++) 109 | { 110 | for(int col = 0; col < 4; col++) 111 | { 112 | tcache->pixels[row][col] = float4(float(decoded[(row * 4 + col) * 4 + 0]) / 255.0f, 113 | float(decoded[(row * 4 + col) * 4 + 1]) / 255.0f, 114 | float(decoded[(row * 4 + col) * 4 + 2]) / 255.0f, 115 | float(decoded[(row * 4 + col) * 4 + 3]) / 255.0f); 116 | } 117 | } 118 | } 119 | else 120 | { 121 | const uint32_t bpp = tex->bytesPerPixel; 122 | 123 | for(int row = 0; row < 4; row++) 124 | { 125 | byte *rowbase = base + ((TLy + row) * tex->extent.width + TLx) * bpp; 126 | 127 | for(int col = 0; col < 4; col++) 128 | { 129 | tcache->pixels[row][col] = 130 | float4(float(rowbase[col * bpp + 0]) / 255.0f, float(rowbase[col * bpp + 1]) / 255.0f, 131 | float(rowbase[col * bpp + 2]) / 255.0f, float(rowbase[col * bpp + 3]) / 255.0f); 132 | } 133 | } 134 | } 135 | 136 | return tcache->pixels[y & 0x3][x & 0x3]; 137 | } 138 | 139 | extern "C" __declspec(dllexport) 
void sample_tex_wrapped(float u, float v, VkImage tex, 140 | VkDeviceSize byteOffs, float4 &out) 141 | { 142 | u = u - floor(u); 143 | v = v - floor(v); 144 | 145 | u *= tex->extent.width; 146 | v *= tex->extent.height; 147 | 148 | int iu0 = int(u); 149 | int iv0 = int(v); 150 | int iu1 = iu0 + 1; 151 | int iv1 = iv0 + 1; 152 | 153 | if(iu1 >= (int)tex->extent.width) 154 | iu1 -= tex->extent.width; 155 | if(iv1 >= (int)tex->extent.height) 156 | iv1 -= tex->extent.height; 157 | 158 | float fu = u - float(iu0); 159 | float fv = v - float(iv0); 160 | float inv_fu = 1.0f - fu; 161 | float inv_fv = 1.0f - fv; 162 | 163 | const float4 &TL = CacheCoord(tex, byteOffs, iu0, iv0); 164 | const float4 &TR = CacheCoord(tex, byteOffs, iu1, iv0); 165 | const float4 &BL = CacheCoord(tex, byteOffs, iu0, iv1); 166 | const float4 &BR = CacheCoord(tex, byteOffs, iu1, iv1); 167 | 168 | float4 top; 169 | top.x = TL.x * inv_fu + TR.x * fu; 170 | top.y = TL.y * inv_fu + TR.y * fu; 171 | top.z = TL.z * inv_fu + TR.z * fu; 172 | top.w = TL.w * inv_fu + TR.w * fu; 173 | 174 | float4 bottom; 175 | bottom.x = BL.x * inv_fu + BR.x * fu; 176 | bottom.y = BL.y * inv_fu + BR.y * fu; 177 | bottom.z = BL.z * inv_fu + BR.z * fu; 178 | bottom.w = BL.w * inv_fu + BR.w * fu; 179 | 180 | out.x = top.x * inv_fv + bottom.x * fv; 181 | out.y = top.y * inv_fv + bottom.y * fv; 182 | out.z = top.z * inv_fv + bottom.z * fv; 183 | out.w = top.w * inv_fv + bottom.w * fv; 184 | } 185 | 186 | extern "C" __declspec(dllexport) void sample_cube_wrapped(float x, float y, float z, VkImage tex, 187 | float4 &out) 188 | { 189 | float ax = abs(x); 190 | float ay = abs(y); 191 | float az = abs(z); 192 | 193 | bool px = x > 0.0f; 194 | bool py = y > 0.0f; 195 | bool pz = z > 0.0f; 196 | 197 | float axis, u, v; 198 | VkDeviceSize offset = 0; 199 | 200 | // X+ 201 | if(px && ax >= ay && ax >= az) 202 | { 203 | axis = ax; 204 | u = -z; 205 | v = -y; 206 | offset = CalcSubresourceByteOffset(tex, 0, 0); 207 | } 208 | // X- 209 | 
if(!px && ax >= ay && ax >= az) 210 | { 211 | axis = ax; 212 | u = z; 213 | v = -y; 214 | offset = CalcSubresourceByteOffset(tex, 0, 1); 215 | } 216 | // Y+ 217 | if(py && ay >= ax && ay >= az) 218 | { 219 | axis = ay; 220 | u = x; 221 | v = z; 222 | offset = CalcSubresourceByteOffset(tex, 0, 2); 223 | } 224 | // Y- 225 | if(!py && ay >= ax && ay >= az) 226 | { 227 | axis = ay; 228 | u = x; 229 | v = -z; 230 | offset = CalcSubresourceByteOffset(tex, 0, 3); 231 | } 232 | // Z+ 233 | if(pz && az >= ax && az >= ay) 234 | { 235 | axis = az; 236 | u = x; 237 | v = -y; 238 | offset = CalcSubresourceByteOffset(tex, 0, 4); 239 | } 240 | // Z- 241 | if(!pz && az >= ax && az >= ay) 242 | { 243 | axis = az; 244 | u = -x; 245 | v = -y; 246 | offset = CalcSubresourceByteOffset(tex, 0, 5); 247 | } 248 | 249 | sample_tex_wrapped(0.5f * (u / axis + 1.0f), 0.5f * (v / axis + 1.0f), tex, offset, out); 250 | } 251 | -------------------------------------------------------------------------------- /visor.json: -------------------------------------------------------------------------------- 1 | { 2 | "file_format_version": "1.0.0", 3 | "ICD": { 4 | "library_path": ".\\x64\\Development\\visor.dll", 5 | "api_version": "1.0.47" 6 | } 7 | } -------------------------------------------------------------------------------- /visor.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 14 4 | VisualStudioVersion = 14.0.25420.1 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "visor", "visor.vcxproj", "{DC7771D7-E4DB-4AEF-92E9-ACC03FEE37F0}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Development|x64 = Development|x64 11 | Development|x86 = Development|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | 
GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {DC7771D7-E4DB-4AEF-92E9-ACC03FEE37F0}.Development|x64.ActiveCfg = Development|x64 17 | {DC7771D7-E4DB-4AEF-92E9-ACC03FEE37F0}.Development|x64.Build.0 = Development|x64 18 | {DC7771D7-E4DB-4AEF-92E9-ACC03FEE37F0}.Development|x86.ActiveCfg = Development|Win32 19 | {DC7771D7-E4DB-4AEF-92E9-ACC03FEE37F0}.Development|x86.Build.0 = Development|Win32 20 | {DC7771D7-E4DB-4AEF-92E9-ACC03FEE37F0}.Release|x64.ActiveCfg = Release|x64 21 | {DC7771D7-E4DB-4AEF-92E9-ACC03FEE37F0}.Release|x64.Build.0 = Release|x64 22 | {DC7771D7-E4DB-4AEF-92E9-ACC03FEE37F0}.Release|x86.ActiveCfg = Release|Win32 23 | {DC7771D7-E4DB-4AEF-92E9-ACC03FEE37F0}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | EndGlobal 29 | -------------------------------------------------------------------------------- /visor.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Development 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Development 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {DC7771D7-E4DB-4AEF-92E9-ACC03FEE37F0} 23 | Win32Proj 24 | visor 25 | 8.1 26 | 27 | 28 | 29 | DynamicLibrary 30 | true 31 | v140 32 | Unicode 33 | 34 | 35 | DynamicLibrary 36 | false 37 | v140 38 | true 39 | Unicode 40 | 41 | 42 | DynamicLibrary 43 | true 44 | v140 45 | Unicode 46 | 47 | 48 | DynamicLibrary 49 | false 50 | v140 51 | true 52 | Unicode 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | true 74 | $(SolutionDir)$(Platform)\$(Configuration)\ 75 | $(SolutionDir)$(Platform)\$(Configuration)\obj\ 76 | 77 | 78 | true 79 | $(SolutionDir)$(Platform)\$(Configuration)\obj\ 80 | 81 | 82 | false 83 | $(SolutionDir)$(Platform)\$(Configuration)\ 84 | $(SolutionDir)$(Platform)\$(Configuration)\obj\ 85 | 86 | 87 | 
false 88 | $(SolutionDir)$(Platform)\$(Configuration)\obj\ 89 | 90 | 91 | 92 | Create 93 | Level3 94 | Disabled 95 | WIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions) 96 | /DMICROPROFILE_GPU_TIMERS=0 /DVK_USE_PLATFORM_WIN32_KHR=1 %(AdditionalOptions) 97 | precompiled.h 98 | ProgramDatabase 99 | MultiThreadedDLL 100 | 101 | 102 | Windows 103 | true 104 | ws2_32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 105 | 106 | 107 | 108 | 109 | Create 110 | Level3 111 | Disabled 112 | _WINDOWS;_ITERATOR_DEBUG_LEVEL=0;%(PreprocessorDefinitions) 113 | /DMICROPROFILE_GPU_TIMERS=0 /DVK_USE_PLATFORM_WIN32_KHR=1 %(AdditionalOptions) 114 | precompiled.h 115 | ProgramDatabase 116 | T:\llvm-6.0.0.src\install\include\;%(AdditionalIncludeDirectories) 117 | MultiThreadedDLL 118 | 119 | 120 | Windows 121 | true 122 | ws2_32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;LLVMAnalysis.lib;LLVMAsmPrinter.lib;LLVMBinaryFormat.lib;LLVMBitReader.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMDebugInfoCodeView.lib;LLVMExecutionEngine.lib;LLVMGlobalISel.lib;LLVMipo.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMMC.lib;LLVMMCDisassembler.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMOrcJIT.lib;LLVMProfileData.lib;LLVMRuntimeDyld.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMVectorize.lib;LLVMX86AsmPrinter.lib;LLVMX86CodeGen.lib;LLVMX86Desc.lib;LLVMX86Disassembler.lib;LLVMX86Info.lib;LLVMX86Utils.lib;%(AdditionalDependencies) 123 | T:\llvm-6.0.0.src\install\lib\ 124 | 125 | 126 | 127 | 128 | Level3 129 | Create 130 | MaxSpeed 131 | true 132 | true 133 | WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions) 134 | /DMICROPROFILE_GPU_TIMERS=0 %(AdditionalOptions) 135 | precompiled.h 136 | MultiThreadedDLL 137 | 138 | 139 | Windows 140 | true 
141 | true 142 | true 143 | ws2_32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 144 | 145 | 146 | 147 | 148 | Level3 149 | Create 150 | MaxSpeed 151 | true 152 | true 153 | NDEBUG;_WINDOWS;%(PreprocessorDefinitions) 154 | /DMICROPROFILE_GPU_TIMERS=0 %(AdditionalOptions) 155 | precompiled.h 156 | MultiThreadedDLL 157 | T:\llvm-6.0.0.src\install\include\;%(AdditionalIncludeDirectories) 158 | 159 | 160 | Windows 161 | true 162 | true 163 | true 164 | ws2_32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;LLVMAnalysis.lib;LLVMAsmPrinter.lib;LLVMBinaryFormat.lib;LLVMBitReader.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMDebugInfoCodeView.lib;LLVMExecutionEngine.lib;LLVMGlobalISel.lib;LLVMipo.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMMC.lib;LLVMMCDisassembler.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMOrcJIT.lib;LLVMProfileData.lib;LLVMRuntimeDyld.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMVectorize.lib;LLVMX86AsmPrinter.lib;LLVMX86CodeGen.lib;LLVMX86Desc.lib;LLVMX86Disassembler.lib;LLVMX86Info.lib;LLVMX86Utils.lib;%(AdditionalDependencies) 165 | T:\llvm-6.0.0.src\install\lib\ 166 | 167 | 168 | 169 | 170 | NotUsing 171 | 172 | 173 | NotUsing 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | -------------------------------------------------------------------------------- /visor.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 3rdparty\microprofile 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 
| 24 | 25 | 3rdparty\decompress 26 | 27 | 28 | 29 | 30 | 31 | 32 | 3rdparty\vulkan 33 | 34 | 35 | 3rdparty\vulkan 36 | 37 | 38 | 3rdparty\vulkan 39 | 40 | 41 | 3rdparty\microprofile 42 | 43 | 44 | 3rdparty\microprofile 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 3rdparty\vulkan 53 | 54 | 55 | 3rdparty\vulkan 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | {6cd89935-ac88-4b34-b28c-cc80386c136f} 64 | 65 | 66 | {c8070b18-1182-4e2b-a292-ae2eb61a9ba0} 67 | 68 | 69 | {901a93a7-86c6-466a-a4c6-8b33ca7bd5e4} 70 | 71 | 72 | {8d761509-3638-475e-9f65-f6e3cec1b493} 73 | 74 | 75 | -------------------------------------------------------------------------------- /wsi.cpp: -------------------------------------------------------------------------------- 1 | #include "precompiled.h" 2 | 3 | VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceSupportKHR(VkPhysicalDevice physicalDevice, 4 | uint32_t queueFamilyIndex, 5 | VkSurfaceKHR surface, 6 | VkBool32 *pSupported) 7 | { 8 | // support presenting on all queues 9 | if(pSupported) 10 | *pSupported = VK_TRUE; 11 | return VK_SUCCESS; 12 | } 13 | 14 | VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceFormatsKHR(VkPhysicalDevice physicalDevice, 15 | VkSurfaceKHR surface, 16 | uint32_t *pSurfaceFormatCount, 17 | VkSurfaceFormatKHR *pSurfaceFormats) 18 | { 19 | // support BGRA8 in UNORM and SRGB modes 20 | if(pSurfaceFormatCount && !pSurfaceFormats) 21 | { 22 | *pSurfaceFormatCount = 2; 23 | return VK_SUCCESS; 24 | } 25 | 26 | pSurfaceFormats[0].colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; 27 | pSurfaceFormats[0].format = VK_FORMAT_B8G8R8A8_SRGB; 28 | pSurfaceFormats[1].colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; 29 | pSurfaceFormats[1].format = VK_FORMAT_B8G8R8A8_UNORM; 30 | 31 | return VK_SUCCESS; 32 | } 33 | 34 | VKAPI_ATTR VkResult VKAPI_CALL 35 | vkGetPhysicalDeviceSurfaceCapabilitiesKHR(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, 36 | VkSurfaceCapabilitiesKHR *pSurfaceCapabilities) 37 | { 38 | VkIcdSurfaceBase *base 
= (VkIcdSurfaceBase *)surface; 39 | #if defined(_WIN32) 40 | if(base->platform == VK_ICD_WSI_PLATFORM_WIN32) 41 | { 42 | memset(pSurfaceCapabilities, 0, sizeof(VkSurfaceCapabilitiesKHR)); 43 | 44 | VkIcdSurfaceWin32 *win32 = (VkIcdSurfaceWin32 *)base; 45 | 46 | pSurfaceCapabilities->minImageCount = 1; 47 | pSurfaceCapabilities->maxImageCount = 2; 48 | pSurfaceCapabilities->minImageExtent = {1, 1}; 49 | pSurfaceCapabilities->maxImageExtent = {32768, 32768}; 50 | pSurfaceCapabilities->maxImageArrayLayers = 1; 51 | 52 | RECT rect = {}; 53 | GetClientRect(win32->hwnd, &rect); 54 | 55 | pSurfaceCapabilities->currentExtent = {uint32_t(rect.right - rect.left), 56 | uint32_t(rect.bottom - rect.top)}; 57 | 58 | pSurfaceCapabilities->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; 59 | pSurfaceCapabilities->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; 60 | pSurfaceCapabilities->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; 61 | pSurfaceCapabilities->supportedUsageFlags = 62 | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; 63 | 64 | return VK_SUCCESS; 65 | } 66 | #endif 67 | return VK_ERROR_DEVICE_LOST; 68 | } 69 | 70 | VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfacePresentModesKHR( 71 | VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, uint32_t *pPresentModeCount, 72 | VkPresentModeKHR *pPresentModes) 73 | { 74 | // only support FIFO 75 | if(pPresentModeCount && !pPresentModes) 76 | { 77 | *pPresentModeCount = 1; 78 | return VK_SUCCESS; 79 | } 80 | 81 | pPresentModes[0] = VK_PRESENT_MODE_FIFO_KHR; 82 | 83 | return VK_SUCCESS; 84 | } 85 | 86 | VKAPI_ATTR VkResult VKAPI_CALL vkCreateSwapchainKHR(VkDevice device, 87 | const VkSwapchainCreateInfoKHR *pCreateInfo, 88 | const VkAllocationCallbacks *pAllocator, 89 | VkSwapchainKHR *pSwapchain) 90 | { 91 | VkSwapchainKHR ret = new VkSwapchainKHR_T; 92 | 93 | // TODO probably want more properties out of here, although we restricted the options a lot 94 | 
VkIcdSurfaceBase *base = (VkIcdSurfaceBase *)pCreateInfo->surface; 95 | 96 | ret->backbuffers.resize(pCreateInfo->minImageCount); 97 | 98 | ret->extent = pCreateInfo->imageExtent; 99 | 100 | #if defined(_WIN32) 101 | if(base->platform == VK_ICD_WSI_PLATFORM_WIN32) 102 | { 103 | VkIcdSurfaceWin32 *win32 = (VkIcdSurfaceWin32 *)base; 104 | ret->wnd = win32->hwnd; 105 | ret->dc = GetDC(ret->wnd); 106 | 107 | for(size_t i = 0; i < ret->backbuffers.size(); i++) 108 | { 109 | ret->backbuffers[i].mem = new VkDeviceMemory_T(); 110 | ret->backbuffers[i].mem->size = ret->extent.width * ret->extent.height * 4; 111 | 112 | HDC dc = CreateCompatibleDC(ret->dc); 113 | HBITMAP bmp = NULL; 114 | 115 | BITMAPINFO info = {}; 116 | info.bmiHeader.biSize = sizeof(info.bmiHeader); 117 | info.bmiHeader.biWidth = ret->extent.width; 118 | info.bmiHeader.biHeight = ret->extent.height; 119 | info.bmiHeader.biPlanes = 1; 120 | info.bmiHeader.biBitCount = 32; 121 | info.bmiHeader.biCompression = BI_RGB; 122 | info.bmiHeader.biSizeImage = 0; 123 | info.bmiHeader.biXPelsPerMeter = info.bmiHeader.biYPelsPerMeter = 96; 124 | info.bmiHeader.biClrUsed = 0; 125 | info.bmiHeader.biClrImportant = 0; 126 | 127 | bmp = CreateDIBSection(dc, &info, DIB_RGB_COLORS, (void **)&ret->backbuffers[i].mem->bytes, 128 | NULL, 0); 129 | assert(bmp && ret->backbuffers[i].mem->bytes); 130 | 131 | SelectObject(dc, bmp); 132 | 133 | ret->backbuffers[i].dc = dc; 134 | ret->backbuffers[i].bmp = bmp; 135 | } 136 | } 137 | #endif 138 | 139 | for(size_t i = 0; i < ret->backbuffers.size(); i++) 140 | { 141 | ret->backbuffers[i].im = new VkImage_T(); 142 | ret->backbuffers[i].im->extent = {ret->extent.width, ret->extent.height, 1}; 143 | ret->backbuffers[i].im->bytesPerPixel = 4; 144 | ret->backbuffers[i].im->pixels = ret->backbuffers[i].mem->bytes; 145 | } 146 | 147 | *pSwapchain = ret; 148 | return VK_SUCCESS; 149 | } 150 | 151 | VKAPI_ATTR void VKAPI_CALL vkDestroySwapchainKHR(VkDevice device, VkSwapchainKHR swapchain, 152 
| const VkAllocationCallbacks *pAllocator) 153 | { 154 | for(VkSwapchainKHR_T::Backbuffer &b : swapchain->backbuffers) 155 | { 156 | delete b.im; 157 | delete b.mem; 158 | 159 | #if defined(_WIN32) 160 | if(b.dc) 161 | DeleteDC(b.dc); 162 | 163 | if(b.bmp) 164 | DeleteObject(b.bmp); 165 | #endif 166 | } 167 | 168 | if(swapchain->dc) 169 | ReleaseDC(swapchain->wnd, swapchain->dc); 170 | 171 | delete swapchain; 172 | } 173 | 174 | VKAPI_ATTR VkResult VKAPI_CALL vkGetSwapchainImagesKHR(VkDevice device, VkSwapchainKHR swapchain, 175 | uint32_t *pSwapchainImageCount, 176 | VkImage *pSwapchainImages) 177 | { 178 | if(pSwapchainImageCount && !pSwapchainImages) 179 | { 180 | *pSwapchainImageCount = (uint32_t)swapchain->backbuffers.size(); 181 | return VK_SUCCESS; 182 | } 183 | 184 | for(uint32_t i = 0; i < *pSwapchainImageCount; i++) 185 | pSwapchainImages[i] = swapchain->backbuffers[i].im; 186 | 187 | return VK_SUCCESS; 188 | } 189 | 190 | VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImageKHR(VkDevice device, VkSwapchainKHR swapchain, 191 | uint64_t timeout, VkSemaphore semaphore, 192 | VkFence fence, uint32_t *pImageIndex) 193 | { 194 | // ignore fence and semaphore 195 | swapchain->current = (swapchain->current + 1) % swapchain->backbuffers.size(); 196 | *pImageIndex = swapchain->current; 197 | 198 | return VK_SUCCESS; 199 | } 200 | 201 | VKAPI_ATTR VkResult VKAPI_CALL vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR *pPresentInfo) 202 | { 203 | for(uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) 204 | { 205 | MICROPROFILE_SCOPE(vkQueuePresentKHR); 206 | 207 | const VkSwapchainKHR &swap = pPresentInfo->pSwapchains[i]; 208 | 209 | const VkSwapchainKHR_T::Backbuffer &bb = swap->backbuffers[pPresentInfo->pImageIndices[i]]; 210 | 211 | BitBlt(swap->dc, 0, 0, swap->extent.width, swap->extent.height, bb.dc, 0, 0, SRCCOPY); 212 | } 213 | 214 | EndFrameStats(); 215 | 216 | BeginFrameStats(); 217 | 218 | return VK_SUCCESS; 219 | } 220 | 
--------------------------------------------------------------------------------