├── .editorconfig ├── .gitmodules ├── data └── shaders │ ├── shared │ ├── raster.glsl │ ├── scanline.glsl │ ├── timers.glsl │ ├── compute_funcs.glsl │ ├── structures.glsl │ ├── definitions.glsl │ ├── funcs.glsl │ └── shading.glsl │ ├── env_map.glsl │ ├── simple_material.glsl │ ├── pbr_material.glsl │ ├── bin_categorizer.glsl │ ├── trace.glsl │ ├── bin_counter.glsl │ ├── bin_dispatcher.glsl │ └── raster_low.glsl ├── dependencies.json ├── .gitignore ├── .gitattributes ├── src ├── shader_structs.h ├── extern_impl.cpp ├── texture_atlas.h ├── scene_convert.h ├── lucid_pch.h ├── wavefront_obj.h ├── shading.h ├── quad_generator.h ├── meshlet.h ├── scene_setup.h ├── simple_renderer.h ├── path_tracer.h ├── pbr_renderer.h ├── lucid_base.h ├── shading.cpp ├── lucid_app.h ├── lucid_renderer.h ├── scene.h ├── scene_assimp.cpp ├── lucid.cpp ├── texture_atlas.cpp ├── quad_generator.cpp ├── path_tracer.cpp ├── wavefront_obj.cpp ├── simple_renderer.cpp ├── pbr_renderer.cpp ├── meshlet.cpp ├── tri_optimizer.cpp └── scene_setup.cpp ├── .clang-format ├── tools └── format.py ├── input_scenes.xml ├── docs └── readme.md ├── CMakeLists.txt └── .github └── workflows └── test.yml /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | [*] 3 | end_of_line = lf 4 | indent_style = tab 5 | indent_size = 4 -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "libfwk"] 2 | path = libfwk 3 | url = https://github.com/nadult/libfwk.git 4 | -------------------------------------------------------------------------------- /data/shaders/shared/raster.glsl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nadult/lucid/HEAD/data/shaders/shared/raster.glsl -------------------------------------------------------------------------------- 
/dependencies.json: -------------------------------------------------------------------------------- 1 | { 2 | "includes": [ 3 | "libfwk/dependencies.json" 4 | ], 5 | "dependencies": [ 6 | "assimp" 7 | ] 8 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | /dependencies 3 | /scenes 4 | /lucid 5 | /lucid_config.xml 6 | /temp 7 | /.cache 8 | /*.cap 9 | /*.data 10 | /*.exe 11 | /*.pdb 12 | /*.html 13 | /*.js 14 | /*.tga 15 | /*.wasm 16 | /data/spirv* 17 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | *.cpp text eol=lf 3 | *.h text eol=lf 4 | *.c text eol=lf 5 | *.txt text eol=lf 6 | *.vcxproj text eol=crlf 7 | *.vcxproj.filters text eol=crlf 8 | *.sln text eol=crlf 9 | *.props text eol=crlf 10 | -------------------------------------------------------------------------------- /src/shader_structs.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 3 | 4 | #include "lucid_base.h" 5 | 6 | namespace shader { 7 | #include "../data/shaders/shared/structures.glsl" 8 | } 9 | 10 | #define LUCID_INFO_MEMBER_OFFSET(name) (offsetof(::shader::LucidInfo, name)) -------------------------------------------------------------------------------- /src/extern_impl.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 
3 | 4 | #define STB_IMAGE_WRITE_IMPLEMENTATION 5 | 6 | #include "../extern/stb_image_write.h" 7 | 8 | #define TINYEXR_USE_MINIZ 0 9 | #define TINYEXR_USE_OPENMP 0 10 | #define TINYEXR_USE_STB_ZLIB 1 11 | #define TINYEXR_IMPLEMENTATION 12 | 13 | #include "../extern/tinyexr.h" 14 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: llvm 2 | 3 | IndentWidth: 4 4 | TabWidth: 4 5 | UseTab: Always 6 | ColumnLimit: 100 7 | 8 | #AlwaysBreakTemplateDeclarations: Multiline 9 | #AlignAfterOpenBracket: DontAlign 10 | AlignTrailingComments: false 11 | ReflowComments: false 12 | AllowShortBlocksOnASingleLine: Always 13 | AllowShortFunctionsOnASingleLine: All 14 | 15 | NamespaceIndentation: Inner 16 | SpaceBeforeParens: Never 17 | FixNamespaceComments: false 18 | Standard: Cpp11 19 | IndentRequires: true 20 | BreakBeforeConceptDeclarations: false 21 | BreakBeforeTernaryOperators: false 22 | 23 | DeriveLineEnding: false 24 | UseCRLF: false -------------------------------------------------------------------------------- /src/texture_atlas.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 
3 | 4 | #pragma once 5 | 6 | #include "lucid_base.h" 7 | #include 8 | 9 | struct TextureAtlas { 10 | struct Entry { 11 | int2 pos; 12 | int2 size; 13 | int2 border_tl, border_br; 14 | }; 15 | 16 | struct Config { 17 | int round_elem_size = 1; 18 | int max_atlas_size = 16 * 1024; 19 | }; 20 | 21 | static Maybe make(vector sizes, Config); 22 | 23 | FRect uvRect(const Entry &, float inset_pixels = 0.0f) const; 24 | Image merge(CSpan, IColor background = ColorId::black) const; 25 | 26 | vector entries; 27 | Config config; 28 | int2 size = {64, 64}; 29 | }; 30 | -------------------------------------------------------------------------------- /src/scene_convert.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 3 | 4 | #pragma once 5 | 6 | #include "scene.h" 7 | 8 | struct InputScene { 9 | InputScene(string name, string path); 10 | InputScene(const FilePath &root_path, CXmlNode); 11 | 12 | string name, path; 13 | string env_map_path; 14 | float quad_squareness = 1.0f; 15 | bool merge_verts = false; 16 | bool flip_uv = false; 17 | bool flip_yz = false; 18 | bool pbr = false; 19 | }; 20 | 21 | Ex> loadInputScenes(ZStr path); 22 | 23 | // TODO: warning / logging system needed 24 | 25 | vector panoramaToCubeMap(const Image &); 26 | Ex loadExr(ZStr path); 27 | SceneTexture convertTexture(ZStr path); 28 | Scene convertScene(WavefrontObject); 29 | void convertScenes(ZStr scenes_path); 30 | -------------------------------------------------------------------------------- /src/lucid_pch.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 
3 | 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #define DUMP FWK_DUMP 27 | #define FATAL FWK_FATAL 28 | -------------------------------------------------------------------------------- /tools/format.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (C) Krzysztof Jakubowski 4 | # This file is part of LucidRaster. See license.txt for details. 5 | 6 | import argparse, sys, os, re 7 | 8 | 9 | def lucid_dir(): 10 | return os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") 11 | 12 | 13 | sys.path.insert(0, os.path.join(lucid_dir(), "libfwk", "tools")) 14 | from format import CodeFormatter, find_files 15 | 16 | if __name__ == "__main__": 17 | parser = argparse.ArgumentParser( 18 | prog=__file__, 19 | description="Tool for code formatting and format verification", 20 | ) 21 | parser.add_argument("-c", "--check", action="store_true") 22 | args = parser.parse_args() 23 | 24 | formatter = CodeFormatter() 25 | os.chdir(lucid_dir()) 26 | files = find_files(["src", os.path.join("data", "shaders")], re.compile(".*[.](h|cpp|glsl)$")) 27 | formatter.format_cpp(files, args.check) 28 | -------------------------------------------------------------------------------- /src/wavefront_obj.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 
3 | #pragma once // include guard: this header defines structs, repeated inclusion would redefine them 4 | #include "lucid_base.h" 5 | 6 | #include 7 | 8 | struct WavefrontMap { 9 | string name; 10 | vector args; 11 | }; 12 | 13 | struct WavefrontMaterial { 14 | static Ex load(ZStr path, vector &out); 15 | 16 | string name; 17 | float dissolve_factor = 1.0; 18 | float3 diffuse = float3(1.0f); 19 | vector> maps = {}; 20 | }; 21 | 22 | struct WavefrontObject { 23 | static Ex load(ZStr path, i64 file_size_limit = 1400 * 1024 * 1024); 24 | 25 | vector positions; 26 | vector normals; 27 | vector tex_coords; 28 | vector> tris; 29 | 30 | struct MaterialGroup { 31 | int material_id = 0; 32 | int first_tri = 0, num_tris = 0; 33 | }; 34 | 35 | string resource_path; 36 | vector materials; 37 | vector material_groups; 38 | }; 39 | -------------------------------------------------------------------------------- /src/shading.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 
3 | 4 | #pragma once 5 | 6 | #include "lucid_base.h" 7 | 8 | #include 9 | 10 | namespace shader { 11 | struct Frustum; 12 | struct Viewport; 13 | struct Rect; 14 | struct Lighting; 15 | }; 16 | 17 | struct SunLight { 18 | float3 dir = {1, 0, 0}, color{1}; 19 | float power = 2.0f; 20 | }; 21 | 22 | struct SimpleLight { 23 | float3 color{1}; 24 | float power = 0.5f; 25 | }; 26 | 27 | struct SceneLighting { 28 | void setConfig(const AnyConfig &); 29 | AnyConfig config() const; 30 | 31 | static SceneLighting makeDefault(); 32 | operator shader::Lighting() const; 33 | 34 | string env_map_path; 35 | PVImageView env_map; 36 | SimpleLight ambient; 37 | SunLight sun; 38 | }; 39 | 40 | struct FrustumInfo { 41 | FrustumInfo() = default; 42 | FrustumInfo(const Camera &); 43 | operator shader::Frustum() const; 44 | 45 | array origins; 46 | array dirs; 47 | 48 | float3 dir0, origin0; 49 | float3 dirx, diry; 50 | }; 51 | 52 | shader::Viewport makeViewport(const Camera &cam, int2 viewport_size); 53 | shader::Rect makeRect(FRect rect); 54 | -------------------------------------------------------------------------------- /src/quad_generator.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 3 | 4 | #pragma once 5 | 6 | #include "lucid_base.h" 7 | #include 8 | 9 | struct QuadNode { 10 | QuadNode(int t0, int t1) : tris{t0, t1} {}; 11 | 12 | array tris; 13 | array verts; 14 | array conflicts = {-1, -1, -1, -1}; 15 | float squareness; 16 | 17 | void addConflict(int idx) { 18 | if(isOneOf(idx, conflicts)) 19 | return; 20 | for(auto &val : conflicts) 21 | if(val == -1) { 22 | val = idx; 23 | break; 24 | } 25 | } 26 | int otherTri(int idx) const { return idx == tris[0] ? 
tris[1] : tris[0]; } 27 | 28 | int degree() const { 29 | int deg = 0; 30 | for(int idx : conflicts) 31 | if(idx != -1) 32 | deg++; 33 | return deg; 34 | } 35 | }; 36 | 37 | vector> triNeighbours(CSpan> tris); 38 | Pair, vector>> quadNodes(CSpan, CSpan> tris, 39 | CSpan> tri_neighbours); 40 | vector> genQuads(CSpan> tris, CSpan> tri_neighbours, 41 | CSpan quad_nodes, CSpan> tri_quads, 42 | float square_weight); 43 | -------------------------------------------------------------------------------- /src/meshlet.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 3 | 4 | #pragma once 5 | 6 | #include "lucid_base.h" 7 | #include 8 | 9 | struct Meshlet { 10 | vector verts; 11 | vector normals; 12 | vector uvs; 13 | vector colors; 14 | vector> tris; 15 | uint material_id; 16 | }; 17 | 18 | // Do we want to generate quads right away? 19 | // - would we first merge the mesh into quads and only then into meshlets? 20 | // 21 | // Are meshlets instances? 22 | // for now we simply generate a separate set of meshlets from each instance. 
An instance may map to N meshlets 23 | 24 | struct SceneMesh; 25 | struct Scene; 26 | 27 | struct TriInfo { 28 | array verts; 29 | array neighbours; 30 | FBox bbox; 31 | }; 32 | 33 | struct MeshPartition { 34 | vector verts; 35 | vector> tris; 36 | FBox bbox; 37 | }; 38 | 39 | vector meshTriInfo(CSpan verts, CSpan> tris); 40 | 41 | vector meshPartition(CSpan, const SceneMesh &, CSpan, 42 | Interval selected_tris, int max_tris, int max_verts); 43 | 44 | void meshPartitionStats(CSpan, int max_tris, int max_verts); 45 | void visualizeMeshPartitions(const Scene &, CSpan); 46 | void meshletTest(const Scene &, float square_weight); 47 | -------------------------------------------------------------------------------- /src/scene_setup.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 3 | 4 | #pragma once 5 | 6 | #include "scene.h" 7 | #include 8 | 9 | class SceneSetup { 10 | public: 11 | SceneSetup(string name); 12 | virtual ~SceneSetup(); 13 | virtual void doMenu(VDeviceRef){}; 14 | virtual Ex<> updateScene(VDeviceRef) = 0; 15 | 16 | string name; 17 | Maybe scene; 18 | RenderConfig render_config; 19 | 20 | vector views; 21 | int view_id = 0; 22 | Maybe camera; 23 | }; 24 | 25 | class BoxesSetup final : public SceneSetup { 26 | public: 27 | BoxesSetup(); 28 | void doMenu(VDeviceRef) override; 29 | Ex<> updateScene(VDeviceRef) override; 30 | string sceneId() const; 31 | 32 | private: 33 | int3 m_current_dims; 34 | int3 m_dims = {10, 10, 10}; 35 | float m_box_size = 0.5f; 36 | float m_box_dist = 0.1f; 37 | }; 38 | 39 | class PlanesSetup final : public SceneSetup { 40 | public: 41 | PlanesSetup(); 42 | void doMenu(VDeviceRef) override; 43 | Ex<> updateScene(VDeviceRef) override; 44 | string sceneId() const; 45 | 46 | private: 47 | int m_current_planes = 0; 48 | int m_num_planes = 32; 49 | float m_plane_size = 4.0f; 50 | 
float m_plane_dist = 0.1f; 51 | }; 52 | 53 | class LoadedSetup final : public SceneSetup { 54 | public: 55 | LoadedSetup(string name); 56 | Ex<> updateScene(VDeviceRef) override; 57 | 58 | static vector findAll(); 59 | }; 60 | -------------------------------------------------------------------------------- /src/simple_renderer.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 3 | 4 | #pragma once 5 | 6 | #include "lucid_base.h" 7 | #include 8 | #include 9 | #include 10 | 11 | namespace shader { 12 | struct SimpleDrawCall; // TODO: change name to diff from fwk 13 | } 14 | 15 | class SimpleRenderer { 16 | public: 17 | SimpleRenderer(); 18 | FWK_MOVABLE_CLASS(SimpleRenderer); 19 | 20 | static void addShaderDefs(VulkanDevice &, ShaderCompiler &, const ShaderConfig &); 21 | CSpan shaderDefIds() const { return m_shader_def_ids; } 22 | Ex<> exConstruct(VulkanDevice &, ShaderCompiler &, const IRect &viewport, VAttachment); 23 | 24 | // TODO: wireframe to config 25 | Ex<> render(const RenderContext &, bool wireframe); 26 | const IRect &viewport() const { return m_viewport; } 27 | 28 | private: 29 | struct PipeConfig { 30 | FWK_ORDER_BY(PipeConfig, backface_culling, additive_blending, opaque, wireframe); 31 | 32 | bool backface_culling; 33 | bool additive_blending; 34 | bool opaque; 35 | bool wireframe; 36 | }; 37 | 38 | Ex getPipeline(VulkanDevice &, const PipeConfig &); 39 | Ex<> renderPhase(const RenderContext &, VBufferSpan, bool opaque, 40 | bool wireframe); 41 | 42 | vector m_shader_def_ids; 43 | HashMap m_pipelines; 44 | PVShaderModule m_frag_module, m_vert_module; 45 | PVPipelineLayout m_pipeline_layout; 46 | PVImageView m_depth_buffer; 47 | PVRenderPass m_render_pass; 48 | IRect m_viewport; 49 | }; 50 | -------------------------------------------------------------------------------- /input_scenes.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /src/path_tracer.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 3 | 4 | #pragma once 5 | 6 | #include "lucid_base.h" 7 | #include 8 | #include 9 | 10 | DEFINE_ENUM(PathTracerOpt, timers, debug); 11 | using PathTracerOpts = EnumFlags; 12 | 13 | namespace shader { 14 | struct PathTracerConfig; 15 | struct PathTracerInfo; 16 | } 17 | 18 | class PathTracer { 19 | public: 20 | using Opt = PathTracerOpt; 21 | using Opts = PathTracerOpts; 22 | using Context = RenderContext; 23 | 24 | PathTracer(); 25 | FWK_MOVABLE_CLASS(PathTracer) 26 | 27 | static void addShaderDefs(VulkanDevice &, ShaderCompiler &, const ShaderConfig &); 28 | CSpan shaderDefIds() const { return m_shader_def_ids; } 29 | Ex exConstruct(VulkanDevice &, ShaderCompiler &, Opts, int2 view_size); 30 | void render(const Context &); 31 | 32 | private: 33 | Ex<> setupInputData(const Context &); 34 | Ex<> updateScene(VulkanDevice &, Scene &); 35 | 36 | Opts m_opts; 37 | 38 | vector m_shader_def_ids; 39 | PVPipeline p_trace; 40 | 41 | VBufferSpan m_config; 42 | VBufferSpan m_info; 43 | 44 | string m_scene_id; 45 | 46 | static constexpr int num_frames = 2; 47 | VBufferSpan m_frame_info[num_frames]; 48 | VBufferSpan m_frame_config[num_frames]; 49 | VBufferSpan m_debug_buffer; 50 | 51 | VBufferSpan m_indices; 52 | VBufferSpan m_vertices; 53 | VBufferSpan m_tex_coords; 54 | PVAccelStruct m_accel_struct; 55 | 56 | int2 m_bin_counts; 57 | int m_bin_count, m_bin_size; 58 | int2 m_size; // TODO: rename 59 | }; 60 | -------------------------------------------------------------------------------- 
/data/shaders/env_map.glsl: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 3 | 4 | #version 460 5 | 6 | #include "shared/definitions.glsl" 7 | #include "shared/funcs.glsl" 8 | 9 | layout(set = 0, binding = 0) uniform ubo10 { EnvMapDrawCall env_map_dc; }; 10 | layout(set = 0, binding = 1) uniform sampler2D env_map; 11 | 12 | #ifdef VERTEX_SHADER // ------------------------------------------------------- 13 | 14 | layout(location = 0) in vec2 in_pos; 15 | layout(location = 0) out vec3 v_posWS; 16 | 17 | void main() { 18 | gl_Position = vec4(in_pos, 0.999999, 1.0); 19 | v_posWS = (vec4(in_pos, 1.0, 1.0) * env_map_dc.inv_proj_view_matrix).xyz; 20 | } 21 | 22 | #elif defined(FRAGMENT_SHADER) // --------------------------------------------- 23 | 24 | layout(location = 0) in vec3 v_posWS; 25 | layout(location = 0) out vec4 f_color; 26 | 27 | vec2 longLat(vec3 normal) { 28 | // convert normal to longitude and latitude 29 | float latitude = acos(normal.y) / PI; 30 | float longitude = (atan(normal.x, normal.z) + PI) / (2.0 * PI); 31 | return vec2(longitude, latitude); 32 | } 33 | 34 | vec2 screenPos() { return gl_FragCoord.xy * env_map_dc.inv_screen_size * 2.0 - 1.0; } 35 | 36 | vec3 screenToWorld(vec2 pos, float z) { 37 | vec4 wpos = env_map_dc.inv_proj_view_matrix * vec4(pos, z, 1.0); 38 | return wpos.xyz * (1.0 / wpos.w); 39 | } 40 | 41 | void main() { 42 | vec2 screen_pos = screenPos(); 43 | vec3 pos1 = screenToWorld(screen_pos, -1.0); 44 | vec3 pos2 = screenToWorld(screen_pos, 1.0); 45 | vec3 dir = -normalize(pos2 - pos1); 46 | 47 | vec2 tex_coord = longLat(dir); 48 | f_color.rgb = texture(env_map, tex_coord * vec2(1.0, -1.0)).xyz * 0.5; 49 | f_color.a = 1.0; 50 | } 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /src/pbr_renderer.h: 
-------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 3 | 4 | #pragma once 5 | 6 | #include "lucid_base.h" 7 | #include 8 | #include 9 | #include 10 | 11 | namespace shader { 12 | struct PbrDrawCall; 13 | struct Lighting; 14 | } 15 | 16 | class PbrRenderer { 17 | public: 18 | PbrRenderer(); 19 | FWK_MOVABLE_CLASS(PbrRenderer); 20 | 21 | static void addShaderDefs(VulkanDevice &, ShaderCompiler &, const ShaderConfig &); 22 | CSpan shaderDefIds() const { return m_shader_def_ids; } 23 | Ex<> exConstruct(VulkanDevice &, ShaderCompiler &, const IRect &viewport, VAttachment); 24 | 25 | // TODO: wireframe to config 26 | Ex<> render(const RenderContext &, bool wireframe); 27 | const IRect &viewport() const { return m_viewport; } 28 | 29 | private: 30 | struct PipeConfig { 31 | FWK_ORDER_BY(PipeConfig, backface_culling, additive_blending, opaque, wireframe, pbr); 32 | 33 | bool backface_culling; 34 | bool additive_blending; 35 | bool opaque; 36 | bool wireframe; 37 | bool pbr; 38 | }; 39 | 40 | Ex getPipeline(VulkanDevice &, const PipeConfig &); 41 | Ex<> renderPhase(const RenderContext &, VBufferSpan, bool opaque, 42 | bool wireframe); 43 | Ex<> renderEnvMap(const RenderContext &); 44 | 45 | vector m_shader_def_ids; 46 | HashMap m_pipelines; 47 | PVShaderModule m_frag_module, m_vert_module; 48 | PVPipelineLayout m_pipeline_layout; 49 | PVPipeline m_env_pipeline; 50 | PVImageView m_depth_buffer; 51 | PVRenderPass m_render_pass; 52 | VBufferSpan m_rect_vertices; 53 | VBufferSpan m_lighting_buf; 54 | IRect m_viewport; 55 | }; 56 | -------------------------------------------------------------------------------- /docs/readme.md: -------------------------------------------------------------------------------- 1 | # LucidRaster: GPU Software Rasterizer for Exact Order-Independent Transparency 2 | [![License: GPL 
v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) 3 | [![Build status](https://github.com/nadult/lucid/actions/workflows/test.yml/badge.svg?branch=main)](https://github.com/nadult/lucid/actions) 4 | 5 | LucidRaster is a software rasterizer running on a GPU which allows for efficient exact rendering of 6 | complex transparent scenes. It uses a new two-stage sorting technique and sample accumulation 7 | method. On average it's faster than high-quality OIT approximations and only about 3x slower than 8 | hardware alpha blending. It can be very efficient especially when rendering scenes with high 9 | triangle density or high depth complexity. 10 | 11 | Most of LucidRaster's logic is implemented in Vulkan compute shaders, the rest of the code is mainly 12 | C++. 13 | 14 | [Paper](https://arxiv.org/abs/2405.13364) 15 | [Windows build + scene files](https://github.com/nadult/lucid/releases) 16 | [Project page (more details)](https://nadult.github.io/lucid/) 17 | [Author's Linkedin profile](https://www.linkedin.com/in/nadult/) 18 | 19 | This work is licensed under a [GNU GPL v3 license](https://www.gnu.org/licenses/gpl-3.0.html). 20 | 21 | ![](https://nadult.github.io/images/lucid/lucid1.jpg) 22 | 23 | 24 | 25 | ## Building 26 | 27 | LucidRaster is based on CMake and [libfwk](https://github.com/nadult/libfwk) framework. Please take 28 | a look at libfwk's readme to learn what tools / compilers are required. LucidRaster can be easily 29 | built under Windows and Linux by running the following commands: 30 | ``` 31 | cd lucid/ 32 | git submodule update --init --recursive 33 | libfwk/tools/configure.py download-deps 34 | libfwk/tools/configure.py 35 | cmake --build build --parallel 36 | ``` 37 | 38 | There are also [github 39 | workflows](https://github.com/nadult/lucid/blob/main/.github/workflows/test.yml) available, which 40 | build LucidRaster for both platforms (but artifacts are only created for Windows). 
-------------------------------------------------------------------------------- /data/shaders/shared/scanline.glsl: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 3 | 4 | #ifndef _SCANLINE_GLSL_ 5 | #define _SCANLINE_GLSL_ 6 | 7 | #include "definitions.glsl" 8 | 9 | struct ScanlineParams { 10 | vec3 min, max, step; 11 | }; 12 | 13 | ScanlineParams loadScanlineParamsRow(uvec4 val0, uvec4 val1, vec2 start) { 14 | ScanlineParams params; 15 | 16 | bvec3 xneg = bvec3((val1.w & 1) != 0, (val1.w & 2) != 0, (val1.w & 4) != 0); 17 | vec3 scan = uintBitsToFloat(val0.xyz); 18 | params.step = uintBitsToFloat(val1.xyz); 19 | 20 | const float inf = 1.0 / 0.0; 21 | scan += params.step * start.y - vec3(start.x); 22 | params.min = vec3(xneg[0] ? -inf : scan[0], xneg[1] ? -inf : scan[1], xneg[2] ? -inf : scan[2]); 23 | params.max = vec3(xneg[0] ? scan[0] : inf, xneg[1] ? scan[1] : inf, xneg[2] ? scan[2] : inf); 24 | 25 | return params; 26 | } 27 | 28 | ScanlineParams loadScanlineParamsBin(uvec4 val0, uvec4 val1, out int min_by, out int max_by) { 29 | ScanlineParams params; 30 | 31 | vec3 scan = uintBitsToFloat(val0.xyz); 32 | params.step = uintBitsToFloat(val1.xyz); 33 | min_by = int(val0.w & 0xffff) >> BIN_SHIFT; 34 | max_by = int(val0.w >> 16) >> BIN_SHIFT; 35 | 36 | bvec3 xneg = bvec3((val1.w & 1) != 0, (val1.w & 2) != 0, (val1.w & 4) != 0); 37 | bvec3 yneg = bvec3((val1.w & 8) != 0, (val1.w & 16) != 0, (val1.w & 32) != 0); 38 | 39 | // Computing offsets for trivial reject corner 40 | float offset = BIN_SIZE - 0.989; 41 | vec3 yoffset = vec3(yneg[0] ? 0.0 : offset, yneg[1] ? 0.0 : offset, yneg[2] ? 0.0 : offset); 42 | vec3 xoffset = vec3(xneg[0] ? 0.0 : offset, xneg[1] ? 0.0 : offset, xneg[2] ? 
0.0 : offset); 43 | 44 | vec2 start = vec2(0.99, min_by * BIN_SIZE - 0.01); 45 | scan += params.step * (yoffset + vec3(start.y)) - (xoffset + vec3(start.x)); 46 | const float inf = 1.0 / 0.0; 47 | params.min = vec3(xneg[0] ? -inf : scan[0], xneg[1] ? -inf : scan[1], xneg[2] ? -inf : scan[2]); 48 | params.max = vec3(xneg[0] ? scan[0] : inf, xneg[1] ? scan[1] : inf, xneg[2] ? scan[2] : inf); 49 | params.step *= BIN_SIZE; 50 | 51 | return params; 52 | } 53 | 54 | #endif -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) Krzysztof Jakubowski 2 | # This file is part of LucidRaster. See license.txt for details. 3 | # NOTE: 3.23 is the first CMake release supporting target_sources(FILE_SET), which this file uses 4 | cmake_minimum_required(VERSION 3.23) 5 | 6 | set(CMAKE_CXX_STANDARD 20) 7 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 8 | 9 | if (CMAKE_GENERATOR MATCHES "Visual Studio") 10 | set(CMAKE_GENERATOR_TOOLSET "ClangCL") 11 | endif() 12 | 13 | project(lucid VERSION 0.1 LANGUAGES CXX) 14 | 15 | set(FWK_BUILD_TESTS OFF CACHE BOOL "") 16 | set(FWK_BUILD_TOOLS OFF CACHE BOOL "") 17 | set(FWK_UNITY_BUILD ON CACHE BOOL "") 18 | set(FWK_DEPENDENCIES_DIR "${CMAKE_CURRENT_SOURCE_DIR}/dependencies" CACHE PATH "") 19 | add_subdirectory(libfwk EXCLUDE_FROM_ALL) 20 | 21 | set(HEADERS_lucid 22 | lucid_app.h 23 | lucid_base.h 24 | lucid_pch.h 25 | lucid_renderer.h 26 | meshlet.h 27 | path_tracer.h 28 | pbr_renderer.h 29 | quad_generator.h 30 | scene_convert.h 31 | scene_setup.h 32 | scene.h 33 | shader_structs.h 34 | shading.h 35 | simple_renderer.h 36 | texture_atlas.h 37 | wavefront_obj.h 38 | ) 39 | 40 | set(SOURCES_lucid 41 | extern_impl.cpp 42 | lucid_app.cpp 43 | lucid_renderer.cpp 44 | lucid.cpp 45 | meshlet.cpp 46 | path_tracer.cpp 47 | pbr_renderer.cpp 48 | quad_generator.cpp 49 | scene_convert.cpp 50 | scene_setup.cpp 51 | scene.cpp 52 | shading.cpp 53 | simple_renderer.cpp 54 | texture_atlas.cpp 55 | tri_optimizer.cpp 56 | 
wavefront_obj.cpp 57 | ) 58 | 59 | list(TRANSFORM HEADERS_lucid PREPEND "src/") 60 | list(TRANSFORM SOURCES_lucid PREPEND "src/") 61 | 62 | add_executable(lucid) 63 | target_sources(lucid PRIVATE ${SOURCES_lucid}) 64 | target_sources(lucid PUBLIC FILE_SET HEADERS FILES ${HEADERS_lucid}) 65 | target_link_libraries(lucid PRIVATE libfwk) 66 | 67 | set(BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") 68 | set_property(TARGET lucid PROPERTY VS_DEBUGGER_WORKING_DIRECTORY "${BASE_DIR}") 69 | set_target_properties(lucid PROPERTIES 70 | DEBUG_POSTFIX "_dbg" DEVELOP_POSTFIX "_dev" 71 | RUNTIME_OUTPUT_DIRECTORY_DEBUG "${BASE_DIR}" 72 | RUNTIME_OUTPUT_DIRECTORY_DEVELOP "${BASE_DIR}" 73 | RUNTIME_OUTPUT_DIRECTORY_RELEASE "${BASE_DIR}" 74 | ) 75 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test & build 2 | 3 | on: 4 | push: 5 | branches: 6 | - '**' 7 | workflow_call: 8 | 9 | env: 10 | PYTHONUNBUFFERED: 1 11 | 12 | jobs: 13 | build-windows: 14 | runs-on: windows-2025 15 | steps: 16 | - uses: actions/checkout@v4 17 | with: 18 | submodules: 'recursive' 19 | 20 | - name: Install dependencies 21 | run: python libfwk/tools/configure.py download-deps 22 | 23 | - name: Configure Lucid & libfwk 24 | run: python libfwk\tools\configure.py -T Release -G ninja-clang-cl -DFWK_UNITY_BUILD=ON 25 | 26 | - name: Build lucid & libfwk 27 | run: | 28 | cmake --build build -j8 29 | 30 | - name: Prepare archive files 31 | shell: cmd 32 | run: | 33 | mkdir lucid-raster 34 | copy lucid.exe lucid-raster\ 35 | xcopy data\shaders\ lucid-raster\data\shaders\ /E 36 | 37 | - name: Archive build 38 | uses: actions/upload-artifact@v4 39 | with: 40 | name: lucid-release-build 41 | path: lucid-raster 42 | 43 | build-linux: 44 | runs-on: ubuntu-24.04 45 | steps: 46 | - uses: actions/checkout@v4 47 | with: 48 | submodules: 'recursive' 49 | 50 | - name: Download 
dependencies 51 | run: python libfwk/tools/configure.py download-deps 52 | 53 | - name: Configure Lucid & libfwk 54 | run: | 55 | export CXX=clang++-18 56 | python libfwk/tools/configure.py -G ninja -T Debug -DFWK_UNITY_BUILD=ON 57 | 58 | - name: Build Lucid & libfwk 59 | run: cmake --build build -j8 60 | 61 | check-formatting: 62 | runs-on: windows-2025 63 | steps: 64 | - name: Disable git autocrlf 65 | run: | 66 | git config --global core.autocrlf false 67 | git config --global core.eol lf 68 | 69 | - uses: actions/checkout@v4 70 | with: 71 | submodules: 'true' 72 | 73 | - name: Set up Python 74 | uses: actions/setup-python@v5 75 | with: 76 | python-version: '3.12' 77 | architecture: 'x64' 78 | 79 | - name: Install Dependencies 80 | run: pip install black 81 | 82 | - name: Check C++ formatting 83 | run: | 84 | python tools/format.py -c 85 | 86 | - name: Check Python formatting 87 | if: '!cancelled()' 88 | run: | 89 | python -m black tools/* --check --color --diff -l 100 -------------------------------------------------------------------------------- /data/shaders/shared/timers.glsl: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 3 | 4 | #ifndef _TIMERS_GLSL_ 5 | #define _TIMERS_GLSL_ 6 | 7 | #include "definitions.glsl" 8 | 9 | #ifdef TIMERS_ENABLED 10 | 11 | #extension GL_ARB_shader_clock : require 12 | #extension GL_KHR_shader_subgroup_basic : require 13 | 14 | shared uint s_timers[TIMERS_COUNT]; 15 | #define START_TIMER() uvec2 timer0_ = clock2x32ARB(); 16 | // TODO: for now we're just ignoring high bits; Maybe we shouldn't do that? 
// Adds the clock ticks elapsed since the previous reading to s_timers[idx].
// Only subgroup invocation 0 contributes. The delta is taken against timer0_
// (the uvec2 declared by START_TIMER()); its low 32-bit word is scaled down by
// >> 4 before the atomicAdd, and timer0_ is then advanced to the new reading.
// `high` is computed (with a borrow when the low word wrapped around) but is
// not used afterwards.
#define UPDATE_TIMER(idx) \
	if(gl_SubgroupInvocationID == 0) { \
		uvec2 timer = clock2x32ARB(); \
		uint low = timer.x - timer0_.x; \
		uint high = timer.y - timer0_.y; \
		if(low > timer.x) \
			high--; \
		atomicAdd(s_timers[idx], uint(low) >> 4); \
		timer0_ = timer; \
	}

// Zeroes the shared timer slots: invocation LIX clears s_timers[LIX].
// No barrier is issued here.
// NOTE(review): callers presumably synchronize before the first UPDATE_TIMER — confirm.
#define INIT_TIMERS() \
	{ \
		if(LIX < TIMERS_COUNT) \
			s_timers[LIX] = 0; \
	}

// After a workgroup barrier, atomically accumulates every shared timer slot
// into the matching slot of out_timers.
#define COMMIT_TIMERS(out_timers) \
	{ \
		barrier(); \
		if(LIX < TIMERS_COUNT) \
			atomicAdd(out_timers[LIX], s_timers[LIX]); \
	}

#else

// TIMERS_ENABLED is not defined: the timer macros expand to nothing
// (or to an empty block), so call sites need no conditional compilation.
#define START_TIMER()
#define UPDATE_TIMER(idx)

#define INIT_TIMERS() \
	{}
#define COMMIT_TIMERS(out_timers) \
	{}

#endif

#endif
-------------------------------------------------------------------------------- /src/lucid_base.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details.
3 | 4 | #pragma once 5 | 6 | #include "lucid_pch.h" 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | using namespace fwk; 15 | 16 | #define ORDER_BY FWK_ORDER_BY 17 | 18 | FilePath mainPath(); 19 | string dataPath(string file_name); 20 | 21 | namespace fwk { 22 | struct ShaderDebugResults; 23 | } 24 | 25 | void printDebugData(VulkanCommandQueue &, VBufferSpan, Str title); 26 | 27 | struct RenderConfig { 28 | float scene_opacity = 1.0f; 29 | IColor background_color = IColor(0, 30, 30); 30 | VSamplerSetup sampler_setup = VSamplerSetup(VTexFilter::linear, VTexFilter::linear, 31 | VTexFilter::linear, VTexAddress::repeat, 16); 32 | bool backface_culling = false; 33 | bool additive_blending = false; 34 | bool pbr_mode = false; 35 | }; 36 | 37 | struct ShaderConfig { 38 | string build_name; 39 | vector> predefined_macros; 40 | }; 41 | 42 | ShaderConfig getShaderConfig(VulkanDevice &); 43 | 44 | DEFINE_ENUM(DrawCallOpt, has_vertex_colors, has_vertex_tex_coords, has_vertex_normals, is_opaque, 45 | tex_opaque, has_uv_rect, has_albedo_tex, has_normal_tex, has_pbr_tex, has_inst_color); 46 | using DrawCallOpts = EnumFlags; 47 | 48 | struct SceneDrawCall { 49 | FBox bbox; 50 | int material_id = -1; // -1 means no material assigned 51 | int num_tris = 0, tri_offset = 0; 52 | int num_quads = 0, quad_offset = 0; 53 | DrawCallOpts opts = none; 54 | }; 55 | 56 | // TODO: cleanup in structures 57 | struct Scene; 58 | struct SceneMaterial; 59 | struct SceneDrawCall; 60 | struct SceneLighting; 61 | 62 | struct VertexArray { 63 | static void getDefs(VPipelineSetup &, bool with_tangents); 64 | 65 | VBufferSpan positions; 66 | VBufferSpan colors; 67 | VBufferSpan tex_coords; 68 | VBufferSpan normals; 69 | VBufferSpan tangents; // optional 70 | }; 71 | 72 | struct RenderContext { 73 | Scene &scene; 74 | VulkanDevice &device; 75 | RenderConfig config; 76 | VertexArray verts; 77 | VBufferSpan tris_ib, quads_ib; 78 | vector dcs; 79 | vector materials; 
80 | PVImageView opaque_tex, trans_tex; 81 | const SceneLighting &lighting; 82 | const Frustum &frustum; 83 | const Camera &camera; 84 | }; 85 | 86 | struct StatsRow { 87 | string label; 88 | string value; 89 | string tooltip = {}; 90 | }; 91 | 92 | struct StatsGroup { 93 | vector rows; 94 | string title; 95 | int label_width = 100; 96 | }; 97 | -------------------------------------------------------------------------------- /data/shaders/simple_material.glsl: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 3 | 4 | #version 460 5 | 6 | #include "shared/definitions.glsl" 7 | #include "shared/funcs.glsl" 8 | 9 | layout(set = 0, binding = 0) uniform ubo00 { Lighting lighting; }; 10 | layout(set = 1, binding = 0) uniform ubo10 { SimpleDrawCall simple_dc; }; 11 | layout(set = 1, binding = 1) uniform sampler2D color_tex; 12 | 13 | bool flagSet(uint flag) { return (simple_dc.draw_call_opts & flag) != 0; } 14 | 15 | #ifdef VERTEX_SHADER // ------------------------------------------------------- 16 | 17 | layout(location = 0) in vec3 in_pos; 18 | layout(location = 1) in vec4 in_color; 19 | layout(location = 2) in vec2 in_tex_coord; 20 | layout(location = 3) in uint in_normal; 21 | 22 | layout(location = 0) out vec2 v_tex_coord; 23 | layout(location = 1) out vec4 v_color; 24 | layout(location = 2) out vec3 v_posWS; 25 | layout(location = 3) out vec3 v_normalWS; 26 | 27 | void main() { 28 | vec3 posWS = vec4(in_pos, 1.0).xyz; 29 | gl_Position = simple_dc.proj_view_matrix * vec4(posWS, 1.0); 30 | v_posWS = posWS; 31 | 32 | v_color = flagSet(INST_HAS_VERTEX_COLORS) ? 
in_color : vec4(1, 1, 1, 1); 33 | if(flagSet(INST_HAS_VERTEX_TEX_COORDS)) 34 | v_tex_coord = in_tex_coord; 35 | if(flagSet(INST_HAS_VERTEX_NORMALS)) 36 | v_normalWS = decodeNormalUint(in_normal); 37 | } 38 | 39 | #elif defined(FRAGMENT_SHADER) // --------------------------------------------- 40 | 41 | layout(location = 0) in vec2 v_tex_coord; 42 | layout(location = 1) in vec4 v_color; 43 | layout(location = 2) in vec3 v_posWS; 44 | layout(location = 3) in vec3 v_normalWS; 45 | 46 | layout(location = 0) out vec4 f_color; 47 | 48 | void main() { 49 | vec3 normalWS; 50 | if(flagSet(INST_HAS_VERTEX_NORMALS)) { 51 | normalWS = v_normalWS; 52 | } else { 53 | // Flat shading if no normal data is available 54 | normalWS = normalize(cross(dFdy(v_posWS), dFdx(v_posWS))); 55 | } 56 | 57 | vec4 color = simple_dc.material_color * v_color; 58 | if(flagSet(INST_HAS_ALBEDO_TEXTURE)) { 59 | vec2 tex_coord = v_tex_coord; 60 | vec2 tex_dx = dFdx(v_tex_coord); 61 | vec2 tex_dy = dFdy(v_tex_coord); 62 | if(flagSet(INST_HAS_UV_RECT)) { 63 | // TODO: all textures need borders, even if POW2? 64 | tex_dx *= simple_dc.uv_rect_size; 65 | tex_dy *= simple_dc.uv_rect_size; 66 | tex_coord = simple_dc.uv_rect_pos + simple_dc.uv_rect_size * fract(tex_coord); 67 | } 68 | color *= textureGrad(color_tex, tex_coord, tex_dx, tex_dy); 69 | } 70 | 71 | float light_value = max(0.0, dot(-lighting.sun_dir.xyz, normalWS) * 0.7 + 0.3); 72 | f_color.rgb = finalShading(lighting, color.rgb, light_value); 73 | f_color.a = color.a; 74 | } 75 | 76 | #endif 77 | -------------------------------------------------------------------------------- /data/shaders/shared/compute_funcs.glsl: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 
// Subgroup helpers: inclusive prefix sums and a max-reduction, both built on
// subgroup shuffle operations.

#ifndef _COMPUTE_FUNCS_GLSL_
#define _COMPUTE_FUNCS_GLSL_

#include "structures.glsl"

#extension GL_KHR_shader_subgroup_shuffle_relative : require
#extension GL_KHR_shader_subgroup_shuffle : require
#extension GL_KHR_shader_subgroup_arithmetic : require

#ifdef VENDOR_NVIDIA

// One step of a shuffle-up inclusive scan. Reads `accum` from the invocation
// `step` lanes below and adds it when such a lane exists
// (gl_SubgroupInvocationID >= step). The step is skipped entirely when
// SUBGROUP_SIZE <= step. Expects `accum` and `temp` to be in scope.
#define INCLUSIVE_ADD_STEP(step) \
	if(SUBGROUP_SIZE > step) { \
		temp = subgroupShuffleUp(accum, step); \
		accum += gl_SubgroupInvocationID >= step ? temp : 0; \
	}

// Inclusive prefix sum of `accum` across the subgroup (int overload),
// using shuffle steps 1, 2, 4, 8, 16 and 32.
int subgroupInclusiveAddFast(int accum) {
	int temp;
	INCLUSIVE_ADD_STEP(1);
	INCLUSIVE_ADD_STEP(2);
	INCLUSIVE_ADD_STEP(4);
	INCLUSIVE_ADD_STEP(8);
	INCLUSIVE_ADD_STEP(16);
	INCLUSIVE_ADD_STEP(32);
	return accum;
}

// Inclusive prefix sum of `accum` across the subgroup (uint overload).
uint subgroupInclusiveAddFast(uint accum) {
	uint temp;
	INCLUSIVE_ADD_STEP(1);
	INCLUSIVE_ADD_STEP(2);
	INCLUSIVE_ADD_STEP(4);
	INCLUSIVE_ADD_STEP(8);
	INCLUSIVE_ADD_STEP(16);
	INCLUSIVE_ADD_STEP(32);
	return accum;
}

#undef INCLUSIVE_ADD_STEP

#else

// Other vendors: use the built-in subgroup inclusive add.
#define subgroupInclusiveAddFast subgroupInclusiveAdd

#endif

// Inclusive prefix sum over runs of 32 invocations: the lane index is taken as
// LIX & 31 and only steps 1..16 are applied.
// NOTE(review): presumably assumes that invocations sharing LIX >> 5 sit in
// the same subgroup at matching lanes — confirm.
// TODO: ifdef for SUBGROUP_SIZE == 32?
uint subgroupInclusiveAddFast32(uint accum) {
	uint temp, invocation_id = LIX & 31;
#define INCLUSIVE_ADD_STEP(step) \
	if(SUBGROUP_SIZE > step) { \
		temp = subgroupShuffleUp(accum, step); \
		accum += invocation_id >= step ? temp : 0; \
	}
	INCLUSIVE_ADD_STEP(1);
	INCLUSIVE_ADD_STEP(2);
	INCLUSIVE_ADD_STEP(4);
	INCLUSIVE_ADD_STEP(8);
	INCLUSIVE_ADD_STEP(16);
#undef INCLUSIVE_ADD_STEP
	return accum;
}

// Max-reduction through xor shuffles with strides 1, 2, 4, ... Each stride is
// applied only while `width` is at least twice the stride (thresholds 2..64),
// so every invocation ends up with the maximum over its aligned group of
// `width` lanes when `width` is one of those powers of two.
uint subgroupMax_(uint value, int width) {
	if(width >= 2)
		value = max(value, subgroupShuffleXor(value, 1));
	if(width >= 4)
		value = max(value, subgroupShuffleXor(value, 2));
	if(width >= 8)
		value = max(value, subgroupShuffleXor(value, 4));
	if(width >= 16)
		value = max(value, subgroupShuffleXor(value, 8));
	if(width >= 32)
		value = max(value, subgroupShuffleXor(value, 16));
	if(width >= 64)
		value = max(value, subgroupShuffleXor(value, 32));
	return value;
}

#endif
-------------------------------------------------------------------------------- /data/shaders/pbr_material.glsl: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details.
// Vertex + fragment shader for the PBR draw path. The stage is selected with
// the VERTEX_SHADER / FRAGMENT_SHADER macros.

#version 460

#include "shared/definitions.glsl"
#include "shared/funcs.glsl"

// Set 0: per-frame lighting. Set 1: per-draw-call data and material textures.
layout(set = 0, binding = 0) uniform ubo00 { Lighting lighting; };
layout(set = 1, binding = 0) uniform ubo10 { PbrDrawCall draw_call; };

layout(set = 1, binding = 1) uniform sampler2D albedo_tex;
layout(set = 1, binding = 2) uniform sampler2D normal_tex;
layout(set = 1, binding = 3) uniform sampler2D pbr_tex;
// Declared but not sampled by either main() below.
layout(set = 1, binding = 4) uniform sampler2D env_map;

// True when the given INST_* bit is set in the draw call options.
bool flagSet(uint flag) { return (draw_call.draw_call_opts & flag) != 0; }

#ifdef VERTEX_SHADER // -------------------------------------------------------

layout(location = 0) in vec3 in_pos;
layout(location = 1) in vec4 in_color;
layout(location = 2) in vec2 in_tex_coord;
// Normal and tangent arrive packed in a uint and are unpacked with decodeNormalUint().
layout(location = 3) in uint in_normal;
layout(location = 4) in uint in_tangent;

layout(location = 0) out vec2 v_tex_coord;
layout(location = 1) out vec4 v_color;
layout(location = 2) out vec3 v_posWS;
layout(location = 3) out vec3 v_normalWS;
layout(location = 4) out vec3 v_tangentWS;
layout(location = 5) out vec3 v_binormalWS;

// Projects the vertex and forwards colour, UV and the tangent frame.
void main() {
	// in_pos is used as the world-space position as-is (no model matrix is applied).
	vec3 posWS = vec4(in_pos, 1.0).xyz;
	gl_Position = draw_call.proj_view_matrix * vec4(posWS, 1.0);
	v_posWS = posWS;

	// Without per-vertex colours fall back to opaque white.
	v_color = flagSet(INST_HAS_VERTEX_COLORS) ? in_color : vec4(1, 1, 1, 1);

	v_tex_coord = in_tex_coord;
	v_normalWS = decodeNormalUint(in_normal);
	v_tangentWS = decodeNormalUint(in_tangent);
	v_binormalWS = cross(v_tangentWS, v_normalWS);
}

#elif defined(FRAGMENT_SHADER) // ---------------------------------------------

layout(location = 0) in vec2 v_tex_coord;
layout(location = 1) in vec4 v_color;
layout(location = 2) in vec3 v_posWS;
layout(location = 3) in vec3 v_normalWS;
layout(location = 4) in vec3 v_tangentWS;
layout(location = 5) in vec3 v_binormalWS;

layout(location = 0) out vec4 f_color;

// Shades the fragment: optional normal mapping, optional PBR/albedo textures,
// then a single sun term passed to finalShading().
void main() {
	vec3 normal = v_normalWS;
	if(flagSet(INST_HAS_NORMAL_TEXTURE)) {
		// Remap the sample from [0, 1] to [-1, 1] and express it in the
		// interpolated tangent / binormal / normal frame.
		vec3 normal_map = texture(normal_tex, v_tex_coord).rgb;
		normal_map = normal_map * 2.0 - 1.0;
		normal =
			normal_map.x * v_tangentWS + normal_map.y * v_binormalWS + normal_map.z * v_normalWS;
		normal = normalize(normal);
	}

	// roughness, metallic, ao
	// Only the third channel (pbr.z) is used below.
	vec3 pbr = vec3(1.0, 1.0, 1.0);
	if(flagSet(INST_HAS_PBR_TEXTURE)) {
		pbr = texture(pbr_tex, v_tex_coord).rgb;
	}

	vec4 color = draw_call.material_color * v_color;
	if(flagSet(INST_HAS_ALBEDO_TEXTURE)) {
		color *= texture(albedo_tex, v_tex_coord);
	}

	// dot(-sun_dir, normal) is remapped with * 0.5 + 0.5, clamped at 0 and scaled by pbr.z.
	float light_value = max(0.0, dot(-lighting.sun_dir.xyz, normal) * 0.5 + 0.5) * pbr.z;
	f_color.rgb = finalShading(lighting, color.rgb, light_value);
	f_color.a = color.a;
}

#endif

-------------------------------------------------------------------------------- /src/shading.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details.
3 | 4 | #include "shading.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "shader_structs.h" 13 | 14 | void SceneLighting::setConfig(const AnyConfig &config) { 15 | if(auto *env_map = config.get("env_map")) 16 | env_map_path = *env_map; 17 | } 18 | 19 | AnyConfig SceneLighting::config() const { 20 | AnyConfig out; 21 | if(!env_map_path.empty()) 22 | out.set("env_map", env_map_path); 23 | return out; 24 | } 25 | 26 | SceneLighting SceneLighting::makeDefault() { 27 | SceneLighting out; 28 | out.sun.dir = {0.842121, -0.300567, -0.447763}; 29 | out.sun.color = {0.8, 0.8, 0.8}; 30 | out.sun.power = 2.5; 31 | out.ambient.color = {0.8, 0.8, 0.6}; 32 | out.ambient.power = 0.4f; 33 | return out; 34 | } 35 | 36 | SceneLighting::operator shader::Lighting() const { 37 | shader::Lighting out; 38 | out.ambient_color = float4(ambient.color, 1.0); 39 | out.sun_color = float4(sun.color, 1.0); 40 | out.sun_dir = float4(sun.dir, 0.0); 41 | out.ambient_power = ambient.power; 42 | out.sun_power = sun.power; 43 | return out; 44 | } 45 | 46 | FrustumInfo::FrustumInfo(const Camera &camera) { 47 | auto params = camera.params(); 48 | auto iview = inverseOrZero(camera.viewMatrix()); 49 | auto rays = fwk::Frustum(camera.projectionMatrix()).cornerRays(); 50 | 51 | for(int i : intRange(dirs)) { 52 | origins[i] = rays[i].origin(); 53 | dirs[i] = rays[i].dir(); 54 | origins[i] = mulPoint(iview, origins[i]); 55 | dirs[i] = mulNormal(iview, dirs[i]); 56 | } 57 | origin0 = origins[0]; 58 | dir0 = dirs[0]; 59 | dirx = (dirs[3] - dirs[0]) * (1.0f / params.viewport.width()); 60 | diry = (dirs[1] - dirs[0]) * (1.0f / params.viewport.height()); 61 | } 62 | 63 | FrustumInfo::operator shader::Frustum() const { 64 | shader::Frustum out; 65 | for(int i : intRange(dirs)) { 66 | out.ws_dirs[i] = float4(dirs[i], 0.0); 67 | out.ws_origins[i] = float4(origins[i], 0.0); 68 | } 69 | out.ws_origin0 = float4(origin0, 1.0); 70 | out.ws_dir0 = float4(dir0, 0.0); 71 | 
out.ws_dirx = float4(dirx, 0.0); 72 | out.ws_diry = float4(diry, 0.0); 73 | return out; 74 | } 75 | 76 | shader::Viewport makeViewport(const Camera &cam, int2 viewport_size) { 77 | shader::Viewport out; 78 | // TODO: add view_matrix & view_proj_matrix ? 79 | out.proj_matrix = cam.projectionMatrix(); 80 | out.near_plane = cam.params().depth.min; 81 | out.far_plane = cam.params().depth.max; 82 | out.inv_far_plane = 1.0f / cam.params().depth.max; 83 | out.size = float2(viewport_size); 84 | out.inv_size = vinv(float2(viewport_size)); 85 | return out; 86 | } 87 | 88 | shader::Rect makeRect(FRect rect) { 89 | shader::Rect out; 90 | out.pos = rect.min(); 91 | out.size = rect.size(); 92 | out.min_uv = (rect.min() + float2(1.0f, 1.0f)) * 0.5f; 93 | out.max_uv = (rect.max() + float2(1.0f, 1.0f)) * 0.5f; 94 | return out; 95 | } 96 | -------------------------------------------------------------------------------- /src/lucid_app.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 
3 | 4 | #pragma once 5 | 6 | #include "lucid_renderer.h" 7 | #include "path_tracer.h" 8 | #include "shading.h" 9 | #include 10 | #include 11 | #include 12 | 13 | #include // TODO: shouldn't be needed 14 | #include 15 | 16 | class LucidRenderer; 17 | class SimpleRenderer; 18 | class PbrRenderer; 19 | class PathTracer; 20 | class SceneSetup; 21 | struct Scene; 22 | 23 | DEFINE_ENUM(RenderingMode, simple, lucid, mixed, pbr, path_trace); 24 | 25 | class LucidApp { 26 | public: 27 | LucidApp(VWindowRef, VDeviceRef); 28 | ~LucidApp(); 29 | 30 | void setConfig(const AnyConfig &); 31 | static Maybe loadConfig(); 32 | void saveConfig() const; 33 | 34 | void selectSetup(ZStr name); 35 | void selectSetup(int idx); 36 | 37 | void switchView(); 38 | 39 | bool updateViewport(); 40 | Ex updateRenderer(); 41 | Ex<> updateEnvMap(); 42 | 43 | void doMenu(); 44 | bool handleInput(vector events, float time_diff); 45 | bool tick(float time_diff); 46 | 47 | void clearScreen(const RenderContext &); 48 | void drawFrame(); 49 | void drawScene(); 50 | 51 | bool mainLoop(); 52 | static bool mainLoop(VulkanWindow &, void *this_ptr); 53 | 54 | void printPerfStats(); 55 | 56 | private: 57 | void showStatsMenu(const Scene &); 58 | void showSceneStats(const Scene &); 59 | void showRasterStats(const Scene &); 60 | 61 | VWindowRef m_window; 62 | VDeviceRef m_device; 63 | PVRenderPass m_gui_render_pass; 64 | Gui m_gui; 65 | 66 | Maybe m_mouse_pos; 67 | Dynamic m_perf_analyzer; 68 | SceneLighting m_lighting; 69 | Maybe m_picked_pos; 70 | 71 | Dynamic m_shader_compiler; 72 | Dynamic m_path_tracer; 73 | Dynamic m_lucid_renderer; 74 | Dynamic m_simple_renderer; 75 | Dynamic m_pbr_renderer; 76 | LucidRenderOpts m_lucid_opts = none; 77 | PathTracerOpts m_path_tracer_opts = none; 78 | bool m_wireframe_mode = false; 79 | bool m_test_meshlets = false; 80 | bool m_show_stats = false; 81 | bool m_verify_lucid_info = true; 82 | int m_select_stats_tab = -1, m_selected_stats_tab = 0; 83 | RenderingMode 
m_rendering_mode = RenderingMode::simple; 84 | 85 | IRect m_viewport; 86 | CameraControl m_cam_control; 87 | vector> m_setups; 88 | float m_square_weight = 0.5f; 89 | int m_setup_idx = -1; 90 | 91 | bool m_is_picking_block = false; 92 | bool m_is_final_pick = false; 93 | 94 | struct StatPoint { 95 | perf::ExecId exec_id; 96 | ZStr short_name; 97 | }; 98 | 99 | vector selectPerfPoints() const; 100 | void updatePerfStats(); 101 | 102 | vector> m_stats; 103 | double m_last_time = -1.0, m_last_shader_update_time = -1.0; 104 | bool m_gather_perf_stats = true; 105 | int m_prev_setup_idx = -1; 106 | int m_skip_frame_id = 0; 107 | int m_scene_frame_id = 0; 108 | }; 109 | -------------------------------------------------------------------------------- /src/lucid_renderer.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 3 | 4 | #pragma once 5 | 6 | #include "lucid_base.h" 7 | #include 8 | #include 9 | 10 | DEFINE_ENUM(LucidRenderOpt, debug_quad_setup, debug_bin_counter, debug_bin_dispatcher, debug_raster, 11 | timers, additive_blending, visualize_errors, alpha_threshold); 12 | 13 | using LucidRenderOpts = EnumFlags; 14 | 15 | namespace shader { 16 | struct LucidConfig; 17 | struct LucidInfo; 18 | struct InstanceData; 19 | } 20 | 21 | class LucidRenderer { 22 | public: 23 | using Opt = LucidRenderOpt; 24 | using Opts = LucidRenderOpts; 25 | using Context = RenderContext; 26 | 27 | static constexpr int max_width = 2560, max_height = 2048; 28 | static constexpr int max_instances = 64 * 1024; 29 | static constexpr int max_instance_quads = 1024; 30 | 31 | LucidRenderer(); 32 | FWK_MOVABLE_CLASS(LucidRenderer) 33 | 34 | static void addShaderDefs(VulkanDevice &, ShaderCompiler &, const ShaderConfig &); 35 | CSpan shaderDefIds() const { return m_shader_def_ids; } 36 | Ex exConstruct(VulkanDevice &, ShaderCompiler &, Opts, int2 view_size); 
37 | 38 | Ex<> render(const Context &); 39 | 40 | void verifyInfo(); 41 | vector getStats() const; 42 | 43 | auto opts() const { return m_opts; } 44 | int binSize() const { return m_bin_size; } 45 | int blockSize() const { return m_block_size; } 46 | int subgroupSize() const { return m_subgroup_size; } 47 | 48 | int maxVisibleQuads() const { return m_max_visible_quads; } 49 | int maxSceneQuads() const { return m_max_visible_quads * 5 / 2; } 50 | 51 | private: 52 | Ex<> uploadInstances(const Context &); 53 | Ex<> setupInputData(const Context &); 54 | void quadSetup(const Context &); 55 | void computeBins(const Context &); 56 | void bindRaster(PVPipeline, const Context &); 57 | void rasterLow(const Context &); 58 | void rasterHigh(const Context &); 59 | 60 | Opts m_opts; 61 | 62 | vector m_shader_def_ids; 63 | PVPipeline p_quad_setup; 64 | PVPipeline p_bin_counter, p_bin_dispatcher, p_bin_categorizer; 65 | PVPipeline p_raster_low, p_raster_high; 66 | 67 | VBufferSpan m_config; 68 | VBufferSpan m_info; 69 | VBufferSpan m_instances; 70 | VBufferSpan m_instance_colors; 71 | VBufferSpan m_instance_uv_rects; 72 | VBufferSpan m_scratch_32; 73 | VBufferSpan m_scratch_64; 74 | VBufferSpan m_bin_quads, m_bin_tris; 75 | VBufferSpan m_normals_storage; 76 | VBufferSpan m_uvec4_storage; 77 | 78 | static constexpr int num_frames = 2; 79 | VBufferSpan<> m_frame_instance_data[num_frames]; 80 | VBufferSpan m_frame_info[num_frames]; 81 | VBufferSpan m_frame_config[num_frames]; 82 | VBufferSpan m_debug_buffer; 83 | 84 | vector m_last_info; 85 | bool m_last_info_updated = false; 86 | 87 | int m_bin_size, m_block_size, m_subgroup_size; 88 | int m_max_dispatches, m_max_visible_quads; 89 | 90 | int2 m_bin_counts; 91 | int m_bin_count; 92 | 93 | int2 m_size; // TODO: rename 94 | int m_num_instances = 0, m_num_quads = 0; 95 | int m_instance_packet_size = 0; 96 | }; 97 | -------------------------------------------------------------------------------- /data/shaders/shared/structures.glsl: 
--------------------------------------------------------------------------------
// Copyright (C) Krzysztof Jakubowski
// This file is part of LucidRaster. See license.txt for details.

// Structure definitions used by the shaders. src/shader_structs.h includes
// this same file inside `namespace shader`, so member order and types here
// define the layout seen by both sides — do not reorder members.

#ifndef _STRUCTURES_GLSL_
#define _STRUCTURES_GLSL_

#include "definitions.glsl"

struct InstanceData {
	int index_offset;
	int vertex_offset;
	int num_quads;
	uint flags;
};

// Sun + ambient lighting parameters.
struct Lighting {
	vec4 ambient_color;
	vec4 sun_color;
	vec4 sun_dir;
	float sun_power, ambient_power;
};

// All vectors in world space
struct Frustum {
	vec4 ws_origins[4], ws_dirs[4];
	vec4 ws_origin0, ws_dir0;
	vec4 ws_dirx, ws_diry;
};

// Per-draw-call uniforms of simple_material.glsl.
struct SimpleDrawCall {
	mat4 proj_view_matrix;
	vec4 material_color;
	vec2 uv_rect_pos;
	vec2 uv_rect_size;
	uint draw_call_opts;
	vec4 world_camera_pos;
};

// Per-draw-call uniforms of pbr_material.glsl.
struct PbrDrawCall {
	mat4 proj_view_matrix;
	mat4 inv_proj_view_matrix;
	vec4 material_color;
	uint draw_call_opts;
	vec4 world_camera_pos;
};

struct EnvMapDrawCall {
	vec2 screen_size, inv_screen_size;
	mat4 inv_proj_view_matrix;
};

struct Rect {
	vec2 pos, size;
	vec2 min_uv, max_uv;
};

struct Viewport {
	mat4 proj_matrix;
	vec2 size, inv_size;
	float near_plane, far_plane;
	float inv_far_plane;
};

// Length of the per-dispatch arrays at the end of LucidInfo.
#define LUCID_INFO_MAX_DISPATCHES 256

// This structure contains all the necessary counters, atomics, etc.
// In shader code it's available as g_info; In the same SSBO just after
// this structure some basic per-bin counters are also kept (g_counts)
// TODO: consistent naming (count or num)
struct LucidInfo {
	int num_input_quads;
	int num_visible_quads[2];
	int num_counted_quads[2];

	int bin_level_counts[BIN_LEVELS_COUNT];

	// Atomic counters
	uint a_small_bins, a_high_bins;
	uint a_setup_work_groups;
	uint a_dummy_counter;

	// Counters for indirect dispatch
	uint num_binning_dispatches[3];
	uint bin_level_dispatches[BIN_LEVELS_COUNT][3];

	// Statistics, timings, etc. (secondary data)
	uint num_rejected_quads[REJECTION_TYPE_COUNT];

	uint setup_timers[TIMERS_COUNT];
	uint raster_timers[TIMERS_COUNT];
	uint bin_dispatcher_timers[TIMERS_COUNT];

	uint stats[STATS_COUNT];

	int dispatcher_first_batch[2][LUCID_INFO_MAX_DISPATCHES];
	int dispatcher_num_batches[2][LUCID_INFO_MAX_DISPATCHES];

	int temp[64];
};

// This structure keeps uniform data passed to Lucid shaders
struct LucidConfig {
	Frustum frustum;
	mat4 view_proj_matrix;
	Lighting lighting;
	vec4 background_color;

	uint enable_backface_culling;
	int num_instances;
	int instance_packet_size;
};

struct PathTracerInfo {
	int temp[256];
};

// Uniform data with the same leading members as LucidConfig, but without the
// culling / instance fields.
// NOTE(review): the previous comment said "passed to Lucid shaders", which
// looks copied from LucidConfig; presumably this is consumed by the path
// tracer shaders instead — confirm.
struct PathTracerConfig {
	Frustum frustum;
	mat4 view_proj_matrix;
	Lighting lighting;
	vec4 background_color;
};

#endif
-------------------------------------------------------------------------------- /data/shaders/bin_categorizer.glsl: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details.
// Compute shader with two roles, chosen by workgroup index (see main()):
// computing per-bin offsets and sorting bins into levels by triangle count.

#version 460

#include "shared/compute_funcs.glsl"
#include "shared/funcs.glsl"
#include "shared/structures.glsl"

#define LSIZE BIN_CATEGORIZER_LSIZE
layout(local_size_x = 512, local_size_x_id = BIN_CATEGORIZER_LSIZE_ID) in;

coherent layout(std430, binding = 0) buffer lucid_info_ {
	LucidInfo g_info;
	int g_counts[];
};
layout(binding = 1) uniform lucid_config_ { LucidConfig u_config; };

// s_bins: per-bin partial sums; s_temp / s_temp2: first- and second-level
// subgroup totals used by computeOffsets().
shared int s_bins[BIN_COUNT];
shared int s_temp[BIN_COUNT / SUBGROUP_SIZE + 1], s_temp2[SUBGROUP_SIZE];

// Computes, for every bin, the sum of the counts of all preceding bins
// (an exclusive prefix sum) and stores it in both the *_OFFSETS and
// *_OFFSETS_TEMP arrays. quads_mode selects the BIN_QUAD_* arrays,
// otherwise the BIN_TRI_* arrays are used.
// The scan runs in levels: inside each subgroup, then over subgroup totals
// (s_temp), then over groups of subgroup totals (s_temp2).
// NOTE(review): relies on (idx & SUBGROUP_MASK) matching the lane within the
// subgroup, i.e. presumably on LSIZE being a multiple of SUBGROUP_SIZE — confirm.
void computeOffsets(const bool quads_mode) {
	// Level 0: scan inside each subgroup; keep the exclusive value per bin and
	// the subgroup total (taken from the last lane) in s_temp.
	for(uint idx = LIX; idx < BIN_COUNT; idx += LSIZE) {
		int value = quads_mode ? BIN_QUAD_COUNTS(idx) : BIN_TRI_COUNTS(idx);
		int accum = subgroupInclusiveAddFast(value);
		s_bins[idx] = accum - value;
		if((idx & SUBGROUP_MASK) == SUBGROUP_MASK)
			s_temp[idx >> SUBGROUP_SHIFT] = accum;
	}
	barrier();
	// Level 1: scan the subgroup totals.
	if(LIX < BIN_COUNT / SUBGROUP_SIZE)
		s_temp[LIX] = subgroupInclusiveAddFast(s_temp[LIX]);
	barrier();
	// Level 2: scan the last entry of every SUBGROUP_SIZE-wide run of s_temp.
	if(LIX < (BIN_COUNT / SUBGROUP_SIZE) / SUBGROUP_SIZE)
		s_temp2[LIX] = subgroupInclusiveAddFast(s_temp[(LIX << SUBGROUP_SHIFT) + SUBGROUP_MASK]);
	barrier();
	// Fold level 2 back into level 1 for all runs except the first.
	if(LIX < BIN_COUNT / SUBGROUP_SIZE && LIX >= SUBGROUP_SIZE)
		s_temp[LIX] += s_temp2[int(LIX >> SUBGROUP_SHIFT) - 1];
	barrier();
	// Final offset = in-subgroup exclusive sum + total of all previous subgroups.
	for(uint idx = LIX; idx < BIN_COUNT; idx += LSIZE) {
		int widx = int(idx >> SUBGROUP_SHIFT) - 1;
		int accum = s_bins[idx];
		if(widx >= 0)
			accum += s_temp[widx];
		if(quads_mode) {
			BIN_QUAD_OFFSETS(idx) = accum;
			BIN_QUAD_OFFSETS_TEMP(idx) = accum;
		} else {
			BIN_TRI_OFFSETS(idx) = accum;
			BIN_TRI_OFFSETS_TEMP(idx) = accum;
		}
	}
}

shared int s_bin_level_counts[BIN_LEVELS_COUNT];

// Sorts bins into levels by triangle count (every quad counts as two
// triangles): empty bins only bump counter 0, bins with fewer than 1024
// triangles are appended to the low-level list, all others to the high-level
// list. Afterwards the per-level counts and indirect dispatch sizes are
// written to g_info.
void categorizeBins() {
	if(LIX < BIN_LEVELS_COUNT)
		s_bin_level_counts[LIX] = 0;
	barrier();

	// Note: ordering bins by number of tris (largest first) does not help
	// Note: using Morton order doesn't help either (actually it makes it run slower)
	// It seems that normal order (row by row) is quite good
	for(uint i = LIX; i < BIN_COUNT; i += LSIZE) {
		int num_quads = BIN_QUAD_COUNTS(i);
		int num_tris = BIN_TRI_COUNTS(i) + num_quads * 2;

		// TODO: add micro phase (< 64/128?)
		if(num_tris == 0) {
			atomicAdd(s_bin_level_counts[0], 1);
		} else if(num_tris < 1024) {
			// TODO: On gallery, dragon, san-miguel setting limit to 512 for low increases perf, why?
			int id = atomicAdd(s_bin_level_counts[BIN_LEVEL_LOW], 1);
			LOW_LEVEL_BINS(id) = int(i);
		} else if(true) {
			// Always taken; behaves like a plain `else`.
			int id = atomicAdd(s_bin_level_counts[BIN_LEVEL_HIGH], 1);
			HIGH_LEVEL_BINS(id) = int(i);
		}
	}
	barrier();
	if(LIX < BIN_LEVELS_COUNT) {
		g_info.bin_level_counts[LIX] = s_bin_level_counts[LIX];
		// The high level is limited to half of MAX_DISPATCHES.
		int max_dispatches = MAX_DISPATCHES >> (LIX == BIN_LEVEL_HIGH ? 1 : 0);
		g_info.bin_level_dispatches[LIX][0] = min(s_bin_level_counts[LIX], max_dispatches);
		g_info.bin_level_dispatches[LIX][1] = 1;
		g_info.bin_level_dispatches[LIX][2] = 1;
	}
}

// Computes quad offsets first, then triangle offsets; the barrier separates
// the two passes because both reuse the same shared arrays.
void computeOffsets() {
	computeOffsets(true);
	barrier();
	computeOffsets(false);
}

// Workgroup 0 computes the offsets; every other workgroup categorizes bins.
// NOTE(review): presumably dispatched with exactly two workgroups — with more,
// each extra workgroup would repeat categorizeBins(); confirm.
void main() {
	if(WGID.x == 0)
		computeOffsets();
	else
		categorizeBins();
}

-------------------------------------------------------------------------------- /src/scene.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details.
3 | 4 | #pragma once 5 | 6 | #include "lucid_base.h" 7 | #include 8 | #include 9 | #include 10 | 11 | struct WavefrontObject; 12 | 13 | DEFINE_ENUM(SceneMapType, albedo, normal, pbr); 14 | using SceneMapTypes = EnumFlags; 15 | 16 | struct SceneTexture { 17 | SceneTexture(); 18 | FWK_COPYABLE_CLASS(SceneTexture); 19 | 20 | Ex loadPlain(ZStr path); 21 | Ex load(Stream &); 22 | Ex save(Stream &) const; 23 | 24 | bool empty() const { return size() == int2(0, 0); } 25 | VColorFormat format() const; 26 | int2 size() const; 27 | 28 | string name; 29 | PVImageView vk_image; 30 | vector mips; 31 | SceneMapType map_type = SceneMapType::albedo; 32 | bool is_opaque = true; 33 | bool is_clamped = false; 34 | bool is_atlas = false; 35 | }; 36 | 37 | struct SceneMaterial { 38 | struct Map { 39 | explicit operator bool() const { return texture_id != -1; } 40 | 41 | // clamped maps don't have to use uv_rect, because their uv_coordinates are transformed 42 | bool usesUvRect() const { return !is_clamped && uv_rect != FRect(0, 0, 1, 1); } 43 | 44 | PVImageView vk_image; 45 | FRect uv_rect = FRect(0, 0, 1, 1); 46 | bool is_opaque = false; 47 | bool is_clamped = true; 48 | int texture_id = -1; 49 | }; 50 | 51 | SceneMaterial(string name = ""); 52 | ~SceneMaterial(); 53 | 54 | Ex load(Stream &); 55 | Ex save(Stream &) const; 56 | 57 | bool isOpaque() const; 58 | void freeTextures(); 59 | 60 | string name; 61 | float3 diffuse = float3(1); 62 | float opacity = 1.0f; 63 | EnumMap maps; 64 | }; 65 | 66 | struct SceneMesh { 67 | Ex load(Stream &); 68 | Ex save(Stream &) const; 69 | 70 | using Tri = array; 71 | using Quad = array; 72 | vector tris; 73 | vector quads; 74 | FBox bounding_box; 75 | bool colors_opaque = true; 76 | int material_id = 0; 77 | int num_degenerate_quads = 0; 78 | }; 79 | 80 | struct Scene { 81 | Scene(); 82 | FWK_MOVABLE_CLASS(Scene); 83 | 84 | static Ex load(ZStr Path); 85 | Ex load(Stream &); 86 | Ex save(Stream &) const; 87 | 88 | void mergeVertices(int 
decimal_places = 3); 89 | void generateQuads(float squareness_weight); 90 | 91 | int numTris() const; 92 | int numQuads() const; 93 | int numVerts() const { return positions.size(); } 94 | bool hasColors() const { return colors && colors.size() == positions.size(); } 95 | 96 | // TODO: remove these? 97 | bool hasNormals() const { return normals && normals.size() == positions.size(); } 98 | bool hasQuantizedNormals() const { 99 | return quantized_normals && quantized_normals.size() == positions.size(); 100 | } 101 | 102 | bool hasTexCoords() const { return tex_coords && tex_coords.size() == positions.size(); } 103 | 104 | // Only albedo and at most 2 textures (one opaque, one alpha) 105 | bool hasSimpleTextures() const; 106 | 107 | struct Intersection { 108 | int mesh_id, tri_id; 109 | float3 pos; 110 | }; 111 | 112 | Maybe intersect(Segment3F) const; 113 | 114 | vector positions; 115 | vector colors; 116 | vector tex_coords; 117 | vector normals; 118 | vector tangents; 119 | vector quantized_normals; 120 | vector quantized_tangents; 121 | 122 | string id, resource_path; 123 | vector textures; 124 | vector materials; 125 | vector meshes; 126 | FBox bounding_box; 127 | 128 | // ------ Rendering data -------------------------------------------------- 129 | 130 | void updatePrimitiveOffsets(); 131 | Ex<> updateRenderingData(VulkanDevice &); 132 | void freeRenderingData(); 133 | void computeTangents(); 134 | void quantizeVectors(); 135 | 136 | void computeFlatVectors(); 137 | void quantizeFlatVectors(); 138 | 139 | vector draws(const Frustum &) const; 140 | Pair textureAtlasPair() const; 141 | 142 | VertexArray verts; 143 | VBufferSpan tris_ib, quads_ib; 144 | 145 | vector> mesh_primitive_offsets; 146 | }; 147 | -------------------------------------------------------------------------------- /src/scene_assimp.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of 
LucidRaster. See license.txt for details. 3 | 4 | #include "scene.h" 5 | 6 | #include "shading.h" 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | static float3 fromAssimp(const aiVector3D &vec) { return {vec.x, vec.y, vec.z}; } 16 | static IColor fromAssimp(const aiColor4D &col) { 17 | return (IColor)FColor(col.r, col.g, col.b, col.a); 18 | } 19 | 20 | static string fromAssimp(const aiString &str) { return str.C_Str(); } 21 | 22 | static Matrix4 fromAssimp(const aiMatrix4x4 &mat) { 23 | static_assert(sizeof(aiMatrix4x4) == sizeof(Matrix4)); 24 | Matrix4 out; 25 | memcpy(&out, &mat, sizeof(mat)); 26 | return out; 27 | } 28 | 29 | Ex Scene::loadAssimp(ZStr input_path) { 30 | Assimp::Importer importer; 31 | auto ai_flags = aiProcess_Triangulate | aiProcess_SortByPType | aiProcess_JoinIdenticalVertices; 32 | const aiScene *ai_scene = importer.ReadFile(input_path.c_str(), ai_flags); 33 | 34 | if(!ai_scene) { 35 | return FWK_ERROR("Error while loading '%' with assimp:%\n", input_path, 36 | importer.GetErrorString()); 37 | } 38 | 39 | Scene scene; 40 | scene.meshes.resize(ai_scene->mNumMeshes); 41 | 42 | for(uint i = 0; i < ai_scene->mNumMeshes; i++) { 43 | auto &src_mesh = *ai_scene->mMeshes[i]; 44 | auto &dst_mesh = scene.meshes[i]; 45 | 46 | dst_mesh.positions.resize(src_mesh.mNumVertices); 47 | for(uint j = 0; j < src_mesh.mNumVertices; j++) 48 | dst_mesh.positions[j] = fromAssimp(src_mesh.mVertices[j]); 49 | 50 | dst_mesh.tex_coords.resize(src_mesh.mNumUVComponents[0]); 51 | for(uint j = 0; j < src_mesh.mNumUVComponents[0]; j++) 52 | dst_mesh.tex_coords[j] = fromAssimp(src_mesh.mTextureCoords[0][j]).xy(); 53 | 54 | if(src_mesh.HasNormals()) { 55 | dst_mesh.normals.resize(src_mesh.mNumVertices); 56 | for(uint j = 0; j < src_mesh.mNumVertices; j++) 57 | dst_mesh.normals[j] = fromAssimp(src_mesh.mNormals[j]); 58 | } 59 | 60 | if(src_mesh.HasVertexColors(0)) { 61 | dst_mesh.colors.resize(src_mesh.mNumVertices); 62 | 
for(uint j = 0; j < src_mesh.mNumVertices; j++) 63 | dst_mesh.colors[j] = fromAssimp(src_mesh.mColors[0][j]); 64 | } 65 | 66 | dst_mesh.tris.resize(src_mesh.mNumFaces); 67 | dst_mesh.bounding_box = enclose(dst_mesh.positions); 68 | for(uint j = 0; j < src_mesh.mNumFaces; j++) { 69 | auto &face = src_mesh.mFaces[j]; 70 | dst_mesh.tris[j] = {int(face.mIndices[0]), int(face.mIndices[1]), 71 | int(face.mIndices[2])}; 72 | } 73 | } 74 | 75 | for(uint i = 0; i < ai_scene->mNumMaterials; i++) { 76 | auto &src = *ai_scene->mMaterials[i]; 77 | auto &dst = scene.materials.emplace_back(fromAssimp(src.GetName())); 78 | 79 | aiColor3D color(0.f, 0.f, 0.f); 80 | src.Get(AI_MATKEY_COLOR_DIFFUSE, color); 81 | float opacity = 1.0f; 82 | src.Get(AI_MATKEY_OPACITY, opacity); 83 | 84 | dst.diffuse = {color.b, color.g, color.r}; 85 | dst.opacity = opacity; 86 | 87 | aiString texName; 88 | if(src.Get(AI_MATKEY_TEXTURE(aiTextureType_DIFFUSE, 0), texName) == aiReturn_SUCCESS) 89 | dst.maps[MaterialMapType::diffuse].texture_name = fromAssimp(texName); 90 | if(src.Get(AI_MATKEY_TEXTURE(aiTextureType_NORMALS, 0), texName) == aiReturn_SUCCESS) 91 | dst.maps[MaterialMapType::bump].texture_name = fromAssimp(texName); 92 | } 93 | 94 | struct Node { 95 | aiNode *node; 96 | Matrix4 trans; 97 | }; 98 | vector nodes; 99 | nodes.emplace_back(ai_scene->mRootNode, fromAssimp(ai_scene->mRootNode->mTransformation)); 100 | 101 | for(int i = 0; i < nodes.size(); i++) { 102 | auto node = nodes[i]; 103 | for(uint j = 0; j < node.node->mNumMeshes; j++) { 104 | auto mesh_id = node.node->mMeshes[j]; 105 | auto &mesh = *ai_scene->mMeshes[mesh_id]; 106 | Instance new_inst; 107 | new_inst.first_tri = 0; 108 | new_inst.mesh_id = mesh_id; 109 | new_inst.material_id = mesh.mMaterialIndex; 110 | new_inst.num_tris = mesh.mNumFaces; 111 | // TODO: rough, compute exactly ? 
112 | new_inst.bounding_box = scene.meshes[mesh_id].bounding_box; 113 | new_inst.trans = node.trans; 114 | scene.instances.emplace_back(new_inst); 115 | } 116 | for(uint j = 0; j < node.node->mNumChildren; j++) { 117 | auto *child = node.node->mChildren[j]; 118 | nodes.emplace_back(child, node.trans * fromAssimp(child->mTransformation)); 119 | } 120 | } 121 | 122 | return scene; 123 | } 124 | -------------------------------------------------------------------------------- /src/lucid.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 3 | 4 | #include "lucid_app.h" 5 | #include "scene_convert.h" 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | Ex exMain(int argc, char **argv) { 24 | IRect window_rect = IRect({1280, 720}) + int2(32, 32); 25 | 26 | // TODO: xml loading is still messy 27 | Maybe config = LucidApp::loadConfig(); 28 | VInstanceSetup setup; 29 | auto window_flags = VWindowFlag::resizable | VWindowFlag::centered | VWindowFlag::allow_hidpi | 30 | VWindowFlag::sleep_when_minimized; 31 | [[maybe_unused]] uint multisampling = 1; 32 | #ifdef NDEBUG 33 | bool debug_mode = false; 34 | #else 35 | bool debug_mode = true; 36 | #endif 37 | 38 | VSwapChainSetup swap_chain_setup; 39 | // TODO: UI is configure for Unorm, shouldn't we use SRGB by default? 
40 | swap_chain_setup.preferred_formats = {VK_FORMAT_B8G8R8A8_UNORM}; 41 | swap_chain_setup.preferred_present_mode = VPresentMode::immediate; 42 | swap_chain_setup.usage = 43 | VImageUsage::color_att | VImageUsage::storage | VImageUsage::transfer_dst; 44 | swap_chain_setup.initial_layout = VImageLayout::general; 45 | 46 | for(int n = 1; n < argc; n++) { 47 | string argument = argv[n]; 48 | if(argument == "--convert-scenes") { 49 | convertScenes(mainPath() / "input_scenes.xml"); 50 | return 0; 51 | } else if(argument == "--vsync") { 52 | swap_chain_setup.preferred_present_mode = VPresentMode::fifo; 53 | } else if(argument == "--vulkan-debug") { 54 | debug_mode = true; 55 | } else if(argument == "--no-vulkan-debug") { 56 | debug_mode = false; 57 | } else if(argument == "--msaa") { 58 | ASSERT(n + 1 < argc && "Invalid nr of arguments"); 59 | multisampling = clamp(atoi(argv[n + 1]), 1, 16); 60 | n++; 61 | } else { 62 | FATAL("Unsupported argument: %s", argument.c_str()); 63 | } 64 | } 65 | 66 | if(debug_mode) { 67 | setup.debug_levels = VDebugLevel::warning | VDebugLevel::error; 68 | setup.debug_types = all; 69 | } 70 | 71 | // TODO: create instance on a thread, in the meantime load resources? 72 | auto instance = EX_PASS(VulkanInstance::create(setup)); 73 | 74 | if(config) { 75 | // TODO: first initialize SDL? 
76 | auto displays = VulkanWindow::displays(); 77 | if(auto *rect = config->get("window_rect")) { 78 | window_rect = VulkanWindow::sanitizeWindowRect(displays, *rect); 79 | window_flags &= ~VWindowFlag::centered; 80 | } 81 | if(config->get("window_maximized", false)) 82 | window_flags |= VWindowFlag::maximized; 83 | } 84 | 85 | auto window = EX_PASS(VulkanWindow::create(instance, "LucidRaster", window_rect, window_flags)); 86 | 87 | VDeviceSetup dev_setup; 88 | dev_setup.features.emplace(); 89 | dev_setup.features->shaderInt64 = VK_TRUE; 90 | dev_setup.features->samplerAnisotropy = VK_TRUE; 91 | dev_setup.features->fillModeNonSolid = VK_TRUE; 92 | dev_setup.allow_descriptor_update_after_bind = true; 93 | auto pref_device = instance->preferredDevice(window->surfaceHandle(), &dev_setup.queues); 94 | if(!pref_device) 95 | return FWK_ERROR("Couldn't find a suitable Vulkan device"); 96 | dev_setup.extensions = {"VK_EXT_shader_subgroup_vote", "VK_EXT_shader_subgroup_ballot"}; 97 | auto device = EX_PASS(instance->createDevice(*pref_device, dev_setup)); 98 | auto phys_info = instance->info(device->physId()); 99 | print("Selected Vulkan physical device: %\nDriver version: %\n", 100 | phys_info.properties.deviceName, phys_info.properties.driverVersion); 101 | device->addSwapChain(EX_PASS(VulkanSwapChain::create(*device, window, swap_chain_setup))); 102 | 103 | Dynamic perf_manager; 104 | Dynamic perf_ctx; 105 | if(true) { 106 | perf_manager.emplace(); 107 | perf_ctx.emplace(1024); 108 | } 109 | 110 | LucidApp app(window, device); 111 | if(config) 112 | app.setConfig(*config); 113 | app.updateEnvMap().check(); 114 | app.updateViewport(); 115 | EXPECT(app.updateRenderer()); 116 | window->runMainLoop(LucidApp::mainLoop, &app); 117 | app.printPerfStats(); 118 | 119 | return 0; 120 | } 121 | 122 | int main(int argc, char **argv) { 123 | auto result = exMain(argc, argv); 124 | 125 | if(!result) { 126 | result.error().print(); 127 | return 1; 128 | } 129 | return *result; 130 | } 
-------------------------------------------------------------------------------- /src/texture_atlas.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 3 | 4 | #include "texture_atlas.h" 5 | 6 | #include 7 | 8 | #define STBRP_STATIC 9 | #define STB_RECT_PACK_IMPLEMENTATION 10 | #include "../extern/stb_rect_pack.h" 11 | 12 | using Entry = TextureAtlas::Entry; 13 | using Config = TextureAtlas::Config; 14 | 15 | Maybe TextureAtlas::make(vector sizes, Config config) { 16 | if(!sizes) 17 | return {}; 18 | DASSERT(isPowerOfTwo(config.max_atlas_size)); 19 | DASSERT(config.round_elem_size >= 1 && isPowerOfTwo(config.round_elem_size)); 20 | 21 | int2 max_size; 22 | vector rsizes = sizes; 23 | for(int i : intRange(sizes)) { 24 | DASSERT(sizes[i].x > 0 && sizes[i].y > 0); 25 | max_size = vmax(max_size, sizes[i]); 26 | rsizes[i] = (rsizes[i] + int2(config.round_elem_size - 1)) / config.round_elem_size; 27 | } 28 | max_size = {nextPow2(max_size.x), nextPow2(max_size.y)}; 29 | 30 | stbrp_context ctx; 31 | // TODO: how many nodes do we actually need? 
32 | PodVector nodes(sizes.size() * 32); 33 | PodVector rects(sizes.size()); 34 | 35 | for(int i : intRange(sizes)) { 36 | rects[i].id = i; 37 | rects[i].x = rects[i].y = 0; 38 | rects[i].was_packed = 0; 39 | rects[i].w = rsizes[i].x; 40 | rects[i].h = rsizes[i].y; 41 | } 42 | 43 | while(max_size.x <= config.max_atlas_size && max_size.y <= config.max_atlas_size) { 44 | int2 max_rsize = max_size / config.round_elem_size; 45 | stbrp_init_target(&ctx, max_rsize.x, max_rsize.y, nodes.data(), nodes.size()); 46 | if(stbrp_pack_rects(&ctx, rects.data(), rects.size())) { 47 | vector entries(sizes.size()); 48 | for(int i : intRange(rects)) { 49 | auto &rect = rects[i]; 50 | auto &entry = entries[rect.id]; 51 | entry.size = sizes[i]; 52 | auto rsize = rsizes[i] * config.round_elem_size; 53 | entry.border_tl = (rsize - entry.size) / 2; 54 | entry.border_br = (rsize - entry.size) - entry.border_tl; 55 | entry.pos = int2(rect.x, rect.y) * config.round_elem_size + entry.border_tl; 56 | } 57 | return TextureAtlas{std::move(entries), config, max_size}; 58 | } 59 | (max_size.x > max_size.y ? 
max_size.y : max_size.x) *= 2; 60 | } 61 | 62 | return none; 63 | } 64 | 65 | static void fillBorders(const Entry &entry, ImageView tex) { 66 | int2 rsize = entry.size + entry.border_tl + entry.border_br; 67 | if(rsize == entry.size) 68 | return; 69 | 70 | int left = entry.border_tl.x, top = entry.border_tl.y; 71 | int right = entry.border_br.x, bottom = entry.border_br.y; 72 | int2 origin = entry.pos, size = entry.size; 73 | 74 | auto fill_corner = [&](int2 start, int w, int h, int2 src) { 75 | start += origin; 76 | src += origin; 77 | for(int y = 0; y < h; y++) 78 | for(int x = 0; x < w; x++) 79 | tex(start.x + x, start.y + y) = tex(src); 80 | }; 81 | 82 | auto fill_rows = [&](int start_y, int h, int src_y) { 83 | for(int y = 0; y < h; y++) { 84 | int2 dst(origin.x, origin.y + start_y + y); 85 | for(int x = 0; x < size.x; x++) 86 | tex(dst.x + x, dst.y) = tex(origin.x + x, origin.y + src_y); 87 | } 88 | }; 89 | 90 | auto fill_cols = [&](int start_x, int w, int src_x) { 91 | for(int y = 0; y < size.y; y++) { 92 | for(int x = 0; x < w; x++) 93 | tex(origin.x + start_x + x, origin.y + y) = tex(origin.x + src_x, origin.y + y); 94 | } 95 | }; 96 | 97 | fill_corner({-left, -top}, left, top, {}); 98 | fill_corner({size.x, -top}, right, top, {size.x - 1, 0}); 99 | fill_corner({-left, size.y}, left, bottom, {0, size.y - 1}); 100 | fill_corner({size.x, size.y}, right, bottom, {size.x - 1, size.y - 1}); 101 | 102 | fill_rows(-top, top, 0); 103 | fill_rows(size.y, bottom, size.y - 1); 104 | 105 | fill_cols(-left, left, 0); 106 | fill_cols(size.x, right, size.x - 1); 107 | } 108 | 109 | Image TextureAtlas::merge(CSpan textures_, IColor background) const { 110 | DASSERT(textures_.size() == entries.size()); 111 | Image out(size); 112 | out.fill(background); 113 | 114 | // TODO: use ImageViews instead 115 | vector textures; 116 | for(auto *texture : textures_) { 117 | DASSERT(baseFormat(texture->format()) == VBaseFormat::rgba8); 118 | PodVector data = texture->data(); 119 | 
textures.emplace_back(data.reinterpret(), texture->size(), 120 | VColorFormat::rgba8_unorm); 121 | } 122 | 123 | for(int i : intRange(textures)) { 124 | DASSERT(!textures[i].empty()); 125 | out.blit(textures[i], entries[i].pos); 126 | fillBorders(entries[i], out.pixels()); 127 | } 128 | return out; 129 | } 130 | 131 | FRect TextureAtlas::uvRect(const Entry &entry, float inset_pixels) const { 132 | auto scale = vinv(float2(size)); 133 | float2 p1 = (float2(entry.pos) + float2(inset_pixels)) * scale; 134 | float2 p2 = (float2(entry.pos + entry.size) - float2(inset_pixels)) * scale; 135 | return {p1, p2}; 136 | } 137 | -------------------------------------------------------------------------------- /data/shaders/shared/definitions.glsl: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 3 | 4 | #ifndef _DEFINITIONS_GLSL_ 5 | #define _DEFINITIONS_GLSL_ 6 | 7 | #ifdef __cplusplus 8 | // Thread about using vec3 in glsl buffers: 9 | // https://stackoverflow.com/questions/38172696 10 | 11 | #include 12 | #include 13 | 14 | struct alignas(8) vec2 { 15 | vec2() : x(0.0f), y(0.0f) {} 16 | vec2(const fwk::float2 &rhs) : x(rhs.x), y(rhs.y) {} 17 | operator fwk::float2() const { return {x, y}; } 18 | 19 | float x, y; 20 | }; 21 | 22 | struct alignas(16) vec4 { 23 | vec4() : x(0.0f), y(0.0f), z(0.0f), w(0.0f) {} 24 | vec4(const fwk::float4 &rhs) : x(rhs.x), y(rhs.y), z(rhs.z), w(rhs.w) {} 25 | vec4(const fwk::float3 &rhs) : x(rhs.x), y(rhs.y), z(rhs.z), w(0.0f) {} 26 | operator fwk::float4() const { return {x, y, z, w}; } 27 | operator fwk::float3() const { return {x, y, z}; } 28 | 29 | float x, y, z, w; 30 | }; 31 | 32 | struct alignas(16) mat4 { 33 | mat4() {} 34 | mat4(const fwk::Matrix4 &mat) : col{mat[0], mat[1], mat[2], mat[3]} {} 35 | 36 | vec4 col[4]; 37 | }; 38 | #endif 39 | 40 | // TODO: locase names 41 | #ifdef __cplusplus 42 | 
// On C++ side every constant becomes an int member of SpecializationConstants
// (initialized with the default value)...
#define CONSTANT(id, name, default_value) int name = default_value;
struct SpecializationConstants {
#else
// ...while in GLSL it is a specialization constant with given constant_id.
#define CONSTANT(id, name, default_value) layout(constant_id = id) const int name = default_value;
#endif
	CONSTANT(0, VIEWPORT_SIZE_X, 1280)
	CONSTANT(1, VIEWPORT_SIZE_Y, 720)
	CONSTANT(2, BIN_COUNT, 880)
	CONSTANT(3, BIN_COUNT_X, 40)
	CONSTANT(4, BIN_COUNT_Y, 22)
	CONSTANT(5, BIN_SIZE, 32)
	CONSTANT(6, BIN_SHIFT, 5)
	CONSTANT(7, MAX_VISIBLE_QUADS, 1024 * 1024)
	CONSTANT(8, MAX_VISIBLE_QUADS_SHIFT, 20)
	CONSTANT(9, MAX_VISIBLE_TRIS, 2 * 1024 * 1024)
	CONSTANT(10, MAX_DISPATCHES, 128)
	CONSTANT(11, RENDER_OPTIONS, 0)

	CONSTANT(12, BIN_DISPATCHER_LSHIFT, 10)

// Ids of the two local-size constants are also available as plain macros
#define BIN_DISPATCHER_LSIZE_ID 13
#define BIN_CATEGORIZER_LSIZE_ID 14
	CONSTANT(BIN_DISPATCHER_LSIZE_ID, BIN_DISPATCHER_LSIZE, 1024)
	CONSTANT(BIN_CATEGORIZER_LSIZE_ID, BIN_CATEGORIZER_LSIZE, 512)

#ifdef __cplusplus
};
#endif
#undef CONSTANT

// clang-format off
#define BIN_LEVELS_COUNT		5
#define REJECTION_TYPE_COUNT	4
#define TIMERS_COUNT			8
#define STATS_COUNT				4

#define BIN_LEVEL_EMPTY			0
#define BIN_LEVEL_MICRO			1
#define BIN_LEVEL_LOW			2
#define BIN_LEVEL_MEDIUM		3
#define BIN_LEVEL_HIGH			4

// These map directly to DrawCallOpts (lucid_base.h)
#define INST_HAS_VERTEX_COLORS		0x001
#define INST_HAS_VERTEX_TEX_COORDS	0x002
#define INST_HAS_VERTEX_NORMALS		0x004
#define INST_IS_OPAQUE				0x008
#define INST_TEX_OPAQUE				0x010
#define INST_HAS_UV_RECT			0x020
#define INST_HAS_ALBEDO_TEXTURE		0x040
#define INST_HAS_NORMAL_TEXTURE		0x080
#define INST_HAS_PBR_TEXTURE		0x100
#define INST_HAS_COLOR				0x200

// Different reasons for rejection of triangles/quads during setup
#define REJECTION_TYPE_OTHER			0
#define REJECTION_TYPE_BACKFACE			1
#define REJECTION_TYPE_FRUSTUM			2
#define REJECTION_TYPE_BETWEEN_SAMPLES	3

#ifndef __cplusplus
// Short names for commonly used built-in variables
#define LIX		gl_LocalInvocationIndex
#define LID		gl_LocalInvocationID
#define WGID	gl_WorkGroupID

#if !defined(SUBGROUP_SIZE) || !defined(SUBGROUP_SHIFT)
#error "SUBGROUP_SIZE and SUBGROUP_SHIFT must be defined"
#endif
#define SUBGROUP_MASK (SUBGROUP_SIZE - 1)

// Checks whether any of the given bits is set in RENDER_OPTIONS.
bool renderOptSet(uint bit) {
	uint selected_bits = RENDER_OPTIONS & bit;
	return selected_bits != 0u;
}

// Per-bin number of quad counts, offsets, etc.
// (all of the arrays below are consecutive BIN_COUNT-sized sections of g_counts)
#define BIN_QUAD_COUNTS(idx)		g_counts[BIN_COUNT * 0 + (idx)]
#define BIN_QUAD_OFFSETS(idx)		g_counts[BIN_COUNT * 1 + (idx)]
#define BIN_QUAD_OFFSETS_TEMP(idx)	g_counts[BIN_COUNT * 2 + (idx)]

#define BIN_TRI_COUNTS(idx)			g_counts[BIN_COUNT * 3 + (idx)]
#define BIN_TRI_OFFSETS(idx)		g_counts[BIN_COUNT * 4 + (idx)]
#define BIN_TRI_OFFSETS_TEMP(idx)	g_counts[BIN_COUNT * 5 + (idx)]

// Lists of bins of different quad density levels
#define MICRO_LEVEL_BINS(idx)		g_counts[BIN_COUNT * 6 + (idx)]
#define LOW_LEVEL_BINS(idx)			g_counts[BIN_COUNT * 7 + (idx)]
#define MEDIUM_LEVEL_BINS(idx)		g_counts[BIN_COUNT * 8 + (idx)]
#define HIGH_LEVEL_BINS(idx)		g_counts[BIN_COUNT * 9 + (idx)]

// Macros useful when accessing storage
// (offsets are expressed in multiples of MAX_VISIBLE_QUADS)
#define STORAGE_TRI_BARY_OFFSET		0
#define STORAGE_TRI_SCAN_OFFSET		(MAX_VISIBLE_QUADS * 4)
#define STORAGE_TRI_DEPTH_OFFSET	(MAX_VISIBLE_QUADS * 8)
#define STORAGE_QUAD_COLOR_OFFSET	(MAX_VISIBLE_QUADS * 10)
#define STORAGE_QUAD_NORMAL_OFFSET	(MAX_VISIBLE_QUADS * 11)
#define STORAGE_QUAD_TEXTURE_OFFSET (MAX_VISIBLE_QUADS * 12)
#endif

#endif
--------------------------------------------------------------------------------
/src/quad_generator.cpp:
--------------------------------------------------------------------------------
// Copyright (C) Krzysztof Jakubowski
// This file is part of
LucidRaster. See license.txt for details. 3 | 4 | #include "quad_generator.h" 5 | 6 | #include 7 | 8 | float squareness(CSpan corners) { 9 | float out = 0.0; 10 | array edges; 11 | for(auto [i, j] : wrappedPairsRange(4)) 12 | edges[i] = normalize(corners[j] - corners[i]); 13 | for(auto [i, j] : wrappedPairsRange(4)) 14 | out += fabs(dot(edges[i], edges[j])); 15 | return (4.0f - out) * 0.25f; 16 | } 17 | 18 | vector> triNeighbours(CSpan> tris) { 19 | vector> out(tris.size(), array{{-1, -1, -1}}); 20 | HashMap, int> edge_tri_map; 21 | edge_tri_map.reserve(tris.size() * 4); 22 | 23 | for(int i : intRange(tris)) { 24 | auto &tri = tris[i]; 25 | for(int j = 0; j < 3; j++) { 26 | int v0 = tri[j], v1 = tri[j == 2 ? 0 : j + 1]; 27 | edge_tri_map.emplace({v0, v1}, i); 28 | } 29 | } 30 | 31 | for(int i : intRange(tris)) { 32 | auto &tri = tris[i]; 33 | for(int j = 0; j < 3; j++) { 34 | int v0 = tri[j], v1 = tri[j == 2 ? 0 : j + 1]; 35 | auto it = edge_tri_map.find({v1, v0}); 36 | if(it != edge_tri_map.end()) { 37 | if(it->value != i) 38 | out[i][j] = it->value; 39 | } 40 | } 41 | } 42 | 43 | return out; 44 | } 45 | 46 | static int findIndex(CSpan values, int value) { 47 | for(int i : intRange(values)) 48 | if(values[i] == value) 49 | return i; 50 | return -1; 51 | } 52 | 53 | //#define DEBUG_QUADGEN 54 | 55 | Pair, vector>> 56 | quadNodes(CSpan verts, CSpan> tris, CSpan> tri_neighbours) { 57 | vector quads; 58 | quads.reserve(tri_neighbours.size() * 2 / 3); 59 | vector> tri_quads(tris.size(), array{{-1, -1, -1}}); 60 | 61 | // Potrzebujemy tutaj mapę sąsiadów po krawędziach 62 | 63 | for(int idx0 : intRange(tris)) { 64 | auto &tri0 = tris[idx0]; 65 | auto &tri0_neighbours = tri_neighbours[idx0]; 66 | for(int i : intRange(3)) { 67 | int idx1 = tri0_neighbours[i]; 68 | if(idx1 == -1 || tri_quads[idx0][i] != -1) 69 | continue; 70 | auto &tri1_neighbours = tri_neighbours[idx1]; 71 | int j = findIndex(tri1_neighbours, idx0); 72 | if(j == -1) 73 | continue; 74 | 75 | int 
opposite_vert = -1; 76 | for(int ov : tris[idx1]) 77 | if(!isOneOf(ov, tri0)) { 78 | opposite_vert = ov; 79 | break; 80 | } 81 | if(opposite_vert == -1) 82 | continue; 83 | 84 | int quad_idx = quads.size(); 85 | auto &quad = quads.emplace_back(idx0, idx1); 86 | tri_quads[idx0][i] = quad_idx; 87 | tri_quads[idx1][j] = quad_idx; 88 | 89 | quad.verts = {{tri0[i], opposite_vert, tri0[(i + 1) % 3], tri0[(i + 2) % 3]}}; 90 | float3 points[4]; 91 | for(int k : intRange(4)) 92 | points[k] = verts[quad.verts[k]]; 93 | quad.squareness = squareness(points); 94 | //print("Quad %: [% %] sq: % idx:%\n", quad_idx, idx0, idx1, quad.squareness, i); 95 | } 96 | } 97 | 98 | for(auto &tri_quad : tri_quads) 99 | for(int i = 0; i < 3; i++) { 100 | int q0 = tri_quad[i]; 101 | int q1 = tri_quad[i == 2 ? 0 : i + 1]; 102 | if(q0 != -1 && q1 != -1) { 103 | quads[q0].addConflict(q1); 104 | quads[q1].addConflict(q0); 105 | } 106 | } 107 | 108 | #ifdef DEBUG_QUADGEN 109 | for(int i : intRange(verts)) 110 | print("Vertex %: %\n", i, verts[i]); 111 | for(int i : intRange(tris)) 112 | print("Tri %: %\n", i, tris[i]); 113 | for(int i : intRange(quads)) 114 | print("Quad %: v:% t:% con:% sq:%\n", i, quads[i].verts, quads[i].tris, quads[i].conflicts, 115 | quads[i].squareness); 116 | #endif 117 | 118 | return {std::move(quads), std::move(tri_quads)}; 119 | } 120 | 121 | vector> genQuads(CSpan> tris, CSpan> tri_neighbours, 122 | CSpan quads, CSpan> tri_quads, 123 | float square_weight) { 124 | vector> out; 125 | out.reserve(tris.size() * 2 / 3); 126 | 127 | // 2:selected quad 1:removed quad 128 | vector visited_quads(quads.size(), 0); 129 | // At the end of the algorithm we will iterate in BFS manner over all tris 130 | // Tris will also be used to iterate over selected quads. First triangle 131 | // will be used for that (second will be disabled). 
132 | vector visited_tris(tris.size(), false); 133 | 134 | vector degree(quads.size(), 0); 135 | for(int qidx : intRange(quads)) 136 | degree[qidx] = quads[qidx].degree(); 137 | 138 | auto score = [&](int idx) { 139 | int deg = degree[idx]; 140 | return deg - quads[idx].squareness * square_weight; 141 | }; 142 | 143 | Heap heap(quads.size()); 144 | for(int qidx : intRange(quads)) { 145 | heap.insert(qidx, score(qidx)); 146 | #ifdef DEBUG_QUADGEN 147 | print("Quad % score: %\n", qidx, score(qidx)); 148 | #endif 149 | } 150 | 151 | // Computing maximum independent set with basic greedy algorithm 152 | // TODO: use better algorithm (with preprocessing): 153 | // http://www.ru.is/~mmh/papers/algo.pdf 154 | while(!heap.empty()) { 155 | auto [_, qidx] = heap.extractMin(); 156 | if(visited_quads[qidx]) 157 | continue; 158 | //print("Selected quad: %\n", qidx); 159 | 160 | auto &quad = quads[qidx]; 161 | visited_quads[qidx] = 2; 162 | visited_tris[quad.tris[1]] = true; 163 | for(int nidx : quad.conflicts) 164 | if(nidx != -1) { 165 | auto &nquad = quads[nidx]; 166 | visited_quads[nidx] = 1; 167 | //print("Removing quad: %\n", nidx); 168 | 169 | for(int nidx2 : nquad.conflicts) 170 | if(nidx2 != -1 && !visited_quads[nidx2]) { 171 | degree[nidx2]--; 172 | heap.update(nidx2, score(nidx2)); 173 | //print("Lower degree for %: %\n", nidx2, degree[nidx2]); 174 | } 175 | } 176 | } 177 | 178 | auto get_selected_quad = [&](int tidx) { 179 | for(int qidx : tri_quads[tidx]) 180 | if(qidx != -1 && visited_quads[qidx] == 2) 181 | return qidx; 182 | return -1; 183 | }; 184 | 185 | // Trying to maintain original triangle order 186 | int num_degenerate = 0; 187 | for(int tidx : intRange(tris)) { 188 | if(visited_tris[tidx]) 189 | continue; 190 | int qidx = get_selected_quad(tidx); 191 | if(qidx == -1) { 192 | out.emplace_back(tris[tidx][0], tris[tidx][1], tris[tidx][2], tris[tidx][2]); 193 | num_degenerate++; 194 | } else { 195 | out.emplace_back(quads[qidx].verts); 196 | } 197 | } 198 | 
199 | print("Quadized: % tris -> % quads (% degenerate)\n", tris.size(), out.size(), num_degenerate); 200 | return out; 201 | } 202 | -------------------------------------------------------------------------------- /data/shaders/trace.glsl: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 3 | 4 | #version 460 5 | 6 | #define LSIZE 1024 7 | #define LSHIFT 10 8 | 9 | #include "shared/funcs.glsl" 10 | #include "shared/structures.glsl" 11 | 12 | #extension GL_KHR_shader_subgroup_vote : require 13 | #extension GL_KHR_shader_subgroup_shuffle : require 14 | #extension GL_EXT_ray_tracing : enable 15 | #extension GL_EXT_ray_query : require 16 | 17 | coherent layout(std430, binding = 0) buffer info_ { 18 | PathTracerInfo g_info; 19 | int g_counts[]; 20 | }; 21 | layout(binding = 1) uniform config_ { PathTracerConfig u_config; }; 22 | 23 | layout(binding = 2, rgba8) uniform image2D g_raster_image; 24 | 25 | layout(binding = 3) buffer buf03_ { uint g_indices[]; }; 26 | layout(binding = 4) buffer buf04_ { float g_vertices[]; }; 27 | layout(binding = 5) buffer buf05_ { vec2 g_tex_coords[]; }; 28 | layout(binding = 6) uniform accelerationStructureEXT g_accelStruct; 29 | 30 | layout(binding = 10) uniform sampler2D albedo_tex; 31 | layout(binding = 11) uniform sampler2D normal_tex; 32 | layout(binding = 12) uniform sampler2D pbr_tex; 33 | layout(binding = 13) uniform sampler2D env_map; 34 | 35 | #include "%shader_debug" 36 | DEBUG_SETUP(1, 12) 37 | 38 | shared ivec2 s_bin_pos; 39 | 40 | void outputPixel(ivec2 pixel_pos, vec4 color) { imageStore(g_raster_image, pixel_pos, color); } 41 | 42 | layout(local_size_x = LSIZE) in; 43 | 44 | const float epsilon = 0.0001; 45 | const float infinity = 1.0 / 0.0; // TODO... 
46 | 47 | vec3 getVertex(uint idx) { 48 | return vec3(g_vertices[idx * 3 + 0], g_vertices[idx * 3 + 1], g_vertices[idx * 3 + 2]); 49 | } 50 | 51 | uvec3 getTriangleIndices(uint tri_id) { 52 | return uvec3(g_indices[tri_id * 3 + 0], g_indices[tri_id * 3 + 1], g_indices[tri_id * 3 + 2]); 53 | } 54 | 55 | void getTriangleVertices(uvec3 tri_indices, out vec3 tri0, out vec3 tri1, out vec3 tri2) { 56 | tri0 = getVertex(tri_indices[0]); 57 | tri1 = getVertex(tri_indices[1]); 58 | tri2 = getVertex(tri_indices[2]); 59 | } 60 | 61 | void getTriangleTexCoords(uvec3 tri_indices, out vec2 tri0, out vec2 tri1, out vec2 tri2) { 62 | tri0 = g_tex_coords[tri_indices[0]]; 63 | tri1 = g_tex_coords[tri_indices[1]]; 64 | tri2 = g_tex_coords[tri_indices[2]]; 65 | } 66 | 67 | void getTriangleVectors(in vec3 tri0, in vec3 tri1, in vec3 tri2, out vec3 tangent, out vec3 normal, 68 | out vec3 binormal) { 69 | tangent = normalize(tri1 - tri0); 70 | normal = normalize(cross(tangent, tri2 - tri0)); 71 | binormal = cross(normal, tangent); 72 | } 73 | 74 | struct TraceResult { 75 | float dist; 76 | vec2 barycentric; 77 | uint num_iters; 78 | uint tri_id; 79 | }; 80 | 81 | #define MAX_ISECT_DIST 10000.0 82 | #define INVALID_TRI_ID uint(0xffffffff) 83 | 84 | TraceResult rayTraceAS(vec3 origin, vec3 dir) { 85 | TraceResult result; 86 | 87 | rayQueryEXT rq; 88 | rayQueryInitializeEXT(rq, g_accelStruct, gl_RayFlagsOpaqueEXT, 0xff, origin, 0.0, dir, 89 | MAX_ISECT_DIST); 90 | result.num_iters = 0; 91 | while(rayQueryProceedEXT(rq)) 92 | result.num_iters++; 93 | if(rayQueryGetIntersectionTypeEXT(rq, true) != 0) { 94 | result.dist = rayQueryGetIntersectionTEXT(rq, true); 95 | result.tri_id = rayQueryGetIntersectionPrimitiveIndexEXT(rq, true); 96 | result.barycentric = rayQueryGetIntersectionBarycentricsEXT(rq, true); 97 | } else { 98 | result.dist = MAX_ISECT_DIST; 99 | result.tri_id = INVALID_TRI_ID; 100 | } 101 | 102 | return result; 103 | } 104 | 105 | vec3 uniformSampleHemisphere(vec2 u) { 106 | 
float z = u[0]; 107 | float r = sqrt(max(0.0, 1.0 - z * z)); 108 | float phi = 2 * PI * u[1]; 109 | return vec3(r * cos(phi), r * sin(phi), z); 110 | } 111 | 112 | void getScreenRay(ivec2 pixel_pos, out vec3 origin, out vec3 dir) { 113 | origin = u_config.frustum.ws_origin0.xyz; 114 | dir = u_config.frustum.ws_dir0.xyz + float(pixel_pos.x) * u_config.frustum.ws_dirx.xyz + 115 | float(pixel_pos.y) * u_config.frustum.ws_diry.xyz; 116 | dir += vec3(0.0000001); // avoiding division by 0 117 | dir = normalize(dir); 118 | } 119 | 120 | float randomFloat(inout uint rngState) { 121 | // Condensed version of pcg_output_rxs_m_xs_32_32, with simple conversion to floating-point [0,1]. 122 | rngState = rngState * 747796405 + 1; 123 | uint word = ((rngState >> ((rngState >> 28) + 4)) ^ rngState) * 277803737; 124 | word = (word >> 22) ^ word; 125 | return float(word) / 4294967295.0f; 126 | } 127 | 128 | float computeAO(inout uint random_seed, uint tri_id, vec3 hit_point) { 129 | uvec3 tri_indices = getTriangleIndices(tri_id); 130 | vec3 tri[3]; 131 | getTriangleVertices(tri_indices, tri[0], tri[1], tri[2]); 132 | vec3 tri_vecs[3]; 133 | getTriangleVectors(tri[0], tri[1], tri[2], tri_vecs[0], tri_vecs[1], tri_vecs[2]); 134 | 135 | const int dim_size = 3; 136 | int hits = 0, total = (dim_size + 1) * (dim_size + 1); 137 | 138 | for(int x = 0; x <= dim_size; x++) 139 | for(int y = 0; y <= dim_size; y++) { 140 | vec2 uv = vec2(randomFloat(random_seed), randomFloat(random_seed)); 141 | vec3 hemi = uniformSampleHemisphere(uv); 142 | vec3 dir = tri_vecs[0] * hemi[0] + tri_vecs[1] * hemi[2] + tri_vecs[2] * hemi[1]; 143 | vec3 origin = hit_point + dir * 0.001; 144 | TraceResult ao_hit = rayTraceAS(origin, dir); 145 | if(ao_hit.dist > 0.01 && ao_hit.dist < 10.0 && ao_hit.tri_id != tri_id) 146 | hits++; 147 | } 148 | 149 | return max(0.0, (total - hits) / float(total) - 0.1) * (1.0 / 0.9); 150 | } 151 | 152 | vec2 longLat(vec3 normal) { 153 | // convert normal to longitude and latitude 154 | 
// Shades one pixel of the current 32x32 bin (one invocation per pixel).
//
// Traces a primary ray through the pixel. On a hit the albedo texture is sampled at the
// interpolated texture coordinates and modulated by ambient occlusion; on a miss the
// environment map is sampled in the ray direction. The result is written with outputPixel().
void traceBin() {
	// LIX is split into a 5-bit column and the remaining bits as row inside the bin
	ivec2 local_pos = ivec2(LIX & 31, LIX >> 5);
	ivec2 pixel_pos = local_pos + s_bin_pos;
	// Per-pixel RNG seed
	uint random_seed = pixel_pos.x + (pixel_pos.y << 16);

	vec3 ray_origin, ray_dir;
	getScreenRay(pixel_pos, ray_origin, ray_dir);
	TraceResult result = rayTraceAS(ray_origin, ray_dir);

	vec3 vcolor;
	bool is_hit = result.dist < MAX_ISECT_DIST;
	if(!is_hit) {
		// Nothing hit: use the environment map (halved in intensity)
		vec2 env_coord = longLat(-ray_dir) * vec2(1.0, -1.0);
		vcolor = texture(env_map, env_coord).rgb * 0.5;
	} else {
		vec2 uv0, uv1, uv2;
		getTriangleTexCoords(getTriangleIndices(result.tri_id), uv0, uv1, uv2);

		// Weight of the first vertex is what remains after the two stored barycentrics
		float w0 = 1.0 - result.barycentric[0] - result.barycentric[1];
		vec2 uv = uv0 * w0 + uv1 * result.barycentric.x + uv2 * result.barycentric.y;
		vec3 albedo = texture(albedo_tex, uv).rgb;

		vec3 hit_point = ray_origin + ray_dir * result.dist;
		float ao = computeAO(random_seed, result.tri_id, hit_point);
		// Keep at least 30% of the albedo even when fully occluded
		vcolor = albedo * (0.3 + 0.7 * ao);
	}

	outputPixel(pixel_pos, SATURATE(vec4(vcolor, 1.0)));
}
// Initializes the path tracer for the given viewport size and options:
//  - computes the bin layout (32x32 pixel bins covering view_size, rounded up),
//  - fills shader specialization constants,
//  - builds the 'trace' compute pipeline (the '_debug' or '_timers' variant depending on opts),
//  - creates the shader debug buffer when Opt::debug is set,
//  - creates one info buffer and one config buffer per frame in flight.
// Errors from shader module / buffer creation are propagated through EX_PASS.
//
// NOTE(review): this listing appears to have lost its template arguments in extraction
// (the bare `Ex` return types and `VulkanBuffer::create(...)` calls below) — restore them
// from the original file before building.
Ex PathTracer::exConstruct(VulkanDevice &device, ShaderCompiler &compiler, Opts opts,
							 int2 view_size) {
	print("Constructing PathTracer (flags:% res:%):\n", opts, view_size);
	auto time = getTime();

	m_bin_size = 32;
	// Number of bins per axis, rounded up so that partially covered bins are included
	m_bin_counts = (view_size + int2(m_bin_size - 1)) / m_bin_size;
	m_bin_count = m_bin_counts.x * m_bin_counts.y;
	m_opts = opts;
	m_size = view_size;

	shader::SpecializationConstants consts;
	consts.VIEWPORT_SIZE_X = view_size.x;
	consts.VIEWPORT_SIZE_Y = view_size.y;
	consts.BIN_COUNT = m_bin_count;
	consts.BIN_COUNT_X = m_bin_counts.x;
	consts.BIN_COUNT_Y = m_bin_counts.y;
	consts.BIN_SIZE = m_bin_size;
	// m_bin_size is 32 here, so log2() yields exactly 5
	consts.BIN_SHIFT = log2(m_bin_size);
	consts.RENDER_OPTIONS = m_opts.bits;

	// With m_bin_size fixed to 32 above, this always selects 1024
	int bin_dispatcher_lsize = m_bin_size == 64 ? 512 : 1024;
	consts.BIN_DISPATCHER_LSIZE = bin_dispatcher_lsize;
	consts.BIN_DISPATCHER_LSHIFT = log2(bin_dispatcher_lsize);
	consts.BIN_CATEGORIZER_LSIZE = 1024;

	// Creates a compute pipeline from the shader definition called 'name', with a '_debug'
	// suffix when debug_option is enabled in opts, otherwise '_timers' when has_timers is set.
	// The definition id is also recorded in m_shader_def_ids.
	auto make_compute_pipe = [&](string name, Opts debug_option,
								 bool has_timers) -> Ex {
		if(opts & debug_option)
			name = name + "_debug";
		else if(has_timers)
			name = name + "_timers";

		// Shadows the outer 'time': measures this pipeline only
		auto time = getTime();
		VComputePipelineSetup setup;
		// NOTE(review): result of find() is dereferenced without a check; presumably the
		// definition was registered earlier by addShaderDefs() — confirm
		auto def_id = *compiler.find(name);
		m_shader_def_ids.emplace_back(def_id);
		setup.compute_module = EX_PASS(compiler.createShaderModule(device, def_id));
		setup.spec_constants.emplace_back(consts, 0u);
		auto result = VulkanPipeline::create(device, setup);
		print("Compute pipeline '%': % ms\n", name, int((getTime() - time) * 1000));
		return result;
	};

	bool has_timers = m_opts & Opt::timers;
	p_trace = EX_PASS(make_compute_pipe("trace", Opt::debug, has_timers));

	if(opts & Opt::debug)
		m_debug_buffer = EX_PASS(shaderDebugBuffer(device));

	// One info buffer and one config buffer per frame in flight
	for(int i : intRange(num_frames)) {
		auto info_usage = VBufferUsage::storage | VBufferUsage::transfer_src |
						  VBufferUsage::transfer_dst | VBufferUsage::indirect;
		auto config_usage = VBufferUsage::uniform | VBufferUsage::transfer_dst;
		auto mem_usage = VMemoryUsage::temporary;
		m_frame_info[i] =
			EX_PASS(VulkanBuffer::create(device, 1, info_usage, mem_usage));
		m_frame_config[i] = EX_PASS(
			VulkanBuffer::create(device, 1, config_usage, mem_usage));
	}

	print("Total build time: % ms\n\n", int((getTime() - time) * 1000.0));
	return {};
}
auto blas = 122 | EX_PASS(VulkanAccelStruct::buildBottom(device, scene.verts.positions, scene.tris_ib)); 123 | VAccelStructInstance instance{blas, Matrix4::identity()}; 124 | m_accel_struct = EX_PASS(VulkanAccelStruct::buildTop(device, {instance})); 125 | m_indices = scene.tris_ib; 126 | m_vertices = scene.verts.positions; 127 | m_tex_coords = scene.verts.tex_coords; 128 | // TODO: wait until AS is built? 129 | 130 | return {}; 131 | } 132 | 133 | void PathTracer::render(const Context &ctx) { 134 | auto &cmds = ctx.device.cmdQueue(); 135 | PERF_GPU_SCOPE(cmds); 136 | 137 | if(ctx.scene.id != m_scene_id) 138 | updateScene(ctx.device, ctx.scene).check(); 139 | 140 | cmds.fullBarrier(); 141 | 142 | // TODO: second frame is broken 143 | setupInputData(ctx).check(); 144 | 145 | cmds.bind(p_trace); 146 | auto ds = cmds.bindDS(0); 147 | ds.set(0, m_info); 148 | ds.set(1, VDescriptorType::uniform_buffer, m_config); 149 | 150 | auto swap_chain = ctx.device.swapChain(); 151 | auto raster_image = swap_chain->acquiredImage(); 152 | ds.setStorageImage(2, raster_image, VImageLayout::general); 153 | 154 | ds.set(3, m_indices, m_vertices, m_tex_coords); 155 | ds.set(6, m_accel_struct); 156 | 157 | auto sampler = ctx.device.getSampler(ctx.config.sampler_setup); 158 | //ds.set(10, {{sampler, ctx.opaque_tex}}); 159 | //ds.set(11, {{sampler, ctx.trans_tex}}); 160 | 161 | DASSERT(ctx.scene.materials); 162 | auto &material = ctx.scene.materials.front(); 163 | // TODO: different default textures for different map types 164 | auto &albedo_map = material.maps[SceneMapType::albedo]; 165 | auto &normal_map = material.maps[SceneMapType::normal]; 166 | auto &pbr_map = material.maps[SceneMapType::pbr]; 167 | ds.set(10, {{sampler, albedo_map.vk_image}, 168 | {sampler, normal_map.vk_image}, 169 | {sampler, pbr_map.vk_image}, 170 | {sampler, ctx.lighting.env_map}}); 171 | 172 | if(m_opts & Opt::debug) { 173 | ds.set(12, m_debug_buffer); 174 | shaderDebugResetBuffer(cmds, m_debug_buffer); 175 | } 
176 | 177 | cmds.dispatchCompute({m_bin_counts.x, m_bin_counts.y, 1}); 178 | if(m_opts & Opt::debug) 179 | printDebugData(cmds, m_debug_buffer, "raster_low_debug"); 180 | cmds.fullBarrier(); 181 | } 182 | 183 | Ex<> PathTracer::setupInputData(const Context &ctx) { 184 | auto &cmds = ctx.device.cmdQueue(); 185 | PERF_GPU_SCOPE(cmds); 186 | 187 | auto frame_index = cmds.frameIndex() % num_frames; 188 | 189 | shader::PathTracerConfig config; 190 | config.frustum = FrustumInfo(ctx.camera); 191 | config.view_proj_matrix = ctx.camera.matrix(); 192 | config.lighting = ctx.lighting; 193 | config.background_color = (float4)FColor(ctx.config.background_color); 194 | m_config = m_frame_config[frame_index]; 195 | EXPECT(m_config.upload(cspan(&config, 1))); 196 | return {}; 197 | } 198 | -------------------------------------------------------------------------------- /src/wavefront_obj.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 3 | 4 | #include "wavefront_obj.h" 5 | 6 | #include 7 | #include 8 | 9 | // TODO: make parsing work without exceptions ? 
10 | Ex WavefrontMaterial::load(ZStr file_path, vector &out) { 11 | auto file_text = EX_PASS(loadFileString(file_path)); 12 | 13 | int count = 0; 14 | WavefrontMaterial *new_mat = nullptr; 15 | 16 | for(string line : tokenize(file_text, '\n')) { 17 | if(line[0] == '#') 18 | continue; 19 | 20 | TextParser parser(line); 21 | Str element; 22 | parser >> element; 23 | if(!element) 24 | continue; 25 | 26 | if(element == "newmtl") { 27 | Str name; 28 | parser >> name; 29 | EXPECT(!name.empty()); 30 | bool has_duplicate = anyOf(out, [=](auto &mat) { return mat.name == name; }); 31 | if(has_duplicate) { 32 | print("Warning: ignoring duplicate material: '%'\n", name); 33 | new_mat = nullptr; 34 | continue; 35 | } 36 | 37 | out.emplace_back(name); 38 | new_mat = &out.back(); 39 | count++; 40 | } 41 | 42 | if(!new_mat) 43 | continue; 44 | 45 | if(element.startsWith("map_")) { 46 | WavefrontMap map; 47 | vector args; 48 | parser >> args; 49 | EXPECT(!args.empty()); 50 | map.name = args.back(); 51 | args.pop_back(); 52 | auto locase_elem = toLower(element.substr(4)); 53 | // kd, bump, ks, ao, roughness 54 | new_mat->maps.emplace_back(locase_elem, std::move(map)); 55 | } else if(element == "d") 56 | parser >> new_mat->dissolve_factor; 57 | else if(element == "Kd") 58 | parser >> new_mat->diffuse; 59 | 60 | EX_CATCH(); 61 | } 62 | 63 | print("Loaded % material% from: '%'\n", count, count > 0 ? 
"s" : "", file_path); 64 | return {}; 65 | } 66 | 67 | Ex WavefrontObject::load(ZStr path, i64 file_size_limit) { 68 | auto start_time = getTime(); 69 | auto file_text = EX_PASS(loadFileString(path, file_size_limit)); 70 | 71 | auto parse_time = getTime(); 72 | vector material_libs; 73 | vector src_positions, src_normals; 74 | vector src_tex_coords; 75 | int src_counts[3] = {0, 0, 0}, num_verts = 0; 76 | 77 | using MultiIndex = array; 78 | HashMap vertex_map; 79 | vector> tris; 80 | 81 | auto parseIndex = [&](Str text) { 82 | array idx = {0, 0, 0}; 83 | idx[0] = atoi(text.data()); 84 | 85 | auto slash_pos = text.find('/'); 86 | if(slash_pos != -1) { 87 | text = text.advance(slash_pos + 1); 88 | slash_pos = text.find('/'); 89 | if(slash_pos == -1) { 90 | idx[1] = atoi(text.data()); 91 | } else if(slash_pos == 0) { 92 | idx[2] = atoi(text.data() + 1); 93 | } else { 94 | idx[1] = atoi(text.data()); 95 | idx[2] = atoi(text.data() + slash_pos + 1); 96 | } 97 | } 98 | for(int j = 0; j < 3; j++) 99 | idx[j] = idx[j] < 0 ? 
idx[j] + src_counts[j] : idx[j] - 1; 100 | auto it = vertex_map.find(idx); 101 | if(it == vertex_map.end()) { 102 | vertex_map.emplace(idx, num_verts); 103 | return num_verts++; 104 | } 105 | return it->value; 106 | }; 107 | 108 | struct UseMtl { 109 | int first_tri = 0; 110 | string mat_name; 111 | }; 112 | vector use_mtls; 113 | vector materials; 114 | 115 | for(string line : tokenize(file_text, '\n')) { 116 | if(line[0] == '#') 117 | continue; 118 | TextParser parser(line); 119 | Str element; 120 | parser >> element; 121 | 122 | if(element == "o") { 123 | // if(tris.size()>0) { 124 | // objects.push_back(BaseScene::Object(verts,uvs,normals,tris)); 125 | // objects.back().name = strpbrk(line," \t") + 1; 126 | // tris.clear(); 127 | // } 128 | } else if(element == "v") { 129 | float3 vert; 130 | float w = 1.0f; 131 | parser >> vert; 132 | if(!parser.empty()) 133 | parser >> w; 134 | src_positions.emplace_back(vert); 135 | src_counts[0]++; 136 | } else if(element == "vt") { 137 | float2 uv(0.0f, 0.0f); 138 | float w = 0.0f; 139 | parser >> uv[0]; 140 | if(!parser.empty()) 141 | parser >> uv[1]; 142 | if(!parser.empty()) 143 | parser >> w; 144 | uv[1] = 1.0f - uv[1]; 145 | src_tex_coords.push_back(uv); 146 | src_counts[1]++; 147 | } else if(element == "vn") { 148 | float3 vert; 149 | parser >> vert; 150 | src_normals.emplace_back(vert); 151 | src_counts[2]++; 152 | } else if(element == "f") { 153 | int indices[64]; 154 | int count = 0; 155 | 156 | while(!parser.empty()) { 157 | if(count == arraySize(indices)) 158 | return FWK_ERROR("Too many face indices (% is max): '%'", arraySize(indices), 159 | line); 160 | Str elem; 161 | parser >> elem; 162 | indices[count++] = parseIndex(elem); 163 | } 164 | 165 | for(int i = 1; i + 1 < count; i++) 166 | tris.emplace_back(indices[0], indices[i], indices[i + 1]); 167 | } else if(element == "usemtl") { 168 | Str mat_name; 169 | parser >> mat_name; 170 | EXPECT(!mat_name.empty()); 171 | use_mtls.emplace_back(tris.size(), 
mat_name); 172 | } else if(element == "mtllib") { 173 | Str lib_name; 174 | parser >> lib_name; 175 | if(!anyOf(material_libs, lib_name)) 176 | material_libs.emplace_back(lib_name); 177 | } 178 | } 179 | file_text.clear(); 180 | 181 | auto dir_path = FilePath(path).parent(); 182 | for(auto mtl_lib : material_libs) 183 | EXPECT(WavefrontMaterial::load(dir_path / mtl_lib, materials)); 184 | 185 | WavefrontObject out; 186 | auto init_time = getTime(); 187 | out.positions.resize(num_verts); 188 | out.tris = std::move(tris); 189 | for(auto &vindex : vertex_map) { 190 | int src_index = vindex.key[0]; 191 | auto value = 192 | src_index >= 0 && src_index < src_counts[0] ? src_positions[src_index] : float3(); 193 | out.positions[vindex.value] = value; 194 | } 195 | 196 | if(src_tex_coords) { 197 | out.tex_coords.resize(num_verts); 198 | for(auto &vindex : vertex_map) { 199 | int src_index = vindex.key[1]; 200 | auto value = 201 | src_index >= 0 && src_index < src_counts[1] ? src_tex_coords[src_index] : float2(); 202 | out.tex_coords[vindex.value] = value; 203 | } 204 | } 205 | 206 | if(src_normals) { 207 | out.normals.resize(num_verts); 208 | for(auto &vindex : vertex_map) { 209 | int src_index = vindex.key[2]; 210 | auto value = 211 | src_index >= 0 && src_index < src_counts[2] ? src_normals[src_index] : float3(); 212 | out.normals[vindex.value] = value; 213 | } 214 | } 215 | 216 | // TODO: handle groups 217 | 218 | if(use_mtls.empty()) { 219 | materials.emplace_back("default"); 220 | out.material_groups.emplace_back(0, 0, out.tris.size()); 221 | } else { 222 | int default_mat_idx = -1; 223 | 224 | for(int i = 0; i < use_mtls.size(); i++) { 225 | auto &use_mtl = use_mtls[i]; 226 | int end_tri = i + 1 < use_mtls.size() ? 
use_mtls[i + 1].first_tri : out.tris.size(); 227 | int mat_idx = -1; 228 | for(int j = 0; j < materials.size(); j++) 229 | if(materials[j].name == use_mtl.mat_name) { 230 | mat_idx = j; 231 | break; 232 | } 233 | if(mat_idx == -1) { 234 | print("Warning: material '%' not found, using default\n", use_mtl.mat_name); 235 | if(default_mat_idx == -1) { 236 | for(int j = 0; j < materials.size(); j++) 237 | if(materials[j].name == "default") { 238 | default_mat_idx = j; 239 | break; 240 | } 241 | if(default_mat_idx == -1) { 242 | default_mat_idx = materials.size(); 243 | materials.emplace_back("default"); 244 | } 245 | } 246 | mat_idx = default_mat_idx; 247 | } 248 | out.material_groups.emplace_back(mat_idx, use_mtl.first_tri, 249 | end_tri - use_mtl.first_tri); 250 | } 251 | } 252 | out.materials = std::move(materials); 253 | out.resource_path = dir_path; 254 | auto finish_time = getTime(); 255 | 256 | print("Loaded Wavefront OBJ in: % ms\n", int((finish_time - start_time) * 1000.0)); 257 | print(" reading file: % ms\n parsing: % ms\n initializing scene: % ms\n", 258 | int((parse_time - start_time) * 1000.0), int((init_time - parse_time) * 1000.0), 259 | int((finish_time - init_time) * 1000.0)); 260 | return out; 261 | } 262 | -------------------------------------------------------------------------------- /src/simple_renderer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 
// Registers the shader definitions used by SimpleRenderer in the compiler:
// 'simple_vert' and 'simple_frag', both built from simple_material.glsl with a stage-specific
// macro (VERTEX_SHADER / FRAGMENT_SHADER) followed by the macros predefined in shader_config.
// 'device' is not used here.
//
// NOTE(review): the `vector>` declarations below look like extraction damage (template
// arguments lost) — restore them from the original file before building.
void SimpleRenderer::addShaderDefs(VulkanDevice &device, ShaderCompiler &compiler,
								   const ShaderConfig &shader_config) {
	vector> vsh_macros = {{"VERTEX_SHADER", "1"}};
	vector> fsh_macros = {{"FRAGMENT_SHADER", "1"}};
	auto shared_macros = shader_config.predefined_macros;
	// Stage macro comes first, shared macros are appended after it
	insertBack(vsh_macros, shared_macros);
	insertBack(fsh_macros, shared_macros);

	compiler.add({"simple_vert", VShaderStage::vertex, "simple_material.glsl", vsh_macros});
	compiler.add({"simple_frag", VShaderStage::fragment, "simple_material.glsl", fsh_macros});
}
// Returns the graphics pipeline matching 'config', creating and caching it in m_pipelines
// on first use. Pipeline creation errors are propagated through EX_PASS.
//
// NOTE(review): the bare `Ex` return type looks like extraction damage (template argument
// lost) — restore it from the declaration in simple_renderer.h.
Ex SimpleRenderer::getPipeline(VulkanDevice &device, const PipeConfig &config) {
	PERF_SCOPE();

	// operator[] creates an empty entry for a config which was not seen before
	auto &ref = m_pipelines[config];
	if(!ref) {
		VPipelineSetup setup;
		setup.pipeline_layout = m_pipeline_layout;
		setup.render_pass = m_render_pass;
		setup.shader_modules = {{m_vert_module, m_frag_module}};
		// Default for opaque geometry: depth test + depth write
		setup.depth = VDepthSetup(VDepthFlag::test | VDepthFlag::write);
		VertexArray::getDefs(setup, false);

		setup.raster = VRasterSetup(VPrimitiveTopology::triangle_list,
									config.wireframe ? VPolygonMode::line : VPolygonMode::fill,
									mask(config.backface_culling, VCull::back));
		if(!config.opaque) {
			VBlendingMode additive_blend(VBlendFactor::src_alpha, VBlendFactor::one);
			VBlendingMode normal_blend(VBlendFactor::src_alpha, VBlendFactor::one_minus_src_alpha);
			setup.blending.attachments = {
				{config.additive_blending ? additive_blend : normal_blend}};
			// Blended geometry is depth-tested, but does not write depth
			setup.depth = VDepthSetup(VDepthFlag::test);
		}

		ref = EX_PASS(VulkanPipeline::create(device, setup));
	}

	return ref;
}
// Renders all draw calls of ctx into the currently acquired swap chain image.
//
// Uploads lighting parameters and one uniform record per draw call, binds vertex / index
// data, then draws in two phases inside a single render pass: opaque draw calls first,
// non-opaque ones second. A phase is skipped when it has no draw calls.
// Buffer creation and renderPhase() errors are returned to the caller.
//
// NOTE(review): the `vector simple_dcs;` declaration below looks like extraction damage
// (template argument lost) — restore it from the original file before building.
Ex<> SimpleRenderer::render(const RenderContext &ctx, bool wireframe) {
	DASSERT(ctx.scene.hasSimpleTextures());

	auto &cmds = ctx.device.cmdQueue();
	PERF_GPU_SCOPE(cmds);

	// TODO: optimize this
	auto ubo_usage = VBufferUsage::uniform;
	shader::Lighting lighting;
	lighting.ambient_color = ctx.lighting.ambient.color;
	lighting.ambient_power = ctx.lighting.ambient.power;
	lighting.sun_color = ctx.lighting.sun.color;
	lighting.sun_power = ctx.lighting.sun.power;
	lighting.sun_dir = ctx.lighting.sun.dir;
	auto lighting_buf = EX_PASS(VulkanBuffer::createAndUpload(ctx.device, cspan(&lighting, 1),
															  ubo_usage, VMemoryUsage::frame));

	int num_opaque = 0;

	// One uniform record per draw call, in the same order as ctx.dcs
	// TODO: minimize it (do it only for different materials)
	vector simple_dcs;
	simple_dcs.reserve(ctx.dcs.size());
	for(const auto &draw_call : ctx.dcs) {
		auto &material = ctx.materials[draw_call.material_id];
		auto &simple_dc = simple_dcs.emplace_back();
		if(draw_call.opts & DrawCallOpt::is_opaque)
			num_opaque++;
		simple_dc.world_camera_pos = ctx.camera.pos();
		simple_dc.proj_view_matrix = ctx.camera.matrix();
		simple_dc.material_color = float4(material.diffuse, material.opacity);
		simple_dc.draw_call_opts = uint(draw_call.opts.bits);
		// uv_rect_pos / uv_rect_size are only written for draw calls flagged with has_uv_rect
		if(draw_call.opts & DrawCallOpt::has_uv_rect) {
			auto uv_rect = material.maps[SceneMapType::albedo].uv_rect;
			simple_dc.uv_rect_pos = uv_rect.min();
			simple_dc.uv_rect_size = uv_rect.size();
		}
	}
	auto simple_dc_buf = EX_PASS(
		VulkanBuffer::createAndUpload(ctx.device, simple_dcs, ubo_usage, VMemoryUsage::frame));

	cmds.bind(m_pipeline_layout);
	cmds.bindDS(0).set(0, VDescriptorType::uniform_buffer, lighting_buf);
	cmds.setViewport(m_viewport);
	cmds.setScissor(none);

	auto &verts = ctx.verts;
	cmds.bindVertices(0, verts.positions, verts.colors, verts.tex_coords, verts.normals);
	cmds.bindIndices(ctx.tris_ib);

	auto swap_chain = ctx.device.swapChain();
	// NOTE(review): swap_image is not used anywhere below — looks like a leftover
	auto swap_image = swap_chain->acquiredImage()->image();

	auto framebuffer = ctx.device.getFramebuffer({swap_chain->acquiredImage(), m_depth_buffer});
	cmds.beginRenderPass(framebuffer, m_render_pass, none,
						 {FColor(ColorId::magneta), VClearDepthStencil(1.0)});

	// NOTE(review): EXPECT presumably returns on error, which would skip endRenderPass()
	// below and leave the render pass open — confirm
	if(num_opaque > 0)
		EXPECT(renderPhase(ctx, simple_dc_buf, true, wireframe));
	if(num_opaque != ctx.dcs.size())
		EXPECT(renderPhase(ctx, simple_dc_buf, false, wireframe));

	cmds.endRenderPass();

	return {};
}
3 | 4 | #version 460 5 | 6 | #include "shared/compute_funcs.glsl" 7 | #include "shared/funcs.glsl" 8 | #include "shared/scanline.glsl" 9 | #include "shared/structures.glsl" 10 | #include "shared/timers.glsl" 11 | 12 | #include "%shader_debug" 13 | 14 | // TODO: use gl_SubgroupSize, gl_SubgroupId 15 | // TODO: simplify offsets generation; we don't have to do this in 2D 16 | 17 | #extension GL_KHR_shader_subgroup_shuffle : require 18 | #extension GL_KHR_shader_subgroup_shuffle_relative : require 19 | 20 | #define LSIZE BIN_DISPATCHER_LSIZE 21 | #define LSHIFT BIN_DISPATCHER_LSHIFT 22 | 23 | #define SMALL_BATCH_STEPS 4 24 | #define SMALL_BATCH_SHIFT (LSHIFT + 2) 25 | #define SMALL_BATCH_SIZE (LSIZE * SMALL_BATCH_STEPS) 26 | 27 | #define LARGE_BATCH_SHIFT (LSHIFT - 1) 28 | #define LARGE_BATCH_SIZE (LSIZE / 2) 29 | 30 | layout(local_size_x = 1024, local_size_x_id = BIN_DISPATCHER_LSIZE_ID) in; 31 | 32 | layout(std430, binding = 0) coherent buffer lucid_info_ { 33 | LucidInfo g_info; 34 | int g_counts[]; 35 | }; 36 | layout(binding = 1) uniform lucid_config_ { LucidConfig u_config; }; 37 | 38 | layout(std430, set = 1, binding = 0) restrict readonly buffer buf1_ { uint g_quad_aabbs[]; }; 39 | layout(std430, set = 1, binding = 1) restrict writeonly buffer buf2_ { uint g_bin_quads[]; }; 40 | layout(std430, set = 1, binding = 2) restrict writeonly buffer buf3_ { uint g_bin_tris[]; }; 41 | layout(std430, set = 1, binding = 3) restrict buffer buf4_ { int g_bin_batches[]; }; 42 | layout(std430, set = 1, binding = 4) restrict readonly buffer buf6_ { uvec4 g_uvec4_storage[]; }; 43 | DEBUG_SETUP(1, 5) 44 | 45 | #define MAX_SMALL_BATCHES (MAX_VISIBLE_QUADS / SMALL_BATCH_SIZE + 256) 46 | #define MAX_LARGE_BATCHES (MAX_VISIBLE_QUADS / LARGE_BATCH_SIZE + 256) 47 | 48 | #define SMALL_BATCHES(idx) g_bin_batches[(MAX_DISPATCHES * BIN_COUNT * 2) + idx] 49 | #define LARGE_BATCHES(idx) g_bin_batches[(MAX_DISPATCHES * BIN_COUNT * 2 + MAX_SMALL_BATCHES) + idx] 50 | 51 | shared int 
// Adds 1 to the shared-memory counter (s_bins) of every bin covered by the bounding box of
// the given quad. The box is decoded from g_quad_aabbs[quad_idx] as inclusive bin
// coordinates (start x, start y, end x, end y).
void countSmallQuadBins(uint quad_idx) {
	ivec4 aabb = decodeAABB28(g_quad_aabbs[quad_idx]);
	int bsx = aabb[0], bsy = aabb[1], bex = aabb[2], bey = aabb[3];

	for(int by = bsy; by <= bey; by++) {
		int row_offset = by * BIN_COUNT_X;
		for(int bx = bsx; bx <= bex; bx++)
			atomicAdd(s_bins[row_offset + bx], 1);
	}

	// Handling only tris with bin area 1 to 3:
	/*atomicAdd(s_bins[bsy * BIN_COUNT_X + bsx], 1);
	if(bex != bsx || bey != bsy)
		atomicAdd(s_bins[bey * BIN_COUNT_X + bex], 1);
	int bmx = (bsx + bex) >> 1, bmy = (bsy + bey) >> 1;
	if(bmx > bsx || bmy > bsy)
		atomicAdd(s_bins[bmy * BIN_COUNT_X + bmx], 1);*/
}
// Marks the bins covered by one triangle of a large quad in s_bins.
//
// second_tri (0 or 1) selects the triangle within the quad. For every bin row the covered
// span [bmin, bmax] is found with scanlineStep(), clamped to the quad's bounding box in x.
// Only span boundaries are stored: +1 at bmin and -1 just past bmax (if still inside the
// row); accumulateLargeTriCountsAcrossRows() later turns these into per-bin counts.
void countLargeTriBins(int quad_idx, int second_tri) {
	uint enc_aabb = g_quad_aabbs[quad_idx];

	// Bits 30 and 31 hold per-triangle cull flags
	if(((enc_aabb >> (30 + second_tri)) & 1) == 1)
		return;

	ivec4 aabb = decodeAABB28(enc_aabb);
	int first_bx = aabb[0];
	int last_bx = aabb[2];

	uint tri_idx = quad_idx * 2 + second_tri;
	int first_by, last_by;
	ScanlineParams scan = loadScanlineParamsBin(tri_idx, first_by, last_by);

	for(int by = first_by; by <= last_by; by++) {
		int span_min, span_max;
		// Has to be called for every row: it also advances 'scan' to the next row
		scanlineStep(scan, span_min, span_max);
		span_min = max(span_min, first_bx);
		span_max = min(span_max, last_bx);
		if(span_max < span_min)
			continue;

		int row_offset = by * BIN_COUNT_X;
		atomicAdd(s_bins[span_min + row_offset], 1);
		int span_end = span_max + 1;
		if(span_end < BIN_COUNT_X)
			atomicAdd(s_bins[span_end + row_offset], -1);
	}
}
// Counts, per bin, how many triangles of large quads cover it.
//
// The work group repeatedly claims batches of LARGE_BATCH_SIZE quads from the global counter
// g_info.num_counted_quads[1] until all s_num_quads[1] quads are taken. Within a batch two
// invocations work on each quad, one per triangle. Afterwards the per-row deltas in s_bins
// are accumulated and the results are written to this work group's slice of g_bin_batches
// and added to the global per-bin triangle counters.
//
// All barrier() calls are reached by the whole work group: loop exit and the early return
// depend only on shared values (s_quads_offset, s_num_batches) read after a barrier.
void countLargeTris() {
	START_TIMER();
	// Computing large quads bin coverage
	int num_quads = s_num_quads[1];
	while(true) {
		// First invocation claims the next batch for the whole work group
		if(LIX == 0) {
			int quads_offset = atomicAdd(g_info.num_counted_quads[1], LARGE_BATCH_SIZE);
			if(quads_offset < num_quads) {
				int batch = quads_offset >> LARGE_BATCH_SHIFT;
				// Chain the batches claimed by this work group: the slot of the previously
				// claimed batch stores the index of the new one
				int last_batch = s_last_batch[1];
				if(last_batch == -1)
					s_first_batch[1] = batch;
				else
					LARGE_BATCHES(last_batch) = batch;
				s_last_batch[1] = batch;
				s_num_batches[1]++;
			}
			s_quads_offset = quads_offset;
		}
		barrier();

		int large_quads_offset = s_quads_offset;
		if(large_quads_offset >= num_quads)
			break;

		// Two invocations per quad: LIX >> 1 selects the quad, LIX & 1 the triangle.
		// Large quads are addressed from the end of the quad array (MAX_VISIBLE_QUADS - 1 downwards).
		int large_quad_idx = large_quads_offset + int(LIX >> 1);
		if(large_quad_idx < num_quads)
			countLargeTriBins((MAX_VISIBLE_QUADS - 1) - large_quad_idx, int(LIX & 1));

		// s_quads_offset must not be overwritten before everybody has read it
		barrier();
	}

	barrier();
	UPDATE_TIMER(1);

	// Thread groups which didn't do any estimation can quit early:
	// they won't participate in dispatching either
	if(s_num_batches[1] == 0)
		return;
	accumulateLargeTriCountsAcrossRows();
	barrier();

	// Copying bin counters to global memory buffer
	// Each work group owns 2 * BIN_COUNT entries; large tri counts go into the second half
	uint wg_offset = WGID.x * BIN_COUNT * 2 + BIN_COUNT;
	for(uint i = LIX; i < BIN_COUNT; i += LSIZE) {
		g_bin_batches[wg_offset + i] = s_bins[i];
		if(s_bins[i] > 0)
			atomicAdd(BIN_TRI_COUNTS(i), s_bins[i]);
	}
}
3 | 4 | #version 460 5 | 6 | #include "shared/compute_funcs.glsl" 7 | #include "shared/funcs.glsl" 8 | #include "shared/scanline.glsl" 9 | #include "shared/structures.glsl" 10 | #include "shared/timers.glsl" 11 | 12 | #include "%shader_debug" 13 | 14 | // TODO: use gl_SubgroupSize, gl_SubgroupId 15 | // TODO: simplify offsets generation; we don't have to do this in 2D 16 | 17 | #extension GL_KHR_shader_subgroup_shuffle : require 18 | #extension GL_KHR_shader_subgroup_shuffle_relative : require 19 | #extension GL_KHR_shader_subgroup_vote : require 20 | 21 | #define LSIZE BIN_DISPATCHER_LSIZE 22 | #define LSHIFT BIN_DISPATCHER_LSHIFT 23 | 24 | #define SMALL_BATCH_STEPS 4 25 | #define SMALL_BATCH_SHIFT (LSHIFT + 2) 26 | #define SMALL_BATCH_SIZE (LSIZE * SMALL_BATCH_STEPS) 27 | 28 | #define LARGE_BATCH_SHIFT (LSHIFT - 1) 29 | #define LARGE_BATCH_SIZE (LSIZE / 2) 30 | 31 | layout(local_size_x = 1024, local_size_x_id = BIN_DISPATCHER_LSIZE_ID) in; 32 | 33 | layout(std430, binding = 0) coherent buffer lucid_info_ { 34 | LucidInfo g_info; 35 | int g_counts[]; 36 | }; 37 | layout(binding = 1) uniform lucid_config_ { LucidConfig u_config; }; 38 | 39 | layout(std430, set = 1, binding = 0) restrict readonly buffer buf1_ { uint g_quad_aabbs[]; }; 40 | layout(std430, set = 1, binding = 1) restrict writeonly buffer buf2_ { uint g_bin_quads[]; }; 41 | layout(std430, set = 1, binding = 2) restrict writeonly buffer buf3_ { uint g_bin_tris[]; }; 42 | layout(std430, set = 1, binding = 3) restrict buffer buf4_ { int g_bin_batches[]; }; 43 | layout(std430, set = 1, binding = 4) restrict readonly buffer buf6_ { uvec4 g_uvec4_storage[]; }; 44 | DEBUG_SETUP(1, 5) 45 | 46 | #define MAX_SMALL_BATCHES (MAX_VISIBLE_QUADS / SMALL_BATCH_SIZE + 256) 47 | #define MAX_LARGE_BATCHES (MAX_VISIBLE_QUADS / LARGE_BATCH_SIZE + 256) 48 | 49 | #define SMALL_BATCHES(idx) g_bin_batches[(MAX_DISPATCHES * BIN_COUNT * 2) + idx] 50 | #define LARGE_BATCHES(idx) g_bin_batches[(MAX_DISPATCHES * BIN_COUNT * 2 + 
// Appends a quad to the list of every bin inside its bin-space AABB.
// The value written is the quad index combined with the top four bits of the
// encoded AABB (named cull flags by the code that reads them). s_bins holds the
// next free slot in g_bin_quads for each bin and is advanced atomically.
void dispatchQuad(int quad_idx) {
	uint packed_aabb = g_quad_aabbs[quad_idx];
	uint tagged_quad = uint(quad_idx) | (packed_aabb & 0xf0000000);

	// decodeAABB28 returns (min x, min y, max x, max y), all inclusive.
	ivec4 bin_rect = decodeAABB28(packed_aabb);
	int first_col = bin_rect[0], first_row = bin_rect[1];
	int last_col = bin_rect[2], last_row = bin_rect[3];

	for(int row = first_row; row <= last_row; row++) {
		for(int col = first_col; col <= last_col; col++) {
			uint bin_id = col + row * BIN_COUNT_X;
			uint slot = atomicAdd(s_bins[bin_id], 1);
			g_bin_quads[slot] = tagged_quad;
		}
	}
}
// Writes one triangle of a large quad into the per-bin triangle lists, sharing the
// per-row work between all threads of the subgroup.
//   large_quad_idx - index into the large-quad range (counted from the end of the quad array)
//   second_tri     - 0 or 1, selects the triangle inside the quad
//   num_quads      - number of large quads; threads at or past it contribute no work
void dispatchLargeTriBalanced(int large_quad_idx, int second_tri, int num_quads) {
	bool is_valid = large_quad_idx < num_quads;
	if(subgroupAll(!is_valid))
		return;

	ScanlineParams params;
	uint tri_idx;
	// Empty row range by default: invalid or culled threads never enter the "by <= bey" branch,
	// but they still take part in the subgroup operations below.
	int bsy = 0, bey = -1, bsx, bex;

	if(is_valid) {
		int quad_idx = (MAX_VISIBLE_QUADS - 1) - large_quad_idx;
		uint enc_aabb = g_quad_aabbs[quad_idx];
		uint cull_flag = (enc_aabb >> (30 + second_tri)) & 1;
		ivec4 aabb = decodeAABB28(enc_aabb);
		bsx = aabb[0], bex = aabb[2];
		if(cull_flag == 0) {
			tri_idx = quad_idx * 2 + second_tri;
			params = loadScanlineParamsBin(tri_idx, bsy, bey);
		}
	}

	// Iterate rows until every thread of the subgroup has run out of rows.
	for(int by = bsy; subgroupAny(by <= bey); by++) {
		int bmin = 0, bmax = -1;
		if(by <= bey) {
			scanlineStep(params, bmin, bmax);
			bmin = max(bmin, bsx), bmax = min(bmax, bex);
		}

		// Number of bins this thread's triangle covers in the current row.
		int num_samples = max(0, bmax - bmin + 1);
		if(subgroupAll(num_samples == 0))
			continue;

		// Running sum over the subgroup (helper defined elsewhere); the last lane holds the
		// total, after which the value is turned into this thread's exclusive offset.
		int sample_offset = subgroupInclusiveAddFast(num_samples);
		int subgroup_num_samples = subgroupShuffle(sample_offset, SUBGROUP_MASK);
		sample_offset -= num_samples;

		// All samples of the row are cut into SUBGROUP_SIZE segments of segment_size items
		// (rounded up); segment k is processed by lane k.
		int subgroup_offset = int(LIX & ~SUBGROUP_MASK), thread_id = int(LIX & SUBGROUP_MASK);
		int segment_size = (subgroup_num_samples + SUBGROUP_MASK) / SUBGROUP_SIZE;
		int segment_id = sample_offset / segment_size;
		int segment_offset = sample_offset - segment_id * segment_size;
		if(num_samples > 0) {
			// Record this lane as the source for every segment whose first sample
			// falls inside this lane's sample range.
			if(segment_offset == 0)
				s_temp[subgroup_offset + segment_id] = thread_id;
			for(int k = 1; segment_offset + num_samples > segment_size * k; k++)
				s_temp[subgroup_offset + segment_id + k] = thread_id;
		}

		// NOTE(review): there is no barrier between the s_temp writes above and this read;
		// this appears to rely on the subgroup executing in lockstep - confirm.
		uint cur_src_thread_id = s_temp[LIX];
		int cur_sample_id = thread_id * segment_size;
		// Position of this lane's first sample inside the source lane's span.
		int cur_offset = cur_sample_id - subgroupShuffle(sample_offset, cur_src_thread_id);
		// Lanes whose segment starts past the total get a non-positive count and write nothing.
		int cur_num_samples = min(subgroup_num_samples - cur_sample_id, segment_size);
		int base_bin_id = by * BIN_COUNT_X + bmin;

		int i = 0;
		while(subgroupAny(i < cur_num_samples)) {
			// Fetch triangle, first bin and span width from the current source lane.
			uint cur_tri_idx = subgroupShuffle(tri_idx, cur_src_thread_id);
			int cur_bin_id = subgroupShuffle(base_bin_id, cur_src_thread_id);
			int cur_width = subgroupShuffle(num_samples, cur_src_thread_id);

			// Source lane has no samples in this row: move on to the next lane.
			if(cur_width == 0) {
				cur_src_thread_id++;
				continue;
			}
			if(i < cur_num_samples) {
				// s_bins holds the next free slot of each bin's triangle list.
				uint tri_offset = atomicAdd(s_bins[cur_bin_id + cur_offset], 1);
				g_bin_tris[tri_offset] = cur_tri_idx;
				cur_offset++;
				// Finished the source lane's span: continue with the next lane's span.
				if(cur_offset == cur_width)
					cur_offset = 0, cur_src_thread_id++;
				i++;
			}
		}
	}
}
// Writes the triangles of all large-quad batches owned by this workgroup into the
// per-bin triangle lists.
void dispatchLargeTris() {
	START_TIMER();

	// Load this workgroup's per-bin counts (second half of its g_bin_batches slice) and
	// exchange every non-zero count for a start offset reserved in BIN_TRI_OFFSETS_TEMP.
	uint counts_base = WGID.x * BIN_COUNT * 2 + BIN_COUNT;
	for(uint bin = LIX; bin < BIN_COUNT; bin += LSIZE) {
		int cursor = g_bin_batches[counts_base + bin];
		if(cursor > 0)
			cursor = atomicAdd(BIN_TRI_OFFSETS_TEMP(bin), cursor);
		s_bins[bin] = cursor;
	}
	barrier();

	int large_quad_count = s_num_quads[1];
	while(s_num_batches[1] > 0) {
		// Every thread must have evaluated the loop condition before thread 0 changes it.
		barrier();
		if(LIX == 0) {
			// Pop the head of the batch chain and publish where its quads start.
			int current = s_first_batch[1];
			s_first_batch[1] = LARGE_BATCHES(current);
			s_num_batches[1]--;
			s_quads_offset = current << LARGE_BATCH_SHIFT;
		}
		barrier();

		// Two threads per quad: one for each of its triangles.
		int which_tri = int(LIX & 1);
		int large_quad_idx = s_quads_offset + int(LIX >> 1);
		dispatchLargeTriBalanced(large_quad_idx, which_tri, large_quad_count);
	}
	UPDATE_TIMER(3);
}
// Encodes a unit normal into two values in [0, 1]; the inverse of decodeNormal().
// The encoded point lies in a disc of radius 0.5 centred at (0.5, 0.5): n.z == 1 maps
// to the centre and n.z == -1 to the rim.
vec2 encodeNormal(vec3 n) {
	float p_sq = n.z * 8.0 + 8.0;
	// For n.z == -1 both n.xy and p are zero and the division below would produce NaN
	// (p_sq can even become negative for slightly denormalized input). Every point of
	// the rim decodes to (0, 0, -1), so return one of them directly.
	if(p_sq < 1e-6)
		return vec2(1.0, 0.5);
	return n.xy / sqrt(p_sq) + 0.5;
}
// Reassembles a 32-bit value from four normalized channels (x holds the lowest byte,
// w the highest); the inverse of encodeInt4().
// Channels are clamped to [0, 1] and rounded to the nearest byte: plain truncation can
// lose one unit when k / 255 * 255 ends up marginally below k, and unclamped values
// above 1.0 would spill into the neighbouring byte.
uint decodeInt4(vec4 v) {
	uvec4 bytes = uvec4(clamp(v, 0.0, 1.0) * 255.0 + 0.5);
	return bytes.x | (bytes.y << 8) | (bytes.z << 16) | (bytes.w << 24);
}
// Packs color and depth into two 32-bit words:
//   [0] red (11 bits) | green (11 bits) << 11 | blue (10 bits) << 22
//   [1] alpha (8 bits) | encoded depth (24 bits) << 8
// Depth is stored as 0xffffff / (1 + depth), so depth 0 maps to the largest value.
// All four color channels are clamped to [0, 1] (alpha above 1.0 would otherwise
// overflow into the depth bits) and the encoded depth is clamped to 24 bits
// (it would exceed them for depth below 0).
uvec2 encodeCD(vec4 color, float depth) {
	vec4 c = clamp(color, 0.0, 1.0);
	float enc_depth = clamp(float(0xffffff) / (1.0 + depth), 0.0, float(0xffffff));

	uint enc_col = uint(c.r * 2047.0) | (uint(c.g * 2047.0) << 11) | (uint(c.b * 1023.0) << 22);
	uint enc_depth_alpha = (uint(enc_depth) << 8) | uint(c.a * 255.0);
	return uvec2(enc_col, enc_depth_alpha);
}
// Unpacks a normal stored by encodeNormalHemiOct():
//   bits  0-15: (p.x + p.y) scaled to 16 bits
//   bits 16-30: (p.x - p.y) scaled to 15 bits
//   bit     31: set when the z component is negative
// The result is normalized.
vec3 decodeNormalHemiOct(uint n) {
	float half_sum = (float(n & 0xffffu) - 32768.0) * (0.5 / 32767.0);
	float half_diff = (float((n >> 16) & 0x7fffu) - 16384.0) * (0.5 / 16383.0);

	float x = half_sum + half_diff;
	float y = half_sum - half_diff;
	float z = 1.0 - abs(x) - abs(y);

	bool negative_z = (n & 0x80000000u) != 0u;
	return normalize(vec3(x, y, negative_z ? -z : z));
}
// Maps value onto a black -> red -> green -> blue -> white gradient.
// steps holds the (ascending) thresholds at which red, green, blue and white are
// reached; colors in between are interpolated linearly. Values at or above the
// last threshold yield white.
vec3 gradientColor(uint value, uvec4 steps) {
	const vec3 colors[4] = vec3[4](vec3(1.0, 0.0, 0.0), vec3(0.0, 1.0, 0.0),
								   vec3(0.0, 0.0, 1.0), vec3(1.0, 1.0, 1.0));

	// First threshold above value; the last segment is used when there is none.
	int step_id = 3;
	for(int i = 0; i < 4; i++)
		if(value < steps[i]) {
			step_id = i;
			break;
		}

	float base_step = step_id == 0 ? 0.0 : float(steps[step_id - 1]);
	vec3 base_color = step_id == 0 ? vec3(0.0) : colors[step_id - 1];

	// An empty segment (equal thresholds) would divide by zero, and values past the
	// last threshold would extrapolate beyond the gradient: saturate in both cases.
	float range = float(steps[step_id]) - base_step;
	float t = range > 0.0 ? clamp((float(value) - base_step) / range, 0.0, 1.0) : 1.0;
	return colors[step_id] * t + base_color * (1.0 - t);
}
// Converts an NDC z value to a view-space depth using the z/w terms of
// viewport.proj_matrix (elements [2][2], [3][2], [2][3] and [3][3]).
// NOTE(review): depthToZView() negates this result, so the sign convention of the
// returned value should be checked against the "Spaces" notes above - confirm.
float zndcToView(Viewport viewport, float zndc) {
	float numerator = zndc * viewport.proj_matrix[3][3] - viewport.proj_matrix[3][2];
	float denominator = zndc * viewport.proj_matrix[2][3] - viewport.proj_matrix[2][2];
	return numerator / denominator;
}
// Creates all GPU objects that do not depend on per-frame data: the depth buffer, the
// render pass, the PBR shader modules with their pipeline layout, the environment-map
// pipeline and a full-screen quad vertex buffer.
// Expects the "pbr_*" and "env_map_*" shader definitions to be registered in the
// compiler already (addShaderDefs does that); the results of compiler.find() are
// dereferenced without a check.
Ex PbrRenderer::exConstruct(VulkanDevice &device, ShaderCompiler &compiler,
							const IRect &viewport, VAttachment color_att) {
	auto depth_format = device.bestSupportedFormat(VDepthStencilFormat::d32f);
	auto depth_buffer =
		EX_PASS(VulkanImage::create(device, VImageSetup(depth_format, viewport.size())));
	m_depth_buffer = VulkanImageView::create(depth_buffer);
	// TODO: we need to transition depth_buffer format too

	// Depth is cleared at the start of the pass; color is loaded, so the previous
	// contents of the color target are preserved.
	VAttachment depth_att(
		depth_format, VAttachmentSync(VLoadOp::clear, VStoreOp::store, VImageLayout::undefined,
									  defaultLayout(depth_format), defaultLayout(depth_format)));
	// Only the format of the passed color attachment is kept; its sync setup is replaced.
	color_att = {color_att.colorFormat(),
				 VAttachmentSync(VLoadOp::load, VStoreOp::store, VImageLayout::general,
								 VImageLayout::general, VImageLayout::general)};
	m_render_pass = device.getRenderPass({color_att, depth_att});

	m_viewport = viewport;

	auto frag_id = *compiler.find("pbr_frag");
	auto vert_id = *compiler.find("pbr_vert");
	auto env_frag_id = *compiler.find("env_map_frag");
	auto env_vert_id = *compiler.find("env_map_vert");
	m_shader_def_ids = {frag_id, vert_id, env_frag_id, env_vert_id};

	// Material pipelines are built on demand in getPipeline(); only modules and layout
	// are prepared here.
	m_frag_module = EX_PASS(compiler.createShaderModule(device, frag_id));
	m_vert_module = EX_PASS(compiler.createShaderModule(device, vert_id));
	m_pipeline_layout = device.getPipelineLayout({m_frag_module, m_vert_module});

	auto env_frag_module = EX_PASS(compiler.createShaderModule(device, env_frag_id));
	auto env_vert_module = EX_PASS(compiler.createShaderModule(device, env_vert_id));

	// Environment map pipeline: depth-tested but without depth writes.
	VPipelineSetup setup;
	setup.pipeline_layout = device.getPipelineLayout({env_frag_module, env_vert_module});
	setup.render_pass = m_render_pass;
	setup.shader_modules = {{env_vert_module, env_frag_module}};
	setup.depth = VDepthSetup(VDepthFlag::test);
	setup.raster = VRasterSetup(VPrimitiveTopology::triangle_list, VPolygonMode::fill);
	setup.vertex_attribs = {{vertexAttrib(0, 0)}};
	setup.vertex_bindings = {{vertexBinding(0)}};
	m_env_pipeline = EX_PASS(VulkanPipeline::create(device, setup));

	// Two triangles covering the [-1, 1] x [-1, 1] rectangle.
	auto quad_verts = Box({-1.0f, -1.0f}, {1.0f, 1.0f}).corners();
	array quad_tris = {quad_verts[0], quad_verts[1], quad_verts[2],
					   quad_verts[0], quad_verts[2], quad_verts[3]};
	auto vb_usage = VBufferUsage::vertex | VBufferUsage::storage;
	m_rect_vertices = EX_PASS(VulkanBuffer::createAndUpload(device, cspan(quad_tris), vb_usage));

	return {};
}
// Records draw commands for one phase of the frame: either all opaque draw calls
// (opaque == true) or all non-opaque ones. dc_buf holds one uniform entry per draw
// call, in the same order as ctx.dcs. Must be called inside the render pass begun
// by render().
Ex<> PbrRenderer::renderPhase(const RenderContext &ctx, VBufferSpan dc_buf,
							  bool opaque, bool wireframe) {
	auto &cmds = ctx.device.cmdQueue();
	PERF_GPU_SCOPE(cmds);

	// Descriptor set 0: lighting shared by all draw calls.
	cmds.bind(m_pipeline_layout);
	cmds.bindDS(0).set(0, VDescriptorType::uniform_buffer, m_lighting_buf);
	auto &verts = ctx.verts;
	cmds.bindVertices(0, verts.positions, verts.colors, verts.tex_coords, verts.normals,
					  verts.tangents);
	cmds.bindIndices(ctx.tris_ib);

	// A single pipeline is used for the whole phase.
	PipeConfig pipe_config{ctx.config.backface_culling, ctx.config.additive_blending, opaque,
						   wireframe, false};
	auto pipeline = EX_PASS(getPipeline(ctx.device, pipe_config));
	cmds.bind(pipeline);

	auto sampler = ctx.device.getSampler(ctx.config.sampler_setup);
	int prev_mat_id = -1;
	for(int dc : intRange(ctx.dcs)) {
		auto &draw_call = ctx.dcs[dc];
		auto &material = ctx.materials[draw_call.material_id];
		// Skip draw calls which belong to the other phase.
		if(bool(draw_call.opts & DrawCallOpt::is_opaque) != opaque)
			continue;
		// Descriptor set 1 is rebound only when the material changes.
		// NOTE(review): binding 0 below is this draw call's own slice of dc_buf, so
		// consecutive draw calls sharing a material keep using the first one's slice.
		// render() fills draw_call_opts per draw call - confirm those can never differ
		// between draw calls of the same material.
		if(prev_mat_id != draw_call.material_id) {
			auto ds = cmds.bindDS(1);

			// TODO: uniform buffer alignment issue
			// https://vulkan.lunarg.com/doc/view/1.3.290.0/windows/1.3-extensions/vkspec.html#VUID-VkWriteDescriptorSet-descriptorType-00327
			// Switched from path-tracer to pbr renderer on conference
			ds.set(0, VDescriptorType::uniform_buffer, dc_buf.subSpan(dc, dc + 1));

			// TODO: different default textures for different map types
			auto &albedo_map = material.maps[SceneMapType::albedo];
			auto &normal_map = material.maps[SceneMapType::normal];
			auto &pbr_map = material.maps[SceneMapType::pbr];

			ds.set(1, {{sampler, albedo_map.vk_image},
					   {sampler, normal_map.vk_image},
					   {sampler, pbr_map.vk_image}});
			prev_mat_id = draw_call.material_id;
		}

		// Index count and first index are expressed in indices, hence the factor of 3.
		cmds.drawIndexed(draw_call.num_tris * 3, 1, draw_call.tri_offset * 3);
	}

	// TODO: what about ordering objects by material ?
	// TODO: add option to order objects in different ways ?
	// TODO: optional alpha test first for blended objects

	return {};
}
// Renders the whole scene into the currently acquired swap chain image:
// opaque draw calls first, then the environment map (if present), then the
// non-opaque draw calls. Lighting and per-draw-call uniforms are uploaded into
// frame-lifetime buffers on every call.
Ex<> PbrRenderer::render(const RenderContext &ctx, bool wireframe) {
	auto &cmds = ctx.device.cmdQueue();
	PERF_GPU_SCOPE(cmds);

	// TODO: optimize this
	auto ubo_usage = VBufferUsage::uniform;
	shader::Lighting lighting;
	lighting.ambient_color = ctx.lighting.ambient.color;
	lighting.ambient_power = ctx.lighting.ambient.power;
	lighting.sun_color = ctx.lighting.sun.color;
	lighting.sun_power = ctx.lighting.sun.power;
	lighting.sun_dir = ctx.lighting.sun.dir;
	// Kept in a member so that renderPhase() can bind it; released at the end of this function.
	m_lighting_buf = EX_PASS(VulkanBuffer::createAndUpload(ctx.device, cspan(&lighting, 1),
														   ubo_usage, VMemoryUsage::frame));

	int num_opaque = 0;

	// One uniform entry per draw call, in the same order as ctx.dcs
	// (renderPhase() indexes the buffer with the draw call index).
	// TODO: minimize it (do it only for different materials)
	vector dcs;
	dcs.reserve(ctx.dcs.size());
	auto inv_proj_view_matrix = inverseOrZero(ctx.camera.matrix());

	for(const auto &draw_call : ctx.dcs) {
		auto &material = ctx.materials[draw_call.material_id];
		auto &simple_dc = dcs.emplace_back();
		if(draw_call.opts & DrawCallOpt::is_opaque)
			num_opaque++;
		simple_dc.world_camera_pos = ctx.camera.pos();
		simple_dc.proj_view_matrix = ctx.camera.matrix();
		simple_dc.inv_proj_view_matrix = inv_proj_view_matrix;
		simple_dc.material_color = float4(material.diffuse, material.opacity);
		simple_dc.draw_call_opts = uint(draw_call.opts.bits);
	}

	auto dc_buf =
		EX_PASS(VulkanBuffer::createAndUpload(ctx.device, dcs, ubo_usage, VMemoryUsage::frame));

	cmds.setViewport(m_viewport);
	cmds.setScissor(none);

	auto swap_chain = ctx.device.swapChain();
	// NOTE(review): swap_image is not used anywhere below.
	auto swap_image = swap_chain->acquiredImage()->image();
	auto framebuffer = ctx.device.getFramebuffer({swap_chain->acquiredImage(), m_depth_buffer});
	cmds.beginRenderPass(framebuffer, m_render_pass, none,
						 {FColor(ColorId::magneta), VClearDepthStencil(1.0)});

	// Phases with nothing to draw are skipped entirely.
	if(num_opaque > 0)
		EXPECT(renderPhase(ctx, dc_buf, true, wireframe));
	if(ctx.lighting.env_map)
		EXPECT(renderEnvMap(ctx));
	if(num_opaque != ctx.dcs.size())
		EXPECT(renderPhase(ctx, dc_buf, false, wireframe));

	cmds.endRenderPass();
	m_lighting_buf = {};

	return {};
}
0 : j + 1]; 36 | auto it = edge_tri_map.find({v1, v0}); 37 | if(it != edge_tri_map.end()) { 38 | if(it->value != i) 39 | out[i].neighbours[j] = it->value; 40 | } 41 | } 42 | } 43 | 44 | return out; 45 | } 46 | 47 | vector toPartitions(CSpan verts, const SceneMesh &mesh, 48 | CSpan> quads) { 49 | vector out; 50 | for(auto &quad : quads) { 51 | auto &part = out.emplace_back(); 52 | if(quad[2] == quad[3]) { 53 | part.verts = cspan(quad).subSpan(0, 3); 54 | part.tris = {{quad[0], quad[1], quad[2]}}; 55 | array corners{{verts[quad[0]], verts[quad[1]], verts[quad[2]]}}; 56 | part.bbox = enclose(corners); 57 | } else { 58 | part.verts = cspan(quad); 59 | part.tris = {{quad[0], quad[1], quad[2]}, {quad[0], quad[2], quad[3]}}; 60 | array corners{ 61 | {verts[quad[0]], verts[quad[1]], verts[quad[2]], verts[quad[3]]}}; 62 | part.bbox = enclose(corners); 63 | } 64 | } 65 | return out; 66 | } 67 | 68 | vector meshPartition(CSpan verts, const SceneMesh &mesh, 69 | CSpan tri_infos, int max_tris, int max_verts) { 70 | vector out; 71 | static constexpr int unassigned_id = -1, front_id = -2; 72 | vector tri_group_id(tri_infos.size(), unassigned_id); 73 | vector selected_verts(verts.size(), 0); 74 | 75 | print("Input mesh: tris:% all_verts:% bbox:%\n", mesh.tris.size(), verts.size(), 76 | mesh.bounding_box); 77 | print("Partition limits: max_tris:% max_verts:%\n", max_tris, max_verts); 78 | 79 | PodVector selected_tris(mesh.tris.size()); 80 | for(int i : intRange(mesh.tris)) 81 | selected_tris[i] = i; 82 | auto bbox_size = mesh.bounding_box.size(); 83 | int longest_axis = bbox_size[0] > bbox_size[1] ? 0 : 1; 84 | longest_axis = bbox_size[2] > bbox_size[longest_axis] ? 
2 : longest_axis; 85 | 86 | std::sort(selected_tris.begin(), selected_tris.end(), [&](int a, int b) { 87 | auto &tri0 = tri_infos[a]; 88 | auto &tri1 = tri_infos[b]; 89 | return tri0.bbox.min(longest_axis) < tri1.bbox.min(longest_axis); 90 | }); 91 | 92 | vector front; 93 | for(int start_idx : selected_tris) { 94 | if(tri_group_id[start_idx] != unassigned_id) 95 | continue; 96 | 97 | int group_id = out.size(); 98 | tri_group_id[start_idx] = group_id; 99 | out.emplace_back(); 100 | auto &partition = out.back(); 101 | partition.tris.reserve(max_tris); 102 | partition.verts.reserve(max_verts); 103 | front.clear(); 104 | 105 | auto extend_front = [&](int src_idx) { 106 | for(auto idx : tri_infos[src_idx].neighbours) 107 | if(idx != -1 && tri_group_id[idx] == unassigned_id) { 108 | tri_group_id[idx] = front_id; 109 | front.emplace_back(idx); 110 | } 111 | }; 112 | 113 | auto &start_tri = tri_infos[start_idx]; 114 | FBox bbox = start_tri.bbox; 115 | partition.tris.emplace_back(start_tri.verts); 116 | insertBack(partition.verts, start_tri.verts); 117 | for(auto vidx : start_tri.verts) 118 | selected_verts[vidx] = 1; 119 | extend_front(start_idx); 120 | 121 | auto start_tri_nrm = Triangle3F(verts[start_tri.verts[0]], verts[start_tri.verts[1]], 122 | verts[start_tri.verts[2]]) 123 | .normal(); 124 | 125 | // TODO: instead of merging allow jumping to neighbouring triangles in initial search 126 | 127 | while(!front.empty() && partition.tris.size() < max_tris && 128 | partition.verts.size() < max_verts) { 129 | Pair, int> best = {{3, inf}, 0}; 130 | for(int i : intRange(front)) { 131 | int idx = front[i]; 132 | auto &ntri = tri_infos[idx]; 133 | 134 | float3 ntri_nrm = 135 | Triangle3F(verts[ntri.verts[0]], verts[ntri.verts[1]], verts[ntri.verts[2]]) 136 | .normal(); 137 | float tri_dot = 1.5f - 0.5f * fabs(dot(start_tri_nrm, ntri_nrm)); 138 | float max_sa = enclose(bbox, ntri.bbox).surfaceArea(); 139 | 140 | int num_neighbours = 0; 141 | if(max_tris > 2) 142 | for(int nidx : 
tri_infos[idx].neighbours) 143 | if(nidx != -1 && tri_group_id[nidx] < 0) 144 | num_neighbours++; 145 | 146 | best = minFirst(best, {{num_neighbours, max_sa}, i}); 147 | } 148 | 149 | int new_idx = front[best.second]; 150 | front[best.second] = front.back(); 151 | front.pop_back(); 152 | 153 | auto &new_tri = tri_infos[new_idx]; 154 | for(int vidx : new_tri.verts) 155 | if(!selected_verts[vidx]) { 156 | selected_verts[vidx] = 1; 157 | partition.verts.emplace_back(vidx); 158 | } 159 | partition.tris.emplace_back(new_tri.verts); 160 | bbox = enclose(bbox, new_tri.bbox); 161 | tri_group_id[new_idx] = group_id; 162 | extend_front(new_idx); 163 | } 164 | partition.bbox = bbox; 165 | 166 | for(auto vidx : partition.verts) 167 | selected_verts[vidx] = 0; 168 | for(auto nidx : front) { 169 | DASSERT_EQ(tri_group_id[nidx], front_id); 170 | tri_group_id[nidx] = unassigned_id; 171 | } 172 | makeSorted(partition.verts); 173 | 174 | print("- partition %: tris:% verts:% bbox_size:%\n", group_id, partition.tris.size(), 175 | partition.verts.size(), partition.bbox.size()); 176 | // TODO: re-index vertices 177 | } 178 | 179 | // Jak łączyć ze sobą partycje? po odległości? tak, żeby minimalizować bboxa 180 | // Zaczynać od najmniejszych? 
181 | 182 | float merge_limit = 4.0f; 183 | std::sort(out.begin(), out.end(), 184 | [](auto &a, auto &b) { return a.verts.size() < b.verts.size(); }); 185 | 186 | // To jest trochę bez sensu, najlepiej od razu dobrze podzielić 187 | bool still_merging = true; 188 | while(still_merging) { 189 | still_merging = false; 190 | 191 | Pair> best_pair = {inf, {-1, -1}}; 192 | for(int i : intRange(out)) { 193 | for(int j = i + 1; j < out.size(); j++) { 194 | if(out[i].verts.size() + out[j].verts.size() > max_verts) 195 | break; 196 | if(out[i].tris.size() + out[j].tris.size() > max_tris) 197 | continue; 198 | 199 | FBox bbox = enclose(out[i].bbox, out[j].bbox); 200 | float score = 201 | bbox.surfaceArea() / (out[i].bbox.surfaceArea() + out[j].bbox.surfaceArea()); 202 | best_pair = minFirst(best_pair, {score, {i, j}}); 203 | } 204 | } 205 | 206 | auto [idx1, idx2] = best_pair.second; 207 | if(idx1 != -1 && best_pair.first <= merge_limit) { 208 | print("Merged % into %; score: %\n", idx1, idx2, best_pair.first); 209 | auto &par1 = out[idx1], &par2 = out[idx2]; 210 | insertBack(par1.tris, par2.tris); 211 | insertBack(par1.verts, par2.verts); 212 | makeSortedUnique(par1.verts); 213 | out[idx2] = std::move(out.back()); 214 | out.pop_back(); 215 | std::sort(out.begin(), out.end(), 216 | [](auto &a, auto &b) { return a.verts.size() < b.verts.size(); }); 217 | still_merging = true; 218 | } 219 | } 220 | 221 | return out; 222 | } 223 | 224 | void meshPartitionStats(CSpan partitions, int max_tris, int max_verts) { 225 | // TODO: duplicated vertices count? 
226 | double avg_tris = 0, avg_verts = 0, avg_sa = 0; 227 | int num_full_tris = 0, num_full_verts = 0; 228 | for(auto &partition : partitions) { 229 | avg_tris += partition.tris.size(); 230 | avg_verts += partition.verts.size(); 231 | avg_sa += partition.bbox.surfaceArea(); 232 | if(partition.tris.size() == max_tris) 233 | num_full_tris++; 234 | if(partition.verts.size() == max_verts) 235 | num_full_verts++; 236 | } 237 | avg_tris = avg_tris / partitions.size(); 238 | avg_verts = avg_verts / partitions.size(); 239 | avg_sa /= partitions.size(); 240 | printf("Partitions: %d\n avg_tris:%.2f (%.2f %%)\n avg_verts:%.2f (%.2f %%)\n", 241 | partitions.size(), avg_tris, avg_tris / max_tris * 100.0, avg_verts, 242 | avg_verts / max_verts * 100.0); 243 | printf(" avg_surface_area: %.2f\n", avg_sa); 244 | printf(" partitions with full tris: %.2f %%\n", 245 | double(num_full_tris) / partitions.size() * 100); 246 | printf(" partitions with full verts: %.2f %%\n", 247 | double(num_full_verts) / partitions.size() * 100); 248 | } 249 | 250 | void visualizeMeshPartitions(const Scene &scene, CSpan partitions) { 251 | vector> partition_tris; 252 | CSpan verts = scene.positions; 253 | for(auto &partition : partitions) { 254 | vector tris; 255 | tris.reserve(partition.tris.size()); 256 | for(auto tri : partition.tris) 257 | tris.emplace_back(verts[tri[0]], verts[tri[1]], verts[tri[2]]); 258 | partition_tris.emplace_back(std::move(tris)); 259 | } 260 | 261 | vector colors; 262 | for(auto color_id : all) 263 | if(!isOneOf(color_id, ColorId::black, ColorId::transparent)) { 264 | colors.emplace_back((IColor)lerp(FColor(color_id), FColor(ColorId::white), 0.0f)); 265 | colors.emplace_back((IColor)lerp(FColor(color_id), FColor(ColorId::white), 0.1f)); 266 | colors.emplace_back((IColor)lerp(FColor(color_id), FColor(ColorId::white), 0.2f)); 267 | } 268 | 269 | auto vis_func = [&](Canvas3D &canvas, double2 mouse_pos) -> string { 270 | FATAL("fixme"); 271 | /*for(int i : intRange(partitions)) { 
272 | IColor color = colors[i % colors.size()]; 273 | canvas(partition_tris[i], partition_tris[i].size() == 1 ? ColorId::black : color); 274 | }*/ 275 | 276 | return ""; 277 | }; 278 | 279 | FATAL("fixme"); 280 | //Investigator3 investigator(vis_func, InvestigatorOpt::exit_with_space, 281 | // {DBox(scene.bounding_box), none, 0.1f}); 282 | //investigator.run(); 283 | } 284 | 285 | void meshletTest(const Scene &scene, float square_weight) { 286 | vector partitions; 287 | 288 | /*int max_tris = 64, max_verts = 64; 289 | for(int i : intRange(scene.meshes)) { 290 | auto tri_info = meshTriInfo(scene.positions, scene.meshes[i].tris); 291 | auto &mesh = scene.meshes[i]; 292 | auto current = meshPartition(scene.positions, mesh, tri_info, max_tris, max_verts); 293 | insertBack(partitions, current); 294 | }*/ 295 | 296 | int max_tris = 2, max_verts = 4; 297 | for(int i : intRange(scene.meshes)) { 298 | auto &mesh = scene.meshes[i]; 299 | auto tri_neighbours = triNeighbours(scene.meshes[i].tris); 300 | auto [quad_nodes, tri_quads] = quadNodes(scene.positions, mesh.tris, tri_neighbours); 301 | auto qmesh = genQuads(mesh.tris, tri_neighbours, quad_nodes, tri_quads, square_weight); 302 | insertBack(partitions, toPartitions(scene.positions, mesh, qmesh)); 303 | } 304 | 305 | meshPartitionStats(partitions, max_tris, max_verts); 306 | visualizeMeshPartitions(scene, partitions); 307 | } 308 | -------------------------------------------------------------------------------- /data/shaders/shared/shading.glsl: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 
#ifndef _SHADING_GLSL_
#define _SHADING_GLSL_

#include "funcs.glsl"
#include "structures.glsl"

// Subgroup vote / shuffle operations are used by the sample reduction code in this file.
#extension GL_KHR_shader_subgroup_vote : require
#extension GL_KHR_shader_subgroup_shuffle : require

// Global info / counters buffer and configuration (set 0)
coherent layout(std430, binding = 0) buffer lucid_info_ {
	LucidInfo g_info;
	int g_counts[];
};
layout(binding = 1) uniform lucid_config_ { LucidConfig u_config; };

// Rasterizer inputs, scratch memory, output image and textures (set 1)
layout(std430, set = 1, binding = 0) readonly restrict buffer buf0_ { uint g_bin_quads[]; };
layout(std430, set = 1, binding = 1) readonly restrict buffer buf1_ { uint g_bin_tris[]; };
layout(std430, set = 1, binding = 2) restrict buffer buf2_ { uint g_scratch_32[]; };
layout(std430, set = 1, binding = 3) restrict buffer buf3_ { uvec2 g_scratch_64[]; };
layout(std430, set = 1, binding = 4) readonly restrict buffer buf4_ { uint g_instance_colors[]; };
layout(std430, set = 1, binding = 5) readonly restrict buffer buf5_ { vec4 g_instance_uv_rects[]; };
layout(std430, set = 1, binding = 6) readonly restrict buffer buf6_ { uvec4 g_uvec4_storage[]; };
layout(std430, set = 1, binding = 7) readonly restrict buffer buf7_ { uint g_normals_storage[]; };
layout(set = 1, binding = 8, rgba8) uniform image2D g_raster_image;
layout(set = 1, binding = 9) uniform sampler2D opaque_texture;
layout(set = 1, binding = 10) uniform sampler2D transparent_texture;

// TODO: separate opaque and transparent objects, draw opaque objects first to texture
// then read it and use depth to optimize drawing

// Basic rasterization statistics
// Stat 0: num fragments
// Stat 1: num half-block-tris
// Stat 2: num invalid pixels
shared uint s_stats[STATS_COUNT];

// Atomically adds to the workgroup-local counters: stat 0 (fragments), stat 1 (half-block tris).
void updateStats(uint num_fragments, uint num_hblocks) {
	atomicAdd(s_stats[0], num_fragments);
	atomicAdd(s_stats[1], num_hblocks);
}

// Zeroes the workgroup-local counters; the first STATS_COUNT invocations clear one entry each.
// No barrier is issued here.
void initStats() {
	if(LIX < STATS_COUNT)
		s_stats[LIX] = 0;
}

// Adds the workgroup-local counters to the global ones in g_info.stats.
void commitStats() {
	if(LIX < STATS_COUNT)
		atomicAdd(g_info.stats[LIX], s_stats[LIX]);
}

// Offset added to pixel_pos in outputPixel(); not written anywhere in this file.
shared ivec2 s_bin_pos;

// Stores color in g_raster_image at s_bin_pos + pixel_pos.
void outputPixel(ivec2 pixel_pos, vec4 color) {
	//color = tintColor(color, vec3(0.2, 0.3, 0.4), 0.8);
	imageStore(g_raster_image, s_bin_pos + pixel_pos, color);
}

// Threshold for out_trans used by the ALPHA_THRESHOLD early-outs.
const float alpha_threshold = 1.0 / 128.0;

// Loads per-triangle parameters from g_uvec4_storage.
// Layout, as read below:
//  - [STORAGE_TRI_DEPTH_OFFSET + tri_idx]:    xyz = depth_eq (float bits),
//                                             w   = instance_flags (low 16 bits) and
//                                                   instance_id (high 16 bits)
//  - [STORAGE_TRI_BARY_OFFSET + tri_idx * 2]: two entries, xyz = edge0 / edge1 (float bits),
//                                             w   = bary_params.x / bary_params.y
void getTriangleParams(uint tri_idx, out vec3 depth_eq, out vec2 bary_params, out vec3 edge0,
					   out vec3 edge1, out uint instance_id, out uint instance_flags) {
	uint bary_offset = STORAGE_TRI_BARY_OFFSET + tri_idx * 2;
	uvec4 val0 = g_uvec4_storage[STORAGE_TRI_DEPTH_OFFSET + tri_idx];
	uvec4 val1 = g_uvec4_storage[bary_offset + 0];
	uvec4 val2 = g_uvec4_storage[bary_offset + 1];
	depth_eq = uintBitsToFloat(val0.xyz);
	bary_params = uintBitsToFloat(uvec2(val1.w, val2.w));
	instance_flags = val0[3] & 0xffff;
	instance_id = val0[3] >> 16;
	edge0 = uintBitsToFloat(val1.xyz);
	edge1 = uintBitsToFloat(val2.xyz);
}

// Loads RGBA8 vertex colors of a triangle. Attributes are stored per quad (tri_idx >> 1),
// four values each: the first triangle of a quad (tri_idx & 1 == 0) uses entries 0, 1, 2,
// the second one uses entries 0, 2, 3.
void getTriangleVertexColors(uint tri_idx, out vec4 color0, out vec4 color1, out vec4 color2) {
	uint quad_idx = tri_idx >> 1;
	uint second_tri = tri_idx & 1;
	uvec4 colors = g_uvec4_storage[STORAGE_QUAD_COLOR_OFFSET + quad_idx];
	color0 = decodeRGBA8(colors[0]);
	color1 = decodeRGBA8(colors[1 + second_tri]);
	color2 = decodeRGBA8(colors[2 + second_tri]);
}

// Loads encoded vertex normals of a triangle; same per-quad indexing as for vertex colors.
void getTriangleVertexNormals(uint tri_idx, out vec3 normal0, out vec3 normal1, out vec3 normal2) {
	uint quad_idx = tri_idx >> 1;
	uint second_tri = tri_idx & 1;
	uvec4 normals = g_uvec4_storage[STORAGE_QUAD_NORMAL_OFFSET + quad_idx];
	normal0 = decodeNormalUint(normals[0]);
	normal1 = decodeNormalUint(normals[1 + second_tri]);
	normal2 = decodeNormalUint(normals[2 + second_tri]);
}

// Loads texture coordinates of a triangle. Every quad occupies two uvec4 entries holding
// four vec2 values (as float bits); the first triangle uses values 0, 1, 2, the second
// one uses values 0, 2, 3.
void getTriangleVertexTexCoords(uint tri_idx, out vec2 tex0, out vec2 tex1, out vec2 tex2) {
	uint quad_idx = tri_idx >> 1;
	uint second_tri = tri_idx & 1;
	uint tex_offset = STORAGE_QUAD_TEXTURE_OFFSET + quad_idx * 2;
	uvec4 tex_coords0 = g_uvec4_storage[tex_offset + 0];
	uvec4 tex_coords1 = g_uvec4_storage[tex_offset + 1];
	tex0 = uintBitsToFloat(tex_coords0.xy);
	tex1 = uintBitsToFloat(second_tri == 0 ? tex_coords0.zw : tex_coords1.xy);
	tex2 = uintBitsToFloat(second_tri == 0 ? tex_coords1.xy : tex_coords1.zw);
}

// Shades a single sample of triangle tri_idx at pixel_pos.
//
// out_depth receives the value of the triangle's depth equation at that pixel
// (inv_ray_pos below; its reciprocal is used as the ray position).
// Returns the shaded color encoded with encodeRGBA8(), or 0 if the alpha of the
// color (instance color * texture * vertex colors) is exactly 0.
uint shadeSample(ivec2 pixel_pos, uint tri_idx, out float out_depth) {
	float px = float(pixel_pos.x), py = float(pixel_pos.y);

	vec3 depth_eq, edge0_eq, edge1_eq;
	uint instance_id, instance_flags;
	vec2 bary_params;
	getTriangleParams(tri_idx, depth_eq, bary_params, edge0_eq, edge1_eq, instance_id,
					  instance_flags);

	// Plane equations (x * px + y * py + z) evaluated at the pixel
	float inv_ray_pos = depth_eq.x * px + (depth_eq.y * py + depth_eq.z);
	out_depth = inv_ray_pos;
	float ray_pos = 1.0 / inv_ray_pos;

	float e0 = edge0_eq.x * px + (edge0_eq.y * py + edge0_eq.z);
	float e1 = edge1_eq.x * px + (edge1_eq.y * py + edge1_eq.z);
	vec2 bary = vec2(e0, e1) * ray_pos;

	// Differences of bary towards pixels (px + 1, py) and (px, py + 1); computed (and later
	// used) only for textured instances, where they feed textureGrad().
	vec2 bary_dx, bary_dy;
	if((instance_flags & INST_HAS_ALBEDO_TEXTURE) != 0) {
		float ray_posx = 1.0 / (inv_ray_pos + depth_eq.x);
		float ray_posy = 1.0 / (inv_ray_pos + depth_eq.y);

		bary_dx = vec2(e0 + edge0_eq.x, e1 + edge1_eq.x) * ray_posx - bary;
		bary_dy = vec2(e0 + edge0_eq.y, e1 + edge1_eq.y) * ray_posy - bary;
	}
	// TODO: compute bary only if we use vertex attributes? That would be all scenes...
	bary -= bary_params;

	vec4 color = (instance_flags & INST_HAS_COLOR) != 0 ?
					 decodeRGBA8(g_instance_colors[instance_id]) :
					 vec4(1.0);

	if((instance_flags & INST_HAS_ALBEDO_TEXTURE) != 0) {
		vec2 tex0, tex1, tex2;
		getTriangleVertexTexCoords(tri_idx, tex0, tex1, tex2);

		// NOTE(review): tex1 / tex2 are used here like offsets relative to tex0 (compare with
		// normals below, where nrm0 is subtracted explicitly) - presumably they are stored
		// that way; confirm against the code which fills STORAGE_QUAD_TEXTURE_OFFSET.
		vec2 tex_coord = bary[0] * tex1 + (bary[1] * tex2 + tex0);
		vec2 tex_dx = bary_dx[0] * tex1 + bary_dx[1] * tex2;
		vec2 tex_dy = bary_dy[0] * tex1 + bary_dy[1] * tex2;

		// Maps the wrapped coordinate into the instance's rectangle (xy: offset, zw: scale);
		// the gradients are scaled accordingly.
		if((instance_flags & INST_HAS_UV_RECT) != 0) {
			vec4 uv_rect = g_instance_uv_rects[instance_id];
			tex_coord = uv_rect.zw * fract(tex_coord) + uv_rect.xy;
			tex_dx *= uv_rect.zw, tex_dy *= uv_rect.zw;
		}

		vec4 tex_col;
		if((instance_flags & INST_TEX_OPAQUE) != 0)
			tex_col = vec4(textureGrad(opaque_texture, tex_coord, tex_dx, tex_dy).xyz, 1.0);
		else
			tex_col = textureGrad(transparent_texture, tex_coord, tex_dx, tex_dy);
		color *= tex_col;
	}

	if((instance_flags & INST_HAS_VERTEX_COLORS) != 0) {
		vec4 col0, col1, col2;
		getTriangleVertexColors(tri_idx, col0, col1, col2);
		color *= (1.0 - bary[0] - bary[1]) * col0 + (bary[0] * col1 + bary[1] * col2);
	}

	// Fully transparent samples are reported as 0
	if(color.a == 0.0)
		return 0;

	vec3 normal;
	if((instance_flags & INST_HAS_VERTEX_NORMALS) != 0) {
		vec3 nrm0, nrm1, nrm2;
		getTriangleVertexNormals(tri_idx, nrm0, nrm1, nrm2);
		nrm1 -= nrm0;
		nrm2 -= nrm0;
		// The interpolated normal is not re-normalized here
		normal = bary[0] * nrm1 + (bary[1] * nrm2 + nrm0);
	} else {
		normal = decodeNormalUint(g_normals_storage[tri_idx]);
	}

	// dot(-sun_dir, normal) remapped with * 0.7 + 0.3 and clamped at 0
	float light_value = max(0.0, dot(-u_config.lighting.sun_dir.xyz, normal) * 0.7 + 0.3);
	color.rgb = SATURATE(finalShading(u_config.lighting, color.rgb, light_value));
	return encodeRGBA8(color);
}

// Sizes of the per-pixel sample windows in ReductionContext: RC_COLOR_SIZE colors are kept;
// with VISUALIZE_ERRORS one more depth is tracked than colors.
#define RC_COLOR_SIZE 3
#ifdef VISUALIZE_ERRORS
#define RC_DEPTH_SIZE (RC_COLOR_SIZE + 1)
#else
#define RC_DEPTH_SIZE RC_COLOR_SIZE
#endif

// Per-pixel state of sample reduction: a small window of the most recent samples
// (prev_depths / prev_colors) plus the result accumulated so far.
struct ReductionContext {
	// FFS: for some reason vectors produce faster code than arrays on integrated AMDs
#if RC_DEPTH_SIZE == 3
	vec3 prev_depths;
#elif RC_DEPTH_SIZE == 4
	vec4 prev_depths;
#else
	float prev_depths[RC_DEPTH_SIZE];
#endif

#if RC_COLOR_SIZE == 3
	uvec3 prev_colors;
#elif RC_COLOR_SIZE == 4
	uvec4 prev_colors;
#else
	uint prev_colors[RC_COLOR_SIZE];
#endif

	// Multiplied by (1 - alpha) for every blended sample; starts at 1.0
	float out_trans;
	vec3 out_color;
};

// Swaps window entries idx0 and idx1 (both color and depth).
void swap(inout ReductionContext ctx, int idx0, int idx1) {
	swap(ctx.prev_colors[idx0], ctx.prev_colors[idx1]);
	swap(ctx.prev_depths[idx0], ctx.prev_depths[idx1]);
}

// Resets the context: window depths set to a large value, window colors to 0 (0 marks an
// empty slot, see reduceSample / finishReduceSamples), no color accumulated, out_trans 1.
void initReduceSamples(out ReductionContext ctx) {
	for(int i = 0; i < RC_DEPTH_SIZE; i++)
		ctx.prev_depths[i] = 999999999.0;
	for(int i = 0; i < RC_COLOR_SIZE; i++)
		ctx.prev_colors[i] = 0;
	ctx.out_color = vec3(0.0);
	ctx.out_trans = 1.0;
}

// Feeds the samples of the current pixel into the reduction window and blends samples
// which leave the window into out_color.
//
// sample_s holds (encoded color, depth as float bits) of the sample owned by the calling
// invocation; pixel_bitmask selects, by lane index, which invocations' samples belong to
// this pixel. Samples are fetched with subgroupShuffle in order of set bits (LSB first).
//
// Note that the blended color is added to the out_color parameter, not to ctx.out_color,
// while finishReduceSamples() starts from ctx.out_color.
// NOTE(review): presumably callers pass the context's out_color here - callers are not
// visible in this file, confirm.
// Always returns false.
bool reduceSample(inout ReductionContext ctx, inout vec3 out_color, uvec2 sample_s,
				  uint pixel_bitmask) {
	int num_samples = bitCount(pixel_bitmask); // TODO: stall (2.75%, conference)

	// Loop runs as long as any invocation in the subgroup has samples left; the shuffle
	// below is executed before the per-invocation 'continue'.
	while(subgroupAny(num_samples > 0)) {
		int bit = int(findLSB(pixel_bitmask));
		pixel_bitmask &= ~(1u << bit);
#if SUBGROUP_SIZE == HALFGROUP_SIZE
		uvec2 value = subgroupShuffle(sample_s, bit);
#else
		uvec2 value = subgroupShuffle(sample_s, (LIX & 32) + bit);
#endif
		uint color = value.x;
		float depth = uintBitsToFloat(value.y);

		if(num_samples <= 0)
			continue;
		num_samples--;

		// If the new sample's depth value is greater than that of the most recent window
		// entry, the two are exchanged and the displaced entry is moved further down the
		// window for as long as the neighbouring depths are out of order.
		if(depth > ctx.prev_depths[0]) {
			swap(color, ctx.prev_colors[0]);
			swap(depth, ctx.prev_depths[0]);
			if(ctx.prev_depths[0] > ctx.prev_depths[1]) {
				swap(ctx, 0, 1);
				if(ctx.prev_depths[1] > ctx.prev_depths[2]) {
					swap(ctx, 1, 2);
					int i = 3;
					for(; i < RC_DEPTH_SIZE && ctx.prev_depths[i - 1] > ctx.prev_depths[i]; i++)
						swap(ctx, i - 1, i);
#ifdef VISUALIZE_ERRORS
					// Sample travelled through the whole window: counted as an invalid pixel
					// (stat 2) and the pixel is painted red.
					if(i == RC_DEPTH_SIZE) {
						atomicAdd(s_stats[2], 1);
						out_color = vec3(1.0, 0.0, 0.0);
						ctx.out_trans = 0.0;
						continue;
					}
#endif
				}
			}
		}

		// Shift the depth window by one and put the current sample at the front
		for(int i = RC_DEPTH_SIZE - 1; i > 0; i--)
			ctx.prev_depths[i] = ctx.prev_depths[i - 1];
		ctx.prev_depths[0] = depth;

		// The last color in the window is about to be shifted out: blend it now
		if(ctx.prev_colors[RC_COLOR_SIZE - 1] != 0) {
			vec4 cur_color = decodeRGBA8(ctx.prev_colors[RC_COLOR_SIZE - 1]);
#ifdef ADDITIVE_BLENDING
			out_color += cur_color.rgb * cur_color.a;
#else
			out_color += cur_color.rgb * cur_color.a * ctx.out_trans;
			ctx.out_trans *= 1.0 - cur_color.a;

#ifdef ALPHA_THRESHOLD
			if(subgroupAll(ctx.out_trans < alpha_threshold))
				num_samples = 0;
#endif
#endif
		}

		for(int i = RC_COLOR_SIZE - 1; i > 0; i--)
			ctx.prev_colors[i] = ctx.prev_colors[i - 1];
		ctx.prev_colors[0] = color;
	}

	return false;
}

// Blends the samples still kept in the window (from the last entry to the first), then
// adds the background color weighted by the remaining out_trans.
// Returns the saturated color with alpha 1. ctx is passed by value.
vec4 finishReduceSamples(ReductionContext ctx) {
	vec3 out_color = ctx.out_color;

	for(int i = RC_COLOR_SIZE - 1; i >= 0; i--)
		if(ctx.prev_colors[i] != 0) {
			vec4 cur_color = decodeRGBA8(ctx.prev_colors[i]);
			// NOTE: cur_transparency is not used below
			float cur_transparency = 1.0 - cur_color.a;
#ifdef ADDITIVE_BLENDING
			out_color += cur_color.rgb * cur_color.a;
#else
			out_color += cur_color.rgb * cur_color.a * ctx.out_trans;
			ctx.out_trans *= 1.0 - cur_color.a;
#endif
		}

	out_color += ctx.out_trans * u_config.background_color.xyz;
	return vec4(SATURATE(out_color), 1.0);
}

#endif

--------------------------------------------------------------------------------
/data/shaders/raster_low.glsl:
--------------------------------------------------------------------------------
// Copyright (C) Krzysztof Jakubowski
// This file is part of LucidRaster. See license.txt for details.

#version 460

// Workgroup size and its log2
#define LSIZE 256
#define LSHIFT 8

#define BIN_LEVEL BIN_LEVEL_LOW

#include "shared/raster.glsl"

#include "%shader_debug"
DEBUG_SETUP(1, 11)

// Capacity limits of the per-row and per-block triangle lists
#define MAX_BLOCK_ROW_TRIS 1024
#define MAX_BLOCK_TRIS 256
#define MAX_BLOCK_TRIS_SHIFT 8

layout(local_size_x = LSIZE) in;

// Number of g_scratch_64 entries reserved for every workgroup (and its log2)
#define WORKGROUP_SCRATCH_SIZE (16 * 1024)
#define WORKGROUP_SCRATCH_SHIFT 14

// More space needed only for 64x64 bins
// Offset in g_scratch_64 of the triangle list for block row 'by' of this workgroup.
// Every row takes 2 * MAX_BLOCK_ROW_TRIS entries.
uint scratchBlockRowOffset(uint by) {
	return (gl_WorkGroupID.x << WORKGROUP_SCRATCH_SHIFT) + by * (MAX_BLOCK_ROW_TRIS * 2);
}

// Offset in g_scratch_64 of the triangle list for half-block hbid of this workgroup.
// These lists start 8 * 1024 entries into the workgroup's area; every half-block takes
// MAX_BLOCK_TRIS entries.
uint scratchHalfBlockOffset(uint hbid) {
	return (gl_WorkGroupID.x << WORKGROUP_SCRATCH_SHIFT) + 8 * 1024 + hbid * MAX_BLOCK_TRIS;
}

shared uint s_block_row_tri_count[BLOCK_ROWS];
shared uint s_block_tri_count[NUM_BLOCKS];
// Per half-block: fragment count in the high 16 bits, triangle count in the low 16 bits
shared uint s_hblock_counts[NUM_HBLOCKS];
// Number of HIGH-level bin slots which have to be processed because of promotions
// done by this workgroup (see rasterBin / main)
shared int s_promoted_bin_count;

// Rasterizes triangle tri_idx against the block rows of the current bin and appends one
// entry to the list of every row in which the triangle covers something.
//
// Every entry consists of two uvec2 values, stored MAX_BLOCK_ROW_TRIS apart:
//  - first:  (bits0.x | bx_mask << 24, bits1.x)
//  - second: (bits0.y | low 12 bits of tri_idx << 20, bits1.y | bits 12-23 of tri_idx << 20)
// bits0 / bits1 are the results of two consecutive rasterBinStep() calls; bx_mask is the
// OR of their .z components. Only 24 bits of tri_idx are stored.
void generateRowTris(uint tri_idx) {
	uint dst_offset = scratchBlockRowOffset(0);

	uint scan_offset = STORAGE_TRI_SCAN_OFFSET + tri_idx * 2;
	uvec4 val0 = g_uvec4_storage[scan_offset + 0];
	uvec4 val1 = g_uvec4_storage[scan_offset + 1];
	// val0.w: two 16-bit Y values (low: min, high: max); made relative to the bin, clamped
	// and converted to block rows
	int min_by = clamp(int(val0.w & 0xffff) - s_bin_pos.y, 0, BIN_MASK) >> BLOCK_SHIFT;
	int max_by = clamp(int(val0.w >> 16) - s_bin_pos.y, 0, BIN_MASK) >> BLOCK_SHIFT;

	vec2 start = vec2(s_bin_pos.x, s_bin_pos.y + min_by * BLOCK_SIZE);
	ScanlineParams scan = loadScanlineParamsRow(val0, val1, start);

	for(int by = min_by; by <= max_by; by++) {
		uvec3 bits0 = rasterBinStep(scan);
		uvec3 bits1 = rasterBinStep(scan);
		uint bx_mask = bits0.z | bits1.z;
		if(bx_mask == 0)
			continue;

		uint row_idx = atomicAdd(s_block_row_tri_count[by], 1);
		uint roffset = row_idx + by * (MAX_BLOCK_ROW_TRIS * 2);
		g_scratch_64[dst_offset + roffset] = uvec2(bits0.x | (bx_mask << 24), bits1.x);
		g_scratch_64[dst_offset + roffset + MAX_BLOCK_ROW_TRIS] =
			uvec2(bits0.y | ((tri_idx & 0xfff) << 20), bits1.y | ((tri_idx & 0xfff000) << 8));
	}
}

// Generates row triangles for all quads and triangles of the current bin.
void processQuads() {
	// Two consecutive invocations share one quad; each handles one of its triangles.
	// Bits 30 and 31 of a bin-quad entry are cull flags of the first and second triangle,
	// the low 28 bits are the quad index.
	for(uint i = LIX >> 1; i < s_bin_quad_count; i += LSIZE / 2) {
		uint second_tri = LIX & 1;
		uint bin_quad_idx = g_bin_quads[s_bin_quad_offset + i];
		uint quad_idx = bin_quad_idx & 0xfffffff;
		uint cull_flag = (bin_quad_idx >> (30 + second_tri)) & 1;
		if(cull_flag == 1)
			continue;
		generateRowTris(quad_idx * 2 + second_tri);
	}

	// Separate triangles are assigned to invocations in reverse order (last invocation
	// takes the first triangle)
	for(uint i = (LSIZE - 1) - LIX; i < s_bin_tri_count; i += LSIZE)
		generateRowTris(g_bin_tris[s_bin_tri_offset + i]);
}

// Builds the triangle lists of the two half-blocks of block bid. A block is processed by
// one half-group (HALFGROUP_SIZE consecutive invocations).
//
// Steps:
//  1. Select, from the block row's list, triangles which have the bit of column bx set in
//     their bx mask; their row indices go into this half-group's part of s_buffer.
//     If there are MAX_BLOCK_TRIS or more, s_raster_error is set and the function returns.
//  2. For every selected triangle compute its fragment counts in both half-blocks and a
//     22-bit depth; s_buffer entries become  row index | depth << 10.
//  3. Store per-half-block fragment / triangle counts in s_hblock_counts.
//  4. Sort the entries (only if there are more than RC_COLOR_SIZE of them).
//  5. Write final entries for both half-blocks to scratch memory.
void generateBlocks(uint bid) {
	int lbid = int(LIX >> HALFGROUP_SHIFT);
	uint by = bid >> BLOCK_ROWS_SHIFT, bx = bid & BLOCK_ROWS_MASK;

	uint rows_offset = scratchBlockRowOffset(by);
	uint tri_count = s_block_row_tri_count[by];
	uint buf_offset = lbid << MAX_BLOCK_TRIS_SHIFT;
	const uint mini_offset = BASE_BUFFER_SIZE;

	{
		uint bx_bits_mask = 1u << (24 + bx), tri_offset = 0;
		for(uint i = LIX & HALFGROUP_MASK; i < tri_count; i += HALFGROUP_SIZE) {
			uint bx_bits = g_scratch_64[rows_offset + i].x;
			if((bx_bits & bx_bits_mask) != 0) {
				tri_offset = atomicAdd(s_block_tri_count[bid], 1);
				if(tri_offset < MAX_BLOCK_TRIS)
					s_buffer[buf_offset + tri_offset] = i;
			}
		}
		subgroupMemoryBarrierShared();
		// Too many triangles in this block: report an error (handled in rasterBin)
		if(subgroupAny(tri_offset >= MAX_BLOCK_TRIS)) {
			if(gl_SubgroupInvocationID == 0)
				s_raster_error = ~0;
			return;
		}
	}

	tri_count = s_block_tri_count[bid];
	int startx = int(bx << BLOCK_SHIFT);
	vec2 block_pos = vec2(s_bin_pos + ivec2(bx << BLOCK_SHIFT, by << BLOCK_SHIFT));

	// Fragment counts of both half-blocks packed together: first in the low, second in
	// the high 16 bits
	uint frag_count = 0;
	for(uint i = LIX & HALFGROUP_MASK; i < tri_count; i += HALFGROUP_SIZE) {
		uint row_tri_idx = s_buffer[buf_offset + i];

		uvec2 tri_mins = g_scratch_64[rows_offset + row_tri_idx];
		uvec2 tri_maxs = g_scratch_64[rows_offset + row_tri_idx + MAX_BLOCK_ROW_TRIS];
		// Reassembles the 24-bit triangle index stored by generateRowTris()
		uint tri_idx = (tri_maxs.x >> 20) | ((tri_maxs.y & 0xfff00000) >> 8);

		uvec2 num_frags;
		vec2 cpos = rasterHalfBlockCentroid(tri_mins.x, tri_maxs.x, startx, num_frags.x) +
					rasterHalfBlockCentroid(tri_mins.y, tri_maxs.y, startx, num_frags.y);

		uint num_block_frags = num_frags.x + num_frags.y;
		if(num_block_frags == 0) // This means that bx_mask is invalid
			DEBUG_RECORD(0, 0, 0, 0);

		// 22-bit depth
		uint depth =
			rasterBlockDepth(cpos * (0.5 / float(num_block_frags)) + block_pos, tri_idx, 0x3ffffe);
		frag_count += num_frags.x | (num_frags.y << 16);
		// Row index fits in the low 10 bits (MAX_BLOCK_ROW_TRIS == 1024)
		s_buffer[buf_offset + i] = row_tri_idx | (depth << 10);
	}
	subgroupMemoryBarrier();

	frag_count = subgroupInclusiveAddFast32(frag_count);
	// The last invocation of the half-group stores the totals for both half-blocks
	if((LIX & HALFGROUP_MASK) == HALFGROUP_MASK) {
		uint hbid = halfBlockId(lbid + (bid & ~(NUM_HALFGROUPS - 1)));
		uint v0 = frag_count & 0xffff, v1 = frag_count >> 16;
		s_hblock_counts[hbid] = (v0 << 16) | tri_count;
		s_hblock_counts[hbid + HBLOCK_COLS] = (v1 << 16) | tri_count;
	}

	if(tri_count > RC_COLOR_SIZE) {
		// rcount: count rounded up to the next power of 2; minimum: HALFGROUP_SIZE
		uint rcount =
			max(HALFGROUP_SIZE,
				(tri_count & (tri_count - 1)) == 0 ? tri_count : (2 << findMSB(tri_count)));
		sortBuffer(tri_count, rcount, buf_offset, HALFGROUP_SIZE, LIX & HALFGROUP_MASK, false);
	}
	subgroupMemoryBarrierShared();

	// TODO: move to sortBuffer()
#ifdef DEBUG_ENABLED
	// Making sure that tris are properly ordered
	if(tri_count > RC_COLOR_SIZE)
		for(uint i = LIX & HALFGROUP_MASK; i < tri_count; i += HALFGROUP_SIZE) {
			uint value = s_buffer[buf_offset + i];
			uint prev_value = i == 0 ? 0 : s_buffer[buf_offset + i - 1];
			if(value <= prev_value)
				DEBUG_RECORD(i, tri_count, prev_value, value);
		}
#endif

	uint hbid0 = halfBlockId(bid), hbid1 = hbid0 + HBLOCK_COLS;
	uint dst_offset0 = scratchHalfBlockOffset(hbid0);
	uint dst_offset1 = scratchHalfBlockOffset(hbid1);

	// Running fragment offsets of both half-blocks (packed like frag_count above)
	uint base_offset = 0;
	for(uint i = LIX & HALFGROUP_MASK; i < tri_count; i += HALFGROUP_SIZE) {
		uint row_tri_idx = s_buffer[buf_offset + i] & 0x3ff;
		uvec2 tri_mins = g_scratch_64[rows_offset + row_tri_idx];
		uvec2 tri_maxs = g_scratch_64[rows_offset + row_tri_idx + MAX_BLOCK_ROW_TRIS];
		// 24-bit triangle index placed in bits 8-31
		uint tri_idx_shifted = ((tri_maxs.x >> 12) & 0xfff00) | (tri_maxs.y & 0xfff00000);
		uvec2 num_frags_half;
		uvec2 bits = uvec2(rasterHalfBlockBits(tri_mins.x, tri_maxs.x, startx, num_frags_half.x),
						   rasterHalfBlockBits(tri_mins.y, tri_maxs.y, startx, num_frags_half.y));
		uint num_frags = num_frags_half.x | (num_frags_half.y << 16);

		// Exclusive prefix sum of fragment counts within this batch + total of earlier batches
		uint num_frags_accum = subgroupInclusiveAddFast32(num_frags);
		uint cur_offset = base_offset + num_frags_accum - num_frags;

		// NOTE(review): reduceSample() in shared/shading.glsl tests SUBGROUP_SIZE in the
		// equivalent condition, while WARP_SIZE is tested here - confirm that both are defined.
#if WARP_SIZE == HALFGROUP_SIZE
		base_offset += subgroupBroadcast(num_frags_accum, HALFGROUP_MASK);
#else
		base_offset += subgroupShuffle(num_frags_accum, (LIX & 32) + HALFGROUP_MASK);
#endif

		// 12-bit fragment offset of each half-block is split: low 8 bits go to the first
		// word (next to the triangle index), high 4 bits to bits 28-31 of the second word
		uint seg_offset0 = cur_offset & 0xff, seg_offset1 = (cur_offset & 0xff0000) >> 16;
		uint seg_high0 = (cur_offset & 0xf00) << 20, seg_high1 = (cur_offset & 0x0f000000) << 4;
		g_scratch_64[dst_offset0 + i] = uvec2(tri_idx_shifted | seg_offset0, bits.x | seg_high0);
		g_scratch_64[dst_offset1 + i] = uvec2(tri_idx_shifted | seg_offset1, bits.y | seg_high1);
	}
}

// Debug visualization: outputs a gradient color based on the fragment count of half-block
// hbid (alternative sources of counts are left commented out).
void visualizeBlockCounts(uint hbid, ivec2 pixel_pos) {
	uint frag_count = s_hblock_counts[hbid] >> 16;
	uint tri_count = s_hblock_counts[hbid] & 0xffff;
	//tri_count = s_block_tri_count[fullBlockId(hbid)];
	//tri_count = s_block_row_tri_count[pixel_pos.y >> BLOCK_SHIFT];
	//tri_count = s_bin_quad_count * 2 + s_bin_tri_count;

	vec3 color;
	color = gradientColor(frag_count, uvec4(8, 32, 128, 1024) * HALFGROUP_SIZE);
	//color = gradientColor(tri_count, uvec4(16, 64, 256, 1024));

	outputPixel(pixel_pos, vec4(SATURATE(color), 1.0));
}

// Rasterizes the currently loaded bin:
//  1. clears the counters and generates per-row triangle lists (processQuads),
//  2. generates per-half-block lists (generateBlocks),
//  3. if an error was reported, appends the bin to the HIGH-level bin list and returns,
//  4. otherwise, for every half-block: unpacks samples segment by segment, shades and
//     reduces them, and outputs the final pixel colors,
//  5. updates the statistics.
void rasterBin() {
	START_TIMER();

	if(LIX < NUM_BLOCKS) {
		s_block_tri_count[LIX] = 0;
		if(LIX < BLOCK_ROWS)
			s_block_row_tri_count[LIX] = 0;
	}
	barrier();
	processQuads();
	groupMemoryBarrier();
	barrier(); // TODO: stall (7%, conference)
	UPDATE_TIMER(0);

	// One block per half-group at a time
	const int num_blocks = (BIN_SIZE / BLOCK_SIZE) * (BIN_SIZE / BLOCK_SIZE);
	for(uint bid = LIX >> HALFGROUP_SHIFT; bid < num_blocks; bid += NUM_HALFGROUPS)
		generateBlocks(bid);

	barrier();
	// raster_low errors are not visualized, but propagated to high
	if(s_raster_error != 0) {
		if(LIX == 0) {
			int id = atomicAdd(g_info.bin_level_counts[BIN_LEVEL_HIGH], 1);
			HIGH_LEVEL_BINS(id) = s_bin_id;
			s_promoted_bin_count = max(s_promoted_bin_count, id + 1);
		}
		return;
	}
	groupMemoryBarrier();
	UPDATE_TIMER(1);

	for(uint hbid = LIX >> HALFGROUP_SHIFT; hbid < NUM_HBLOCKS; hbid += NUM_HALFGROUPS) {
		ReductionContext context;
		initReduceSamples(context);
		//initVisualizeSamples();

		uint temp_counts = s_hblock_counts[hbid];
		int frag_count = int(temp_counts >> 16);
		uint control_var = initUnpackSamples(temp_counts & 0xffff, temp_counts >> 16);
		// Fragments are processed in segments of SEGMENT_SIZE
		while(frag_count > 0) {
			uint src_offset = scratchHalfBlockOffset(hbid);
			unpackSamples(control_var, src_offset);
			UPDATE_TIMER(2);

			shadeAndReduceSamples(hbid, min(frag_count, SEGMENT_SIZE), context);
			//visualizeSamples(min(frag_count, SEGMENT_SIZE));
			UPDATE_TIMER(3);

#ifdef ALPHA_THRESHOLD
			// Nothing more would be visible for any pixel handled by this subgroup
			if(subgroupAll(context.out_trans < alpha_threshold))
				break;
#endif
			frag_count -= SEGMENT_SIZE;
		}

		ivec2 pixel_pos = halfBlockPixelPos(hbid);
		outputPixel(pixel_pos, finishReduceSamples(context));
		//finishVisualizeSamples(pixel_pos);
		//visualizeBlockCounts(hbid, pixel_pos);
		UPDATE_TIMER(4);
	}

	// The last NUM_HBLOCKS invocations add one half-block's counts each
	if(LIX >= LSIZE - NUM_HBLOCKS) {
		uint counts = s_hblock_counts[(LSIZE - 1) - LIX];
		updateStats(counts >> 16, counts & 0xffff);
	}

	// TODO: we should be able to start processing next bin before all subgroups have finished
	// but we would have to divide work in processQuads differently;
	// We could load bins in double-buffered fashion and once one bin is completely finished, we could load next one
	barrier(); // TODO: stall (10.5%, conference)
}

// TODO: consider removing persistent threads and using acquire/unacquire for storage
// Entry point: rasterizes LOW-level bins for as long as loadNextBin() provides them, then
// commits timers and statistics.
void main() {
	INIT_TIMERS();
	initBinLoader(BIN_LEVEL_LOW);
	if(LIX == 0)
		s_promoted_bin_count = 0;
	initStats();

	while(loadNextBin(BIN_LEVEL_LOW))
		rasterBin();

	// If some of the bins are promoted to the next level, we have to adjust number of dispatches
	if(LIX == 0 && s_promoted_bin_count > 0) {
		uint num_dispatches = min(s_promoted_bin_count, MAX_DISPATCHES / 2);
		atomicMax(g_info.bin_level_dispatches[BIN_LEVEL_HIGH][0], num_dispatches);
	}

	COMMIT_TIMERS(g_info.raster_timers);
	commitStats();
}

--------------------------------------------------------------------------------
/src/tri_optimizer.cpp:
--------------------------------------------------------------------------------
// Copyright (C) Krzysztof Jakubowski
// This file is part of LucidRaster. See license.txt for details.

// Original code source:
// https://github.com/GarageGames/Torque3D/blob/master/Engine/source/gfx/util/triListOpt.cpp
//
// Copyright (c) 2012 GarageGames, LLC
// Licensed under MIT

#include "lucid_base.h"

#include
#include

static constexpr int max_vertex_cache_size = 32;

struct VertData {
	int cache_pos = -1;
	float score = 0.0f;
	uint num_refs = 0;
	uint num_unadded_refs = 0;
	int *tri_index = nullptr;
};

struct TriData {
	ListNode node;
	float score = 0.0f;
	int vert_idx[3] = {0, 0, 0};
	bool is_in_list = false;
};

// Source: http://home.comcast.net/~tom_forsyth/papers/fast_vert_cache_opt.html
static float findVertexScore(const VertData &vertexData) {
	const float cache_decay_power = 1.5f;
	const float last_tri_score = 0.75f;
	const float valence_boost_scale = 2.0f;
	const float valence_boost_power = 0.5f;

	// If nobody needs this vertex, return -1.0
	if(vertexData.num_unadded_refs < 1)
		return -1.0f;
	float score = 0.0f;

	if(vertexData.cache_pos < 0) {
		// Vertex is not in FIFO cache - no score.
	} else {
		if(vertexData.cache_pos < 3) {
			// This vertex was used in the last triangle,
			// so it has a fixed score, whichever of the three
			// it's in. Otherwise, you can get very different
			// answers depending on whether you add
			// the triangle 1,2,3 or 3,1,2 - which is silly.
53 | score = last_tri_score; 54 | } else { 55 | DASSERT((vertexData.cache_pos < max_vertex_cache_size) && 56 | "Out of range cache position for vertex"); 57 | 58 | // Points for being high in the cache. 59 | const float Scaler = 1.0f / (max_vertex_cache_size - 3); 60 | score = 1.0f - (vertexData.cache_pos - 3) * Scaler; 61 | score = pow(score, cache_decay_power); 62 | } 63 | } 64 | 65 | // Bonus points for having a low number of tris still to 66 | // use the vert, so we get rid of lone verts quickly. 67 | float ValenceBoost = pow(vertexData.num_unadded_refs, -valence_boost_power); 68 | score += valence_boost_scale * ValenceBoost; 69 | 70 | return score; 71 | } 72 | 73 | class LRUCacheModel { 74 | public: 75 | LRUCacheModel(Span vertices) : m_vertices(vertices) { 76 | m_entries.reserve(vertices.size()); 77 | } 78 | 79 | void enforceSize(int max_size, Vector &outTrisToUpdate); 80 | void useVertex(int vidx); 81 | int getCachePosition(int vidx); 82 | 83 | private: 84 | static constexpr int null_entry = -1; 85 | 86 | struct Entry { 87 | int next = null_entry; 88 | int vidx = 0; 89 | }; 90 | 91 | Span m_vertices; 92 | vector m_entries; 93 | vector m_empty_entries; 94 | int m_head = null_entry; 95 | }; 96 | 97 | void LRUCacheModel::useVertex(int vidx) { 98 | int search = m_head, last = null_entry; 99 | while(search != null_entry) { 100 | if(m_entries[search].vidx == vidx) 101 | break; 102 | last = search; 103 | search = m_entries[search].next; 104 | } 105 | 106 | // If this vertex wasn't found in the cache, create a new entry 107 | if(search == null_entry) { 108 | if(m_empty_entries) { 109 | search = m_empty_entries.back(); 110 | m_empty_entries.pop_back(); 111 | m_entries[search] = {null_entry, vidx}; 112 | } else { 113 | search = m_entries.size(); 114 | m_entries.emplace_back(null_entry, vidx); 115 | } 116 | } 117 | 118 | if(search != m_head) { 119 | // Unlink the entry from the linked list 120 | if(last != null_entry) 121 | m_entries[last].next = 
m_entries[search].next; 122 | // Vertex that got passed in is now at the head of the cache 123 | m_entries[search].next = m_head; 124 | m_head = search; 125 | } 126 | } 127 | 128 | void LRUCacheModel::enforceSize(int max_size, Vector &out_tris_to_update) { 129 | // Clear list of triangles to update scores for 130 | out_tris_to_update.clear(); 131 | 132 | int length = 0; 133 | int next = m_head, last = null_entry; 134 | 135 | // Run through list, up to the max size 136 | while(next != null_entry && length < max_vertex_cache_size) { 137 | VertData &vert_data = m_vertices[m_entries[next].vidx]; 138 | 139 | // Update cache position on verts still in cache 140 | vert_data.cache_pos = length++; 141 | 142 | for(int i = 0; i < vert_data.num_refs; i++) { 143 | const int &tri_idx = vert_data.tri_index[i]; 144 | if(tri_idx > -1) { 145 | int j = 0; 146 | for(; j < out_tris_to_update.size(); j++) 147 | if(out_tris_to_update[j] == tri_idx) 148 | break; 149 | if(j == out_tris_to_update.size()) 150 | out_tris_to_update.push_back(tri_idx); 151 | } 152 | } 153 | 154 | vert_data.score = findVertexScore(vert_data); 155 | last = next; 156 | next = m_entries[next].next; 157 | } 158 | 159 | // nullptr out the pointer to the next entry on the last valid entry 160 | m_entries[last].next = null_entry; 161 | // If next != nullptr, than we need to prune entries from the tail of the cache 162 | while(next != null_entry) { 163 | // Update cache position on verts which are going to get tossed from cache 164 | m_vertices[m_entries[next].vidx].cache_pos = -1; 165 | m_empty_entries.emplace_back(next); 166 | next = m_entries[next].next; 167 | } 168 | } 169 | 170 | int LRUCacheModel::getCachePosition(const int vidx) { 171 | int length = 0; 172 | int next = m_head; 173 | while(next != null_entry) { 174 | if(m_entries[next].vidx == vidx) 175 | return length; 176 | next = m_entries[next].next; 177 | length++; 178 | } 179 | 180 | return -1; 181 | } 182 | 183 | /// This method will look at the index 
buffer for a triangle list, and generate 184 | /// a new index buffer which is optimized using Tom Forsyth's paper: 185 | /// "Linear-Speed Vertex Cache Optimization" 186 | /// http://home.comcast.net/~tom_forsyth/papers/fast_vert_cache_opt.html 187 | /// @note Both 'indices' and 'outIndices' can point to the same memory. 188 | /// TODO: this is still quite slow 189 | void optimizeTriangleOrdering(const int num_verts, CSpan indices, Span out_indices) { 190 | if(num_verts == 0 || indices.size() == 0) 191 | return; 192 | 193 | DASSERT(indices.size() % 3 == 0); 194 | DASSERT(out_indices.size() == indices.size()); 195 | int num_primitives = indices.size() / 3; 196 | 197 | // Step 1: initialization 198 | vector vertex_data(num_verts); 199 | vector tri_data(num_primitives); 200 | 201 | uint cur_idx = 0; 202 | uint num_refs = 0; 203 | for(int tri = 0; tri < num_primitives; tri++) { 204 | TriData &cur_tri = tri_data[tri]; 205 | 206 | for(int c = 0; c < 3; c++) { 207 | int cur_vidx = indices[cur_idx]; 208 | cur_tri.vert_idx[c] = cur_vidx; 209 | vertex_data[cur_vidx].num_unadded_refs++; 210 | num_refs++; 211 | cur_idx++; 212 | } 213 | } 214 | 215 | PodVector refs(num_refs); 216 | num_refs = 0; 217 | for(int v = 0; v < num_verts; v++) { 218 | VertData &cur_vert = vertex_data[v]; 219 | cur_vert.tri_index = &refs[num_refs]; 220 | num_refs += cur_vert.num_unadded_refs; 221 | cur_vert.score = findVertexScore(cur_vert); 222 | } 223 | 224 | int next_next_best_tri_idx = -1, next_best_tri_idx = -1; 225 | float next_next_best_tri_score = -1.0f, next_best_tri_score = -1.0f; 226 | 227 | auto validate_tri_idx = [&](int idx) { 228 | if(idx > -1) { 229 | DASSERT(idx < num_primitives && "Out of range triangle index."); 230 | DASSERT(!tri_data[idx].is_in_list && "Triangle already in list, bad."); 231 | } 232 | }; 233 | 234 | auto check_next_next_best = [&](float score, int idx) { 235 | if(score > next_next_best_tri_score) { 236 | next_next_best_tri_idx = idx; 237 | next_next_best_tri_score = 
score; 238 | } 239 | }; 240 | 241 | auto check_next_best = [&](float score, int idx) { 242 | if(score > next_best_tri_score) { 243 | check_next_next_best(next_best_tri_score, next_best_tri_idx); 244 | next_best_tri_idx = idx; 245 | next_best_tri_score = score; 246 | } 247 | validate_tri_idx(next_best_tri_idx); 248 | }; 249 | 250 | // TODO: use heap? 251 | std::map> ordered_tris; 252 | 253 | auto remove_ordered_tri = [&](int tri_idx) { 254 | auto &tri = tri_data[tri_idx]; 255 | auto it = ordered_tris.find(tri.score); 256 | listRemove([&](int i) -> ListNode & { return tri_data[i].node; }, it->second, tri_idx); 257 | if(it->second.empty()) 258 | ordered_tris.erase(it); 259 | }; 260 | 261 | auto get_next_best_tris = [&]() { 262 | int count = 0; 263 | auto it = ordered_tris.begin(); 264 | while(count < 2 && it != ordered_tris.end()) { 265 | auto idx = it->second.head; 266 | while(idx != -1 && count < 2) { 267 | check_next_best(it->first, idx); 268 | check_next_next_best(it->first, idx); 269 | idx = tri_data[idx].node.next; 270 | count++; 271 | } 272 | it++; 273 | } 274 | }; 275 | 276 | auto add_ordered_tri = [&](int tri_idx) { 277 | auto &tri = tri_data[tri_idx]; 278 | auto &list = ordered_tris[tri.score]; 279 | listInsert([&](int i) -> ListNode & { return tri_data[i].node; }, list, tri_idx); 280 | }; 281 | 282 | // Fill-in per-vertex triangle lists, and sum the scores of each vertex used 283 | // per-triangle, to get the starting triangle score 284 | cur_idx = 0; 285 | for(int tri = 0; tri < num_primitives; tri++) { 286 | TriData &cur_tri = tri_data[tri]; 287 | for(int c = 0; c < 3; c++) { 288 | VertData &cur_vert = vertex_data[indices[cur_idx]]; 289 | cur_vert.tri_index[cur_vert.num_refs++] = tri; 290 | cur_tri.score += cur_vert.score; 291 | cur_idx++; 292 | } 293 | add_ordered_tri(tri); 294 | } 295 | get_next_best_tris(); 296 | 297 | // Step 2: Start emitting triangles...this is the emit loop 298 | LRUCacheModel lru_cache(vertex_data); 299 | 300 | for(int out_idx = 
0; out_idx < indices.size();) { 301 | // If there is no next best triangle, than search for the next highest 302 | // scored triangle that isn't in the list already 303 | if(next_best_tri_idx < 0) { 304 | next_best_tri_score = next_next_best_tri_score = -1.0f; 305 | next_best_tri_idx = next_next_best_tri_idx = -1; 306 | get_next_best_tris(); 307 | } 308 | DASSERT(next_best_tri_idx > -1); 309 | 310 | TriData &next_best_tri = tri_data[next_best_tri_idx]; 311 | DASSERT(!next_best_tri.is_in_list); 312 | for(int i = 0; i < 3; i++) { 313 | out_indices[out_idx++] = int(next_best_tri.vert_idx[i]); 314 | VertData &cur_vert = vertex_data[next_best_tri.vert_idx[i]]; 315 | cur_vert.num_unadded_refs--; 316 | for(int t = 0; t < cur_vert.num_refs; t++) { 317 | if(cur_vert.tri_index[t] == next_best_tri_idx) { 318 | cur_vert.tri_index[t] = -1; 319 | break; 320 | } 321 | } 322 | lru_cache.useVertex(next_best_tri.vert_idx[i]); 323 | } 324 | 325 | next_best_tri.is_in_list = true; 326 | remove_ordered_tri(next_best_tri_idx); 327 | 328 | // Enforce cache size, this will update the cache position of all verts 329 | // still in the cache. It will also update the score of the verts in the 330 | // cache, and give back a list of triangle indicies that need updating. 
331 | vector tris_to_update; 332 | lru_cache.enforceSize(max_vertex_cache_size, tris_to_update); 333 | 334 | // Now update scores for triangles that need updates, and find the new best 335 | // triangle score/index 336 | next_best_tri_idx = -1; 337 | next_best_tri_score = -1.0f; 338 | 339 | // TODO: use idx directly 340 | for(auto itr = tris_to_update.begin(); itr != tris_to_update.end(); itr++) { 341 | TriData &tri = tri_data[*itr]; 342 | 343 | // If this triangle isn't already emitted, re-score it 344 | if(!tri.is_in_list) { 345 | remove_ordered_tri(*itr); 346 | tri.score = 0.0f; 347 | for(int i = 0; i < 3; i++) 348 | tri.score += vertex_data[tri.vert_idx[i]].score; 349 | check_next_best(tri.score, *itr); 350 | check_next_next_best(tri.score, *itr); 351 | add_ordered_tri(*itr); 352 | } 353 | } 354 | 355 | // If there was no love finding a good triangle, than see if there is a 356 | // next-next-best triangle, and if there isn't one of those...well than 357 | // I guess we have to find one next time 358 | if(next_best_tri_idx < 0 && next_next_best_tri_idx > -1) { 359 | if(!tri_data[next_next_best_tri_idx].is_in_list) { 360 | next_best_tri_idx = next_next_best_tri_idx; 361 | next_best_tri_score = next_next_best_tri_score; 362 | validate_tri_idx(next_next_best_tri_idx); 363 | } 364 | 365 | // Nuke the next-next best 366 | next_next_best_tri_idx = -1; 367 | next_next_best_tri_score = -1.0f; 368 | } 369 | 370 | // Validate triangle we are marking as next-best 371 | validate_tri_idx(next_best_tri_idx); 372 | } 373 | } 374 | -------------------------------------------------------------------------------- /src/scene_setup.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) Krzysztof Jakubowski 2 | // This file is part of LucidRaster. See license.txt for details. 3 | 4 | // TODO: safety for data types: when loading how can we make sure 5 | // that there are no leftovers from previous processing ? 
//
// TODO: when de-serializing enums we have to make sure that they are in a valid range;
// if not, then sanitize them and report an error!
//
// At any moment something in the stream can break; how do we fix that?
// Do we want to put a limit on the size of a string?
// Could we have a loadString function which takes such an argument?
// OK, if there is an error, then how do we report it?
//
// The stream can certainly store the error internally, but how do we get it out?
//
// A) catching the error in the stream manually?
// B) if some other error is raised, we also have to check whether an error occurred earlier in the stream
//    Maybe do it similarly to how it's done with XML?
//    - when closing the stream we could force the error to be retrieved?
// C) any operation on the stream which generates an error: EX<>
//
// Should all of these operations return EX<>?
// Maybe none of them should? Either we report inside the stream or we return Ex<>
// Then
// I want to know when something bad happened in the stream
//
// I can design streams so that streaming errors are hidden by default, i.e.
// I don't have to return Ex<> everywhere
//
// Can't I simply assume that the data is correct?
//
// Let's specify the requirements which we have for all the subsystems
//
//
// Stream:
// - a simple way of streaming data; supported options:
//   files / pipes (later) / memory
// - if something breaks, then nothing happens when writing, and when reading
//   it returns 0 and that's it
// - it stores the first error which occurred; that error can be retrieved
//   ** how to make sure that the error is not ignored? **
//   when the stream is destroyed, print it somehow if it wasn't
//   caught? later a simple warning system could be added... OK!
//   something bothers me here...
//   the fact that the error can be ignored, or something else?
//
// - only basic types are serializable by default?
//   ok, and what about: vector<>, hash_map<>, string, pair<> ?
//   can I put limits here as well?
//   or first build it for the basic types and then extend as needed!
//
//
// The same with XML and with the parser?
// - the parser and the XML file can store information about errors
//   or maybe I simply need a global system for reporting errors?
//   not much would change except for the location of errors
//
// The point is to

// Ex< T & > >> works if it doesn't contain an error; in case of an error it ignores subsequent calls?
// or maybe, instead of expected, simply always keep the serialization error inside the stream?
// serializing functions wouldn't return ex<>, they would only set it in the stream?
// OK, but an error in the stream can be easily ignored?
//stream >> meshes >> materials >> instances;

// TODO: extended support for exConstruct ?
// TODO: emplace_back could have an option of constructing from EX<>
//       in case of an error it doesn't add the element?
// TODO: maybe a loadMany function would be useful in such a situation?
//       The problem here is with Ex and not with load: the point is for it to be
//       passed along as conveniently as an exception; How to name such a function?
//       a function which runs a set of functions multiple times
//       in general construct<> should be efficient (copy elision)

// How do I turn an error in the stream into EX<> ?
// do I have to somehow detect it and return it?
// I have to handle situations where the data is invalid anyway, the only point is,

// Serialization options would also be useful, so that these functions are sensibly extensible
// and the interface doesn't limit me...
//
// Does the stream only support POD types? the rest manually?
// exConstruct could also be used instead of load?
85 | 86 | #include "scene_setup.h" 87 | 88 | #include "shading.h" 89 | #include 90 | #include 91 | #include 92 | #include 93 | #include 94 | #include 95 | 96 | FilePath mainPath(); 97 | 98 | SceneSetup::SceneSetup(string name) : name(std::move(name)) {} 99 | SceneSetup::~SceneSetup() = default; 100 | 101 | BoxesSetup::BoxesSetup() : SceneSetup("#boxes") { render_config.scene_opacity = 0.5; } 102 | PlanesSetup::PlanesSetup() : SceneSetup("#planes") { render_config.scene_opacity = 0.25; } 103 | 104 | void BoxesSetup::doMenu(VDeviceRef device) { 105 | auto scene_dims = m_dims; 106 | auto &gui = Gui::instance(); 107 | gui.text("Dimensions:"); 108 | ImGui::SameLine(); 109 | if(ImGui::InputInt3("##dims", &scene_dims.x, ImGuiInputTextFlags_EnterReturnsTrue)) { 110 | scene_dims = vclamp(scene_dims, int3(1), int3(16)); 111 | if(scene_dims != m_dims) { 112 | m_dims = scene_dims; 113 | if(scene) 114 | updateScene(device).check(); 115 | } 116 | } 117 | } 118 | 119 | string BoxesSetup::sceneId() const { return format("#boxes_%", m_dims); } 120 | 121 | void PlanesSetup::doMenu(VDeviceRef device) { 122 | auto &gui = Gui::instance(); 123 | int label_size = (int)ImGui::CalcTextSize("Num planes").x; 124 | ImGui::SetNextItemWidth(220 * gui.dpiScale() - label_size); 125 | int num_planes = m_num_planes; 126 | ImGui::SliderInt("Num planes", &num_planes, 1, 256); 127 | if(scene && m_num_planes != num_planes) { 128 | m_num_planes = num_planes; 129 | updateScene(device).check(); 130 | } 131 | } 132 | 133 | string PlanesSetup::sceneId() const { return format("#planes_%", m_num_planes); } 134 | 135 | static void addBox(Scene &scene, SceneMesh &out, IColor color, float size, float3 pos) { 136 | auto corners = (FBox(float3(size)) + pos).corners(); 137 | array tris[12] = {{0, 2, 3}, {0, 3, 1}, {1, 3, 7}, {1, 7, 5}, {2, 6, 7}, {2, 7, 3}, 138 | {0, 6, 2}, {0, 4, 6}, {0, 5, 4}, {0, 1, 5}, {4, 7, 6}, {4, 5, 7}}; 139 | 140 | int off = scene.positions.size(); 141 | for(auto &tri : tris) 142 | 
out.tris.emplace_back(tri[0] + off, tri[1] + off, tri[2] + off); 143 | insertBack(scene.positions, corners); 144 | scene.colors.resize(scene.colors.size() + corners.size(), color); 145 | } 146 | 147 | static void addQuad(Scene &scene, SceneMesh &out, IColor color, float size, float3 pos) { 148 | auto corners = (FRect(float2(size)) + pos.xy()).corners(); 149 | array tris[2] = {{0, 1, 2}, {0, 2, 3}}; 150 | 151 | int off = scene.positions.size(); 152 | for(auto &tri : tris) 153 | out.tris.emplace_back(tri[0] + off, tri[1] + off, tri[2] + off); 154 | for(auto &corner : corners) 155 | scene.positions.emplace_back(float3(corner, pos.z)); 156 | scene.colors.resize(scene.colors.size() + corners.size(), color); 157 | } 158 | 159 | Ex<> BoxesSetup::updateScene(VDeviceRef device) { 160 | if(m_current_dims == m_dims && scene) 161 | return {}; 162 | 163 | m_current_dims = m_dims; 164 | float3 offset = -float3(m_dims) * (m_box_size + m_box_dist) * 0.5f; 165 | float3 col_scale = vinv(float3(m_dims) - float3(1)); 166 | Random rand; 167 | 168 | scene = Scene{}; 169 | scene->id = sceneId(); 170 | SceneMesh mesh; 171 | for(int x = 0; x < m_dims.x; x++) { 172 | for(int y = 0; y < m_dims.y; y++) { 173 | for(int z = 0; z < m_dims.z; z++) { 174 | float3 pos = offset + float3(x, y, z) * (m_box_size + m_box_dist); 175 | FColor color(float3(x, y, z) * col_scale, 1.0f); 176 | pos += rand.sampleBox(float3(-0.1f), float3(0.1f)); 177 | addBox(*scene, mesh, IColor(color), m_box_size, pos); 178 | } 179 | } 180 | } 181 | mesh.bounding_box = enclose(scene->positions); 182 | mesh.colors_opaque = true; 183 | 184 | scene->materials.emplace_back("default"); 185 | scene->meshes.emplace_back(std::move(mesh)); 186 | scene->generateQuads(4.0f); 187 | 188 | views = {OrbitingCamera({}, 10.0f, 0.5f, 0.8f)}; 189 | if(!camera) 190 | camera = views.front(); 191 | return scene->updateRenderingData(*device); 192 | } 193 | 194 | Ex<> PlanesSetup::updateScene(VDeviceRef device) { 195 | if(m_current_planes == 
m_num_planes && scene) 196 | return {}; 197 | m_current_planes = m_num_planes; 198 | 199 | scene = Scene{}; 200 | scene->id = sceneId(); 201 | SceneMesh mesh; 202 | for(int z = 0; z < m_num_planes; z++) { 203 | float size = m_plane_size * (1.0 + float(z) * 0.05); 204 | float t = float(z) / (m_num_planes - 1); 205 | float3 pos = float3(-0.5, -0.5, 0.0) * size + float3(0.0, 0, z * m_plane_dist); 206 | FColor color(hsvToRgb(float3(t, 1.0, 1.0)), 1.0f); 207 | addQuad(*scene, mesh, IColor(color), size, pos); 208 | } 209 | mesh.bounding_box = enclose(scene->positions); 210 | mesh.colors_opaque = true; 211 | 212 | scene->materials.emplace_back("default"); 213 | scene->meshes.emplace_back(std::move(mesh)); 214 | scene->generateQuads(4.0f); 215 | 216 | // TODO: rasterization bugs visible in this scene when camera forward 217 | // vector is equal to -plane.normal 218 | views = {FppCamera({0, 0, -5.0}, normalize(float2(0.001, 1.0)), 0.001)}; 219 | if(!camera) 220 | camera = views.front(); 221 | return scene->updateRenderingData(*device); 222 | } 223 | 224 | LoadedSetup::LoadedSetup(string name) : SceneSetup(std::move(name)) {} 225 | 226 | Ex<> LoadedSetup::updateScene(VDeviceRef device) { 227 | if(scene) 228 | return {}; 229 | auto path = format("%/scenes/%.scene", mainPath(), name); 230 | scene = EX_PASS(Scene::load(path)); 231 | scene->id = name; 232 | 233 | auto name = this->name; 234 | if(name.rfind("_old") != string::npos) 235 | name.resize(name.size() - 4); 236 | 237 | if(isOneOf(name, "bunny", "hairball", "teapot")) 238 | render_config.scene_opacity = 0.5; 239 | else 240 | render_config.scene_opacity = 0.8; 241 | if(name == "backpack") { 242 | render_config.scene_opacity = 1.0f; 243 | render_config.pbr_mode = true; 244 | } 245 | 246 | EXPECT(scene->updateRenderingData(*device)); 247 | 248 | auto box = scene->bounding_box; 249 | auto max_size = max(box.width(), box.height(), box.depth()); 250 | OrbitingCamera default_cam(box.center(), max_size, 0.5f, 0.8f); 251 | 252 | 
views.clear(); 253 | if(name == "powerplant") { 254 | insertBack(views, 255 | {FppCamera{{6.479178, 15.869515, -5.917777}, {-0.349199, 0.602876}, 0.847362}, 256 | FppCamera{{-5.031062, 14.030015, 8.243547}, {0.491592, -0.493698}, 0.405695}, 257 | FppCamera{{-5.077578, 2.493423, 10.024123}, {0.534752, 0.446587}, 0.162463}}); 258 | } else if(name == "gallery") { 259 | insertBack(views, 260 | {FppCamera{{-0.518197, 11.031467, -28.708052}, {-0.034821, 0.695836}, 0.180695}, 261 | FppCamera{{-10.443496, 9.609625, -6.724856}, {0.483258, 0.501858}, 0.364028}}); 262 | } else if(name == "conference") { 263 | insertBack( 264 | views, 265 | {FppCamera{{46.586071, 4.31637, -15.807777}, {-0.775055, 0.554252}, 0.106798}, 266 | FppCamera{{50.89043, 4.515099, 19.323435}, {-0.863081, -0.403725}, 0.031798}, 267 | FppCamera{{57.927486, 19.688606, -36.936211}, {-0.834299, 0.460266}, 0.273465}}); 268 | } else if(name == "dragon") { 269 | views.emplace_back(OrbitingCamera{{-1.61925, 6.953201, 2.7753}, 100, -0.933333, 0.516666}); 270 | } else if(name == "sponza") { 271 | insertBack(views, 272 | {FppCamera{{-4.045459, 12.368692, -4.060153}, {-0.075328, 0.69262}, 0.639028}, 273 | FppCamera{{28.343906, 16.751987, -5.52777}, {-0.638277, 0.279287}, 0.147361}, 274 | FppCamera{{30.492384, 5.880484, -1.414973}, {-0.69641, 0.020292}, 0.014028}, 275 | FppCamera{{21.594707, 4.578131, 5.349687}, {-0.69641, 0.020292}, 0.014028}}); 276 | } else if(name == "san-miguel") { 277 | insertBack(views, 278 | {FppCamera{{34.412136, 26.08173, 19.988665}, {-0.487458, -0.497777}, 0.814029}, 279 | FppCamera{{18.670315, 1.785693, 8.138441}, {-0.461969, -0.521516}, 0.672362}}); 280 | } else if(name == "white_oak") { 281 | insertBack(views, 282 | {OrbitingCamera{{-1.35474, 44.001759, -1.40296}, 32.840042, 0.483333, 0.425}, 283 | default_cam}); 284 | } else if(name == "hairball") { 285 | views.emplace_back(default_cam); 286 | views.emplace_back( 287 | FppCamera{{0.133944, 0.298333, -0.290384}, {-0.334019, 0.611418}, 
0.830695}); 288 | } else if(name == "buddha") { 289 | views.emplace_back( 290 | OrbitingCamera{{-2.746651, 75.126877, -3.378395}, 103.999908, 3.108339, 0.266667}); 291 | } else if(name == "dragon2") { 292 | views.emplace_back( 293 | OrbitingCamera{{1.100576, -3.339322, 4.876184}, 73.448845, 0.741666, 0.283334}); 294 | } else { 295 | views.emplace_back(default_cam); 296 | } 297 | 298 | if(isOneOf(name, "powerplant", "conference", "bunny", "dragon")) 299 | render_config.backface_culling = true; 300 | 301 | if(!camera) 302 | camera = views.front(); 303 | return {}; 304 | } 305 | 306 | vector LoadedSetup::findAll() { 307 | auto out = findFiles("scenes/", ".scene"); 308 | makeSorted(out); 309 | return out; 310 | } 311 | --------------------------------------------------------------------------------