├── img ├── teaser.jpg └── cc-by.svg ├── include ├── constants.h ├── scene.h ├── debug_plane.h ├── context.h ├── utils.h └── stb_image_write.h ├── shaders ├── simple.vert.glsl ├── extensions.glsl ├── plane.vert.glsl ├── dense_eval.comp.glsl ├── plane.frag.glsl ├── culling.comp.glsl ├── eval.glsl ├── common_culling.glsl ├── common.glsl └── simple.frag.glsl ├── .gitmodules ├── scenes └── test.json ├── scripts └── make_package.py ├── LICENCE ├── CMakeLists.txt ├── README.md ├── .github └── workflows │ └── cmake-multi-platform.yml └── src ├── debug_plane.cpp ├── utils.cpp ├── scene.cpp └── main.cpp /img/teaser.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wbrbr/LipschitzPruning/HEAD/img/teaser.jpg -------------------------------------------------------------------------------- /include/constants.h: -------------------------------------------------------------------------------- 1 | const int OP_UNION = 0; 2 | const int OP_SUB = 1; 3 | const int OP_INTER = 2; 4 | 5 | const int SHADING_MODE_SHADED = 0; 6 | const int SHADING_MODE_HEATMAP = 1; 7 | const int SHADING_MODE_NORMALS = 2; 8 | const int SHADING_MODE_BEAUTY = 3; -------------------------------------------------------------------------------- /include/scene.h: -------------------------------------------------------------------------------- 1 | #ifndef SDFCULLING_SCENE_H 2 | #define SDFCULLING_SCENE_H 3 | #include "utils.h" 4 | 5 | void load_json(const char* path, std::vector& nodes, glm::vec3& aabb_min, glm::vec3& aabb_max); 6 | void write_json(const std::vector& nodes, const char* path); 7 | #endif //SDFCULLING_SCENE_H 8 | -------------------------------------------------------------------------------- /shaders/simple.vert.glsl: -------------------------------------------------------------------------------- 1 | #version 450 2 | #extension GL_ARB_separate_shader_objects : enable 3 | 4 | 5 | vec2 positions[3] = vec2[](vec2 (-1, 1), vec2 (3, 
-1), vec2 (-1, 3)); 6 | 7 | void main () 8 | { 9 | vec2 uv = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); 10 | gl_Position = vec4(uv * 2.0f + -1.0f, 0.0f, 1.0f); 11 | } -------------------------------------------------------------------------------- /include/debug_plane.h: -------------------------------------------------------------------------------- 1 | #ifndef SDFCULLING_DEBUG_PLANE_H 2 | #define SDFCULLING_DEBUG_PLANE_H 3 | #include "utils.h" 4 | 5 | void create_debug_plane_pipeline(Init& init, RenderData& render_data, VkPipeline& pipeline, VkPipelineLayout& pipeline_layout); 6 | void draw_debug_plane(const Init& init, const RenderData& data, VkCommandBuffer cmd_buf); 7 | 8 | #endif //SDFCULLING_DEBUG_PLANE_H 9 | -------------------------------------------------------------------------------- /shaders/extensions.glsl: -------------------------------------------------------------------------------- 1 | #extension GL_ARB_separate_shader_objects : enable 2 | #extension GL_EXT_buffer_reference : enable 3 | #extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable 4 | #extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable 5 | #extension GL_EXT_shader_8bit_storage : require 6 | #extension GL_KHR_shader_subgroup_vote : enable 7 | #extension GL_KHR_shader_subgroup_ballot : enable 8 | #extension GL_EXT_debug_printf : enable 9 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "ext/vk-bootstrap"] 2 | path = ext/vk-bootstrap 3 | url = https://github.com/charles-lunarg/vk-bootstrap 4 | [submodule "ext/glfw"] 5 | path = ext/glfw 6 | url = https://github.com/glfw/glfw 7 | [submodule "ext/glm"] 8 | path = ext/glm 9 | url = https://github.com/g-truc/glm 10 | [submodule "ext/imgui"] 11 | path = ext/imgui 12 | url = https://github.com/ocornut/imgui 13 | [submodule "ext/rapidjson"] 14 | path = ext/rapidjson 15 | 
url = https://github.com/Tencent/rapidjson/ 16 | -------------------------------------------------------------------------------- /scenes/test.json: -------------------------------------------------------------------------------- 1 | { 2 | "aabb_min": [-2,-2,-2], 3 | "aabb_max": [2,2,2], 4 | "nodeType": "binaryOperator", 5 | "blendMode": "sub", 6 | "blendRadius": 0.1, 7 | "matrix": [ 8 | 1, 0, 0, 0, 9 | 0, 0.866, 0.5, 0, 10 | 0, -0.5, 0.866, 0 ], 11 | "leftChild": { 12 | "nodeType": "primitive", 13 | "matrix": [ 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0 ], 14 | "primitiveType": "sphere", 15 | "radius": 1, 16 | "round_x": 0, 17 | "round_y": 0, 18 | "color": [1,0,0] 19 | }, 20 | "rightChild": { 21 | "nodeType": "primitive", 22 | "matrix": [ 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0 ], 23 | "primitiveType": "cylinder", 24 | "height": 2, 25 | "radius": 0.5, 26 | "round_x": 0, 27 | "round_y": 0, 28 | "color": [0,1,0] 29 | } 30 | } -------------------------------------------------------------------------------- /scripts/make_package.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import shutil 3 | import glob 4 | 5 | os.system('cmake --build . 
--config Release') 6 | 7 | PKG_DIR = 'LipschitzPruning' 8 | 9 | if os.path.exists(PKG_DIR): 10 | shutil.rmtree(PKG_DIR) 11 | 12 | os.mkdir(f'{PKG_DIR}') 13 | os.mkdir(f'{PKG_DIR}/bin') 14 | os.mkdir(f'{PKG_DIR}/scenes') 15 | 16 | for spv_path in glob.glob('*.spv'): 17 | print(spv_path) 18 | shutil.copyfile(spv_path, f'{PKG_DIR}/bin/{spv_path}') 19 | 20 | shutil.copyfile('Release/LipschitzPruning.exe', f'{PKG_DIR}/bin/LipschitzPruning.exe') 21 | 22 | for scene_path in glob.glob('*.json', root_dir='../scenes'): 23 | print(scene_path) 24 | shutil.copyfile('../scenes/' + scene_path, f'{PKG_DIR}/scenes/{scene_path}') 25 | 26 | shutil.make_archive('LipschitzPruning', 'zip', PKG_DIR) -------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | Copyright 2025 Wilhem Barbier 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /shaders/plane.vert.glsl: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "../include/constants.h" 4 | #include "extensions.glsl" 5 | #include "common.glsl" 6 | 7 | layout(push_constant) uniform PushConstant{ 8 | vec4 aabb_min; 9 | vec4 aabb_max; 10 | vec4 farfield_color; 11 | PrimitivesRef prims; 12 | BinaryOpsRef binary_ops; 13 | NodesRef nodes; 14 | ActiveNodesRef active_nodes_out; 15 | IntArrayRef cells_offset; 16 | IntArrayRef cells_num_active; 17 | FloatArrayRef cell_error_out; 18 | Mat4Ref mvp; 19 | int total_num_nodes; 20 | int grid_size; 21 | float plane_y; 22 | float viz_max; 23 | float plane_alpha; 24 | }; 25 | 26 | 27 | vec2 uvs[] = { 28 | vec2(0,0), vec2(1,0), vec2(0,1), 29 | vec2(0,1), vec2(1,0), vec2(1,1) 30 | }; 31 | 32 | layout(location = 0) out vec3 o_Position; 33 | 34 | void main() { 35 | vec2 uv = uvs[gl_VertexIndex]; 36 | 37 | vec2 xz = aabb_min.xz + (aabb_max - aabb_min).xz * uv; 38 | 39 | vec3 p = vec3(xz.x, plane_y, xz.y); 40 | o_Position = p; 41 | gl_Position = mvp.m * vec4(p,1); 42 | gl_Position.y *= -1; 43 | } -------------------------------------------------------------------------------- /shaders/dense_eval.comp.glsl: -------------------------------------------------------------------------------- 1 | #version 460 core 2 | #include "extensions.glsl" 3 | 4 | layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in; 5 | 6 | #include "../include/constants.h" 7 | #include "common.glsl" 8 | 9 | layout(push_constant) uniform PushConstant { 10 | vec4 aabb_min; 11 | vec4 aabb_max; 12 | PrimitivesRef prims; 13 | BinaryOpsRef binary_ops; 14 | NodesRef nodes; 15 | ActiveNodesRef active_nodes_out; 16 | IntArrayRef cells_offset; 17 | IntArrayRef cells_num_active; 18 | FloatArrayRef cell_error_out; 19 | FloatArrayRef output_dist; 20 | int total_num_nodes; 21 | int grid_size; 22 | int 
culling_enabled; 23 | }; 24 | 25 | #include "eval.glsl" 26 | 27 | void main() { 28 | ivec3 cell = ivec3(gl_GlobalInvocationID.xyz); 29 | if (any(greaterThanEqual(cell, ivec3(grid_size)))) return; 30 | 31 | 32 | vec3 cell_size = (aabb_max - aabb_min).xyz / float(grid_size); 33 | int cell_idx = int(get_cell_idx(cell, grid_size)); 34 | 35 | vec3 p = vec3(aabb_min) + cell_size * (0.5 + vec3(cell)); 36 | bool nf; 37 | 38 | if (bool(culling_enabled)) { 39 | output_dist.tab[cell_idx] = sdf_active(p, cell_idx, nf); 40 | } else { 41 | output_dist.tab[cell_idx] = sdf(p); 42 | } 43 | } -------------------------------------------------------------------------------- /include/context.h: -------------------------------------------------------------------------------- 1 | #ifndef SDFCULLING_CONTEXT_H 2 | #define SDFCULLING_CONTEXT_H 3 | #include "utils.h" 4 | 5 | const int WIDTH = 1920; 6 | const int HEIGHT = 1080; 7 | 8 | struct Timings { 9 | float culling_elapsed_ms; 10 | float tracing_elapsed_ms; 11 | float render_elapsed_ms; 12 | float eval_grid_elapsed_ms; 13 | float pruning_mem_usage_gb; 14 | float tracing_mem_usage_gb; 15 | }; 16 | 17 | struct GPUNode { 18 | NodeType type; 19 | int idx_in_type; 20 | }; 21 | 22 | class Context { 23 | public: 24 | void initialize(bool gui, int final_grid_lvl); 25 | Timings render(glm::vec3 cam_position, glm::vec3 cam_target=glm::vec3(0)); 26 | void upload(const std::vector& nodes, int root_idx); 27 | void alloc_input_buffers(int num_nodes); 28 | 29 | Init init; 30 | RenderData render_data; 31 | bool gui; 32 | bool culling = true; 33 | }; 34 | 35 | void create_culling_pipelines(Init& init, RenderData& render_data); 36 | int create_graphics_pipeline(Init& init, RenderData& data); 37 | int ConvertToGPUTree(int root_idx, const std::vector& csg_nodes, std::vector& gpu_nodes, std::vector& primitives, std::vector& binary_ops, std::vector& parent, std::vector& active_nodes); 38 | void UploadGPUTree(const std::vector& binary_ops, const std::vector& 
gpu_nodes, const std::vector& primitives, const std::vector& parent, const std::vector& active_nodes, RenderData& render_data, Init& init); 39 | void get_pipeline_stats(Init& init, VkPipeline pipeline, uint32_t executable_idx, char* buf, uint32_t buf_size); 40 | 41 | 42 | #endif //SDFCULLING_CONTEXT_H 43 | -------------------------------------------------------------------------------- /shaders/plane.frag.glsl: -------------------------------------------------------------------------------- 1 | #version 460 core 2 | #include "../include/constants.h" 3 | #include "extensions.glsl" 4 | 5 | #include "common.glsl" 6 | 7 | layout(push_constant) uniform PushConstant{ 8 | vec4 aabb_min; 9 | vec4 aabb_max; 10 | vec4 farfield_color; 11 | PrimitivesRef prims; 12 | BinaryOpsRef binary_ops; 13 | NodesRef nodes; 14 | ActiveNodesRef active_nodes_out; 15 | IntArrayRef cells_offset; 16 | IntArrayRef cells_num_active; 17 | FloatArrayRef cell_error_out; 18 | Mat4Ref mvp; 19 | int total_num_nodes; 20 | int grid_size; 21 | float plane_y; 22 | float viz_max; 23 | float plane_alpha; 24 | }; 25 | 26 | layout(location = 0) out vec4 o_Color; 27 | layout(location = 0) in vec3 i_Position; 28 | 29 | #include "eval.glsl" 30 | 31 | vec3 isoline_cmap(float d) 32 | { 33 | d = max(d, 0.0); 34 | float c = cos(100.0f* d) > 0.25 ? 
0.8 : 1.0; 35 | vec3 col = mix(vec3(1,1,1), vec3(0,0.5,0), d / 1.0); 36 | return col * vec3(c); 37 | } 38 | 39 | void main() { 40 | o_Color = vec4(1,0,0,1); 41 | 42 | vec3 cell_size = (aabb_max - aabb_min).xyz / float(grid_size); 43 | ivec3 cell = ivec3(clamp((i_Position - aabb_min.xyz) / cell_size, ivec3(0), ivec3(grid_size-1))); 44 | int cell_idx = int(get_cell_idx(cell, grid_size)); 45 | 46 | int num_active = cells_num_active.tab[cell_idx]; 47 | //vec3 col = inferno(min(float(num_active) / float(viz_max),1)); 48 | //vec3 col = isoline_cmap(sdf(i_Position)*2); 49 | 50 | bool nf; 51 | float d_active = sdf_active(i_Position, cell_idx, nf); 52 | float d = sdf(i_Position); 53 | float err = abs(d - d_active); 54 | vec3 col = inferno(min(err / viz_max, 1)); 55 | 56 | o_Color = vec4(col, 1); 57 | } -------------------------------------------------------------------------------- /shaders/culling.comp.glsl: -------------------------------------------------------------------------------- 1 | #version 450 2 | #include "extensions.glsl" 3 | 4 | layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in; 5 | 6 | #include "../include/constants.h" 7 | 8 | #include "common.glsl" 9 | 10 | layout(push_constant) uniform PushConstant { 11 | //mat4 world_to_clip; 12 | vec4 aabb_min, aabb_max; 13 | ivec2 u_Resolution; 14 | PrimitivesRef prims; 15 | BinaryOpsRef binary_ops; 16 | NodesRef nodes; 17 | Uint16ArrayRef parents_in; 18 | Uint16ArrayRef parents_out; 19 | ActiveNodesRef active_nodes_in; 20 | ActiveNodesRef active_nodes_out; 21 | IntArrayRef parent_cells_offset; 22 | IntArrayRef child_cells_offset; 23 | IntArrayRef parent_cells_num_active; 24 | IntArrayRef num_active_out; 25 | IntRef active_count; 26 | FloatArrayRef cell_value_in; 27 | FloatArrayRef cell_value_out; 28 | Uint16ArrayRef old_to_new_scratch; 29 | IntRef old_to_new_count; 30 | TmpArrayRef tmp; 31 | Mat4Ref mvp; 32 | Vec4ArrayRef cam; 33 | int total_num_nodes; 34 | int grid_size; 35 | int first_lvl; 36 | float 
max_rel_err; 37 | float viz_max; 38 | float alpha; 39 | int culling_enabled; 40 | uint8_t num_samples_u8; 41 | }; 42 | 43 | #include "common_culling.glsl" 44 | 45 | // One invocation per grid cell: invocations outside the grid exit early, the others call compute_pruning() for their cell. 46 | void main() { 47 | if (any(greaterThanEqual(gl_GlobalInvocationID.xyz, uvec3(grid_size)))) return; 48 | uint cell_idx = get_cell_idx(ivec3(gl_GlobalInvocationID.xyz), grid_size); 49 | 50 | vec3 cell_size = (aabb_max.xyz - aabb_min.xyz) / float(grid_size); 51 | vec3 cell_center = aabb_min.xyz + cell_size * (vec3(gl_GlobalInvocationID.xyz) + 0.5); 52 | 53 | // compute_pruning() looks up the parent cell and its active-node count itself, so they are not recomputed here. 54 | compute_pruning(cell_center, cell_size, int(cell_idx)); 55 | } 56 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.18) 2 | project(LipschitzPruning LANGUAGES CXX) 3 | 4 | set(CMAKE_CXX_STANDARD 20) 5 | 6 | add_subdirectory(ext/vk-bootstrap) 7 | 8 | find_package(Vulkan REQUIRED) 9 | 10 | include(FetchContent) 11 | 12 | FetchContent_Declare( 13 | 14 | CLI11 15 | 16 | GIT_REPOSITORY "https://github.com/CLIUtils/CLI11" 17 | 18 | GIT_TAG "v2.5.0" 19 | 20 | ) 21 | FetchContent_MakeAvailable(CLI11) 22 | 23 | find_package(glfw3) 24 | if (NOT ${glfw3_FOUND}) 25 | FetchContent_Declare( 26 | glfw 27 | GIT_REPOSITORY "https://github.com/glfw/glfw" 28 | GIT_TAG 3.4 29 | ) 30 | FetchContent_MakeAvailable(glfw) 31 | endif() 32 | 33 | 34 | 35 | set(SHADER_SRCS 36 | simple.vert.glsl 37 | simple.frag.glsl 38 | culling.comp.glsl 39 | plane.vert.glsl 40 | plane.frag.glsl 41 | dense_eval.comp.glsl) 42 | set(SHADER_STAGES 43 | vert 44 | frag 45 | comp 46 | vert 47 | frag 48 | comp) 49 | set(SHADER_BINS 50 | vert.spv 51 | frag.spv 52 | culling.comp.spv 53 | 
plane.vert.spv 54 | plane.frag.spv 55 | dense_eval.comp.spv) 56 | 57 | set(SHARED_SRC 58 | src/utils.cpp 59 | src/debug_plane.cpp 60 | src/context.cpp 61 | src/scene.cpp 62 | ext/imgui/imgui.cpp 63 | ext/imgui/imgui_draw.cpp 64 | ext/imgui/imgui_demo.cpp 65 | ext/imgui/imgui_tables.cpp 66 | ext/imgui/imgui_widgets.cpp 67 | ext/imgui/backends/imgui_impl_vulkan.cpp 68 | ext/imgui/backends/imgui_impl_glfw.cpp 69 | ) 70 | 71 | # Compile each shader to SPIR-V with glslc; every shared include is listed in DEPENDS so that editing one triggers a rebuild. 72 | foreach(src_file bin_file stage IN ZIP_LISTS SHADER_SRCS SHADER_BINS SHADER_STAGES) 73 | add_custom_command(OUTPUT "${CMAKE_BINARY_DIR}/${bin_file}" COMMAND ${Vulkan_GLSLC_EXECUTABLE} ARGS -fshader-stage=${stage} --target-spv=spv1.4 -g ${CMAKE_SOURCE_DIR}/shaders/${src_file} -o ${CMAKE_BINARY_DIR}/${bin_file} MAIN_DEPENDENCY ${CMAKE_SOURCE_DIR}/shaders/${src_file} DEPENDS ${CMAKE_SOURCE_DIR}/shaders/extensions.glsl ${CMAKE_SOURCE_DIR}/shaders/eval.glsl ${CMAKE_SOURCE_DIR}/shaders/common.glsl ${CMAKE_SOURCE_DIR}/shaders/common_culling.glsl ${CMAKE_SOURCE_DIR}/include/constants.h) 74 | endforeach() 75 | 76 | include_directories(include/ ext/imgui ext/json/include ext/rapidjson/include ext/glm) 77 | link_libraries(glfw vk-bootstrap Vulkan::Vulkan CLI11::CLI11) 78 | 79 | add_executable(LipschitzPruning src/main.cpp ${SHARED_SRC}) 80 | foreach(bin_file IN LISTS SHADER_BINS) 81 | target_sources(LipschitzPruning PRIVATE ${CMAKE_BINARY_DIR}/${bin_file}) 82 | endforeach() 83 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Lipschitz Pruning: Hierarchical Simplification of Primitive-Based SDFs 2 | 3 | Wilhem Barbier*, Mathieu Sanchez*, Axel Paris, Élie Michel, Thibaud Lambert, Tamy Boubekeur, Mathias Paulin, Théo Thonat 4 | 5 | (* joint first authors) 6 | 7 | ![Teaser](img/teaser.jpg) 8 | 9 | 🌐 [**Project page**](https://wbrbr.org/publications/LipschitzPruning) 10 | 11 | 📰 Published in *Computer Graphics Forum* (Eurographics 2025) 12 | 13 | 🏆 Received an 
Honorable Mention for the Best Paper award 14 | 15 | 📢 Presented at **SIGGRAPH 2025** 16 | 17 | 18 | 19 | ## Abstract 20 | 21 | Rendering tree-based analytical Signed Distance Fields (SDFs) through sphere tracing often requires evaluating many primitives per tracing step, for many steps per pixel of the end image. This cost quickly becomes prohibitive as the number of primitives that constitute the SDF grows. In this paper, we alleviate this cost by computing local pruned trees that are equivalent to the full tree within their region of space while being much faster to evaluate. We introduce an efficient hierarchical tree pruning method based on the Lipschitz property of SDFs, which is compatible with hard and smooth CSG operators. We propose a GPU implementation that enables real-time sphere tracing of complex SDFs composed of thousands of primitives with dynamic animation. Our pruning technique provides significant speedups for SDF evaluation in general, which we demonstrate on sphere tracing tasks but could also lead to significant improvements for SDF discretization or polygonization. 22 | 23 | ## Prebuilt executables 24 | 25 | Windows executables are available as [releases](https://github.com/wbrbr/LipschitzPruning/releases). The executable should be run from the `bin` subdirectory: 26 | ``` 27 | cd bin 28 | LipschitzPruning.exe 29 | ``` 30 | 31 | 32 | # Build from source 33 | 34 | ## Dependencies 35 | 36 | - Vulkan SDK 37 | 38 | ## Build 39 | 40 | * Install the Vulkan SDK (on Ubuntu 24.04: `sudo apt install libvulkan-dev libvulkan-memory-allocator-dev glslc libglfw3-dev`). 41 | * Fetch the submodules: `git submodule update --init --recursive` 42 | * Build: 43 | ``` 44 | mkdir build 45 | cd build 46 | cmake .. 47 | cmake --build . 48 | ``` 49 | 50 | ## Run 51 | 52 | The executable should be run from the `build` folder. 
Examples: 53 | * Linux build: `./LipschitzPruning` 54 | * Windows build (Debug): `Debug\LipschitzPruning.exe` 55 | 56 | 57 | # Assets 58 | The *Trees* and *Monument* scenes are courtesy of Élie Michel and available under the CC-BY 4.0 licence (Creative Commons with attribution). 59 | 60 | ![CC-BY logo](img/cc-by.svg) 61 | 62 | The *Molecule* scene is borrowed from the paper [Segment Tracing Using Local Lipschitz Bounds](https://aparis69.github.io/public_html/projects/galin2020_Segment.html) by Galin et al. 63 | -------------------------------------------------------------------------------- /shaders/eval.glsl: -------------------------------------------------------------------------------- 1 | float sdf(vec3 p) { 2 | const int STACK_DEPTH = 128; 3 | float stack[STACK_DEPTH]; 4 | int stack_idx = 0; 5 | 6 | for (int i = 0; i < total_num_nodes; i++) { 7 | Node node = nodes.tab[i]; 8 | 9 | float d; 10 | if (node.type == NODETYPE_BINARY) { 11 | float left_val = stack[stack_idx-2]; 12 | float right_val = stack[stack_idx-1]; 13 | BinaryOp op = binary_ops.tab[node.idx_in_type]; 14 | float k = BinaryOp_blend_factor(op); 15 | float s = BinaryOp_sign(op); 16 | uint typ = BinaryOp_op(op); 17 | if (typ == OP_SUB) right_val *= -1; 18 | stack_idx -= 2; 19 | d = s*(min(s*left_val, s*right_val) - kernel(abs(left_val-right_val), k)); 20 | } else if (node.type == NODETYPE_PRIMITIVE) { 21 | Primitive prim = prims.tab[node.idx_in_type]; 22 | d = eval_prim(p, prim); 23 | } 24 | 25 | if (stack_idx >= STACK_DEPTH) { 26 | //debugPrintfEXT("Stack overflow\n"); 27 | return 1.0 / 0.0; 28 | } 29 | stack[stack_idx++] = d; 30 | } 31 | 32 | return stack[0]; 33 | } 34 | 35 | 36 | float sdf_active(vec3 p, int cell_idx, out bool near_field) { 37 | int num_active = cells_num_active.tab[cell_idx]; 38 | 39 | if (num_active == 0) { 40 | near_field = false; 41 | return cell_error_out.tab[cell_idx]; 42 | } 43 | 44 | const int STACK_DEPTH = 128; 45 | float stack[STACK_DEPTH]; 46 | int stack_idx = 0; 47 | 
48 | int cell_offset = cells_offset.tab[cell_idx]; 49 | near_field = (num_active > 0); // assigned before the loop so the stack-overflow early return below never leaves the out parameter undefined 50 | for (int i = 0; i < num_active; i++) { 51 | ActiveNode active_node = active_nodes_out.tab[cell_offset + i]; 52 | int node_idx = ActiveNode_index(active_node); 53 | 54 | Node node = nodes.tab[node_idx]; 55 | float d; 56 | if (node.type == NODETYPE_BINARY) { 57 | float left_val = stack[stack_idx-2]; 58 | float right_val = stack[stack_idx-1]; 59 | stack_idx -= 2; 60 | BinaryOp op = binary_ops.tab[node.idx_in_type]; 61 | float k = BinaryOp_blend_factor(op); 62 | float s = BinaryOp_sign(op); 63 | d = s*(min(s*left_val, s*right_val)-kernel(abs(left_val-right_val), k)); 64 | } else if (node.type == NODETYPE_PRIMITIVE) { 65 | Primitive prim = prims.tab[node.idx_in_type]; 66 | d = eval_prim(p, prim); 67 | } 68 | 69 | d *= ActiveNode_sign(active_node) ? 1 : -1; // the active-node entry carries the sign applied to this node's value 70 | if (stack_idx >= STACK_DEPTH) { 71 | //debugPrintfEXT("Stack overflow\n"); 72 | return 1.0 / 0.0; 73 | } 74 | stack[stack_idx++] = d; 75 | } 76 | 77 | 78 | 79 | return stack[0]; 80 | } -------------------------------------------------------------------------------- /.github/workflows/cmake-multi-platform.yml: -------------------------------------------------------------------------------- 1 | # This starter workflow is for a CMake project running on multiple platforms. There is a different starter workflow if you just want a single platform. 2 | # See: https://github.com/actions/starter-workflows/blob/main/ci/cmake-single-platform.yml 3 | name: CMake on multiple platforms 4 | 5 | on: push 6 | 7 | jobs: 8 | build-ubuntu: 9 | runs-on: ${{ matrix.os }} 10 | 11 | strategy: 12 | # Set fail-fast to false to ensure that feedback is delivered for all matrix combinations. Consider changing this to true when your workflow is stable. 13 | fail-fast: false 14 | 15 | # Set up a matrix to run the following 3 configurations: 16 | # 1. 17 | # 2. 18 | # 3. 19 | # 20 | # To add more build types (Release, Debug, RelWithDebInfo, etc.) 
customize the build_type list. 21 | matrix: 22 | os: [ ubuntu-latest ] 23 | build_type: [Release] 24 | c_compiler: [gcc, clang, cl] 25 | include: 26 | #- os: windows-latest 27 | # c_compiler: cl 28 | # cpp_compiler: cl 29 | - os: ubuntu-latest 30 | c_compiler: gcc 31 | cpp_compiler: g++ 32 | - os: ubuntu-latest 33 | c_compiler: clang 34 | cpp_compiler: clang++ 35 | exclude: 36 | - os: windows-latest 37 | c_compiler: gcc 38 | - os: windows-latest 39 | c_compiler: clang 40 | - os: ubuntu-latest 41 | c_compiler: cl 42 | 43 | steps: 44 | - uses: actions/checkout@v4 45 | 46 | - name: Checkout submodules 47 | run: cd ${{ github.workspace }} && git submodule update --init --recursive 48 | 49 | - name: Install dependencies 50 | run: sudo apt install libvulkan-dev libvulkan-memory-allocator-dev glslc libglfw3-dev 51 | 52 | - name: Set reusable strings 53 | # Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file. 54 | id: strings 55 | shell: bash 56 | run: | 57 | echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT" 58 | 59 | - name: Configure CMake 60 | # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. 61 | # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type 62 | run: > 63 | cmake -B ${{ steps.strings.outputs.build-output-dir }} 64 | -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }} 65 | -DCMAKE_C_COMPILER=${{ matrix.c_compiler }} 66 | -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} 67 | -S ${{ github.workspace }} 68 | 69 | - name: Build 70 | # Build your program with the given configuration. Note that --config is needed because the default Windows generator is a multi-config generator (Visual Studio generator). 
71 | run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }} 72 | 73 | build-windows: 74 | runs-on: windows-latest 75 | 76 | steps: 77 | - uses: actions/checkout@v4 78 | 79 | - name: Checkout submodules 80 | run: cd ${{ github.workspace }} && git submodule update --init --recursive 81 | 82 | - name: Fetch Vulkan SDK version 83 | id: vk 84 | shell: bash 85 | run: echo "version=$(curl -s https://vulkan.lunarg.com/sdk/latest/windows.txt)" >> "$GITHUB_OUTPUT" 86 | 87 | - name: Install Vulkan SDK 88 | run: | 89 | curl -O https://sdk.lunarg.com/sdk/download/latest/windows/vulkan_sdk.exe 90 | ls 91 | ${{ github.workspace}}\vulkan_sdk.exe --accept-licenses --default-answer --confirm-command install 92 | 93 | - name: Set reusable strings 94 | # Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file. 95 | id: strings 96 | shell: bash 97 | run: | 98 | echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT" 99 | 100 | - name: Configure CMake 101 | # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. 102 | # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type 103 | run: > 104 | $env:VULKAN_SDK='C:\VulkanSDK\${{ steps.vk.outputs.version }}'; 105 | cmake -B ${{ steps.strings.outputs.build-output-dir }} 106 | -DCMAKE_BUILD_TYPE=Release 107 | -S ${{ github.workspace }} 108 | 109 | - name: Build 110 | # Build your program with the given configuration. Note that --config is needed because the default Windows generator is a multi-config generator (Visual Studio generator). 
111 | run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config Release 112 | -------------------------------------------------------------------------------- /include/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef SDFCULLING_UTILS_H 2 | #define SDFCULLING_UTILS_H 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include "vma/vk_mem_alloc.h" 9 | #include "constants.h" 10 | 11 | #define _USE_MATH_DEFINES 12 | #include 13 | 14 | extern int MAX_ACTIVE_COUNT; 15 | extern int MAX_TMP_COUNT; 16 | extern std::string spv_dir; 17 | 18 | #define VK_CHECK(x) \ 19 | do \ 20 | { \ 21 | VkResult err = x; \ 22 | if (err) \ 23 | { \ 24 | fprintf(stderr, "%d\n", err); \ 25 | abort(); \ 26 | } \ 27 | } while (0) 28 | 29 | const int MAX_FRAMES_IN_FLIGHT = 3; 30 | 31 | struct Init { 32 | GLFWwindow* window; 33 | vkb::Instance instance; 34 | vkb::InstanceDispatchTable inst_disp; 35 | VkSurfaceKHR surface; 36 | vkb::Device device; 37 | vkb::DispatchTable disp; 38 | vkb::Swapchain swapchain; 39 | }; 40 | 41 | 42 | struct PushConstants { 43 | //glm::mat4 world_to_clip; 44 | glm::vec4 aabb_min; 45 | glm::vec4 aabb_max; 46 | glm::ivec2 resolution; 47 | uint64_t prims_ref; 48 | uint64_t binary_ops_ref; 49 | uint64_t nodes_ref; 50 | uint64_t parents_in_ref; 51 | uint64_t parents_out_ref; 52 | uint64_t active_nodes_in_ref; 53 | uint64_t active_nodes_out_ref; 54 | uint64_t cell_offsets_in_ref; 55 | uint64_t cell_offsets_out_ref; 56 | uint64_t num_active_in_ref; 57 | uint64_t num_active_out_ref; 58 | uint64_t active_count_ref; 59 | uint64_t cell_error_in_ref; 60 | uint64_t cell_error_out_ref; 61 | uint64_t old_to_new_scratch_ref; 62 | uint64_t old_to_new_count_ref; 63 | uint64_t tmp_ref; 64 | uint64_t mvp_ref; 65 | uint64_t cam_ref; 66 | int num_nodes; 67 | int grid_size; 68 | int first_lvl; 69 | float max_rel_err = 0; 70 | float viz_max; 71 | float alpha; 72 | int culling_enabled; 73 | float gamma; 74 | int 
num_samples; 75 | }; 76 | 77 | struct Buffer { 78 | VkBuffer buf; 79 | VmaAllocation alloc; 80 | uint64_t address; 81 | }; 82 | 83 | struct Image { 84 | VkImage img; 85 | VmaAllocation alloc; 86 | }; 87 | 88 | struct Pipeline { 89 | VkPipeline pipe; 90 | VkPipelineLayout layout; 91 | }; 92 | 93 | struct RenderData { 94 | VmaAllocator alloc; 95 | VkQueue graphics_queue; 96 | VkQueue present_queue; 97 | uint32_t graphics_queue_family; 98 | 99 | std::vector swapchain_images; 100 | std::vector swapchain_image_views; 101 | std::vector render_images; 102 | std::vector render_image_views; 103 | std::vector framebuffers; 104 | std::vector depth_images; 105 | std::vector depth_image_views; 106 | 107 | VkRenderPass render_pass; 108 | VkPipelineLayout pipeline_layout; 109 | VkPipeline graphics_pipeline; 110 | 111 | Pipeline culling_pipeline; 112 | Pipeline eval_grid_pipeline; 113 | 114 | VkPipelineLayout debug_plane_pipeline_layout; 115 | VkPipeline debug_plane_pipeline; 116 | 117 | VkPipelineLayout single_block_scan_pipeline_layout; 118 | VkPipeline single_block_scan_pipeline; 119 | 120 | Pipeline fxaa_pipeline; 121 | 122 | VkCommandPool command_pool; 123 | std::vector command_buffers; 124 | 125 | std::vector available_semaphores; 126 | std::vector finished_semaphore; 127 | std::vector in_flight_fences; 128 | std::vector image_in_flight; 129 | size_t current_frame = 0; 130 | 131 | VkDescriptorPool descriptor_pool; 132 | VkQueryPool query_pool; 133 | 134 | PushConstants push_constants; 135 | Buffer staging_buffer; 136 | Buffer prims_buffer; 137 | Buffer nodes_buffer; 138 | Buffer binary_ops_buffer; 139 | Buffer spheres_buffer; 140 | Buffer active_nodes_buffer[2]; 141 | Buffer parents_buffer[2]; 142 | Buffer num_active_buffer[2]; 143 | Buffer cell_offsets_buffer[2]; 144 | Buffer active_count_buffer; 145 | Buffer parents_init_buffer; 146 | Buffer cell_errors[2]; 147 | Buffer active_nodes_init_buffer; 148 | Buffer old_to_new_scratch_buffer; 149 | Buffer old_to_new_count_buffer; 
150 | Buffer tmp_buffer; 151 | Buffer mvp_buffer; 152 | Buffer cam_buffer; 153 | 154 | int input_idx = 0; 155 | int output_idx = 1; 156 | 157 | float culling_elapsed_ms, tracing_elapsed_ms, render_elapsed_ms, eval_grid_elapsed_ms; 158 | uint64_t tracing_mem_usage; 159 | uint64_t pruning_mem_usage; 160 | int max_active_count = 0; 161 | int max_tmp_count = 0; 162 | int total_num_nodes; 163 | int colormap_max = 25; 164 | glm::vec3 aabb_min = glm::vec3(-1.f); 165 | glm::vec3 aabb_max = glm::vec3(1); 166 | int final_grid_lvl = 8; 167 | int shading_mode = SHADING_MODE_SHADED; 168 | bool render_enabled = true; 169 | bool culling_enabled = true; 170 | bool hierarchy_enabled = true; 171 | bool eval_grid_enabled = false; 172 | bool show_imgui = true; 173 | int num_samples = 1; 174 | glm::vec3 cam_pos; 175 | float gamma = 1.2; 176 | bool compute_culling = true; 177 | glm::vec3 sphere_albedo = glm::vec3(1,0,1); 178 | glm::vec3 background_color = glm::vec3(1); 179 | }; 180 | 181 | enum PrimitiveType { 182 | PRIMITIVE_SPHERE = 0, 183 | PRIMITIVE_BOX = 1, 184 | PRIMITIVE_CYLINDER = 2, 185 | PRIMITIVE_CONE = 3 186 | }; 187 | 188 | struct SphereData { 189 | glm::vec4 radius; 190 | }; 191 | 192 | struct BoxData { 193 | glm::vec4 sizes; 194 | }; 195 | 196 | struct CylinderData { 197 | float height; 198 | float radius; 199 | float pad0; 200 | float pad1; 201 | }; 202 | 203 | struct ConeData { 204 | float radius; 205 | float height; 206 | float pad0, pad1; 207 | }; 208 | 209 | struct Primitive { 210 | union { 211 | SphereData sphere; 212 | BoxData box; 213 | CylinderData cylinder; 214 | ConeData cone; 215 | }; 216 | glm::vec4 m_row0; 217 | glm::vec4 m_row1; 218 | glm::vec4 m_row2; 219 | glm::vec2 extrude_rounding; 220 | PrimitiveType type; 221 | float bevel; 222 | uint32_t color; 223 | 224 | float pad0, pad1, pad2; 225 | }; 226 | static_assert(sizeof(Primitive) == 6*16); 227 | 228 | struct BinaryOp { 229 | uint32_t blend_factor_and_sign; 230 | 231 | BinaryOp() = default; 232 | // 0 = 
union, 1 = sub, 2 = inter 233 | BinaryOp(float k, bool sign, uint32_t op); 234 | }; 235 | 236 | enum NodeType { 237 | NODETYPE_BINARY, 238 | NODETYPE_PRIMITIVE, 239 | }; 240 | 241 | struct CSGNode { 242 | union { 243 | BinaryOp binary_op; 244 | Primitive primitive; 245 | }; 246 | NodeType type; 247 | int left, right; 248 | bool sign; 249 | }; 250 | 251 | std::vector readFile(const std::string& filename); 252 | VkShaderModule createShaderModule(Init& init, const std::vector& code, const char* debug_name); 253 | void TransferToBuffer(const VmaAllocator& alloc, const Buffer& buffer, const void* data, int size); 254 | void TransferFromBuffer(const VmaAllocator& alloc, const Buffer& buffer, void* data, int size); 255 | void CopyBuffer(const RenderData& render_data, const Init& init, const Buffer& src, const Buffer& dst, int size); 256 | void CopyImageToBuffer(const RenderData& render_data, const Init& init, VkImage src, const Buffer& dst, int width, int height); 257 | uint64_t GetBufferAddress(const Init& init, const Buffer& buffer); 258 | Pipeline create_compute_pipeline(Init& init, const char* shader_path, const char* shader_name, unsigned int push_constant_size); 259 | Buffer create_buffer(Init& init, RenderData& render_data, unsigned int size, VkBufferUsageFlags usage, const char* name); 260 | 261 | extern size_t g_memory_usage; 262 | 263 | #endif //SDFCULLING_UTILS_H 264 | -------------------------------------------------------------------------------- /shaders/common_culling.glsl: -------------------------------------------------------------------------------- 1 | shared ActiveNode s_parent_active_nodes[64]; 2 | shared uint16_t s_parent_node_parents[64]; 3 | 4 | #define INVALID_INDEX 0xffffu 5 | 6 | 7 | 8 | void compute_pruning(vec3 cell_center, vec3 cell_size, int cell_idx) { 9 | struct StackEntry { 10 | int idx; 11 | float d; 12 | }; 13 | const int STACK_DEPTH = 128; 14 | StackEntry stack[STACK_DEPTH]; 15 | int stack_idx = 0; 16 | 17 | float R = 
length(cell_size) * 0.5; 18 | 19 | const int NODESTATE_INACTIVE = 0; 20 | const int NODESTATE_SKIPPED = 1; 21 | const int NODESTATE_ACTIVE = 2; 22 | 23 | int parent_cell_idx; 24 | int parent_offset; 25 | int num_nodes; 26 | if (bool(first_lvl)) { 27 | parent_cell_idx = 0; 28 | parent_offset = 0; 29 | num_nodes = total_num_nodes; 30 | } else { 31 | parent_cell_idx = int(get_parent_cell_idx(cell_idx, grid_size)); 32 | parent_offset = parent_cells_offset.tab[parent_cell_idx]; 33 | num_nodes = parent_cells_num_active.tab[parent_cell_idx]; 34 | } 35 | 36 | if (num_nodes == 0) { 37 | num_active_out.tab[cell_idx] = 0; 38 | cell_value_out.tab[cell_idx] = cell_value_in.tab[parent_cell_idx]; 39 | return; 40 | } 41 | 42 | if (num_nodes == 1) { 43 | int cell_offset = atomicAdd(active_count.val, 1); 44 | num_active_out.tab[cell_idx] = 1; 45 | child_cells_offset.tab[cell_idx] = cell_offset; 46 | parents_out.tab[cell_offset] = uint16_t(INVALID_INDEX); 47 | active_nodes_out.tab[cell_offset] = active_nodes_in.tab[parent_offset]; 48 | cell_value_out.tab[cell_idx] = cell_value_in.tab[parent_cell_idx]; 49 | return; 50 | } 51 | 52 | int tmp_offset = -1; 53 | 54 | if (subgroupElect()) { 55 | tmp_offset = atomicAdd(old_to_new_count.val, 32*num_nodes); 56 | } 57 | tmp_offset = subgroupBroadcastFirst(tmp_offset); 58 | 59 | for (int block = 0; block < (num_nodes+63) / 64; block++) { 60 | if (block*64+gl_LocalInvocationIndex < num_nodes) { 61 | s_parent_active_nodes[gl_LocalInvocationIndex] = active_nodes_in.tab[parent_offset + block*64 + gl_LocalInvocationIndex]; 62 | } 63 | barrier(); 64 | 65 | for (int element_idx = 0; element_idx < 64; element_idx++) { 66 | int i = block*64 + element_idx; 67 | if (i >= num_nodes) break; 68 | 69 | #if 1 70 | ActiveNode active_node = active_nodes_in.tab[parent_offset + i]; 71 | #else 72 | ActiveNode active_node = s_parent_active_nodes[element_idx]; 73 | #endif 74 | int node_idx = ActiveNode_index(active_node); 75 | Node node = nodes.tab[node_idx]; 76 | 77 
| float d; 78 | if (node.type == NODETYPE_BINARY) { 79 | StackEntry left_entry = stack[stack_idx-2]; 80 | StackEntry right_entry = stack[stack_idx-1]; 81 | float left_val = left_entry.d; 82 | float right_val = right_entry.d; 83 | stack_idx -= 2; 84 | 85 | BinaryOp op = binary_ops.tab[node.idx_in_type]; 86 | float k = BinaryOp_blend_factor(op); 87 | float s = BinaryOp_sign(op); 88 | 89 | // there are two ways to write the binary operator evaluation 90 | // the first is how we show in the paper, which doesn't make any additional assumptions on the operators but needs a few additional FLOPS 91 | // the second relies on the specific form of the operators, which are all min() - kernel() with some signs. This allows us to make a few simplifications 92 | // compared to the more general form 93 | // both are strictly equivalent, if you unroll the math they compute the exact same expressions 94 | #if 0 95 | float c_a = BinaryOp_ca(op); 96 | float c_b = BinaryOp_cb(op); 97 | d = BinaryOp_eval(op, c_a*left_val, c_b*right_val, k); 98 | #else 99 | d = s*(min(s*left_val, s*right_val) - kernel(abs(left_val-right_val), k)); 100 | #endif 101 | 102 | int current_state; 103 | if (abs(left_val - right_val) <= 2 * R + k) { 104 | current_state = NODESTATE_ACTIVE; 105 | } else { 106 | current_state = NODESTATE_SKIPPED; 107 | if (s*left_val < s*right_val) { 108 | Tmp_state_write(tmp.tab[tmp_offset + 32*right_entry.idx + gl_SubgroupInvocationID], NODESTATE_INACTIVE); 109 | } else { 110 | Tmp_state_write(tmp.tab[tmp_offset + 32*left_entry.idx + gl_SubgroupInvocationID], NODESTATE_INACTIVE); 111 | } 112 | } 113 | 114 | tmp.tab[tmp_offset + 32*i + gl_SubgroupInvocationID] = Tmp(0); 115 | Tmp_state_write(tmp.tab[tmp_offset + 32*i + gl_SubgroupInvocationID], current_state); 116 | //prim_dist[i] = 1e20; 117 | } else if (node.type == NODETYPE_PRIMITIVE) { 118 | Primitive prim = prims.tab[node.idx_in_type]; 119 | d = eval_prim(cell_center, prim); 120 | tmp.tab[tmp_offset + 32*i + 
gl_SubgroupInvocationID] = Tmp(0); 121 | Tmp_state_write(tmp.tab[tmp_offset + 32*i + gl_SubgroupInvocationID], NODESTATE_ACTIVE); 122 | //prim_dist[i] = d; 123 | } 124 | 125 | d *= ActiveNode_sign(active_node) ? 1 : -1; 126 | StackEntry new_entry; 127 | new_entry.idx = i; 128 | new_entry.d = d; 129 | stack[stack_idx++] = new_entry; 130 | } 131 | } 132 | 133 | float d = stack[0].d; 134 | if (abs(d) > 2*R) { 135 | num_active_out.tab[cell_idx] = 0; 136 | cell_value_out.tab[cell_idx] = sign(stack[0].d) * (abs(stack[0].d) - R); 137 | return; 138 | } 139 | 140 | 141 | int cell_num_active = 0; 142 | for (int i = num_nodes-1; i >= 0; i--) { 143 | 144 | Tmp tmp_i = tmp.tab[tmp_offset + 32*i + gl_SubgroupInvocationID]; 145 | 146 | if (Tmp_state_get(tmp_i) == NODESTATE_INACTIVE) { 147 | Tmp_active_global_write(tmp_i, false); 148 | Tmp_inactive_ancestors_write(tmp_i, true); 149 | tmp.tab[tmp_offset + 32*i + gl_SubgroupInvocationID] = tmp_i; 150 | } else { 151 | uint16_t parent_idx = uint16_t(parents_in.tab[parent_offset+i]); 152 | Tmp tmp_parent; 153 | if (parent_idx != uint16_t(INVALID_INDEX)) tmp_parent = tmp.tab[tmp_offset + 32*parent_idx + gl_SubgroupInvocationID]; 154 | bool node_has_inactive_ancestors = parent_idx != uint16_t(INVALID_INDEX) ? Tmp_inactive_ancestors_get(tmp_parent) : false; 155 | bool node_active_global = ((Tmp_state_get(tmp_i) == NODESTATE_ACTIVE) && !node_has_inactive_ancestors); 156 | if (node_active_global) cell_num_active += 1; 157 | 158 | 159 | ActiveNode old_active_node = active_nodes_in.tab[parent_offset+i]; 160 | int node_sign = ActiveNode_sign(old_active_node) ? 1 : -1; 161 | uint16_t new_parent_idx; 162 | if (parent_idx != INVALID_INDEX && Tmp_state_get(tmp_parent) == NODESTATE_SKIPPED) { 163 | node_sign *= Tmp_sign_get(tmp_parent) ? 1 : -1; 164 | new_parent_idx = Tmp_parent_get(tmp_parent); 165 | } else { 166 | new_parent_idx = parent_idx == INVALID_INDEX ? 
uint16_t(INVALID_INDEX) : uint16_t(parent_idx); 167 | } 168 | 169 | Tmp_inactive_ancestors_write(tmp_i, node_has_inactive_ancestors); 170 | Tmp_active_global_write(tmp_i, node_active_global); 171 | Tmp_parent_write(tmp_i, new_parent_idx); 172 | Tmp_sign_write(tmp_i, node_sign == 1); 173 | tmp.tab[tmp_offset + 32*i + gl_SubgroupInvocationID] = tmp_i; 174 | } 175 | } 176 | 177 | 178 | // TODO: warp aggregated atomics 179 | int cell_offset = atomicAdd(active_count.val, cell_num_active); 180 | 181 | 182 | int out_idx = cell_num_active-1; 183 | for (int i = num_nodes-1; i >= 0; i--) { 184 | Tmp tmp_i = tmp.tab[tmp_offset + 32*i + gl_SubgroupInvocationID]; 185 | if (Tmp_active_global_get(tmp_i)) { 186 | active_nodes_out.tab[cell_offset + out_idx] = ActiveNode_make(ActiveNode_index(active_nodes_in.tab[parent_offset+i]), Tmp_sign_get(tmp_i)); 187 | old_to_new_scratch.tab[tmp_offset + i*32 + gl_SubgroupInvocationID] = uint16_t(out_idx); 188 | 189 | int new_parent_old_idx = Tmp_parent_get(tmp_i); 190 | uint16_t new_parent_idx = new_parent_old_idx != INVALID_INDEX ? 
old_to_new_scratch.tab[tmp_offset + 32*new_parent_old_idx + gl_SubgroupInvocationID] : uint16_t(INVALID_INDEX); 191 | parents_out.tab[cell_offset + out_idx] = new_parent_idx; 192 | 193 | out_idx--; 194 | } 195 | } 196 | 197 | child_cells_offset.tab[cell_idx] = cell_offset; 198 | num_active_out.tab[cell_idx] = cell_num_active; 199 | 200 | // TODO: constant for max grid size 201 | if (out_idx == 1 || grid_size == 256) { 202 | cell_value_out.tab[cell_idx] = 0; 203 | } 204 | } -------------------------------------------------------------------------------- /img/cc-by.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | image/svg+xml 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /src/debug_plane.cpp: -------------------------------------------------------------------------------- 1 | #include "debug_plane.h" 2 | #include "glm/gtc/matrix_transform.hpp" 3 | #include "imgui.h" 4 | 5 | struct DebugPlanePushConstants { 6 | glm::vec4 aabb_min; 7 | glm::vec4 aabb_max; 8 | glm::vec4 farfield_color; 9 | uint64_t prims_ref; 10 | uint64_t binary_ops_ref; 11 | uint64_t nodes_ref; 12 | uint64_t active_nodes_out_ref; 13 | uint64_t cells_offset_ref; 14 | uint64_t cells_num_active_ref; 15 | uint64_t cell_error_out_ref; 16 | uint64_t mvp_ref; 17 | int total_num_nodes; 18 | int grid_size; 19 | float plane_y; 20 | float viz_max; 21 | float plane_alpha; 22 | }; 23 | 24 | void create_debug_plane_pipeline(Init& init, RenderData& data, VkPipeline& pipeline, VkPipelineLayout& pipeline_layout) { 25 | auto vert_code = readFile("plane.vert.spv"); 26 | auto frag_code = readFile("plane.frag.spv"); 27 | 28 | VkShaderModule vert_module = createShaderModule(init, vert_code, 
"plane.vert.glsl"); 29 | VkShaderModule frag_module = createShaderModule(init, frag_code, "plane.frag.glsl"); 30 | if (vert_module == VK_NULL_HANDLE || frag_module == VK_NULL_HANDLE) { 31 | fprintf(stderr, "failed to create shader module\n"); 32 | abort(); 33 | } 34 | 35 | VkPipelineShaderStageCreateInfo vert_stage_info = {}; 36 | vert_stage_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; 37 | vert_stage_info.stage = VK_SHADER_STAGE_VERTEX_BIT; 38 | vert_stage_info.module = vert_module; 39 | vert_stage_info.pName = "main"; 40 | 41 | VkPipelineShaderStageCreateInfo frag_stage_info = {}; 42 | frag_stage_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; 43 | frag_stage_info.stage = VK_SHADER_STAGE_FRAGMENT_BIT; 44 | frag_stage_info.module = frag_module; 45 | frag_stage_info.pName = "main"; 46 | 47 | VkPipelineShaderStageCreateInfo shader_stages[] = { vert_stage_info, frag_stage_info }; 48 | 49 | VkPipelineVertexInputStateCreateInfo vertex_input_info = {}; 50 | vertex_input_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; 51 | vertex_input_info.vertexBindingDescriptionCount = 0; 52 | vertex_input_info.vertexAttributeDescriptionCount = 0; 53 | 54 | VkPipelineInputAssemblyStateCreateInfo input_assembly = {}; 55 | input_assembly.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; 56 | input_assembly.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; 57 | input_assembly.primitiveRestartEnable = VK_FALSE; 58 | 59 | VkViewport viewport = {}; 60 | viewport.x = 0.0f; 61 | viewport.y = 0.0f; 62 | viewport.width = (float)init.swapchain.extent.width; 63 | viewport.height = (float)init.swapchain.extent.height; 64 | viewport.minDepth = 0.0f; 65 | viewport.maxDepth = 1.0f; 66 | 67 | VkRect2D scissor = {}; 68 | scissor.offset = { 0, 0 }; 69 | scissor.extent = init.swapchain.extent; 70 | 71 | VkPipelineViewportStateCreateInfo viewport_state = {}; 72 | viewport_state.sType = 
VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; 73 | viewport_state.viewportCount = 1; 74 | viewport_state.pViewports = &viewport; 75 | viewport_state.scissorCount = 1; 76 | viewport_state.pScissors = &scissor; 77 | 78 | VkPipelineRasterizationStateCreateInfo rasterizer = {}; 79 | rasterizer.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; 80 | rasterizer.depthClampEnable = VK_FALSE; 81 | rasterizer.rasterizerDiscardEnable = VK_FALSE; 82 | rasterizer.polygonMode = VK_POLYGON_MODE_FILL; 83 | rasterizer.lineWidth = 1.0f; 84 | rasterizer.cullMode = VK_CULL_MODE_NONE; 85 | rasterizer.frontFace = VK_FRONT_FACE_CLOCKWISE; 86 | rasterizer.depthBiasEnable = VK_FALSE; 87 | 88 | VkPipelineMultisampleStateCreateInfo multisampling = {}; 89 | multisampling.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; 90 | multisampling.sampleShadingEnable = VK_FALSE; 91 | multisampling.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; 92 | 93 | VkPipelineColorBlendAttachmentState colorBlendAttachment = {}; 94 | colorBlendAttachment.colorWriteMask = 95 | VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; 96 | colorBlendAttachment.blendEnable = VK_TRUE; 97 | colorBlendAttachment.srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; 98 | colorBlendAttachment.dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; 99 | colorBlendAttachment.colorBlendOp = VK_BLEND_OP_ADD; 100 | colorBlendAttachment.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE; 101 | colorBlendAttachment.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; 102 | colorBlendAttachment.alphaBlendOp = VK_BLEND_OP_ADD; 103 | 104 | VkPipelineColorBlendStateCreateInfo color_blending = {}; 105 | color_blending.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; 106 | color_blending.logicOpEnable = VK_FALSE; 107 | color_blending.logicOp = VK_LOGIC_OP_COPY; 108 | color_blending.attachmentCount = 1; 109 | color_blending.pAttachments = 
&colorBlendAttachment; 110 | color_blending.blendConstants[0] = 0.0f; 111 | color_blending.blendConstants[1] = 0.0f; 112 | color_blending.blendConstants[2] = 0.0f; 113 | color_blending.blendConstants[3] = 0.0f; 114 | /* One push-constant range, visible to both the vertex and the fragment stage. */ 115 | VkPushConstantRange range = { 116 | .stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 117 | .offset = 0, 118 | .size = sizeof(DebugPlanePushConstants) 119 | }; 120 | 121 | VkPipelineLayoutCreateInfo pipeline_layout_info = {}; 122 | pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; 123 | pipeline_layout_info.setLayoutCount = 0; 124 | pipeline_layout_info.pushConstantRangeCount = 1; 125 | pipeline_layout_info.pPushConstantRanges = &range; 126 | 127 | if (init.disp.createPipelineLayout(&pipeline_layout_info, nullptr, &pipeline_layout) != VK_SUCCESS) { 128 | fprintf(stderr, "failed to create pipeline layout\n"); 129 | abort(); 130 | } 131 | /* Viewport and scissor are declared as dynamic pipeline state. */ 132 | std::vector<VkDynamicState> dynamic_states = { VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR }; 133 | 134 | VkPipelineDynamicStateCreateInfo dynamic_info = {}; 135 | dynamic_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; 136 | dynamic_info.dynamicStateCount = static_cast<uint32_t>(dynamic_states.size()); 137 | dynamic_info.pDynamicStates = dynamic_states.data(); 138 | /* Attachment formats for the pipeline: one sRGB colour target and a D24S8 depth target. */ 139 | VkFormat format = VK_FORMAT_R8G8B8A8_SRGB; 140 | VkPipelineRenderingCreateInfo pipeline_rendering = { 141 | .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO, 142 | .pNext = nullptr, 143 | .viewMask = 0, 144 | .colorAttachmentCount = 1, 145 | .pColorAttachmentFormats = &format, 146 | .depthAttachmentFormat = VK_FORMAT_D24_UNORM_S8_UINT, 147 | .stencilAttachmentFormat = {} 148 | }; 149 | 150 | VkPipelineDepthStencilStateCreateInfo depth_stencil = { 151 | .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, 152 | .pNext = nullptr, 153 | .flags = 0, 154 | .depthTestEnable = true, 155 | .depthWriteEnable = true, 156 | .depthCompareOp = VK_COMPARE_OP_LESS, 157 |
.depthBoundsTestEnable = false, 158 | .stencilTestEnable = false 159 | }; 160 | 161 | VkGraphicsPipelineCreateInfo pipeline_info = {}; 162 | pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; 163 | pipeline_info.stageCount = 2; 164 | pipeline_info.pStages = shader_stages; 165 | pipeline_info.pVertexInputState = &vertex_input_info; 166 | pipeline_info.pInputAssemblyState = &input_assembly; 167 | pipeline_info.pViewportState = &viewport_state; 168 | pipeline_info.pRasterizationState = &rasterizer; 169 | pipeline_info.pMultisampleState = &multisampling; 170 | pipeline_info.pColorBlendState = &color_blending; 171 | pipeline_info.pDynamicState = &dynamic_info; 172 | pipeline_info.pDepthStencilState = &depth_stencil; 173 | pipeline_info.layout = pipeline_layout; 174 | pipeline_info.renderPass = VK_NULL_HANDLE; 175 | pipeline_info.subpass = 0; 176 | pipeline_info.basePipelineHandle = VK_NULL_HANDLE; 177 | pipeline_info.pNext = &pipeline_rendering; 178 | 179 | VK_CHECK(init.disp.createGraphicsPipelines(VK_NULL_HANDLE, 1, &pipeline_info, nullptr, &pipeline)); 180 | 181 | init.disp.destroyShaderModule(frag_module, nullptr); 182 | init.disp.destroyShaderModule(vert_module, nullptr); 183 | } 184 | 185 | void draw_debug_plane(const Init& init, const RenderData& data, VkCommandBuffer cmd_buf) { 186 | static float plane_y = 0.f; 187 | ImGui::SliderFloat("Plane Y", &plane_y, -1, 1); 188 | static int viz_max = 10; 189 | ImGui::DragInt("Range max", &viz_max, 1, 1, data.push_constants.num_nodes); 190 | glm::mat4 view_mat = glm::lookAt(glm::vec3(data.cam_pos), glm::vec3(0), glm::vec3(0,1,0)); 191 | glm::mat4 proj_mat = glm::perspective((float)M_PI / 2.f, (float)init.swapchain.extent.width / (float)init.swapchain.extent.height, 0.01f, 10.f); 192 | 193 | static glm::vec4 farfield_color = glm::vec4(1, 0, 0, 0.5); 194 | ImGui::ColorPicker4("Near-field color", &farfield_color[0]); 195 | 196 | static float plane_alpha = 1.f; 197 | ImGui::SliderFloat("Plane alpha", 
&plane_alpha, 0, 1); 198 | 199 | vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_GRAPHICS, data.debug_plane_pipeline); 200 | DebugPlanePushConstants push_constants = { 201 | glm::vec4(data.aabb_min, 0), 202 | glm::vec4(data.aabb_max, 0), 203 | farfield_color, 204 | data.push_constants.prims_ref, 205 | data.push_constants.binary_ops_ref, 206 | data.push_constants.nodes_ref, 207 | data.push_constants.active_nodes_out_ref, 208 | data.push_constants.cell_offsets_out_ref, 209 | data.push_constants.num_active_out_ref, 210 | data.push_constants.cell_error_out_ref, 211 | data.push_constants.mvp_ref, 212 | data.total_num_nodes, 213 | 1 << data.final_grid_lvl, 214 | plane_y, 215 | (float)viz_max, 216 | plane_alpha, 217 | }; 218 | vkCmdPushConstants(cmd_buf, data.debug_plane_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(DebugPlanePushConstants), &push_constants); 219 | vkCmdDraw(cmd_buf, 6, 1, 0, 0); 220 | } -------------------------------------------------------------------------------- /src/utils.cpp: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | #include <fstream> 3 | 4 | std::string spv_dir; 5 | 6 | int MAX_ACTIVE_COUNT = 100 * 1000 * 1000; 7 | int MAX_TMP_COUNT = 400 * 1000 * 1000; 8 | /* Reads the whole binary file spv_dir/filename into memory; aborts if it cannot be opened. */ 9 | std::vector<char> readFile(const std::string& filename) { 10 | std::ifstream file(spv_dir + "/" + filename, std::ios::ate | std::ios::binary); 11 | 12 | if (!file.is_open()) { 13 | fprintf(stderr, "failed to open file %s\n", filename.c_str()); 14 | abort(); 15 | } 16 | /* The stream was opened at the end (std::ios::ate), so tellg() is the file size. */ 17 | size_t file_size = (size_t)file.tellg(); 18 | std::vector<char> buffer(file_size); 19 | 20 | file.seekg(0); 21 | file.read(buffer.data(), static_cast<std::streamsize>(file_size)); 22 | 23 | file.close(); 24 | 25 | return buffer; 26 | } 27 | 28 | VkShaderModule createShaderModule(Init& init, const std::vector<char>& code, const char* debug_name) { 29 | VkShaderModuleCreateInfo create_info = {}; 30 | create_info.sType =
VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; 31 | create_info.codeSize = code.size(); 32 | create_info.pCode = reinterpret_cast<const uint32_t*>(code.data()); 33 | 34 | VkShaderModule shaderModule; 35 | if (init.disp.createShaderModule(&create_info, nullptr, &shaderModule) != VK_SUCCESS) { 36 | return VK_NULL_HANDLE; // failed to create shader module 37 | } 38 | 39 | VkDebugUtilsObjectNameInfoEXT name_info = { 40 | .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, 41 | .objectType = VK_OBJECT_TYPE_SHADER_MODULE, 42 | .objectHandle = (uint64_t)shaderModule, 43 | .pObjectName = debug_name 44 | }; 45 | init.disp.setDebugUtilsObjectNameEXT(&name_info); 46 | 47 | return shaderModule; 48 | } 49 | /* Maps the buffer's allocation, copies size bytes from data into it, then unmaps. */ 50 | void TransferToBuffer(const VmaAllocator& alloc, const Buffer& buffer, const void* data, int size) { 51 | void* ptr; 52 | VK_CHECK(vmaMapMemory(alloc, buffer.alloc, (void**)&ptr)); 53 | memcpy(ptr, data, size); 54 | vmaUnmapMemory(alloc, buffer.alloc); 55 | } 56 | /* Maps the buffer's allocation, copies size bytes out of it into data, then unmaps. */ 57 | void TransferFromBuffer(const VmaAllocator& alloc, const Buffer& buffer, void* data, int size) { 58 | void* ptr; 59 | VK_CHECK(vmaMapMemory(alloc, buffer.alloc, (void**)&ptr)); 60 | memcpy(data, ptr, size); 61 | vmaUnmapMemory(alloc, buffer.alloc); 62 | } 63 | 64 | void CopyImageToBuffer(const RenderData& render_data, const Init& init, VkImage src, const Buffer& dst, int width, int height) { 65 | VK_CHECK(vkDeviceWaitIdle(init.device)); 66 | VkCommandBufferBeginInfo begin_info = { 67 | .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, 68 | .pNext = nullptr, 69 | .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, 70 | .pInheritanceInfo = nullptr 71 | }; 72 | VkCommandBuffer cmd_buf = render_data.command_buffers[0]; 73 | VK_CHECK(vkBeginCommandBuffer(cmd_buf, &begin_info)); 74 | 75 | VkImageMemoryBarrier2 barrier = { 76 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, 77 | .pNext = nullptr, 78 | .srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT, 79 | .srcAccessMask =
VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, 80 | .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, 81 | .dstAccessMask = VK_ACCESS_2_TRANSFER_READ_BIT, 82 | .oldLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, 83 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, 84 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 85 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 86 | .image = src, 87 | .subresourceRange = { 88 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, 89 | .baseMipLevel = 0, 90 | .levelCount = 1, 91 | .baseArrayLayer = 0, 92 | .layerCount = 1 93 | } 94 | }; 95 | VkDependencyInfo dependency_info = { 96 | .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, 97 | .pNext = nullptr, 98 | .dependencyFlags = 0, 99 | .memoryBarrierCount = 0, 100 | .pMemoryBarriers = nullptr, 101 | .bufferMemoryBarrierCount = 0, 102 | .pBufferMemoryBarriers = nullptr, 103 | .imageMemoryBarrierCount = 1, 104 | .pImageMemoryBarriers = &barrier 105 | }; 106 | vkCmdPipelineBarrier2(cmd_buf, &dependency_info); 107 | 108 | VkBufferImageCopy copy = { 109 | .bufferOffset = 0, 110 | .bufferRowLength = 0, 111 | .bufferImageHeight = 0, 112 | .imageSubresource = { 113 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, 114 | .mipLevel = 0, 115 | .baseArrayLayer = 0, 116 | .layerCount = 1 117 | }, 118 | .imageOffset = { 0, 0, 0 }, 119 | .imageExtent = { (uint32_t)width, (uint32_t)height, 1 } 120 | }; 121 | vkCmdCopyImageToBuffer(cmd_buf, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst.buf, 1, ©); 122 | 123 | vkEndCommandBuffer(cmd_buf); 124 | 125 | VkCommandBufferSubmitInfo cmd_buf_info = { 126 | .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, 127 | .pNext = nullptr, 128 | .commandBuffer = cmd_buf, 129 | .deviceMask = 0 130 | }; 131 | 132 | VkSubmitInfo2 submit_info = { 133 | .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, 134 | .pNext = 0, 135 | .flags = 0, 136 | .waitSemaphoreInfoCount = 0, 137 | .pWaitSemaphoreInfos = nullptr, 138 | .commandBufferInfoCount = 1, 139 | .pCommandBufferInfos = &cmd_buf_info, 140 | 
.signalSemaphoreInfoCount = 0, 141 | .pSignalSemaphoreInfos = nullptr 142 | }; 143 | VK_CHECK(vkQueueSubmit2(render_data.graphics_queue, 1, &submit_info, VK_NULL_HANDLE)); 144 | 145 | VK_CHECK(vkDeviceWaitIdle(init.device)); 146 | 147 | } 148 | /* Blocking copy of the first size bytes of src into dst: idles the device, records and submits a one-time copy on the graphics queue, then idles again. */ 149 | void CopyBuffer(const RenderData& render_data, const Init& init, const Buffer& src, const Buffer& dst, int size) { 150 | VK_CHECK(vkDeviceWaitIdle(init.device)); 151 | VkCommandBufferBeginInfo begin_info = { 152 | .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, 153 | .pNext = nullptr, 154 | .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, 155 | .pInheritanceInfo = nullptr 156 | }; 157 | VkCommandBuffer cmd_buf = render_data.command_buffers[0]; 158 | VK_CHECK(vkBeginCommandBuffer(cmd_buf, &begin_info)); 159 | 160 | VkBufferCopy copy = { 161 | .srcOffset = 0, 162 | .dstOffset = 0, 163 | .size = (VkDeviceSize)size 164 | }; 165 | vkCmdCopyBuffer(cmd_buf, src.buf, dst.buf, 1, &copy); 166 | /* Recording can fail; check the result like the other Vulkan calls in this function. */ 167 | VK_CHECK(vkEndCommandBuffer(cmd_buf)); 168 | 169 | VkCommandBufferSubmitInfo cmd_buf_info = { 170 | .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, 171 | .pNext = nullptr, 172 | .commandBuffer = cmd_buf, 173 | .deviceMask = 0 174 | }; 175 | 176 | VkSubmitInfo2 submit_info = { 177 | .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, 178 | .pNext = nullptr, 179 | .flags = 0, 180 | .waitSemaphoreInfoCount = 0, 181 | .pWaitSemaphoreInfos = nullptr, 182 | .commandBufferInfoCount = 1, 183 | .pCommandBufferInfos = &cmd_buf_info, 184 | .signalSemaphoreInfoCount = 0, 185 | .pSignalSemaphoreInfos = nullptr 186 | }; 187 | VK_CHECK(vkQueueSubmit2(render_data.graphics_queue, 1, &submit_info, VK_NULL_HANDLE)); 188 | 189 | VK_CHECK(vkDeviceWaitIdle(init.device)); 190 | } 191 | 192 | uint64_t GetBufferAddress(const Init& init, const Buffer& buffer) { 193 | VkBufferDeviceAddressInfo address_info = { 194 | .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, 195 | .pNext = nullptr, 196 | .buffer = buffer.buf 197 | }; 198 | return
vkGetBufferDeviceAddress(init.device, &address_info); 199 | } 200 | /* Builds a compute pipeline from the SPIR-V file shader_path (entry point "main") with one compute-stage push-constant range of push_constant_size bytes and no descriptor sets. Aborts if the shader module cannot be created. */ 201 | Pipeline create_compute_pipeline(Init& init, const char* shader_path, const char* shader_name, unsigned int push_constant_size) { 202 | auto code = readFile(shader_path); 203 | VkShaderModule module = createShaderModule(init, code, shader_name); 204 | if (module == VK_NULL_HANDLE) abort(); 205 | 206 | VkPushConstantRange range = { 207 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 208 | .offset = 0, 209 | .size = push_constant_size 210 | }; 211 | VkPipelineLayoutCreateInfo layout_info = { 212 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 213 | .pNext = nullptr, 214 | .flags = 0, 215 | .setLayoutCount = 0, 216 | .pSetLayouts = nullptr, 217 | .pushConstantRangeCount = 1, 218 | .pPushConstantRanges = &range 219 | }; 220 | 221 | Pipeline pipeline{}; 222 | VK_CHECK(vkCreatePipelineLayout(init.device, &layout_info, nullptr, &pipeline.layout)); 223 | 224 | VkComputePipelineCreateInfo pipeline_info = { 225 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 226 | .pNext = nullptr, 227 | .flags = 0, 228 | .stage = { 229 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 230 | .pNext = nullptr, 231 | .flags = 0, 232 | .stage = VK_SHADER_STAGE_COMPUTE_BIT, 233 | .module = module, 234 | .pName = "main", 235 | .pSpecializationInfo = nullptr 236 | }, 237 | .layout = pipeline.layout, 238 | .basePipelineHandle = VK_NULL_HANDLE, 239 | .basePipelineIndex = -1 240 | }; 241 | VK_CHECK(vkCreateComputePipelines(init.device, VK_NULL_HANDLE, 1, &pipeline_info, nullptr, &pipeline.pipe)); 242 | vkDestroyShaderModule(init.device, module, nullptr); /* the module is only needed during pipeline creation; release it so it does not leak */ 243 | return pipeline; 244 | } 245 | 246 | size_t g_memory_usage = 0; 247 | 248 | Buffer create_buffer(Init& init, RenderData& data, unsigned int size, VkBufferUsageFlags usage, const char* name) { 249 | g_memory_usage += size; 250 | 251 | VkBufferCreateInfo buffer_info = { 252 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 253 | .pNext = nullptr, 254 | .flags = 0, 255 | .size = size, 256 |
.usage = usage, 257 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 258 | .queueFamilyIndexCount = 0, 259 | .pQueueFamilyIndices = nullptr 260 | }; 261 | VmaAllocationCreateInfo alloc_info{}; 262 | alloc_info.usage = VMA_MEMORY_USAGE_AUTO; 263 | 264 | Buffer res{}; 265 | VK_CHECK(vmaCreateBuffer(data.alloc, &buffer_info, &alloc_info, &res.buf, &res.alloc, nullptr)); 266 | 267 | VkBufferDeviceAddressInfo addr_info = { 268 | .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, 269 | .pNext = nullptr, 270 | .buffer = res.buf 271 | }; 272 | res.address = vkGetBufferDeviceAddress(init.device.device, &addr_info); 273 | 274 | VkDebugUtilsObjectNameInfoEXT name_info = { 275 | .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, 276 | .objectType = VK_OBJECT_TYPE_BUFFER, 277 | .objectHandle = (uint64_t)res.buf, 278 | .pObjectName = name 279 | }; 280 | VK_CHECK(init.disp.setDebugUtilsObjectNameEXT(&name_info)); 281 | 282 | return res; 283 | } 284 | 285 | BinaryOp::BinaryOp(float k, bool sign, unsigned int op) { 286 | uint32_t x = (uint32_t)sign; 287 | op &= 3u; 288 | x |= (op << 1); 289 | 290 | uint32_t k_bits; 291 | 292 | static_assert(sizeof(float) == sizeof(uint32_t)); 293 | memcpy(&k_bits, &k, sizeof(float)); 294 | 295 | // clear lower 3 bits 296 | k_bits &= ~7u; 297 | x |= k_bits; 298 | 299 | blend_factor_and_sign = x; 300 | } -------------------------------------------------------------------------------- /shaders/common.glsl: -------------------------------------------------------------------------------- 1 | #define PRIMITIVE_SPHERE 0 2 | #define PRIMITIVE_BOX 1 3 | #define PRIMITIVE_CYLINDER 2 4 | #define PRIMITIVE_CONE 3 5 | 6 | #define NODETYPE_BINARY 0 7 | #define NODETYPE_PRIMITIVE 1 8 | 9 | struct Primitive { 10 | vec4 data; 11 | vec4 m_row0; 12 | vec4 m_row1; 13 | vec4 m_row2; 14 | vec2 extrude_rounding; 15 | int type; 16 | float bevel; 17 | uint color; 18 | 19 | float pad0, pad1, pad2; 20 | }; 21 | 22 | struct Node { 23 | int type; 24 | int idx_in_type; 
};  // closes a declaration that begins above this excerpt

// Binary CSG operator packed into one 32-bit word. Layout as decoded by the
// BinaryOp_* helpers further down:
//   bit 0      : sign flag (set -> +1, clear -> -1, see BinaryOp_sign)
//   bits 1..2  : operator id (see BinaryOp_op; compared against OP_SUB)
//   bits 3..31 : bit pattern of the float blend factor, low 3 bits forced to 0
struct BinaryOp {
    uint blend_factor_and_sign;
};

// Entry of an active-node list. Layout as decoded by the ActiveNode_* helpers:
//   bits 0..14 : node index
//   bit 15     : cleared when the sign is "true", set when it is "false"
struct ActiveNode {
    uint16_t idx_and_sign;
};

// Packed per-node scratch word. The number in parentheses is the bit offset
// of each field inside `x`; the Tmp_* accessors below read/write them.
struct Tmp {
    // local state: 2 bits (0)
    // global state: 1 bit (2)
    // inactive ancestors flag: 1 bit (3)
    // sign : 1 bit (4)
    // parent: 16 bits (5)
    uint x;
};

// Buffer-reference views (buffer_reference layout qualifier). Each of the
// array views exposes a single unsized array member called `tab`.
layout(std430, buffer_reference, buffer_reference_align = 8) buffer PrimitivesRef {
    Primitive tab[];
};

layout(std430, buffer_reference, buffer_reference_align = 8) buffer NodesRef {
    Node tab[];
};

layout(std430, buffer_reference, buffer_reference_align = 8) buffer BinaryOpsRef {
    BinaryOp tab[];
};

layout(std430, buffer_reference, buffer_reference_align = 8) buffer ActiveNodesRef {
    ActiveNode tab[];
};

layout(std430, buffer_reference, buffer_reference_align = 8) buffer IntArrayRef {
    int tab[];
};

layout(std430, buffer_reference, buffer_reference_align = 8) buffer UintArrayRef {
    uint tab[];
};

layout(std430, buffer_reference, buffer_reference_align = 8) buffer Uint16ArrayRef {
    uint16_t tab[];
};

layout(std430, buffer_reference, buffer_reference_align = 8) buffer TmpArrayRef {
    Tmp tab[];
};

layout(std430, buffer_reference, buffer_reference_align = 8) buffer FloatArrayRef {
    float tab[];
};

// Reference to a single int (note the smaller 4-byte alignment).
layout(std430, buffer_reference, buffer_reference_align = 4) buffer IntRef {
    int val;
};

// Reference to a single 4x4 matrix.
layout(std430, buffer_reference, buffer_reference_align = 8) buffer Mat4Ref {
    mat4 m;
};

layout(std430, buffer_reference, buffer_reference_align = 8) buffer Vec4ArrayRef {
    vec4 tab[];
};

// Parameters of the debug plane pass.
layout(std430, buffer_reference, buffer_reference_align = 8) buffer DebugPlaneRef {
    mat4 world_to_clip;
    vec4 farfield_color;
    float plane_y;
};

// Sets bit `idx` of `x` to 1.
void set_bit(inout uint x, int idx) {
    x |= 1 << idx;
}

// Clears bit `idx` of `x`.
void unset_bit(inout uint x, int idx) {
    x &= ~(1 << idx);
}

// Returns true when bit `idx` of `x` is set.
bool get_bit(uint x, int idx) {
    return bool((x >> idx) & 1);
}

// Overwrites bit `idx` of `x` with `val` (clear first, then OR the new value in).
void write_bit(inout uint x, int idx, bool val) {
    unset_bit(x, idx);
    x |= uint(val) << idx;
}

// Reads the 2-bit state stored at slot `idx` of a 64-bit bitfield
// (slot i occupies bits 2*i and 2*i+1).
int read_node_state(uint64_t node_state_bitfield, int idx) {
    return int((node_state_bitfield >> (2*idx)) & 3ul);
}

// Writes the 2-bit state `val` into slot `idx` of `node_state`.
// `val` is not masked here, so it must already fit in 2 bits.
void write_node_state(inout uint64_t node_state, int idx, int val) {
    node_state &= ~(uint64_t(3) << (2*idx));
    node_state |= uint64_t(val) << (2*idx);
}

// --- Tmp field getters (bit offsets documented on the Tmp struct) ---

// Local state, bits 0..1.
int Tmp_state_get(Tmp t) {
    return int(t.x & 3u);
}

// Global-state flag, bit 2.
bool Tmp_active_global_get(Tmp t) {
    return bool((t.x >> 2) & 1);
}

// Inactive-ancestors flag, bit 3.
bool Tmp_inactive_ancestors_get(Tmp t) {
    return bool((t.x >> 3) & 1);
}

// Sign flag, bit 4.
bool Tmp_sign_get(Tmp t) {
    return bool((t.x >> 4) & 1);
}

// Parent index, 16 bits starting at bit 5.
uint16_t Tmp_parent_get(Tmp t) {
    return uint16_t((t.x >> 5) & 0xffff);
}

// --- Tmp field setters ---

// Replaces the 2-bit local state. `state` is not masked, so it must be in 0..3
// or it will overwrite the neighbouring fields.
void Tmp_state_write(inout Tmp t, int state) {
    t.x &= ~3u;
    t.x |= uint(state);
}

void Tmp_active_global_write(inout Tmp t, bool b) {
    write_bit(t.x, 2, b);
}

void Tmp_inactive_ancestors_write(inout Tmp t, bool b) {
    write_bit(t.x, 3, b);
}

void Tmp_sign_write(inout Tmp t, bool b) {
    write_bit(t.x, 4, b);
}

// Replaces the 16-bit parent index stored at bits 5..20.
void Tmp_parent_write(inout Tmp t, uint16_t p) {
    t.x &= ~(0xffffu << 5);
    t.x |= uint(p) << 5;
}


// Blend kernel used by BinaryOp_eval: max(0, k - x)^2 / (4k).
// Returns 0 when k == 0, which also avoids the division by zero.
float kernel(float x, float k) {
    if (k == 0) return 0;
    float m = max(0, k-x);
    return m*m*0.25/k;
}

#if 1
// Spreads the low 10 bits of x so that two zero bits separate consecutive bits.
// https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
uint Part1By2(uint x)
{
    x &= 0x000003ff;                  // x = ---- ---- ---- ---- ---- --98 7654 3210
    x = (x ^ (x << 16)) & 0xff0000ff; // x = ---- --98 ---- ---- ---- ---- 7654 3210
    x = (x ^ (x <<  8)) & 0x0300f00f; // x = ---- --98 ---- ---- 7654 ---- ---- 3210
    x = (x ^ (x <<  4)) & 0x030c30c3; // x = ---- --98 ---- 76-- --54 ---- 32-- --10
    x = (x ^ (x <<  2)) & 0x09249249; // x = ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
    return x;
}

// Interleaves the low 10 bits of each coordinate into a 30-bit Morton code
// (x in the lowest bit of every triple, then y, then z).
uint morton_encode(ivec3 cell)
{
    uint x = uint(cell.x);
    uint y = uint(cell.y);
    uint z = uint(cell.z);
    return (Part1By2(z) << 2) + (Part1By2(y) << 1) + Part1By2(x);
}


// Maps a cell coordinate to its linear index. Currently the Morton code of the
// cell; `grid_size` is only used by the commented-out row-major variant.
uint get_cell_idx(ivec3 cell, int grid_size) {
    //return cell.z * grid_size * grid_size + cell.y * grid_size + cell.x;
    return morton_encode(cell);
}

// Inverse of Part1By2: gathers every third bit of x into the low 10 bits.
// https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
uint Compact1By2(uint x)
{
    x &= 0x09249249;                  // x = ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
    x = (x ^ (x >>  2)) & 0x030c30c3; // x = ---- --98 ---- 76-- --54 ---- 32-- --10
    x = (x ^ (x >>  4)) & 0x0300f00f; // x = ---- --98 ---- ---- 7654 ---- ---- 3210
    x = (x ^ (x >>  8)) & 0xff0000ff; // x = ---- --98 ---- ---- ---- ---- 7654 3210
    x = (x ^ (x >> 16)) & 0x000003ff; // x = ---- ---- ---- ---- ---- --98 7654 3210
    return x;
}

// Inverse of morton_encode.
ivec3 morton_decode(uint cell_idx) {
    uint x = Compact1By2(cell_idx >> 0);
    uint y = Compact1By2(cell_idx >> 1);
    uint z = Compact1By2(cell_idx >> 2);

    return ivec3(x,y,z);
}

// Decodes a ROW-MAJOR linear index (x fastest, then y, then z) into a cell.
// NOTE(review): get_cell_idx() above produces Morton indices while this
// function decodes row-major ones (the Morton variant is commented out), so
// the two are not inverses of each other. Presumably the callers feed this
// with a differently-ordered index -- confirm at the call sites.
ivec3 get_cell(uint cell_idx, int grid_size) {
    //return morton_decode(cell_idx);
    ivec3 cell;
    cell.z = int(cell_idx / (grid_size*grid_size));
    cell.y = int((cell_idx % (grid_size*grid_size)) / grid_size);
    cell.x = int(cell_idx % grid_size);
    return cell;
}

// Index of the enclosing coarser cell. Dividing a Morton index by 64 drops its
// 6 lowest bits, i.e. the 2 lowest bits of each of the three coordinates.
// `grid_size` is unused; the commented-out variant halves the coordinates
// instead -- presumably an older scheme, verify before re-enabling.
uint get_parent_cell_idx(uint cell_idx, int grid_size) {
    return cell_idx / 64;
    //ivec3 cell = get_cell(cell_idx, grid_size);
    //ivec3 parent_cell = cell / 2;
    //return get_cell_idx(parent_cell, grid_size/2);
}

// Blend factor: the packed word reinterpreted as a float after clearing the
// 3 low bits that hold the sign flag and the operator id.
float BinaryOp_blend_factor(BinaryOp op) {
    return uintBitsToFloat(op.blend_factor_and_sign & (~7u));
}

// Sign as a float: +1 when bit 0 is set, -1 otherwise.
float BinaryOp_sign(BinaryOp op) {
    uint x = op.blend_factor_and_sign & 1;
    return -1. + 2. * float(x);
}

// Operator id stored in bits 1..2.
uint BinaryOp_op(BinaryOp op) {
    return (op.blend_factor_and_sign >> 1) & 3;
}

// Coefficient applied to the first operand; always 1.
float BinaryOp_ca(BinaryOp op) {
    return 1;
}

// Coefficient applied to the second operand: -1 for OP_SUB, +1 otherwise.
float BinaryOp_cb(BinaryOp op) {
    return BinaryOp_op(op) == OP_SUB ? -1 : 1;
}

// Evaluates the operator on the two child values `a` and `b` with blend
// radius `k`.
float BinaryOp_eval(BinaryOp op, float a, float b, float k) {
    // this common form is specific to the union, sub and inter operators that we use
    float s = BinaryOp_sign(op);
    float c_b = BinaryOp_cb(op);
    return s * min(s * a, s * c_b * b) - kernel(abs(a - c_b * b), k);
}

// Node index: the stored value with bit 15 cleared.
int ActiveNode_index(ActiveNode n) {
    return int(n.idx_and_sign & ~(1 << 15));
}

// True when bit 15 is clear.
bool ActiveNode_sign(ActiveNode n) {
    return (n.idx_and_sign >> 15 == 0);
}

// Packs an index and a sign; bit 15 is set when `sgn` is false.
// `idx` is not masked, so it must fit in 15 bits.
ActiveNode ActiveNode_make(int idx, bool sgn) {
    uint16_t v = uint16_t(idx);
    v |= uint16_t(!sgn) << 15;
    return ActiveNode(v);
}
#endif

// Degree-6 polynomial in t evaluated with Horner's scheme, one polynomial per
// colour channel. Presumably a fit of the "inferno" colormap for t in [0, 1]
// (going by the function name) -- the input is not clamped here.
vec3 inferno(float t) {

    const vec3 c0 = vec3(0.0002189403691192265, 0.001651004631001012, -0.01948089843709184);
    const vec3 c1 = vec3(0.1065134194856116, 0.5639564367884091, 3.932712388889277);
    const vec3 c2 = vec3(11.60249308247187, -3.972853965665698, -15.9423941062914);
    const vec3 c3 = vec3(-41.70399613139459, 17.43639888205313, 44.35414519872813);
    const vec3 c4 = vec3(77.162935699427, -33.40235894210092, -81.80730925738993);
    const vec3 c5 = vec3(-71.31942824499214, 32.62606426397723, 73.20951985803202);
    const vec3 c6 = vec3(25.13112622477341, -12.24266895238567, -23.07032500287172);

    return c0+t*(c1+t*(c2+t*(c3+t*(c4+t*(c5+t*c6)))));
}
// Random generator state: x holds the 64-bit state, y the 64-bit increment.
#define PCG u64vec2

// GPU implementation of CPU PCG
// Advances the state with a 64-bit linear congruential step and returns 32
// bits derived from the PREVIOUS state (xorshift followed by a rotation).
uint rand_uint32(inout PCG pcg)
{
    const uint64_t old_state = pcg.x;
    pcg.x = old_state * 6364136223846793005UL + pcg.y;

    const uint xorshifted = uint(((old_state >> 18) ^ old_state) >> 27);
    const uint rot = uint(old_state >> 59);
    // rotate right by `rot` bits
    return (xorshifted >> rot) | (xorshifted << ((-rot) & 31));
}

// Seeds the generator: fixed odd increment, one step, add the seed, one more step.
void init_pcg(inout PCG pcg, const uint64_t seed)
{
    pcg.x = 0; // m_state in CPU code
    pcg.y = (0xDA3E39CB94B95BDBUL << 1) | 1; // m_inc in CPU code
    rand_uint32(pcg);
    pcg.x += seed;
    rand_uint32(pcg);
}

// Next random number scaled into the unit interval (the constant is ~2^-32).
float rand_float_0_1(inout PCG pcg) { return rand_uint32(pcg) * 2.32830616e-10f; }


// 2D box distance with a separate rounding radius per corner.
//   p : query point, b : half extents, r : four corner radii.
// The radius is selected from the signs of p.x and p.y: (r.x, r.y) serve
// p.x > 0 and (r.z, r.w) serve p.x <= 0; the first of each pair is used when p.y > 0.
float sdRoundBox( in vec2 p, in vec2 b, in vec4 r )
{
    r.xy = (p.x>0.0)?r.xy : r.zw;
    r.x  = (p.y>0.0)?r.x  : r.y;
    vec2 q = abs(p)-b+r.x;
    return min(max(q.x,q.y),0.0) + length(max(q,0.0)) - r.x;
}


// Extrudes a 2D distance `d` along an axis: `z` is the coordinate on that
// axis, `h` the half height and `r` a rounding radius applied to the edges.
float sdExtrude( in float d, float z, in float h, float r ) {
    vec2 q = vec2(d+r, abs(z)-h);
    return min(max(q.x, q.y), 0.0) + length(max(q, 0.0))-r;
}


// Cone around the y axis: base of the given radius at y = -halfHeight, tip on
// the axis at y = +halfHeight. Works in the 2D (radial, height) plane.
float sdCone(vec3 position, float radius, float halfHeight) {
    // p is measured from the rim of the base circle
    vec2 p = vec2(length(position.xz) - radius, position.y + halfHeight);
    // e goes from the rim of the base to the tip
    vec2 e = vec2(-radius, 2.0 * halfHeight);
    // q: offset from the closest point on the slanted edge segment
    vec2 q = p - e * clamp(dot(p, e) / dot(e, e), 0.0, 1.0);
    float d = length(q);
    if (max(q.x, q.y) > 0.0) {
        return d;
    }
    // otherwise the negated smaller of the edge distance and the height above the base plane
    return -min(d, p.y);
}

// Evaluates the distance of primitive `prim` at point `p`.
// `p` is first mapped through the 3x4 affine transform stored row by row in
// prim.m_row0..2, then the shape selected by prim.type is evaluated.
// Unknown types yield a very large distance (1e20).
float eval_prim(vec3 p, Primitive prim) {
    mat4x3 m = transpose(mat3x4(prim.m_row0, prim.m_row1, prim.m_row2));
    p = vec3(m * vec4(p, 1));

    float dist;
    if (prim.type == PRIMITIVE_SPHERE) {
        // data.x = radius
        float r = prim.data.x;
        dist = length(p) - r;
    } else if (prim.type == PRIMITIVE_BOX) {
        // data.xyz = full side lengths; the 2D profile lives in the xz plane
        // and is extruded along y.
        vec3 half_sides = prim.data.xyz * 0.5;
        float scale = max(half_sides.x, half_sides.z) * 2;
        // data.w carries four 8-bit corner radii packed into the float's bits
        uint corner_data = floatBitsToUint(prim.data.w);
        vec4 corner_rounding;
        corner_rounding.x = float((corner_data >> 0) & 0xff);
        corner_rounding.y = float((corner_data >> 8) & 0xff);
        corner_rounding.z = float((corner_data >> 16) & 0xff);
        corner_rounding.w = float((corner_data >> 24) & 0xff);
        // byte value 255 maps to scale / 2
        corner_rounding = corner_rounding * scale / 255.f;
        corner_rounding /= 2;
        float d_2D = sdRoundBox(p.xz, half_sides.xz, corner_rounding);

        // separate edge rounding for the +y and -y faces
        float er = p.y > 0 ? prim.extrude_rounding.x : prim.extrude_rounding.y;
        dist = sdExtrude( d_2D, p.y, half_sides.y-er, er );
    } else if (prim.type == PRIMITIVE_CYLINDER) {
        // data.x = full height, data.y = radius
        float h = prim.data.x / 2;
        float r = prim.data.y;
        vec2 d = abs(vec2(length(p.xz),p.y)) - vec2(r,h);
        dist = min(max(d.x,d.y),0.0) + length(max(d,0.0));
    } else if (prim.type == PRIMITIVE_CONE) {
        // data.x = radius, data.y = full height
        dist = sdCone(p, prim.data.x, prim.data.y * 0.5);
    } else {
        dist = 1e20;
    }
    //dist -= prim.rounding;
    return dist;
}
d.Parse(buf.data()); 30 | if (d.HasParseError()) { 31 | rapidjson::ParseErrorCode code = d.GetParseError(); 32 | fprintf(stderr, "JSON parsing failed: error %d\n", code); 33 | abort(); 34 | } 35 | 36 | auto aabb_min_arr = d["aabb_min"].GetArray(); 37 | aabb_min.x = aabb_min_arr[0].GetFloat(); 38 | aabb_min.y = aabb_min_arr[1].GetFloat(); 39 | aabb_min.z = aabb_min_arr[2].GetFloat(); 40 | auto aabb_max_arr = d["aabb_max"].GetArray(); 41 | aabb_max.x = aabb_max_arr[0].GetFloat(); 42 | aabb_max.y = aabb_max_arr[1].GetFloat(); 43 | aabb_max.z = aabb_max_arr[2].GetFloat(); 44 | 45 | struct StackEntry { 46 | const rapidjson::Value* j; 47 | glm::mat4 mat; 48 | int idx; 49 | bool sign; 50 | }; 51 | 52 | nodes.push_back({}); 53 | std::vector stack = { { &d, glm::mat4(1), 0, true } }; 54 | 55 | while (!stack.empty()) { 56 | StackEntry e = stack.back(); 57 | stack.pop_back(); 58 | 59 | const auto& j = *e.j; 60 | 61 | auto m = j["matrix"].GetArray(); 62 | glm::mat4 node_mat = glm::mat4( 63 | m[0].GetFloat(), m[4].GetFloat(), m[8].GetFloat(), 0.f, 64 | m[1].GetFloat(), m[5].GetFloat(), m[9].GetFloat(), 0.f, 65 | m[2].GetFloat(), m[6].GetFloat(), m[10].GetFloat(), 0.f, 66 | m[3].GetFloat(), m[7].GetFloat(), m[11].GetFloat(), 1.f 67 | ); 68 | glm::mat4 mat = e.mat; 69 | std::string type = j["nodeType"].GetString(); 70 | int node_idx = e.idx; 71 | if (type == "primitive") { 72 | CSGNode &node = nodes[node_idx]; 73 | node.type = NODETYPE_PRIMITIVE; 74 | node.left = -1; 75 | node.right = -1; 76 | node.sign = e.sign; 77 | 78 | glm::mat4 world_to_prim = mat * node_mat; 79 | node.primitive.m_row0 = glm::vec4(world_to_prim[0][0], world_to_prim[1][0], world_to_prim[2][0], 80 | world_to_prim[3][0]); 81 | node.primitive.m_row1 = glm::vec4(world_to_prim[0][1], world_to_prim[1][1], world_to_prim[2][1], 82 | world_to_prim[3][1]); 83 | node.primitive.m_row2 = glm::vec4(world_to_prim[0][2], world_to_prim[1][2], world_to_prim[2][2], 84 | world_to_prim[3][2]); 85 | 86 | 
node.primitive.extrude_rounding.x = j["round_x"].GetFloat(); 87 | node.primitive.extrude_rounding.y = j["round_y"].GetFloat(); 88 | 89 | { 90 | auto c = j["color"].GetArray(); 91 | node.primitive.color = 0; 92 | node.primitive.color |= std::min((uint32_t)(c[0].GetFloat() * 255.99f), 255u) << 0; 93 | node.primitive.color |= std::min((uint32_t)(c[1].GetFloat() * 255.99f), 255u) << 8; 94 | node.primitive.color |= std::min((uint32_t)(c[2].GetFloat() * 255.99f), 255u) << 16; 95 | } 96 | 97 | std::string prim_type = j["primitiveType"].GetString(); 98 | if (prim_type == "sphere") { 99 | node.primitive.type = PRIMITIVE_SPHERE; 100 | } else if (prim_type == "box") { 101 | node.primitive.type = PRIMITIVE_BOX; 102 | } else if (prim_type == "cylinder") { 103 | node.primitive.type = PRIMITIVE_CYLINDER; 104 | } else if (prim_type == "cone") { 105 | node.primitive.type = PRIMITIVE_CONE; 106 | } else { 107 | fprintf(stderr, "Unknown primitive: %s\n", prim_type.c_str()); 108 | abort(); 109 | } 110 | 111 | switch (node.primitive.type) { 112 | case PRIMITIVE_SPHERE: { 113 | float r = j["radius"].GetFloat(); 114 | node.primitive.sphere.radius = glm::vec4(r, 0, 0, 0); 115 | break; 116 | } 117 | case PRIMITIVE_BOX: { 118 | auto s = j["sides"].GetArray(); 119 | float s_x = s[0].GetFloat(); 120 | float s_y = s[1].GetFloat(); 121 | float s_z = s[2].GetFloat(); 122 | node.primitive.box.sizes = glm::vec4(s_x, s_y, s_z, 0); 123 | 124 | float scale = fmaxf(node.primitive.box.sizes.x, node.primitive.box.sizes.z) * 2; 125 | auto c = j["bevel"].GetArray(); 126 | glm::vec4 corner_rounding; 127 | corner_rounding.x = c[0].GetFloat(); 128 | corner_rounding.y = c[1].GetFloat(); 129 | corner_rounding.z = c[2].GetFloat(); 130 | corner_rounding.w = c[3].GetFloat(); 131 | 132 | #if 1 133 | uint32_t corner_data = 0; 134 | corner_data |= (uint32_t)(corner_rounding.x * 255.f * 2.f / scale) << 0; 135 | corner_data |= (uint32_t)(corner_rounding.y * 255.f * 2.f / scale) << 8; 136 | corner_data |= 
(uint32_t)(corner_rounding.z * 255.f * 2.f / scale) << 16; 137 | corner_data |= (uint32_t)(corner_rounding.w * 255.f * 2.f / scale) << 24; 138 | memcpy(&node.primitive.box.sizes.w, &corner_data, sizeof(uint32_t)); 139 | #endif 140 | break; 141 | } 142 | case PRIMITIVE_CYLINDER: { 143 | float h = j["height"].GetFloat(); 144 | float r = j["radius"].GetFloat(); 145 | node.primitive.cylinder.height = h; 146 | node.primitive.cylinder.radius = r; 147 | break; 148 | } 149 | case PRIMITIVE_CONE: { 150 | float h = j["height"].GetFloat(); 151 | float r = j["radius"].GetFloat(); 152 | node.primitive.cone.height = h; 153 | node.primitive.cone.radius = r; 154 | break; 155 | } 156 | 157 | default: 158 | abort(); 159 | } 160 | } else if (type == "binaryOperator") { 161 | std::string blend_mode = j["blendMode"].GetString(); 162 | uint32_t op; 163 | if (blend_mode == "union") { 164 | op = OP_UNION; 165 | } else if (blend_mode == "sub") { 166 | op = OP_SUB; 167 | } else if (blend_mode == "inter") { 168 | op = OP_INTER; 169 | } else { 170 | fprintf(stderr, "Unknown blend mode: %s\n", blend_mode.c_str()); 171 | abort(); 172 | } 173 | 174 | int left_idx = nodes.size(); 175 | nodes.push_back({}); 176 | int right_idx = nodes.size(); 177 | nodes.push_back({}); 178 | 179 | stack.push_back({ &j["leftChild"], mat * node_mat, left_idx, 1 }); 180 | stack.push_back({ &j["rightChild"], mat * node_mat, right_idx, op != OP_SUB }); 181 | 182 | CSGNode &node = nodes[node_idx]; 183 | node.left = left_idx; 184 | node.right = right_idx; 185 | node.sign = e.sign; 186 | float k = j["blendRadius"].GetFloat(); 187 | 188 | node.binary_op = BinaryOp(k, op == OP_UNION, op); 189 | } else { 190 | fprintf(stderr, "invalid type: %s\n", type.c_str()); 191 | abort(); 192 | } 193 | } 194 | } 195 | 196 | rapidjson::Value write_node(rapidjson::Document &d, const std::vector &nodes, int node_idx) { 197 | CSGNode node = nodes[node_idx]; 198 | 199 | rapidjson::Value v(rapidjson::kObjectType); 200 | 201 | auto a = 
d.GetAllocator(); 202 | 203 | 204 | glm::mat4x3 mat; 205 | switch (node.type) { 206 | case NODETYPE_BINARY: { 207 | v.AddMember("nodeType", "binaryOperator", a); 208 | mat = glm::mat3x4(1); 209 | rapidjson::Value left_child = write_node(d, nodes, node.left); 210 | rapidjson::Value right_child = write_node(d, nodes, node.right); 211 | v.AddMember("leftChild", left_child, a); 212 | v.AddMember("rightChild", right_child, a); 213 | float k; 214 | uint32_t k_uint = node.binary_op.blend_factor_and_sign & ~7u; 215 | memcpy(&k, &k_uint, sizeof(float)); 216 | v.AddMember("blendRadius", k, a); 217 | uint32_t op = (node.binary_op.blend_factor_and_sign >> 1) & 3u; 218 | switch (op) { 219 | case OP_UNION: 220 | v.AddMember("blendMode", "union", a); 221 | break; 222 | case OP_INTER: 223 | v.AddMember("blendMode", "inter", a); 224 | break; 225 | case OP_SUB: 226 | v.AddMember("blendMode", "sub", a); 227 | break; 228 | default: 229 | abort(); 230 | } 231 | break; 232 | } 233 | 234 | case NODETYPE_PRIMITIVE: { 235 | Primitive p = node.primitive; 236 | v.AddMember("nodeType", "primitive", a); 237 | mat = glm::transpose(glm::mat3x4(p.m_row0, p.m_row1, p.m_row2)); 238 | 239 | { 240 | float r = (float)((node.primitive.color >> 0) & 0xff) / 255.99f;; 241 | float g = (float)((node.primitive.color >> 8) & 0xff) / 255.99f;; 242 | float b = (float)((node.primitive.color >> 16) & 0xff) / 255.99f;; 243 | rapidjson::Value color_arr(rapidjson::kArrayType); 244 | color_arr.PushBack(r, a); 245 | color_arr.PushBack(g, a); 246 | color_arr.PushBack(b, a); 247 | v.AddMember("color", color_arr, a); 248 | } 249 | 250 | v.AddMember("round_x", node.primitive.extrude_rounding.x, a); 251 | v.AddMember("round_y", node.primitive.extrude_rounding.y, a); 252 | switch (node.primitive.type) { 253 | case PRIMITIVE_BOX: { 254 | v.AddMember("primitiveType", "box", a); 255 | rapidjson::Value sides_arr(rapidjson::kArrayType); 256 | sides_arr.PushBack(rapidjson::Value(node.primitive.box.sizes.x).Move(), a); 257 | 
sides_arr.PushBack(rapidjson::Value(node.primitive.box.sizes.y).Move(), a); 258 | sides_arr.PushBack(rapidjson::Value(node.primitive.box.sizes.z).Move(), a); 259 | v.AddMember("sides", sides_arr, a); 260 | 261 | float scale = fmaxf(node.primitive.box.sizes.x, node.primitive.box.sizes.z) * 2; 262 | rapidjson::Value bevel_arr(rapidjson::kArrayType); 263 | uint32_t corner_data; 264 | memcpy(&corner_data, &node.primitive.box.sizes.w, sizeof(uint32_t)); 265 | 266 | glm::vec4 corner_rounding; 267 | corner_rounding.x = (float)((corner_data >> 0) & 0xff); 268 | corner_rounding.y = (float)((corner_data >> 8) & 0xff); 269 | corner_rounding.z = (float)((corner_data >> 16) & 0xff); 270 | corner_rounding.w = (float)((corner_data >> 24) & 0xff); 271 | corner_rounding = corner_rounding * scale / 255.f; 272 | corner_rounding /= 2; 273 | bevel_arr.PushBack(corner_rounding.x, a); 274 | bevel_arr.PushBack(corner_rounding.y, a); 275 | bevel_arr.PushBack(corner_rounding.z, a); 276 | bevel_arr.PushBack(corner_rounding.w, a); 277 | v.AddMember("bevel", bevel_arr, a); 278 | } 279 | break; 280 | 281 | case PRIMITIVE_CONE: 282 | v.AddMember("primitiveType", "cone", a); 283 | v.AddMember("height", node.primitive.cone.height, a); 284 | v.AddMember("radius", node.primitive.cone.radius, a); 285 | break; 286 | 287 | case PRIMITIVE_SPHERE: 288 | v.AddMember("primitiveType", "sphere", a); 289 | v.AddMember("radius", node.primitive.sphere.radius.x, a); 290 | break; 291 | 292 | default: 293 | abort(); 294 | } 295 | break; 296 | } 297 | } 298 | { 299 | rapidjson::Value mat_array(rapidjson::kArrayType); 300 | for (int row = 0; row < 3; row++) { 301 | for (int col = 0; col < 4; col++) { 302 | rapidjson::Value f(rapidjson::kNumberType); 303 | f.SetFloat(mat[col][row]); 304 | mat_array.PushBack(f, a); 305 | } 306 | } 307 | v.AddMember("matrix", mat_array, a); 308 | } 309 | 310 | return v; 311 | } 312 | 313 | void write_json(const std::vector &nodes, const char *path) { 314 | rapidjson::Document d; 315 | 
d.SetObject(); 316 | 317 | rapidjson::Value v = write_node(d, nodes, nodes.size() - 1); 318 | for (auto &m: v.GetObject()) { 319 | d.AddMember(m.name, m.value, d.GetAllocator()); 320 | } 321 | 322 | FILE *fp = fopen(path, "w"); 323 | 324 | char write_buf[64 * 1024]; 325 | rapidjson::FileWriteStream os(fp, write_buf, sizeof(write_buf)); 326 | rapidjson::Writer writer(os); 327 | d.Accept(writer); 328 | 329 | fflush(fp); 330 | fclose(fp); 331 | } -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include "context.h" 2 | //#include "example_config.h" 3 | #include 4 | #include "CLI/CLI.hpp" 5 | #include 6 | #include 7 | #include "imgui.h" 8 | #include "backends/imgui_impl_vulkan.h" 9 | #include "backends/imgui_impl_glfw.h" 10 | #include "scene.h" 11 | #include 12 | 13 | int create_scene(std::vector& csg_tree, const std::string& input_path, glm::vec3& aabb_min, glm::vec3& aabb_max) { 14 | csg_tree.clear(); 15 | load_json(input_path.c_str(), csg_tree, aabb_min, aabb_max); 16 | int root_idx = 0; 17 | return root_idx; 18 | } 19 | 20 | float cam_distance = 3.f; 21 | 22 | void scroll_callback(GLFWwindow* window, double xoffset, double yoffset) { 23 | cam_distance -= yoffset * 0.1; 24 | } 25 | 26 | bool show_imgui = true; 27 | 28 | void key_callback(GLFWwindow* window, int key, int scancode, int action, int mods) { 29 | ImGui_ImplGlfw_KeyCallback(window, key, scancode, action, mods); 30 | if (key == GLFW_KEY_I && action == GLFW_PRESS) { 31 | show_imgui = !show_imgui; 32 | } 33 | } 34 | 35 | int main(int argc, char** argv) { 36 | glm::vec3 cam_target = glm::vec3(0); 37 | float cam_yaw = 0.f; 38 | float cam_pitch = M_PI / 2; 39 | 40 | bool culling_enabled = true; 41 | int num_samples = 1; 42 | std::string shading_mode_str = "shaded"; 43 | 44 | std::string anim_path = ""; 45 | //std::string input_file = "../build/catalog/guy.json"; 46 | std::string 
input_file = "../scenes/trees.json"; 47 | spv_dir = "."; 48 | CLI::App cli{ "Lipschitz Pruning demo" }; 49 | cli.add_option("-i,--input", input_file, "Input"); 50 | cli.add_option("-s,--shaders", spv_dir, "SPIR-V path"); 51 | float min_coord = -1; 52 | float max_coord = 1; 53 | cli.add_option("--min", min_coord, "AABB min"); 54 | cli.add_option("--max", max_coord, "AABB min"); 55 | cli.add_option("--cam_yaw", cam_yaw, "Camera yaw"); 56 | cli.add_option("--cam_pitch", cam_pitch, "Camera pitch"); 57 | cli.add_option("--cam_dist", cam_distance, "Camera pitch"); 58 | cli.add_option("--culling", culling_enabled, "Enable culling"); 59 | cli.add_option("--samples", num_samples, "Samples per pixel"); 60 | cli.add_option("--shading", shading_mode_str, "Shading mode"); 61 | cli.add_option("--max-active", MAX_ACTIVE_COUNT, "Max active count"); 62 | cli.add_option("--max-tmp", MAX_TMP_COUNT, "Max tmp count"); 63 | cli.add_option("--anim", anim_path, "Animation directory"); 64 | cli.add_option("--target_x", cam_target.x, "Target X"); 65 | cli.add_option("--target_y", cam_target.y, "Target Y"); 66 | cli.add_option("--target_z", cam_target.z, "Target Z"); 67 | CLI11_PARSE(cli, argc, argv); 68 | 69 | //std::string input_file = "C:\\Users\\schtr\\Documents\\projects\\SDFCulling\\build\\catalog\\guy.json"; 70 | //spv_dir = "C:\\Users\\schtr\\Documents\\projects\\SDFCulling\\build"; 71 | 72 | constexpr int NUM_SCENES = 3; 73 | const char* preset_scenes[NUM_SCENES+1][2] = { 74 | { "Trees", "trees.json" }, 75 | { "Monument", "monument.json" }, 76 | { "Molecule", "molecule.json" }, 77 | { "Custom", nullptr } 78 | }; 79 | int preset_scene_idx = 0; 80 | 81 | int num_nodes = 0; 82 | std::vector csg_tree; 83 | 84 | 85 | 86 | Context ctx; 87 | ctx.initialize(true, 8); 88 | 89 | int root_idx = create_scene(csg_tree, input_file, ctx.render_data.aabb_min, ctx.render_data.aabb_max); 90 | num_nodes = csg_tree.size(); 91 | 92 | ctx.alloc_input_buffers(num_nodes); 93 | ctx.upload(csg_tree, 
root_idx); 94 | 95 | ctx.render_data.push_constants.alpha = 1; 96 | ctx.render_data.culling_enabled = culling_enabled; 97 | ctx.render_data.num_samples = num_samples; 98 | 99 | if (shading_mode_str == "normals") { 100 | ctx.render_data.shading_mode = SHADING_MODE_NORMALS; 101 | } 102 | else if (shading_mode_str == "shaded") { 103 | ctx.render_data.shading_mode = SHADING_MODE_SHADED; 104 | } 105 | else if (shading_mode_str == "beauty") { 106 | ctx.render_data.shading_mode = SHADING_MODE_BEAUTY; 107 | } 108 | else { 109 | fprintf(stderr, "Unknown shading mode: %s\n", shading_mode_str.c_str()); 110 | abort(); 111 | } 112 | 113 | glfwSetKeyCallback(ctx.init.window, key_callback); 114 | glfwSetScrollCallback(ctx.init.window, scroll_callback); 115 | 116 | double last_x, last_y; 117 | glfwGetCursorPos(ctx.init.window, &last_x, &last_y); 118 | 119 | Timings timing = { }; 120 | 121 | double anim_start_time; 122 | float anim_speed = 4; 123 | bool anim_play = false; 124 | while (!glfwWindowShouldClose(ctx.init.window)/* && g_frame < 10000*/) { 125 | glfwPollEvents(); 126 | 127 | ImGui_ImplVulkan_NewFrame(); 128 | ImGui_ImplGlfw_NewFrame(); 129 | ImGui::NewFrame(); 130 | 131 | ImGui::Begin("GUI"); 132 | 133 | ImGui::SeparatorText("Scene"); 134 | if (ImGui::BeginCombo("Preset", preset_scenes[preset_scene_idx][0])) { 135 | for (int i = 0; i < NUM_SCENES; i++) { 136 | bool is_selected = (i == preset_scene_idx); 137 | if (ImGui::Selectable(preset_scenes[i][0], is_selected)) { 138 | preset_scene_idx = i; 139 | input_file = "../scenes/" + std::string(preset_scenes[i][1]); 140 | root_idx = create_scene(csg_tree, input_file, ctx.render_data.aabb_min, ctx.render_data.aabb_max); 141 | num_nodes = csg_tree.size(); 142 | ctx.alloc_input_buffers(num_nodes); 143 | ctx.upload(csg_tree, root_idx); 144 | 145 | anim_play = false; 146 | ctx.render_data.culling_enabled = true; 147 | } 148 | } 149 | ImGui::EndCombo(); 150 | } 151 | if (ImGui::Button("Reload scene")) { 152 | std::vector csg_tree; 
153 | int root_idx = create_scene(csg_tree, input_file, ctx.render_data.aabb_min, ctx.render_data.aabb_max); 154 | ctx.upload(csg_tree, root_idx); 155 | } 156 | ImGui::SliderFloat3("AABB min", &ctx.render_data.aabb_min[0], -3, 0); 157 | ImGui::SliderFloat3("AABB max", &ctx.render_data.aabb_max[0], 0, 3); 158 | ImGui::Text("Num nodes: %d", num_nodes); 159 | 160 | ImGui::SeparatorText("Animation"); 161 | if (anim_play) { 162 | if (ImGui::Button("Stop anim")) { 163 | anim_play = false; 164 | root_idx = create_scene(csg_tree, input_file, ctx.render_data.aabb_min, ctx.render_data.aabb_max); 165 | num_nodes = csg_tree.size(); 166 | ctx.alloc_input_buffers(num_nodes); 167 | ctx.upload(csg_tree, root_idx); 168 | } 169 | } else if (ImGui::Button("Play anim")) { 170 | anim_play = true; 171 | anim_start_time = glfwGetTime(); 172 | { 173 | CSGNode node{}; 174 | node.primitive.sphere = { .radius = glm::vec4(0.2) }; 175 | node.primitive.m_row0 = glm::vec4(1,0,0,0);; 176 | node.primitive.m_row1 = glm::vec4(0,1,0,0);; 177 | node.primitive.m_row2 = glm::vec4(0,0,1,0);; 178 | node.primitive.type = PRIMITIVE_SPHERE; 179 | node.primitive.color = 0xaaaaff; 180 | node.type = NODETYPE_PRIMITIVE; 181 | node.left = -1; 182 | node.right = -1; 183 | node.sign = true; 184 | csg_tree.push_back(node); 185 | } 186 | { 187 | CSGNode node{}; 188 | node.binary_op = BinaryOp(1e-1, true, OP_UNION); 189 | node.type = NODETYPE_BINARY; 190 | node.sign = true; 191 | node.left = root_idx; 192 | node.right = csg_tree.size()-1; 193 | csg_tree.push_back(node); 194 | } 195 | num_nodes = csg_tree.size(); 196 | root_idx = csg_tree.size()-1; 197 | ctx.alloc_input_buffers(num_nodes); 198 | ctx.upload(csg_tree, root_idx); 199 | } 200 | ImGui::SliderFloat("Anim speed", &anim_speed, 0, 4.f); 201 | if (anim_play) { 202 | auto before = std::chrono::high_resolution_clock::now(); 203 | float anim_time = (float)(glfwGetTime() - anim_start_time); 204 | anim_time *= anim_speed; 205 | glm::vec3 center = { 206 | 
cosf(anim_time), 207 | cosf(anim_time*0.3f), 208 | sinf(anim_time)}; 209 | center *= sin(anim_time*0.56 + 123.4); 210 | float radius = 0.2; 211 | glm::vec3 scale = ctx.render_data.aabb_max-ctx.render_data.aabb_min-2.f*radius; 212 | center = ctx.render_data.aabb_min + radius + (center * 0.5f + 0.5f) * scale; 213 | csg_tree[num_nodes-2].primitive.m_row0[3] = -center.x; 214 | csg_tree[num_nodes-2].primitive.m_row1[3] = -center.y; 215 | csg_tree[num_nodes-2].primitive.m_row2[3] = -center.z; 216 | ctx.upload(csg_tree, root_idx); 217 | auto after = std::chrono::high_resolution_clock::now(); 218 | float upload_ms = (float)std::chrono::duration_cast(after - before).count() / (float)1000.f; 219 | ImGui::Text("Upload time: %fms\n", upload_ms); 220 | } 221 | 222 | ImGui::SeparatorText("Pruning"); 223 | if (num_nodes > 500) { 224 | ImGui::BeginDisabled(); 225 | } 226 | ImGui::Checkbox("Enable pruning", &ctx.render_data.culling_enabled); 227 | if (num_nodes > 500) { 228 | ImGui::SameLine(); 229 | ImGui::Text("(forced when > 500 nodes)"); 230 | ImGui::EndDisabled(); 231 | } 232 | ImGui::Checkbox("Recompute pruning", &ctx.render_data.compute_culling); 233 | if (ImGui::Button("-")) { 234 | ctx.render_data.final_grid_lvl -= 2; 235 | if (ctx.render_data.final_grid_lvl < 2) ctx.render_data.final_grid_lvl = 2; 236 | } 237 | ImGui::SameLine(); 238 | if (ImGui::Button("+")) { 239 | ctx.render_data.final_grid_lvl += 2; 240 | if (ctx.render_data.final_grid_lvl > 8) ctx.render_data.final_grid_lvl = 8; 241 | } 242 | ImGui::SameLine(); ImGui::Text("Grid size: 1 << %d\n", ctx.render_data.final_grid_lvl); 243 | 244 | 245 | ctx.render_data.show_imgui = show_imgui; 246 | 247 | 248 | 249 | glm::vec3 v = glm::vec3{ 250 | cam_distance * sinf(cam_yaw) * sinf(cam_pitch), 251 | cam_distance * cosf(cam_pitch), 252 | cam_distance * cosf(cam_yaw) * sinf(cam_pitch), 253 | }; 254 | glm::vec3 cam_position = cam_target + v; 255 | 256 | double cur_x, cur_y; 257 | glfwGetCursorPos(ctx.init.window, &cur_x, 
&cur_y); 258 | double delta_x = cur_x - last_x; 259 | double delta_y = cur_y - last_y; 260 | last_x = cur_x; 261 | last_y = cur_y; 262 | 263 | if (!ImGui::GetIO().WantCaptureMouse && glfwGetMouseButton(ctx.init.window, GLFW_MOUSE_BUTTON_LEFT)) { 264 | cam_yaw -= 0.01 * delta_x; 265 | cam_pitch -= 0.01 * delta_y; 266 | cam_yaw = fmodf(cam_yaw, 2.f * M_PI); 267 | cam_pitch = fminf(fmaxf(cam_pitch, 1e-3), M_PI - 1e-3); 268 | } 269 | 270 | if (!ImGui::GetIO().WantCaptureMouse && glfwGetMouseButton(ctx.init.window, GLFW_MOUSE_BUTTON_RIGHT)) { 271 | glm::vec3 vel = glm::vec3(0); 272 | 273 | glm::vec3 right = glm::normalize(glm::cross(v, glm::vec3(0, 1, 0))); 274 | glm::vec3 cam_up = glm::normalize(glm::cross(right, v)); 275 | vel += right * 0.01f * (float)delta_x; 276 | vel += cam_up * 0.01f * (float)delta_y; 277 | cam_target += vel; 278 | cam_position += vel; 279 | } 280 | 281 | 282 | //ImGui::ShowDemoWindow(); 283 | ImGui::SeparatorText("Timings"); 284 | ImGui::Text("Render: %fms", ctx.render_data.render_elapsed_ms); 285 | ImGui::Text("Culling: %fms", ctx.render_data.culling_elapsed_ms); 286 | ImGui::Text("Tracing: %fms", ctx.render_data.tracing_elapsed_ms); 287 | 288 | ImGui::SeparatorText("VRAM"); 289 | //ImGui::Text("Memory usage: %lfG", (double)g_memory_usage / (1024. * 1024. 
* 1024.)); 290 | { 291 | VkPhysicalDeviceMemoryBudgetPropertiesEXT budget; 292 | budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT; 293 | budget.pNext = nullptr; 294 | VkPhysicalDeviceMemoryProperties2 props; 295 | props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2; 296 | props.pNext = &budget; 297 | vkGetPhysicalDeviceMemoryProperties2(ctx.init.device.physical_device, &props); 298 | 299 | for (int i = 0; i < props.memoryProperties.memoryHeapCount; i++) { 300 | if ((props.memoryProperties.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) == 0) continue; 301 | ImGui::Text("Heap %i: %lfG / %lfG", i, (double)budget.heapUsage[i] / (1024.*1024.*1024.), (double)budget.heapBudget[i] / (1024.*1024.*1024.)); 302 | } 303 | } 304 | ImGui::Text("Actual mem usage: %fG", timing.pruning_mem_usage_gb); 305 | ImGui::Text("Actual active ratio: %.2f", (float)ctx.render_data.max_active_count / (float)MAX_ACTIVE_COUNT); 306 | ImGui::Text("Actual tmp ratio: %.2f", (float)ctx.render_data.max_tmp_count / (float)MAX_TMP_COUNT); 307 | 308 | 309 | 310 | 311 | ImGui::SeparatorText("Render"); 312 | if (ImGui::Combo("Shading mode", &ctx.render_data.shading_mode, "Shaded\0Heatmap\0Normals\0AO\0")) { 313 | ctx.init.disp.destroyPipeline(ctx.render_data.graphics_pipeline, nullptr); 314 | create_graphics_pipeline(ctx.init, ctx.render_data); 315 | } 316 | if (ctx.render_data.shading_mode == SHADING_MODE_HEATMAP) { 317 | ImGui::SliderInt("Colormap max", &ctx.render_data.colormap_max, 1, 64); 318 | } 319 | ImGui::SliderInt("Samples per pixel", &ctx.render_data.num_samples, 1, 64); 320 | 321 | ImGui::SliderFloat("Gamma", &ctx.render_data.gamma, 1, 4); 322 | 323 | #if 1 324 | ImGui::SeparatorText("Shader statistics"); 325 | if (ImGui::TreeNode("Pruning shader")) { 326 | char pipeline_stats[4096]; 327 | get_pipeline_stats(ctx.init, ctx.render_data.culling_pipeline.pipe, 0, pipeline_stats, 4096); 328 | ImGui::Text("%s", pipeline_stats); 329 | 
ImGui::TreePop(); 330 | } 331 | if (ImGui::TreeNode("Fragment shader")) { 332 | char pipeline_stats[4096]; 333 | get_pipeline_stats(ctx.init, ctx.render_data.graphics_pipeline, 1, pipeline_stats, 4096); 334 | ImGui::Text("%s", pipeline_stats); 335 | ImGui::TreePop(); 336 | } 337 | #endif 338 | 339 | #if 0 340 | if (ImGui::Button("Copy command")) { 341 | char command[1024]; 342 | snprintf(command, 1024, "-i %s --cam_yaw=%f --cam_pitch=%f --cam_dist=%f --target_x=%f --target_y=%f --target_z=%f --min_x=%f --min_y=%f --min_z=%f --max_x=%f --max_y=%f --max_z=%f --gamma=%f", 343 | input_file.c_str(), 344 | cam_yaw, 345 | cam_pitch, 346 | cam_distance, 347 | cam_target.x, 348 | cam_target.y, 349 | cam_target.z, 350 | ctx.render_data.aabb_min.x, 351 | ctx.render_data.aabb_min.y, 352 | ctx.render_data.aabb_min.z, 353 | ctx.render_data.aabb_max.x, 354 | ctx.render_data.aabb_max.y, 355 | ctx.render_data.aabb_max.z, 356 | ctx.render_data.gamma 357 | ); 358 | glfwSetClipboardString(ctx.init.window, command); 359 | } 360 | #endif 361 | 362 | 363 | timing = ctx.render(cam_position, cam_target); 364 | } 365 | VK_CHECK(ctx.init.disp.deviceWaitIdle()); 366 | 367 | //{ 368 | // FILE* fp = fopen("timings.csv", "w"); 369 | // for (int i = 0; i < 10000; i++) { 370 | // fprintf(fp, "%f\n", g_timings[i]); 371 | // } 372 | // fflush(fp); 373 | // fclose(fp); 374 | //} 375 | 376 | return 0; 377 | } 378 | -------------------------------------------------------------------------------- /shaders/simple.frag.glsl: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | #include "extensions.glsl" 3 | layout (location = 0) out vec4 outColor; 4 | layout(constant_id = 0) const int shading_mode = 0; 5 | 6 | #include "../include/constants.h" 7 | #include "common.glsl" 8 | 9 | layout(push_constant) uniform PushConstant { 10 | //mat4 world_to_clip; 11 | vec4 aabb_min; 12 | vec4 aabb_max; 13 | ivec2 u_Resolution; 14 | PrimitivesRef prims; 15 | 
BinaryOpsRef binary_ops; 16 | NodesRef nodes; 17 | IntArrayRef parents_in; 18 | IntArrayRef parents_out; 19 | ActiveNodesRef active_nodes_in; 20 | ActiveNodesRef active_nodes_out; 21 | IntArrayRef parent_cells_offset; 22 | IntArrayRef cells_offset; 23 | IntArrayRef parent_cells_num_active; 24 | IntArrayRef cells_num_active; 25 | ivec2 pad7; 26 | FloatArrayRef cell_error_in; 27 | FloatArrayRef cell_error_out; 28 | ivec2 pad8; 29 | ivec2 pad9; 30 | ivec2 pad10; 31 | Mat4Ref mvp; 32 | Vec4ArrayRef cam; 33 | int total_num_nodes; 34 | int grid_size; 35 | int first_lvl; 36 | float max_rel_err; 37 | float viz_max; 38 | float alpha; 39 | int culling_enabled; 40 | float gamma; 41 | int num_samples; 42 | }; 43 | 44 | #include "eval.glsl" 45 | 46 | vec2 smin_blend( float a, float b, float k ) 47 | { 48 | float h = max(k-abs(a-b), 0) / k; 49 | float m = h*h*0.5; 50 | float s = m*k*0.5; 51 | return (a> 0) & 0xff) / 255.f; 94 | float g = float((prim.color >> 8) & 0xff) / 255.f; 95 | float b = float((prim.color >> 16) & 0xff) / 255.f; 96 | albedo = vec3(r,g,b); 97 | } 98 | 99 | d *= ActiveNode_sign(active_node) ? 
1 : -1; 100 | if (stack_idx >= STACK_DEPTH) { 101 | //debugPrintfEXT("Stack overflow\n"); 102 | return vec3(0); 103 | } 104 | stack[stack_idx++] = StackEntry(d, albedo); 105 | } 106 | 107 | return stack[0].col; 108 | } 109 | 110 | vec3 get_color(vec3 p) { 111 | const int STACK_DEPTH = 128; 112 | 113 | struct StackEntry { 114 | float d; 115 | vec3 col; 116 | }; 117 | StackEntry stack[STACK_DEPTH]; 118 | int stack_idx = 0; 119 | 120 | for (int i = 0; i < total_num_nodes; i++) { 121 | int node_idx = i; 122 | 123 | Node node = nodes.tab[node_idx]; 124 | float d; 125 | vec3 albedo; 126 | if (node.type == NODETYPE_BINARY) { 127 | StackEntry left_entry = stack[stack_idx-2]; 128 | StackEntry right_entry = stack[stack_idx-1]; 129 | float left_val = left_entry.d; 130 | float right_val = right_entry.d; 131 | stack_idx -= 2; 132 | BinaryOp op = binary_ops.tab[node.idx_in_type]; 133 | float k = BinaryOp_blend_factor(op); 134 | float s = BinaryOp_sign(op); 135 | uint typ = BinaryOp_op(op); 136 | if (typ == OP_SUB) right_val *= -1; 137 | vec2 v = s*smin_blend(s*left_val, s*right_val, k); 138 | d = v.x; 139 | albedo = mix(left_entry.col, right_entry.col, v.y); 140 | } else if (node.type == NODETYPE_PRIMITIVE) { 141 | Primitive prim = prims.tab[node.idx_in_type]; 142 | d = eval_prim(p, prim); 143 | float r = float((prim.color >> 0) & 0xff) / 255.f; 144 | float g = float((prim.color >> 8) & 0xff) / 255.f; 145 | float b = float((prim.color >> 16) & 0xff) / 255.f; 146 | albedo = vec3(r,g,b); 147 | } 148 | 149 | if (stack_idx >= STACK_DEPTH) { 150 | //debugPrintfEXT("Stack overflow\n"); 151 | return vec3(0); 152 | } 153 | stack[stack_idx++] = StackEntry(d, albedo); 154 | } 155 | 156 | return stack[0].col; 157 | } 158 | 159 | 160 | vec3 grad_active(vec3 p, int cell_idx) { 161 | float h = 5e-4; 162 | const vec2 k = vec2(1,-1); 163 | bool nf; 164 | return normalize(k.xyy*sdf_active(p+k.xyy*h, cell_idx,nf)+ 165 | k.yyx*sdf_active(p+k.yyx*h, cell_idx,nf)+ 166 | k.yxy*sdf_active(p+k.yxy*h, 
cell_idx,nf)+ 167 | k.xxx*sdf_active(p+k.xxx*h, cell_idx,nf)); 168 | } 169 | 170 | vec3 grad(vec3 p) { 171 | float h = 5e-4; 172 | const vec2 k = vec2(1,-1); 173 | bool nf; 174 | return normalize(k.xyy*sdf(p+k.xyy*h)+ 175 | k.yyx*sdf(p+k.yyx*h)+ 176 | k.yxy*sdf(p+k.yxy*h)+ 177 | k.xxx*sdf(p+k.xxx*h)); 178 | } 179 | 180 | 181 | #if 0 182 | float ambient_occlusion(vec3 p, vec3 N, int cell_idx) { 183 | float s = 0; 184 | float h = 5e-3; 185 | bool nf; 186 | for (int i = 1; i <= 5 ; i++) { 187 | float offset_dist = h * float(i); 188 | vec3 offset_dir = normalize(N + normalize(sin(float(i)+vec3(0,2,4)))); 189 | s += offset_dist - sdf_active(p + offset_dir * offset_dist, cell_idx,nf); 190 | } 191 | return exp(-30*s); 192 | } 193 | #endif 194 | 195 | float hash(float uv) 196 | { 197 | return fract(sin(11.23 * uv) * 23758.5453); 198 | } 199 | 200 | #define PI 3.1415 201 | 202 | vec3 randomSphereDir(vec2 rnd) 203 | { 204 | float s = rnd.x*PI*2.; 205 | float t = rnd.y*2.-1.; 206 | return vec3(sin(s), cos(s), t) / sqrt(1.0 + t * t); 207 | } 208 | vec3 randomHemisphereDir(vec3 dir, float i) 209 | { 210 | vec3 v = randomSphereDir( vec2(hash(i+1.), hash(i+2.)) ); 211 | return v * sign(dot(v, dir)); 212 | } 213 | 214 | 215 | float ambient_occlusion( in vec3 p, in vec3 n, in float maxDist, in float falloff ) 216 | { 217 | const int nbIte = 32; 218 | const float nbIteInv = 1./float(nbIte); 219 | const float rad = 1.-1.*nbIteInv; //Hemispherical factor (self occlusion correction) 220 | 221 | float ao = 0.0; 222 | 223 | for( int i=0; i max(t0, 0.0); 252 | } 253 | 254 | bool shadow_ray_intersects_active(vec3 ray_o, vec3 ray_d, vec3 cell_size) { 255 | //float t = 3e-3; 256 | float t = 0; 257 | for (int i = 0; i < 2048; i++) { 258 | vec3 p = ray_o + t * ray_d; 259 | 260 | if (any(lessThan(p, aabb_min.xyz)) || any(greaterThanEqual(p, aabb_max.xyz))) { 261 | return false; 262 | } 263 | 264 | ivec3 cell = ivec3((p - aabb_min.xyz) / cell_size); 265 | cell = clamp(cell, ivec3(0), 
ivec3(grid_size-1)); 266 | int cell_idx = int(get_cell_idx(cell, grid_size)); 267 | 268 | bool near_field = true; 269 | float d = sdf_active(p, cell_idx, near_field); 270 | 271 | if (d < 1e-4) { 272 | return true; 273 | } 274 | t += abs(d); 275 | } 276 | return true; 277 | } 278 | 279 | bool shadow_ray_intersects(vec3 ray_o, vec3 ray_d, vec3 cell_size) { 280 | //float t = 3e-3; 281 | float t = 0; 282 | for (int i = 0; i < 2048; i++) { 283 | vec3 p = ray_o + t * ray_d; 284 | 285 | if (any(lessThan(p, aabb_min.xyz)) || any(greaterThanEqual(p, aabb_max.xyz))) { 286 | return false; 287 | } 288 | 289 | float d = sdf(p); 290 | 291 | if (d < 1e-4) { 292 | return true; 293 | } 294 | t += abs(d); 295 | } 296 | return true; 297 | } 298 | 299 | 300 | void main () { 301 | 302 | vec3 cam_pos = vec3(cam.tab[0]); 303 | vec3 cam_target = vec3(cam.tab[1]); 304 | 305 | outColor = vec4(0); 306 | 307 | PCG pcg; 308 | init_pcg(pcg, uint64_t(gl_FragCoord.y) * uint64_t(u_Resolution.x) + uint64_t(gl_FragCoord.x)); 309 | 310 | for (int sample_idx = 0; sample_idx < num_samples; sample_idx++) { 311 | 312 | float du = 0.5; 313 | float dv = 0.5; 314 | if (sample_idx != 0) { 315 | du = rand_float_0_1(pcg); 316 | dv = rand_float_0_1(pcg); 317 | } 318 | 319 | 320 | vec2 uv = gl_FragCoord.xy / vec2(u_Resolution); 321 | uv.y = 1 - uv.y; 322 | uv += (vec2(du,dv) * 2 - 1) * 0.5 / vec2(u_Resolution); 323 | 324 | 325 | vec3 forward = normalize(cam_target-cam_pos); 326 | vec3 right = normalize(cross(forward, vec3(0,1,0))); 327 | vec3 up = normalize(cross(right, forward)); 328 | 329 | mat3 ViewToWorld = mat3(right, up, forward); 330 | float aspect = float(u_Resolution.x) / float(u_Resolution.y); 331 | 332 | #if 1 333 | // perspective 334 | 335 | vec3 ray_o = vec3(cam_pos); 336 | vec3 ray_d_viewspace = normalize(vec3(0,0,1) + vec3(uv*2-1, 0)); 337 | ray_d_viewspace.x *= aspect; 338 | ray_d_viewspace = normalize(ray_d_viewspace); 339 | vec3 ray_d = ViewToWorld * ray_d_viewspace; 340 | #else 341 | // 
orthographic 342 | 343 | vec3 ray_d_viewspace = vec3(0,0,1); 344 | vec3 ray_o_viewspace = vec3(uv * 2.0 -1.0, 0); 345 | ray_o_viewspace.x *= aspect; 346 | vec3 ray_o = vec3(cam_pos) + ViewToWorld * ray_o_viewspace; 347 | //vec3 ray_o = vec3(cam_pos) + ray_o_viewspace; 348 | //vec3 ray_d = ray_d_viewspace; 349 | vec3 ray_d = ViewToWorld * ray_d_viewspace; 350 | #endif 351 | 352 | gl_FragDepth = 1; 353 | 354 | float t = 0; 355 | if (!BBoxIntersect(aabb_min.xyz, aabb_max.xyz, ray_o, ray_d, t)) { 356 | outColor += vec4(cam.tab[3].rgb,1); 357 | continue; 358 | } 359 | t += 1e-4; 360 | 361 | vec3 cell_size = (aabb_max.xyz - aabb_min.xyz) / float(grid_size); 362 | 363 | for (int i = 0; i < 256; i++) { 364 | vec3 p = ray_o + t * ray_d; 365 | 366 | if (any(lessThan(p, aabb_min.xyz)) || any(greaterThanEqual(p, aabb_max.xyz))) { 367 | t = -1; 368 | break; 369 | } 370 | 371 | ivec3 cell = ivec3((p - aabb_min.xyz) / cell_size); 372 | cell = clamp(cell, ivec3(0), ivec3(grid_size-1)); 373 | //if (all(greaterThanEqual(debug_cell.xyz, ivec3(0)))) { 374 | // cell = debug_cell.xyz; 375 | //} 376 | int cell_idx = int(get_cell_idx(cell, grid_size)); 377 | 378 | 379 | 380 | bool near_field = true; 381 | float d; 382 | if (bool(culling_enabled)) { 383 | d = sdf_active(p, cell_idx, near_field); 384 | } else { 385 | d = sdf(p); 386 | } 387 | 388 | if (d < -1e-4) { 389 | outColor = vec4(0,1,0,1); 390 | return; 391 | } 392 | 393 | if (near_field && abs(d) < min(5e-4, 5e-4*t)) { 394 | break; 395 | } 396 | t += abs(d); 397 | } 398 | 399 | vec3 color = vec3(0); 400 | if (t >= 0) { 401 | vec3 p = ray_o + t * ray_d; 402 | 403 | const vec4 projected_hit = mvp.m * vec4(p, 1.0); 404 | const float projected_depth = projected_hit.z / projected_hit.w; 405 | 406 | //gl_FragDepth = (( * projected_depth) + gl_DepthRange.near + gl_DepthRange.far) / 2.0; 407 | gl_FragDepth = projected_depth; 408 | 409 | ivec3 cell = ivec3((p - aabb_min.xyz) / cell_size); 410 | cell = clamp(cell, ivec3(0), 
ivec3(grid_size-1)); 411 | int cell_idx = int(get_cell_idx(cell, grid_size)); 412 | 413 | vec3 normal; 414 | if (bool(culling_enabled)) { 415 | normal = normalize(grad_active(p, cell_idx)); 416 | } else { 417 | normal = normalize(grad(p)); 418 | } 419 | if (shading_mode == SHADING_MODE_NORMALS) { 420 | color = vec3(0.5+0.5*normal); 421 | } else { 422 | vec3 L = normalize(vec3(1,1,1)); 423 | //color = p * 0.5 + 0.5; 424 | vec3 albedo; 425 | if (bool(culling_enabled)) { 426 | albedo = get_color_active(p, cell_idx); 427 | } else { 428 | albedo = get_color(p); 429 | } 430 | 431 | // divide by 2 to get number of primitives 432 | int num_active = (cells_num_active.tab[cell_idx] + 1 )/ 2; 433 | 434 | if (shading_mode == SHADING_MODE_HEATMAP) { 435 | albedo = inferno(min(1, float(num_active) / viz_max)); 436 | } 437 | 438 | float ao; 439 | if (shading_mode == SHADING_MODE_BEAUTY) { 440 | ao = 0.4 * ambient_occlusion(p,normal,1e-1,3); 441 | } else { 442 | ao = 0.4; 443 | } 444 | 445 | //outColor = vec4(vec3(ao), 1); 446 | //return; 447 | // half-lambert 448 | //color = albedo * (dot(L,normal) * 0.5 + 0.5); 449 | //color = albedo * ao; 450 | color = albedo * ao; 451 | //color = vec3(dot(L,normal)); 452 | 453 | bool in_shadow; 454 | if (bool(culling_enabled)) { 455 | in_shadow = shadow_ray_intersects_active(p + 5e-4 * normal, L, cell_size); 456 | } else { 457 | in_shadow = shadow_ray_intersects(p + 5e-4 * normal, L, cell_size); 458 | } 459 | 460 | if (dot(normal,L) > 0 && !in_shadow) { 461 | color += albedo * dot(L,normal); 462 | } 463 | } 464 | //color = vec3(ao); 465 | //color = vec3(ambient_occlusion(p, normal, cell_idx)); 466 | //color = normal * 0.5 + 0.5; 467 | 468 | //vec4 p_clip = world_to_clip * vec4(p, 1); 469 | //gl_FragDepth = p_clip.z / p_clip.w; 470 | //gl_FragDepth = 0.5; 471 | } else { 472 | color = vec3(1); 473 | } 474 | 475 | 476 | outColor += vec4 (color, 1); 477 | } 478 | outColor /= num_samples; 479 | outColor = vec4(pow(outColor.rgb, vec3(gamma)), 1); 
480 | } 481 | -------------------------------------------------------------------------------- /include/stb_image_write.h: -------------------------------------------------------------------------------- 1 | /* stb_image_write - v1.16 - public domain - http://nothings.org/stb 2 | writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015 3 | no warranty implied; use at your own risk 4 | 5 | Before #including, 6 | 7 | #define STB_IMAGE_WRITE_IMPLEMENTATION 8 | 9 | in the file that you want to have the implementation. 10 | 11 | Will probably not work correctly with strict-aliasing optimizations. 12 | 13 | ABOUT: 14 | 15 | This header file is a library for writing images to C stdio or a callback. 16 | 17 | The PNG output is not optimal; it is 20-50% larger than the file 18 | written by a decent optimizing implementation; though providing a custom 19 | zlib compress function (see STBIW_ZLIB_COMPRESS) can mitigate that. 20 | This library is designed for source code compactness and simplicity, 21 | not optimal image file size or run-time performance. 22 | 23 | BUILDING: 24 | 25 | You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h. 26 | You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace 27 | malloc,realloc,free. 28 | You can #define STBIW_MEMMOVE() to replace memmove() 29 | You can #define STBIW_ZLIB_COMPRESS to use a custom zlib-style compress function 30 | for PNG compression (instead of the builtin one), it must have the following signature: 31 | unsigned char * my_compress(unsigned char *data, int data_len, int *out_len, int quality); 32 | The returned data will be freed with STBIW_FREE() (free() by default), 33 | so it must be heap allocated with STBIW_MALLOC() (malloc() by default), 34 | 35 | UNICODE: 36 | 37 | If compiling for Windows and you wish to use Unicode filenames, compile 38 | with 39 | #define STBIW_WINDOWS_UTF8 40 | and pass utf8-encoded filenames. 
Call stbiw_convert_wchar_to_utf8 to convert 41 | Windows wchar_t filenames to utf8. 42 | 43 | USAGE: 44 | 45 | There are five functions, one for each image file format: 46 | 47 | int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); 48 | int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); 49 | int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); 50 | int stbi_write_jpg(char const *filename, int w, int h, int comp, const void *data, int quality); 51 | int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); 52 | 53 | void stbi_flip_vertically_on_write(int flag); // flag is non-zero to flip data vertically 54 | 55 | There are also five equivalent functions that use an arbitrary write function. You are 56 | expected to open/close your file-equivalent before and after calling these: 57 | 58 | int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); 59 | int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); 60 | int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); 61 | int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); 62 | int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); 63 | 64 | where the callback is: 65 | void stbi_write_func(void *context, void *data, int size); 66 | 67 | You can configure it with these global variables: 68 | int stbi_write_tga_with_rle; // defaults to true; set to 0 to disable RLE 69 | int stbi_write_png_compression_level; // defaults to 8; set to higher for more compression 70 | int stbi_write_force_png_filter; // defaults to -1; set to 0..5 to force a filter mode 71 | 72 | 73 | You can define 
STBI_WRITE_NO_STDIO to disable the file variant of these 74 | functions, so the library will not use stdio.h at all. However, this will 75 | also disable HDR writing, because it requires stdio for formatted output. 76 | 77 | Each function returns 0 on failure and non-0 on success. 78 | 79 | The functions create an image file defined by the parameters. The image 80 | is a rectangle of pixels stored from left-to-right, top-to-bottom. 81 | Each pixel contains 'comp' channels of data stored interleaved with 8-bits 82 | per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is 83 | monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall. 84 | The *data pointer points to the first byte of the top-left-most pixel. 85 | For PNG, "stride_in_bytes" is the distance in bytes from the first byte of 86 | a row of pixels to the first byte of the next row of pixels. 87 | 88 | PNG creates output files with the same number of components as the input. 89 | The BMP format expands Y to RGB in the file format and does not 90 | output alpha. 91 | 92 | PNG supports writing rectangles of data even when the bytes storing rows of 93 | data are not consecutive in memory (e.g. sub-rectangles of a larger image), 94 | by supplying the stride between the beginning of adjacent rows. The other 95 | formats do not. (Thus you cannot write a native-format BMP through the BMP 96 | writer, both because it is in BGR order and because it may have padding 97 | at the end of the line.) 98 | 99 | PNG allows you to set the deflate compression level by setting the global 100 | variable 'stbi_write_png_compression_level' (it defaults to 8). 101 | 102 | HDR expects linear float data. Since the format is always 32-bit rgb(e) 103 | data, alpha (if provided) is discarded, and for monochrome data it is 104 | replicated across all three channels. 105 | 106 | TGA supports RLE or non-RLE compressed data. 
To use non-RLE-compressed 107 | data, set the global variable 'stbi_write_tga_with_rle' to 0. 108 | 109 | JPEG does ignore alpha channels in input data; quality is between 1 and 100. 110 | Higher quality looks better but results in a bigger image. 111 | JPEG baseline (no JPEG progressive). 112 | 113 | CREDITS: 114 | 115 | 116 | Sean Barrett - PNG/BMP/TGA 117 | Baldur Karlsson - HDR 118 | Jean-Sebastien Guay - TGA monochrome 119 | Tim Kelsey - misc enhancements 120 | Alan Hickman - TGA RLE 121 | Emmanuel Julien - initial file IO callback implementation 122 | Jon Olick - original jo_jpeg.cpp code 123 | Daniel Gibson - integrate JPEG, allow external zlib 124 | Aarni Koskela - allow choosing PNG filter 125 | 126 | bugfixes: 127 | github:Chribba 128 | Guillaume Chereau 129 | github:jry2 130 | github:romigrou 131 | Sergio Gonzalez 132 | Jonas Karlsson 133 | Filip Wasil 134 | Thatcher Ulrich 135 | github:poppolopoppo 136 | Patrick Boettcher 137 | github:xeekworx 138 | Cap Petschulat 139 | Simon Rodriguez 140 | Ivan Tikhonov 141 | github:ignotion 142 | Adam Schackart 143 | Andrew Kensler 144 | 145 | LICENSE 146 | 147 | See end of file for license information. 
148 | 149 | */ 150 | 151 | #ifndef INCLUDE_STB_IMAGE_WRITE_H 152 | #define INCLUDE_STB_IMAGE_WRITE_H 153 | 154 | #include 155 | 156 | // if STB_IMAGE_WRITE_STATIC causes problems, try defining STBIWDEF to 'inline' or 'static inline' 157 | #ifndef STBIWDEF 158 | #ifdef STB_IMAGE_WRITE_STATIC 159 | #define STBIWDEF static 160 | #else 161 | #ifdef __cplusplus 162 | #define STBIWDEF extern "C" 163 | #else 164 | #define STBIWDEF extern 165 | #endif 166 | #endif 167 | #endif 168 | 169 | #ifndef STB_IMAGE_WRITE_STATIC // C++ forbids static forward declarations 170 | STBIWDEF int stbi_write_tga_with_rle; 171 | STBIWDEF int stbi_write_png_compression_level; 172 | STBIWDEF int stbi_write_force_png_filter; 173 | #endif 174 | 175 | #ifndef STBI_WRITE_NO_STDIO 176 | STBIWDEF int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); 177 | STBIWDEF int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); 178 | STBIWDEF int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); 179 | STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); 180 | STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality); 181 | 182 | #ifdef STBIW_WINDOWS_UTF8 183 | STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input); 184 | #endif 185 | #endif 186 | 187 | typedef void stbi_write_func(void *context, void *data, int size); 188 | 189 | STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); 190 | STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); 191 | STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); 192 | STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void 
*context, int w, int h, int comp, const float *data); 193 | STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); 194 | 195 | STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean); 196 | 197 | #endif//INCLUDE_STB_IMAGE_WRITE_H 198 | 199 | #ifdef STB_IMAGE_WRITE_IMPLEMENTATION 200 | 201 | #ifdef _WIN32 202 | #ifndef _CRT_SECURE_NO_WARNINGS 203 | #define _CRT_SECURE_NO_WARNINGS 204 | #endif 205 | #ifndef _CRT_NONSTDC_NO_DEPRECATE 206 | #define _CRT_NONSTDC_NO_DEPRECATE 207 | #endif 208 | #endif 209 | 210 | #ifndef STBI_WRITE_NO_STDIO 211 | #include 212 | #endif // STBI_WRITE_NO_STDIO 213 | 214 | #include 215 | #include 216 | #include 217 | #include 218 | 219 | #if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED)) 220 | // ok 221 | #elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED) 222 | // ok 223 | #else 224 | #error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)." 
225 | #endif 226 | 227 | #ifndef STBIW_MALLOC 228 | #define STBIW_MALLOC(sz) malloc(sz) 229 | #define STBIW_REALLOC(p,newsz) realloc(p,newsz) 230 | #define STBIW_FREE(p) free(p) 231 | #endif 232 | 233 | #ifndef STBIW_REALLOC_SIZED 234 | #define STBIW_REALLOC_SIZED(p,oldsz,newsz) STBIW_REALLOC(p,newsz) 235 | #endif 236 | 237 | 238 | #ifndef STBIW_MEMMOVE 239 | #define STBIW_MEMMOVE(a,b,sz) memmove(a,b,sz) 240 | #endif 241 | 242 | 243 | #ifndef STBIW_ASSERT 244 | #include 245 | #define STBIW_ASSERT(x) assert(x) 246 | #endif 247 | 248 | #define STBIW_UCHAR(x) (unsigned char) ((x) & 0xff) 249 | 250 | #ifdef STB_IMAGE_WRITE_STATIC 251 | static int stbi_write_png_compression_level = 8; 252 | static int stbi_write_tga_with_rle = 1; 253 | static int stbi_write_force_png_filter = -1; 254 | #else 255 | int stbi_write_png_compression_level = 8; 256 | int stbi_write_tga_with_rle = 1; 257 | int stbi_write_force_png_filter = -1; 258 | #endif 259 | 260 | static int stbi__flip_vertically_on_write = 0; 261 | 262 | STBIWDEF void stbi_flip_vertically_on_write(int flag) 263 | { 264 | stbi__flip_vertically_on_write = flag; 265 | } 266 | 267 | typedef struct 268 | { 269 | stbi_write_func *func; 270 | void *context; 271 | unsigned char buffer[64]; 272 | int buf_used; 273 | } stbi__write_context; 274 | 275 | // initialize a callback-based context 276 | static void stbi__start_write_callbacks(stbi__write_context *s, stbi_write_func *c, void *context) 277 | { 278 | s->func = c; 279 | s->context = context; 280 | } 281 | 282 | #ifndef STBI_WRITE_NO_STDIO 283 | 284 | static void stbi__stdio_write(void *context, void *data, int size) 285 | { 286 | fwrite(data,1,size,(FILE*) context); 287 | } 288 | 289 | #if defined(_WIN32) && defined(STBIW_WINDOWS_UTF8) 290 | #ifdef __cplusplus 291 | #define STBIW_EXTERN extern "C" 292 | #else 293 | #define STBIW_EXTERN extern 294 | #endif 295 | STBIW_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char 
*str, int cbmb, wchar_t *widestr, int cchwide); 296 | STBIW_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default); 297 | 298 | STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input) 299 | { 300 | return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL); 301 | } 302 | #endif 303 | 304 | static FILE *stbiw__fopen(char const *filename, char const *mode) 305 | { 306 | FILE *f; 307 | #if defined(_WIN32) && defined(STBIW_WINDOWS_UTF8) 308 | wchar_t wMode[64]; 309 | wchar_t wFilename[1024]; 310 | if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename))) 311 | return 0; 312 | 313 | if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode))) 314 | return 0; 315 | 316 | #if defined(_MSC_VER) && _MSC_VER >= 1400 317 | if (0 != _wfopen_s(&f, wFilename, wMode)) 318 | f = 0; 319 | #else 320 | f = _wfopen(wFilename, wMode); 321 | #endif 322 | 323 | #elif defined(_MSC_VER) && _MSC_VER >= 1400 324 | if (0 != fopen_s(&f, filename, mode)) 325 | f=0; 326 | #else 327 | f = fopen(filename, mode); 328 | #endif 329 | return f; 330 | } 331 | 332 | static int stbi__start_write_file(stbi__write_context *s, const char *filename) 333 | { 334 | FILE *f = stbiw__fopen(filename, "wb"); 335 | stbi__start_write_callbacks(s, stbi__stdio_write, (void *) f); 336 | return f != NULL; 337 | } 338 | 339 | static void stbi__end_write_file(stbi__write_context *s) 340 | { 341 | fclose((FILE *)s->context); 342 | } 343 | 344 | #endif // !STBI_WRITE_NO_STDIO 345 | 346 | typedef unsigned int stbiw_uint32; 347 | typedef int stb_image_write_test[sizeof(stbiw_uint32)==4 ? 
1 : -1]; 348 | 349 | static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v) 350 | { 351 | while (*fmt) { 352 | switch (*fmt++) { 353 | case ' ': break; 354 | case '1': { unsigned char x = STBIW_UCHAR(va_arg(v, int)); 355 | s->func(s->context,&x,1); 356 | break; } 357 | case '2': { int x = va_arg(v,int); 358 | unsigned char b[2]; 359 | b[0] = STBIW_UCHAR(x); 360 | b[1] = STBIW_UCHAR(x>>8); 361 | s->func(s->context,b,2); 362 | break; } 363 | case '4': { stbiw_uint32 x = va_arg(v,int); 364 | unsigned char b[4]; 365 | b[0]=STBIW_UCHAR(x); 366 | b[1]=STBIW_UCHAR(x>>8); 367 | b[2]=STBIW_UCHAR(x>>16); 368 | b[3]=STBIW_UCHAR(x>>24); 369 | s->func(s->context,b,4); 370 | break; } 371 | default: 372 | STBIW_ASSERT(0); 373 | return; 374 | } 375 | } 376 | } 377 | 378 | static void stbiw__writef(stbi__write_context *s, const char *fmt, ...) 379 | { 380 | va_list v; 381 | va_start(v, fmt); 382 | stbiw__writefv(s, fmt, v); 383 | va_end(v); 384 | } 385 | 386 | static void stbiw__write_flush(stbi__write_context *s) 387 | { 388 | if (s->buf_used) { 389 | s->func(s->context, &s->buffer, s->buf_used); 390 | s->buf_used = 0; 391 | } 392 | } 393 | 394 | static void stbiw__putc(stbi__write_context *s, unsigned char c) 395 | { 396 | s->func(s->context, &c, 1); 397 | } 398 | 399 | static void stbiw__write1(stbi__write_context *s, unsigned char a) 400 | { 401 | if ((size_t)s->buf_used + 1 > sizeof(s->buffer)) 402 | stbiw__write_flush(s); 403 | s->buffer[s->buf_used++] = a; 404 | } 405 | 406 | static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c) 407 | { 408 | int n; 409 | if ((size_t)s->buf_used + 3 > sizeof(s->buffer)) 410 | stbiw__write_flush(s); 411 | n = s->buf_used; 412 | s->buf_used = n+3; 413 | s->buffer[n+0] = a; 414 | s->buffer[n+1] = b; 415 | s->buffer[n+2] = c; 416 | } 417 | 418 | static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char *d) 
{
   // Body of stbiw__write_pixel: emits the single pixel at `d` (`comp` channels)
   // through the write context `s`.
   unsigned char bg[3] = { 255, 0, 255}, px[3];
   int k;

   // negative write_alpha: the last channel is emitted before the colour bytes
   if (write_alpha < 0)
      stbiw__write1(s, d[comp - 1]);

   switch (comp) {
      case 2: // 2 channels = mono + alpha, alpha is written separately, so same as 1-channel case
      case 1:
         if (expand_mono)
            stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp
         else
            stbiw__write1(s, d[0]);  // monochrome TGA
         break;
      case 4:
         if (!write_alpha) {
            // composite against pink background
            for (k = 0; k < 3; ++k)
               px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255;
            // rgb_dir = +1 emits channels 0,1,2; rgb_dir = -1 emits 2,1,0
            stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]);
            break;
         }
         /* FALLTHROUGH */
      case 3:
         stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]);
         break;
   }
   // positive write_alpha: the last channel is emitted after the colour bytes
   if (write_alpha > 0)
      stbiw__write1(s, d[comp - 1]);
}

// Writes an x-by-y image of `comp`-channel pixels, one row at a time.
//   vdir          row order: negative walks rows y-1..0, otherwise 0..y-1; the
//                 sign is inverted when stbi__flip_vertically_on_write is set
//   scanline_pad  number of bytes written after every row, taken from a zeroed
//                 stbiw_uint32 (assumes scanline_pad <= sizeof(stbiw_uint32) -- TODO confirm)
// rgb_dir, write_alpha and expand_mono are passed through to stbiw__write_pixel.
// Does nothing when y <= 0.
static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono)
{
   stbiw_uint32 zero = 0;
   int i,j, j_end;

   if (y <= 0)
      return;

   if (stbi__flip_vertically_on_write)
      vdir *= -1;

   if (vdir < 0) {
      j_end = -1; j = y-1;
   } else {
      j_end =  y; j = 0;
   }

   for (; j != j_end; j += vdir) {
      for (i=0; i < x; ++i) {
         // rows are assumed tightly packed: x*comp bytes per row
         unsigned char *d = (unsigned char *) data + (j*x+i)*comp;
         stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d);
      }
      stbiw__write_flush(s);
      s->func(s->context, &zero, scanline_pad);
   }
}

// Writes a header described by `fmt` and the variadic arguments (formatted by
// stbiw__writefv), then the pixel data via stbiw__write_pixels.
// Returns 0 without writing anything if x or y is negative, 1 otherwise.
static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...)
{
   if (y < 0 || x < 0) {
      return 0;
   } else {
      va_list v;
      va_start(v, fmt);
      stbiw__writefv(s, fmt, v);
      va_end(v);
      stbiw__write_pixels(s,rgb_dir,vdir,x,y,comp,data,alpha,pad, expand_mono);
      return 1;
   }
}

// Writes a BMP file: a 24-bit image for comp != 4, a 32-bit image with a 108-byte
// V4 header for comp == 4. Both paths pass rgb_dir = -1, vdir = -1 and
// expand_mono = 1 to stbiw__outfile. Returns stbiw__outfile's result.
static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data)
{
   if (comp != 4) {
      // write RGB bitmap
      // pad = bytes needed to round each x*3-byte row up to a multiple of 4
      int pad = (-x*3) & 3;
      return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *) data,0,pad,
              "11 4 22 4" "4 44 22 444444",
              'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40,  // file header
               40, x,y, 1,24, 0,0,0,0,0,0);             // bitmap header
   } else {
      // RGBA bitmaps need a v4 header
      // use BI_BITFIELDS mode with 32bpp and alpha mask
      // (straight BI_RGB with alpha mask doesn't work in most readers)
      return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *)data,1,0,
         "11 4 22 4" "4 44 22 444444 4444 4 444 444 444 444",
         'B', 'M', 14+108+x*y*4, 0, 0, 14+108, // file header
         108, x,y, 1,32, 3,0,0,0,0,0, 0xff0000,0xff00,0xff,0xff000000u, 0, 0,0,0, 0,0,0, 0,0,0, 0,0,0); // bitmap V4 header
   }
}

// Writes a BMP through the caller-supplied callback `func`/`context`.
// Returns the result of stbi_write_bmp_core.
STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data)
{
   stbi__write_context s = { 0 };
   stbi__start_write_callbacks(&s, func, context);
   return stbi_write_bmp_core(&s, x, y, comp, data);
}

#ifndef STBI_WRITE_NO_STDIO
// Writes a BMP to `filename`. Returns 0 if stbi__start_write_file fails,
// otherwise the result of stbi_write_bmp_core.
STBIWDEF int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data)
{
   stbi__write_context s = { 0 };
   if (stbi__start_write_file(&s,filename)) {
      int r = stbi_write_bmp_core(&s, x, y, comp, data);
      stbi__end_write_file(&s);
      return r;
   } else
      return 0;
}
#endif //!STBI_WRITE_NO_STDIO

// Writes a TGA image. When stbi_write_tga_with_rle is zero the pixels are
// written uncompressed via stbiw__outfile; otherwise each row is split into
// packets of 1..128 pixels, either "raw" (distinct pixels) or "run" (one pixel
// repeated). Returns 0 if x or y is negative; the RLE path returns 1.
static int stbi_write_tga_core(stbi__write_context *s, int x, int y, int comp, void *data)
{
   int has_alpha = (comp == 2 || comp == 4);
   int colorbytes = has_alpha ? comp-1 : comp;
   int format = colorbytes < 2 ? 3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3

   if (y < 0 || x < 0)
      return 0;

   if (!stbi_write_tga_with_rle) {
      return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void *) data, has_alpha, 0,
         "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8);
   } else {
      int i,j,k;
      int jend, jdir;

      // same header as above, but the image type is format+8
      stbiw__writef(s, "111 221 2222 11", 0,0,format+8, 0,0,0, 0,0,x,y, (colorbytes + has_alpha) * 8, has_alpha * 8);

      // rows are written last-to-first unless the global flip flag is set
      if (stbi__flip_vertically_on_write) {
         j = 0;
         jend = y;
         jdir = 1;
      } else {
         j = y-1;
         jend = -1;
         jdir = -1;
      }
      for (; j != jend; j += jdir) {
         unsigned char *row = (unsigned char *) data + j * x * comp;
         int len;

         // each iteration emits one packet covering `len` pixels starting at i
         for (i = 0; i < x; i += len) {
            unsigned char *begin = row + i * comp;
            int diff = 1;   // nonzero => raw packet, zero => run packet
            len = 1;

            if (i < x - 1) {
               ++len;
               diff = memcmp(begin, row + (i + 1) * comp, comp);
               if (diff) {
                  // raw packet: extend while each pixel differs from its predecessor
                  const unsigned char *prev = begin;
                  for (k = i + 2; k < x && len < 128; ++k) {
                     if (memcmp(prev, row + k * comp, comp)) {
                        prev += comp;
                        ++len;
                     } else {
                        // two equal pixels found: drop the first of them from this
                        // packet so it can start the following run
                        --len;
                        break;
                     }
                  }
               } else {
                  // run packet: extend while pixels equal the first one
                  for (k = i + 2; k < x && len < 128; ++k) {
                     if (!memcmp(begin, row + k * comp, comp)) {
                        ++len;
                     } else {
                        break;
                     }
                  }
               }
            }

            if (diff) {
               // raw packet: count byte len-1, followed by `len` pixels
               unsigned char header = STBIW_UCHAR(len - 1);
               stbiw__write1(s, header);
               for (k = 0; k < len; ++k) {
                  stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp);
               }
            } else {
               // run packet: count byte len-129 (converted by STBIW_UCHAR), followed by one pixel
               unsigned char header = STBIW_UCHAR(len - 129);
               stbiw__write1(s, header);
               stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin);
            }
         }
      }
      stbiw__write_flush(s);
   }
   return 1;
}

// Writes a TGA through the caller-supplied callback `func`/`context`.
// Returns the result of stbi_write_tga_core.
STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data)
{
   stbi__write_context s = { 0 };
   stbi__start_write_callbacks(&s, func, context);
   return stbi_write_tga_core(&s, x, y, comp, (void *) data);
}

#ifndef STBI_WRITE_NO_STDIO
// Writes a TGA to `filename`. Returns 0 if stbi__start_write_file fails,
// otherwise the result of stbi_write_tga_core.
STBIWDEF int stbi_write_tga(char const *filename, int x, int y, int comp, const void *data)
{
   stbi__write_context s = { 0 };
   if (stbi__start_write_file(&s,filename)) {
      int r = stbi_write_tga_core(&s, x, y, comp, (void *) data);
      stbi__end_write_file(&s);
      return r;
   } else
      return 0;
}
#endif

// *************************************************************************************************
// Radiance RGBE HDR writer
// by Baldur Karlsson

#define stbiw__max(a, b)  ((a) > (b) ? (a) : (b))

#ifndef STBI_WRITE_NO_STDIO

// Converts three linear floats into a 4-byte RGBE value: three mantissa bytes
// sharing the exponent of the largest component, stored biased by 128.
// A largest component below 1e-32 is written as four zero bytes.
static void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear)
{
   int exponent;
   float maxcomp = stbiw__max(linear[0], stbiw__max(linear[1], linear[2]));

   if (maxcomp < 1e-32f) {
      rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0;
   } else {
      // frexp() yields a mantissa in [0.5,1) and the matching power-of-two exponent,
      // so `normalize` scales the largest component to just below 256
      float normalize = (float) frexp(maxcomp, &exponent) * 256.0f/maxcomp;

      rgbe[0] = (unsigned char)(linear[0] * normalize);
      rgbe[1] = (unsigned char)(linear[1] * normalize);
      rgbe[2] = (unsigned char)(linear[2] * normalize);
      rgbe[3] = (unsigned char)(exponent + 128);
   }
}

// Emits one run packet: a length byte of length+128 followed by the repeated
// byte. Asserts that length+128 fits in a byte.
static void stbiw__write_run_data(stbi__write_context *s, int length, unsigned char databyte)
{
   unsigned char lengthbyte = STBIW_UCHAR(length+128);
   STBIW_ASSERT(length+128 <= 255);
   s->func(s->context, &lengthbyte, 1);
   s->func(s->context, &databyte, 1);
}

static void
stbiw__write_dump_data(stbi__write_context *s, int length, unsigned char *data) 665 | { 666 | unsigned char lengthbyte = STBIW_UCHAR(length); 667 | STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code 668 | s->func(s->context, &lengthbyte, 1); 669 | s->func(s->context, data, length); 670 | } 671 | 672 | static void stbiw__write_hdr_scanline(stbi__write_context *s, int width, int ncomp, unsigned char *scratch, float *scanline) 673 | { 674 | unsigned char scanlineheader[4] = { 2, 2, 0, 0 }; 675 | unsigned char rgbe[4]; 676 | float linear[3]; 677 | int x; 678 | 679 | scanlineheader[2] = (width&0xff00)>>8; 680 | scanlineheader[3] = (width&0x00ff); 681 | 682 | /* skip RLE for images too small or large */ 683 | if (width < 8 || width >= 32768) { 684 | for (x=0; x < width; x++) { 685 | switch (ncomp) { 686 | case 4: /* fallthrough */ 687 | case 3: linear[2] = scanline[x*ncomp + 2]; 688 | linear[1] = scanline[x*ncomp + 1]; 689 | linear[0] = scanline[x*ncomp + 0]; 690 | break; 691 | default: 692 | linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; 693 | break; 694 | } 695 | stbiw__linear_to_rgbe(rgbe, linear); 696 | s->func(s->context, rgbe, 4); 697 | } 698 | } else { 699 | int c,r; 700 | /* encode into scratch buffer */ 701 | for (x=0; x < width; x++) { 702 | switch(ncomp) { 703 | case 4: /* fallthrough */ 704 | case 3: linear[2] = scanline[x*ncomp + 2]; 705 | linear[1] = scanline[x*ncomp + 1]; 706 | linear[0] = scanline[x*ncomp + 0]; 707 | break; 708 | default: 709 | linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; 710 | break; 711 | } 712 | stbiw__linear_to_rgbe(rgbe, linear); 713 | scratch[x + width*0] = rgbe[0]; 714 | scratch[x + width*1] = rgbe[1]; 715 | scratch[x + width*2] = rgbe[2]; 716 | scratch[x + width*3] = rgbe[3]; 717 | } 718 | 719 | s->func(s->context, scanlineheader, 4); 720 | 721 | /* RLE each component separately */ 722 | for (c=0; c < 4; c++) { 723 | unsigned char *comp = &scratch[width*c]; 724 | 725 
| x = 0; 726 | while (x < width) { 727 | // find first run 728 | r = x; 729 | while (r+2 < width) { 730 | if (comp[r] == comp[r+1] && comp[r] == comp[r+2]) 731 | break; 732 | ++r; 733 | } 734 | if (r+2 >= width) 735 | r = width; 736 | // dump up to first run 737 | while (x < r) { 738 | int len = r-x; 739 | if (len > 128) len = 128; 740 | stbiw__write_dump_data(s, len, &comp[x]); 741 | x += len; 742 | } 743 | // if there's a run, output it 744 | if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd 745 | // find next byte after run 746 | while (r < width && comp[r] == comp[x]) 747 | ++r; 748 | // output run up to r 749 | while (x < r) { 750 | int len = r-x; 751 | if (len > 127) len = 127; 752 | stbiw__write_run_data(s, len, comp[x]); 753 | x += len; 754 | } 755 | } 756 | } 757 | } 758 | } 759 | } 760 | 761 | static int stbi_write_hdr_core(stbi__write_context *s, int x, int y, int comp, float *data) 762 | { 763 | if (y <= 0 || x <= 0 || data == NULL) 764 | return 0; 765 | else { 766 | // Each component is stored separately. Allocate scratch space for full output scanline. 767 | unsigned char *scratch = (unsigned char *) STBIW_MALLOC(x*4); 768 | int i, len; 769 | char buffer[128]; 770 | char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n"; 771 | s->func(s->context, header, sizeof(header)-1); 772 | 773 | #ifdef __STDC_LIB_EXT1__ 774 | len = sprintf_s(buffer, sizeof(buffer), "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); 775 | #else 776 | len = sprintf(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); 777 | #endif 778 | s->func(s->context, buffer, len); 779 | 780 | for(i=0; i < y; i++) 781 | stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp*x*(stbi__flip_vertically_on_write ? 
y-1-i : i)); 782 | STBIW_FREE(scratch); 783 | return 1; 784 | } 785 | } 786 | 787 | STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const float *data) 788 | { 789 | stbi__write_context s = { 0 }; 790 | stbi__start_write_callbacks(&s, func, context); 791 | return stbi_write_hdr_core(&s, x, y, comp, (float *) data); 792 | } 793 | 794 | STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data) 795 | { 796 | stbi__write_context s = { 0 }; 797 | if (stbi__start_write_file(&s,filename)) { 798 | int r = stbi_write_hdr_core(&s, x, y, comp, (float *) data); 799 | stbi__end_write_file(&s); 800 | return r; 801 | } else 802 | return 0; 803 | } 804 | #endif // STBI_WRITE_NO_STDIO 805 | 806 | 807 | ////////////////////////////////////////////////////////////////////////////// 808 | // 809 | // PNG writer 810 | // 811 | 812 | #ifndef STBIW_ZLIB_COMPRESS 813 | // stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size() 814 | #define stbiw__sbraw(a) ((int *) (void *) (a) - 2) 815 | #define stbiw__sbm(a) stbiw__sbraw(a)[0] 816 | #define stbiw__sbn(a) stbiw__sbraw(a)[1] 817 | 818 | #define stbiw__sbneedgrow(a,n) ((a)==0 || stbiw__sbn(a)+n >= stbiw__sbm(a)) 819 | #define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? stbiw__sbgrow(a,n) : 0) 820 | #define stbiw__sbgrow(a,n) stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a))) 821 | 822 | #define stbiw__sbpush(a, v) (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v)) 823 | #define stbiw__sbcount(a) ((a) ? stbiw__sbn(a) : 0) 824 | #define stbiw__sbfree(a) ((a) ? STBIW_FREE(stbiw__sbraw(a)),0 : 0) 825 | 826 | static void *stbiw__sbgrowf(void **arr, int increment, int itemsize) 827 | { 828 | int m = *arr ? 2*stbiw__sbm(*arr)+increment : increment+1; 829 | void *p = STBIW_REALLOC_SIZED(*arr ? stbiw__sbraw(*arr) : 0, *arr ? 
(stbiw__sbm(*arr)*itemsize + sizeof(int)*2) : 0, itemsize * m + sizeof(int)*2);
   STBIW_ASSERT(p);
   if (p) {
      // a freshly created buffer starts with a count of zero
      if (!*arr) ((int *) p)[1] = 0;
      // the caller-visible pointer sits just past the two-int header
      *arr = (void *) ((int *) p + 2);
      stbiw__sbm(*arr) = m;
   }
   // on allocation failure *arr is left unchanged and returned as-is
   return *arr;
}

// Moves every complete byte out of the bit accumulator into the stretchy buffer
// `data`, low byte first, and returns the (possibly reallocated) buffer.
static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount)
{
   while (*bitcount >= 8) {
      stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer));
      *bitbuffer >>= 8;
      *bitcount -= 8;
   }
   return data;
}

// Returns the low `codebits` bits of `code` in reversed bit order.
static int stbiw__zlib_bitrev(int code, int codebits)
{
   int res=0;
   while (codebits--) {
      res = (res << 1) | (code & 1);
      code >>= 1;
   }
   return res;
}

// Returns the length of the common prefix of `a` and `b`, capped at `limit`
// and at 258.
static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int limit)
{
   int i;
   for (i=0; i < limit && i < 258; ++i)
      if (a[i] != b[i]) break;
   return i;
}

// Hashes the three bytes at data[0..2] into a 32-bit value.
static unsigned int stbiw__zhash(unsigned char *data)
{
   stbiw_uint32 hash = data[0] + (data[1] << 8) + (data[2] << 16);
   hash ^= hash << 3;
   hash += hash >> 5;
   hash ^= hash << 4;
   hash += hash >> 17;
   hash ^= hash << 25;
   hash += hash >> 6;
   return hash;
}

// The macros below operate on the locals `out`, `bitbuf` and `bitcount` of the
// function that uses them.
#define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount))
// append `codebits` bits of `code` above the bits already pending, then flush whole bytes
#define stbiw__zlib_add(code,codebits) \
      (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush())
// append a huffman code, which is written bit-reversed
#define stbiw__zlib_huffa(b,c)  stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c)
// default huffman tables
#define stbiw__zlib_huff1(n)  stbiw__zlib_huffa(0x30 + (n), 8)
#define stbiw__zlib_huff2(n)  stbiw__zlib_huffa(0x190 + (n)-144, 9)
#define stbiw__zlib_huff3(n)  stbiw__zlib_huffa(0 + (n)-256,7)
#define stbiw__zlib_huff4(n)  stbiw__zlib_huffa(0xc0 + (n)-280,8)
#define stbiw__zlib_huff(n)  ((n) <= 143 ? stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n))
// byte-only variant: the argument is a literal 0..255
#define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n))

#define stbiw__ZHASH   16384

#endif // STBIW_ZLIB_COMPRESS

// Compresses `data_len` bytes at `data` into a zlib stream.
// With STBIW_ZLIB_COMPRESS defined the call is forwarded to that macro.
// Otherwise a single fixed-huffman deflate block is produced using a hash table
// of recent 3-byte positions, replaced by stored blocks if that is larger.
//   quality  values below 5 are raised to 5; each hash chain is halved to
//            `quality` entries when it reaches 2*quality
//   out_len  receives the number of bytes in the returned buffer
// Returns a buffer to be released with STBIW_FREE, or NULL if the hash table
// cannot be allocated.
STBIWDEF unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality)
{
#ifdef STBIW_ZLIB_COMPRESS
   // user provided a zlib compress implementation, use that
   return STBIW_ZLIB_COMPRESS(data, data_len, out_len, quality);
#else // use builtin
   // base value and number of extra bits for each length code / distance code;
   // the base tables carry one extra sentinel entry for the searches below
   static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 };
   static unsigned char  lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 };
   static unsigned short distc[] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 };
   static unsigned char  disteb[] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 };
   unsigned int bitbuf=0;
   int i,j, bitcount=0;
   unsigned char *out = NULL;
   // one stretchy buffer of candidate positions per hash bucket
   unsigned char ***hash_table = (unsigned char***) STBIW_MALLOC(stbiw__ZHASH * sizeof(unsigned char**));
   if (hash_table == NULL)
      return NULL;
   if (quality < 5) quality = 5;

   stbiw__sbpush(out, 0x78);   // DEFLATE 32K window
   stbiw__sbpush(out, 0x5e);   // FLEVEL = 1
   stbiw__zlib_add(1,1);  // BFINAL = 1
   stbiw__zlib_add(1,2);  // BTYPE = 1 -- fixed huffman

   for (i=0; i < stbiw__ZHASH; ++i)
      hash_table[i] = NULL;

   i=0;
   while (i < data_len-3) {
      // hash next 3 bytes of data to be compressed
      int h = stbiw__zhash(data+i)&(stbiw__ZHASH-1), best=3;
      unsigned char *bestloc = 0;
      unsigned char **hlist = hash_table[h];
      int n = stbiw__sbcount(hlist);
      for (j=0; j < n; ++j) {
         if (hlist[j]-data > i-32768) { // if entry lies within window
            int d = stbiw__zlib_countm(hlist[j], data+i, data_len-i);
            // `>=` so that among equally long matches the later list entry wins
            if (d >= best) { best=d; bestloc=hlist[j]; }
         }
      }
      // when hash table entry is too long, delete half the entries
      if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2*quality) {
         STBIW_MEMMOVE(hash_table[h], hash_table[h]+quality, sizeof(hash_table[h][0])*quality);
         stbiw__sbn(hash_table[h]) = quality;
      }
      stbiw__sbpush(hash_table[h],data+i);

      if (bestloc) {
         // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal
         h = stbiw__zhash(data+i+1)&(stbiw__ZHASH-1);
         hlist = hash_table[h];
         n = stbiw__sbcount(hlist);
         for (j=0; j < n; ++j) {
            if (hlist[j]-data > i-32767) {
               int e = stbiw__zlib_countm(hlist[j], data+i+1, data_len-i-1);
               if (e > best) { // if next match is better, bail on current match
                  bestloc = NULL;
                  break;
               }
            }
         }
      }

      if (bestloc) {
         int d = (int) (data+i - bestloc); // distance back
         STBIW_ASSERT(d <= 32767 && best <= 258);
         // find the length code whose range contains `best`, emit it plus extra bits
         for (j=0; best > lengthc[j+1]-1; ++j);
         stbiw__zlib_huff(j+257);
         if (lengtheb[j]) stbiw__zlib_add(best - lengthc[j], lengtheb[j]);
         // same for the distance: 5-bit code (bit-reversed) plus extra bits
         for (j=0; d > distc[j+1]-1; ++j);
         stbiw__zlib_add(stbiw__zlib_bitrev(j,5),5);
         if (disteb[j]) stbiw__zlib_add(d - distc[j], disteb[j]);
         i += best;
      } else {
         stbiw__zlib_huffb(data[i]);
         ++i;
      }
   }
   // write out final bytes
   for (;i < data_len; ++i)
      stbiw__zlib_huffb(data[i]);
   stbiw__zlib_huff(256); // end of block
   // pad with 0 bits to byte boundary
   while (bitcount)
      stbiw__zlib_add(0,1);

   for (i=0; i < stbiw__ZHASH; ++i)
      (void) stbiw__sbfree(hash_table[i]);
   STBIW_FREE(hash_table);

   // store uncompressed instead if compression was worse
   // (threshold = 2 header bytes + data + 5 bytes of overhead per 32767-byte stored block)
   if (stbiw__sbn(out) > data_len + 2 + ((data_len+32766)/32767)*5) {
      stbiw__sbn(out) = 2;  // truncate to DEFLATE 32K window and FLEVEL = 1
      for (j = 0; j < data_len;) {
         int blocklen = data_len - j;
         if (blocklen > 32767) blocklen = 32767;
         stbiw__sbpush(out, data_len - j == blocklen); // BFINAL = ?, BTYPE = 0 -- no compression
         stbiw__sbpush(out, STBIW_UCHAR(blocklen)); // LEN
         stbiw__sbpush(out, STBIW_UCHAR(blocklen >> 8));
         stbiw__sbpush(out, STBIW_UCHAR(~blocklen)); // NLEN
         stbiw__sbpush(out, STBIW_UCHAR(~blocklen >> 8));
         // no growth check here: the buffer already held more items than the
         // stored form needs (that is the condition tested above)
         memcpy(out+stbiw__sbn(out), data+j, blocklen);
         stbiw__sbn(out) += blocklen;
         j += blocklen;
      }
   }

   {
      // compute adler32 on input
      // sums are reduced mod 65521 after each chunk; the first chunk is the
      // remainder data_len % 5552, all later chunks are 5552 bytes
      unsigned int s1=1, s2=0;
      int blocklen = (int) (data_len % 5552);
      j=0;
      while (j < data_len) {
         for (i=0; i < blocklen; ++i) { s1 += data[j+i]; s2 += s1; }
         s1 %= 65521; s2 %= 65521;
         j += blocklen;
         blocklen = 5552;
      }
      // checksum is appended as s2 (high byte first) then s1 (high byte first)
      stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8));
      stbiw__sbpush(out, STBIW_UCHAR(s2));
      stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8));
      stbiw__sbpush(out, STBIW_UCHAR(s1));
   }
   *out_len = stbiw__sbn(out);
   // make returned pointer freeable
   // (slide the payload down over the two-int header so it starts at the allocation base)
   STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len);
   return (unsigned char *) stbiw__sbraw(out);
#endif // STBIW_ZLIB_COMPRESS
}

// Returns the CRC-32 of `len` bytes at `buffer`: forwarded to STBIW_CRC32 when
// that macro is defined, otherwise computed with the lookup table below.
static unsigned int stbiw__crc32(unsigned char *buffer, int len)
{
#ifdef STBIW_CRC32
    return STBIW_CRC32(buffer, len);
#else
   static unsigned int crc_table[256] =
   {
      0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3,
      0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91,
      0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
      0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC,
0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, 1035 | 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 1036 | 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, 1037 | 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, 1038 | 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 1039 | 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, 1040 | 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, 1041 | 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 1042 | 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, 1043 | 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, 1044 | 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 1045 | 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, 1046 | 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, 1047 | 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 1048 | 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, 1049 | 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, 1050 | 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 1051 | 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, 1052 | 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, 1053 | 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 
0x5505262F, 1054 | 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, 1055 | 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, 1056 | 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 1057 | 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, 1058 | 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, 1059 | 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 1060 | 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, 1061 | 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, 1062 | 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D 1063 | }; 1064 | 1065 | unsigned int crc = ~0u; 1066 | int i; 1067 | for (i=0; i < len; ++i) 1068 | crc = (crc >> 8) ^ crc_table[buffer[i] ^ (crc & 0xff)]; 1069 | return ~crc; 1070 | #endif 1071 | } 1072 | 1073 | #define stbiw__wpng4(o,a,b,c,d) ((o)[0]=STBIW_UCHAR(a),(o)[1]=STBIW_UCHAR(b),(o)[2]=STBIW_UCHAR(c),(o)[3]=STBIW_UCHAR(d),(o)+=4) 1074 | #define stbiw__wp32(data,v) stbiw__wpng4(data, (v)>>24,(v)>>16,(v)>>8,(v)); 1075 | #define stbiw__wptag(data,s) stbiw__wpng4(data, s[0],s[1],s[2],s[3]) 1076 | 1077 | static void stbiw__wpcrc(unsigned char **data, int len) 1078 | { 1079 | unsigned int crc = stbiw__crc32(*data - len - 4, len+4); 1080 | stbiw__wp32(*data, crc); 1081 | } 1082 | 1083 | static unsigned char stbiw__paeth(int a, int b, int c) 1084 | { 1085 | int p = a + b - c, pa = abs(p-a), pb = abs(p-b), pc = abs(p-c); 1086 | if (pa <= pb && pa <= pc) return STBIW_UCHAR(a); 1087 | if (pb <= pc) return STBIW_UCHAR(b); 1088 | return STBIW_UCHAR(c); 1089 | } 1090 | 1091 | // @OPTIMIZE: provide an option that always forces left-predict or paeth 
predict 1092 | static void stbiw__encode_png_line(unsigned char *pixels, int stride_bytes, int width, int height, int y, int n, int filter_type, signed char *line_buffer) 1093 | { 1094 | static int mapping[] = { 0,1,2,3,4 }; 1095 | static int firstmap[] = { 0,1,0,5,6 }; 1096 | int *mymap = (y != 0) ? mapping : firstmap; 1097 | int i; 1098 | int type = mymap[filter_type]; 1099 | unsigned char *z = pixels + stride_bytes * (stbi__flip_vertically_on_write ? height-1-y : y); 1100 | int signed_stride = stbi__flip_vertically_on_write ? -stride_bytes : stride_bytes; 1101 | 1102 | if (type==0) { 1103 | memcpy(line_buffer, z, width*n); 1104 | return; 1105 | } 1106 | 1107 | // first loop isn't optimized since it's just one pixel 1108 | for (i = 0; i < n; ++i) { 1109 | switch (type) { 1110 | case 1: line_buffer[i] = z[i]; break; 1111 | case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; 1112 | case 3: line_buffer[i] = z[i] - (z[i-signed_stride]>>1); break; 1113 | case 4: line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-signed_stride],0)); break; 1114 | case 5: line_buffer[i] = z[i]; break; 1115 | case 6: line_buffer[i] = z[i]; break; 1116 | } 1117 | } 1118 | switch (type) { 1119 | case 1: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - z[i-n]; break; 1120 | case 2: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - z[i-signed_stride]; break; 1121 | case 3: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - ((z[i-n] + z[i-signed_stride])>>1); break; 1122 | case 4: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-signed_stride], z[i-signed_stride-n]); break; 1123 | case 5: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - (z[i-n]>>1); break; 1124 | case 6: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0); break; 1125 | } 1126 | } 1127 | 1128 | STBIWDEF unsigned char *stbi_write_png_to_mem(const unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len) 1129 | { 1130 | int 
force_filter = stbi_write_force_png_filter; 1131 | int ctype[5] = { -1, 0, 4, 2, 6 }; 1132 | unsigned char sig[8] = { 137,80,78,71,13,10,26,10 }; 1133 | unsigned char *out,*o, *filt, *zlib; 1134 | signed char *line_buffer; 1135 | int j,zlen; 1136 | 1137 | if (stride_bytes == 0) 1138 | stride_bytes = x * n; 1139 | 1140 | if (force_filter >= 5) { 1141 | force_filter = -1; 1142 | } 1143 | 1144 | filt = (unsigned char *) STBIW_MALLOC((x*n+1) * y); if (!filt) return 0; 1145 | line_buffer = (signed char *) STBIW_MALLOC(x * n); if (!line_buffer) { STBIW_FREE(filt); return 0; } 1146 | for (j=0; j < y; ++j) { 1147 | int filter_type; 1148 | if (force_filter > -1) { 1149 | filter_type = force_filter; 1150 | stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, force_filter, line_buffer); 1151 | } else { // Estimate the best filter by running through all of them: 1152 | int best_filter = 0, best_filter_val = 0x7fffffff, est, i; 1153 | for (filter_type = 0; filter_type < 5; filter_type++) { 1154 | stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, filter_type, line_buffer); 1155 | 1156 | // Estimate the entropy of the line using this filter; the less, the better. 
1157 | est = 0; 1158 | for (i = 0; i < x*n; ++i) { 1159 | est += abs((signed char) line_buffer[i]); 1160 | } 1161 | if (est < best_filter_val) { 1162 | best_filter_val = est; 1163 | best_filter = filter_type; 1164 | } 1165 | } 1166 | if (filter_type != best_filter) { // If the last iteration already got us the best filter, don't redo it 1167 | stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, best_filter, line_buffer); 1168 | filter_type = best_filter; 1169 | } 1170 | } 1171 | // when we get here, filter_type contains the filter type, and line_buffer contains the data 1172 | filt[j*(x*n+1)] = (unsigned char) filter_type; 1173 | STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n); 1174 | } 1175 | STBIW_FREE(line_buffer); 1176 | zlib = stbi_zlib_compress(filt, y*( x*n+1), &zlen, stbi_write_png_compression_level); 1177 | STBIW_FREE(filt); 1178 | if (!zlib) return 0; 1179 | 1180 | // each tag requires 12 bytes of overhead 1181 | out = (unsigned char *) STBIW_MALLOC(8 + 12+13 + 12+zlen + 12); 1182 | if (!out) return 0; 1183 | *out_len = 8 + 12+13 + 12+zlen + 12; 1184 | 1185 | o=out; 1186 | STBIW_MEMMOVE(o,sig,8); o+= 8; 1187 | stbiw__wp32(o, 13); // header length 1188 | stbiw__wptag(o, "IHDR"); 1189 | stbiw__wp32(o, x); 1190 | stbiw__wp32(o, y); 1191 | *o++ = 8; 1192 | *o++ = STBIW_UCHAR(ctype[n]); 1193 | *o++ = 0; 1194 | *o++ = 0; 1195 | *o++ = 0; 1196 | stbiw__wpcrc(&o,13); 1197 | 1198 | stbiw__wp32(o, zlen); 1199 | stbiw__wptag(o, "IDAT"); 1200 | STBIW_MEMMOVE(o, zlib, zlen); 1201 | o += zlen; 1202 | STBIW_FREE(zlib); 1203 | stbiw__wpcrc(&o, zlen); 1204 | 1205 | stbiw__wp32(o,0); 1206 | stbiw__wptag(o, "IEND"); 1207 | stbiw__wpcrc(&o,0); 1208 | 1209 | STBIW_ASSERT(o == out + *out_len); 1210 | 1211 | return out; 1212 | } 1213 | 1214 | #ifndef STBI_WRITE_NO_STDIO 1215 | STBIWDEF int stbi_write_png(char const *filename, int x, int y, int comp, const void *data, int stride_bytes) 1216 | { 1217 | FILE *f; 1218 | int len; 1219 | unsigned char *png = 
stbi_write_png_to_mem((const unsigned char *) data, stride_bytes, x, y, comp, &len); 1220 | if (png == NULL) return 0; 1221 | 1222 | f = stbiw__fopen(filename, "wb"); 1223 | if (!f) { STBIW_FREE(png); return 0; } 1224 | fwrite(png, 1, len, f); 1225 | fclose(f); 1226 | STBIW_FREE(png); 1227 | return 1; 1228 | } 1229 | #endif 1230 | 1231 | STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int stride_bytes) 1232 | { 1233 | int len; 1234 | unsigned char *png = stbi_write_png_to_mem((const unsigned char *) data, stride_bytes, x, y, comp, &len); 1235 | if (png == NULL) return 0; 1236 | func(context, png, len); 1237 | STBIW_FREE(png); 1238 | return 1; 1239 | } 1240 | 1241 | 1242 | /* *************************************************************************** 1243 | * 1244 | * JPEG writer 1245 | * 1246 | * This is based on Jon Olick's jo_jpeg.cpp: 1247 | * public domain Simple, Minimalistic JPEG writer - http://www.jonolick.com/code.html 1248 | */ 1249 | 1250 | static const unsigned char stbiw__jpg_ZigZag[] = { 0,1,5,6,14,15,27,28,2,4,7,13,16,26,29,42,3,8,12,17,25,30,41,43,9,11,18, 1251 | 24,31,40,44,53,10,19,23,32,39,45,52,54,20,22,33,38,46,51,55,60,21,34,37,47,50,56,59,61,35,36,48,49,57,58,62,63 }; 1252 | 1253 | static void stbiw__jpg_writeBits(stbi__write_context *s, int *bitBufP, int *bitCntP, const unsigned short *bs) { 1254 | int bitBuf = *bitBufP, bitCnt = *bitCntP; 1255 | bitCnt += bs[1]; 1256 | bitBuf |= bs[0] << (24 - bitCnt); 1257 | while(bitCnt >= 8) { 1258 | unsigned char c = (bitBuf >> 16) & 255; 1259 | stbiw__putc(s, c); 1260 | if(c == 255) { 1261 | stbiw__putc(s, 0); 1262 | } 1263 | bitBuf <<= 8; 1264 | bitCnt -= 8; 1265 | } 1266 | *bitBufP = bitBuf; 1267 | *bitCntP = bitCnt; 1268 | } 1269 | 1270 | static void stbiw__jpg_DCT(float *d0p, float *d1p, float *d2p, float *d3p, float *d4p, float *d5p, float *d6p, float *d7p) { 1271 | float d0 = *d0p, d1 = *d1p, d2 = *d2p, d3 = *d3p, d4 = *d4p, d5 = 
*d5p, d6 = *d6p, d7 = *d7p; 1272 | float z1, z2, z3, z4, z5, z11, z13; 1273 | 1274 | float tmp0 = d0 + d7; 1275 | float tmp7 = d0 - d7; 1276 | float tmp1 = d1 + d6; 1277 | float tmp6 = d1 - d6; 1278 | float tmp2 = d2 + d5; 1279 | float tmp5 = d2 - d5; 1280 | float tmp3 = d3 + d4; 1281 | float tmp4 = d3 - d4; 1282 | 1283 | // Even part 1284 | float tmp10 = tmp0 + tmp3; // phase 2 1285 | float tmp13 = tmp0 - tmp3; 1286 | float tmp11 = tmp1 + tmp2; 1287 | float tmp12 = tmp1 - tmp2; 1288 | 1289 | d0 = tmp10 + tmp11; // phase 3 1290 | d4 = tmp10 - tmp11; 1291 | 1292 | z1 = (tmp12 + tmp13) * 0.707106781f; // c4 1293 | d2 = tmp13 + z1; // phase 5 1294 | d6 = tmp13 - z1; 1295 | 1296 | // Odd part 1297 | tmp10 = tmp4 + tmp5; // phase 2 1298 | tmp11 = tmp5 + tmp6; 1299 | tmp12 = tmp6 + tmp7; 1300 | 1301 | // The rotator is modified from fig 4-8 to avoid extra negations. 1302 | z5 = (tmp10 - tmp12) * 0.382683433f; // c6 1303 | z2 = tmp10 * 0.541196100f + z5; // c2-c6 1304 | z4 = tmp12 * 1.306562965f + z5; // c2+c6 1305 | z3 = tmp11 * 0.707106781f; // c4 1306 | 1307 | z11 = tmp7 + z3; // phase 5 1308 | z13 = tmp7 - z3; 1309 | 1310 | *d5p = z13 + z2; // phase 6 1311 | *d3p = z13 - z2; 1312 | *d1p = z11 + z4; 1313 | *d7p = z11 - z4; 1314 | 1315 | *d0p = d0; *d2p = d2; *d4p = d4; *d6p = d6; 1316 | } 1317 | 1318 | static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) { 1319 | int tmp1 = val < 0 ? -val : val; 1320 | val = val < 0 ? 
val-1 : val; 1321 | bits[1] = 1; 1322 | while(tmp1 >>= 1) { 1323 | ++bits[1]; 1324 | } 1325 | bits[0] = val & ((1<0)&&(DU[end0pos]==0); --end0pos) { 1368 | } 1369 | // end0pos = first element in reverse order !=0 1370 | if(end0pos == 0) { 1371 | stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); 1372 | return DU[0]; 1373 | } 1374 | for(i = 1; i <= end0pos; ++i) { 1375 | int startpos = i; 1376 | int nrzeroes; 1377 | unsigned short bits[2]; 1378 | for (; DU[i]==0 && i<=end0pos; ++i) { 1379 | } 1380 | nrzeroes = i-startpos; 1381 | if ( nrzeroes >= 16 ) { 1382 | int lng = nrzeroes>>4; 1383 | int nrmarker; 1384 | for (nrmarker=1; nrmarker <= lng; ++nrmarker) 1385 | stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes); 1386 | nrzeroes &= 15; 1387 | } 1388 | stbiw__jpg_calcBits(DU[i], bits); 1389 | stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes<<4)+bits[1]]); 1390 | stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits); 1391 | } 1392 | if(end0pos != 63) { 1393 | stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); 1394 | } 1395 | return DU[0]; 1396 | } 1397 | 1398 | static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) { 1399 | // Constants that don't pollute global namespace 1400 | static const unsigned char std_dc_luminance_nrcodes[] = {0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0}; 1401 | static const unsigned char std_dc_luminance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; 1402 | static const unsigned char std_ac_luminance_nrcodes[] = {0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d}; 1403 | static const unsigned char std_ac_luminance_values[] = { 1404 | 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08, 1405 | 0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28, 1406 | 0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59, 1407 | 
0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89, 1408 | 0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6, 1409 | 0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2, 1410 | 0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa 1411 | }; 1412 | static const unsigned char std_dc_chrominance_nrcodes[] = {0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0}; 1413 | static const unsigned char std_dc_chrominance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; 1414 | static const unsigned char std_ac_chrominance_nrcodes[] = {0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77}; 1415 | static const unsigned char std_ac_chrominance_values[] = { 1416 | 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91, 1417 | 0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26, 1418 | 0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58, 1419 | 0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87, 1420 | 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4, 1421 | 0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda, 1422 | 0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa 1423 | }; 1424 | // Huffman tables 1425 | static const unsigned short YDC_HT[256][2] = { {0,2},{2,3},{3,3},{4,3},{5,3},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9}}; 1426 | static const unsigned short UVDC_HT[256][2] = { 
{0,2},{1,2},{2,2},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9},{1022,10},{2046,11}}; 1427 | static const unsigned short YAC_HT[256][2] = { 1428 | {10,4},{0,2},{1,2},{4,3},{11,4},{26,5},{120,7},{248,8},{1014,10},{65410,16},{65411,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1429 | {12,4},{27,5},{121,7},{502,9},{2038,11},{65412,16},{65413,16},{65414,16},{65415,16},{65416,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1430 | {28,5},{249,8},{1015,10},{4084,12},{65417,16},{65418,16},{65419,16},{65420,16},{65421,16},{65422,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1431 | {58,6},{503,9},{4085,12},{65423,16},{65424,16},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1432 | {59,6},{1016,10},{65430,16},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1433 | {122,7},{2039,11},{65438,16},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1434 | {123,7},{4086,12},{65446,16},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1435 | {250,8},{4087,12},{65454,16},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1436 | {504,9},{32704,15},{65462,16},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1437 | {505,9},{65470,16},{65471,16},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1438 | {506,9},{65479,16},{65480,16},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1439 | {1017,10},{65488,16},{65489,16},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1440 | 
{1018,10},{65497,16},{65498,16},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1441 | {2040,11},{65506,16},{65507,16},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1442 | {65515,16},{65516,16},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{0,0},{0,0},{0,0},{0,0},{0,0}, 1443 | {2041,11},{65525,16},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} 1444 | }; 1445 | static const unsigned short UVAC_HT[256][2] = { 1446 | {0,2},{1,2},{4,3},{10,4},{24,5},{25,5},{56,6},{120,7},{500,9},{1014,10},{4084,12},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1447 | {11,4},{57,6},{246,8},{501,9},{2038,11},{4085,12},{65416,16},{65417,16},{65418,16},{65419,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1448 | {26,5},{247,8},{1015,10},{4086,12},{32706,15},{65420,16},{65421,16},{65422,16},{65423,16},{65424,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1449 | {27,5},{248,8},{1016,10},{4087,12},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{65430,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1450 | {58,6},{502,9},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{65438,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1451 | {59,6},{1017,10},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{65446,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1452 | {121,7},{2039,11},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{65454,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1453 | {122,7},{2040,11},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{65462,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1454 | {249,8},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{65470,16},{65471,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1455 | 
{503,9},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{65479,16},{65480,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1456 | {504,9},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{65488,16},{65489,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1457 | {505,9},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{65497,16},{65498,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1458 | {506,9},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{65506,16},{65507,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1459 | {2041,11},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{65515,16},{65516,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1460 | {16352,14},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{65525,16},{0,0},{0,0},{0,0},{0,0},{0,0}, 1461 | {1018,10},{32707,15},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} 1462 | }; 1463 | static const int YQT[] = {16,11,10,16,24,40,51,61,12,12,14,19,26,58,60,55,14,13,16,24,40,57,69,56,14,17,22,29,51,87,80,62,18,22, 1464 | 37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99}; 1465 | static const int UVQT[] = {17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99, 1466 | 99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99}; 1467 | static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f, 1468 | 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f }; 1469 | 1470 | int row, col, i, k, subsample; 1471 | float fdtbl_Y[64], fdtbl_UV[64]; 1472 | unsigned char YTable[64], UVTable[64]; 1473 | 1474 | if(!data || !width || !height || comp > 4 || 
comp < 1) { 1475 | return 0; 1476 | } 1477 | 1478 | quality = quality ? quality : 90; 1479 | subsample = quality <= 90 ? 1 : 0; 1480 | quality = quality < 1 ? 1 : quality > 100 ? 100 : quality; 1481 | quality = quality < 50 ? 5000 / quality : 200 - quality * 2; 1482 | 1483 | for(i = 0; i < 64; ++i) { 1484 | int uvti, yti = (YQT[i]*quality+50)/100; 1485 | YTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (yti < 1 ? 1 : yti > 255 ? 255 : yti); 1486 | uvti = (UVQT[i]*quality+50)/100; 1487 | UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (uvti < 1 ? 1 : uvti > 255 ? 255 : uvti); 1488 | } 1489 | 1490 | for(row = 0, k = 0; row < 8; ++row) { 1491 | for(col = 0; col < 8; ++col, ++k) { 1492 | fdtbl_Y[k] = 1 / (YTable [stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); 1493 | fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); 1494 | } 1495 | } 1496 | 1497 | // Write Headers 1498 | { 1499 | static const unsigned char head0[] = { 0xFF,0xD8,0xFF,0xE0,0,0x10,'J','F','I','F',0,1,1,0,0,1,0,1,0,0,0xFF,0xDB,0,0x84,0 }; 1500 | static const unsigned char head2[] = { 0xFF,0xDA,0,0xC,3,1,0,2,0x11,3,0x11,0,0x3F,0 }; 1501 | const unsigned char head1[] = { 0xFF,0xC0,0,0x11,8,(unsigned char)(height>>8),STBIW_UCHAR(height),(unsigned char)(width>>8),STBIW_UCHAR(width), 1502 | 3,1,(unsigned char)(subsample?0x22:0x11),0,2,0x11,1,3,0x11,1,0xFF,0xC4,0x01,0xA2,0 }; 1503 | s->func(s->context, (void*)head0, sizeof(head0)); 1504 | s->func(s->context, (void*)YTable, sizeof(YTable)); 1505 | stbiw__putc(s, 1); 1506 | s->func(s->context, UVTable, sizeof(UVTable)); 1507 | s->func(s->context, (void*)head1, sizeof(head1)); 1508 | s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1); 1509 | s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values)); 1510 | stbiw__putc(s, 0x10); // HTYACinfo 1511 | s->func(s->context, (void*)(std_ac_luminance_nrcodes+1), sizeof(std_ac_luminance_nrcodes)-1); 1512 | s->func(s->context, 
(void*)std_ac_luminance_values, sizeof(std_ac_luminance_values)); 1513 | stbiw__putc(s, 1); // HTUDCinfo 1514 | s->func(s->context, (void*)(std_dc_chrominance_nrcodes+1), sizeof(std_dc_chrominance_nrcodes)-1); 1515 | s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values)); 1516 | stbiw__putc(s, 0x11); // HTUACinfo 1517 | s->func(s->context, (void*)(std_ac_chrominance_nrcodes+1), sizeof(std_ac_chrominance_nrcodes)-1); 1518 | s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values)); 1519 | s->func(s->context, (void*)head2, sizeof(head2)); 1520 | } 1521 | 1522 | // Encode 8x8 macroblocks 1523 | { 1524 | static const unsigned short fillBits[] = {0x7F, 7}; 1525 | int DCY=0, DCU=0, DCV=0; 1526 | int bitBuf=0, bitCnt=0; 1527 | // comp == 2 is grey+alpha (alpha is ignored) 1528 | int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0; 1529 | const unsigned char *dataR = (const unsigned char *)data; 1530 | const unsigned char *dataG = dataR + ofsG; 1531 | const unsigned char *dataB = dataR + ofsB; 1532 | int x, y, pos; 1533 | if(subsample) { 1534 | for(y = 0; y < height; y += 16) { 1535 | for(x = 0; x < width; x += 16) { 1536 | float Y[256], U[256], V[256]; 1537 | for(row = y, pos = 0; row < y+16; ++row) { 1538 | // row >= height => use last input row 1539 | int clamped_row = (row < height) ? row : height - 1; 1540 | int base_p = (stbi__flip_vertically_on_write ? (height-1-clamped_row) : clamped_row)*width*comp; 1541 | for(col = x; col < x+16; ++col, ++pos) { 1542 | // if col >= width => use pixel from last input column 1543 | int p = base_p + ((col < width) ? 
col : (width-1))*comp; 1544 | float r = dataR[p], g = dataG[p], b = dataB[p]; 1545 | Y[pos]= +0.29900f*r + 0.58700f*g + 0.11400f*b - 128; 1546 | U[pos]= -0.16874f*r - 0.33126f*g + 0.50000f*b; 1547 | V[pos]= +0.50000f*r - 0.41869f*g - 0.08131f*b; 1548 | } 1549 | } 1550 | DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+0, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT); 1551 | DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+8, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT); 1552 | DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+128, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT); 1553 | DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+136, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT); 1554 | 1555 | // subsample U,V 1556 | { 1557 | float subU[64], subV[64]; 1558 | int yy, xx; 1559 | for(yy = 0, pos = 0; yy < 8; ++yy) { 1560 | for(xx = 0; xx < 8; ++xx, ++pos) { 1561 | int j = yy*32+xx*2; 1562 | subU[pos] = (U[j+0] + U[j+1] + U[j+16] + U[j+17]) * 0.25f; 1563 | subV[pos] = (V[j+0] + V[j+1] + V[j+16] + V[j+17]) * 0.25f; 1564 | } 1565 | } 1566 | DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subU, 8, fdtbl_UV, DCU, UVDC_HT, UVAC_HT); 1567 | DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subV, 8, fdtbl_UV, DCV, UVDC_HT, UVAC_HT); 1568 | } 1569 | } 1570 | } 1571 | } else { 1572 | for(y = 0; y < height; y += 8) { 1573 | for(x = 0; x < width; x += 8) { 1574 | float Y[64], U[64], V[64]; 1575 | for(row = y, pos = 0; row < y+8; ++row) { 1576 | // row >= height => use last input row 1577 | int clamped_row = (row < height) ? row : height - 1; 1578 | int base_p = (stbi__flip_vertically_on_write ? (height-1-clamped_row) : clamped_row)*width*comp; 1579 | for(col = x; col < x+8; ++col, ++pos) { 1580 | // if col >= width => use pixel from last input column 1581 | int p = base_p + ((col < width) ? 
col : (width-1))*comp; 1582 | float r = dataR[p], g = dataG[p], b = dataB[p]; 1583 | Y[pos]= +0.29900f*r + 0.58700f*g + 0.11400f*b - 128; 1584 | U[pos]= -0.16874f*r - 0.33126f*g + 0.50000f*b; 1585 | V[pos]= +0.50000f*r - 0.41869f*g - 0.08131f*b; 1586 | } 1587 | } 1588 | 1589 | DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y, 8, fdtbl_Y, DCY, YDC_HT, YAC_HT); 1590 | DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, U, 8, fdtbl_UV, DCU, UVDC_HT, UVAC_HT); 1591 | DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, V, 8, fdtbl_UV, DCV, UVDC_HT, UVAC_HT); 1592 | } 1593 | } 1594 | } 1595 | 1596 | // Do the bit alignment of the EOI marker 1597 | stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits); 1598 | } 1599 | 1600 | // EOI 1601 | stbiw__putc(s, 0xFF); 1602 | stbiw__putc(s, 0xD9); 1603 | 1604 | return 1; 1605 | } 1606 | /* stbi_write_jpg_to_func: JPEG output through a caller-supplied write callback. Zero-initializes a stbi__write_context, attaches func and context to it with stbi__start_write_callbacks, then forwards x, y, comp, data and quality to stbi_write_jpg_core as width, height, component count, pixel data and quality. Returns the core's result: 0 when data is NULL, x or y is 0, or comp is outside 1..4; 1 otherwise. Inside the core a quality of 0 is replaced by 90 and the value is then clamped to 1..100; for comp == 2 (grey+alpha) the alpha channel is ignored. NOTE(review): the (void *) cast on data discards const even though stbi_write_jpg_core already accepts const void* - the cast looks unnecessary; confirm before removing. */ 1607 | STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality) 1608 | { 1609 | stbi__write_context s = { 0 }; 1610 | stbi__start_write_callbacks(&s, func, context); 1611 | return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality); 1612 | } 1613 | 1614 | /* stbi_write_jpg: file-based variant of the JPEG writer, compiled only when STBI_WRITE_NO_STDIO is not defined. Calls stbi__start_write_file(&s, filename); when that returns zero the function returns 0 without encoding anything. Otherwise it runs stbi_write_jpg_core with x and y as width and height, calls stbi__end_write_file on the context, and returns the core's result (1 on success, 0 when the arguments are rejected). */ 1615 | #ifndef STBI_WRITE_NO_STDIO 1616 | STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality) 1617 | { 1618 | stbi__write_context s = { 0 }; 1619 | if (stbi__start_write_file(&s,filename)) { 1620 | int r = stbi_write_jpg_core(&s, x, y, comp, data, quality); 1621 | stbi__end_write_file(&s); 1622 | return r; 1623 | } else 1624 | return 0; 1625 | } 1626 | #endif 1627 | 1628 | #endif // STB_IMAGE_WRITE_IMPLEMENTATION 1629 | 1630 | /* Revision history 1631 | 1.16 (2021-07-11) 1632 | make Deflate code emit uncompressed blocks when it would otherwise expand 1633 | support writing BMPs with alpha channel 1634 | 1.15 (2020-07-13) unknown 1635 | 1.14 (2020-02-02) updated JPEG writer to downsample chroma channels 1636 | 1.13 1637 | 1.12 1638 | 1.11 (2019-08-11) 1639 | 1640 | 1.10
(2019-02-07) 1641 | support utf8 filenames in Windows; fix warnings and platform ifdefs 1642 | 1.09 (2018-02-11) 1643 | fix typo in zlib quality API, improve STB_I_W_STATIC in C++ 1644 | 1.08 (2018-01-29) 1645 | add stbi__flip_vertically_on_write, external zlib, zlib quality, choose PNG filter 1646 | 1.07 (2017-07-24) 1647 | doc fix 1648 | 1.06 (2017-07-23) 1649 | writing JPEG (using Jon Olick's code) 1650 | 1.05 ??? 1651 | 1.04 (2017-03-03) 1652 | monochrome BMP expansion 1653 | 1.03 ??? 1654 | 1.02 (2016-04-02) 1655 | avoid allocating large structures on the stack 1656 | 1.01 (2016-01-16) 1657 | STBIW_REALLOC_SIZED: support allocators with no realloc support 1658 | avoid race-condition in crc initialization 1659 | minor compile issues 1660 | 1.00 (2015-09-14) 1661 | installable file IO function 1662 | 0.99 (2015-09-13) 1663 | warning fixes; TGA rle support 1664 | 0.98 (2015-04-08) 1665 | added STBIW_MALLOC, STBIW_ASSERT etc 1666 | 0.97 (2015-01-18) 1667 | fixed HDR asserts, rewrote HDR rle logic 1668 | 0.96 (2015-01-17) 1669 | add HDR output 1670 | fix monochrome BMP 1671 | 0.95 (2014-08-17) 1672 | add monochrome TGA output 1673 | 0.94 (2014-05-31) 1674 | rename private functions to avoid conflicts with stb_image.h 1675 | 0.93 (2014-05-27) 1676 | warning fixes 1677 | 0.92 (2010-08-01) 1678 | casts to unsigned char to fix warnings 1679 | 0.91 (2010-07-17) 1680 | first public release 1681 | 0.90 first internal release 1682 | */ 1683 | 1684 | /* 1685 | ------------------------------------------------------------------------------ 1686 | This software is available under 2 licenses -- choose whichever you prefer. 
1687 | ------------------------------------------------------------------------------ 1688 | ALTERNATIVE A - MIT License 1689 | Copyright (c) 2017 Sean Barrett 1690 | Permission is hereby granted, free of charge, to any person obtaining a copy of 1691 | this software and associated documentation files (the "Software"), to deal in 1692 | the Software without restriction, including without limitation the rights to 1693 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 1694 | of the Software, and to permit persons to whom the Software is furnished to do 1695 | so, subject to the following conditions: 1696 | The above copyright notice and this permission notice shall be included in all 1697 | copies or substantial portions of the Software. 1698 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1699 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1700 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 1701 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1702 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 1703 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 1704 | SOFTWARE. 1705 | ------------------------------------------------------------------------------ 1706 | ALTERNATIVE B - Public Domain (www.unlicense.org) 1707 | This is free and unencumbered software released into the public domain. 1708 | Anyone is free to copy, modify, publish, use, compile, sell, or distribute this 1709 | software, either in source code form or as a compiled binary, for any purpose, 1710 | commercial or non-commercial, and by any means. 1711 | In jurisdictions that recognize copyright laws, the author or authors of this 1712 | software dedicate any and all copyright interest in the software to the public 1713 | domain. 
We make this dedication for the benefit of the public at large and to 1714 | the detriment of our heirs and successors. We intend this dedication to be an 1715 | overt act of relinquishment in perpetuity of all present and future rights to 1716 | this software under copyright law. 1717 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1718 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1719 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 1720 | AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 1721 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 1722 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 1723 | ------------------------------------------------------------------------------ 1724 | */ 1725 | --------------------------------------------------------------------------------