├── .gitignore ├── media ├── AVX2.gif ├── Cross.gif ├── glsl.png ├── AVX512.gif ├── Serial.gif ├── SSE-NEON.gif ├── qTriangle.aep ├── CrossMethod.gif └── BarycentricMethod.gif ├── .gitmodules ├── include └── qTriangle │ ├── Util.hpp │ ├── qTriangle.hpp │ └── Types.hpp ├── source └── qTriangle │ ├── Util.cpp │ ├── qTriangle.cpp │ └── qTriangle-x86.hpp ├── test ├── Bench.hpp ├── Display.cpp ├── Benchmark.cpp ├── FillShape.cpp └── stb_image_write.h ├── LICENSE ├── CMakeLists.txt ├── scripts └── GenGifs.py └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | .* 3 | !/.gitignore 4 | -------------------------------------------------------------------------------- /media/AVX2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wunkolo/qTriangle/HEAD/media/AVX2.gif -------------------------------------------------------------------------------- /media/Cross.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wunkolo/qTriangle/HEAD/media/Cross.gif -------------------------------------------------------------------------------- /media/glsl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wunkolo/qTriangle/HEAD/media/glsl.png -------------------------------------------------------------------------------- /media/AVX512.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wunkolo/qTriangle/HEAD/media/AVX512.gif -------------------------------------------------------------------------------- /media/Serial.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wunkolo/qTriangle/HEAD/media/Serial.gif 
-------------------------------------------------------------------------------- /media/SSE-NEON.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wunkolo/qTriangle/HEAD/media/SSE-NEON.gif -------------------------------------------------------------------------------- /media/qTriangle.aep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wunkolo/qTriangle/HEAD/media/qTriangle.aep -------------------------------------------------------------------------------- /media/CrossMethod.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wunkolo/qTriangle/HEAD/media/CrossMethod.gif -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "extern/glm"] 2 | path = extern/glm 3 | url = git@github.com:g-truc/glm.git 4 | -------------------------------------------------------------------------------- /media/BarycentricMethod.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wunkolo/qTriangle/HEAD/media/BarycentricMethod.gif -------------------------------------------------------------------------------- /include/qTriangle/Util.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace qTri 4 | { 5 | class Image; 6 | 7 | namespace Util 8 | { 9 | void Draw(const qTri::Image& Frame); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /include/qTriangle/qTriangle.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "Types.hpp" 9 | #include 
"Util.hpp" 10 | 11 | namespace qTri 12 | { 13 | extern const std::vector< 14 | std::pair< 15 | void(* const)( 16 | const glm::i32vec2 Points[], std::uint8_t Results[], std::size_t Count, 17 | const Triangle& Tri 18 | ), 19 | const char* 20 | > 21 | > FillAlgorithms; 22 | } 23 | -------------------------------------------------------------------------------- /include/qTriangle/Types.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | namespace qTri 10 | { 11 | class Image 12 | { 13 | public: 14 | Image(std::size_t Width, std::size_t Height) 15 | : Width(Width), 16 | Height(Height) 17 | { 18 | Pixels.resize(Width * Height); 19 | } 20 | 21 | std::size_t Width; 22 | std::size_t Height; 23 | std::vector Pixels; 24 | }; 25 | 26 | using Triangle = std::array; 27 | } 28 | -------------------------------------------------------------------------------- /source/qTriangle/Util.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace qTri 5 | { 6 | namespace Util 7 | { 8 | void Draw(const qTri::Image& Frame) 9 | { 10 | for( std::size_t y = 0; y < Frame.Height; ++y ) 11 | { 12 | std::fputs("\033[0;35m|\033[1;36m", stdout); 13 | for( std::size_t x = 0; x < Frame.Width; ++x ) 14 | { 15 | std::putchar( 16 | " @"[Frame.Pixels[x + y * Frame.Width] & 1] 17 | ); 18 | } 19 | std::fputs("\033[0;35m|\n", stdout); 20 | } 21 | std::fputs("\033[0m", stdout); 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /test/Bench.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | // Measures the time it takes to execute a execute a function 5 | 6 | template< typename TimeT = std::chrono::nanoseconds > 7 | struct Bench 8 | { 9 | template< typename FunctionT, typename ...ArgsT > 
10 | static TimeT Duration(FunctionT&& Func, ArgsT&&... Arguments) 11 | { 12 | // Start time 13 | const auto Start = std::chrono::high_resolution_clock::now(); 14 | // Run function, perfect-forward arguments 15 | std::forward(Func)(std::forward(Arguments)...); 16 | // Return execution time. 17 | return std::chrono::duration_cast( 18 | std::chrono::high_resolution_clock::now() - Start 19 | ); 20 | } 21 | }; 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Wunkolo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required( VERSION 3.2.2 ) 2 | project( qTriangle CXX ) 3 | 4 | ### Standard 5 | set( CMAKE_CXX_STANDARD 17 ) 6 | set( CMAKE_CXX_STANDARD_REQUIRED ON ) 7 | set( CMAKE_CXX_EXTENSIONS ON ) 8 | 9 | ### Verbosity 10 | set( CMAKE_COLOR_MAKEFILE ON ) 11 | set( CMAKE_VERBOSE_MAKEFILE ON ) 12 | 13 | # Generate 'compile_commands.json' for clang_complete 14 | set( CMAKE_EXPORT_COMPILE_COMMANDS ON ) 15 | 16 | ### Global includes 17 | include_directories( 18 | include 19 | ) 20 | 21 | ### Optimizations 22 | if( MSVC ) 23 | add_compile_options( /arch:AVX2 ) 24 | add_compile_options( /W3 ) 25 | add_compile_options( /Gv ) 26 | elseif( CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang" ) 27 | add_compile_options( -march=native ) 28 | add_compile_options( -Ofast ) 29 | add_compile_options( -Wall ) 30 | add_compile_options( -Wextra ) 31 | # Force colored diagnostic messages in Ninja's output 32 | if( CMAKE_GENERATOR STREQUAL "Ninja" ) 33 | add_compile_options( -fdiagnostics-color=always ) 34 | endif() 35 | endif() 36 | 37 | ## GLM 38 | set( GLM_TEST_ENABLE OFF CACHE BOOL "Build GLM Unit Tests") 39 | add_subdirectory( extern/glm ) 40 | 41 | ### Target 42 | add_library( 43 | qTriangle 44 | STATIC 45 | source/qTriangle/qTriangle.cpp 46 | source/qTriangle/Util.cpp 47 | ) 48 | target_link_libraries( 49 | qTriangle 50 | PRIVATE 51 | glm 52 | ) 53 | 54 | ### Tests 55 | enable_testing() 56 | 57 | ## Display 58 | add_executable( 59 | Display 60 | test/Display.cpp 61 | ) 62 | target_link_libraries( 63 | Display 64 | PRIVATE 65 | qTriangle 66 | glm 67 | ) 68 | add_test( 69 | NAME Display 70 | COMMAND Display 71 | ) 72 | 73 | ## Benchmark 74 | add_executable( 75 | Benchmark 76 | test/Benchmark.cpp 77 | ) 78 | target_link_libraries( 79 | Benchmark 80 | PRIVATE 81 | qTriangle 82 | glm 
83 | ) 84 | add_test( 85 | NAME Benchmark 86 | COMMAND Benchmark 87 | ) 88 | 89 | ## FillShape 90 | add_executable( 91 | FillShape 92 | test/FillShape.cpp 93 | ) 94 | target_link_libraries( 95 | FillShape 96 | PRIVATE 97 | qTriangle 98 | glm 99 | ) 100 | add_test( 101 | NAME FillShape 102 | COMMAND FillShape 103 | ) 104 | # Link filesystem libs for GCC/Clang 105 | if( CMAKE_COMPILER_IS_GNUCXX ) 106 | target_link_libraries( 107 | FillShape 108 | PRIVATE 109 | stdc++fs 110 | ) 111 | endif() 112 | -------------------------------------------------------------------------------- /source/qTriangle/qTriangle.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define GLM_ENABLE_EXPERIMENTAL 4 | #include 5 | #include 6 | 7 | namespace qTri 8 | { 9 | 10 | // Get Cross-Product Z component from two directional vectors 11 | inline std::int32_t Det( 12 | const glm::i32vec2& Top, 13 | const glm::i32vec2& Bottom 14 | ) 15 | { 16 | return Top.x * Bottom.y - Top.y * Bottom.x; 17 | } 18 | 19 | //// Cross Product Method 20 | 21 | template 22 | inline void CrossProductMethod( 23 | const glm::i32vec2 Points[], std::uint8_t Results[], std::size_t Count, 24 | const Triangle& Tri 25 | ) 26 | { 27 | CrossProductMethod( 28 | Points, Results, Count, 29 | Tri 30 | ); 31 | } 32 | 33 | //// Barycentric Method 34 | 35 | template 36 | inline void BarycentricMethod( 37 | const glm::i32vec2 Points[], std::uint8_t Results[], std::size_t Count, 38 | const Triangle& Tri 39 | ) 40 | { 41 | BarycentricMethod( 42 | Points, Results, Count, 43 | Tri 44 | ); 45 | } 46 | 47 | #if defined(__x86_64__) || defined(_M_X64) 48 | #include "qTriangle-x86.hpp" 49 | #else 50 | template<> 51 | inline void CrossProductMethod<0>( 52 | const glm::i32vec2 Points[], std::uint8_t Results[], std::size_t Count, 53 | const Triangle& Tri 54 | ) 55 | { 56 | // Directional vectors along all three triangle edges 57 | const glm::i32vec2 EdgeDir[3] = { 58 | Tri[1] - Tri[0], 59 | 
Tri[2] - Tri[1], 60 | Tri[0] - Tri[2] 61 | }; 62 | 63 | for( std::size_t i = 0; i < Count; ++i ) 64 | { 65 | const glm::i32vec2 PointDir[3] = { 66 | Points[i] - Tri[0], 67 | Points[i] - Tri[1], 68 | Points[i] - Tri[2] 69 | }; 70 | 71 | const glm::i32vec3 Crosses = glm::vec3( 72 | Det( EdgeDir[0], PointDir[0] ), 73 | Det( EdgeDir[1], PointDir[1] ), 74 | Det( EdgeDir[2], PointDir[2] ) 75 | ); 76 | 77 | Results[i] |= glm::all( 78 | glm::greaterThanEqual( 79 | Crosses, 80 | glm::i32vec3(0) 81 | ) 82 | ); 83 | } 84 | } 85 | 86 | template<> 87 | inline void BarycentricMethod<0>( 88 | const glm::i32vec2 Points[], std::uint8_t Results[], std::size_t Count, 89 | const Triangle& Tri 90 | ) 91 | { 92 | const std::int32_t Det01 = Det( Tri[0], Tri[1] ); 93 | const std::int32_t Det20 = Det( Tri[2], Tri[0] ); 94 | const std::int32_t Area = Det( Tri[1], Tri[2] ) + Det20 + Det01; 95 | 96 | for( std::size_t i = 0; i < Count; ++i ) 97 | { 98 | const std::int32_t U = Det20 99 | + Det( Tri[0], Points[i] ) 100 | + Det( Points[i], Tri[2] ); 101 | const std::int32_t V = Det01 102 | + Det( Tri[1], Points[i] ) 103 | + Det( Points[i], Tri[0] ); 104 | 105 | Results[i] |= (U + V) < Area && U >= 0 && V >= 0; 106 | } 107 | } 108 | #endif 109 | 110 | //// Exports 111 | 112 | const std::vector< 113 | std::pair< 114 | void(* const)( 115 | const glm::i32vec2 Points[], std::uint8_t Results[], std::size_t Count, 116 | const Triangle& Tri 117 | ), 118 | const char* 119 | > 120 | > FillAlgorithms = { 121 | // Cross-Product methods 122 | {CrossProductMethod< 0>, "Serial-CrossProduct"}, 123 | {CrossProductMethod< -1>, "CrossProductMethod"}, 124 | // Barycentric methods 125 | {BarycentricMethod< 0>, "Serial-Barycentric"}, 126 | {BarycentricMethod< -1>, "BarycentricMethod"}, 127 | }; 128 | } 129 | 130 | -------------------------------------------------------------------------------- /test/Display.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 
3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | #include "Bench.hpp" 15 | 16 | #ifdef _WIN32 17 | #define NOMINMAX 18 | #include 19 | // Statically enables "ENABLE_VIRTUAL_TERMINAL_PROCESSING" for the terminal 20 | // at runtime to allow for unix-style escape sequences. 21 | static const bool _WndV100Enabled = []() -> bool 22 | { 23 | const auto Handle = GetStdHandle(STD_OUTPUT_HANDLE); 24 | DWORD ConsoleMode; 25 | GetConsoleMode( 26 | Handle, 27 | &ConsoleMode 28 | ); 29 | SetConsoleMode( 30 | Handle, 31 | ConsoleMode | ENABLE_VIRTUAL_TERMINAL_PROCESSING 32 | ); 33 | GetConsoleMode( 34 | Handle, 35 | &ConsoleMode 36 | ); 37 | return ConsoleMode & ENABLE_VIRTUAL_TERMINAL_PROCESSING; 38 | }(); 39 | #endif 40 | 41 | constexpr std::size_t Width = 80; 42 | constexpr std::size_t Height = 50; 43 | 44 | static qTri::Triangle Triangles[12]; 45 | 46 | int main() 47 | { 48 | // Generate random triangles 49 | std::random_device RandomDevice; 50 | std::mt19937 RandomEngine(RandomDevice()); 51 | std::uniform_int_distribution WidthDis(0, Width); 52 | std::uniform_int_distribution HeightDis(0, Height); 53 | for( qTri::Triangle& CurTriangle : Triangles ) 54 | { 55 | glm::i32vec2 Center{}; 56 | // Randomly place vertices 57 | for( glm::i32vec2& CurVert : CurTriangle ) 58 | { 59 | CurVert.x = WidthDis(RandomEngine); 60 | CurVert.y = HeightDis(RandomEngine); 61 | Center += CurVert; 62 | } 63 | // Sort points in clockwise order 64 | Center /= 3; 65 | std::sort( 66 | std::begin(CurTriangle), 67 | std::end(CurTriangle), 68 | [&Center](const glm::i32vec2& A, const glm::i32vec2& B) -> bool 69 | { 70 | // Points that have a larger angle away from the center are "heavier" 71 | const glm::i32vec2 DirectionA = Center - A; 72 | const glm::i32vec2 DirectionB = Center - B; 73 | const auto AngleA = glm::atan(DirectionA.y, DirectionA.x); 74 | const auto AngleB = glm::atan(DirectionB.y, DirectionB.x); 75 | return AngleA < 
AngleB; 76 | } 77 | ); 78 | } 79 | 80 | // Generate 2d grid of points to test against 81 | std::vector FragCoords; 82 | for( std::size_t y = 0; y < Height; ++y ) 83 | { 84 | for( std::size_t x = 0; x < Width; ++x ) 85 | { 86 | FragCoords.emplace_back(x,y); 87 | } 88 | } 89 | 90 | for( const auto& FillAlgorithm : qTri::FillAlgorithms ) 91 | { 92 | std::printf( 93 | "%s - ", 94 | FillAlgorithm.second 95 | ); 96 | qTri::Image CurFrame(Width, Height); 97 | std::size_t ExecTime = 0; 98 | for( const qTri::Triangle& CurTriangle : Triangles ) 99 | { 100 | ExecTime += Bench<>::Duration( 101 | FillAlgorithm.first, 102 | FragCoords.data(), 103 | CurFrame.Pixels.data(), 104 | FragCoords.size(), 105 | CurTriangle 106 | ).count(); 107 | } 108 | ExecTime /= std::extent::value; 109 | std::printf( 110 | "%zu ns\n", 111 | ExecTime 112 | ); 113 | qTri::Util::Draw(CurFrame); 114 | } 115 | 116 | return EXIT_SUCCESS; 117 | } 118 | -------------------------------------------------------------------------------- /test/Benchmark.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | #include 11 | 12 | #include "Bench.hpp" 13 | 14 | #ifdef _WIN32 15 | #define NOMINMAX 16 | #include 17 | // Statically enables "ENABLE_VIRTUAL_TERMINAL_PROCESSING" for the terminal 18 | // at runtime to allow for unix-style escape sequences. 
19 | static const bool _WndV100Enabled = []() -> bool 20 | { 21 | const auto Handle = GetStdHandle(STD_OUTPUT_HANDLE); 22 | DWORD ConsoleMode; 23 | GetConsoleMode( 24 | Handle, 25 | &ConsoleMode 26 | ); 27 | SetConsoleMode( 28 | Handle, 29 | ConsoleMode | ENABLE_VIRTUAL_TERMINAL_PROCESSING 30 | ); 31 | GetConsoleMode( 32 | Handle, 33 | &ConsoleMode 34 | ); 35 | return ConsoleMode & ENABLE_VIRTUAL_TERMINAL_PROCESSING; 36 | }(); 37 | #endif 38 | 39 | constexpr std::size_t Width = 80; 40 | constexpr std::size_t Height = 50; 41 | constexpr std::size_t Loops = 5; 42 | static qTri::Triangle Triangles[100'000]; 43 | 44 | int main() 45 | { 46 | // Generate random triangles 47 | std::random_device RandomDevice; 48 | std::mt19937 RandomEngine(RandomDevice()); 49 | std::uniform_int_distribution WidthDis(0, Width); 50 | std::uniform_int_distribution HeightDis(0, Height); 51 | for( qTri::Triangle& CurTriangle : Triangles ) 52 | { 53 | glm::i32vec2 Center{}; 54 | // Randomly place vertices 55 | for( glm::i32vec2& CurVert : CurTriangle ) 56 | { 57 | CurVert.x = WidthDis(RandomEngine); 58 | CurVert.y = HeightDis(RandomEngine); 59 | Center += CurVert; 60 | } 61 | // Sort points in clockwise order 62 | Center /= 3; 63 | std::sort( 64 | std::begin(CurTriangle), 65 | std::end(CurTriangle), 66 | [&Center](const glm::i32vec2& A, const glm::i32vec2& B) -> bool 67 | { 68 | // Sort points by its angle from the center 69 | const glm::i32vec2 DirectionA = Center - A; 70 | const glm::i32vec2 DirectionB = Center - B; 71 | const auto AngleA = glm::atan(DirectionA.y, DirectionA.x); 72 | const auto AngleB = glm::atan(DirectionB.y, DirectionB.x); 73 | return AngleA < AngleB; 74 | } 75 | ); 76 | } 77 | std::printf( 78 | "%zu Triangles x %zu times\n" 79 | "%zu x %zu Image map\n", 80 | std::extent::value, 81 | Loops, 82 | Width, 83 | Height 84 | ); 85 | std::printf( 86 | "Algorithm | Average per triangle(ns)\n" 87 | ); 88 | // Generate 2d grid of points to test against 89 | std::vector FragCoords; 90 
| for( std::size_t y = 0; y < Height; ++y ) 91 | { 92 | for( std::size_t x = 0; x < Width; ++x ) 93 | { 94 | FragCoords.emplace_back(x,y); 95 | } 96 | } 97 | // Benchmark each algorithm against all triangles 98 | for( const auto& FillAlgorithm : qTri::FillAlgorithms ) 99 | { 100 | std::printf( 101 | "%s\t", 102 | FillAlgorithm.second 103 | ); 104 | qTri::Image CurFrame(Width, Height); 105 | std::size_t ExecTime = 0; 106 | for( std::size_t i = 0; i < Loops; ++i) 107 | { 108 | for( const qTri::Triangle& CurTriangle : Triangles ) 109 | { 110 | ExecTime += Bench<>::Duration( 111 | FillAlgorithm.first, 112 | FragCoords.data(), 113 | CurFrame.Pixels.data(), 114 | FragCoords.size(), 115 | CurTriangle 116 | ).count(); 117 | } 118 | } 119 | ExecTime /= std::extent::value * Loops; 120 | std::printf( 121 | "| %zu ns\n", 122 | ExecTime 123 | ); 124 | } 125 | return EXIT_SUCCESS; 126 | } 127 | -------------------------------------------------------------------------------- /scripts/GenGifs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import multiprocessing 4 | import numpy as np 5 | import itertools 6 | from PIL import Image 7 | from PIL import ImageDraw 8 | 9 | TestTriangle = ( 10 | ( 5, 5), 11 | (95,40), 12 | (30,95) 13 | ) 14 | 15 | # Barycentric method 16 | def PointInTriangle( Point, Triangle ): 17 | V0 = tuple(np.subtract(Triangle[2], Triangle[0])) 18 | V1 = tuple(np.subtract(Triangle[1], Triangle[0])) 19 | V2 = tuple(np.subtract(Point , Triangle[0])) 20 | 21 | Dot00 = np.dot(V0, V0) 22 | Dot01 = np.dot(V0, V1) 23 | Dot02 = np.dot(V0, V2) 24 | Dot11 = np.dot(V1, V1) 25 | Dot12 = np.dot(V1, V2) 26 | Area = (Dot00 * Dot11 - Dot01 * Dot01) 27 | U = (Dot11 * Dot02 - Dot01 * Dot12) 28 | V = (Dot00 * Dot12 - Dot01 * Dot02) 29 | return (U >= 0) & (V >= 0) & (U + V < Area) 30 | 31 | def chunks(List, Widths): 32 | i = 0 33 | for CurWidth in Widths: 34 | while i + CurWidth < len(List): 35 | yield 
List[i:i + CurWidth] 36 | i += CurWidth 37 | 38 | # Params: 39 | # Name: Name of generated frames. Default "Serial" 40 | # Path: Path for output images Default: "./frames/(Name)/" 41 | # Size: Of the image, default (100,100) 42 | # Scale: Scaling for the resulting image. Default: 2 43 | # Granularity: List of widths in which elements may be processed in parallel 44 | # Default: [1] 45 | def RenderTriangle( Params ): 46 | # Params 47 | Name = Params.get("Name", "Serial") 48 | Size = Params.get("Size", (100, 100)) 49 | Path = "./frames/" + Name + "/" 50 | Scale = Params.get("Scale", 2) 51 | Granularity = Params.get("Granularity", [1]) 52 | # Sort by largest to smallest 53 | Granularity.sort() 54 | Granularity.reverse() 55 | # Create target path recursively 56 | os.makedirs(Path, exist_ok=True) 57 | # Create image 58 | Img = Image.new('RGB', Size) 59 | Draw = ImageDraw.Draw(Img) 60 | # Generate each row of points up-front 61 | Points = [ 62 | (x,y) for y in range(Size[1]) for x in range(Size[0]) 63 | ] 64 | i = 0 65 | for CurPoints in chunks(Points,Granularity): 66 | # Highlight the pixels being currently processed 67 | # Highlight hits and misses 68 | Hit = [(x,y) for (x,y) in CurPoints if PointInTriangle((x,y),TestTriangle)] 69 | Miss = [(x,y) for (x,y) in CurPoints if not PointInTriangle((x,y),TestTriangle)] 70 | Draw.point( 71 | Hit, 72 | fill=(0x00, 0xFF, 0x00) 73 | ) 74 | Draw.point( 75 | Miss, 76 | fill=(0xFF, 0x00, 0x00) 77 | ) 78 | Img.resize( 79 | (Img.width * Scale, Img.height * Scale), 80 | Image.NEAREST 81 | ).save(Path + Name + '_' + str(i).zfill(6) + ".png") 82 | i += 1 83 | # Save the "processed" frame 84 | Draw.point( 85 | Hit, 86 | fill=(0xFF, 0xFF, 0xFF) 87 | ) 88 | Draw.point( 89 | Miss, 90 | fill=(0x00, 0x00, 0x00) 91 | ) 92 | Img.resize( 93 | (Img.width * Scale, Img.height * Scale), 94 | Image.NEAREST 95 | ).save(Path + Name + '_' + str(i).zfill(6) + ".png") 96 | i += 1 97 | subprocess.Popen( 98 | [ 99 | 'ffmpeg', 100 | '-y', 101 | '-framerate','50', 
102 | '-i', Path + Name + '_%06d.png', 103 | Name + '.gif' 104 | ] 105 | ).wait() 106 | 107 | Configs = [ 108 | # Serial 109 | { 110 | "Name": "Serial", 111 | "Granularity": [1], 112 | "Scale": 2, 113 | "Size": (100, 100) 114 | }, 115 | # SSE/NEON 116 | { 117 | "Name": "SSE-NEON", 118 | "Granularity": [4,1], 119 | "Scale": 2, 120 | "Size": (100, 100) 121 | }, 122 | # AVX2 123 | { 124 | "Name": "AVX2", 125 | "Granularity": [8,4,1], 126 | "Scale": 2, 127 | "Size": (100, 100) 128 | }, 129 | # AVX512 130 | { 131 | "Name": "AVX512", 132 | "Granularity": [16,8,4,1], 133 | "Scale": 2, 134 | "Size": (100, 100) 135 | } 136 | ] 137 | 138 | Processes = [ 139 | multiprocessing.Process( 140 | target=RenderTriangle, args=(Config,) 141 | ) for Config in Configs 142 | ] 143 | 144 | for Process in Processes: 145 | Process.start() 146 | 147 | for Process in Processes: 148 | Process.join() 149 | -------------------------------------------------------------------------------- /test/FillShape.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | namespace fs = std::experimental::filesystem; 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | #define STB_IMAGE_WRITE_IMPLEMENTATION 17 | #include "stb_image_write.h" 18 | 19 | constexpr std::size_t Width = 300; 20 | constexpr std::size_t Height = 300; 21 | 22 | int main() 23 | { 24 | // "a" 25 | const glm::i32vec2 Edges[] = { 26 | glm::i32vec2{235,152}, 27 | glm::i32vec2{176,165}, 28 | glm::i32vec2{144,173}, 29 | glm::i32vec2{129,184}, 30 | glm::i32vec2{124,202}, 31 | glm::i32vec2{135,226}, 32 | glm::i32vec2{168,235}, 33 | glm::i32vec2{206,226}, 34 | glm::i32vec2{230,201}, 35 | glm::i32vec2{236,165}, 36 | glm::i32vec2{235,152}, 37 | 38 | glm::i32vec2{238,233}, 39 | glm::i32vec2{200,257}, 40 | glm::i32vec2{159,264}, 41 | glm::i32vec2{105,247}, 42 | glm::i32vec2{86,203}, 43 | 
glm::i32vec2{93,174}, 44 | glm::i32vec2{112,153}, 45 | glm::i32vec2{138,141}, 46 | glm::i32vec2{171,136}, 47 | glm::i32vec2{236,123}, 48 | glm::i32vec2{236,114}, 49 | glm::i32vec2{226,82}, 50 | glm::i32vec2{184,70}, 51 | glm::i32vec2{146,79}, 52 | glm::i32vec2{128,111}, 53 | glm::i32vec2{92,106}, 54 | glm::i32vec2{108,69}, 55 | glm::i32vec2{140,48}, 56 | glm::i32vec2{189,40}, 57 | glm::i32vec2{234,47}, 58 | glm::i32vec2{259,63}, 59 | glm::i32vec2{270,87}, 60 | glm::i32vec2{272,121}, 61 | glm::i32vec2{272,169}, 62 | glm::i32vec2{274,233}, 63 | glm::i32vec2{283,259}, 64 | glm::i32vec2{246,259}, 65 | glm::i32vec2{238,233}, 66 | }; 67 | 68 | qTri::Triangle Triangles[std::extent::value]; 69 | 70 | for( std::size_t i = 0; i < std::extent::value; ++i ) 71 | { 72 | Triangles[i] = qTri::Triangle{ 73 | { 74 | {0, 0}, 75 | Edges[i], 76 | Edges[(i + 1) % std::extent::value] 77 | } 78 | }; 79 | const glm::i32vec2 Center = std::accumulate( 80 | std::cbegin(Triangles[i]), 81 | std::cend(Triangles[i]), 82 | glm::i32vec2{0,0} 83 | ) / 3; 84 | std::sort( 85 | std::begin(Triangles[i]), 86 | std::end(Triangles[i]), 87 | [&Center](const glm::i32vec2& A, const glm::i32vec2& B) -> bool 88 | { 89 | // Sort points by its angle from the center 90 | const glm::i32vec2 DirectionA = Center - A; 91 | const glm::i32vec2 DirectionB = Center - B; 92 | const auto AngleA = glm::atan(DirectionA.y, DirectionA.x); 93 | const auto AngleB = glm::atan(DirectionB.y, DirectionB.x); 94 | return AngleA < AngleB; 95 | } 96 | ); 97 | } 98 | // Generate 2d grid of points to test against 99 | std::vector FragCoords; 100 | for( std::size_t y = 0; y < Height; ++y ) 101 | { 102 | for( std::size_t x = 0; x < Width; ++x ) 103 | { 104 | FragCoords.emplace_back(x,y); 105 | } 106 | } 107 | 108 | for( const auto& FillAlgorithm : qTri::FillAlgorithms ) 109 | { 110 | std::printf( 111 | "%s:\n", 112 | FillAlgorithm.second 113 | ); 114 | const auto FrameFolder = fs::path("Frames") / FillAlgorithm.second; 115 | 
fs::create_directories(FrameFolder); 116 | qTri::Image Frame(Width, Height); 117 | std::size_t FrameIdx = 0; 118 | for( const qTri::Triangle& CurTriangle : Triangles ) 119 | { 120 | qTri::Image CurInversion(Width, Height); 121 | // Render triangle to inversion mask 122 | FillAlgorithm.first( 123 | FragCoords.data(), 124 | CurInversion.Pixels.data(), 125 | FragCoords.size(), 126 | CurTriangle 127 | ); 128 | 129 | // Append inversion mask 130 | for( std::size_t i = 0; i < Width * Height; ++i ) 131 | { 132 | Frame.Pixels[i] = CurInversion.Pixels[i] ? ~Frame.Pixels[i] : Frame.Pixels[i]; 133 | } 134 | stbi_write_png( 135 | ((FrameFolder / std::to_string(FrameIdx)).string() + ".png").c_str(), 136 | Width, 137 | Height, 138 | 1, 139 | Frame.Pixels.data(), 140 | 0 141 | ); 142 | 143 | // Write an image of the current triangle 144 | // Post-process from [0x00,0x01] to [0x00,0xFF] 145 | // Compiler vectorization loves loops like this 146 | for( std::size_t i = 0; i < Width * Height; ++i ) 147 | { 148 | CurInversion.Pixels[i] *= 0xFF; 149 | } 150 | stbi_write_png( 151 | (FrameFolder / ("Tri" + std::to_string(FrameIdx) + ".png")).c_str(), 152 | Width, 153 | Height, 154 | 1, 155 | CurInversion.Pixels.data(), 156 | 0 157 | ); 158 | // ffmpeg -f image2 -framerate 2 -i %d.png -vf "scale=iw*2:ih*2" -sws_flags neighbor Anim.gif 159 | ++FrameIdx; 160 | } 161 | } 162 | 163 | return EXIT_SUCCESS; 164 | } 165 | -------------------------------------------------------------------------------- /source/qTriangle/qTriangle-x86.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | template 5 | inline void CrossProductMethod( 6 | const glm::i32vec2 Points[], std::uint8_t Results[], std::size_t Count, 7 | const qTri::Triangle& Tri 8 | ); 9 | template 10 | inline void BarycentricMethod( 11 | const glm::i32vec2 Points[], std::uint8_t Results[], std::size_t Count, 12 | const qTri::Triangle& Tri 13 | ); 14 | 15 | #if 
defined(__SSE4_1__) 16 | 17 | // Serial 18 | template<> 19 | inline void CrossProductMethod<0>( 20 | const glm::i32vec2 Points[], std::uint8_t Results[], std::size_t Count, 21 | const Triangle& Tri 22 | ) 23 | { 24 | // [ Tri[1].y, Tri[1].x, Tri[0].y, Tri[0].x] 25 | const __m128i Tri10 = _mm_loadu_si128( 26 | reinterpret_cast(Tri.data()) 27 | ); 28 | // [ Tri[2].y, Tri[2].x, Tri[2].y, Tri[2].x] 29 | const __m128i Tri22 = _mm_set1_epi64x( 30 | *reinterpret_cast(Tri.data() + 2) 31 | ); 32 | 33 | // unpacklo(above) 34 | // [ Tri[2].y, Tri[0].y, Tri[2].x, Tri[0].x] 35 | // unpackhi(above) 36 | // [ Tri[2].y, Tri[1].y, Tri[2].x, Tri[1].x] 37 | const __m128i Tri20yyxx = _mm_unpacklo_epi32( 38 | Tri10, Tri22 39 | ); 40 | const __m128i Tri21yyxx = _mm_unpackhi_epi32( 41 | Tri10, Tri22 42 | ); 43 | 44 | // unpacklo(above) 45 | // [ Tri[2].x, Tri[2].x, Tri[1].x, Tri[0].x] 46 | // unpackhi(above) 47 | // [ Tri[2].y, Tri[2].y, Tri[1].y, Tri[0].y] 48 | const __m128i Tri2210x = _mm_unpacklo_epi32( 49 | Tri20yyxx, Tri21yyxx 50 | ); 51 | const __m128i Tri2210y = _mm_unpackhi_epi32( 52 | Tri20yyxx, Tri21yyxx 53 | ); 54 | 55 | // [ Tri[2].x, Tri[2].x, Tri[1].x, Tri[0].x] 56 | // - 57 | // [ Tri[2].x, Tri[1].x, Tri[0].x, Tri[2].x] 58 | // ^ alignr_epi8([ Tri[2].x, Tri[2].x, Tri[1].x, Tri[0].x],12) 59 | const __m128i EdgeDirx = _mm_sub_epi32( 60 | Tri2210x, 61 | _mm_alignr_epi8( 62 | Tri2210x,Tri2210x, 63 | 12 64 | ) 65 | ); 66 | // [ Tri[2].y, Tri[2].y, Tri[1].y, Tri[0].y] 67 | // - 68 | // [ Tri[2].y, Tri[1].y, Tri[0].y, Tri[2].y] 69 | // ^ alignr_epi8([ Tri[2].y, Tri[2].y, Tri[1].y, Tri[0].y],12) 70 | const __m128i EdgeDiry = _mm_sub_epi32( 71 | Tri2210y, 72 | _mm_alignr_epi8( 73 | Tri2210y,Tri2210y, 74 | 12 75 | ) 76 | ); 77 | 78 | for( std::size_t i = 0; i < Count; ++i ) 79 | { 80 | const __m128i CurPoint = _mm_loadl_epi64( 81 | reinterpret_cast(&Points[i]) 82 | ); 83 | const __m128i CurPointx = _mm_shuffle_epi32( 84 | CurPoint, 0b00'00'00'00 85 | ); 86 | const __m128i CurPointy 
= _mm_shuffle_epi32( 87 | CurPoint, 0b01'01'01'01 88 | ); 89 | // PointDirx = Point[i].x - Tri2210x 90 | // PointDiry = Point[i].y - Tri2210y 91 | 92 | const __m128i PointDirx = _mm_sub_epi32( 93 | CurPointx, Tri2210x 94 | ); 95 | const __m128i PointDiry = _mm_sub_epi32( 96 | CurPointy, Tri2210y 97 | ); 98 | // | -- | EdgeDir[2].x | EdgeDir[1].x | EdgeDir[0].x | < EdgeDirX 99 | // | mul | 100 | // | -- | PointDir[2].y | PointDir[1].y | PointDir[0].y | < PointDiry 101 | // | sub | 102 | // | -- | EdgeDir[2].y | EdgeDir[1].y | EdgeDir[0].y | < EdgeDiry 103 | // | mul | 104 | // | -- | PointDir[2].x | PointDir[1].x | PointDir[0].x | < PointDirx 105 | 106 | // We're only checking if the signs are >=0 so there is a lot of 107 | // optimization that can be done, such as eliminating the subtraction 108 | // in the determinant to just comparing the two products directly 109 | // ex: a.x*b.y - a.y*b.x >= 0 110 | // a.x*b.y >= a.y*b.x 111 | // DetHi = EdgeDirx * PointDiry 112 | // DetLo = EdgeDiry * PointDirx 113 | const __m128i DetHi = _mm_mullo_epi32( 114 | EdgeDirx, PointDiry 115 | ); 116 | const __m128i DetLo = _mm_mullo_epi32( 117 | EdgeDiry, PointDirx 118 | ); 119 | 120 | const std::uint16_t CheckMask = _mm_movemask_epi8( 121 | _mm_cmplt_epi32( 122 | DetHi, DetLo 123 | ) 124 | ) & 0x0'F'F'F; 125 | 126 | // Check = DetHi >= DetLo = -(DetHi < DetLo) 127 | Results[i] |= CheckMask == 0x0'0'0'0; 128 | } 129 | } 130 | 131 | #if defined(__AVX2__) 132 | 133 | // Two at a time 134 | template<> 135 | inline void CrossProductMethod<1>( 136 | const glm::i32vec2 Points[], std::uint8_t Results[], std::size_t Count, 137 | const Triangle& Tri 138 | ) 139 | { 140 | // [ Tri[1].y, Tri[1].x, Tri[0].y, Tri[0].x] 141 | const __m128i Tri10 = _mm_loadu_si128( 142 | reinterpret_cast(Tri.data()) 143 | ); 144 | // [ Tri[2].y, Tri[2].x, Tri[2].y, Tri[2].x] 145 | const __m128i Tri22 = _mm_set1_epi64x( 146 | *reinterpret_cast(Tri.data() + 2) 147 | ); 148 | 149 | // unpacklo(above) 150 | // [ 
Tri[2].y, Tri[0].y, Tri[2].x, Tri[0].x] 151 | // unpackhi(above) 152 | // [ Tri[2].y, Tri[1].y, Tri[2].x, Tri[1].x] 153 | const __m128i Tri20yyxx = _mm_unpacklo_epi32( 154 | Tri10, Tri22 155 | ); 156 | const __m128i Tri21yyxx = _mm_unpackhi_epi32( 157 | Tri10, Tri22 158 | ); 159 | 160 | // unpacklo(above) 161 | // [ Tri[2].x, Tri[2].x, Tri[1].x, Tri[0].x] 162 | // unpackhi(above) 163 | // [ Tri[2].y, Tri[2].y, Tri[1].y, Tri[0].y] 164 | const __m256i Tri2210x2x = _mm256_broadcastsi128_si256( 165 | _mm_unpacklo_epi32( 166 | Tri20yyxx, Tri21yyxx 167 | ) 168 | ); 169 | const __m256i Tri2210x2y = _mm256_broadcastsi128_si256( 170 | _mm_unpackhi_epi32( 171 | Tri20yyxx, Tri21yyxx 172 | ) 173 | ); 174 | 175 | // [ Tri[2].x, Tri[2].x, Tri[1].x, Tri[0].x] 176 | // - 177 | // [ Tri[2].x, Tri[1].x, Tri[0].x, Tri[2].x] 178 | // ^ alignr_epi8([ Tri[2].x, Tri[2].x, Tri[1].x, Tri[0].x],12) 179 | const __m256i EdgeDirx2x = 180 | _mm256_sub_epi32( 181 | Tri2210x2x, 182 | _mm256_alignr_epi8( 183 | Tri2210x2x,Tri2210x2x, 184 | 12 185 | ) 186 | ); 187 | // [ Tri[2].y, Tri[2].y, Tri[1].y, Tri[0].y] 188 | // - 189 | // [ Tri[2].y, Tri[1].y, Tri[0].y, Tri[2].y] 190 | // ^ alignr_epi8([ Tri[2].y, Tri[2].y, Tri[1].y, Tri[0].y],12) 191 | const __m256i EdgeDirx2y = 192 | _mm256_sub_epi32( 193 | Tri2210x2y, 194 | _mm256_alignr_epi8( 195 | Tri2210x2y,Tri2210x2y, 196 | 12 197 | ) 198 | ); 199 | 200 | for( std::size_t i = 0; i < Count; i += 2 ) 201 | { 202 | const __m256i CurPointx2 = _mm256_permute4x64_epi64( 203 | _mm256_castsi128_si256( 204 | _mm_loadu_si128( 205 | reinterpret_cast(&Points[i]) 206 | ) 207 | ), 208 | 0b01'01'00'00 209 | ); 210 | const __m256i CurPointx2x = _mm256_shuffle_epi32( 211 | CurPointx2, 0b00'00'00'00 212 | ); 213 | const __m256i CurPointx2y = _mm256_shuffle_epi32( 214 | CurPointx2, 0b01'01'01'01 215 | ); 216 | 217 | const __m256i PointDirx2x = _mm256_sub_epi32( 218 | CurPointx2x, Tri2210x2x 219 | ); 220 | const __m256i PointDirx2y = _mm256_sub_epi32( 221 | CurPointx2y, 
Tri2210x2y 222 | ); 223 | const __m256i DetHix2 = _mm256_mullo_epi32( 224 | EdgeDirx2x, PointDirx2y 225 | ); 226 | const __m256i DetLox2 = _mm256_mullo_epi32( 227 | EdgeDirx2y, PointDirx2x 228 | ); 229 | // Check = DetHi >= DetLo = -(DetHi < DetLo) = ~(DetLo > DetHi) 230 | const std::uint32_t CheckMaskx2 = (~_mm256_movemask_epi8( 231 | _mm256_cmpgt_epi32( 232 | DetLox2, DetHix2 233 | ) 234 | ) & 0x0'F'F'F'0'F'F'F) + 0x0001'0001; 235 | 236 | *reinterpret_cast(Results + i) |= 237 | static_cast( 238 | 0x0101'0100'0001'0000 >> (_pext_u32( CheckMaskx2, 0x1000'1000) * 16) 239 | ); 240 | 241 | // Results[i + 0] |= (CheckMaskx2 & 0x0000FFFF) == 0; 242 | // Results[i + 1] |= (CheckMaskx2 & 0xFFFF0000) == 0; 243 | } 244 | CrossProductMethod<0>( 245 | Points, Results, Count, Tri 246 | ); 247 | } 248 | #endif 249 | 250 | template<> 251 | inline void BarycentricMethod<0>( 252 | const glm::i32vec2 Points[], std::uint8_t Results[], std::size_t Count, 253 | const qTri::Triangle& Tri 254 | ) 255 | { 256 | // [ Tri[1].y, Tri[1].x, Tri[0].y, Tri[0].x] 257 | const __m128i Tri10 = _mm_loadu_si128( 258 | reinterpret_cast(Tri.data()) 259 | ); 260 | // [ Tri[2].y, Tri[2].x, Tri[2].y, Tri[2].x] 261 | const __m128i Tri22 = _mm_set1_epi64x( 262 | *reinterpret_cast(Tri.data() + 2) 263 | ); 264 | // [ Tri[2].y, Tri[2].x, Tri[1].y, Tri[1].x] 265 | const __m128i Tri21 = _mm_alignr_epi8( 266 | Tri22, Tri10, 267 | 8 268 | ); 269 | 270 | // | Tri[1].x | Tri[0].y | Tri[2].y | Tri[0].x | 271 | const __m128i ConstVec_1x0y2y0x = _mm_blend_epi32( 272 | _mm_shuffle_epi32(Tri10,0b10'01'00'00),// 1x0y__0x 273 | Tri22, // ____2y__ 274 | 0b0010 275 | ); 276 | 277 | // | Tri[1].y | Tri[0].x | Tri[2].x | Tri[0].y | 278 | const __m128i ConstVec_1y0x2x0y = _mm_blend_epi32( 279 | _mm_shuffle_epi32(Tri10,0b11'00'00'01),// 1y0x__0y 280 | _mm_shuffle_epi32(Tri22,0b00'00'00'00),// ____2x__ 281 | 0b0010 282 | ); 283 | 284 | // Det01: Tri[1].y * Tri[0].x - Tri[0].y * Tri[1].x 285 | // Det20: Tri[2].x * Tri[0].y - 
Tri[0].x * Tri[2].y 286 | 287 | // | Tri[1].y | Tri[2].x | Tri[1].y | Tri[2].x | 288 | // | * | * | * | * | 289 | // | Tri[0].x | Tri[0].y | Tri[0].x | Tri[0].y | 290 | // | - | - | - | - | 291 | // | Tri[0].y | Tri[0].x | Tri[0].y | Tri[0].x | 292 | // | * | * | * | * | 293 | // | Tri[1].x | Tri[2].y | Tri[1].x | Tri[2].y | 294 | // [ Det01 , Det20 , Det01 , Det20 ] 295 | const __m128i Det0120 = _mm_sub_epi32( 296 | _mm_mullo_epi32( 297 | _mm_shuffle_epi32(Tri21,0b01'10'01'10), 298 | _mm_shuffle_epi32(Tri10,0b00'01'00'01) 299 | ), 300 | _mm_mullo_epi32( 301 | _mm_shuffle_epi32(Tri10,0b01'00'01'00), 302 | _mm_shuffle_epi32(Tri21,0b00'11'00'11) 303 | ) 304 | ); 305 | 306 | // Area: Tri[2].y * Tri[1].x - Tri[2].x * Tri[1].y 307 | // + Det20 + Det01 308 | 309 | // | Tri[2].x | Tri[2].y | Tri[2].x | Tri[2].y | 310 | // | * | * | * | * | 311 | // | Tri[1].y | Tri[1].x | Tri[1].y | Tri[1].x | 312 | // | hsub | hsub | 313 | // | + | + | + | + | 314 | // [ Det01 | Det01 | Det01 | Det01 ] 315 | // | + | + | + | + | 316 | // [ Det20 | Det20 | Det20 | Det20 ] 317 | // [ Area | Area | Area | Area ] 318 | const __m128i AreaProduct = _mm_mullo_epi32( 319 | _mm_shuffle_epi32( 320 | Tri22, 0b00'01'00'01 321 | ), 322 | _mm_shuffle_epi32( 323 | Tri10, 0b11'10'11'10 324 | ) 325 | ); 326 | const __m128i Area = _mm_add_epi32( 327 | _mm_hsub_epi32( 328 | AreaProduct, AreaProduct 329 | ), 330 | _mm_hadd_epi32( 331 | Det0120,Det0120 332 | ) 333 | ); 334 | 335 | // [Area-1, Area-1, 0,0] 336 | const __m128i CheckConst = _mm_blend_epi16( 337 | _mm_setzero_si128(), 338 | _mm_sub_epi32( // Area - 1 339 | Area, 340 | _mm_set1_epi32(1) 341 | ), 342 | 0b11'11'00'00 343 | ); 344 | 345 | for( std::size_t i = 0; i < Count; ++i ) 346 | { 347 | // YXYX 348 | const __m128i Point = _mm_loadl_epi64( 349 | reinterpret_cast(Points + i) 350 | ); 351 | const __m128i PointYXXY= _mm_shuffle_epi32( 352 | Point, 353 | 0b01'00'00'01 354 | ); 355 | const __m128i PointXYYX = _mm_alignr_epi8( 356 | 
PointYXXY,PointYXXY,8 357 | ); 358 | 359 | // U: 360 | // Point.y * Tri[0].x - Point.x * Tri[0].y 361 | // + 362 | // Point.x * Tri[2].y - Point.y * Tri[2].x 363 | // + Det20 364 | // V: 365 | // Point.x * Tri[0].y - Point.y * Tri[0].x 366 | // + 367 | // Point.y * Tri[1].x - Point.x * Tri[1].y 368 | // + Det01 369 | 370 | // If I wanted to do two at a time, I could fit 371 | // two UVs into one 128-bit lane. Putting this 372 | // here for reference 373 | // | Point.x | Point.y | Point.x | Point.y | 374 | // | * | * | * | * | 375 | // [ Tri[0].y | Tri[0].x | Tri[0].y | Tri[0].x ] < const 376 | // | - | - | - | - | 377 | // | Point.y | Point.x | Point.y | Point.x | 378 | // | * | * | * | * | 379 | // [ Tri[0].x | Tri[0].y | Tri[0].x | Tri[0].y ] < const 380 | // | + | + | + | + | 381 | // | Point.y | Point.x | Point.y | Point.x | 382 | // | * | * | * | * | 383 | // [ Tri[1].x | Tri[2].y | Tri[1].x | Tri[2].y ] < const 384 | // | - | - | - | - | 385 | // | Point.x | Point.y | Point.x | Point.y | 386 | // | * | * | * | * | 387 | // [ Tri[1].y | Tri[2].x | Tri[1].y | Tri[2].x ] < const 388 | // | + | + | + | + | 389 | // [ Det01 | Det20 | Det01 | Det20 ] < const 390 | // | V1 | U1 | V0 | U0 | 391 | 392 | // If I wanted to do 1 at a time though, 393 | // I could utilize more lanes to do 394 | // independent calculations in parallel 395 | // Such as the adds and multplications 396 | 397 | // | Point.x | Point.y | xy 398 | // | * | * | 399 | // | Tri[0].y | Tri[0].x ] < const 400 | // | - | - | 401 | // | Point.y | Point.x | yx 402 | // | * | * | 403 | // | Tri[0].x | Tri[0].y ] < const 404 | // | + | + | 405 | // | Point.y | Point.x | yx 406 | // | * | * | 407 | // | Tri[1].x | Tri[2].y ] < const 408 | // | - | - | 409 | // | Point.x | Point.y | xy 410 | // | * | * | 411 | // | Tri[1].y | Tri[2].x ] < const 412 | // | + | + | 413 | // | Det01 | Det20 ] < const 414 | // | V | U | 415 | // 416 | // V Utilizing all four lanes V 417 | // !Four determinants at once! 
418 | // | Point.y | Point.x | Point.x | Point.y | yxxy 419 | // | * | * | * | * | mul 420 | // | Tri[1].x | Tri[0].y | Tri[2].y | Tri[0].x | < const 421 | // | - | - | - | - | sub 422 | // | Point.x | Point.y | Point.y | Point.x | xyyx 423 | // | * | * | * | * | mul 424 | // | Tri[1].y | Tri[0].x | Tri[2].x | Tri[0].y | < const 425 | // | hadd | hadd | hadd 426 | // | + | + | + | + | add 427 | // [ Det01 | Det20 | Det01 | Det20 ] < const 428 | // | V | U | V | U | 429 | 430 | __m128i VU = _mm_sub_epi32( 431 | _mm_mullo_epi32( 432 | PointYXXY, 433 | ConstVec_1x0y2y0x //1x'0y'2y'0x 434 | ), 435 | _mm_mullo_epi32( 436 | PointXYYX, 437 | ConstVec_1y0x2x0y //1y'0x'2x'0y 438 | ) 439 | ); 440 | VU = _mm_add_epi32( 441 | _mm_hadd_epi32( 442 | VU, VU 443 | ), 444 | Det0120 445 | ); 446 | // Area = (blah) + Det20 + Det01 447 | // U + V < Area ; U + V <= Area - 1 448 | // U + V - Area < 0 449 | // const auto AreaCheck = _mm_cmplt_epi32( 450 | // _mm_hadd_epi32( 451 | // VU, VU 452 | // ), 453 | // Area 454 | // ); 455 | // -U = U + (- 2 * U ) 456 | // |_mm_unpacklo_epi32(0,sign(VU,(-1,-1,-1,-1)))| ( little waste) 457 | // hadd[ ( V,U,V,U) | (0,-V,0,-U) ] 458 | // | V+U | U+V | -V | -U | 459 | const __m128i CheckValues = _mm_hadd_epi32( 460 | _mm_unpacklo_epi32( 461 | _mm_sign_epi32( 462 | VU, 463 | _mm_set_epi32(-1,-1,-1,-1) 464 | ), 465 | _mm_setzero_si128() 466 | ), 467 | VU 468 | ); 469 | // X <= Y;!( X > Y ); !( Y < X ) 470 | const __m128i CheckParallel = _mm_cmplt_epi32( 471 | CheckConst, 472 | CheckValues 473 | ); 474 | const std::uint16_t Mask = ~_mm_movemask_epi8(CheckParallel); 475 | Results[i] |= Mask == 0xFFFF; 476 | // | <= | <= | <= | <= | 477 | // | Area-1 | Area-1 | 0 | 0 | < const 478 | 479 | // U = (blah) + Det20; U >= 0; U >= -Det20; -U <= Det20 480 | // V = (blah) + Det01; V >= 0; V >= -Det01; -V <= Det01 481 | // X >= 0 : !(X < 0) 482 | // const auto SignCheck = _mm_cmplt_epi32( 483 | // VU, _mm_setzero_si128() 484 | // ); 485 | 486 | // const auto 
AreaSignCheck = _mm_andnot_si128( 487 | // SignCheck, AreaCheck 488 | // ); 489 | // Results[i] |= _mm_movemask_epi8( 490 | // AreaSignCheck 491 | // ) == 0xFFFF; 492 | } 493 | } 494 | #endif -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # qTriangle [![GitHub license](https://img.shields.io/badge/license-MIT-blue.svg)](https://raw.githubusercontent.com/Wunkolo/qTriangle/master/LICENSE) (WIP) 2 | 3 | ||||| 4 | |:-:|:-:|:-:|:-:| 5 | |Serial|SSE/NEON|AVX2|AVX512| 6 | |![Serial](media/Serial.gif)|![SSE/NEON](media/SSE-NEON.gif)|![AVX2](media/AVX2.gif)|![AVX512](media/AVX512.gif)| 7 | 8 | qTriangle is a personal study to design a **q**uick way to detect if a point is within a **Triangle** by means of vectorization 9 | 10 | [![](media/glsl.png)](https://www.shadertoy.com/view/4t3czN) 11 | 12 | 13 | ### [Check out a live GLSL sample here!](https://www.shadertoy.com/view/4t3czN) 14 | 15 | The domain of the Point-In-Triangle problem is determining if a cartesian coordinate happens to land upon the interior of a triangle. In this case the 2D case of triangles will be examined and will require some kind of surface **area** for a point to land on so a case in which all three points are collinear(which is the worst case of having a *very* slim triangle) are ruled out. 16 | 17 | This problem comes up a lot in the domain of computer graphics and gameplay programming at times. Sometimes it's testing if a single point lands upon a polygon(made up of triangles) and sometimes it's testing if thousands of points happen to land on a triangle or not(such as when rendering a vector triangle against a regular grid during rasterization). 18 | 19 | There are several methods to test if a point happens to land within a triangle in 2D space, each have their own pros and cons and scaling properties. 
20 | 21 | # Cross Product Method 22 | 23 | The cross-product operation in vector algebra is a binary operation that takes two vectors in 3D space and creates a new vector that is perpendicular to them both. You can think of the two vectors as describing some kind of 2D plane in 3D space, and the cross product creates a new vector that is perpendicular to this plane. Though, there are two ways to create a vector perpendicular to a plane, by going "in" the plane and going "out" the plane. 24 | 25 | The Cross-Product has a lot of useful properties but one of interest at the moment is the **magnitude** of the resulting vector of the cross product which will be the **area** of the parallelogram that the two original vectors create. Since the points are on the X-Y plane in 3D space this *magnitude* will always be the Z-component of the cross product since all vectors perpendicular to the orthogonal X-Y plane will take the form `[ 0, 0, (some value)]`. This becomes useful later on. 26 | 27 | ![](media/Cross.gif) 28 | 29 | The particular numerical value of this area is not of importance either but rather the *sign* of the area is of interest (while a negative-surface-area does not make sense, this value tells us something about two directional vectors). Notice that whenever the direction to the moving point goes to the "left" of the black directional vector, that the area becomes negative, but when it is to the "right", the area is positive. 30 | This is due to the [right hand rule](https://en.wikipedia.org/wiki/Right-hand_rule) where the direction of positive-rotation being **clockwise** or **counter-clockwise** causes the order of the original two directional vectors to determine the proper orientation of the cross product.
31 | 32 | The sign of the cross-product-area, which depends on which side the direction of the "point" lands on, solves the problem in the same vein as turning each edge into a linear inequality and then testing if the point satisfies all of them at once, but in a somewhat more optimal way. 33 | 34 | The three positional vectors of the triangle must be in **clockwise** or **counter-clockwise** order so that three directional vectors can be determinately created. 35 | ``` 36 | EdgeDir0 = Vertex1 - Vertex0 37 | EdgeDir1 = Vertex2 - Vertex1 38 | EdgeDir2 = Vertex0 - Vertex2 39 | ``` 40 | Then, three additional directional vectors can be made that point from the triangle vertex position to the point that is being tested against: 41 | ``` 42 | PointDir0 = Point - Vertex0 43 | PointDir1 = Point - Vertex1 44 | PointDir2 = Point - Vertex2 45 | ``` 46 | 47 | Now, finding out if a point lands within the triangle is determined by using three cross-products, and checking if each area is positive. If they are all positive, then the point is to the "right" of all the edges. If any of them are negative, then it is not within the triangle. 48 | ``` 49 | | EdgeDir0 × PointDir0 | >= 0 && 50 | | EdgeDir1 × PointDir1 | >= 0 && 51 | | EdgeDir2 × PointDir2 | >= 0 52 | ``` 53 | 54 | ![](media/CrossMethod.gif) 55 | 56 | ## Optimizations 57 | 58 | Previously it was determined that all cross products against the X-Y plane will take the form `[ 0, 0, (some value)]`. With this the arithmetic behind the cross-product operation can be simplified much further. Since the cross product can be [calculated using partial determinants of a 3x3 matrix](https://en.wikipedia.org/wiki/Rule_of_Sarrus) then attention only has to be given to the calculations that determine the Z-component alone which is but a 2x2 determinant of the two input vectors. 59 | 60 | This means that if I had two vectors `A` and `B` on the X-Y plane.
The cross-product's magnitude is simply: 61 | ``` 62 | A.x * B.y - A.y * B.x; 63 | ``` 64 | which reduces the previous arithmetic to: 65 | ``` 66 | EdgeDir0.x * PointDir0.y - EdgeDir0.y * PointDir0.x >= 0 && 67 | EdgeDir1.x * PointDir1.y - EdgeDir1.y * PointDir1.x >= 0 && 68 | EdgeDir2.x * PointDir2.y - EdgeDir2.y * PointDir2.x >= 0 69 | ``` 70 | 71 | The full pseudo-code: 72 | ```cpp 73 | // Point - Position that is being tested 74 | // Vertex0,1,2 - Vertices of the triangle in **clockwise order** 75 | 76 | // Directional vertices along the edges of the triangle in clockwise order 77 | EdgeDir0 = Vertex1 - Vertex0 78 | EdgeDir1 = Vertex2 - Vertex1 79 | EdgeDir2 = Vertex0 - Vertex2 80 | 81 | // Directional vertices pointing from the triangle vertices to the point 82 | PointDir0 = Point - Vertex0 83 | PointDir1 = Point - Vertex1 84 | PointDir2 = Point - Vertex2 85 | 86 | // Test if each cross-product results in a positive area 87 | if( 88 | EdgeDir0.x * PointDir0.y - EdgeDir0.y * PointDir0.x >= 0 && 89 | EdgeDir1.x * PointDir1.y - EdgeDir1.y * PointDir1.x >= 0 && 90 | EdgeDir2.x * PointDir2.y - EdgeDir2.y * PointDir2.x >= 0 91 | ) 92 | { 93 | // CurPoint is in triangle! 94 | } 95 | ``` 96 | 97 | ## Scaling 98 | 99 | If I was to throw thousands of points at a triangle in a for-loop using this algorithm then not all variables have to be re-calculated for each point. 100 | 101 | The vectors `EdgeDir0`, `EdgeDir1`, `EdgeDir2` only have to be calculated once. For each point the vectors `PointDir0`, `PointDir1`, `PointDir2` have to be recreated. 
102 | 103 | ``` 104 | EdgeDir0 = Vertex1 - Vertex0 105 | EdgeDir1 = Vertex2 - Vertex1 106 | EdgeDir2 = Vertex0 - Vertex2 107 | foreach(CurPoint in LotsOfPoints) 108 | { 109 | PointDir0 = CurPoint - Vertex0 110 | PointDir1 = CurPoint - Vertex1 111 | PointDir2 = CurPoint - Vertex2 112 | if( 113 | EdgeDir0.x * PointDir0.y - EdgeDir0.y * PointDir0.x >= 0 && 114 | EdgeDir1.x * PointDir1.y - EdgeDir1.y * PointDir1.x >= 0 && 115 | EdgeDir2.x * PointDir2.y - EdgeDir2.y * PointDir2.x >= 0 116 | ) 117 | { 118 | // CurPoint is in triangle! 119 | } 120 | } 121 | ``` 122 | Which results in the total overhead for each point being 123 | 124 | Subtractions|Multiplications|Comparisons 125 | :-:|:-:|:-: 126 | 6|6|3 127 | 128 | # Barycentric Coordinate Method 129 | 130 | With some barycentric coordinate trickery one can derive a coordinate system that allows a triangle to be described as a linear "mixture" of its three vertices, with some constraints on how they mix. 131 | 132 | The Barycentric coordinates of a triangle involve its three position-vectors `p1`, `p2`, `p3`. 133 | Using these three points, any new point `p'` _within_ this triangle can be generated by *mixing* the three vertex positions according to three scalar weights `w1`, `w2`, `w3` such that: 134 | `p' = w1 * p1 + w2 * p2 + w3 * p3`. 135 | This is just a linear combination of three points, but `p'` isn't meant to just be any wild combination of three points. What you actually want is for the combination of three points to always land on the triangular surface that they all contain. This is called a [convex combination](https://en.wikipedia.org/wiki/Convex_combination) where if you want to only reach every point within this triangle then you must bind these three weights to the conditions of being non-negative and also summing to the exact value `1.0`. Making this the full barycentric equation for a triangle defined by three points: 136 | 137 | $$p' = w_1 p_1 + w_2 p_2 + w_3 p_3, \qquad w_1 + w_2 + w_3 = 1, \qquad w_1, w_2, w_3 \ge 0$$ 138 | 139 | In this case though.
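**You already have `p'` and want to determine if it is within this triangle!** Time to cleverly work backwards! 140 | 141 | --- 142 | 143 | So given a `p1`, `p2`, `p3` and `p'`, you have to figure out the three positive unknowns `w1`, `w2`, `w3` that balance these conditions. Some linear alg! 144 | 145 | So starting with this: 146 | 147 | $$p' = w_1 p_1 + w_2 p_2 + w_3 p_3, \qquad w_1 + w_2 + w_3 = 1$$ 148 | 149 | How about expanding those vectors into their individual(2D) dimensions. 150 | 151 | $$\begin{aligned} p'_x &= w_1 p_{1x} + w_2 p_{2x} + w_3 p_{3x} \\ p'_y &= w_1 p_{1y} + w_2 p_{2y} + w_3 p_{3y} \\ 1 &= w_1 + w_2 + w_3 \end{aligned}$$ 152 | 153 | Now that looks like a matrix! Take out all those weights and put it into a vector! 154 | 155 | $$\begin{bmatrix} p'_x \\ p'_y \\ 1 \end{bmatrix} = \begin{bmatrix} p_{1x} & p_{2x} & p_{3x} \\ p_{1y} & p_{2y} & p_{3y} \\ 1 & 1 & 1 \end{bmatrix} \begin{bmatrix} w_1 \\ w_2 \\ w_3 \end{bmatrix}$$ 156 | 157 | So the solution is to invert that 3x3 matrix there and multiply it by the point we are testing it against to get the resulting three weights. And once you get the three weights, all you have to do is test if they are positive (**Note**: If this smells somewhat like a close derivation of the cross-product method you're right!) 158 | 159 | $$\begin{bmatrix} w_1 \\ w_2 \\ w_3 \end{bmatrix} = \begin{bmatrix} p_{1x} & p_{2x} & p_{3x} \\ p_{1y} & p_{2y} & p_{3y} \\ 1 & 1 & 1 \end{bmatrix}^{-1} \begin{bmatrix} p'_x \\ p'_y \\ 1 \end{bmatrix}$$ 160 | 161 | Matrix inverse... This is where it gets a little hairy 162 | 163 | $$\begin{bmatrix} p_{1x} & p_{2x} & p_{3x} \\ p_{1y} & p_{2y} & p_{3y} \\ 1 & 1 & 1 \end{bmatrix}^{-1} = \frac{1}{D} \begin{bmatrix} p_{2y} - p_{3y} & p_{3x} - p_{2x} & p_{2x} p_{3y} - p_{3x} p_{2y} \\ p_{3y} - p_{1y} & p_{1x} - p_{3x} & p_{3x} p_{1y} - p_{1x} p_{3y} \\ p_{1y} - p_{2y} & p_{2x} - p_{1x} & p_{1x} p_{2y} - p_{2x} p_{1y} \end{bmatrix}, \qquad D = p_{1x}(p_{2y} - p_{3y}) + p_{2x}(p_{3y} - p_{1y}) + p_{3x}(p_{1y} - p_{2y})$$ 164 | 165 | 166 | And with this, `w1`, `w2`, and `w3` all reduce to relatively "simple" linear equations and all that would have to be checked is if they are greater than `0`! 167 | 168 | Here is the equivalent code in GLSL. Keep in mind that GLSL defines matrices and vectors as column-major. So going `vec3(1)` means a 1x3(_width_ x _height_) arrangement of elements and `mat3(vec3(1),vec3(2),vec3(3))` means a 3x3 matrix with the columns(the vertical ones) being defined by `{1,1,1}`,`{2,2,2}`,`{3,3,3}`.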
169 | ```c 170 | bool PointInTriangleBarycentric( 171 | in vec2 Triangle[3], 172 | in vec2 Point 173 | ) 174 | { 175 | mat3 Barycentric = inverse( 176 | mat3( 177 | Triangle[0], 1.0f, 178 | Triangle[1], 1.0f, 179 | Triangle[2], 1.0f 180 | ) 181 | ); 182 | 183 | vec3 Weights = Barycentric * vec3( Point, 1.0 ); 184 | 185 | if( 186 | // Weights.x >= 0.0f && 187 | // Weights.y >= 0.0f && 188 | // Weights.z >= 0.0f 189 | all( greaterThanEqual( Weights, vec3(0.0f) ) ) 190 | ) 191 | { 192 | return true; 193 | } 194 | 195 | return false; 196 | } 197 | ``` 198 | 199 | Though, this is a pretty naive approach. It still has its uses. 200 | If you wanted to test a million points against a single triangle, you'd have to do a single matrix inverse(pretty expensive!) and then the overhead for testing each individual point you want to test would be: a matrix-vector multiplication(a 3x3 matrix times an ℝ³ vector, which is pretty much just three dot-products) and three comparisons: 201 | 202 | Additions|Multiplications|Comparisons 203 | :-:|:-:|:-: 204 | 6|9|3 205 | 206 | Though, a matrix inverse is a pretty expensive operation to do, especially in a context of having possibly thousands upon millions of triangles and a matrix inverse each involving almost dozens of multiplications, additions, and a division on top of it all. Some more clever observations can allow for some slim optimizations, especially since all that matters is if the weights are positive or not. 207 | 208 | ## Optimizations 209 | 210 | 211 | 212 | --- 213 | 214 | The two directional vectors are derived from three points that describe a plane while the two scalars are typically denoted as *U* and *V* and determine how much these two directional vectors should mix together to create another point on this plane. 215 | Since a triangle has three points, these two vectors can be derived by picking any one point of the triangle and obtaining two directional vectors from this point to the two other points.
216 | 217 | The *U* and *V* scalar values are typically normalized within the [0.0,1.0] range to easily translate these values into *percentages* that determine how much the two vectors should contribute to the resulting vector. 218 | Ex, if I had the two directional vectors `( 8, 2 )` and `( 7, 1 )` and the `U` `V` values `0.5` `1.0` respectively, the resulting point is ` ( 8, 2 ) * 0.5 + ( 7, 1 ) * 1.0 = ( 11, 2 )` which in English would be something like `I want 50% of the ( 8, 2 ) direction and 100% of ( 7, 1 )`. 219 | 220 | The actual *triangle shape* is made by constraining the `U` and `V` values so that rather than describing a plane very generally it instead will stay within the bounds of a triangle. 221 | 222 | 223 | The first two constraints are `U >= 0` and `V >= 0` which guarantee that the U,V coordinates are always on the *positive* side of the two vectors and do not go backwards, off the triangle. The third constraint, `U + V <= 1`, is [just the line](http://www.wolframalpha.com/input/?i=1+-+x+-+y++%3D+0) `y = 1 - x` [turned into an inequality](http://www.wolframalpha.com/input/?i=1+-+x+-+y++%3D%3E+0) such that all solutions to the inequality equate to a point within a triangle. The actual derivation of this involves some barycentric coordinate limbo. 224 | 225 | 226 | 227 | Given a triangle, the two directional vectors are easy to calculate. Pick any of the three points of a triangle, get the vector direction from this point, to the two other points ( which is just a vector subtraction). After the two directional vectors are obtained, now all that has to be done to see if a point is within a triangle is [*projecting*](https://en.wikipedia.org/wiki/Vector_projection) this point against the two directional vectors to get the *U* and *V* values to test against the three conditions. 228 | 229 | Projecting a point against these two vectors to get the *U* and *V* values is but trivial dot-product arithmetic.
First, another directional vector has to be created, as combining a positional-vector with a directional-vector wouldn't make sense in this instance. The very same point that was selected in step 1 to generate the first two directional vectors must be used once more to generate a third directional vector between this triangle vertex and the point being sampled against (another vector subtraction). This new vector will then be dot-product-ed against the two directional edges ( vertical vector multiplication and horizontal addition ) to finally determine the `U` and `V` values to test against `U >= 0`, `V >= 0`, and `U + V <= 1`. 230 | 231 | ![](media/BarycentricMethod.gif) 232 | -------------------------------------------------------------------------------- /test/stb_image_write.h: -------------------------------------------------------------------------------- 1 | /* stb_image_write - v1.09 - public domain - http://nothings.org/stb/stb_image_write.h 2 | writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015 3 | no warranty implied; use at your own risk 4 | 5 | Before #including, 6 | 7 | #define STB_IMAGE_WRITE_IMPLEMENTATION 8 | 9 | in the file that you want to have the implementation. 10 | 11 | Will probably not work correctly with strict-aliasing optimizations. 12 | 13 | If using a modern Microsoft Compiler, non-safe versions of CRT calls may cause 14 | compilation warnings or even errors. To avoid this, also before #including, 15 | 16 | #define STBI_MSC_SECURE_CRT 17 | 18 | ABOUT: 19 | 20 | This header file is a library for writing images to C stdio. It could be 21 | adapted to write to memory or a general streaming interface; let me know. 22 | 23 | The PNG output is not optimal; it is 20-50% larger than the file 24 | written by a decent optimizing implementation; though providing a custom 25 | zlib compress function (see STBIW_ZLIB_COMPRESS) can mitigate that.
26 | This library is designed for source code compactness and simplicity, 27 | not optimal image file size or run-time performance. 28 | 29 | BUILDING: 30 | 31 | You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h. 32 | You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace 33 | malloc,realloc,free. 34 | You can #define STBIW_MEMMOVE() to replace memmove() 35 | You can #define STBIW_ZLIB_COMPRESS to use a custom zlib-style compress function 36 | for PNG compression (instead of the builtin one), it must have the following signature: 37 | unsigned char * my_compress(unsigned char *data, int data_len, int *out_len, int quality); 38 | The returned data will be freed with STBIW_FREE() (free() by default), 39 | so it must be heap allocated with STBIW_MALLOC() (malloc() by default), 40 | 41 | USAGE: 42 | 43 | There are five functions, one for each image file format: 44 | 45 | int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); 46 | int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); 47 | int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); 48 | int stbi_write_jpg(char const *filename, int w, int h, int comp, const void *data, int quality); 49 | int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); 50 | 51 | void stbi_flip_vertically_on_write(int flag); // flag is non-zero to flip data vertically 52 | 53 | There are also five equivalent functions that use an arbitrary write function. 
You are 54 | expected to open/close your file-equivalent before and after calling these: 55 | 56 | int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); 57 | int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); 58 | int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); 59 | int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); 60 | int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); 61 | 62 | where the callback is: 63 | void stbi_write_func(void *context, void *data, int size); 64 | 65 | You can configure it with these global variables: 66 | int stbi_write_tga_with_rle; // defaults to true; set to 0 to disable RLE 67 | int stbi_write_png_compression_level; // defaults to 8; set to higher for more compression 68 | int stbi_write_force_png_filter; // defaults to -1; set to 0..5 to force a filter mode 69 | 70 | 71 | You can define STBI_WRITE_NO_STDIO to disable the file variant of these 72 | functions, so the library will not use stdio.h at all. However, this will 73 | also disable HDR writing, because it requires stdio for formatted output. 74 | 75 | Each function returns 0 on failure and non-0 on success. 76 | 77 | The functions create an image file defined by the parameters. The image 78 | is a rectangle of pixels stored from left-to-right, top-to-bottom. 79 | Each pixel contains 'comp' channels of data stored interleaved with 8-bits 80 | per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is 81 | monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall. 82 | The *data pointer points to the first byte of the top-left-most pixel. 
83 | For PNG, "stride_in_bytes" is the distance in bytes from the first byte of 84 | a row of pixels to the first byte of the next row of pixels. 85 | 86 | PNG creates output files with the same number of components as the input. 87 | The BMP format expands Y to RGB in the file format and does not 88 | output alpha. 89 | 90 | PNG supports writing rectangles of data even when the bytes storing rows of 91 | data are not consecutive in memory (e.g. sub-rectangles of a larger image), 92 | by supplying the stride between the beginning of adjacent rows. The other 93 | formats do not. (Thus you cannot write a native-format BMP through the BMP 94 | writer, both because it is in BGR order and because it may have padding 95 | at the end of the line.) 96 | 97 | PNG allows you to set the deflate compression level by setting the global 98 | variable 'stbi_write_png_compression_level' (it defaults to 8). 99 | 100 | HDR expects linear float data. Since the format is always 32-bit rgb(e) 101 | data, alpha (if provided) is discarded, and for monochrome data it is 102 | replicated across all three channels. 103 | 104 | TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed 105 | data, set the global variable 'stbi_write_tga_with_rle' to 0. 106 | 107 | JPEG does ignore alpha channels in input data; quality is between 1 and 100. 108 | Higher quality looks better but results in a bigger image. 109 | JPEG baseline (no JPEG progressive). 
110 | 111 | CREDITS: 112 | 113 | 114 | Sean Barrett - PNG/BMP/TGA 115 | Baldur Karlsson - HDR 116 | Jean-Sebastien Guay - TGA monochrome 117 | Tim Kelsey - misc enhancements 118 | Alan Hickman - TGA RLE 119 | Emmanuel Julien - initial file IO callback implementation 120 | Jon Olick - original jo_jpeg.cpp code 121 | Daniel Gibson - integrate JPEG, allow external zlib 122 | Aarni Koskela - allow choosing PNG filter 123 | 124 | bugfixes: 125 | github:Chribba 126 | Guillaume Chereau 127 | github:jry2 128 | github:romigrou 129 | Sergio Gonzalez 130 | Jonas Karlsson 131 | Filip Wasil 132 | Thatcher Ulrich 133 | github:poppolopoppo 134 | Patrick Boettcher 135 | github:xeekworx 136 | Cap Petschulat 137 | Simon Rodriguez 138 | Ivan Tikhonov 139 | github:ignotion 140 | Adam Schackart 141 | 142 | LICENSE 143 | 144 | See end of file for license information. 145 | 146 | */ 147 | 148 | #ifndef INCLUDE_STB_IMAGE_WRITE_H 149 | #define INCLUDE_STB_IMAGE_WRITE_H 150 | 151 | // if STB_IMAGE_WRITE_STATIC causes problems, try defining STBIWDEF to 'inline' or 'static inline' 152 | #ifndef STBIWDEF 153 | #ifdef STB_IMAGE_WRITE_STATIC 154 | #define STBIWDEF static 155 | #else 156 | #ifdef __cplusplus 157 | #define STBIWDEF extern "C" 158 | #else 159 | #define STBIWDEF extern 160 | #endif 161 | #endif 162 | #endif 163 | 164 | #ifndef STB_IMAGE_WRITE_STATIC // C++ forbids static forward declarations 165 | extern int stbi_write_tga_with_rle; 166 | extern int stbi_write_png_compression_level; 167 | extern int stbi_write_force_png_filter; 168 | #endif 169 | 170 | #ifndef STBI_WRITE_NO_STDIO 171 | STBIWDEF int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); 172 | STBIWDEF int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); 173 | STBIWDEF int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); 174 | STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float 
*data); 175 | STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality); 176 | #endif 177 | 178 | typedef void stbi_write_func(void *context, void *data, int size); 179 | 180 | STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); 181 | STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); 182 | STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); 183 | STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); 184 | STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); 185 | 186 | STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean); 187 | 188 | #endif//INCLUDE_STB_IMAGE_WRITE_H 189 | 190 | #ifdef STB_IMAGE_WRITE_IMPLEMENTATION 191 | 192 | #ifdef _WIN32 193 | #ifndef _CRT_SECURE_NO_WARNINGS 194 | #define _CRT_SECURE_NO_WARNINGS 195 | #endif 196 | #ifndef _CRT_NONSTDC_NO_DEPRECATE 197 | #define _CRT_NONSTDC_NO_DEPRECATE 198 | #endif 199 | #endif 200 | 201 | #ifndef STBI_WRITE_NO_STDIO 202 | #include 203 | #endif // STBI_WRITE_NO_STDIO 204 | 205 | #include 206 | #include 207 | #include 208 | #include 209 | 210 | #if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED)) 211 | // ok 212 | #elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED) 213 | // ok 214 | #else 215 | #error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)." 
216 | #endif 217 | 218 | #ifndef STBIW_MALLOC 219 | #define STBIW_MALLOC(sz) malloc(sz) 220 | #define STBIW_REALLOC(p,newsz) realloc(p,newsz) 221 | #define STBIW_FREE(p) free(p) 222 | #endif 223 | 224 | #ifndef STBIW_REALLOC_SIZED 225 | #define STBIW_REALLOC_SIZED(p,oldsz,newsz) STBIW_REALLOC(p,newsz) 226 | #endif 227 | 228 | 229 | #ifndef STBIW_MEMMOVE 230 | #define STBIW_MEMMOVE(a,b,sz) memmove(a,b,sz) 231 | #endif 232 | 233 | 234 | #ifndef STBIW_ASSERT 235 | #include 236 | #define STBIW_ASSERT(x) assert(x) 237 | #endif 238 | 239 | #define STBIW_UCHAR(x) (unsigned char) ((x) & 0xff) 240 | 241 | #ifdef STB_IMAGE_WRITE_STATIC 242 | static int stbi__flip_vertically_on_write = 0; 243 | static int stbi_write_png_compression_level = 8; 244 | static int stbi_write_tga_with_rle = 1; 245 | static int stbi_write_force_png_filter = -1; 246 | #else 247 | int stbi_write_png_compression_level = 8; 248 | int stbi__flip_vertically_on_write = 0; 249 | int stbi_write_tga_with_rle = 1; 250 | int stbi_write_force_png_filter = -1; 251 | #endif 252 | 253 | STBIWDEF void stbi_flip_vertically_on_write(int flag) 254 | { 255 | stbi__flip_vertically_on_write = flag; 256 | } 257 | 258 | typedef struct 259 | { 260 | stbi_write_func *func; 261 | void *context; 262 | } stbi__write_context; 263 | 264 | // initialize a callback-based context 265 | static void stbi__start_write_callbacks(stbi__write_context *s, stbi_write_func *c, void *context) 266 | { 267 | s->func = c; 268 | s->context = context; 269 | } 270 | 271 | #ifndef STBI_WRITE_NO_STDIO 272 | 273 | static void stbi__stdio_write(void *context, void *data, int size) 274 | { 275 | fwrite(data, 1, size, (FILE*)context); 276 | } 277 | 278 | static int stbi__start_write_file(stbi__write_context *s, const char *filename) 279 | { 280 | FILE *f; 281 | #ifdef STBI_MSC_SECURE_CRT 282 | if( fopen_s(&f, filename, "wb") ) 283 | f = NULL; 284 | #else 285 | f = fopen(filename, "wb"); 286 | #endif 287 | stbi__start_write_callbacks(s, stbi__stdio_write, 
(void *)f); 288 | return f != NULL; 289 | } 290 | 291 | static void stbi__end_write_file(stbi__write_context *s) 292 | { 293 | fclose((FILE *)s->context); 294 | } 295 | 296 | #endif // !STBI_WRITE_NO_STDIO 297 | 298 | typedef unsigned int stbiw_uint32; 299 | typedef int stb_image_write_test[sizeof(stbiw_uint32) == 4 ? 1 : -1]; 300 | 301 | static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v) 302 | { 303 | while( *fmt ) 304 | { 305 | switch( *fmt++ ) 306 | { 307 | case ' ': break; 308 | case '1': 309 | { 310 | unsigned char x = STBIW_UCHAR(va_arg(v, int)); 311 | s->func(s->context, &x, 1); 312 | break; 313 | } 314 | case '2': 315 | { 316 | int x = va_arg(v, int); 317 | unsigned char b[2]; 318 | b[0] = STBIW_UCHAR(x); 319 | b[1] = STBIW_UCHAR(x >> 8); 320 | s->func(s->context, b, 2); 321 | break; 322 | } 323 | case '4': 324 | { 325 | stbiw_uint32 x = va_arg(v, int); 326 | unsigned char b[4]; 327 | b[0] = STBIW_UCHAR(x); 328 | b[1] = STBIW_UCHAR(x >> 8); 329 | b[2] = STBIW_UCHAR(x >> 16); 330 | b[3] = STBIW_UCHAR(x >> 24); 331 | s->func(s->context, b, 4); 332 | break; 333 | } 334 | default: 335 | STBIW_ASSERT(0); 336 | return; 337 | } 338 | } 339 | } 340 | 341 | static void stbiw__writef(stbi__write_context *s, const char *fmt, ...) 
342 | { 343 | va_list v; 344 | va_start(v, fmt); 345 | stbiw__writefv(s, fmt, v); 346 | va_end(v); 347 | } 348 | 349 | static void stbiw__putc(stbi__write_context *s, unsigned char c) 350 | { 351 | s->func(s->context, &c, 1); 352 | } 353 | 354 | static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c) 355 | { 356 | unsigned char arr[3]; 357 | arr[0] = a, arr[1] = b, arr[2] = c; 358 | s->func(s->context, arr, 3); 359 | } 360 | 361 | static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char *d) 362 | { 363 | unsigned char bg[3] = { 255, 0, 255 }, px[3]; 364 | int k; 365 | 366 | if( write_alpha < 0 ) 367 | s->func(s->context, &d[comp - 1], 1); 368 | 369 | switch( comp ) 370 | { 371 | case 2: // 2 pixels = mono + alpha, alpha is written separately, so same as 1-channel case 372 | case 1: 373 | if( expand_mono ) 374 | stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp 375 | else 376 | s->func(s->context, d, 1); // monochrome TGA 377 | break; 378 | case 4: 379 | if( !write_alpha ) 380 | { 381 | // composite against pink background 382 | for( k = 0; k < 3; ++k ) 383 | px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255; 384 | stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]); 385 | break; 386 | } 387 | /* FALLTHROUGH */ 388 | case 3: 389 | stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]); 390 | break; 391 | } 392 | if( write_alpha > 0 ) 393 | s->func(s->context, &d[comp - 1], 1); 394 | } 395 | 396 | static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono) 397 | { 398 | stbiw_uint32 zero = 0; 399 | int i, j, j_end; 400 | 401 | if( y <= 0 ) 402 | return; 403 | 404 | if( stbi__flip_vertically_on_write ) 405 | vdir *= -1; 406 | 407 | if( vdir < 0 ) 408 | j_end = -1, j = y - 1; 409 | else 410 | j_end = y, j = 0; 411 | 412 | for( ; j != j_end; j += 
vdir ) 413 | { 414 | for( i = 0; i < x; ++i ) 415 | { 416 | unsigned char *d = (unsigned char *)data + (j*x + i)*comp; 417 | stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d); 418 | } 419 | s->func(s->context, &zero, scanline_pad); 420 | } 421 | } 422 | 423 | static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...) 424 | { 425 | if( y < 0 || x < 0 ) 426 | { 427 | return 0; 428 | } 429 | else 430 | { 431 | va_list v; 432 | va_start(v, fmt); 433 | stbiw__writefv(s, fmt, v); 434 | va_end(v); 435 | stbiw__write_pixels(s, rgb_dir, vdir, x, y, comp, data, alpha, pad, expand_mono); 436 | return 1; 437 | } 438 | } 439 | 440 | static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data) 441 | { 442 | int pad = (-x * 3) & 3; 443 | return stbiw__outfile(s, -1, -1, x, y, comp, 1, (void *)data, 0, pad, 444 | "11 4 22 4" "4 44 22 444444", 445 | 'B', 'M', 14 + 40 + (x * 3 + pad)*y, 0, 0, 14 + 40, // file header 446 | 40, x, y, 1, 24, 0, 0, 0, 0, 0, 0); // bitmap header 447 | } 448 | 449 | STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) 450 | { 451 | stbi__write_context s; 452 | stbi__start_write_callbacks(&s, func, context); 453 | return stbi_write_bmp_core(&s, x, y, comp, data); 454 | } 455 | 456 | #ifndef STBI_WRITE_NO_STDIO 457 | STBIWDEF int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data) 458 | { 459 | stbi__write_context s; 460 | if( stbi__start_write_file(&s, filename) ) 461 | { 462 | int r = stbi_write_bmp_core(&s, x, y, comp, data); 463 | stbi__end_write_file(&s); 464 | return r; 465 | } 466 | else 467 | return 0; 468 | } 469 | #endif //!STBI_WRITE_NO_STDIO 470 | 471 | static int stbi_write_tga_core(stbi__write_context *s, int x, int y, int comp, void *data) 472 | { 473 | int has_alpha = (comp == 2 || comp == 4); 
474 | int colorbytes = has_alpha ? comp - 1 : comp; 475 | int format = colorbytes < 2 ? 3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3 476 | 477 | if( y < 0 || x < 0 ) 478 | return 0; 479 | 480 | if( !stbi_write_tga_with_rle ) 481 | { 482 | return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void *)data, has_alpha, 0, 483 | "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8); 484 | } 485 | else 486 | { 487 | int i, j, k; 488 | int jend, jdir; 489 | 490 | stbiw__writef(s, "111 221 2222 11", 0, 0, format + 8, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8); 491 | 492 | if( stbi__flip_vertically_on_write ) 493 | { 494 | j = 0; 495 | jend = y; 496 | jdir = 1; 497 | } 498 | else 499 | { 500 | j = y - 1; 501 | jend = -1; 502 | jdir = -1; 503 | } 504 | for( ; j != jend; j += jdir ) 505 | { 506 | unsigned char *row = (unsigned char *)data + j * x * comp; 507 | int len; 508 | 509 | for( i = 0; i < x; i += len ) 510 | { 511 | unsigned char *begin = row + i * comp; 512 | int diff = 1; 513 | len = 1; 514 | 515 | if( i < x - 1 ) 516 | { 517 | ++len; 518 | diff = memcmp(begin, row + (i + 1) * comp, comp); 519 | if( diff ) 520 | { 521 | const unsigned char *prev = begin; 522 | for( k = i + 2; k < x && len < 128; ++k ) 523 | { 524 | if( memcmp(prev, row + k * comp, comp) ) 525 | { 526 | prev += comp; 527 | ++len; 528 | } 529 | else 530 | { 531 | --len; 532 | break; 533 | } 534 | } 535 | } 536 | else 537 | { 538 | for( k = i + 2; k < x && len < 128; ++k ) 539 | { 540 | if( !memcmp(begin, row + k * comp, comp) ) 541 | { 542 | ++len; 543 | } 544 | else 545 | { 546 | break; 547 | } 548 | } 549 | } 550 | } 551 | 552 | if( diff ) 553 | { 554 | unsigned char header = STBIW_UCHAR(len - 1); 555 | s->func(s->context, &header, 1); 556 | for( k = 0; k < len; ++k ) 557 | { 558 | stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp); 559 | } 560 | } 561 | else 562 | { 563 | unsigned char header = 
STBIW_UCHAR(len - 129); 564 | s->func(s->context, &header, 1); 565 | stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin); 566 | } 567 | } 568 | } 569 | } 570 | return 1; 571 | } 572 | 573 | STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) 574 | { 575 | stbi__write_context s; 576 | stbi__start_write_callbacks(&s, func, context); 577 | return stbi_write_tga_core(&s, x, y, comp, (void *)data); 578 | } 579 | 580 | #ifndef STBI_WRITE_NO_STDIO 581 | STBIWDEF int stbi_write_tga(char const *filename, int x, int y, int comp, const void *data) 582 | { 583 | stbi__write_context s; 584 | if( stbi__start_write_file(&s, filename) ) 585 | { 586 | int r = stbi_write_tga_core(&s, x, y, comp, (void *)data); 587 | stbi__end_write_file(&s); 588 | return r; 589 | } 590 | else 591 | return 0; 592 | } 593 | #endif 594 | 595 | // ************************************************************************************************* 596 | // Radiance RGBE HDR writer 597 | // by Baldur Karlsson 598 | 599 | #define stbiw__max(a, b) ((a) > (b) ? 
(a) : (b)) 600 | 601 | void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear) 602 | { 603 | int exponent; 604 | float maxcomp = stbiw__max(linear[0], stbiw__max(linear[1], linear[2])); 605 | 606 | if( maxcomp < 1e-32f ) 607 | { 608 | rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0; 609 | } 610 | else 611 | { 612 | float normalize = (float)frexp(maxcomp, &exponent) * 256.0f / maxcomp; 613 | 614 | rgbe[0] = (unsigned char)(linear[0] * normalize); 615 | rgbe[1] = (unsigned char)(linear[1] * normalize); 616 | rgbe[2] = (unsigned char)(linear[2] * normalize); 617 | rgbe[3] = (unsigned char)(exponent + 128); 618 | } 619 | } 620 | 621 | void stbiw__write_run_data(stbi__write_context *s, int length, unsigned char databyte) 622 | { 623 | unsigned char lengthbyte = STBIW_UCHAR(length + 128); 624 | STBIW_ASSERT(length + 128 <= 255); 625 | s->func(s->context, &lengthbyte, 1); 626 | s->func(s->context, &databyte, 1); 627 | } 628 | 629 | void stbiw__write_dump_data(stbi__write_context *s, int length, unsigned char *data) 630 | { 631 | unsigned char lengthbyte = STBIW_UCHAR(length); 632 | STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code 633 | s->func(s->context, &lengthbyte, 1); 634 | s->func(s->context, data, length); 635 | } 636 | 637 | void stbiw__write_hdr_scanline(stbi__write_context *s, int width, int ncomp, unsigned char *scratch, float *scanline) 638 | { 639 | unsigned char scanlineheader[4] = { 2, 2, 0, 0 }; 640 | unsigned char rgbe[4]; 641 | float linear[3]; 642 | int x; 643 | 644 | scanlineheader[2] = (width & 0xff00) >> 8; 645 | scanlineheader[3] = (width & 0x00ff); 646 | 647 | /* skip RLE for images too small or large */ 648 | if( width < 8 || width >= 32768 ) 649 | { 650 | for( x = 0; x < width; x++ ) 651 | { 652 | switch( ncomp ) 653 | { 654 | case 4: /* fallthrough */ 655 | case 3: linear[2] = scanline[x*ncomp + 2]; 656 | linear[1] = scanline[x*ncomp + 1]; 657 | linear[0] = scanline[x*ncomp + 0]; 658 | break; 659 | 
default: 660 | linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; 661 | break; 662 | } 663 | stbiw__linear_to_rgbe(rgbe, linear); 664 | s->func(s->context, rgbe, 4); 665 | } 666 | } 667 | else 668 | { 669 | int c, r; 670 | /* encode into scratch buffer */ 671 | for( x = 0; x < width; x++ ) 672 | { 673 | switch( ncomp ) 674 | { 675 | case 4: /* fallthrough */ 676 | case 3: linear[2] = scanline[x*ncomp + 2]; 677 | linear[1] = scanline[x*ncomp + 1]; 678 | linear[0] = scanline[x*ncomp + 0]; 679 | break; 680 | default: 681 | linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; 682 | break; 683 | } 684 | stbiw__linear_to_rgbe(rgbe, linear); 685 | scratch[x + width * 0] = rgbe[0]; 686 | scratch[x + width * 1] = rgbe[1]; 687 | scratch[x + width * 2] = rgbe[2]; 688 | scratch[x + width * 3] = rgbe[3]; 689 | } 690 | 691 | s->func(s->context, scanlineheader, 4); 692 | 693 | /* RLE each component separately */ 694 | for( c = 0; c < 4; c++ ) 695 | { 696 | unsigned char *comp = &scratch[width*c]; 697 | 698 | x = 0; 699 | while( x < width ) 700 | { 701 | // find first run 702 | r = x; 703 | while( r + 2 < width ) 704 | { 705 | if( comp[r] == comp[r + 1] && comp[r] == comp[r + 2] ) 706 | break; 707 | ++r; 708 | } 709 | if( r + 2 >= width ) 710 | r = width; 711 | // dump up to first run 712 | while( x < r ) 713 | { 714 | int len = r - x; 715 | if( len > 128 ) len = 128; 716 | stbiw__write_dump_data(s, len, &comp[x]); 717 | x += len; 718 | } 719 | // if there's a run, output it 720 | if( r + 2 < width ) 721 | { // same test as what we break out of in search loop, so only true if we break'd 722 | // find next byte after run 723 | while( r < width && comp[r] == comp[x] ) 724 | ++r; 725 | // output run up to r 726 | while( x < r ) 727 | { 728 | int len = r - x; 729 | if( len > 127 ) len = 127; 730 | stbiw__write_run_data(s, len, comp[x]); 731 | x += len; 732 | } 733 | } 734 | } 735 | } 736 | } 737 | } 738 | 739 | static int stbi_write_hdr_core(stbi__write_context *s, int 
x, int y, int comp, float *data) 740 | { 741 | if( y <= 0 || x <= 0 || data == NULL ) 742 | return 0; 743 | else 744 | { 745 | // Each component is stored separately. Allocate scratch space for full output scanline. 746 | unsigned char *scratch = (unsigned char *)STBIW_MALLOC(x * 4); 747 | int i, len; 748 | char buffer[128]; 749 | char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n"; 750 | s->func(s->context, header, sizeof(header) - 1); 751 | 752 | #ifdef STBI_MSC_SECURE_CRT 753 | len = sprintf_s(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); 754 | #else 755 | len = sprintf(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); 756 | #endif 757 | s->func(s->context, buffer, len); 758 | 759 | for( i = 0; i < y; i++ ) 760 | stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp * x*(stbi__flip_vertically_on_write ? y - 1 - i : i)*x); 761 | STBIW_FREE(scratch); 762 | return 1; 763 | } 764 | } 765 | 766 | STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const float *data) 767 | { 768 | stbi__write_context s; 769 | stbi__start_write_callbacks(&s, func, context); 770 | return stbi_write_hdr_core(&s, x, y, comp, (float *)data); 771 | } 772 | 773 | #ifndef STBI_WRITE_NO_STDIO 774 | STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data) 775 | { 776 | stbi__write_context s; 777 | if( stbi__start_write_file(&s, filename) ) 778 | { 779 | int r = stbi_write_hdr_core(&s, x, y, comp, (float *)data); 780 | stbi__end_write_file(&s); 781 | return r; 782 | } 783 | else 784 | return 0; 785 | } 786 | #endif // STBI_WRITE_NO_STDIO 787 | 788 | 789 | ////////////////////////////////////////////////////////////////////////////// 790 | // 791 | // PNG writer 792 | // 793 | 794 | #ifndef STBIW_ZLIB_COMPRESS 795 | // stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size() 796 | #define stbiw__sbraw(a) ((int 
*) (a) - 2) 797 | #define stbiw__sbm(a) stbiw__sbraw(a)[0] 798 | #define stbiw__sbn(a) stbiw__sbraw(a)[1] 799 | 800 | #define stbiw__sbneedgrow(a,n) ((a)==0 || stbiw__sbn(a)+n >= stbiw__sbm(a)) 801 | #define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? stbiw__sbgrow(a,n) : 0) 802 | #define stbiw__sbgrow(a,n) stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a))) 803 | 804 | #define stbiw__sbpush(a, v) (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v)) 805 | #define stbiw__sbcount(a) ((a) ? stbiw__sbn(a) : 0) 806 | #define stbiw__sbfree(a) ((a) ? STBIW_FREE(stbiw__sbraw(a)),0 : 0) 807 | 808 | static void *stbiw__sbgrowf(void **arr, int increment, int itemsize) 809 | { 810 | int m = *arr ? 2 * stbiw__sbm(*arr) + increment : increment + 1; 811 | void *p = STBIW_REALLOC_SIZED(*arr ? stbiw__sbraw(*arr) : 0, *arr ? (stbiw__sbm(*arr)*itemsize + sizeof(int) * 2) : 0, itemsize * m + sizeof(int) * 2); 812 | STBIW_ASSERT(p); 813 | if( p ) 814 | { 815 | if( !*arr ) ((int *)p)[1] = 0; 816 | *arr = (void *)((int *)p + 2); 817 | stbiw__sbm(*arr) = m; 818 | } 819 | return *arr; 820 | } 821 | 822 | static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount) 823 | { 824 | while( *bitcount >= 8 ) 825 | { 826 | stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer)); 827 | *bitbuffer >>= 8; 828 | *bitcount -= 8; 829 | } 830 | return data; 831 | } 832 | 833 | static int stbiw__zlib_bitrev(int code, int codebits) 834 | { 835 | int res = 0; 836 | while( codebits-- ) 837 | { 838 | res = (res << 1) | (code & 1); 839 | code >>= 1; 840 | } 841 | return res; 842 | } 843 | 844 | static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int limit) 845 | { 846 | int i; 847 | for( i = 0; i < limit && i < 258; ++i ) 848 | if( a[i] != b[i] ) break; 849 | return i; 850 | } 851 | 852 | static unsigned int stbiw__zhash(unsigned char *data) 853 | { 854 | stbiw_uint32 hash = data[0] + (data[1] << 8) + (data[2] << 16); 855 | hash ^= hash << 3; 856 | 
hash += hash >> 5; 857 | hash ^= hash << 4; 858 | hash += hash >> 17; 859 | hash ^= hash << 25; 860 | hash += hash >> 6; 861 | return hash; 862 | } 863 | 864 | #define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount)) 865 | #define stbiw__zlib_add(code,codebits) \ 866 | (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush()) 867 | #define stbiw__zlib_huffa(b,c) stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c) 868 | // default huffman tables 869 | #define stbiw__zlib_huff1(n) stbiw__zlib_huffa(0x30 + (n), 8) 870 | #define stbiw__zlib_huff2(n) stbiw__zlib_huffa(0x190 + (n)-144, 9) 871 | #define stbiw__zlib_huff3(n) stbiw__zlib_huffa(0 + (n)-256,7) 872 | #define stbiw__zlib_huff4(n) stbiw__zlib_huffa(0xc0 + (n)-280,8) 873 | #define stbiw__zlib_huff(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n)) 874 | #define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n)) 875 | 876 | #define stbiw__ZHASH 16384 877 | 878 | #endif // STBIW_ZLIB_COMPRESS 879 | 880 | unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality) 881 | { 882 | #ifdef STBIW_ZLIB_COMPRESS 883 | // user provided a zlib compress implementation, use that 884 | return STBIW_ZLIB_COMPRESS(data, data_len, out_len, quality); 885 | #else // use builtin 886 | static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 }; 887 | static unsigned char lengtheb[] = { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 }; 888 | static unsigned short distc[] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 }; 889 | static unsigned char disteb[] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 }; 890 | unsigned int bitbuf = 0; 891 | int i, j, bitcount 
= 0; 892 | unsigned char *out = NULL; 893 | unsigned char ***hash_table = (unsigned char***)STBIW_MALLOC(stbiw__ZHASH * sizeof(char**)); 894 | if( hash_table == NULL ) 895 | return NULL; 896 | if( quality < 5 ) quality = 5; 897 | 898 | stbiw__sbpush(out, 0x78); // DEFLATE 32K window 899 | stbiw__sbpush(out, 0x5e); // FLEVEL = 1 900 | stbiw__zlib_add(1, 1); // BFINAL = 1 901 | stbiw__zlib_add(1, 2); // BTYPE = 1 -- fixed huffman 902 | 903 | for( i = 0; i < stbiw__ZHASH; ++i ) 904 | hash_table[i] = NULL; 905 | 906 | i = 0; 907 | while( i < data_len - 3 ) 908 | { 909 | // hash next 3 bytes of data to be compressed 910 | int h = stbiw__zhash(data + i)&(stbiw__ZHASH - 1), best = 3; 911 | unsigned char *bestloc = 0; 912 | unsigned char **hlist = hash_table[h]; 913 | int n = stbiw__sbcount(hlist); 914 | for( j = 0; j < n; ++j ) 915 | { 916 | if( hlist[j] - data > i - 32768 ) 917 | { // if entry lies within window 918 | int d = stbiw__zlib_countm(hlist[j], data + i, data_len - i); 919 | if( d >= best ) best = d, bestloc = hlist[j]; 920 | } 921 | } 922 | // when hash table entry is too long, delete half the entries 923 | if( hash_table[h] && stbiw__sbn(hash_table[h]) == 2 * quality ) 924 | { 925 | STBIW_MEMMOVE(hash_table[h], hash_table[h] + quality, sizeof(hash_table[h][0])*quality); 926 | stbiw__sbn(hash_table[h]) = quality; 927 | } 928 | stbiw__sbpush(hash_table[h], data + i); 929 | 930 | if( bestloc ) 931 | { 932 | // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal 933 | h = stbiw__zhash(data + i + 1)&(stbiw__ZHASH - 1); 934 | hlist = hash_table[h]; 935 | n = stbiw__sbcount(hlist); 936 | for( j = 0; j < n; ++j ) 937 | { 938 | if( hlist[j] - data > i - 32767 ) 939 | { 940 | int e = stbiw__zlib_countm(hlist[j], data + i + 1, data_len - i - 1); 941 | if( e > best ) 942 | { // if next match is better, bail on current match 943 | bestloc = NULL; 944 | break; 945 | } 946 | } 947 | } 948 | } 949 | 950 | if( bestloc ) 951 | { 952 | int d 
= (int)(data + i - bestloc); // distance back 953 | STBIW_ASSERT(d <= 32767 && best <= 258); 954 | for( j = 0; best > lengthc[j + 1] - 1; ++j ); 955 | stbiw__zlib_huff(j + 257); 956 | if( lengtheb[j] ) stbiw__zlib_add(best - lengthc[j], lengtheb[j]); 957 | for( j = 0; d > distc[j + 1] - 1; ++j ); 958 | stbiw__zlib_add(stbiw__zlib_bitrev(j, 5), 5); 959 | if( disteb[j] ) stbiw__zlib_add(d - distc[j], disteb[j]); 960 | i += best; 961 | } 962 | else 963 | { 964 | stbiw__zlib_huffb(data[i]); 965 | ++i; 966 | } 967 | } 968 | // write out final bytes 969 | for( ; i < data_len; ++i ) 970 | stbiw__zlib_huffb(data[i]); 971 | stbiw__zlib_huff(256); // end of block 972 | // pad with 0 bits to byte boundary 973 | while( bitcount ) 974 | stbiw__zlib_add(0, 1); 975 | 976 | for( i = 0; i < stbiw__ZHASH; ++i ) 977 | (void)stbiw__sbfree(hash_table[i]); 978 | STBIW_FREE(hash_table); 979 | 980 | { 981 | // compute adler32 on input 982 | unsigned int s1 = 1, s2 = 0; 983 | int blocklen = (int)(data_len % 5552); 984 | j = 0; 985 | while( j < data_len ) 986 | { 987 | for( i = 0; i < blocklen; ++i ) s1 += data[j + i], s2 += s1; 988 | s1 %= 65521, s2 %= 65521; 989 | j += blocklen; 990 | blocklen = 5552; 991 | } 992 | stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8)); 993 | stbiw__sbpush(out, STBIW_UCHAR(s2)); 994 | stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8)); 995 | stbiw__sbpush(out, STBIW_UCHAR(s1)); 996 | } 997 | *out_len = stbiw__sbn(out); 998 | // make returned pointer freeable 999 | STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len); 1000 | return (unsigned char *)stbiw__sbraw(out); 1001 | #endif // STBIW_ZLIB_COMPRESS 1002 | } 1003 | 1004 | static unsigned int stbiw__crc32(unsigned char *buffer, int len) 1005 | { 1006 | static unsigned int crc_table[256] = 1007 | { 1008 | 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, 1009 | 0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 1010 | 0x1DB71064, 0x6AB020F2, 
0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, 1011 | 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, 1012 | 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 1013 | 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, 1014 | 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, 1015 | 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 1016 | 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, 1017 | 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, 1018 | 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 1019 | 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, 1020 | 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, 1021 | 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 1022 | 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, 1023 | 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, 1024 | 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 1025 | 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, 1026 | 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, 1027 | 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 1028 | 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, 1029 | 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 
0xA867DF55, 0x316E8EEF, 0x4669BE79, 1030 | 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 1031 | 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, 1032 | 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, 1033 | 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 1034 | 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, 1035 | 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, 1036 | 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 1037 | 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, 1038 | 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, 1039 | 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D 1040 | }; 1041 | 1042 | unsigned int crc = ~0u; 1043 | int i; 1044 | for( i = 0; i < len; ++i ) 1045 | crc = (crc >> 8) ^ crc_table[buffer[i] ^ (crc & 0xff)]; 1046 | return ~crc; 1047 | } 1048 | 1049 | #define stbiw__wpng4(o,a,b,c,d) ((o)[0]=STBIW_UCHAR(a),(o)[1]=STBIW_UCHAR(b),(o)[2]=STBIW_UCHAR(c),(o)[3]=STBIW_UCHAR(d),(o)+=4) 1050 | #define stbiw__wp32(data,v) stbiw__wpng4(data, (v)>>24,(v)>>16,(v)>>8,(v)); 1051 | #define stbiw__wptag(data,s) stbiw__wpng4(data, s[0],s[1],s[2],s[3]) 1052 | 1053 | static void stbiw__wpcrc(unsigned char **data, int len) 1054 | { 1055 | unsigned int crc = stbiw__crc32(*data - len - 4, len + 4); 1056 | stbiw__wp32(*data, crc); 1057 | } 1058 | 1059 | static unsigned char stbiw__paeth(int a, int b, int c) 1060 | { 1061 | int p = a + b - c, pa = abs(p - a), pb = abs(p - b), pc = abs(p - c); 1062 | if( pa <= pb && pa <= pc ) return STBIW_UCHAR(a); 1063 | if( pb <= pc ) return STBIW_UCHAR(b); 
1064 | return STBIW_UCHAR(c); 1065 | } 1066 | 1067 | // @OPTIMIZE: provide an option that always forces left-predict or paeth predict 1068 | static void stbiw__encode_png_line(unsigned char *pixels, int stride_bytes, int width, int height, int y, int n, int filter_type, signed char *line_buffer) 1069 | { 1070 | static int mapping[] = { 0,1,2,3,4 }; 1071 | static int firstmap[] = { 0,1,0,5,6 }; 1072 | int *mymap = (y != 0) ? mapping : firstmap; 1073 | int i; 1074 | int type = mymap[filter_type]; 1075 | unsigned char *z = pixels + stride_bytes * (stbi__flip_vertically_on_write ? height - 1 - y : y); 1076 | int signed_stride = stbi__flip_vertically_on_write ? -stride_bytes : stride_bytes; 1077 | for( i = 0; i < n; ++i ) 1078 | { 1079 | switch( type ) 1080 | { 1081 | case 0: line_buffer[i] = z[i]; break; 1082 | case 1: line_buffer[i] = z[i]; break; 1083 | case 2: line_buffer[i] = z[i] - z[i - signed_stride]; break; 1084 | case 3: line_buffer[i] = z[i] - (z[i - signed_stride] >> 1); break; 1085 | case 4: line_buffer[i] = (signed char)(z[i] - stbiw__paeth(0, z[i - signed_stride], 0)); break; 1086 | case 5: line_buffer[i] = z[i]; break; 1087 | case 6: line_buffer[i] = z[i]; break; 1088 | } 1089 | } 1090 | for( i = n; i < width*n; ++i ) 1091 | { 1092 | switch( type ) 1093 | { 1094 | case 0: line_buffer[i] = z[i]; break; 1095 | case 1: line_buffer[i] = z[i] - z[i - n]; break; 1096 | case 2: line_buffer[i] = z[i] - z[i - signed_stride]; break; 1097 | case 3: line_buffer[i] = z[i] - ((z[i - n] + z[i - signed_stride]) >> 1); break; 1098 | case 4: line_buffer[i] = z[i] - stbiw__paeth(z[i - n], z[i - signed_stride], z[i - signed_stride - n]); break; 1099 | case 5: line_buffer[i] = z[i] - (z[i - n] >> 1); break; 1100 | case 6: line_buffer[i] = z[i] - stbiw__paeth(z[i - n], 0, 0); break; 1101 | } 1102 | } 1103 | } 1104 | 1105 | unsigned char *stbi_write_png_to_mem(unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len) 1106 | { 1107 | int force_filter = 
stbi_write_force_png_filter; 1108 | int ctype[5] = { -1, 0, 4, 2, 6 }; 1109 | unsigned char sig[8] = { 137,80,78,71,13,10,26,10 }; 1110 | unsigned char *out, *o, *filt, *zlib; 1111 | signed char *line_buffer; 1112 | int j, zlen; 1113 | 1114 | if( stride_bytes == 0 ) 1115 | stride_bytes = x * n; 1116 | 1117 | if( force_filter >= 5 ) 1118 | { 1119 | force_filter = -1; 1120 | } 1121 | 1122 | filt = (unsigned char *)STBIW_MALLOC((x*n + 1) * y); if( !filt ) return 0; 1123 | line_buffer = (signed char *)STBIW_MALLOC(x * n); if( !line_buffer ) { STBIW_FREE(filt); return 0; } 1124 | for( j = 0; j < y; ++j ) 1125 | { 1126 | int filter_type; 1127 | if( force_filter > -1 ) 1128 | { 1129 | filter_type = force_filter; 1130 | stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, force_filter, line_buffer); 1131 | } 1132 | else 1133 | { // Estimate the best filter by running through all of them: 1134 | int best_filter = 0, best_filter_val = 0x7fffffff, est, i; 1135 | for( filter_type = 0; filter_type < 5; filter_type++ ) 1136 | { 1137 | stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, filter_type, line_buffer); 1138 | 1139 | // Estimate the entropy of the line using this filter; the less, the better. 
1140 | est = 0; 1141 | for( i = 0; i < x*n; ++i ) 1142 | { 1143 | est += abs((signed char)line_buffer[i]); 1144 | } 1145 | if( est < best_filter_val ) 1146 | { 1147 | best_filter_val = est; 1148 | best_filter = filter_type; 1149 | } 1150 | } 1151 | if( filter_type != best_filter ) 1152 | { // If the last iteration already got us the best filter, don't redo it 1153 | stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, best_filter, line_buffer); 1154 | filter_type = best_filter; 1155 | } 1156 | } 1157 | // when we get here, filter_type contains the filter type, and line_buffer contains the data 1158 | filt[j*(x*n + 1)] = (unsigned char)filter_type; 1159 | STBIW_MEMMOVE(filt + j * (x*n + 1) + 1, line_buffer, x*n); 1160 | } 1161 | STBIW_FREE(line_buffer); 1162 | zlib = stbi_zlib_compress(filt, y*(x*n + 1), &zlen, stbi_write_png_compression_level); 1163 | STBIW_FREE(filt); 1164 | if( !zlib ) return 0; 1165 | 1166 | // each tag requires 12 bytes of overhead 1167 | out = (unsigned char *)STBIW_MALLOC(8 + 12 + 13 + 12 + zlen + 12); 1168 | if( !out ) return 0; 1169 | *out_len = 8 + 12 + 13 + 12 + zlen + 12; 1170 | 1171 | o = out; 1172 | STBIW_MEMMOVE(o, sig, 8); o += 8; 1173 | stbiw__wp32(o, 13); // header length 1174 | stbiw__wptag(o, "IHDR"); 1175 | stbiw__wp32(o, x); 1176 | stbiw__wp32(o, y); 1177 | *o++ = 8; 1178 | *o++ = STBIW_UCHAR(ctype[n]); 1179 | *o++ = 0; 1180 | *o++ = 0; 1181 | *o++ = 0; 1182 | stbiw__wpcrc(&o, 13); 1183 | 1184 | stbiw__wp32(o, zlen); 1185 | stbiw__wptag(o, "IDAT"); 1186 | STBIW_MEMMOVE(o, zlib, zlen); 1187 | o += zlen; 1188 | STBIW_FREE(zlib); 1189 | stbiw__wpcrc(&o, zlen); 1190 | 1191 | stbiw__wp32(o, 0); 1192 | stbiw__wptag(o, "IEND"); 1193 | stbiw__wpcrc(&o, 0); 1194 | 1195 | STBIW_ASSERT(o == out + *out_len); 1196 | 1197 | return out; 1198 | } 1199 | 1200 | #ifndef STBI_WRITE_NO_STDIO 1201 | STBIWDEF int stbi_write_png(char const *filename, int x, int y, int comp, const void *data, int stride_bytes) 1202 | { 1203 | FILE *f; 1204 | int 
// One in-place 8-point DCT pass over the eight values addressed by d0p..d7p.
// All eight inputs are read before any output is stored. The caller runs this
// once per row and once per column of an 8x8 block.
static void stbiw__jpg_DCT(float *d0p, float *d1p, float *d2p, float *d3p, float *d4p, float *d5p, float *d6p, float *d7p)
{
    const float in0 = *d0p, in1 = *d1p, in2 = *d2p, in3 = *d3p;
    const float in4 = *d4p, in5 = *d5p, in6 = *d6p, in7 = *d7p;

    // Mirror-pair sums feed the even outputs, differences feed the odd ones.
    const float sum07 = in0 + in7, dif07 = in0 - in7;
    const float sum16 = in1 + in6, dif16 = in1 - in6;
    const float sum25 = in2 + in5, dif25 = in2 - in5;
    const float sum34 = in3 + in4, dif34 = in3 - in4;

    // Even part
    const float evenOuterSum = sum07 + sum34;
    const float evenOuterDif = sum07 - sum34;
    const float evenInnerSum = sum16 + sum25;
    const float evenInnerDif = sum16 - sum25;
    const float evenRot = (evenInnerDif + evenOuterDif) * 0.707106781f; // c4

    // Odd part
    const float oddA = dif34 + dif25;
    const float oddB = dif25 + dif16;
    const float oddC = dif16 + dif07;

    // Shared rotator term, arranged to avoid extra negations.
    const float rot = (oddA - oddC) * 0.382683433f;   // c6
    const float rotA = oddA * 0.541196100f + rot;     // c2-c6
    const float rotC = oddC * 1.306562965f + rot;     // c2+c6
    const float rotB = oddB * 0.707106781f;           // c4

    const float oddPlus = dif07 + rotB;
    const float oddMinus = dif07 - rotB;

    *d5p = oddMinus + rotA;
    *d3p = oddMinus - rotA;
    *d1p = oddPlus + rotC;
    *d7p = oddPlus - rotC;

    *d0p = evenOuterSum + evenInnerSum;
    *d2p = evenOuterDif + evenRot;
    *d4p = evenOuterSum - evenInnerSum;
    *d6p = evenOuterDif - evenRot;
}

// Split a coefficient into the pair used for its variable-length code.
//   bits[1] receives the number of bits needed for |val| (at least 1).
//   bits[0] receives the low bits[1] bits of val, or of val - 1 when val is
//           negative.
static void stbiw__jpg_calcBits(int val, unsigned short bits[2])
{
    int magnitude = val;
    int payload = val;
    unsigned short width = 1;

    if( val < 0 )
    {
        magnitude = -val;
        payload = val - 1;
    }

    // One extra bit for every halving that still leaves a non-zero magnitude.
    for( magnitude >>= 1; magnitude != 0; magnitude >>= 1 )
    {
        ++width;
    }

    bits[1] = width;
    bits[0] = payload & ((1 << width) - 1);
}
v - 0.5f : v + 0.5f); 1348 | } 1349 | 1350 | // Encode DC 1351 | diff = DU[0] - DC; 1352 | if( diff == 0 ) 1353 | { 1354 | stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[0]); 1355 | } 1356 | else 1357 | { 1358 | unsigned short bits[2]; 1359 | stbiw__jpg_calcBits(diff, bits); 1360 | stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[bits[1]]); 1361 | stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits); 1362 | } 1363 | // Encode ACs 1364 | end0pos = 63; 1365 | for( ; (end0pos>0) && (DU[end0pos] == 0); --end0pos ) 1366 | { 1367 | } 1368 | // end0pos = first element in reverse order !=0 1369 | if( end0pos == 0 ) 1370 | { 1371 | stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); 1372 | return DU[0]; 1373 | } 1374 | for( i = 1; i <= end0pos; ++i ) 1375 | { 1376 | int startpos = i; 1377 | int nrzeroes; 1378 | unsigned short bits[2]; 1379 | for( ; DU[i] == 0 && i <= end0pos; ++i ) 1380 | { 1381 | } 1382 | nrzeroes = i - startpos; 1383 | if( nrzeroes >= 16 ) 1384 | { 1385 | int lng = nrzeroes >> 4; 1386 | int nrmarker; 1387 | for( nrmarker = 1; nrmarker <= lng; ++nrmarker ) 1388 | stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes); 1389 | nrzeroes &= 15; 1390 | } 1391 | stbiw__jpg_calcBits(DU[i], bits); 1392 | stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes << 4) + bits[1]]); 1393 | stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits); 1394 | } 1395 | if( end0pos != 63 ) 1396 | { 1397 | stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); 1398 | } 1399 | return DU[0]; 1400 | } 1401 | 1402 | static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) 1403 | { 1404 | // Constants that don't pollute global namespace 1405 | static const unsigned char std_dc_luminance_nrcodes[] = { 0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0 }; 1406 | static const unsigned char std_dc_luminance_values[] = { 0,1,2,3,4,5,6,7,8,9,10,11 }; 1407 | static const unsigned char std_ac_luminance_nrcodes[] = { 0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d }; 1408 | static const unsigned char 
std_ac_luminance_values[] = { 1409 | 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08, 1410 | 0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28, 1411 | 0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59, 1412 | 0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89, 1413 | 0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6, 1414 | 0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2, 1415 | 0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa 1416 | }; 1417 | static const unsigned char std_dc_chrominance_nrcodes[] = { 0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0 }; 1418 | static const unsigned char std_dc_chrominance_values[] = { 0,1,2,3,4,5,6,7,8,9,10,11 }; 1419 | static const unsigned char std_ac_chrominance_nrcodes[] = { 0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77 }; 1420 | static const unsigned char std_ac_chrominance_values[] = { 1421 | 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91, 1422 | 0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26, 1423 | 0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58, 1424 | 0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87, 1425 | 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4, 1426 | 
0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda, 1427 | 0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa 1428 | }; 1429 | // Huffman tables 1430 | static const unsigned short YDC_HT[256][2] = { { 0,2 },{ 2,3 },{ 3,3 },{ 4,3 },{ 5,3 },{ 6,3 },{ 14,4 },{ 30,5 },{ 62,6 },{ 126,7 },{ 254,8 },{ 510,9 } }; 1431 | static const unsigned short UVDC_HT[256][2] = { { 0,2 },{ 1,2 },{ 2,2 },{ 6,3 },{ 14,4 },{ 30,5 },{ 62,6 },{ 126,7 },{ 254,8 },{ 510,9 },{ 1022,10 },{ 2046,11 } }; 1432 | static const unsigned short YAC_HT[256][2] = { 1433 | { 10,4 },{ 0,2 },{ 1,2 },{ 4,3 },{ 11,4 },{ 26,5 },{ 120,7 },{ 248,8 },{ 1014,10 },{ 65410,16 },{ 65411,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1434 | { 12,4 },{ 27,5 },{ 121,7 },{ 502,9 },{ 2038,11 },{ 65412,16 },{ 65413,16 },{ 65414,16 },{ 65415,16 },{ 65416,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1435 | { 28,5 },{ 249,8 },{ 1015,10 },{ 4084,12 },{ 65417,16 },{ 65418,16 },{ 65419,16 },{ 65420,16 },{ 65421,16 },{ 65422,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1436 | { 58,6 },{ 503,9 },{ 4085,12 },{ 65423,16 },{ 65424,16 },{ 65425,16 },{ 65426,16 },{ 65427,16 },{ 65428,16 },{ 65429,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1437 | { 59,6 },{ 1016,10 },{ 65430,16 },{ 65431,16 },{ 65432,16 },{ 65433,16 },{ 65434,16 },{ 65435,16 },{ 65436,16 },{ 65437,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1438 | { 122,7 },{ 2039,11 },{ 65438,16 },{ 65439,16 },{ 65440,16 },{ 65441,16 },{ 65442,16 },{ 65443,16 },{ 65444,16 },{ 65445,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1439 | { 123,7 },{ 4086,12 },{ 65446,16 },{ 65447,16 },{ 65448,16 },{ 65449,16 },{ 65450,16 },{ 65451,16 },{ 65452,16 },{ 65453,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1440 | { 250,8 },{ 4087,12 },{ 65454,16 },{ 65455,16 },{ 65456,16 },{ 65457,16 },{ 65458,16 },{ 65459,16 },{ 65460,16 },{ 
65461,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1441 | { 504,9 },{ 32704,15 },{ 65462,16 },{ 65463,16 },{ 65464,16 },{ 65465,16 },{ 65466,16 },{ 65467,16 },{ 65468,16 },{ 65469,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1442 | { 505,9 },{ 65470,16 },{ 65471,16 },{ 65472,16 },{ 65473,16 },{ 65474,16 },{ 65475,16 },{ 65476,16 },{ 65477,16 },{ 65478,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1443 | { 506,9 },{ 65479,16 },{ 65480,16 },{ 65481,16 },{ 65482,16 },{ 65483,16 },{ 65484,16 },{ 65485,16 },{ 65486,16 },{ 65487,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1444 | { 1017,10 },{ 65488,16 },{ 65489,16 },{ 65490,16 },{ 65491,16 },{ 65492,16 },{ 65493,16 },{ 65494,16 },{ 65495,16 },{ 65496,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1445 | { 1018,10 },{ 65497,16 },{ 65498,16 },{ 65499,16 },{ 65500,16 },{ 65501,16 },{ 65502,16 },{ 65503,16 },{ 65504,16 },{ 65505,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1446 | { 2040,11 },{ 65506,16 },{ 65507,16 },{ 65508,16 },{ 65509,16 },{ 65510,16 },{ 65511,16 },{ 65512,16 },{ 65513,16 },{ 65514,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1447 | { 65515,16 },{ 65516,16 },{ 65517,16 },{ 65518,16 },{ 65519,16 },{ 65520,16 },{ 65521,16 },{ 65522,16 },{ 65523,16 },{ 65524,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1448 | { 2041,11 },{ 65525,16 },{ 65526,16 },{ 65527,16 },{ 65528,16 },{ 65529,16 },{ 65530,16 },{ 65531,16 },{ 65532,16 },{ 65533,16 },{ 65534,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 } 1449 | }; 1450 | static const unsigned short UVAC_HT[256][2] = { 1451 | { 0,2 },{ 1,2 },{ 4,3 },{ 10,4 },{ 24,5 },{ 25,5 },{ 56,6 },{ 120,7 },{ 500,9 },{ 1014,10 },{ 4084,12 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1452 | { 11,4 },{ 57,6 },{ 246,8 },{ 501,9 },{ 2038,11 },{ 4085,12 },{ 65416,16 },{ 65417,16 },{ 65418,16 },{ 65419,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1453 | { 26,5 },{ 247,8 },{ 1015,10 },{ 4086,12 },{ 32706,15 },{ 65420,16 
},{ 65421,16 },{ 65422,16 },{ 65423,16 },{ 65424,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1454 | { 27,5 },{ 248,8 },{ 1016,10 },{ 4087,12 },{ 65425,16 },{ 65426,16 },{ 65427,16 },{ 65428,16 },{ 65429,16 },{ 65430,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1455 | { 58,6 },{ 502,9 },{ 65431,16 },{ 65432,16 },{ 65433,16 },{ 65434,16 },{ 65435,16 },{ 65436,16 },{ 65437,16 },{ 65438,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1456 | { 59,6 },{ 1017,10 },{ 65439,16 },{ 65440,16 },{ 65441,16 },{ 65442,16 },{ 65443,16 },{ 65444,16 },{ 65445,16 },{ 65446,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1457 | { 121,7 },{ 2039,11 },{ 65447,16 },{ 65448,16 },{ 65449,16 },{ 65450,16 },{ 65451,16 },{ 65452,16 },{ 65453,16 },{ 65454,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1458 | { 122,7 },{ 2040,11 },{ 65455,16 },{ 65456,16 },{ 65457,16 },{ 65458,16 },{ 65459,16 },{ 65460,16 },{ 65461,16 },{ 65462,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1459 | { 249,8 },{ 65463,16 },{ 65464,16 },{ 65465,16 },{ 65466,16 },{ 65467,16 },{ 65468,16 },{ 65469,16 },{ 65470,16 },{ 65471,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1460 | { 503,9 },{ 65472,16 },{ 65473,16 },{ 65474,16 },{ 65475,16 },{ 65476,16 },{ 65477,16 },{ 65478,16 },{ 65479,16 },{ 65480,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1461 | { 504,9 },{ 65481,16 },{ 65482,16 },{ 65483,16 },{ 65484,16 },{ 65485,16 },{ 65486,16 },{ 65487,16 },{ 65488,16 },{ 65489,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1462 | { 505,9 },{ 65490,16 },{ 65491,16 },{ 65492,16 },{ 65493,16 },{ 65494,16 },{ 65495,16 },{ 65496,16 },{ 65497,16 },{ 65498,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1463 | { 506,9 },{ 65499,16 },{ 65500,16 },{ 65501,16 },{ 65502,16 },{ 65503,16 },{ 65504,16 },{ 65505,16 },{ 65506,16 },{ 65507,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1464 | { 2041,11 },{ 65508,16 },{ 65509,16 },{ 65510,16 },{ 65511,16 },{ 
65512,16 },{ 65513,16 },{ 65514,16 },{ 65515,16 },{ 65516,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1465 | { 16352,14 },{ 65517,16 },{ 65518,16 },{ 65519,16 },{ 65520,16 },{ 65521,16 },{ 65522,16 },{ 65523,16 },{ 65524,16 },{ 65525,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 }, 1466 | { 1018,10 },{ 32707,15 },{ 65526,16 },{ 65527,16 },{ 65528,16 },{ 65529,16 },{ 65530,16 },{ 65531,16 },{ 65532,16 },{ 65533,16 },{ 65534,16 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 },{ 0,0 } 1467 | }; 1468 | static const int YQT[] = { 16,11,10,16,24,40,51,61,12,12,14,19,26,58,60,55,14,13,16,24,40,57,69,56,14,17,22,29,51,87,80,62,18,22, 1469 | 37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99 }; 1470 | static const int UVQT[] = { 17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99, 1471 | 99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99 }; 1472 | static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f, 1473 | 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f }; 1474 | 1475 | int row, col, i, k; 1476 | float fdtbl_Y[64], fdtbl_UV[64]; 1477 | unsigned char YTable[64], UVTable[64]; 1478 | 1479 | if( !data || !width || !height || comp > 4 || comp < 1 ) 1480 | { 1481 | return 0; 1482 | } 1483 | 1484 | quality = quality ? quality : 90; 1485 | quality = quality < 1 ? 1 : quality > 100 ? 100 : quality; 1486 | quality = quality < 50 ? 5000 / quality : 200 - quality * 2; 1487 | 1488 | for( i = 0; i < 64; ++i ) 1489 | { 1490 | int uvti, yti = (YQT[i] * quality + 50) / 100; 1491 | YTable[stbiw__jpg_ZigZag[i]] = (unsigned char)(yti < 1 ? 1 : yti > 255 ? 255 : yti); 1492 | uvti = (UVQT[i] * quality + 50) / 100; 1493 | UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char)(uvti < 1 ? 1 : uvti > 255 ? 
255 : uvti); 1494 | } 1495 | 1496 | for( row = 0, k = 0; row < 8; ++row ) 1497 | { 1498 | for( col = 0; col < 8; ++col, ++k ) 1499 | { 1500 | fdtbl_Y[k] = 1 / (YTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); 1501 | fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); 1502 | } 1503 | } 1504 | 1505 | // Write Headers 1506 | { 1507 | static const unsigned char head0[] = { 0xFF,0xD8,0xFF,0xE0,0,0x10,'J','F','I','F',0,1,1,0,0,1,0,1,0,0,0xFF,0xDB,0,0x84,0 }; 1508 | static const unsigned char head2[] = { 0xFF,0xDA,0,0xC,3,1,0,2,0x11,3,0x11,0,0x3F,0 }; 1509 | const unsigned char head1[] = { 0xFF,0xC0,0,0x11,8,(unsigned char)(height >> 8),STBIW_UCHAR(height),(unsigned char)(width >> 8),STBIW_UCHAR(width), 1510 | 3,1,0x11,0,2,0x11,1,3,0x11,1,0xFF,0xC4,0x01,0xA2,0 }; 1511 | s->func(s->context, (void*)head0, sizeof(head0)); 1512 | s->func(s->context, (void*)YTable, sizeof(YTable)); 1513 | stbiw__putc(s, 1); 1514 | s->func(s->context, UVTable, sizeof(UVTable)); 1515 | s->func(s->context, (void*)head1, sizeof(head1)); 1516 | s->func(s->context, (void*)(std_dc_luminance_nrcodes + 1), sizeof(std_dc_luminance_nrcodes) - 1); 1517 | s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values)); 1518 | stbiw__putc(s, 0x10); // HTYACinfo 1519 | s->func(s->context, (void*)(std_ac_luminance_nrcodes + 1), sizeof(std_ac_luminance_nrcodes) - 1); 1520 | s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values)); 1521 | stbiw__putc(s, 1); // HTUDCinfo 1522 | s->func(s->context, (void*)(std_dc_chrominance_nrcodes + 1), sizeof(std_dc_chrominance_nrcodes) - 1); 1523 | s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values)); 1524 | stbiw__putc(s, 0x11); // HTUACinfo 1525 | s->func(s->context, (void*)(std_ac_chrominance_nrcodes + 1), sizeof(std_ac_chrominance_nrcodes) - 1); 1526 | s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values)); 1527 | 
s->func(s->context, (void*)head2, sizeof(head2)); 1528 | } 1529 | 1530 | // Encode 8x8 macroblocks 1531 | { 1532 | static const unsigned short fillBits[] = { 0x7F, 7 }; 1533 | const unsigned char *imageData = (const unsigned char *)data; 1534 | int DCY = 0, DCU = 0, DCV = 0; 1535 | int bitBuf = 0, bitCnt = 0; 1536 | // comp == 2 is grey+alpha (alpha is ignored) 1537 | int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0; 1538 | int x, y, pos; 1539 | for( y = 0; y < height; y += 8 ) 1540 | { 1541 | for( x = 0; x < width; x += 8 ) 1542 | { 1543 | float YDU[64], UDU[64], VDU[64]; 1544 | for( row = y, pos = 0; row < y + 8; ++row ) 1545 | { 1546 | for( col = x; col < x + 8; ++col, ++pos ) 1547 | { 1548 | int p = (stbi__flip_vertically_on_write ? height - 1 - row : row)*width*comp + col * comp; 1549 | float r, g, b; 1550 | if( row >= height ) 1551 | { 1552 | p -= width * comp*(row + 1 - height); 1553 | } 1554 | if( col >= width ) 1555 | { 1556 | p -= comp * (col + 1 - width); 1557 | } 1558 | 1559 | r = imageData[p + 0]; 1560 | g = imageData[p + ofsG]; 1561 | b = imageData[p + ofsB]; 1562 | YDU[pos] = +0.29900f*r + 0.58700f*g + 0.11400f*b - 128; 1563 | UDU[pos] = -0.16874f*r - 0.33126f*g + 0.50000f*b; 1564 | VDU[pos] = +0.50000f*r - 0.41869f*g - 0.08131f*b; 1565 | } 1566 | } 1567 | 1568 | DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, YDU, fdtbl_Y, DCY, YDC_HT, YAC_HT); 1569 | DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, UDU, fdtbl_UV, DCU, UVDC_HT, UVAC_HT); 1570 | DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, VDU, fdtbl_UV, DCV, UVDC_HT, UVAC_HT); 1571 | } 1572 | } 1573 | 1574 | // Do the bit alignment of the EOI marker 1575 | stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits); 1576 | } 1577 | 1578 | // EOI 1579 | stbiw__putc(s, 0xFF); 1580 | stbiw__putc(s, 0xD9); 1581 | 1582 | return 1; 1583 | } 1584 | 1585 | STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality) 1586 | { 1587 | 
stbi__write_context s; 1588 | stbi__start_write_callbacks(&s, func, context); 1589 | return stbi_write_jpg_core(&s, x, y, comp, (void *)data, quality); 1590 | } 1591 | 1592 | 1593 | #ifndef STBI_WRITE_NO_STDIO 1594 | STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality) 1595 | { 1596 | stbi__write_context s; 1597 | if( stbi__start_write_file(&s, filename) ) 1598 | { 1599 | int r = stbi_write_jpg_core(&s, x, y, comp, data, quality); 1600 | stbi__end_write_file(&s); 1601 | return r; 1602 | } 1603 | else 1604 | return 0; 1605 | } 1606 | #endif 1607 | 1608 | #endif // STB_IMAGE_WRITE_IMPLEMENTATION 1609 | 1610 | /* Revision history 1611 | 1.09 (2018-02-11) 1612 | fix typo in zlib quality API, improve STB_I_W_STATIC in C++ 1613 | 1.08 (2018-01-29) 1614 | add stbi__flip_vertically_on_write, external zlib, zlib quality, choose PNG filter 1615 | 1.07 (2017-07-24) 1616 | doc fix 1617 | 1.06 (2017-07-23) 1618 | writing JPEG (using Jon Olick's code) 1619 | 1.05 ??? 1620 | 1.04 (2017-03-03) 1621 | monochrome BMP expansion 1622 | 1.03 ??? 
1623 | 1.02 (2016-04-02) 1624 | avoid allocating large structures on the stack 1625 | 1.01 (2016-01-16) 1626 | STBIW_REALLOC_SIZED: support allocators with no realloc support 1627 | avoid race-condition in crc initialization 1628 | minor compile issues 1629 | 1.00 (2015-09-14) 1630 | installable file IO function 1631 | 0.99 (2015-09-13) 1632 | warning fixes; TGA rle support 1633 | 0.98 (2015-04-08) 1634 | added STBIW_MALLOC, STBIW_ASSERT etc 1635 | 0.97 (2015-01-18) 1636 | fixed HDR asserts, rewrote HDR rle logic 1637 | 0.96 (2015-01-17) 1638 | add HDR output 1639 | fix monochrome BMP 1640 | 0.95 (2014-08-17) 1641 | add monochrome TGA output 1642 | 0.94 (2014-05-31) 1643 | rename private functions to avoid conflicts with stb_image.h 1644 | 0.93 (2014-05-27) 1645 | warning fixes 1646 | 0.92 (2010-08-01) 1647 | casts to unsigned char to fix warnings 1648 | 0.91 (2010-07-17) 1649 | first public release 1650 | 0.90 first internal release 1651 | */ 1652 | 1653 | /* 1654 | ------------------------------------------------------------------------------ 1655 | This software is available under 2 licenses -- choose whichever you prefer. 1656 | ------------------------------------------------------------------------------ 1657 | ALTERNATIVE A - MIT License 1658 | Copyright (c) 2017 Sean Barrett 1659 | Permission is hereby granted, free of charge, to any person obtaining a copy of 1660 | this software and associated documentation files (the "Software"), to deal in 1661 | the Software without restriction, including without limitation the rights to 1662 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 1663 | of the Software, and to permit persons to whom the Software is furnished to do 1664 | so, subject to the following conditions: 1665 | The above copyright notice and this permission notice shall be included in all 1666 | copies or substantial portions of the Software. 
1667 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1668 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1669 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 1670 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1671 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 1672 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 1673 | SOFTWARE. 1674 | ------------------------------------------------------------------------------ 1675 | ALTERNATIVE B - Public Domain (www.unlicense.org) 1676 | This is free and unencumbered software released into the public domain. 1677 | Anyone is free to copy, modify, publish, use, compile, sell, or distribute this 1678 | software, either in source code form or as a compiled binary, for any purpose, 1679 | commercial or non-commercial, and by any means. 1680 | In jurisdictions that recognize copyright laws, the author or authors of this 1681 | software dedicate any and all copyright interest in the software to the public 1682 | domain. We make this dedication for the benefit of the public at large and to 1683 | the detriment of our heirs and successors. We intend this dedication to be an 1684 | overt act of relinquishment in perpetuity of all present and future rights to 1685 | this software under copyright law. 1686 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1687 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1688 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 1689 | AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 1690 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 1691 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
1692 | ------------------------------------------------------------------------------ 1693 | */ --------------------------------------------------------------------------------