├── bin
    └── resampler.exe
├── resampler.sln
├── README.md
├── resampler.vcproj
├── resampler.h
├── test.cpp
├── resampler.cpp
└── stb_image.c


/bin/resampler.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rwohleb/imageresampler/HEAD/bin/resampler.exe


--------------------------------------------------------------------------------
/resampler.sln:
--------------------------------------------------------------------------------
 1 | 
 2 | Microsoft Visual Studio Solution File, Format Version 9.00
 3 | # Visual Studio 2005
 4 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "resampler", "resampler.vcproj", "{2E1E9B38-BE1F-4A69-9252-5D019D8145D6}"
 5 | EndProject
 6 | Global
 7 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 8 | 		Debug|Win32 = Debug|Win32
 9 | 		Release|Win32 = Release|Win32
10 | 	EndGlobalSection
11 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
12 | 		{2E1E9B38-BE1F-4A69-9252-5D019D8145D6}.Debug|Win32.ActiveCfg = Debug|Win32
13 | 		{2E1E9B38-BE1F-4A69-9252-5D019D8145D6}.Debug|Win32.Build.0 = Debug|Win32
14 | 		{2E1E9B38-BE1F-4A69-9252-5D019D8145D6}.Release|Win32.ActiveCfg = Release|Win32
15 | 		{2E1E9B38-BE1F-4A69-9252-5D019D8145D6}.Release|Win32.Build.0 = Release|Win32
16 | 	EndGlobalSection
17 | 	GlobalSection(SolutionProperties) = preSolution
18 | 		HideSolutionNode = FALSE
19 | 	EndGlobalSection
20 | EndGlobal
21 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # imageresampler
 2 | Exported from http://code.google.com/p/imageresampler. Original author is Rich Geldreich at *richgel99 at gmail.com*.
 3 | 
 4 | imageresampler is a public domain (see [unlicense.org](http://unlicense.org/)) C++ class for memory efficient image/bitmap [resampling](http://en.wikipedia.org/wiki/Resampling_(bitmap)) loosely based on a (heavily bugfixed) version of [Dale Schumacher's public domain resampler](http://tog.acm.org/resources/GraphicsGems/gemsiii/filter.c) in [Graphics Gems 3](http://www.amazon.com/Graphics-Gems-III-IBM-Version/dp/0124096735) (1994). It supports a wide range of filter kernels and windowing functions, clamp/wrap/reflect boundary modes, arbitrary source image phase offsets, floating point pixels with any number of components, and can process images much larger than available memory.
 5 | 
 6 | A long time ago, this resampler was written originally in C and 16-bit x86 assembly and was used in a printer driver for a DOS image viewer product. Memory was extremely tight in this environment, which is why this class supports streaming resampling. I've also successfully used this class to generate texture [mipmaps](http://en.wikipedia.org/wiki/Mipmap) in the tool chains of various PC and console game projects. This version is written in fairly portable C++.
 7 | 
 8 | ## Features
 9 | 
10 | * Can up/downsample completely independently on each axis.
11 | * Plenty of [antialiasing](http://en.wikipedia.org/wiki/Antialiasing) filters: box, tent, bell, b-spline, Mitchell, [Lanczos](http://en.wikipedia.org/wiki/Lanczos_resampling) 3/4/6/12, Blackman, Kaiser, Gaussian, etc.
12 | * Filter kernels can be offset and scaled (shrinking the kernel a bit to sharpen the output is very useful when generating texture mipmaps)
13 | * Supports clamp, wrap, etc. boundary modes - useful when filtering wrapping textures
14 | * Chooses the axis resample order that minimizes the total number of ops, like Heckbert's [Zoom](http://www.xmission.com/~legalize/zoom.html).
15 | * Operates on any number of input channels, floating point input/output
16 | * Minimal memory/streaming operation: you feed it some input scanlines, and it gives you as many output scanlines as it can. Or if you don't care about RAM you can feed it all your input scanlines, then "pull" all the output scanlines.
17 | 
18 | The source archive includes Visual C++ 2005 and 2008 solutions, and a Codeblocks 10.05 workspace.
19 | 
20 | ## Release History
21 | 
22 | * v2.21 - June 4, 2012: Added unlicense.org text (instead of just plain public domain), integrated GCC fixes supplied by Peter Nagy <petern@crytek.com>, Anteru at anteru.net, and clay@coge.net, added Codeblocks project (for testing with MinGW and GCC), added VS2008 project files, VS2008 static code analysis pass, added calls to delete at end of test.cpp.
23 | * v2.20 - Dec. 31, 2008: Released to public domain on Google Code.
24 | 
25 | ## Test Executable Instructions
26 | 
27 | I've included a precompiled Win32 executable under bin/resampler.exe. Example usage:
28 | 
29 | ```
30 |   resampler source_image.tga dest_image.tga dest_width dest_height
31 | ```
32 | 
33 | Where dest_width/dest_height is the desired destination image's resolution. The destination format is hard coded to TGA, but the source image may be in any format that Sean Barrett's [STB Image module](https://github.com/nothings/stb) supports: PNG/BMP/TGA/etc.
34 | 
35 | ## Additional Links
36 | 
37 | * [Mipmapping, Part 1](http://number-none.com/product/Mipmapping,%20Part%201/index.html)
38 | * [Mipmapping, Part 2](http://number-none.com/product/Mipmapping,%20Part%202/index.html)
39 | * [Ray Gardener's image resampler](http://www.daylongraphics.com/download/filter_rcg.zip) - Also derived from Schumacher's code. (URL doesn't appear to work anymore.)
40 | * [Paul Heckbert's "Zoom"](http://www.xmission.com/~legalize/zoom.html)
41 | * [Mitchell, Don P.; Netravali, Arun N. (August 1988). "Reconstruction filters in computer-graphics"](http://www.mentallandscape.com/Papers_siggraph88.pdf)
42 | 
43 | ## Support Contact
44 | 
45 | For any questions or problems with this code please contact Rich Geldreich at *richgel99 at gmail.com*. Here's my [twitter page](http://twitter.com/#!/richgel999).
46 | 


--------------------------------------------------------------------------------
/resampler.vcproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="Windows-1252"?>
  2 | <VisualStudioProject
  3 | 	ProjectType="Visual C++"
  4 | 	Version="8.00"
  5 | 	Name="resampler"
  6 | 	ProjectGUID="{2E1E9B38-BE1F-4A69-9252-5D019D8145D6}"
  7 | 	RootNamespace="resampler"
  8 | 	Keyword="Win32Proj"
  9 | 	>
 10 | 	<Platforms>
 11 | 		<Platform
 12 | 			Name="Win32"
 13 | 		/>
 14 | 	</Platforms>
 15 | 	<ToolFiles>
 16 | 	</ToolFiles>
 17 | 	<Configurations>
 18 | 		<Configuration
 19 | 			Name="Debug|Win32"
 20 | 			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
 21 | 			IntermediateDirectory="$(ConfigurationName)"
 22 | 			ConfigurationType="1"
 23 | 			CharacterSet="0"
 24 | 			>
 25 | 			<Tool
 26 | 				Name="VCPreBuildEventTool"
 27 | 			/>
 28 | 			<Tool
 29 | 				Name="VCCustomBuildTool"
 30 | 			/>
 31 | 			<Tool
 32 | 				Name="VCXMLDataGeneratorTool"
 33 | 			/>
 34 | 			<Tool
 35 | 				Name="VCWebServiceProxyGeneratorTool"
 36 | 			/>
 37 | 			<Tool
 38 | 				Name="VCMIDLTool"
 39 | 			/>
 40 | 			<Tool
 41 | 				Name="VCCLCompilerTool"
 42 | 				Optimization="0"
 43 | 				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
 44 | 				MinimalRebuild="true"
 45 | 				BasicRuntimeChecks="3"
 46 | 				RuntimeLibrary="1"
 47 | 				UsePrecompiledHeader="0"
 48 | 				WarningLevel="3"
 49 | 				Detect64BitPortabilityProblems="true"
 50 | 				DebugInformationFormat="4"
 51 | 			/>
 52 | 			<Tool
 53 | 				Name="VCManagedResourceCompilerTool"
 54 | 			/>
 55 | 			<Tool
 56 | 				Name="VCResourceCompilerTool"
 57 | 			/>
 58 | 			<Tool
 59 | 				Name="VCPreLinkEventTool"
 60 | 			/>
 61 | 			<Tool
 62 | 				Name="VCLinkerTool"
 63 | 				LinkIncremental="2"
 64 | 				GenerateDebugInformation="true"
 65 | 				SubSystem="1"
 66 | 				TargetMachine="1"
 67 | 			/>
 68 | 			<Tool
 69 | 				Name="VCALinkTool"
 70 | 			/>
 71 | 			<Tool
 72 | 				Name="VCManifestTool"
 73 | 			/>
 74 | 			<Tool
 75 | 				Name="VCXDCMakeTool"
 76 | 			/>
 77 | 			<Tool
 78 | 				Name="VCBscMakeTool"
 79 | 			/>
 80 | 			<Tool
 81 | 				Name="VCFxCopTool"
 82 | 			/>
 83 | 			<Tool
 84 | 				Name="VCAppVerifierTool"
 85 | 			/>
 86 | 			<Tool
 87 | 				Name="VCWebDeploymentTool"
 88 | 			/>
 89 | 			<Tool
 90 | 				Name="VCPostBuildEventTool"
 91 | 			/>
 92 | 		</Configuration>
 93 | 		<Configuration
 94 | 			Name="Release|Win32"
 95 | 			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
 96 | 			IntermediateDirectory="$(ConfigurationName)"
 97 | 			ConfigurationType="1"
 98 | 			CharacterSet="0"
 99 | 			WholeProgramOptimization="1"
100 | 			>
101 | 			<Tool
102 | 				Name="VCPreBuildEventTool"
103 | 			/>
104 | 			<Tool
105 | 				Name="VCCustomBuildTool"
106 | 			/>
107 | 			<Tool
108 | 				Name="VCXMLDataGeneratorTool"
109 | 			/>
110 | 			<Tool
111 | 				Name="VCWebServiceProxyGeneratorTool"
112 | 			/>
113 | 			<Tool
114 | 				Name="VCMIDLTool"
115 | 			/>
116 | 			<Tool
117 | 				Name="VCCLCompilerTool"
118 | 				Optimization="3"
119 | 				InlineFunctionExpansion="2"
120 | 				FavorSizeOrSpeed="1"
121 | 				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
122 | 				RuntimeLibrary="0"
123 | 				UsePrecompiledHeader="0"
124 | 				WarningLevel="3"
125 | 				Detect64BitPortabilityProblems="true"
126 | 				DebugInformationFormat="3"
127 | 			/>
128 | 			<Tool
129 | 				Name="VCManagedResourceCompilerTool"
130 | 			/>
131 | 			<Tool
132 | 				Name="VCResourceCompilerTool"
133 | 			/>
134 | 			<Tool
135 | 				Name="VCPreLinkEventTool"
136 | 			/>
137 | 			<Tool
138 | 				Name="VCLinkerTool"
139 | 				LinkIncremental="1"
140 | 				GenerateDebugInformation="true"
141 | 				SubSystem="1"
142 | 				OptimizeReferences="2"
143 | 				EnableCOMDATFolding="2"
144 | 				TargetMachine="1"
145 | 			/>
146 | 			<Tool
147 | 				Name="VCALinkTool"
148 | 			/>
149 | 			<Tool
150 | 				Name="VCManifestTool"
151 | 			/>
152 | 			<Tool
153 | 				Name="VCXDCMakeTool"
154 | 			/>
155 | 			<Tool
156 | 				Name="VCBscMakeTool"
157 | 			/>
158 | 			<Tool
159 | 				Name="VCFxCopTool"
160 | 			/>
161 | 			<Tool
162 | 				Name="VCAppVerifierTool"
163 | 			/>
164 | 			<Tool
165 | 				Name="VCWebDeploymentTool"
166 | 			/>
167 | 			<Tool
168 | 				Name="VCPostBuildEventTool"
169 | 			/>
170 | 		</Configuration>
171 | 	</Configurations>
172 | 	<References>
173 | 	</References>
174 | 	<Files>
175 | 		<Filter
176 | 			Name="Source Files"
177 | 			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
178 | 			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
179 | 			>
180 | 			<File
181 | 				RelativePath=".\resampler.cpp"
182 | 				>
183 | 			</File>
184 | 			<File
185 | 				RelativePath=".\resampler.h"
186 | 				>
187 | 			</File>
188 | 			<File
189 | 				RelativePath=".\stb_image.c"
190 | 				>
191 | 			</File>
192 | 			<File
193 | 				RelativePath=".\test.cpp"
194 | 				>
195 | 			</File>
196 | 		</Filter>
197 | 		<Filter
198 | 			Name="Header Files"
199 | 			Filter="h;hpp;hxx;hm;inl;inc;xsd"
200 | 			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
201 | 			>
202 | 		</Filter>
203 | 		<Filter
204 | 			Name="Resource Files"
205 | 			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
206 | 			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
207 | 			>
208 | 		</Filter>
209 | 	</Files>
210 | 	<Globals>
211 | 	</Globals>
212 | </VisualStudioProject>
213 | 


--------------------------------------------------------------------------------
/resampler.h:
--------------------------------------------------------------------------------
  1 | // resampler.h, Separable filtering image rescaler v2.21, Rich Geldreich - richgel99@gmail.com
  2 | // See unlicense.org text at the bottom of this file.
  3 | #ifndef __RESAMPLER_H__
  4 | #define __RESAMPLER_H__
  5 | 
  6 | #define RESAMPLER_DEBUG_OPS 0
  7 | #define RESAMPLER_DEFAULT_FILTER "lanczos4"
  8 | 
  9 | #define RESAMPLER_MAX_DIMENSION 16384
 10 | 
 11 | // float or double
 12 | typedef float Resample_Real;
 13 | 
 14 | class Resampler  // Separable filtering image rescaler: feed source rows with put_line(), pull output rows with get_line().
 15 | {
 16 | public:
 17 |    typedef Resample_Real Sample;  // Sample type accepted by put_line() and returned by get_line().
 18 | 
 19 |    struct Contrib  // One (weight, pixel) pair; presumably a weighted source-pixel contribution -- confirm in make_clist().
 20 |    {
 21 |       Resample_Real weight;
 22 |       unsigned short pixel;
 23 |    };
 24 | 
 25 |    struct Contrib_List  // Presumably p points at n Contrib entries -- confirm in make_clist(); count_ops() sums n.
 26 |    {
 27 |       unsigned short n;
 28 |       Contrib* p;
 29 |    };
 30 | 
 31 |    enum Boundary_Op  // Selects how pixels near the image boundaries are sampled (see the constructor notes below).
 32 |    {
 33 |       BOUNDARY_WRAP = 0,
 34 |       BOUNDARY_REFLECT = 1,
 35 |       BOUNDARY_CLAMP = 2
 36 |    };
 37 | 
 38 |    enum Status  // Values reported by status().
 39 |    {
 40 |       STATUS_OKAY = 0,
 41 |       STATUS_OUT_OF_MEMORY = 1,
 42 |       STATUS_BAD_FILTER_NAME = 2,
 43 |       STATUS_SCAN_BUFFER_FULL = 3
 44 |    };
 45 |    
 46 |    // src_x/src_y - Input dimensions
 47 |    // dst_x/dst_y - Output dimensions
 48 |    // boundary_op - How to sample pixels near the image boundaries
 49 |    // sample_low/sample_high - Clamp output samples to specified range, or disable clamping if sample_low >= sample_high
 50 |    // Pclist_x/Pclist_y - Optional pointers to contributor lists from another instance of a Resampler
 51 |    // src_x_ofs/src_y_ofs - Offset input image by specified amount (fractional values okay)
 52 |    Resampler(
 53 |       int src_x, int src_y,
 54 |       int dst_x, int dst_y,
 55 |       Boundary_Op boundary_op = BOUNDARY_CLAMP,
 56 |       Resample_Real sample_low = 0.0f, Resample_Real sample_high = 0.0f,		
 57 |       const char* Pfilter_name = RESAMPLER_DEFAULT_FILTER,  // Filter selected by name; see get_filter_name().
 58 |       Contrib_List* Pclist_x = NULL,
 59 |       Contrib_List* Pclist_y = NULL,
 60 |       Resample_Real filter_x_scale = 1.0f,
 61 |       Resample_Real filter_y_scale = 1.0f,
 62 |       Resample_Real src_x_ofs = 0.0f, 
 63 |       Resample_Real src_y_ofs = 0.0f);
 64 | 
 65 |    ~Resampler();
 66 | 
 67 |    // Reinits resampler so it can handle another frame.
 68 |    void restart();
 69 | 
 70 |    // false on out of memory.
 71 |    bool put_line(const Sample* Psrc);
 72 | 
 73 |    // NULL if no scanlines are currently available (give the resampler more scanlines!)
 74 |    const Sample* get_line();
 75 | 
 76 |    Status status() const { return m_status; }  // Returns the stored m_status value.
 77 | 
 78 |    // Returned contributor lists can be shared with another Resampler. 
 79 |    void get_clists(Contrib_List** ptr_clist_x, Contrib_List** ptr_clist_y);
 80 |    Contrib_List* get_clist_x() const {	return m_Pclist_x; }
 81 |    Contrib_List* get_clist_y() const {	return m_Pclist_y; }
 82 | 
 83 |    // Filter accessors.
 84 |    static int get_filter_num();
 85 |    static char* get_filter_name(int filter_num);
 86 |    
 87 | private:
 88 |    Resampler();  // Declared private; NOTE(review): presumably left undefined to forbid default construction -- confirm.
 89 |    Resampler(const Resampler& o);  // Declared private; NOTE(review): presumably to forbid copying -- confirm.
 90 |    Resampler& operator= (const Resampler& o);  // Declared private; NOTE(review): presumably to forbid assignment -- confirm.
 91 | 
 92 | #ifdef RESAMPLER_DEBUG_OPS
 93 |    int total_ops;  // NOTE(review): RESAMPLER_DEBUG_OPS is #defined (to 0) above, so this #ifdef is always taken; "#if" may have been intended -- confirm against resampler.cpp.
 94 | #endif
 95 | 
 96 |    int m_intermediate_x;
 97 | 
 98 |    int m_resample_src_x;
 99 |    int m_resample_src_y;
100 |    int m_resample_dst_x;
101 |    int m_resample_dst_y;
102 | 
103 |    Boundary_Op m_boundary_op;
104 | 
105 |    Sample* m_Pdst_buf;
106 |    Sample* m_Ptmp_buf;
107 | 
108 |    Contrib_List* m_Pclist_x;  // Returned by get_clist_x().
109 |    Contrib_List* m_Pclist_y;  // Returned by get_clist_y().
110 | 
111 |    bool m_clist_x_forced;  // Presumably set when Pclist_x was supplied by the caller -- confirm in the constructor.
112 |    bool m_clist_y_forced;  // Presumably set when Pclist_y was supplied by the caller -- confirm in the constructor.
113 | 
114 |    bool m_delay_x_resample;
115 | 
116 |    int* m_Psrc_y_count;
117 |    unsigned char* m_Psrc_y_flag;
118 | 
119 |    // The maximum number of scanlines that can be buffered at one time.
120 |    enum { MAX_SCAN_BUF_SIZE = RESAMPLER_MAX_DIMENSION };
121 |    
122 |    struct Scan_Buf  // Two parallel arrays of MAX_SCAN_BUF_SIZE entries: an int and a Sample pointer per slot.
123 |    {
124 |       int scan_buf_y[MAX_SCAN_BUF_SIZE];
125 |       Sample* scan_buf_l[MAX_SCAN_BUF_SIZE];
126 |    };
127 | 
128 |    Scan_Buf* m_Pscan_buf;
129 | 
130 |    int m_cur_src_y;
131 |    int m_cur_dst_y;
132 | 
133 |    Status m_status;  // Returned by status().
134 | 
135 |    void resample_x(Sample* Pdst, const Sample* Psrc);
136 |    void scale_y_mov(Sample* Ptmp, const Sample* Psrc, Resample_Real weight, int dst_x);
137 |    void scale_y_add(Sample* Ptmp, const Sample* Psrc, Resample_Real weight, int dst_x);
138 |    void clamp(Sample* Pdst, int n);
139 |    void resample_y(Sample* Pdst);
140 | 
141 |    int reflect(const int j, const int src_x, const Boundary_Op boundary_op);
142 | 
143 |    Contrib_List* make_clist(
144 |       int src_x, int dst_x, Boundary_Op boundary_op,
145 |       Resample_Real (*Pfilter)(Resample_Real),
146 |       Resample_Real filter_support,
147 |       Resample_Real filter_scale,
148 |       Resample_Real src_ofs);
149 | 
150 |    inline int count_ops(Contrib_List* Pclist, int k)  // Returns the sum of Pclist[i].n for i in [0, k).
151 |    {
152 |       int i, t = 0;
153 |       for (i = 0; i < k; i++)
154 |          t += Pclist[i].n;
155 |       return (t);
156 |    }
157 | 
158 |    Resample_Real m_lo;  // Lower bound used by clamp_sample().
159 |    Resample_Real m_hi;  // Upper bound used by clamp_sample().
160 | 
161 |    inline Resample_Real clamp_sample(Resample_Real f) const  // Returns f limited to the range [m_lo, m_hi].
162 |    {
163 |       if (f < m_lo)
164 |          f = m_lo;
165 |       else if (f > m_hi)
166 |          f = m_hi;
167 |       return f;
168 |    }   
169 | };
170 | 
171 | #endif // __RESAMPLER_H__
172 | 
173 | // This is free and unencumbered software released into the public domain.
174 | //
175 | // Anyone is free to copy, modify, publish, use, compile, sell, or
176 | // distribute this software, either in source code form or as a compiled
177 | // binary, for any purpose, commercial or non-commercial, and by any
178 | // means.
179 | // 
180 | // In jurisdictions that recognize copyright laws, the author or authors
181 | // of this software dedicate any and all copyright interest in the
182 | // software to the public domain. We make this dedication for the benefit
183 | // of the public at large and to the detriment of our heirs and
184 | // successors. We intend this dedication to be an overt act of
185 | // relinquishment in perpetuity of all present and future rights to this
186 | // software under copyright law.
187 | // 
188 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
189 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
190 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
191 | // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
192 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
193 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
194 | // OTHER DEALINGS IN THE SOFTWARE.
195 | // 
196 | // For more information, please refer to <http://unlicense.org/>
197 | 


--------------------------------------------------------------------------------
/test.cpp:
--------------------------------------------------------------------------------
  1 | // resampler test, Rich Geldreich - richgel99@gmail.com
  2 | // See unlicense.org text at the bottom of resampler.h
  3 | // Example usage: resampler.exe input.tga output.tga width height
  4 | #include <stdlib.h>
  5 | #include <stdio.h>
  6 | #include <string.h>
  7 | #include <math.h>
  8 | #include <assert.h>
  9 | #include <vector>
 10 | #include <algorithm>
 11 | 
 12 | #include "resampler.h"
 13 | 
 14 | #define STBI_HEADER_FILE_ONLY
 15 | #include "stb_image.c"
 16 | 
 17 | int main(int arg_c, char** arg_v)
 18 | {
 19 |    if (arg_c != 5)
 20 |    {
 21 |       printf("Usage: input_image output_image.tga width height\n");
 22 |       return EXIT_FAILURE;
 23 |    }
 24 |    
 25 |    const char* pSrc_filename = arg_v[1];
 26 |    const char* pDst_filename = arg_v[2];
 27 |    const int dst_width = atoi(arg_v[3]);
 28 |    const int dst_height = atoi(arg_v[4]);
 29 |    
 30 |    if ((std::min(dst_width, dst_height) < 1) || (std::max(dst_width, dst_height) > RESAMPLER_MAX_DIMENSION))
 31 |    {
 32 |       printf("Invalid output width/height!\n");
 33 |       return EXIT_FAILURE;
 34 |    }
 35 |    
 36 |    printf("Loading image: %s\n", pSrc_filename);
 37 |    
 38 |    int src_width, src_height, n;
 39 |    unsigned char* pSrc_image = stbi_load(pSrc_filename, &src_width, &src_height, &n, 0);
 40 |    if (!pSrc_image)
 41 |    {
 42 |       printf("Failed loading image!\n");
 43 |       return EXIT_FAILURE;
 44 |    }
 45 |    
 46 |    printf("Resolution: %ux%u, Channels: %u\n", src_width, src_height, n);
 47 |    
 48 |    const int max_components = 4;   
 49 |    
 50 |    if ((std::max(src_width, src_height) > RESAMPLER_MAX_DIMENSION) || (n > max_components))
 51 |    {
 52 |       printf("Image is too large!\n");
 53 |       return EXIT_FAILURE;
 54 |    }
 55 |       
 56 |    // Partial gamma correction looks better on mips. Set to 1.0 to disable gamma correction. 
 57 |    const float source_gamma = 1.75f;
 58 |    
 59 |    // Filter scale - values < 1.0 cause aliasing, but create sharper looking mips.
 60 |    const float filter_scale = 1.0f;//.75f;
 61 |    
 62 |    const char* pFilter = "blackman";//RESAMPLER_DEFAULT_FILTER;
 63 |          
 64 |    float srgb_to_linear[256];
 65 |    for (int i = 0; i < 256; ++i)
 66 |       srgb_to_linear[i] = (float)pow(i * 1.0f/255.0f, source_gamma);
 67 | 
 68 |    const int linear_to_srgb_table_size = 4096;
 69 |    unsigned char linear_to_srgb[linear_to_srgb_table_size];
 70 |    
 71 |    const float inv_linear_to_srgb_table_size = 1.0f / linear_to_srgb_table_size;
 72 |    const float inv_source_gamma = 1.0f / source_gamma;
 73 | 
 74 |    for (int i = 0; i < linear_to_srgb_table_size; ++i)
 75 |    {
 76 |       int k = (int)(255.0f * pow(i * inv_linear_to_srgb_table_size, inv_source_gamma) + .5f);
 77 |       if (k < 0) k = 0; else if (k > 255) k = 255;
 78 |       linear_to_srgb[i] = (unsigned char)k;
 79 |    }
 80 |    
 81 |    Resampler* resamplers[max_components];
 82 |    std::vector<float> samples[max_components];
 83 |    
 84 |    // Now create a Resampler instance for each component to process. The first instance will create new contributor tables, which are shared by the resamplers 
 85 |    // used for the other components (a memory and slight cache efficiency optimization).
 86 |    resamplers[0] = new Resampler(src_width, src_height, dst_width, dst_height, Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f, pFilter, NULL, NULL, filter_scale, filter_scale);
 87 |    samples[0].resize(src_width);
 88 |    for (int i = 1; i < n; i++)
 89 |    {
 90 |       resamplers[i] = new Resampler(src_width, src_height, dst_width, dst_height, Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f, pFilter, resamplers[0]->get_clist_x(), resamplers[0]->get_clist_y(), filter_scale, filter_scale);
 91 |       samples[i].resize(src_width);
 92 |    }      
 93 |       
 94 |    std::vector<unsigned char> dst_image(dst_width * n * dst_height);
 95 |    
 96 |    const int src_pitch = src_width * n;
 97 |    const int dst_pitch = dst_width * n;
 98 |    int dst_y = 0;
 99 |    
100 |    printf("Resampling to %ux%u\n", dst_width, dst_height);
101 |       
102 |    for (int src_y = 0; src_y < src_height; src_y++)
103 |    {
104 |       const unsigned char* pSrc = &pSrc_image[src_y * src_pitch];
105 |          
106 |       for (int x = 0; x < src_width; x++)
107 |       {
108 |          for (int c = 0; c < n; c++)
109 |          {
110 |             if ((c == 3) || ((n == 2) && (c == 1)))
111 |                samples[c][x] = *pSrc++ * (1.0f/255.0f);
112 |             else
113 |                samples[c][x] = srgb_to_linear[*pSrc++];        
114 |          }
115 |       }
116 |       
117 |       for (int c = 0; c < n; c++)         
118 |       {
119 |          if (!resamplers[c]->put_line(&samples[c][0]))
120 |          {
121 |             printf("Out of memory!\n");
122 |             return EXIT_FAILURE;
123 |          }
124 |       }         
125 |          
126 |       for ( ; ; )
127 |       {
128 |          int comp_index;
129 |          for (comp_index = 0; comp_index < n; comp_index++)
130 |          {
131 |             const float* pOutput_samples = resamplers[comp_index]->get_line();
132 |             if (!pOutput_samples)
133 |                break;
134 |             
135 |             const bool alpha_channel = (comp_index == 3) || ((n == 2) && (comp_index == 1));
136 |             assert(dst_y < dst_height);
137 |             unsigned char* pDst = &dst_image[dst_y * dst_pitch + comp_index];
138 |             
139 |             for (int x = 0; x < dst_width; x++)
140 |             {
141 |                if (alpha_channel)
142 |                {
143 |                   int c = (int)(255.0f * pOutput_samples[x] + .5f);
144 |                   if (c < 0) c = 0; else if (c > 255) c = 255;
145 |                   *pDst = (unsigned char)c;
146 |                }
147 |                else
148 |                {
149 |                   int j = (int)(linear_to_srgb_table_size * pOutput_samples[x] + .5f);
150 |                   if (j < 0) j = 0; else if (j >= linear_to_srgb_table_size) j = linear_to_srgb_table_size - 1;
151 |                   *pDst = linear_to_srgb[j];
152 |                }
153 |                
154 |                pDst += n;
155 |             }
156 |          }     
157 |          if (comp_index < n)
158 |             break; 
159 |          
160 |          dst_y++;
161 |       }
162 |    }
163 |    
164 |    printf("Writing TGA file: %s\n", pDst_filename);
165 |    
166 |    if (!stbi_write_tga(pDst_filename, dst_width, dst_height, n, &dst_image[0]))
167 |    {
168 |       printf("Failed writing output image!\n");
169 |       return EXIT_FAILURE;
170 |    }
171 |    
172 |    stbi_image_free(pSrc_image);
173 | 
174 |    // Delete the resamplers.
175 |    for (int i = 0; i < n; i++)
176 |       delete resamplers[i];
177 |    
178 |    return EXIT_SUCCESS;
179 | }
180 | 


--------------------------------------------------------------------------------
/resampler.cpp:
--------------------------------------------------------------------------------
   1 | // resampler.cpp, Separable filtering image rescaler v2.21, Rich Geldreich - richgel99@gmail.com
   2 | // See unlicense at the bottom of resampler.h, or at http://unlicense.org/
   3 | //
   4 | // Feb. 1996: Creation, loosely based on a heavily bugfixed version of Schumacher's resampler in Graphics Gems 3.
   5 | // Oct. 2000: Ported to C++, tweaks.
   6 | // May 2001: Continuous to discrete mapping, box filter tweaks.
   7 | // March 9, 2002: Kaiser filter grabbed from Jonathan Blow's GD magazine mipmap sample code.
   8 | // Sept. 8, 2002: Comments cleaned up a bit.
   9 | // Dec. 31, 2008: v2.2: Bit more cleanup, released as public domain.
  10 | // June 4, 2012: v2.21: Switched to unlicense.org, integrated GCC fixes supplied by Peter Nagy <petern@crytek.com>, Anteru at anteru.net, and clay@coge.net, 
  11 | // added Codeblocks project (for testing with MinGW and GCC), VS2008 static code analysis pass.
  12 | #include <stdlib.h>
  13 | #include <math.h>
  14 | #include <float.h>
  15 | #include <assert.h>
  16 | #include <string.h>
  17 | #include "resampler.h"
  18 | 
  19 | #define resampler_assert assert
  20 | 
  21 | static inline int resampler_range_check(int v, int h) { (void)h; resampler_assert((v >= 0) && (v < h)); return v; }
  22 | 
  23 | #ifndef max
  24 |    #define max(a,b) (((a) > (b)) ? (a) : (b))
  25 | #endif
  26 | 
  27 | #ifndef min
  28 |    #define min(a,b) (((a) < (b)) ? (a) : (b))
  29 | #endif
  30 | 
  31 | #ifndef TRUE
  32 |    #define TRUE (1)
  33 | #endif
  34 | 
  35 | #ifndef FALSE
  36 |    #define FALSE (0)
  37 | #endif
  38 | 
  39 | #define RESAMPLER_DEBUG 0
  40 | 
  41 | #define M_PI 3.14159265358979323846
  42 | 
  43 | // Float to int cast with truncation.
  44 | static inline int cast_to_int(Resample_Real i)
  45 | {
  46 |    return (int)i;
  47 | }
  48 | 
  49 | // (x mod y) with special handling for negative x values.
  50 | static inline int posmod(int x, int y)
  51 | {
  52 |    if (x >= 0)
  53 |       return (x % y);
  54 |    else
  55 |    {
  56 |       int m = (-x) % y;
  57 | 
  58 |       if (m != 0)
  59 |          m = y - m;
  60 | 
  61 |       return (m);
  62 |    }
  63 | }
  64 | 
  65 | // To add your own filter, insert the new function below and update the filter table.
  66 | // There is no need to make the filter function particularly fast, because it's
  67 | // only called during initializing to create the X and Y axis contributor tables.
  68 | 
  69 | #define BOX_FILTER_SUPPORT (0.5f)
  70 | static Resample_Real box_filter(Resample_Real t)    /* pulse/Fourier window */
  71 | {
  72 |    // make_clist() calls the filter function with t inverted (pos = left, neg = right)
  73 |    if ((t >= -0.5f) && (t < 0.5f))
  74 |       return 1.0f;
  75 |    else
  76 |       return 0.0f;
  77 | }
  78 | 
  79 | #define TENT_FILTER_SUPPORT (1.0f)
  80 | static Resample_Real tent_filter(Resample_Real t)   /* box (*) box, bilinear/triangle */
  81 | {
  82 |    if (t < 0.0f)
  83 |       t = -t;
  84 | 
  85 |    if (t < 1.0f)
  86 |       return 1.0f - t;
  87 |    else
  88 |       return 0.0f;
  89 | }
  90 | 
  91 | #define BELL_SUPPORT (1.5f)
  92 | static Resample_Real bell_filter(Resample_Real t)    /* box (*) box (*) box */
  93 | {
  94 |    if (t < 0.0f)
  95 |       t = -t;
  96 | 
  97 |    if (t < .5f)
  98 |       return (.75f - (t * t));
  99 | 
 100 |    if (t < 1.5f)
 101 |    {
 102 |       t = (t - 1.5f);
 103 |       return (.5f * (t * t));
 104 |    }
 105 | 
 106 |    return (0.0f);
 107 | }
 108 | 
 109 | #define B_SPLINE_SUPPORT (2.0f)
 110 | static Resample_Real B_spline_filter(Resample_Real t)  /* box (*) box (*) box (*) box */
 111 | {
 112 |    Resample_Real tt;
 113 | 
 114 |    if (t < 0.0f)
 115 |       t = -t;
 116 | 
 117 |    if (t < 1.0f)
 118 |    {
 119 |       tt = t * t;
 120 |       return ((.5f * tt * t) - tt + (2.0f / 3.0f));
 121 |    }
 122 |    else if (t < 2.0f)
 123 |    {
 124 |       t = 2.0f - t;
 125 |       return ((1.0f / 6.0f) * (t * t * t));
 126 |    }
 127 | 
 128 |    return (0.0f);
 129 | }
 130 | 
 131 | // Dodgson, N., "Quadratic Interpolation for Image Resampling"
 132 | #define QUADRATIC_SUPPORT 1.5f
 133 | static Resample_Real quadratic(Resample_Real t, const Resample_Real R)
 134 | {
 135 |    if (t < 0.0f)
 136 |       t = -t;
 137 |    if (t < QUADRATIC_SUPPORT)
 138 |    {
 139 |       Resample_Real tt = t * t;
 140 |       if (t <= .5f)
 141 |          return (-2.0f * R) * tt + .5f * (R + 1.0f);
 142 |       else
 143 |          return (R * tt) + (-2.0f * R - .5f) * t + (3.0f / 4.0f) * (R + 1.0f);
 144 |    }
 145 |    else
 146 |       return 0.0f;
 147 | }
 148 | 
 149 | static Resample_Real quadratic_interp_filter(Resample_Real t)
 150 | {
 151 |    return quadratic(t, 1.0f);
 152 | }
 153 | 
 154 | static Resample_Real quadratic_approx_filter(Resample_Real t)
 155 | {
 156 |    return quadratic(t, .5f);
 157 | }
 158 | 
 159 | static Resample_Real quadratic_mix_filter(Resample_Real t)
 160 | {
 161 |    return quadratic(t, .8f);
 162 | }
 163 | 
 164 | // Mitchell, D. and A. Netravali, "Reconstruction Filters in Computer Graphics."
 165 | // Computer Graphics, Vol. 22, No. 4, pp. 221-228.
 166 | // (B, C)
 167 | // (1/3, 1/3)  - Defaults recommended by Mitchell and Netravali
 168 | // (1, 0)	   - Equivalent to the Cubic B-Spline
 169 | // (0, 0.5)		- Equivalent to the Catmull-Rom Spline
 170 | // (0, C)		- The family of Cardinal Cubic Splines
 171 | // (B, 0)		- Duff's tensioned B-Splines.
 172 | static Resample_Real mitchell(Resample_Real t, const Resample_Real B, const Resample_Real C)
 173 | {
 174 |    Resample_Real tt;
 175 | 
 176 |    tt = t * t;
 177 | 
 178 |    if(t < 0.0f)
 179 |       t = -t;
 180 | 
 181 |    if(t < 1.0f)
 182 |    {
 183 |       t = (((12.0f - 9.0f * B - 6.0f * C) * (t * tt))
 184 |          + ((-18.0f + 12.0f * B + 6.0f * C) * tt)
 185 |          + (6.0f - 2.0f * B));
 186 | 
 187 |       return (t / 6.0f);
 188 |    }
 189 |    else if (t < 2.0f)
 190 |    {
 191 |       t = (((-1.0f * B - 6.0f * C) * (t * tt))
 192 |          + ((6.0f * B + 30.0f * C) * tt)
 193 |          + ((-12.0f * B - 48.0f * C) * t)
 194 |          + (8.0f * B + 24.0f * C));
 195 | 
 196 |       return (t / 6.0f);
 197 |    }
 198 | 
 199 |    return (0.0f);
 200 | }
 201 | 
#define MITCHELL_SUPPORT (2.0f)
// Mitchell-Netravali cubic with (B, C) = (1/3, 1/3).
static Resample_Real mitchell_filter(Resample_Real t)
{
   return mitchell(t, 1.0f / 3.0f, 1.0f / 3.0f);
}
 207 | 
#define CATMULL_ROM_SUPPORT (2.0f)
// Catmull-Rom spline, evaluated as the (B, C) = (0, 0.5) member of the
// mitchell() cubic family.
static Resample_Real catmull_rom_filter(Resample_Real t)
{
   return mitchell(t, 0.0f, .5f);
}
 213 | 
 214 | static double sinc(double x)
 215 | {
 216 |    x = (x * M_PI);
 217 | 
 218 |    if ((x < 0.01f) && (x > -0.01f))
 219 |       return 1.0f + x*x*(-1.0f/6.0f + x*x*1.0f/120.0f);
 220 | 
 221 |    return sin(x) / x;
 222 | }
 223 | 
 224 | static Resample_Real clean(double t)
 225 | {
 226 |    const Resample_Real EPSILON = .0000125f;
 227 |    if (fabs(t) < EPSILON)
 228 |       return 0.0f;
 229 |    return (Resample_Real)t;
 230 | }
 231 | 
 232 | //static double blackman_window(double x)
 233 | //{
 234 | //	return .42f + .50f * cos(M_PI*x) + .08f * cos(2.0f*M_PI*x);
 235 | //}
 236 | 
 237 | static double blackman_exact_window(double x)
 238 | {
 239 |    return 0.42659071f + 0.49656062f * cos(M_PI*x) + 0.07684867f * cos(2.0f*M_PI*x);
 240 | }
 241 | 
 242 | #define BLACKMAN_SUPPORT (3.0f)
 243 | static Resample_Real blackman_filter(Resample_Real t)
 244 | {
 245 |    if (t < 0.0f)
 246 |       t = -t;
 247 | 
 248 |    if (t < 3.0f)
 249 |       //return clean(sinc(t) * blackman_window(t / 3.0f));
 250 |       return clean(sinc(t) * blackman_exact_window(t / 3.0f));
 251 |    else
 252 |       return (0.0f);
 253 | }
 254 | 
 255 | #define GAUSSIAN_SUPPORT (1.25f)
 256 | static Resample_Real gaussian_filter(Resample_Real t) // with blackman window
 257 | {
 258 |    if (t < 0)
 259 |       t = -t;
 260 |    if (t < GAUSSIAN_SUPPORT)
 261 |       return clean(exp(-2.0f * t * t) * sqrt(2.0f / M_PI) * blackman_exact_window(t / GAUSSIAN_SUPPORT));
 262 |    else
 263 |       return 0.0f;
 264 | }
 265 | 
 266 | // Windowed sinc -- see "Jimm Blinn's Corner: Dirty Pixels" pg. 26.
 267 | #define LANCZOS3_SUPPORT (3.0f)
 268 | static Resample_Real lanczos3_filter(Resample_Real t)
 269 | {
 270 |    if (t < 0.0f)
 271 |       t = -t;
 272 | 
 273 |    if (t < 3.0f)
 274 |       return clean(sinc(t) * sinc(t / 3.0f));
 275 |    else
 276 |       return (0.0f);
 277 | }
 278 | 
 279 | #define LANCZOS4_SUPPORT (4.0f)
 280 | static Resample_Real lanczos4_filter(Resample_Real t)
 281 | {
 282 |    if (t < 0.0f)
 283 |       t = -t;
 284 | 
 285 |    if (t < 4.0f)
 286 |       return clean(sinc(t) * sinc(t / 4.0f));
 287 |    else
 288 |       return (0.0f);
 289 | }
 290 | 
 291 | #define LANCZOS6_SUPPORT (6.0f)
 292 | static Resample_Real lanczos6_filter(Resample_Real t)
 293 | {
 294 |    if (t < 0.0f)
 295 |       t = -t;
 296 | 
 297 |    if (t < 6.0f)
 298 |       return clean(sinc(t) * sinc(t / 6.0f));
 299 |    else
 300 |       return (0.0f);
 301 | }
 302 | 
 303 | #define LANCZOS12_SUPPORT (12.0f)
 304 | static Resample_Real lanczos12_filter(Resample_Real t)
 305 | {
 306 |    if (t < 0.0f)
 307 |       t = -t;
 308 | 
 309 |    if (t < 12.0f)
 310 |       return clean(sinc(t) * sinc(t / 12.0f));
 311 |    else
 312 |       return (0.0f);
 313 | }
 314 | 
 315 | static double bessel0(double x)
 316 | {
 317 |    const double EPSILON_RATIO = 1E-16;
 318 |    double xh, sum, pow, ds;
 319 |    int k;
 320 | 
 321 |    xh = 0.5 * x;
 322 |    sum = 1.0;
 323 |    pow = 1.0;
 324 |    k = 0;
 325 |    ds = 1.0;
 326 |    while (ds > sum * EPSILON_RATIO) // FIXME: Shouldn't this stop after X iterations for max. safety?
 327 |    {
 328 |       ++k;
 329 |       pow = pow * (xh / k);
 330 |       ds = pow * pow;
 331 |       sum = sum + ds;
 332 |    }
 333 | 
 334 |    return sum;
 335 | }
 336 | 
 337 | static const Resample_Real KAISER_ALPHA = 4.0;
 338 | static double kaiser(double alpha, double half_width, double x)
 339 | {
 340 |    const double ratio = (x / half_width);
 341 |    return bessel0(alpha * sqrt(1 - ratio * ratio)) / bessel0(alpha);
 342 | }
 343 | 
 344 | #define KAISER_SUPPORT 3
 345 | static Resample_Real kaiser_filter(Resample_Real t)
 346 | {
 347 |    if (t < 0.0f)
 348 |       t = -t;
 349 | 
 350 |    if (t < KAISER_SUPPORT)
 351 |    {
 352 |       // db atten
 353 |       const Resample_Real att = 40.0f;
 354 |       const Resample_Real alpha = (Resample_Real)(exp(log((double)0.58417 * (att - 20.96)) * 0.4) + 0.07886 * (att - 20.96));
 355 |       //const Resample_Real alpha = KAISER_ALPHA;
 356 |       return (Resample_Real)clean(sinc(t) * kaiser(alpha, KAISER_SUPPORT, t));
 357 |    }
 358 | 
 359 |    return 0.0f;
 360 | }
 361 | 
// g_filters[] is the table of all available filter kernels. The Resampler
// constructor looks a kernel up by comparing the requested name against the
// name field of each entry.
static struct
{
   // Name the filter is selected by (NUL-terminated, at most 31 characters).
   char name[32];
   // Kernel function; evaluated at a signed distance t from the kernel center.
   Resample_Real (*func)(Resample_Real t);
   // Half width of the kernel: the functions above return 0 for |t| >= support.
   Resample_Real support;
} g_filters[] =
{
   { "box",		            box_filter,			         BOX_FILTER_SUPPORT },
   { "tent",			      tent_filter,		         TENT_FILTER_SUPPORT },
   { "bell",			      bell_filter,	            BELL_SUPPORT },
   { "b-spline",	         B_spline_filter,	         B_SPLINE_SUPPORT },
   { "mitchell",	         mitchell_filter,	         MITCHELL_SUPPORT },
   { "lanczos3",	         lanczos3_filter,	         LANCZOS3_SUPPORT },
   { "blackman",	         blackman_filter,	         BLACKMAN_SUPPORT },
   { "lanczos4",	         lanczos4_filter,	         LANCZOS4_SUPPORT },
   { "lanczos6",	         lanczos6_filter,	         LANCZOS6_SUPPORT },
   { "lanczos12",          lanczos12_filter,          LANCZOS12_SUPPORT },
   { "kaiser",		         kaiser_filter,		         KAISER_SUPPORT },
   { "gaussian",	         gaussian_filter,	         GAUSSIAN_SUPPORT },
   { "catmullrom",         catmull_rom_filter,        CATMULL_ROM_SUPPORT },
   { "quadratic_interp",   quadratic_interp_filter,   QUADRATIC_SUPPORT },
   { "quadratic_approx",   quadratic_approx_filter,   QUADRATIC_SUPPORT },
   { "quadratic_mix",      quadratic_mix_filter,      QUADRATIC_SUPPORT },
};

// Number of entries in g_filters[], computed from the array size.
static const int NUM_FILTERS = sizeof(g_filters) / sizeof(g_filters[0]);
 389 | 
 390 | /* Ensure that the contributing source sample is
 391 | * within bounds. If not, reflect, clamp, or wrap.
 392 | */
 393 | int Resampler::reflect(const int j, const int src_x, const Boundary_Op boundary_op)
 394 | {
 395 |    int n;
 396 | 
 397 |    if (j < 0)
 398 |    {
 399 |       if (boundary_op == BOUNDARY_REFLECT)
 400 |       {
 401 |          n = -j;
 402 | 
 403 |          if (n >= src_x)
 404 |             n = src_x - 1;
 405 |       }
 406 |       else if (boundary_op == BOUNDARY_WRAP)
 407 |          n = posmod(j, src_x);
 408 |       else
 409 |          n = 0;
 410 |    }
 411 |    else if (j >= src_x)
 412 |    {
 413 |       if (boundary_op == BOUNDARY_REFLECT)
 414 |       {
 415 |          n = (src_x - j) + (src_x - 1);
 416 | 
 417 |          if (n < 0)
 418 |             n = 0;
 419 |       }
 420 |       else if (boundary_op == BOUNDARY_WRAP)
 421 |          n = posmod(j, src_x);
 422 |       else
 423 |          n = src_x - 1;
 424 |    }
 425 |    else
 426 |       n = j;
 427 | 
 428 |    return n;
 429 | }
 430 | 
 431 | // The make_clist() method generates, for all destination samples,
 432 | // the list of all source samples with non-zero weighted contributions.
 433 | Resampler::Contrib_List* Resampler::make_clist(
 434 |    int src_x, int dst_x, Boundary_Op boundary_op,
 435 |    Resample_Real (*Pfilter)(Resample_Real),
 436 |    Resample_Real filter_support,
 437 |    Resample_Real filter_scale,
 438 |    Resample_Real src_ofs)
 439 | {
 440 |    typedef struct
 441 |    {
 442 |       // The center of the range in DISCRETE coordinates (pixel center = 0.0f).
 443 |       Resample_Real center;
 444 |       int left, right;
 445 |    } Contrib_Bounds;
 446 | 
 447 |    int i, j, k, n, left, right;
 448 |    Resample_Real total_weight;
 449 |    Resample_Real xscale, center, half_width, weight;
 450 |    Contrib_List* Pcontrib;
 451 |    Contrib* Pcpool;
 452 |    Contrib* Pcpool_next;
 453 |    Contrib_Bounds* Pcontrib_bounds;
 454 | 
 455 |    if ((Pcontrib = (Contrib_List*)calloc(dst_x, sizeof(Contrib_List))) == NULL)
 456 |       return NULL;
 457 | 
 458 |    Pcontrib_bounds = (Contrib_Bounds*)calloc(dst_x, sizeof(Contrib_Bounds));
 459 |    if (!Pcontrib_bounds)
 460 |    {
 461 |       free(Pcontrib);
 462 |       return (NULL);
 463 |    }
 464 | 
 465 |    const Resample_Real oo_filter_scale = 1.0f / filter_scale;
 466 | 
 467 |    const Resample_Real NUDGE = 0.5f;
 468 |    xscale = dst_x / (Resample_Real)src_x;
 469 | 
 470 |    if (xscale < 1.0f)
 471 |    {
 472 |       int total; (void)total;
 473 | 
 474 |       /* Handle case when there are fewer destination
 475 |       * samples than source samples (downsampling/minification).
 476 |       */
 477 | 
 478 |       // stretched half width of filter
 479 |       half_width = (filter_support / xscale) * filter_scale;
 480 | 
 481 |       // Find the range of source sample(s) that will contribute to each destination sample.
 482 | 
 483 |       for (i = 0, n = 0; i < dst_x; i++)
 484 |       {
 485 |          // Convert from discrete to continuous coordinates, scale, then convert back to discrete.
 486 |          center = ((Resample_Real)i + NUDGE) / xscale;
 487 |          center -= NUDGE;
 488 |          center += src_ofs;
 489 | 
 490 |          left   = cast_to_int((Resample_Real)floor(center - half_width));
 491 |          right  = cast_to_int((Resample_Real)ceil(center + half_width));
 492 | 
 493 |          Pcontrib_bounds[i].center = center;
 494 |          Pcontrib_bounds[i].left		= left;
 495 |          Pcontrib_bounds[i].right	= right;
 496 | 
 497 |          n += (right - left + 1);
 498 |       }
 499 | 
 500 |       /* Allocate memory for contributors. */
 501 | 
 502 |       if ((n == 0) || ((Pcpool = (Contrib*)calloc(n, sizeof(Contrib))) == NULL))
 503 |       {
 504 |          free(Pcontrib);
 505 |          free(Pcontrib_bounds);
 506 |          return NULL;
 507 |       }
 508 |       total = n;
 509 | 
 510 |       Pcpool_next = Pcpool;
 511 | 
 512 |       /* Create the list of source samples which
 513 |       * contribute to each destination sample.
 514 |       */
 515 | 
 516 |       for (i = 0; i < dst_x; i++)
 517 |       {
 518 |          int max_k = -1;
 519 |          Resample_Real max_w = -1e+20f;
 520 | 
 521 |          center = Pcontrib_bounds[i].center;
 522 |          left   = Pcontrib_bounds[i].left;
 523 |          right  = Pcontrib_bounds[i].right;
 524 | 
 525 |          Pcontrib[i].n = 0;
 526 |          Pcontrib[i].p = Pcpool_next;
 527 |          Pcpool_next += (right - left + 1);
 528 |          resampler_assert ((Pcpool_next - Pcpool) <= total);
 529 | 
 530 |          total_weight = 0;
 531 | 
 532 |          for (j = left; j <= right; j++)
 533 |             total_weight += (*Pfilter)((center - (Resample_Real)j) * xscale * oo_filter_scale);
 534 |          const Resample_Real norm = static_cast<Resample_Real>(1.0f / total_weight);
 535 | 
 536 |          total_weight = 0;
 537 | 
 538 | #if RESAMPLER_DEBUG
 539 |          printf("%i: ", i);
 540 | #endif
 541 | 
 542 |          for (j = left; j <= right; j++)
 543 |          {
 544 |             weight = (*Pfilter)((center - (Resample_Real)j) * xscale * oo_filter_scale) * norm;
 545 |             if (weight == 0.0f)
 546 |                continue;
 547 | 
 548 |             n = reflect(j, src_x, boundary_op);
 549 | 
 550 | #if RESAMPLER_DEBUG
 551 |             printf("%i(%f), ", n, weight);
 552 | #endif
 553 | 
 554 |             /* Increment the number of source
 555 |             * samples which contribute to the
 556 |             * current destination sample.
 557 |             */
 558 | 
 559 |             k = Pcontrib[i].n++;
 560 | 
 561 |             Pcontrib[i].p[k].pixel  = (unsigned short)(n);       /* store src sample number */
 562 |             Pcontrib[i].p[k].weight = weight; /* store src sample weight */
 563 | 
 564 |             total_weight += weight;          /* total weight of all contributors */
 565 | 
 566 |             if (weight > max_w)
 567 |             {
 568 |                max_w = weight;
 569 |                max_k = k;
 570 |             }
 571 |          }
 572 | 
 573 | #if RESAMPLER_DEBUG
 574 |          printf("\n\n");
 575 | #endif
 576 | 
 577 |          //resampler_assert(Pcontrib[i].n);
 578 |          //resampler_assert(max_k != -1);
 579 |          if ((max_k == -1) || (Pcontrib[i].n == 0))
 580 |          {
 581 |             free(Pcpool);
 582 |             free(Pcontrib);
 583 |             free(Pcontrib_bounds);
 584 |             return NULL;
 585 |          }
 586 | 
 587 |          if (total_weight != 1.0f)
 588 |             Pcontrib[i].p[max_k].weight += 1.0f - total_weight;
 589 |       }
 590 |    }
 591 |    else
 592 |    {
 593 |       /* Handle case when there are more
 594 |       * destination samples than source
 595 |       * samples (upsampling).
 596 |       */
 597 | 
 598 |       half_width = filter_support * filter_scale;
 599 | 
 600 |       // Find the source sample(s) that contribute to each destination sample.
 601 | 
 602 |       for (i = 0, n = 0; i < dst_x; i++)
 603 |       {
 604 |          // Convert from discrete to continuous coordinates, scale, then convert back to discrete.
 605 |          center = ((Resample_Real)i + NUDGE) / xscale;
 606 |          center -= NUDGE;
 607 |          center += src_ofs;
 608 | 
 609 |          left   = cast_to_int((Resample_Real)floor(center - half_width));
 610 |          right  = cast_to_int((Resample_Real)ceil(center + half_width));
 611 | 
 612 |          Pcontrib_bounds[i].center = center;
 613 |          Pcontrib_bounds[i].left		= left;
 614 |          Pcontrib_bounds[i].right	= right;
 615 | 
 616 |          n += (right - left + 1);
 617 |       }
 618 | 
 619 |       /* Allocate memory for contributors. */
 620 | 
 621 |       int total = n;
 622 |       if ((total == 0) || ((Pcpool = (Contrib*)calloc(total, sizeof(Contrib))) == NULL))
 623 |       {
 624 |          free(Pcontrib);
 625 |          free(Pcontrib_bounds);
 626 |          return NULL;
 627 |       }
 628 | 
 629 |       Pcpool_next = Pcpool;
 630 | 
 631 |       /* Create the list of source samples which
 632 |       * contribute to each destination sample.
 633 |       */
 634 | 
 635 |       for (i = 0; i < dst_x; i++)
 636 |       {
 637 |          int max_k = -1;
 638 |          Resample_Real max_w = -1e+20f;
 639 | 
 640 |          center = Pcontrib_bounds[i].center;
 641 |          left   = Pcontrib_bounds[i].left;
 642 |          right  = Pcontrib_bounds[i].right;
 643 | 
 644 |          Pcontrib[i].n = 0;
 645 |          Pcontrib[i].p = Pcpool_next;
 646 |          Pcpool_next += (right - left + 1);
 647 |          resampler_assert((Pcpool_next - Pcpool) <= total);
 648 | 
 649 |          total_weight = 0;
 650 |          for (j = left; j <= right; j++)
 651 |             total_weight += (*Pfilter)((center - (Resample_Real)j) * oo_filter_scale);
 652 | 
 653 |          const Resample_Real norm = static_cast<Resample_Real>(1.0f / total_weight);
 654 | 
 655 |          total_weight = 0;
 656 | 
 657 | #if RESAMPLER_DEBUG
 658 |          printf("%i: ", i);
 659 | #endif
 660 | 
 661 |          for (j = left; j <= right; j++)
 662 |          {
 663 |             weight = (*Pfilter)((center - (Resample_Real)j) * oo_filter_scale) * norm;
 664 |             if (weight == 0.0f)
 665 |                continue;
 666 | 
 667 |             n = reflect(j, src_x, boundary_op);
 668 | 
 669 | #if RESAMPLER_DEBUG
 670 |             printf("%i(%f), ", n, weight);
 671 | #endif
 672 | 
 673 |             /* Increment the number of source
 674 |             * samples which contribute to the
 675 |             * current destination sample.
 676 |             */
 677 | 
 678 |             k = Pcontrib[i].n++;
 679 | 
 680 |             Pcontrib[i].p[k].pixel  = (unsigned short)(n);       /* store src sample number */
 681 |             Pcontrib[i].p[k].weight = weight; /* store src sample weight */
 682 | 
 683 |             total_weight += weight;          /* total weight of all contributors */
 684 | 
 685 |             if (weight > max_w)
 686 |             {
 687 |                max_w = weight;
 688 |                max_k = k;
 689 |             }
 690 |          }
 691 | 
 692 | #if RESAMPLER_DEBUG
 693 |          printf("\n\n");
 694 | #endif
 695 | 
 696 |          //resampler_assert(Pcontrib[i].n);
 697 |          //resampler_assert(max_k != -1);
 698 | 
 699 |          if ((max_k == -1) || (Pcontrib[i].n == 0))
 700 |          {
 701 |             free(Pcpool);
 702 |             free(Pcontrib);
 703 |             free(Pcontrib_bounds);
 704 |             return NULL;
 705 |          }
 706 | 
 707 |          if (total_weight != 1.0f)
 708 |             Pcontrib[i].p[max_k].weight += 1.0f - total_weight;
 709 |       }
 710 |    }
 711 | 
 712 | #if RESAMPLER_DEBUG
 713 |    printf("*******\n");
 714 | #endif
 715 | 
 716 |    free(Pcontrib_bounds);
 717 | 
 718 |    return Pcontrib;
 719 | }
 720 | 
 721 | void Resampler::resample_x(Sample* Pdst, const Sample* Psrc)
 722 | {
 723 |    resampler_assert(Pdst);
 724 |    resampler_assert(Psrc);
 725 | 
 726 |    int i, j;
 727 |    Sample total;
 728 |    Contrib_List *Pclist = m_Pclist_x;
 729 |    Contrib *p;
 730 | 
 731 |    for (i = m_resample_dst_x; i > 0; i--, Pclist++)
 732 |    {
 733 | #if RESAMPLER_DEBUG_OPS
 734 |       total_ops += Pclist->n;
 735 | #endif
 736 | 
 737 |       for (j = Pclist->n, p = Pclist->p, total = 0; j > 0; j--, p++)
 738 |          total += Psrc[p->pixel] * p->weight;
 739 | 
 740 |       *Pdst++ = total;
 741 |    }
 742 | }
 743 | 
 744 | void Resampler::scale_y_mov(Sample* Ptmp, const Sample* Psrc, Resample_Real weight, int dst_x)
 745 | {
 746 |    int i;
 747 | 
 748 | #if RESAMPLER_DEBUG_OPS
 749 |    total_ops += dst_x;
 750 | #endif
 751 | 
 752 |    // Not += because temp buf wasn't cleared.
 753 |    for (i = dst_x; i > 0; i--)
 754 |       *Ptmp++ = *Psrc++ * weight;
 755 | }
 756 | 
 757 | void Resampler::scale_y_add(Sample* Ptmp, const Sample* Psrc, Resample_Real weight, int dst_x)
 758 | {
 759 | #if RESAMPLER_DEBUG_OPS
 760 |    total_ops += dst_x;
 761 | #endif
 762 | 
 763 |    for (int i = dst_x; i > 0; i--)
 764 |       (*Ptmp++) += *Psrc++ * weight;
 765 | }
 766 | 
 767 | void Resampler::clamp(Sample* Pdst, int n)
 768 | {
 769 |    while (n > 0)
 770 |    {
 771 |       *Pdst = clamp_sample(*Pdst);
 772 |       ++Pdst;
 773 |       n--;
 774 |    }
 775 | }
 776 | 
 777 | void Resampler::resample_y(Sample* Pdst)
 778 | {
 779 |    int i, j;
 780 |    Sample* Psrc;
 781 |    Contrib_List* Pclist = &m_Pclist_y[m_cur_dst_y];
 782 | 
 783 |    Sample* Ptmp = m_delay_x_resample ? m_Ptmp_buf : Pdst;
 784 |    resampler_assert(Ptmp);
 785 | 
 786 |    /* Process each contributor. */
 787 | 
 788 |    for (i = 0; i < Pclist->n; i++)
 789 |    {
 790 |       /* locate the contributor's location in the scan
 791 |       * buffer -- the contributor must always be found!
 792 |       */
 793 | 
 794 |       for (j = 0; j < MAX_SCAN_BUF_SIZE; j++)
 795 |          if (m_Pscan_buf->scan_buf_y[j] == Pclist->p[i].pixel)
 796 |             break;
 797 | 
 798 |       resampler_assert(j < MAX_SCAN_BUF_SIZE);
 799 | 
 800 |       Psrc = m_Pscan_buf->scan_buf_l[j];
 801 | 
 802 |       if (!i)
 803 |          scale_y_mov(Ptmp, Psrc, Pclist->p[i].weight, m_intermediate_x);
 804 |       else
 805 |          scale_y_add(Ptmp, Psrc, Pclist->p[i].weight, m_intermediate_x);
 806 | 
 807 |       /* If this source line doesn't contribute to any
 808 |       * more destination lines then mark the scanline buffer slot
 809 |       * which holds this source line as free.
 810 |       * (The max. number of slots used depends on the Y
 811 |       * axis sampling factor and the scaled filter width.)
 812 |       */
 813 | 
 814 |       if (--m_Psrc_y_count[resampler_range_check(Pclist->p[i].pixel, m_resample_src_y)] == 0)
 815 |       {
 816 |          m_Psrc_y_flag[resampler_range_check(Pclist->p[i].pixel, m_resample_src_y)] = FALSE;
 817 |          m_Pscan_buf->scan_buf_y[j] = -1;
 818 |       }
 819 |    }
 820 | 
 821 |    /* Now generate the destination line */
 822 | 
 823 |    if (m_delay_x_resample) // Was X resampling delayed until after Y resampling?
 824 |    {
 825 |       resampler_assert(Pdst != Ptmp);
 826 |       resample_x(Pdst, Ptmp);
 827 |    }
 828 |    else
 829 |    {
 830 |       resampler_assert(Pdst == Ptmp);
 831 |    }
 832 | 
 833 |    if (m_lo < m_hi)
 834 |       clamp(Pdst, m_resample_dst_x);
 835 | }
 836 | 
 837 | bool Resampler::put_line(const Sample* Psrc)
 838 | {
 839 |    int i;
 840 | 
 841 |    if (m_cur_src_y >= m_resample_src_y)
 842 |       return false;
 843 | 
 844 |    /* Does this source line contribute
 845 |    * to any destination line? if not,
 846 |    * exit now.
 847 |    */
 848 | 
 849 |    if (!m_Psrc_y_count[resampler_range_check(m_cur_src_y, m_resample_src_y)])
 850 |    {
 851 |       m_cur_src_y++;
 852 |       return true;
 853 |    }
 854 | 
 855 |    /* Find an empty slot in the scanline buffer. (FIXME: Perf. is terrible here with extreme scaling ratios.) */
 856 | 
 857 |    for (i = 0; i < MAX_SCAN_BUF_SIZE; i++)
 858 |       if (m_Pscan_buf->scan_buf_y[i] == -1)
 859 |          break;
 860 | 
 861 |    /* If the buffer is full, exit with an error. */
 862 | 
 863 |    if (i == MAX_SCAN_BUF_SIZE)
 864 |    {
 865 |       m_status = STATUS_SCAN_BUFFER_FULL;
 866 |       return false;
 867 |    }
 868 | 
 869 |    m_Psrc_y_flag[resampler_range_check(m_cur_src_y, m_resample_src_y)] = TRUE;
 870 |    m_Pscan_buf->scan_buf_y[i]  = m_cur_src_y;
 871 | 
 872 |    /* Does this slot have any memory allocated to it? */
 873 | 
 874 |    if (!m_Pscan_buf->scan_buf_l[i])
 875 |    {
 876 |       if ((m_Pscan_buf->scan_buf_l[i] = (Sample*)malloc(m_intermediate_x * sizeof(Sample))) == NULL)
 877 |       {
 878 |          m_status = STATUS_OUT_OF_MEMORY;
 879 |          return false;
 880 |       }
 881 |    }
 882 | 
 883 |    // Resampling on the X axis first?
 884 |    if (m_delay_x_resample)
 885 |    {
 886 |       resampler_assert(m_intermediate_x == m_resample_src_x);
 887 | 
 888 |       // Y-X resampling order
 889 |       memcpy(m_Pscan_buf->scan_buf_l[i], Psrc, m_intermediate_x * sizeof(Sample));
 890 |    }
 891 |    else
 892 |    {
 893 |       resampler_assert(m_intermediate_x == m_resample_dst_x);
 894 | 
 895 |       // X-Y resampling order
 896 |       resample_x(m_Pscan_buf->scan_buf_l[i], Psrc);
 897 |    }
 898 | 
 899 |    m_cur_src_y++;
 900 | 
 901 |    return true;
 902 | }
 903 | 
 904 | const Resampler::Sample* Resampler::get_line()
 905 | {
 906 |    int i;
 907 | 
 908 |    /* If all the destination lines have been
 909 |    * generated, then always return NULL.
 910 |    */
 911 | 
 912 |    if (m_cur_dst_y == m_resample_dst_y)
 913 |       return NULL;
 914 | 
 915 |    /* Check to see if all the required
 916 |    * contributors are present, if not,
 917 |    * return NULL.
 918 |    */
 919 | 
 920 |    for (i = 0; i < m_Pclist_y[m_cur_dst_y].n; i++)
 921 |       if (!m_Psrc_y_flag[resampler_range_check(m_Pclist_y[m_cur_dst_y].p[i].pixel, m_resample_src_y)])
 922 |          return NULL;
 923 | 
 924 |    resample_y(m_Pdst_buf);
 925 | 
 926 |    m_cur_dst_y++;
 927 | 
 928 |    return m_Pdst_buf;
 929 | }
 930 | 
 931 | Resampler::~Resampler()
 932 | {
 933 |    int i;
 934 | 
 935 | #if RESAMPLER_DEBUG_OPS
 936 |    printf("actual ops: %i\n", total_ops);
 937 | #endif
 938 | 
 939 |    free(m_Pdst_buf);
 940 |    m_Pdst_buf = NULL;
 941 | 
 942 |    if (m_Ptmp_buf)
 943 |    {
 944 |       free(m_Ptmp_buf);
 945 |       m_Ptmp_buf = NULL;
 946 |    }
 947 | 
 948 |    /* Don't deallocate a contibutor list
 949 |    * if the user passed us one of their own.
 950 |    */
 951 | 
 952 |    if ((m_Pclist_x) && (!m_clist_x_forced))
 953 |    {
 954 |       free(m_Pclist_x->p);
 955 |       free(m_Pclist_x);
 956 |       m_Pclist_x = NULL;
 957 |    }
 958 | 
 959 |    if ((m_Pclist_y) && (!m_clist_y_forced))
 960 |    {
 961 |       free(m_Pclist_y->p);
 962 |       free(m_Pclist_y);
 963 |       m_Pclist_y = NULL;
 964 |    }
 965 | 
 966 |    free(m_Psrc_y_count);
 967 |    m_Psrc_y_count = NULL;
 968 | 
 969 |    free(m_Psrc_y_flag);
 970 |    m_Psrc_y_flag = NULL;
 971 | 
 972 |    if (m_Pscan_buf)
 973 |    {
 974 |       for (i = 0; i < MAX_SCAN_BUF_SIZE; i++)
 975 |          free(m_Pscan_buf->scan_buf_l[i]);
 976 | 
 977 |       free(m_Pscan_buf);
 978 |       m_Pscan_buf = NULL;
 979 |    }
 980 | }
 981 | 
 982 | void Resampler::restart()
 983 | {
 984 |    if (STATUS_OKAY != m_status)
 985 |       return;
 986 | 
 987 |    m_cur_src_y = m_cur_dst_y = 0;
 988 | 
 989 |    int i, j;
 990 |    for (i = 0; i < m_resample_src_y; i++)
 991 |    {
 992 |       m_Psrc_y_count[i] = 0;
 993 |       m_Psrc_y_flag[i] = FALSE;
 994 |    }
 995 | 
 996 |    for (i = 0; i < m_resample_dst_y; i++)
 997 |    {
 998 |       for (j = 0; j < m_Pclist_y[i].n; j++)
 999 |          m_Psrc_y_count[resampler_range_check(m_Pclist_y[i].p[j].pixel, m_resample_src_y)]++;
1000 |    }
1001 | 
1002 |    for (i = 0; i < MAX_SCAN_BUF_SIZE; i++)
1003 |    {
1004 |       m_Pscan_buf->scan_buf_y[i] = -1;
1005 | 
1006 |       free(m_Pscan_buf->scan_buf_l[i]);
1007 |       m_Pscan_buf->scan_buf_l[i] = NULL;
1008 |    }
1009 | }
1010 | 
1011 | Resampler::Resampler(int src_x, int src_y,
1012 |                      int dst_x, int dst_y,
1013 |                      Boundary_Op boundary_op,
1014 |                      Resample_Real sample_low, Resample_Real sample_high,
1015 |                      const char* Pfilter_name,
1016 |                      Contrib_List* Pclist_x,
1017 |                      Contrib_List* Pclist_y,
1018 |                      Resample_Real filter_x_scale,
1019 |                      Resample_Real filter_y_scale,
1020 |                      Resample_Real src_x_ofs,
1021 |                      Resample_Real src_y_ofs)
1022 | {
1023 |    int i, j;
1024 |    Resample_Real support, (*func)(Resample_Real);
1025 | 
1026 |    resampler_assert(src_x > 0);
1027 |    resampler_assert(src_y > 0);
1028 |    resampler_assert(dst_x > 0);
1029 |    resampler_assert(dst_y > 0);
1030 | 
1031 | #if RESAMPLER_DEBUG_OPS
1032 |    total_ops = 0;
1033 | #endif
1034 | 
1035 |    m_lo = sample_low;
1036 |    m_hi = sample_high;
1037 | 
1038 |    m_delay_x_resample = false;
1039 |    m_intermediate_x = 0;
1040 |    m_Pdst_buf = NULL;
1041 |    m_Ptmp_buf = NULL;
1042 |    m_clist_x_forced = false;
1043 |    m_Pclist_x = NULL;
1044 |    m_clist_y_forced = false;
1045 |    m_Pclist_y = NULL;
1046 |    m_Psrc_y_count = NULL;
1047 |    m_Psrc_y_flag = NULL;
1048 |    m_Pscan_buf = NULL;
1049 |    m_status = STATUS_OKAY;
1050 | 
1051 |    m_resample_src_x = src_x;
1052 |    m_resample_src_y = src_y;
1053 |    m_resample_dst_x = dst_x;
1054 |    m_resample_dst_y = dst_y;
1055 | 
1056 |    m_boundary_op = boundary_op;
1057 | 
1058 |    if ((m_Pdst_buf = (Sample*)malloc(m_resample_dst_x * sizeof(Sample))) == NULL)
1059 |    {
1060 |       m_status = STATUS_OUT_OF_MEMORY;
1061 |       return;
1062 |    }
1063 | 
1064 |    // Find the specified filter.
1065 | 
1066 |    if (Pfilter_name == NULL)
1067 |       Pfilter_name = RESAMPLER_DEFAULT_FILTER;
1068 | 
1069 |    for (i = 0; i < NUM_FILTERS; i++)
1070 |       if (strcmp(Pfilter_name, g_filters[i].name) == 0)
1071 |          break;
1072 | 
1073 |    if (i == NUM_FILTERS)
1074 |    {
1075 |       m_status = STATUS_BAD_FILTER_NAME;
1076 |       return;
1077 |    }
1078 | 
1079 |    func = g_filters[i].func;
1080 |    support = g_filters[i].support;
1081 | 
1082 |    /* Create contributor lists, unless the user supplied custom lists. */
1083 | 
1084 |    if (!Pclist_x)
1085 |    {
1086 |       m_Pclist_x = make_clist(m_resample_src_x, m_resample_dst_x, m_boundary_op, func, support, filter_x_scale, src_x_ofs);
1087 |       if (!m_Pclist_x)
1088 |       {
1089 |          m_status = STATUS_OUT_OF_MEMORY;
1090 |          return;
1091 |       }
1092 |    }
1093 |    else
1094 |    {
1095 |       m_Pclist_x = Pclist_x;
1096 |       m_clist_x_forced = true;
1097 |    }
1098 | 
1099 |    if (!Pclist_y)
1100 |    {
1101 |       m_Pclist_y = make_clist(m_resample_src_y, m_resample_dst_y, m_boundary_op, func, support, filter_y_scale, src_y_ofs);
1102 |       if (!m_Pclist_y)
1103 |       {
1104 |          m_status = STATUS_OUT_OF_MEMORY;
1105 |          return;
1106 |       }
1107 |    }
1108 |    else
1109 |    {
1110 |       m_Pclist_y = Pclist_y;
1111 |       m_clist_y_forced = true;
1112 |    }
1113 | 
1114 |    if ((m_Psrc_y_count = (int*)calloc(m_resample_src_y, sizeof(int))) == NULL)
1115 |    {
1116 |       m_status = STATUS_OUT_OF_MEMORY;
1117 |       return;
1118 |    }
1119 | 
1120 |    if ((m_Psrc_y_flag = (unsigned char*)calloc(m_resample_src_y, sizeof(unsigned char))) == NULL)
1121 |    {
1122 |       m_status = STATUS_OUT_OF_MEMORY;
1123 |       return;
1124 |    }
1125 | 
1126 |    /* Count how many times each source line
1127 |    * contributes to a destination line.
1128 |    */
1129 | 
1130 |    for (i = 0; i < m_resample_dst_y; i++)
1131 |       for (j = 0; j < m_Pclist_y[i].n; j++)
1132 |          m_Psrc_y_count[resampler_range_check(m_Pclist_y[i].p[j].pixel, m_resample_src_y)]++;
1133 | 
1134 |    if ((m_Pscan_buf = (Scan_Buf*)malloc(sizeof(Scan_Buf))) == NULL)
1135 |    {
1136 |       m_status = STATUS_OUT_OF_MEMORY;
1137 |       return;
1138 |    }
1139 | 
1140 |    for (i = 0; i < MAX_SCAN_BUF_SIZE; i++)
1141 |    {
1142 |       m_Pscan_buf->scan_buf_y[i] = -1;
1143 |       m_Pscan_buf->scan_buf_l[i] = NULL;
1144 |    }
1145 | 
1146 |    m_cur_src_y = m_cur_dst_y = 0;
1147 |    {
1148 |       // Determine which axis to resample first by comparing the number of multiplies required
1149 |       // for each possibility.
1150 |       int x_ops = count_ops(m_Pclist_x, m_resample_dst_x);
1151 |       int y_ops = count_ops(m_Pclist_y, m_resample_dst_y);
1152 | 
1153 |       // Hack 10/2000: Weight Y axis ops a little more than X axis ops.
1154 |       // (Y axis ops use more cache resources.)
1155 |       int xy_ops = x_ops * m_resample_src_y +
1156 |          (4 * y_ops * m_resample_dst_x)/3;
1157 | 
1158 |       int yx_ops = (4 * y_ops * m_resample_src_x)/3 +
1159 |          x_ops * m_resample_dst_y;
1160 | 
1161 | #if RESAMPLER_DEBUG_OPS
1162 |       printf("src: %i %i\n", m_resample_src_x, m_resample_src_y);
1163 |       printf("dst: %i %i\n", m_resample_dst_x, m_resample_dst_y);
1164 |       printf("x_ops: %i\n", x_ops);
1165 |       printf("y_ops: %i\n", y_ops);
1166 |       printf("xy_ops: %i\n", xy_ops);
1167 |       printf("yx_ops: %i\n", yx_ops);
1168 | #endif
1169 | 
1170 |       // Now check which resample order is better. In case of a tie, choose the order
1171 |       // which buffers the least amount of data.
1172 |       if ((xy_ops > yx_ops) ||
1173 |          ((xy_ops == yx_ops) && (m_resample_src_x < m_resample_dst_x))
1174 |          )
1175 |       {
1176 |          m_delay_x_resample = true;
1177 |          m_intermediate_x = m_resample_src_x;
1178 |       }
1179 |       else
1180 |       {
1181 |          m_delay_x_resample = false;
1182 |          m_intermediate_x = m_resample_dst_x;
1183 |       }
1184 | #if RESAMPLER_DEBUG_OPS
1185 |       printf("delaying: %i\n", m_delay_x_resample);
1186 | #endif
1187 |    }
1188 | 
1189 |    if (m_delay_x_resample)
1190 |    {
1191 |       if ((m_Ptmp_buf = (Sample*)malloc(m_intermediate_x * sizeof(Sample))) == NULL)
1192 |       {
1193 |          m_status = STATUS_OUT_OF_MEMORY;
1194 |          return;
1195 |       }
1196 |    }
1197 | }
1198 | 
1199 | void Resampler::get_clists(Contrib_List** ptr_clist_x, Contrib_List** ptr_clist_y)
1200 | {
1201 |    if (ptr_clist_x)
1202 |       *ptr_clist_x = m_Pclist_x;
1203 | 
1204 |    if (ptr_clist_y)
1205 |       *ptr_clist_y = m_Pclist_y;
1206 | }
1207 | 
1208 | int Resampler::get_filter_num()
1209 | {
1210 |    return NUM_FILTERS;
1211 | }
1212 | 
1213 | char* Resampler::get_filter_name(int filter_num)
1214 | {
1215 |    if ((filter_num < 0) || (filter_num >= NUM_FILTERS))
1216 |       return NULL;
1217 |    else
1218 |       return g_filters[filter_num].name;
1219 | }
1220 | 
1221 | 


--------------------------------------------------------------------------------
/stb_image.c:
--------------------------------------------------------------------------------
   1 | /* stbi-1.18 - public domain JPEG/PNG reader - http://nothings.org/stb_image.c
   2 |                       when you control the images you're loading
   3 | 
   4 |    QUICK NOTES:
   5 |       Primarily of interest to game developers and other people who can
   6 |           avoid problematic images and only need the trivial interface
   7 | 
   8 |       JPEG baseline (no JPEG progressive, no oddball channel decimations)
   9 |       PNG 8-bit only
  10 |       BMP non-1bpp, non-RLE
  11 |       TGA (not sure what subset, if a subset)
  12 |       PSD (composited view only, no extra channels)
  13 |       HDR (radiance rgbE format)
  14 |       writes BMP,TGA (define STBI_NO_WRITE to remove code)
  15 |       decoded from memory or through stdio FILE (define STBI_NO_STDIO to remove code)
  16 |       supports installable dequantizing-IDCT, YCbCr-to-RGB conversion (define STBI_SIMD)
  17 | 
  18 |    TODO:
  19 |       stbi_info_*
  20 | 
  21 |    history:
  22 |       1.18   fix a threading bug (local mutable static)
  23 |       1.17   support interlaced PNG
  24 |       1.16   major bugfix - convert_format converted one too many pixels
  25 |       1.15   initialize some fields for thread safety
  26 |       1.14   fix threadsafe conversion bug; header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
  27 |       1.13   threadsafe
  28 |       1.12   const qualifiers in the API
  29 |       1.11   Support installable IDCT, colorspace conversion routines
  30 |       1.10   Fixes for 64-bit (don't use "unsigned long")
  31 |              optimized upsampling by Fabian "ryg" Giesen
  32 |       1.09   Fix format-conversion for PSD code (bad global variables!)
  33 |       1.08   Thatcher Ulrich's PSD code integrated by Nicolas Schulz
  34 |       1.07   attempt to fix C++ warning/errors again
  35 |       1.06   attempt to fix C++ warning/errors again
  36 |       1.05   fix TGA loading to return correct *comp and use good luminance calc
  37 |       1.04   default float alpha is 1, not 255; use 'void *' for stbi_image_free
  38 |       1.03   bugfixes to STBI_NO_STDIO, STBI_NO_HDR
  39 |       1.02   support for (subset of) HDR files, float interface for preferred access to them
  40 |       1.01   fix bug: possible bug in handling right-side up bmps... not sure
  41 |              fix bug: the stbi_bmp_load() and stbi_tga_load() functions didn't work at all
  42 |       1.00   interface to zlib that skips zlib header
  43 |       0.99   correct handling of alpha in palette
  44 |       0.98   TGA loader by lonesock; dynamically add loaders (untested)
  45 |       0.97   jpeg errors on too large a file; also catch another malloc failure
  46 |       0.96   fix detection of invalid v value - particleman@mollyrocket forum
  47 |       0.95   during header scan, seek to markers in case of padding
  48 |       0.94   STBI_NO_STDIO to disable stdio usage; rename all #defines the same
  49 |       0.93   handle jpegtran output; verbose errors
  50 |       0.92   read 4,8,16,24,32-bit BMP files of several formats
  51 |       0.91   output 24-bit Windows 3.0 BMP files
  52 |       0.90   fix a few more warnings; bump version number to approach 1.0
  53 |       0.61   bugfixes due to Marc LeBlanc, Christopher Lloyd
  54 |       0.60   fix compiling as c++
  55 |       0.59   fix warnings: merge Dave Moore's -Wall fixes
  56 |       0.58   fix bug: zlib uncompressed mode len/nlen was wrong endian
  57 |       0.57   fix bug: jpg last huffman symbol before marker was >9 bits but less
  58 |                       than 16 available
  59 |       0.56   fix bug: zlib uncompressed mode len vs. nlen
  60 |       0.55   fix bug: restart_interval not initialized to 0
  61 |       0.54   allow NULL for 'int *comp'
  62 |       0.53   fix bug in png 3->4; speedup png decoding
  63 |       0.52   png handles req_comp=3,4 directly; minor cleanup; jpeg comments
  64 |       0.51   obey req_comp requests, 1-component jpegs return as 1-component,
  65 |              on 'test' only check type, not whether we support this variant
  66 | */
  67 | 
  68 | 
  69 | #ifndef STBI_INCLUDE_STB_IMAGE_H
  70 | #define STBI_INCLUDE_STB_IMAGE_H
  71 | 
  72 | ////   begin header file  ////////////////////////////////////////////////////
  73 | //
  74 | // Limitations:
  75 | //    - no progressive JPEG support (interlaced PNG is supported as of 1.17)
  76 | //    - 8-bit samples only (jpeg, png)
  77 | //    - not threadsafe
  78 | //    - channel subsampling of at most 2 in each dimension (jpeg)
  79 | //    - no delayed line count (jpeg) -- IJG doesn't support either
  80 | //
  81 | // Basic usage (see HDR discussion below):
  82 | //    int x,y,n;
  83 | //    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
  84 | //    // ... process data if not NULL ...
  85 | //    // ... x = width, y = height, n = # 8-bit components per pixel ...
  86 | //    // ... replace '0' with '1'..'4' to force that many components per pixel
  87 | //    stbi_image_free(data)
  88 | //
  89 | // Standard parameters:
  90 | //    int *x       -- outputs image width in pixels
  91 | //    int *y       -- outputs image height in pixels
  92 | //    int *comp    -- outputs # of image components in image file
  93 | //    int req_comp -- if non-zero, # of image components requested in result
  94 | //
  95 | // The return value from an image loader is an 'unsigned char *' which points
  96 | // to the pixel data. The pixel data consists of *y scanlines of *x pixels,
  97 | // with each pixel consisting of N interleaved 8-bit components; the first
  98 | // pixel pointed to is top-left-most in the image. There is no padding between
  99 | // image scanlines or between pixels, regardless of format. The number of
 100 | // components N is 'req_comp' if req_comp is non-zero, or *comp otherwise.
 101 | // If req_comp is non-zero, *comp has the number of components that _would_
 102 | // have been output otherwise. E.g. if you set req_comp to 4, you will always
 103 | // get RGBA output, but you can check *comp to easily see if it's opaque.
 104 | //
 105 | // An output image with N components has the following components interleaved
 106 | // in this order in each pixel:
 107 | //
 108 | //     N=#comp     components
 109 | //       1           grey
 110 | //       2           grey, alpha
 111 | //       3           red, green, blue
 112 | //       4           red, green, blue, alpha
 113 | //
 114 | // If image loading fails for any reason, the return value will be NULL,
 115 | // and *x, *y, *comp will be unchanged. The function stbi_failure_reason()
 116 | // can be queried for an extremely brief, end-user unfriendly explanation
 117 | // of why the load failed. Define STBI_NO_FAILURE_STRINGS to avoid
 118 | // compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
 119 | // more user-friendly ones.
 120 | //
 121 | // Paletted PNG and BMP images are automatically depalettized.
 122 | //
 123 | //
 124 | // ===========================================================================
 125 | //
 126 | // HDR image support   (disable by defining STBI_NO_HDR)
 127 | //
 128 | // stb_image now supports loading HDR images in general, and currently
 129 | // the Radiance .HDR file format, although the support is provided
 130 | // generically. You can still load any file through the existing interface;
 131 | // if you attempt to load an HDR file, it will be automatically remapped to
 132 | // LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
 133 | // both of these constants can be reconfigured through this interface:
 134 | //
 135 | //     stbi_hdr_to_ldr_gamma(2.2f);
 136 | //     stbi_hdr_to_ldr_scale(1.0f);
 137 | //
 138 | // (note, do not use _inverse_ constants; stb_image will invert them
 139 | // appropriately).
 140 | //
 141 | // Additionally, there is a new, parallel interface for loading files as
 142 | // (linear) floats to preserve the full dynamic range:
 143 | //
 144 | //    float *data = stbi_loadf(filename, &x, &y, &n, 0);
 145 | //
 146 | // If you load LDR images through this interface, those images will
 147 | // be promoted to floating point values, run through the inverse of
 148 | // constants corresponding to the above:
 149 | //
 150 | //     stbi_ldr_to_hdr_scale(1.0f);
 151 | //     stbi_ldr_to_hdr_gamma(2.2f);
 152 | //
 153 | // Finally, given a filename (or an open file or memory block--see header
 154 | // file for details) containing image data, you can query for the "most
 155 | // appropriate" interface to use (that is, whether the image is HDR or
 156 | // not), using:
 157 | //
 158 | //     stbi_is_hdr(char *filename);
 159 | 
 160 | #ifndef STBI_NO_STDIO
 161 | #include <stdio.h>
 162 | #endif
 163 | 
 164 | #define STBI_VERSION 1
 165 | 
 166 | enum
 167 | {
 168 |    STBI_default = 0, // only used for req_comp
 169 | 
 170 |    STBI_grey       = 1,
 171 |    STBI_grey_alpha = 2,
 172 |    STBI_rgb        = 3,
 173 |    STBI_rgb_alpha  = 4,
 174 | };
 175 | 
 176 | typedef unsigned char stbi_uc;
 177 | 
 178 | #ifdef __cplusplus
 179 | extern "C" {
 180 | #endif
 181 | 
 182 | // WRITING API
 183 | 
 184 | #if !defined(STBI_NO_WRITE) && !defined(STBI_NO_STDIO)
 185 | // write a BMP/TGA file given tightly packed 'comp' channels (no padding, nor bmp-stride-padding)
 186 | // (you must include the appropriate extension in the filename).
 187 | // returns TRUE on success, FALSE if couldn't open file, error writing file
 188 | extern int      stbi_write_bmp       (char const *filename,     int x, int y, int comp, void *data);
 189 | extern int      stbi_write_tga       (char const *filename,     int x, int y, int comp, void *data);
 190 | #endif
 191 | 
 192 | // PRIMARY API - works on images of any type
 193 | 
 194 | // load image by filename, open file, or memory buffer
 195 | #ifndef STBI_NO_STDIO
 196 | extern stbi_uc *stbi_load            (char const *filename,     int *x, int *y, int *comp, int req_comp);
 197 | extern stbi_uc *stbi_load_from_file  (FILE *f,                  int *x, int *y, int *comp, int req_comp);
 198 | extern int      stbi_info_from_file  (FILE *f,                  int *x, int *y, int *comp);
 199 | #endif
 200 | extern stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
 201 | // for stbi_load_from_file, file pointer is left pointing immediately after image
 202 | 
 203 | #ifndef STBI_NO_HDR
 204 | #ifndef STBI_NO_STDIO
 205 | extern float *stbi_loadf            (char const *filename,     int *x, int *y, int *comp, int req_comp);
 206 | extern float *stbi_loadf_from_file  (FILE *f,                  int *x, int *y, int *comp, int req_comp);
 207 | #endif
 208 | extern float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
 209 | 
 210 | extern void   stbi_hdr_to_ldr_gamma(float gamma);
 211 | extern void   stbi_hdr_to_ldr_scale(float scale);
 212 | 
 213 | extern void   stbi_ldr_to_hdr_gamma(float gamma);
 214 | extern void   stbi_ldr_to_hdr_scale(float scale);
 215 | 
 216 | #endif // STBI_NO_HDR
 217 | 
 218 | // get a VERY brief reason for failure
 219 | // NOT THREADSAFE
 220 | extern char    *stbi_failure_reason  (void);
 221 | 
 222 | // free the loaded image -- this is just free()
 223 | extern void     stbi_image_free      (void *retval_from_stbi_load);
 224 | 
 225 | // get image dimensions & components without fully decoding
 226 | extern int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
 227 | extern int      stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
 228 | #ifndef STBI_NO_STDIO
 229 | extern int      stbi_info            (char const *filename,     int *x, int *y, int *comp);
 230 | extern int      stbi_is_hdr          (char const *filename);
 231 | extern int      stbi_is_hdr_from_file(FILE *f);
 232 | #endif
 233 | 
 234 | // ZLIB client - used by PNG, available for other purposes
 235 | 
 236 | extern char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
 237 | extern char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
 238 | extern int   stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
 239 | 
 240 | extern char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
 241 | extern int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
 242 | 
 243 | // TYPE-SPECIFIC ACCESS
 244 | 
 245 | // is it a jpeg?
 246 | extern int      stbi_jpeg_test_memory     (stbi_uc const *buffer, int len);
 247 | extern stbi_uc *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
 248 | extern int      stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
 249 | 
 250 | #ifndef STBI_NO_STDIO
 251 | extern stbi_uc *stbi_jpeg_load            (char const *filename,     int *x, int *y, int *comp, int req_comp);
 252 | extern int      stbi_jpeg_test_file       (FILE *f);
 253 | extern stbi_uc *stbi_jpeg_load_from_file  (FILE *f,                  int *x, int *y, int *comp, int req_comp);
 254 | 
 255 | extern int      stbi_jpeg_info            (char const *filename,     int *x, int *y, int *comp);
 256 | extern int      stbi_jpeg_info_from_file  (FILE *f,                  int *x, int *y, int *comp);
 257 | #endif
 258 | 
 259 | // is it a png?
 260 | extern int      stbi_png_test_memory      (stbi_uc const *buffer, int len);
 261 | extern stbi_uc *stbi_png_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
 262 | extern int      stbi_png_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp);
 263 | 
 264 | #ifndef STBI_NO_STDIO
 265 | extern stbi_uc *stbi_png_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
 266 | extern int      stbi_png_info             (char const *filename,     int *x, int *y, int *comp);
 267 | extern int      stbi_png_test_file        (FILE *f);
 268 | extern stbi_uc *stbi_png_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
 269 | extern int      stbi_png_info_from_file   (FILE *f,                  int *x, int *y, int *comp);
 270 | #endif
 271 | 
 272 | // is it a bmp?
 273 | extern int      stbi_bmp_test_memory      (stbi_uc const *buffer, int len);
 274 | 
 275 | extern stbi_uc *stbi_bmp_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
 276 | extern stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
 277 | #ifndef STBI_NO_STDIO
 278 | extern int      stbi_bmp_test_file        (FILE *f);
 279 | extern stbi_uc *stbi_bmp_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
 280 | #endif
 281 | 
 282 | // is it a tga?
 283 | extern int      stbi_tga_test_memory      (stbi_uc const *buffer, int len);
 284 | 
 285 | extern stbi_uc *stbi_tga_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
 286 | extern stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
 287 | #ifndef STBI_NO_STDIO
 288 | extern int      stbi_tga_test_file        (FILE *f);
 289 | extern stbi_uc *stbi_tga_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
 290 | #endif
 291 | 
 292 | // is it a psd?
 293 | extern int      stbi_psd_test_memory      (stbi_uc const *buffer, int len);
 294 | 
 295 | extern stbi_uc *stbi_psd_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
 296 | extern stbi_uc *stbi_psd_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
 297 | #ifndef STBI_NO_STDIO
 298 | extern int      stbi_psd_test_file        (FILE *f);
 299 | extern stbi_uc *stbi_psd_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
 300 | #endif
 301 | 
 302 | // is it an hdr?
 303 | extern int      stbi_hdr_test_memory      (stbi_uc const *buffer, int len);
 304 | 
 305 | extern float *  stbi_hdr_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
 306 | extern float *  stbi_hdr_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
 307 | #ifndef STBI_NO_STDIO
 308 | extern int      stbi_hdr_test_file        (FILE *f);
 309 | extern float *  stbi_hdr_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
 310 | #endif
 311 | 
 312 | // define new loaders
 313 | typedef struct
 314 | {
 315 |    int       (*test_memory)(stbi_uc const *buffer, int len);
 316 |    stbi_uc * (*load_from_memory)(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
 317 |    #ifndef STBI_NO_STDIO
 318 |    int       (*test_file)(FILE *f);
 319 |    stbi_uc * (*load_from_file)(FILE *f, int *x, int *y, int *comp, int req_comp);
 320 |    #endif
 321 | } stbi_loader;
 322 | 
 323 | // register a loader by filling out the above structure (you must define ALL functions)
 324 | // returns 1 if added or already added, 0 if not added (too many loaders)
 325 | // NOT THREADSAFE
 326 | extern int stbi_register_loader(stbi_loader *loader);
 327 | 
 328 | // define faster low-level operations (typically SIMD support)
 329 | #if STBI_SIMD
 330 | typedef void (*stbi_idct_8x8)(uint8 *out, int out_stride, short data[64], unsigned short *dequantize);
 331 | // compute an integer IDCT on "input"
 332 | //     input[x] = data[x] * dequantize[x]
 333 | //     write results to 'out': 64 samples, each run of 8 spaced by 'out_stride'
 334 | //                             CLAMP results to 0..255
 335 | typedef void (*stbi_YCbCr_to_RGB_run)(uint8 *output, uint8 const *y, uint8 const *cb, uint8 const *cr, int count, int step);
 336 | // compute a conversion from YCbCr to RGB
 337 | //     'count' pixels
 338 | //     write pixels to 'output'; each pixel is 'step' bytes (either 3 or 4; if 4, write '255' as 4th), order R,G,B
 339 | //     y: Y input channel
 340 | //     cb: Cb input channel; scale/biased to be 0..255
 341 | //     cr: Cr input channel; scale/biased to be 0..255
 342 | 
 343 | extern void stbi_install_idct(stbi_idct_8x8 func);
 344 | extern void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func);
 345 | #endif // STBI_SIMD
 346 | 
 347 | #ifdef __cplusplus
 348 | }
 349 | #endif
 350 | 
 351 | //
 352 | //
 353 | ////   end header file   /////////////////////////////////////////////////////
 354 | #endif // STBI_INCLUDE_STB_IMAGE_H
 355 | 
 356 | #ifndef STBI_HEADER_FILE_ONLY
 357 | 
 358 | #ifndef STBI_NO_HDR
 359 | #include <math.h>  // ldexp
 360 | #include <string.h> // strcmp
 361 | #endif
 362 | 
 363 | #ifndef STBI_NO_STDIO
 364 | #include <stdio.h>
 365 | #endif
 366 | #include <stdlib.h>
 367 | #include <memory.h>
 368 | #include <assert.h>
 369 | #include <stdarg.h>
 370 | 
 371 | #ifndef _MSC_VER
 372 |   #ifdef __cplusplus
 373 |     #define __forceinline inline
 374 |   #else
 375 |     #undef __forceinline
 376 |     #define __forceinline
 377 |   #endif
 378 | #endif
 379 | 
 380 | 
 381 | // implementation:
 382 | typedef unsigned char uint8;
 383 | typedef unsigned short uint16;
 384 | typedef   signed short  int16;
 385 | typedef unsigned int   uint32;
 386 | typedef   signed int    int32;
 387 | typedef unsigned int   uint;
 388 | 
 389 | // should produce compiler error if size is wrong
 390 | typedef unsigned char validate_uint32[sizeof(uint32)==4];
 391 | 
 392 | #if defined(STBI_NO_STDIO) && !defined(STBI_NO_WRITE)
 393 | #define STBI_NO_WRITE
 394 | #endif
 395 | 
 396 | //////////////////////////////////////////////////////////////////////////////
 397 | //
 398 | // Generic API that works on all image types
 399 | //
 400 | 
 401 | // this is not threadsafe
 402 | static char *failure_reason;
 403 | 
 404 | char *stbi_failure_reason(void)
 405 | {
 406 |    return failure_reason;
 407 | }
 408 | 
 409 | static int e(char *str)
 410 | {
 411 |    failure_reason = str;
 412 |    return 0;
 413 | }
 414 | 
// Two-message front end for the e() function defined above: x is the terse
// message and y the more user-friendly one. Exactly one of them (or
// neither) is forwarded, depending on the build configuration:
//    STBI_NO_FAILURE_STRINGS  -> no message is recorded; the macro is just 0
//    STBI_FAILURE_USERMSG     -> y is recorded
//    otherwise                -> x is recorded
#ifdef STBI_NO_FAILURE_STRINGS
   #define e(x,y)  0
#elif defined(STBI_FAILURE_USERMSG)
   #define e(x,y)  e(y)
#else
   #define e(x,y)  e(x)
#endif

// Record a failure via e(x,y) and evaluate to a NULL pointer of the loader's
// return type (float * / unsigned char *), so pointer-returning functions
// can write "return epuc(...)". Both arms of the ?: are NULL; the condition
// exists only so that e(x,y) is evaluated.
#define epf(x,y)   ((float *) (e(x,y)?NULL:NULL))
#define epuc(x,y)  ((unsigned char *) (e(x,y)?NULL:NULL))
 425 | 
 426 | void stbi_image_free(void *retval_from_stbi_load)
 427 | {
 428 |    free(retval_from_stbi_load);
 429 | }
 430 | 
 431 | #define MAX_LOADERS  32
 432 | stbi_loader *loaders[MAX_LOADERS];
 433 | static int max_loaders = 0;
 434 | 
 435 | int stbi_register_loader(stbi_loader *loader)
 436 | {
 437 |    int i;
 438 |    for (i=0; i < MAX_LOADERS; ++i) {
 439 |       // already present?
 440 |       if (loaders[i] == loader)
 441 |          return 1;
 442 |       // end of the list?
 443 |       if (loaders[i] == NULL) {
 444 |          loaders[i] = loader;
 445 |          max_loaders = i+1;
 446 |          return 1;
 447 |       }
 448 |    }
 449 |    // no room for it
 450 |    return 0;
 451 | }
 452 | 
 453 | #ifndef STBI_NO_HDR
 454 | static float   *ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
 455 | static stbi_uc *hdr_to_ldr(float   *data, int x, int y, int comp);
 456 | #endif
 457 | 
 458 | #ifndef STBI_NO_STDIO
 459 | unsigned char *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
 460 | {
 461 |    FILE *f = fopen(filename, "rb");
 462 |    unsigned char *result;
 463 |    if (!f) return epuc("can't fopen", "Unable to open file");
 464 |    result = stbi_load_from_file(f,x,y,comp,req_comp);
 465 |    fclose(f);
 466 |    return result;
 467 | }
 468 | 
 469 | unsigned char *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
 470 | {
 471 |    int i;
 472 |    if (stbi_jpeg_test_file(f))
 473 |       return stbi_jpeg_load_from_file(f,x,y,comp,req_comp);
 474 |    if (stbi_png_test_file(f))
 475 |       return stbi_png_load_from_file(f,x,y,comp,req_comp);
 476 |    if (stbi_bmp_test_file(f))
 477 |       return stbi_bmp_load_from_file(f,x,y,comp,req_comp);
 478 |    if (stbi_psd_test_file(f))
 479 |       return stbi_psd_load_from_file(f,x,y,comp,req_comp);
 480 |    #ifndef STBI_NO_HDR
 481 |    if (stbi_hdr_test_file(f)) {
 482 |       float *hdr = stbi_hdr_load_from_file(f, x,y,comp,req_comp);
 483 |       return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
 484 |    }
 485 |    #endif
 486 |    for (i=0; i < max_loaders; ++i)
 487 |       if (loaders[i]->test_file(f))
 488 |          return loaders[i]->load_from_file(f,x,y,comp,req_comp);
 489 |    // test tga last because it's a crappy test!
 490 |    if (stbi_tga_test_file(f))
 491 |       return stbi_tga_load_from_file(f,x,y,comp,req_comp);
 492 |    return epuc("unknown image type", "Image not of any known type, or corrupt");
 493 | }
 494 | #endif
 495 | 
 496 | unsigned char *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
 497 | {
 498 |    int i;
 499 |    if (stbi_jpeg_test_memory(buffer,len))
 500 |       return stbi_jpeg_load_from_memory(buffer,len,x,y,comp,req_comp);
 501 |    if (stbi_png_test_memory(buffer,len))
 502 |       return stbi_png_load_from_memory(buffer,len,x,y,comp,req_comp);
 503 |    if (stbi_bmp_test_memory(buffer,len))
 504 |       return stbi_bmp_load_from_memory(buffer,len,x,y,comp,req_comp);
 505 |    if (stbi_psd_test_memory(buffer,len))
 506 |       return stbi_psd_load_from_memory(buffer,len,x,y,comp,req_comp);
 507 |    #ifndef STBI_NO_HDR
 508 |    if (stbi_hdr_test_memory(buffer, len)) {
 509 |       float *hdr = stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp);
 510 |       return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
 511 |    }
 512 |    #endif
 513 |    for (i=0; i < max_loaders; ++i)
 514 |       if (loaders[i]->test_memory(buffer,len))
 515 |          return loaders[i]->load_from_memory(buffer,len,x,y,comp,req_comp);
 516 |    // test tga last because it's a crappy test!
 517 |    if (stbi_tga_test_memory(buffer,len))
 518 |       return stbi_tga_load_from_memory(buffer,len,x,y,comp,req_comp);
 519 |    return epuc("unknown image type", "Image not of any known type, or corrupt");
 520 | }
 521 | 
 522 | #ifndef STBI_NO_HDR
 523 | 
 524 | #ifndef STBI_NO_STDIO
 525 | float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
 526 | {
 527 |    FILE *f = fopen(filename, "rb");
 528 |    float *result;
 529 |    if (!f) return epf("can't fopen", "Unable to open file");
 530 |    result = stbi_loadf_from_file(f,x,y,comp,req_comp);
 531 |    fclose(f);
 532 |    return result;
 533 | }
 534 | 
 535 | float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
 536 | {
 537 |    unsigned char *data;
 538 |    #ifndef STBI_NO_HDR
 539 |    if (stbi_hdr_test_file(f))
 540 |       return stbi_hdr_load_from_file(f,x,y,comp,req_comp);
 541 |    #endif
 542 |    data = stbi_load_from_file(f, x, y, comp, req_comp);
 543 |    if (data)
 544 |       return ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
 545 |    return epf("unknown image type", "Image not of any known type, or corrupt");
 546 | }
 547 | #endif
 548 | 
 549 | float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
 550 | {
 551 |    stbi_uc *data;
 552 |    #ifndef STBI_NO_HDR
 553 |    if (stbi_hdr_test_memory(buffer, len))
 554 |       return stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp);
 555 |    #endif
 556 |    data = stbi_load_from_memory(buffer, len, x, y, comp, req_comp);
 557 |    if (data)
 558 |       return ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
 559 |    return epf("unknown image type", "Image not of any known type, or corrupt");
 560 | }
 561 | #endif
 562 | 
 563 | // these is-hdr-or-not queries are defined regardless of whether STBI_NO_HDR
 564 | // is defined, for API simplicity; if STBI_NO_HDR is defined, they always
 565 | // report false!
 566 | 
 567 | int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
 568 | {
 569 |    #ifndef STBI_NO_HDR
 570 |    return stbi_hdr_test_memory(buffer, len);
 571 |    #else
 572 |    return 0;
 573 |    #endif
 574 | }
 575 | 
 576 | #ifndef STBI_NO_STDIO
 577 | extern int      stbi_is_hdr          (char const *filename)
 578 | {
 579 |    FILE *f = fopen(filename, "rb");
 580 |    int result=0;
 581 |    if (f) {
 582 |       result = stbi_is_hdr_from_file(f);
 583 |       fclose(f);
 584 |    }
 585 |    return result;
 586 | }
 587 | 
 588 | extern int      stbi_is_hdr_from_file(FILE *f)
 589 | {
 590 |    #ifndef STBI_NO_HDR
 591 |    return stbi_hdr_test_file(f);
 592 |    #else
 593 |    return 0;
 594 |    #endif
 595 | }
 596 | 
 597 | #endif
 598 | 
 599 | // @TODO: get image dimensions & components without fully decoding
 600 | #ifndef STBI_NO_STDIO
 601 | extern int      stbi_info            (char const *filename,           int *x, int *y, int *comp);
 602 | extern int      stbi_info_from_file  (FILE *f,                  int *x, int *y, int *comp);
 603 | #endif
 604 | extern int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
 605 | 
 606 | #ifndef STBI_NO_HDR
 607 | static float h2l_gamma_i=1.0f/2.2f, h2l_scale_i=1.0f;
 608 | static float l2h_gamma=2.2f, l2h_scale=1.0f;
 609 | 
 610 | void   stbi_hdr_to_ldr_gamma(float gamma) { h2l_gamma_i = 1/gamma; }
 611 | void   stbi_hdr_to_ldr_scale(float scale) { h2l_scale_i = 1/scale; }
 612 | 
 613 | void   stbi_ldr_to_hdr_gamma(float gamma) { l2h_gamma = gamma; }
 614 | void   stbi_ldr_to_hdr_scale(float scale) { l2h_scale = scale; }
 615 | #endif
 616 | 
 617 | 
 618 | //////////////////////////////////////////////////////////////////////////////
 619 | //
 620 | // Common code used by all image loaders
 621 | //
 622 | 
 623 | enum
 624 | {
 625 |    SCAN_load=0,
 626 |    SCAN_type,
 627 |    SCAN_header,
 628 | };
 629 | 
 630 | typedef struct
 631 | {
 632 |    uint32 img_x, img_y;
 633 |    int img_n, img_out_n;
 634 | 
 635 |    #ifndef STBI_NO_STDIO
 636 |    FILE  *img_file;
 637 |    #endif
 638 |    uint8 *img_buffer, *img_buffer_end;
 639 | } stbi;
 640 | 
#ifndef STBI_NO_STDIO
// Attach an open stdio stream to the context. While img_file is non-NULL the
// byte readers (get8, at_eof, skip, getn) use the stream and ignore the
// memory-buffer pointers, which this function leaves untouched.
static void start_file(stbi *s, FILE *f)
{
   s->img_file = f;
}
#endif
 647 | 
 648 | static void start_mem(stbi *s, uint8 const *buffer, int len)
 649 | {
 650 | #ifndef STBI_NO_STDIO
 651 |    s->img_file = NULL;
 652 | #endif
 653 |    s->img_buffer = (uint8 *) buffer;
 654 |    s->img_buffer_end = (uint8 *) buffer+len;
 655 | }
 656 | 
 657 | __forceinline static int get8(stbi *s)
 658 | {
 659 | #ifndef STBI_NO_STDIO
 660 |    if (s->img_file) {
 661 |       int c = fgetc(s->img_file);
 662 |       return c == EOF ? 0 : c;
 663 |    }
 664 | #endif
 665 |    if (s->img_buffer < s->img_buffer_end)
 666 |       return *s->img_buffer++;
 667 |    return 0;
 668 | }
 669 | 
 670 | __forceinline static int at_eof(stbi *s)
 671 | {
 672 | #ifndef STBI_NO_STDIO
 673 |    if (s->img_file)
 674 |       return feof(s->img_file);
 675 | #endif
 676 |    return s->img_buffer >= s->img_buffer_end;
 677 | }
 678 | 
 679 | __forceinline static uint8 get8u(stbi *s)
 680 | {
 681 |    return (uint8) get8(s);
 682 | }
 683 | 
 684 | static void skip(stbi *s, int n)
 685 | {
 686 | #ifndef STBI_NO_STDIO
 687 |    if (s->img_file)
 688 |       fseek(s->img_file, n, SEEK_CUR);
 689 |    else
 690 | #endif
 691 |       s->img_buffer += n;
 692 | }
 693 | 
 694 | static int get16(stbi *s)
 695 | {
 696 |    int z = get8(s);
 697 |    return (z << 8) + get8(s);
 698 | }
 699 | 
 700 | static uint32 get32(stbi *s)
 701 | {
 702 |    uint32 z = get16(s);
 703 |    return (z << 16) + get16(s);
 704 | }
 705 | 
 706 | static int get16le(stbi *s)
 707 | {
 708 |    int z = get8(s);
 709 |    return z + (get8(s) << 8);
 710 | }
 711 | 
 712 | static uint32 get32le(stbi *s)
 713 | {
 714 |    uint32 z = get16le(s);
 715 |    return z + (get16le(s) << 16);
 716 | }
 717 | 
 718 | static void getn(stbi *s, stbi_uc *buffer, int n)
 719 | {
 720 | #ifndef STBI_NO_STDIO
 721 |    if (s->img_file) {
 722 |       fread(buffer, 1, n, s->img_file);
 723 |       return;
 724 |    }
 725 | #endif
 726 |    memcpy(buffer, s->img_buffer, n);
 727 |    s->img_buffer += n;
 728 | }
 729 | 
 730 | //////////////////////////////////////////////////////////////////////////////
 731 | //
 732 | //  generic converter from built-in img_n to req_comp
 733 | //    individual types do this automatically as much as possible (e.g. jpeg
 734 | //    does all cases internally since it needs to colorspace convert anyway,
 735 | //    and it never has alpha, so very few cases ). png can automatically
 736 | //    interleave an alpha=255 channel, but falls back to this for other cases
 737 | //
 738 | //  assume data buffer is malloced, so malloc a new one and free that one
 739 | //  only failure mode is malloc failing
 740 | 
 741 | static uint8 compute_y(int r, int g, int b)
 742 | {
 743 |    return (uint8) (((r*77) + (g*150) +  (29*b)) >> 8);
 744 | }
 745 | 
 746 | static unsigned char *convert_format(unsigned char *data, int img_n, int req_comp, uint x, uint y)
 747 | {
 748 |    int i,j;
 749 |    unsigned char *good;
 750 | 
 751 |    if (req_comp == img_n) return data;
 752 |    assert(req_comp >= 1 && req_comp <= 4);
 753 | 
 754 |    good = (unsigned char *) malloc(req_comp * x * y);
 755 |    if (good == NULL) {
 756 |       free(data);
 757 |       return epuc("outofmem", "Out of memory");
 758 |    }
 759 | 
 760 |    for (j=0; j < (int) y; ++j) {
 761 |       unsigned char *src  = data + j * x * img_n   ;
 762 |       unsigned char *dest = good + j * x * req_comp;
 763 | 
 764 |       #define COMBO(a,b)  ((a)*8+(b))
 765 |       #define CASE(a,b)   case COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
 766 |       // convert source image with img_n components to one with req_comp components;
 767 |       // avoid switch per pixel, so use switch per scanline and massive macros
 768 |       switch(COMBO(img_n, req_comp)) {
 769 |          CASE(1,2) dest[0]=src[0], dest[1]=255; break;
 770 |          CASE(1,3) dest[0]=dest[1]=dest[2]=src[0]; break;
 771 |          CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; break;
 772 |          CASE(2,1) dest[0]=src[0]; break;
 773 |          CASE(2,3) dest[0]=dest[1]=dest[2]=src[0]; break;
 774 |          CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; break;
 775 |          CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; break;
 776 |          CASE(3,1) dest[0]=compute_y(src[0],src[1],src[2]); break;
 777 |          CASE(3,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = 255; break;
 778 |          CASE(4,1) dest[0]=compute_y(src[0],src[1],src[2]); break;
 779 |          CASE(4,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = src[3]; break;
 780 |          CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; break;
 781 |          default: assert(0);
 782 |       }
 783 |       #undef CASE
 784 |    }
 785 | 
 786 |    free(data);
 787 |    return good;
 788 | }
 789 | 
 790 | #ifndef STBI_NO_HDR
 791 | static float   *ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
 792 | {
 793 |    int i,k,n;
 794 |    float *output = (float *) malloc(x * y * comp * sizeof(float));
 795 |    if (output == NULL) { free(data); return epf("outofmem", "Out of memory"); }
 796 |    // compute number of non-alpha components
 797 |    if (comp & 1) n = comp; else n = comp-1;
 798 |    for (i=0; i < x*y; ++i) {
 799 |       for (k=0; k < n; ++k) {
 800 |          output[i*comp + k] = (float) pow(data[i*comp+k]/255.0f, l2h_gamma) * l2h_scale;
 801 |       }
 802 |       if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f;
 803 |    }
 804 |    free(data);
 805 |    return output;
 806 | }
 807 | 
 808 | #define float2int(x)   ((int) (x))
 809 | static stbi_uc *hdr_to_ldr(float   *data, int x, int y, int comp)
 810 | {
 811 |    int i,k,n;
 812 |    stbi_uc *output = (stbi_uc *) malloc(x * y * comp);
 813 |    if (output == NULL) { free(data); return epuc("outofmem", "Out of memory"); }
 814 |    // compute number of non-alpha components
 815 |    if (comp & 1) n = comp; else n = comp-1;
 816 |    for (i=0; i < x*y; ++i) {
 817 |       for (k=0; k < n; ++k) {
 818 |          float z = (float) pow(data[i*comp+k]*h2l_scale_i, h2l_gamma_i) * 255 + 0.5f;
 819 |          if (z < 0) z = 0;
 820 |          if (z > 255) z = 255;
 821 |          output[i*comp + k] = float2int(z);
 822 |       }
 823 |       if (k < comp) {
 824 |          float z = data[i*comp+k] * 255 + 0.5f;
 825 |          if (z < 0) z = 0;
 826 |          if (z > 255) z = 255;
 827 |          output[i*comp + k] = float2int(z);
 828 |       }
 829 |    }
 830 |    free(data);
 831 |    return output;
 832 | }
 833 | #endif
 834 | 
 835 | //////////////////////////////////////////////////////////////////////////////
 836 | //
 837 | //  "baseline" JPEG/JFIF decoder (not actually fully baseline implementation)
 838 | //
 839 | //    simple implementation
 840 | //      - channel subsampling of at most 2 in each dimension
 841 | //      - doesn't support delayed output of y-dimension
 842 | //      - simple interface (only one output format: 8-bit interleaved RGB)
 843 | //      - doesn't try to recover corrupt jpegs
 844 | //      - doesn't allow partial loading, loading multiple at once
 845 | //      - still fast on x86 (copying globals into locals doesn't help x86)
 846 | //      - allocates lots of intermediate memory (full size of all components)
 847 | //        - non-interleaved case requires this anyway
 848 | //        - allows good upsampling (see next)
 849 | //    high-quality
 850 | //      - upsampled channels are bilinearly interpolated, even across blocks
 851 | //      - quality integer IDCT derived from IJG's 'slow'
 852 | //    performance
 853 | //      - fast huffman; reasonable integer IDCT
 854 | //      - uses a lot of intermediate memory, could cache poorly
 855 | //      - load http://nothings.org/remote/anemones.jpg 3 times on 2.8Ghz P4
 856 | //          stb_jpeg:   1.34 seconds (MSVC6, default release build)
 857 | //          stb_jpeg:   1.06 seconds (MSVC6, processor = Pentium Pro)
 858 | //          IJL11.dll:  1.08 seconds (compiled by intel)
 859 | //          IJG 1998:   0.98 seconds (MSVC6, makefile provided by IJG)
 860 | //          IJG 1998:   0.95 seconds (MSVC6, makefile + proc=PPro)
 861 | 
// huffman decoding acceleration
#define FAST_BITS   9  // larger handles more cases; smaller stomps less cache

// One JPEG Huffman table in the form built by build_huffman() and consumed by
// decode(). Symbols are indexed 0..k-1 in order of increasing code length.
typedef struct
{
   // indexed by the next FAST_BITS bits of the stream: gives the symbol index
   // for codes of at most FAST_BITS bits, or 255 for "not accelerated"
   uint8  fast[1 << FAST_BITS];
   // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
   uint16 code[256];      // huffman code assigned to each symbol index
   uint8  values[256];    // decoded value for each symbol index (filled from the DHT segment)
   uint8  size[257];      // code length per symbol index, followed by a 0 terminator
   unsigned int maxcode[18];  // [1..16]: largest code + 1 per length, preshifted to 16 bits; [17]: 0xffffffff sentinel
   int    delta[17];   // old 'firstsymbol' - old 'firstcode'
} huffman;
 875 | 
// Complete state of the JPEG decoder: input stream, tables, per-component
// layout and the entropy-decoder bit buffer.
typedef struct
{
   #if STBI_SIMD
   // 16-bit copy of dequant[], filled alongside it when a DQT segment is parsed
   unsigned short dequant2[4][64];
   #endif
   stbi s;                 // input source and image properties
   huffman huff_dc[4];     // DC huffman tables, indexed by table id 0..3
   huffman huff_ac[4];     // AC huffman tables, indexed by table id 0..3
   uint8 dequant[4][64];   // quantization tables, stored de-zigzagged, indexed by table id 0..3

// sizes for components, interleaved MCUs
   // NOTE(review): presumably the maximum sampling factors over all
   // components; assigned outside this section -- confirm.
   int img_h_max, img_v_max;
   // number of interleaved MCUs across and down (loop bounds of the interleaved decode)
   int img_mcu_x, img_mcu_y;
   // NOTE(review): presumably the MCU size in pixels -- confirm.
   int img_mcu_w, img_mcu_h;

// definition of jpeg image component
   struct
   {
      int id;        // component id, matched against the ids listed in the scan header
      int h,v;       // blocks per MCU horizontally / vertically for this component
      int tq;        // index of the quantization table to use
      int hd,ha;     // indices of the DC and AC huffman tables (each 0..3)
      int dc_pred;   // running DC predictor; zeroed by reset()

      // x,y: size of the component in samples; w2: row stride of data in bytes.
      // NOTE(review): h2 is presumably the padded height -- confirm.
      int x,y,w2,h2;
      uint8 *data;      // decoded samples; idct_block writes 8x8 blocks here
      // NOTE(review): raw_data and linebuf are not used in this section;
      // presumably the underlying allocation and an upsampling scratch line.
      void *raw_data;
      uint8 *linebuf;
   } img_comp[4];

   uint32         code_buffer; // jpeg entropy-coded buffer
   int            code_bits;   // number of valid bits
   unsigned char  marker;      // marker seen while filling entropy buffer
   int            nomore;      // flag if we saw a marker so must stop

   // number of components in the current scan, and for each the index into img_comp[]
   int scan_n, order[4];
   // restart_interval: value from the DRI segment (0 = none);
   // todo: MCUs left to decode before the next restart marker is expected
   int restart_interval, todo;
} jpeg;
 914 | 
 915 | static int build_huffman(huffman *h, int *count)
 916 | {
 917 |    int i,j,k=0,code;
 918 |    // build size list for each symbol (from JPEG spec)
 919 |    for (i=0; i < 16; ++i)
 920 |       for (j=0; j < count[i]; ++j)
 921 |          h->size[k++] = (uint8) (i+1);
 922 |    h->size[k] = 0;
 923 | 
 924 |    // compute actual symbols (from jpeg spec)
 925 |    code = 0;
 926 |    k = 0;
 927 |    for(j=1; j <= 16; ++j) {
 928 |       // compute delta to add to code to compute symbol id
 929 |       h->delta[j] = k - code;
 930 |       if (h->size[k] == j) {
 931 |          while (h->size[k] == j)
 932 |             h->code[k++] = (uint16) (code++);
 933 |          if (code-1 >= (1 << j)) return e("bad code lengths","Corrupt JPEG");
 934 |       }
 935 |       // compute largest code + 1 for this size, preshifted as needed later
 936 |       h->maxcode[j] = code << (16-j);
 937 |       code <<= 1;
 938 |    }
 939 |    h->maxcode[j] = 0xffffffff;
 940 | 
 941 |    // build non-spec acceleration table; 255 is flag for not-accelerated
 942 |    memset(h->fast, 255, 1 << FAST_BITS);
 943 |    for (i=0; i < k; ++i) {
 944 |       int s = h->size[i];
 945 |       if (s <= FAST_BITS) {
 946 |          int c = h->code[i] << (FAST_BITS-s);
 947 |          int m = 1 << (FAST_BITS-s);
 948 |          for (j=0; j < m; ++j) {
 949 |             h->fast[c+j] = (uint8) i;
 950 |          }
 951 |       }
 952 |    }
 953 |    return 1;
 954 | }
 955 | 
 956 | static void grow_buffer_unsafe(jpeg *j)
 957 | {
 958 |    do {
 959 |       int b = j->nomore ? 0 : get8(&j->s);
 960 |       if (b == 0xff) {
 961 |          int c = get8(&j->s);
 962 |          if (c != 0) {
 963 |             j->marker = (unsigned char) c;
 964 |             j->nomore = 1;
 965 |             return;
 966 |          }
 967 |       }
 968 |       j->code_buffer = (j->code_buffer << 8) | b;
 969 |       j->code_bits += 8;
 970 |    } while (j->code_bits <= 24);
 971 | }
 972 | 
 973 | // (1 << n) - 1
 974 | static uint32 bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
 975 | 
 976 | // decode a jpeg huffman value from the bitstream
 977 | __forceinline static int decode(jpeg *j, huffman *h)
 978 | {
 979 |    unsigned int temp;
 980 |    int c,k;
 981 | 
 982 |    if (j->code_bits < 16) grow_buffer_unsafe(j);
 983 | 
 984 |    // look at the top FAST_BITS and determine what symbol ID it is,
 985 |    // if the code is <= FAST_BITS
 986 |    c = (j->code_buffer >> (j->code_bits - FAST_BITS)) & ((1 << FAST_BITS)-1);
 987 |    k = h->fast[c];
 988 |    if (k < 255) {
 989 |       if (h->size[k] > j->code_bits)
 990 |          return -1;
 991 |       j->code_bits -= h->size[k];
 992 |       return h->values[k];
 993 |    }
 994 | 
 995 |    // naive test is to shift the code_buffer down so k bits are
 996 |    // valid, then test against maxcode. To speed this up, we've
 997 |    // preshifted maxcode left so that it has (16-k) 0s at the
 998 |    // end; in other words, regardless of the number of bits, it
 999 |    // wants to be compared against something shifted to have 16;
1000 |    // that way we don't need to shift inside the loop.
1001 |    if (j->code_bits < 16)
1002 |       temp = (j->code_buffer << (16 - j->code_bits)) & 0xffff;
1003 |    else
1004 |       temp = (j->code_buffer >> (j->code_bits - 16)) & 0xffff;
1005 |    for (k=FAST_BITS+1 ; ; ++k)
1006 |       if (temp < h->maxcode[k])
1007 |          break;
1008 |    if (k == 17) {
1009 |       // error! code not found
1010 |       j->code_bits -= 16;
1011 |       return -1;
1012 |    }
1013 | 
1014 |    if (k > j->code_bits)
1015 |       return -1;
1016 | 
1017 |    // convert the huffman code to the symbol id
1018 |    c = ((j->code_buffer >> (j->code_bits - k)) & bmask[k]) + h->delta[k];
1019 |    assert((((j->code_buffer) >> (j->code_bits - h->size[c])) & bmask[h->size[c]]) == h->code[c]);
1020 | 
1021 |    // convert the id to a symbol
1022 |    j->code_bits -= k;
1023 |    return h->values[c];
1024 | }
1025 | 
1026 | // combined JPEG 'receive' and JPEG 'extend', since baseline
1027 | // always extends everything it receives.
1028 | __forceinline static int extend_receive(jpeg *j, int n)
1029 | {
1030 |    unsigned int m = 1 << (n-1);
1031 |    unsigned int k;
1032 |    if (j->code_bits < n) grow_buffer_unsafe(j);
1033 |    k = (j->code_buffer >> (j->code_bits - n)) & bmask[n];
1034 |    j->code_bits -= n;
1035 |    // the following test is probably a random branch that won't
1036 |    // predict well. I tried to table accelerate it but failed.
1037 |    // maybe it's compiling as a conditional move?
1038 |    if (k < m)
1039 |       return (-1 << n) + k + 1;
1040 |    else
1041 |       return k;
1042 | }
1043 | 
// given a value that's at position X in the zigzag stream,
// where does it appear in the 8x8 matrix coded as row-major?
//
// The 15 extra entries exist because decode_block() adds a run length of up
// to 15 to an index below 64 before looking it up, so a corrupt stream can
// index as far as 78; all of those land on position 63.
static uint8 dezigzag[64+15] =
{
    0,  1,  8, 16,  9,  2,  3, 10,
   17, 24, 32, 25, 18, 11,  4,  5,
   12, 19, 26, 33, 40, 48, 41, 34,
   27, 20, 13,  6,  7, 14, 21, 28,
   35, 42, 49, 56, 57, 50, 43, 36,
   29, 22, 15, 23, 30, 37, 44, 51,
   58, 59, 52, 45, 38, 31, 39, 46,
   53, 60, 61, 54, 47, 55, 62, 63,
   // let corrupt input sample past end
   63, 63, 63, 63, 63, 63, 63, 63,
   63, 63, 63, 63, 63, 63, 63
};
1060 | 
1061 | // decode one 64-entry block--
1062 | static int decode_block(jpeg *j, short data[64], huffman *hdc, huffman *hac, int b)
1063 | {
1064 |    int diff,dc,k;
1065 |    int t = decode(j, hdc);
1066 |    if (t < 0) return e("bad huffman code","Corrupt JPEG");
1067 | 
1068 |    // 0 all the ac values now so we can do it 32-bits at a time
1069 |    memset(data,0,64*sizeof(data[0]));
1070 | 
1071 |    diff = t ? extend_receive(j, t) : 0;
1072 |    dc = j->img_comp[b].dc_pred + diff;
1073 |    j->img_comp[b].dc_pred = dc;
1074 |    data[0] = (short) dc;
1075 | 
1076 |    // decode AC components, see JPEG spec
1077 |    k = 1;
1078 |    do {
1079 |       int r,s;
1080 |       int rs = decode(j, hac);
1081 |       if (rs < 0) return e("bad huffman code","Corrupt JPEG");
1082 |       s = rs & 15;
1083 |       r = rs >> 4;
1084 |       if (s == 0) {
1085 |          if (rs != 0xf0) break; // end block
1086 |          k += 16;
1087 |       } else {
1088 |          k += r;
1089 |          // decode into unzigzag'd location
1090 |          data[dezigzag[k++]] = (short) extend_receive(j,s);
1091 |       }
1092 |    } while (k < 64);
1093 |    return 1;
1094 | }
1095 | 
1096 | // take a -128..127 value and clamp it and convert to 0..255
1097 | __forceinline static uint8 clamp(int x)
1098 | {
1099 |    x += 128;
1100 |    // trick to use a single test to catch both cases
1101 |    if ((unsigned int) x > 255) {
1102 |       if (x < 0) return 0;
1103 |       if (x > 255) return 255;
1104 |    }
1105 |    return (uint8) x;
1106 | }
1107 | 
// f2f: convert a real constant to fixed point with 12 fractional bits
//      (multiply by 4096, add 0.5, truncate to int).
// fsh: scale an integer expression up by the same 12 bits.
#define f2f(x)  (int) (((x) * 4096 + 0.5))
#define fsh(x)  ((x) << 12)

// derived from jidctint -- DCT_ISLOW
//
// One 8-point 1-D inverse DCT over the inputs s0..s7.
// The macro DECLARES its working variables (t0..t3, p1..p5, x0..x3), so it
// can be expanded at most once per scope and only where a declaration is
// allowed. It leaves its results in x0..x3 (even part) and t0..t3 (odd part),
// all scaled up by 1<<12; the caller forms the eight outputs as
// x0+t3, x1+t2, x2+t1, x3+t0, x3-t0, x2-t1, x1-t2, x0-t3 and shifts the
// scale back out.
// Each sN argument is evaluated exactly once.
#define IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7)       \
   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
   p2 = s2;                                    \
   p3 = s6;                                    \
   p1 = (p2+p3) * f2f(0.5411961f);             \
   t2 = p1 + p3*f2f(-1.847759065f);            \
   t3 = p1 + p2*f2f( 0.765366865f);            \
   p2 = s0;                                    \
   p3 = s4;                                    \
   t0 = fsh(p2+p3);                            \
   t1 = fsh(p2-p3);                            \
   x0 = t0+t3;                                 \
   x3 = t0-t3;                                 \
   x1 = t1+t2;                                 \
   x2 = t1-t2;                                 \
   t0 = s7;                                    \
   t1 = s5;                                    \
   t2 = s3;                                    \
   t3 = s1;                                    \
   p3 = t0+t2;                                 \
   p4 = t1+t3;                                 \
   p1 = t0+t3;                                 \
   p2 = t1+t2;                                 \
   p5 = (p3+p4)*f2f( 1.175875602f);            \
   t0 = t0*f2f( 0.298631336f);                 \
   t1 = t1*f2f( 2.053119869f);                 \
   t2 = t2*f2f( 3.072711026f);                 \
   t3 = t3*f2f( 1.501321110f);                 \
   p1 = p5 + p1*f2f(-0.899976223f);            \
   p2 = p5 + p2*f2f(-2.562915447f);            \
   p3 = p3*f2f(-1.961570560f);                 \
   p4 = p4*f2f(-0.390180644f);                 \
   t3 += p1+p4;                                \
   t2 += p2+p3;                                \
   t1 += p2+p4;                                \
   t0 += p1+p3;
1148 | 
1149 | #if !STBI_SIMD
// .344 seconds on 3*anemones.jpg
//
// Dequantize one 8x8 coefficient block and run a 2-D inverse DCT on it
// (a column pass into val[], then a row pass), writing 8x8 clamped samples.
//
//   out         top-left destination sample of the block
//   out_stride  distance in bytes between consecutive output rows
//   data        64 coefficients in row-major order
//   dequantize  64 multipliers in the same order as data
static void idct_block(uint8 *out, int out_stride, short data[64], uint8 *dequantize)
{
   int i,val[64],*v=val;
   uint8 *o,*dq = dequantize;
   short *d = data;

   // columns
   // (d, dq and v step one column per iteration; rows are 8 entries apart)
   for (i=0; i < 8; ++i,++d,++dq, ++v) {
      // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
      if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
           && d[40]==0 && d[48]==0 && d[56]==0) {
         //    no shortcut                 0     seconds
         //    (1|2|3|4|5|6|7)==0          0     seconds
         //    all separate               -0.047 seconds
         //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
         // a DC-only column is constant; << 2 gives it the same 2 extra
         // bits of precision the general case keeps
         int dcterm = d[0] * dq[0] << 2;
         v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
      } else {
         IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24],
                 d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56])
         // constants scaled things up by 1<<12; let's bring them back
         // down, but keep 2 extra bits of precision
         // (512 is half of 1<<10, so the shifts below round to nearest)
         x0 += 512; x1 += 512; x2 += 512; x3 += 512;
         v[ 0] = (x0+t3) >> 10;
         v[56] = (x0-t3) >> 10;
         v[ 8] = (x1+t2) >> 10;
         v[48] = (x1-t2) >> 10;
         v[16] = (x2+t1) >> 10;
         v[40] = (x2-t1) >> 10;
         v[24] = (x3+t0) >> 10;
         v[32] = (x3-t0) >> 10;
      }
   }

   // rows: one output row per iteration
   for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
      // no fast case since the first 1D IDCT spread components out
      IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
      // constants scaled things up by 1<<12, plus we had 1<<2 from first
      // loop, plus horizontal and vertical each scale by sqrt(8) so together
      // we've got an extra 1<<3, so 1<<17 total we need to remove.
      // (65536 is half of 1<<17, for rounding)
      x0 += 65536; x1 += 65536; x2 += 65536; x3 += 65536;
      o[0] = clamp((x0+t3) >> 17);
      o[7] = clamp((x0-t3) >> 17);
      o[1] = clamp((x1+t2) >> 17);
      o[6] = clamp((x1-t2) >> 17);
      o[2] = clamp((x2+t1) >> 17);
      o[5] = clamp((x2-t1) >> 17);
      o[3] = clamp((x3+t0) >> 17);
      o[4] = clamp((x3-t0) >> 17);
   }
}
1202 | #else
// STBI_SIMD build of the default IDCT: the same computation as the
// non-SIMD idct_block, except that the dequantization multipliers are
// 16-bit values.
//
//   out         top-left destination sample of the block
//   out_stride  distance in bytes between consecutive output rows
//   data        64 coefficients in row-major order
//   dequantize  64 multipliers in the same order as data
static void idct_block(uint8 *out, int out_stride, short data[64], unsigned short *dequantize)
{
   int i,val[64],*v=val;
   uint8 *o;
   unsigned short *dq = dequantize;
   short *d = data;

   // columns
   // (d, dq and v step one column per iteration; rows are 8 entries apart)
   for (i=0; i < 8; ++i,++d,++dq, ++v) {
      // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
      if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
           && d[40]==0 && d[48]==0 && d[56]==0) {
         //    no shortcut                 0     seconds
         //    (1|2|3|4|5|6|7)==0          0     seconds
         //    all separate               -0.047 seconds
         //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
         // a DC-only column is constant; << 2 gives it the same 2 extra
         // bits of precision the general case keeps
         int dcterm = d[0] * dq[0] << 2;
         v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
      } else {
         IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24],
                 d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56])
         // constants scaled things up by 1<<12; let's bring them back
         // down, but keep 2 extra bits of precision
         // (512 is half of 1<<10, so the shifts below round to nearest)
         x0 += 512; x1 += 512; x2 += 512; x3 += 512;
         v[ 0] = (x0+t3) >> 10;
         v[56] = (x0-t3) >> 10;
         v[ 8] = (x1+t2) >> 10;
         v[48] = (x1-t2) >> 10;
         v[16] = (x2+t1) >> 10;
         v[40] = (x2-t1) >> 10;
         v[24] = (x3+t0) >> 10;
         v[32] = (x3-t0) >> 10;
      }
   }

   // rows: one output row per iteration
   for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
      // no fast case since the first 1D IDCT spread components out
      IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
      // constants scaled things up by 1<<12, plus we had 1<<2 from first
      // loop, plus horizontal and vertical each scale by sqrt(8) so together
      // we've got an extra 1<<3, so 1<<17 total we need to remove.
      // (65536 is half of 1<<17, for rounding)
      x0 += 65536; x1 += 65536; x2 += 65536; x3 += 65536;
      o[0] = clamp((x0+t3) >> 17);
      o[7] = clamp((x0-t3) >> 17);
      o[1] = clamp((x1+t2) >> 17);
      o[6] = clamp((x1-t2) >> 17);
      o[2] = clamp((x2+t1) >> 17);
      o[5] = clamp((x2-t1) >> 17);
      o[3] = clamp((x3+t0) >> 17);
      o[4] = clamp((x3-t0) >> 17);
   }
}
// IDCT routine the JPEG decoder calls in STBI_SIMD builds; defaults to the
// portable idct_block above.
static stbi_idct_8x8 stbi_idct_installed = idct_block;

// Replace the IDCT routine used by the decoder. The pointer is stored as
// given, with no NULL check, and is called for every block decoded afterwards.
extern void stbi_install_idct(stbi_idct_8x8 func)
{
   stbi_idct_installed = func;
}
1261 | #endif
1262 | 
1263 | #define MARKER_none  0xff
1264 | // if there's a pending marker from the entropy stream, return that
1265 | // otherwise, fetch from the stream and get a marker. if there's no
1266 | // marker, return 0xff, which is never a valid marker value
1267 | static uint8 get_marker(jpeg *j)
1268 | {
1269 |    uint8 x;
1270 |    if (j->marker != MARKER_none) { x = j->marker; j->marker = MARKER_none; return x; }
1271 |    x = get8u(&j->s);
1272 |    if (x != 0xff) return MARKER_none;
1273 |    while (x == 0xff)
1274 |       x = get8u(&j->s);
1275 |    return x;
1276 | }
1277 | 
1278 | // in each scan, we'll have scan_n components, and the order
1279 | // of the components is specified by order[]
1280 | #define RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)
1281 | 
1282 | // after a restart interval, reset the entropy decoder and
1283 | // the dc prediction
1284 | static void reset(jpeg *j)
1285 | {
1286 |    j->code_bits = 0;
1287 |    j->code_buffer = 0;
1288 |    j->nomore = 0;
1289 |    j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0;
1290 |    j->marker = MARKER_none;
1291 |    j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
1292 |    // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
1293 |    // since we don't even allow 1<<30 pixels
1294 | }
1295 | 
1296 | static int parse_entropy_coded_data(jpeg *z)
1297 | {
1298 |    reset(z);
1299 |    if (z->scan_n == 1) {
1300 |       int i,j;
1301 |       #if STBI_SIMD
1302 |       __declspec(align(16))
1303 |       #endif
1304 |       short data[64];
1305 |       int n = z->order[0];
1306 |       // non-interleaved data, we just need to process one block at a time,
1307 |       // in trivial scanline order
1308 |       // number of blocks to do just depends on how many actual "pixels" this
1309 |       // component has, independent of interleaved MCU blocking and such
1310 |       int w = (z->img_comp[n].x+7) >> 3;
1311 |       int h = (z->img_comp[n].y+7) >> 3;
1312 |       for (j=0; j < h; ++j) {
1313 |          for (i=0; i < w; ++i) {
1314 |             if (!decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0;
1315 |             #if STBI_SIMD
1316 |             stbi_idct_installed(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]);
1317 |             #else
1318 |             idct_block(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]);
1319 |             #endif
1320 |             // every data block is an MCU, so countdown the restart interval
1321 |             if (--z->todo <= 0) {
1322 |                if (z->code_bits < 24) grow_buffer_unsafe(z);
1323 |                // if it's NOT a restart, then just bail, so we get corrupt data
1324 |                // rather than no data
1325 |                if (!RESTART(z->marker)) return 1;
1326 |                reset(z);
1327 |             }
1328 |          }
1329 |       }
1330 |    } else { // interleaved!
1331 |       int i,j,k,x,y;
1332 |       short data[64];
1333 |       for (j=0; j < z->img_mcu_y; ++j) {
1334 |          for (i=0; i < z->img_mcu_x; ++i) {
1335 |             // scan an interleaved mcu... process scan_n components in order
1336 |             for (k=0; k < z->scan_n; ++k) {
1337 |                int n = z->order[k];
1338 |                // scan out an mcu's worth of this component; that's just determined
1339 |                // by the basic H and V specified for the component
1340 |                for (y=0; y < z->img_comp[n].v; ++y) {
1341 |                   for (x=0; x < z->img_comp[n].h; ++x) {
1342 |                      int x2 = (i*z->img_comp[n].h + x)*8;
1343 |                      int y2 = (j*z->img_comp[n].v + y)*8;
1344 |                      if (!decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0;
1345 |                      #if STBI_SIMD
1346 |                      stbi_idct_installed(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]);
1347 |                      #else
1348 |                      idct_block(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]);
1349 |                      #endif
1350 |                   }
1351 |                }
1352 |             }
1353 |             // after all interleaved components, that's an interleaved MCU,
1354 |             // so now count down the restart interval
1355 |             if (--z->todo <= 0) {
1356 |                if (z->code_bits < 24) grow_buffer_unsafe(z);
1357 |                // if it's NOT a restart, then just bail, so we get corrupt data
1358 |                // rather than no data
1359 |                if (!RESTART(z->marker)) return 1;
1360 |                reset(z);
1361 |             }
1362 |          }
1363 |       }
1364 |    }
1365 |    return 1;
1366 | }
1367 | 
1368 | static int process_marker(jpeg *z, int m)
1369 | {
1370 |    int L;
1371 |    switch (m) {
1372 |       case MARKER_none: // no marker found
1373 |          return e("expected marker","Corrupt JPEG");
1374 | 
1375 |       case 0xC2: // SOF - progressive
1376 |          return e("progressive jpeg","JPEG format not supported (progressive)");
1377 | 
1378 |       case 0xDD: // DRI - specify restart interval
1379 |          if (get16(&z->s) != 4) return e("bad DRI len","Corrupt JPEG");
1380 |          z->restart_interval = get16(&z->s);
1381 |          return 1;
1382 | 
1383 |       case 0xDB: // DQT - define quantization table
1384 |          L = get16(&z->s)-2;
1385 |          while (L > 0) {
1386 |             int q = get8(&z->s);
1387 |             int p = q >> 4;
1388 |             int t = q & 15,i;
1389 |             if (p != 0) return e("bad DQT type","Corrupt JPEG");
1390 |             if (t > 3) return e("bad DQT table","Corrupt JPEG");
1391 |             for (i=0; i < 64; ++i)
1392 |                z->dequant[t][dezigzag[i]] = get8u(&z->s);
1393 |             #if STBI_SIMD
1394 |             for (i=0; i < 64; ++i)
1395 |                z->dequant2[t][i] = z->dequant[t][i];
1396 |             #endif
1397 |             L -= 65;
1398 |          }
1399 |          return L==0;
1400 | 
1401 |       case 0xC4: // DHT - define huffman table
1402 |          L = get16(&z->s)-2;
1403 |          while (L > 0) {
1404 |             uint8 *v;
1405 |             int sizes[16],i,m=0;
1406 |             int q = get8(&z->s);
1407 |             int tc = q >> 4;
1408 |             int th = q & 15;
1409 |             if (tc > 1 || th > 3) return e("bad DHT header","Corrupt JPEG");
1410 |             for (i=0; i < 16; ++i) {
1411 |                sizes[i] = get8(&z->s);
1412 |                m += sizes[i];
1413 |             }
1414 |             L -= 17;
1415 |             if (tc == 0) {
1416 |                if (!build_huffman(z->huff_dc+th, sizes)) return 0;
1417 |                v = z->huff_dc[th].values;
1418 |             } else {
1419 |                if (!build_huffman(z->huff_ac+th, sizes)) return 0;
1420 |                v = z->huff_ac[th].values;
1421 |             }
1422 |             for (i=0; i < m; ++i)
1423 |                v[i] = get8u(&z->s);
1424 |             L -= m;
1425 |          }
1426 |          return L==0;
1427 |    }
1428 |    // check for comment block or APP blocks
1429 |    if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
1430 |       skip(&z->s, get16(&z->s)-2);
1431 |       return 1;
1432 |    }
1433 |    return 0;
1434 | }
1435 | 
1436 | // after we see SOS
1437 | static int process_scan_header(jpeg *z)
1438 | {
1439 |    int i;
1440 |    int Ls = get16(&z->s);
1441 |    z->scan_n = get8(&z->s);
1442 |    if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s.img_n) return e("bad SOS component count","Corrupt JPEG");
1443 |    if (Ls != 6+2*z->scan_n) return e("bad SOS len","Corrupt JPEG");
1444 |    for (i=0; i < z->scan_n; ++i) {
1445 |       int id = get8(&z->s), which;
1446 |       int q = get8(&z->s);
1447 |       for (which = 0; which < z->s.img_n; ++which)
1448 |          if (z->img_comp[which].id == id)
1449 |             break;
1450 |       if (which == z->s.img_n) return 0;
1451 |       z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return e("bad DC huff","Corrupt JPEG");
1452 |       z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return e("bad AC huff","Corrupt JPEG");
1453 |       z->order[i] = which;
1454 |    }
1455 |    if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG");
1456 |    get8(&z->s); // should be 63, but might be 0
1457 |    if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG");
1458 | 
1459 |    return 1;
1460 | }
1461 | 
1462 | static int process_frame_header(jpeg *z, int scan)
1463 | {
1464 |    stbi *s = &z->s;
1465 |    int Lf,p,i,q, h_max=1,v_max=1,c;
1466 |    Lf = get16(s);         if (Lf < 11) return e("bad SOF len","Corrupt JPEG"); // JPEG
1467 |    p  = get8(s);          if (p != 8) return e("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
1468 |    s->img_y = get16(s);   if (s->img_y == 0) return e("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
1469 |    s->img_x = get16(s);   if (s->img_x == 0) return e("0 width","Corrupt JPEG"); // JPEG requires
1470 |    c = get8(s);
1471 |    if (c != 3 && c != 1) return e("bad component count","Corrupt JPEG");    // JFIF requires
1472 |    s->img_n = c;
1473 |    for (i=0; i < c; ++i) {
1474 |       z->img_comp[i].data = NULL;
1475 |       z->img_comp[i].linebuf = NULL;
1476 |    }
1477 | 
1478 |    if (Lf != 8+3*s->img_n) return e("bad SOF len","Corrupt JPEG");
1479 | 
1480 |    for (i=0; i < s->img_n; ++i) {
1481 |       z->img_comp[i].id = get8(s);
1482 |       if (z->img_comp[i].id != i+1)   // JFIF requires
1483 |          if (z->img_comp[i].id != i)  // some version of jpegtran outputs non-JFIF-compliant files!
1484 |             return e("bad component ID","Corrupt JPEG");
1485 |       q = get8(s);
1486 |       z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return e("bad H","Corrupt JPEG");
1487 |       z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return e("bad V","Corrupt JPEG");
1488 |       z->img_comp[i].tq = get8(s);  if (z->img_comp[i].tq > 3) return e("bad TQ","Corrupt JPEG");
1489 |    }
1490 | 
1491 |    if (scan != SCAN_load) return 1;
1492 | 
1493 |    if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode");
1494 | 
1495 |    for (i=0; i < s->img_n; ++i) {
1496 |       if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
1497 |       if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
1498 |    }
1499 | 
1500 |    // compute interleaved mcu info
1501 |    z->img_h_max = h_max;
1502 |    z->img_v_max = v_max;
1503 |    z->img_mcu_w = h_max * 8;
1504 |    z->img_mcu_h = v_max * 8;
1505 |    z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
1506 |    z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;
1507 | 
1508 |    for (i=0; i < s->img_n; ++i) {
1509 |       // number of effective pixels (e.g. for non-interleaved MCU)
1510 |       z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
1511 |       z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
1512 |       // to simplify generation, we'll allocate enough memory to decode
1513 |       // the bogus oversized data from using interleaved MCUs and their
1514 |       // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
1515 |       // discard the extra data until colorspace conversion
1516 |       z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
1517 |       z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
1518 |       z->img_comp[i].raw_data = malloc(z->img_comp[i].w2 * z->img_comp[i].h2+15);
1519 |       if (z->img_comp[i].raw_data == NULL) {
1520 |          for(--i; i >= 0; --i) {
1521 |             free(z->img_comp[i].raw_data);
1522 |             z->img_comp[i].data = NULL;
1523 |          }
1524 |          return e("outofmem", "Out of memory");
1525 |       }
1526 |       // align blocks for installable-idct using mmx/sse
1527 |       z->img_comp[i].data = (uint8*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
1528 |       z->img_comp[i].linebuf = NULL;
1529 |    }
1530 | 
1531 |    return 1;
1532 | }
1533 | 
// use comparisons since in some cases we handle more than one case (e.g. SOF)
// Each macro is a predicate on a marker code x (as returned by get_marker)
// and evaluates to nonzero when x is the named marker.  SOF evaluates x
// twice, so pass an expression without side effects.
#define DNL(x)         ((x) == 0xdc)
#define SOI(x)         ((x) == 0xd8)
#define EOI(x)         ((x) == 0xd9)
#define SOF(x)         ((x) == 0xc0 || (x) == 0xc1)
#define SOS(x)         ((x) == 0xda)
1540 | 
1541 | static int decode_jpeg_header(jpeg *z, int scan)
1542 | {
1543 |    int m;
1544 |    z->marker = MARKER_none; // initialize cached marker to empty
1545 |    m = get_marker(z);
1546 |    if (!SOI(m)) return e("no SOI","Corrupt JPEG");
1547 |    if (scan == SCAN_type) return 1;
1548 |    m = get_marker(z);
1549 |    while (!SOF(m)) {
1550 |       if (!process_marker(z,m)) return 0;
1551 |       m = get_marker(z);
1552 |       while (m == MARKER_none) {
1553 |          // some files have extra padding after their blocks, so ok, we'll scan
1554 |          if (at_eof(&z->s)) return e("no SOF", "Corrupt JPEG");
1555 |          m = get_marker(z);
1556 |       }
1557 |    }
1558 |    if (!process_frame_header(z, scan)) return 0;
1559 |    return 1;
1560 | }
1561 | 
1562 | static int decode_jpeg_image(jpeg *j)
1563 | {
1564 |    int m;
1565 |    j->restart_interval = 0;
1566 |    if (!decode_jpeg_header(j, SCAN_load)) return 0;
1567 |    m = get_marker(j);
1568 |    while (!EOI(m)) {
1569 |       if (SOS(m)) {
1570 |          if (!process_scan_header(j)) return 0;
1571 |          if (!parse_entropy_coded_data(j)) return 0;
1572 |       } else {
1573 |          if (!process_marker(j, m)) return 0;
1574 |       }
1575 |       m = get_marker(j);
1576 |    }
1577 |    return 1;
1578 | }
1579 | 
// static jfif-centered resampling (across block boundaries)

// Signature shared by all row resamplers: given w input samples from two
// input rows (in0, in1) and a horizontal expansion factor hs, return a
// pointer to the resampled row.  The result is either `out` or one of the
// input rows.
typedef uint8 *(*resample_row_func)(uint8 *out, uint8 *in0, uint8 *in1,
                                    int w, int hs);

// divide by 4 (truncating shift) and narrow to a byte; callers add the
// rounding bias themselves
#define div4(x) ((uint8) ((x) >> 2))
1586 | 
// Resampler for components that need no expansion: nothing is written to
// `out`; the nearer input row is handed back unchanged.
static uint8 *resample_row_1(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
{
   // only in_near is used
   (void) out;
   (void) in_far;
   (void) w;
   (void) hs;
   return in_near;
}
1591 | 
// Vertical 2x resampler: each output sample is the rounded blend
// (3*near + far + 2) / 4 of the vertically adjacent input samples.
// Writes w bytes to `out` and returns it; hs is unused.
static uint8* resample_row_v_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
{
   // need to generate two samples vertically for every one in input
   int x = 0;
   (void) hs;
   while (x < w) {
      int blend = 3*in_near[x] + in_far[x] + 2;
      out[x] = (uint8) (blend >> 2);
      ++x;
   }
   return out;
}
1600 | 
// Horizontal 2x resampler: writes 2*w samples to `out` from the w samples of
// in_near (in_far and hs are unused).  Interior outputs are the rounded
// blend (3*centre + neighbour + 2) / 4; the first and last outputs copy the
// edge samples.  Returns `out`.
static uint8*  resample_row_h_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
{
   // need to generate two samples horizontally for every one in input
   uint8 *src = in_near;
   int x;
   (void) in_far;
   (void) hs;

   if (w == 1) {
      // if only one sample, can't do any interpolation
      out[1] = src[0];
      out[0] = out[1];
      return out;
   }

   // left edge
   out[0] = src[0];
   out[1] = (uint8) ((src[0]*3 + src[1] + 2) >> 2);

   // interior samples blend with the left and right neighbours
   for (x=1; x < w-1; ++x) {
      int centre = 3*src[x]+2;
      out[2*x]   = (uint8) ((centre+src[x-1]) >> 2);
      out[2*x+1] = (uint8) ((centre+src[x+1]) >> 2);
   }

   // right edge; x was left at the last input index by the loop
   out[2*x]   = (uint8) ((src[w-2]*3 + src[w-1] + 2) >> 2);
   out[2*x+1] = src[w-1];
   return out;
}
1623 | 
// divide by 16 (truncating shift) and narrow to a byte
#define div16(x) ((uint8) ((x) >> 4))

// 2x2 resampler: first blends the two input rows vertically as
// 3*near + far, then blends horizontally neighbouring results 3:1, giving
// 2*w output samples in `out`.  The two edge outputs use the vertical blend
// alone.  hs is unused.  Returns `out`.
static uint8 *resample_row_hv_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
{
   // need to generate 2x2 samples for every one in input
   int x, prev, cur;
   (void) hs;

   // vertical blend of the first column (scaled by 4)
   cur = 3*in_near[0] + in_far[0];

   if (w == 1) {
      out[0] = out[1] = div4(cur + 2);
      return out;
   }

   out[0] = div4(cur+2);
   for (x=1; x < w; ++x) {
      prev = cur;
      cur  = 3*in_near[x]+in_far[x];
      out[2*x-1] = div16(3*prev + cur + 8);
      out[2*x]   = div16(3*cur + prev + 8);
   }
   out[2*w-1] = div4(cur+2);
   return out;
}
1646 | 
// Fallback resampler for any other expansion factor: repeats each of the w
// samples of in_near hs times into `out` (in_far is unused).  Returns `out`.
static uint8 *resample_row_generic(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
{
   // resample with nearest-neighbor
   uint8 *dst = out;
   int x, rep;
   (void) in_far;
   for (x=0; x < w; ++x) {
      for (rep=0; rep < hs; ++rep)
         *dst++ = in_near[x];
   }
   return out;
}
1656 | 
// convert a floating-point coefficient to 16.16 fixed point, rounded
#define float2fixed(x)  ((int) ((x) * 65536 + 0.5))

// 0.38 seconds on 3*anemones.jpg   (0.25 with processor = Pro)
// VC6 without processor=Pro is generating multiple LEAs per multiply!
//
// Converts `count` pixels from separate Y, Cb and Cr rows to interleaved
// RGB using 16.16 fixed-point arithmetic, clamping each channel to 0..255.
// `out` advances by `step` bytes per pixel.  A fourth byte (255) is always
// stored, so with step == 3 it is overwritten by the next pixel and the
// last pixel writes one byte past its RGB triple.
static void YCbCr_to_RGB_row(uint8 *out, const uint8 *y, const uint8 *pcb, const uint8 *pcr, int count, int step)
{
   int px;
   for (px=0; px < count; ++px, out += step) {
      const int luma = (y[px] << 16) + 32768; // rounding
      const int cr = pcr[px] - 128;
      const int cb = pcb[px] - 128;
      int r = (luma + cr*float2fixed(1.40200f)) >> 16;
      int g = (luma - cr*float2fixed(0.71414f) - cb*float2fixed(0.34414f)) >> 16;
      int b = (luma + cb*float2fixed(1.77200f)) >> 16;

      // clamp to the byte range
      if (r < 0) r = 0; else if (r > 255) r = 255;
      if (g < 0) g = 0; else if (g > 255) g = 255;
      if (b < 0) b = 0; else if (b > 255) b = 255;

      out[0] = (uint8)r;
      out[1] = (uint8)g;
      out[2] = (uint8)b;
      out[3] = 255;
   }
}
1685 | 
#if STBI_SIMD
// Row converter used by load_jpeg_image when STBI_SIMD is enabled; defaults
// to the portable C implementation.
static stbi_YCbCr_to_RGB_run stbi_YCbCr_installed = YCbCr_to_RGB_row;

// Replaces the YCbCr->RGB row converter with `func`.  The pointer is stored
// in a file-scope variable with no locking.
void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func)
{
   stbi_YCbCr_installed = func;
}
#endif
1694 | 
1695 | 
1696 | // clean up the temporary component buffers
1697 | static void cleanup_jpeg(jpeg *j)
1698 | {
1699 |    int i;
1700 |    for (i=0; i < j->s.img_n; ++i) {
1701 |       if (j->img_comp[i].data) {
1702 |          free(j->img_comp[i].raw_data);
1703 |          j->img_comp[i].data = NULL;
1704 |       }
1705 |       if (j->img_comp[i].linebuf) {
1706 |          free(j->img_comp[i].linebuf);
1707 |          j->img_comp[i].linebuf = NULL;
1708 |       }
1709 |    }
1710 | }
1711 | 
// Per-component upsampling state used by load_jpeg_image while it walks the
// output rows.
typedef struct
{
   resample_row_func resample; // row resampler chosen from hs/vs
   uint8 *line0,*line1;        // two consecutive input rows; line1 is the newer one
   int hs,vs;   // expansion factor in each axis
   int w_lores; // horizontal pixels pre-expansion
   int ystep;   // how far through vertical expansion we are
   int ypos;    // which pre-expansion row we're on
} stbi_resample;
1721 | 
1722 | static uint8 *load_jpeg_image(jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
1723 | {
1724 |    int n, decode_n;
1725 |    // validate req_comp
1726 |    if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error");
1727 |    z->s.img_n = 0;
1728 | 
1729 |    // load a jpeg image from whichever source
1730 |    if (!decode_jpeg_image(z)) { cleanup_jpeg(z); return NULL; }
1731 | 
1732 |    // determine actual number of components to generate
1733 |    n = req_comp ? req_comp : z->s.img_n;
1734 | 
1735 |    if (z->s.img_n == 3 && n < 3)
1736 |       decode_n = 1;
1737 |    else
1738 |       decode_n = z->s.img_n;
1739 | 
1740 |    // resample and color-convert
1741 |    {
1742 |       int k;
1743 |       uint i,j;
1744 |       uint8 *output;
1745 |       uint8 *coutput[4];
1746 | 
1747 |       stbi_resample res_comp[4];
1748 | 
1749 |       for (k=0; k < decode_n; ++k) {
1750 |          stbi_resample *r = &res_comp[k];
1751 | 
1752 |          // allocate line buffer big enough for upsampling off the edges
1753 |          // with upsample factor of 4
1754 |          z->img_comp[k].linebuf = (uint8 *) malloc(z->s.img_x + 3);
1755 |          if (!z->img_comp[k].linebuf) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory"); }
1756 | 
1757 |          r->hs      = z->img_h_max / z->img_comp[k].h;
1758 |          r->vs      = z->img_v_max / z->img_comp[k].v;
1759 |          r->ystep   = r->vs >> 1;
1760 |          r->w_lores = (z->s.img_x + r->hs-1) / r->hs;
1761 |          r->ypos    = 0;
1762 |          r->line0   = r->line1 = z->img_comp[k].data;
1763 | 
1764 |          if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
1765 |          else if (r->hs == 1 && r->vs == 2) r->resample = resample_row_v_2;
1766 |          else if (r->hs == 2 && r->vs == 1) r->resample = resample_row_h_2;
1767 |          else if (r->hs == 2 && r->vs == 2) r->resample = resample_row_hv_2;
1768 |          else                               r->resample = resample_row_generic;
1769 |       }
1770 | 
1771 |       // can't error after this so, this is safe
1772 |       output = (uint8 *) malloc(n * z->s.img_x * z->s.img_y + 1);
1773 |       if (!output) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory"); }
1774 | 
1775 |       // now go ahead and resample
1776 |       for (j=0; j < z->s.img_y; ++j) {
1777 |          uint8 *out = output + n * z->s.img_x * j;
1778 |          for (k=0; k < decode_n; ++k) {
1779 |             stbi_resample *r = &res_comp[k];
1780 |             int y_bot = r->ystep >= (r->vs >> 1);
1781 |             coutput[k] = r->resample(z->img_comp[k].linebuf,
1782 |                                      y_bot ? r->line1 : r->line0,
1783 |                                      y_bot ? r->line0 : r->line1,
1784 |                                      r->w_lores, r->hs);
1785 |             if (++r->ystep >= r->vs) {
1786 |                r->ystep = 0;
1787 |                r->line0 = r->line1;
1788 |                if (++r->ypos < z->img_comp[k].y)
1789 |                   r->line1 += z->img_comp[k].w2;
1790 |             }
1791 |          }
1792 |          if (n >= 3) {
1793 |             uint8 *y = coutput[0];
1794 |             if (z->s.img_n == 3) {
1795 |                #if STBI_SIMD
1796 |                stbi_YCbCr_installed(out, y, coutput[1], coutput[2], z->s.img_x, n);
1797 |                #else
1798 |                YCbCr_to_RGB_row(out, y, coutput[1], coutput[2], z->s.img_x, n);
1799 |                #endif
1800 |             } else
1801 |                for (i=0; i < z->s.img_x; ++i) {
1802 |                   out[0] = out[1] = out[2] = y[i];
1803 |                   out[3] = 255; // not used if n==3
1804 |                   out += n;
1805 |                }
1806 |          } else {
1807 |             uint8 *y = coutput[0];
1808 |             if (n == 1)
1809 |                for (i=0; i < z->s.img_x; ++i) out[i] = y[i];
1810 |             else
1811 |                for (i=0; i < z->s.img_x; ++i) *out++ = y[i], *out++ = 255;
1812 |          }
1813 |       }
1814 |       cleanup_jpeg(z);
1815 |       *out_x = z->s.img_x;
1816 |       *out_y = z->s.img_y;
1817 |       if (comp) *comp  = z->s.img_n; // report original components, not output
1818 |       return output;
1819 |    }
1820 | }
1821 | 
1822 | #ifndef STBI_NO_STDIO
1823 | unsigned char *stbi_jpeg_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1824 | {
1825 |    jpeg j;
1826 |    start_file(&j.s, f);
1827 |    return load_jpeg_image(&j, x,y,comp,req_comp);
1828 | }
1829 | 
// Opens `filename` in binary mode and decodes it as a JPEG.  Returns NULL if
// the file cannot be opened; otherwise returns whatever
// stbi_jpeg_load_from_file returns.  The file is closed before returning.
unsigned char *stbi_jpeg_load(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *pixels = NULL;
   FILE *fp = fopen(filename, "rb");
   if (fp != NULL) {
      pixels = stbi_jpeg_load_from_file(fp, x, y, comp, req_comp);
      fclose(fp);
   }
   return pixels;
}
1839 | #endif
1840 | 
1841 | unsigned char *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1842 | {
1843 |    jpeg j;
1844 |    start_mem(&j.s, buffer,len);
1845 |    return load_jpeg_image(&j, x,y,comp,req_comp);
1846 | }
1847 | 
1848 | #ifndef STBI_NO_STDIO
1849 | int stbi_jpeg_test_file(FILE *f)
1850 | {
1851 |    int n,r;
1852 |    jpeg j;
1853 |    n = ftell(f);
1854 |    start_file(&j.s, f);
1855 |    r = decode_jpeg_header(&j, SCAN_type);
1856 |    fseek(f,n,SEEK_SET);
1857 |    return r;
1858 | }
1859 | #endif
1860 | 
1861 | int stbi_jpeg_test_memory(stbi_uc const *buffer, int len)
1862 | {
1863 |    jpeg j;
1864 |    start_mem(&j.s, buffer,len);
1865 |    return decode_jpeg_header(&j, SCAN_type);
1866 | }
1867 | 
// @TODO:
// Declarations only: no definitions of the stbi_jpeg_info* functions appear
// in this part of the file.
#ifndef STBI_NO_STDIO
extern int      stbi_jpeg_info            (char const *filename,           int *x, int *y, int *comp);
extern int      stbi_jpeg_info_from_file  (FILE *f,                  int *x, int *y, int *comp);
#endif
extern int      stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
1874 | 
1875 | // public domain zlib decode    v0.2  Sean Barrett 2006-11-18
1876 | //    simple implementation
1877 | //      - all input must be provided in an upfront buffer
1878 | //      - all output is written to a single output buffer (can malloc/realloc)
1879 | //    performance
1880 | //      - fast huffman
1881 | 
// fast-way is faster to check than jpeg huffman, but slow way is slower
#define ZFAST_BITS  9 // accelerate all cases in default tables
#define ZFAST_MASK  ((1 << ZFAST_BITS) - 1)

// zlib-style huffman encoding
// (jpegs packs from left, zlib from right, so can't share code)
// Filled in by zbuild_huffman and read by zhuffman_decode.
typedef struct
{
   uint16 fast[1 << ZFAST_BITS]; // indexed by the next ZFAST_BITS input bits: index into size[]/value[], or 0xffff if the code is longer
   uint16 firstcode[16];         // first code of each length
   int maxcode[17];              // end of each length's code range, shifted up to 16 bits; [16] is a sentinel
   uint16 firstsymbol[16];       // index into size[]/value[] of the first code of each length
   uint8  size[288];             // code length per entry
   uint16 value[288];            // decoded symbol per entry
} zhuffman;
1897 | 
// Reverses the order of the low 16 bits of n (bit 0 <-> bit 15, ...).
// Bits above bit 15 are discarded; the result is always in 0..0xFFFF.
__forceinline static int bitreverse16(int n)
{
  unsigned int v = (unsigned int) n & 0xFFFFu;
  // swap neighbouring bits, then pairs, then nibbles, then bytes
  v = ((v >> 1) & 0x5555u) | ((v & 0x5555u) << 1);
  v = ((v >> 2) & 0x3333u) | ((v & 0x3333u) << 2);
  v = ((v >> 4) & 0x0F0Fu) | ((v & 0x0F0Fu) << 4);
  v = (v >> 8) | ((v & 0x00FFu) << 8);
  return (int) v;
}
1906 | 
1907 | __forceinline static int bit_reverse(int v, int bits)
1908 | {
1909 |    assert(bits <= 16);
1910 |    // to bit reverse n bits, reverse 16 and shift
1911 |    // e.g. 11 bits, bit reverse and shift away 5
1912 |    return bitreverse16(v) >> (16-bits);
1913 | }
1914 | 
1915 | static int zbuild_huffman(zhuffman *z, uint8 *sizelist, int num)
1916 | {
1917 |    int i,k=0;
1918 |    int code, next_code[16], sizes[17];
1919 | 
1920 |    // DEFLATE spec for generating codes
1921 |    memset(sizes, 0, sizeof(sizes));
1922 |    memset(z->fast, 255, sizeof(z->fast));
1923 |    for (i=0; i < num; ++i)
1924 |       ++sizes[sizelist[i]];
1925 |    sizes[0] = 0;
1926 |    for (i=1; i < 16; ++i)
1927 |       assert(sizes[i] <= (1 << i));
1928 |    code = 0;
1929 |    for (i=1; i < 16; ++i) {
1930 |       next_code[i] = code;
1931 |       z->firstcode[i] = (uint16) code;
1932 |       z->firstsymbol[i] = (uint16) k;
1933 |       code = (code + sizes[i]);
1934 |       if (sizes[i])
1935 |          if (code-1 >= (1 << i)) return e("bad codelengths","Corrupt JPEG");
1936 |       z->maxcode[i] = code << (16-i); // preshift for inner loop
1937 |       code <<= 1;
1938 |       k += sizes[i];
1939 |    }
1940 |    z->maxcode[16] = 0x10000; // sentinel
1941 |    for (i=0; i < num; ++i) {
1942 |       int s = sizelist[i];
1943 |       if (s) {
1944 |          int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
1945 |          z->size[c] = (uint8)s;
1946 |          z->value[c] = (uint16)i;
1947 |          if (s <= ZFAST_BITS) {
1948 |             int k = bit_reverse(next_code[s],s);
1949 |             while (k < (1 << ZFAST_BITS)) {
1950 |                z->fast[k] = (uint16) c;
1951 |                k += (1 << s);
1952 |             }
1953 |          }
1954 |          ++next_code[s];
1955 |       }
1956 |    }
1957 |    return 1;
1958 | }
1959 | 
1960 | // zlib-from-memory implementation for PNG reading
1961 | //    because PNG allows splitting the zlib stream arbitrarily,
1962 | //    and it's annoying structurally to have PNG call ZLIB call PNG,
1963 | //    we require PNG read all the IDATs and combine them into a single
1964 | //    memory buffer
1965 | 
// State of one inflate run: input cursor, bit accumulator, output buffer
// and the two huffman tables of the block being decoded.
typedef struct
{
   uint8 *zbuffer, *zbuffer_end; // next input byte / one past the last input byte
   int num_bits;                 // number of valid bits in code_buffer
   uint32 code_buffer;           // bit accumulator, next bit in the LSB

   char *zout;                   // next output byte to write
   char *zout_start;             // start of the output buffer
   char *zout_end;               // one past the end of the output buffer
   int   z_expandable;           // nonzero if expand() may realloc the output buffer

   zhuffman z_length, z_distance; // literal/length and distance tables
} zbuf;
1979 | 
1980 | __forceinline static int zget8(zbuf *z)
1981 | {
1982 |    if (z->zbuffer >= z->zbuffer_end) return 0;
1983 |    return *z->zbuffer++;
1984 | }
1985 | 
1986 | static void fill_bits(zbuf *z)
1987 | {
1988 |    do {
1989 |       assert(z->code_buffer < (1U << z->num_bits));
1990 |       z->code_buffer |= zget8(z) << z->num_bits;
1991 |       z->num_bits += 8;
1992 |    } while (z->num_bits <= 24);
1993 | }
1994 | 
1995 | __forceinline static unsigned int zreceive(zbuf *z, int n)
1996 | {
1997 |    unsigned int k;
1998 |    if (z->num_bits < n) fill_bits(z);
1999 |    k = z->code_buffer & ((1 << n) - 1);
2000 |    z->code_buffer >>= n;
2001 |    z->num_bits -= n;
2002 |    return k;
2003 | }
2004 | 
2005 | __forceinline static int zhuffman_decode(zbuf *a, zhuffman *z)
2006 | {
2007 |    int b,s,k;
2008 |    if (a->num_bits < 16) fill_bits(a);
2009 |    b = z->fast[a->code_buffer & ZFAST_MASK];
2010 |    if (b < 0xffff) {
2011 |       s = z->size[b];
2012 |       a->code_buffer >>= s;
2013 |       a->num_bits -= s;
2014 |       return z->value[b];
2015 |    }
2016 | 
2017 |    // not resolved by fast table, so compute it the slow way
2018 |    // use jpeg approach, which requires MSbits at top
2019 |    k = bit_reverse(a->code_buffer, 16);
2020 |    for (s=ZFAST_BITS+1; ; ++s)
2021 |       if (k < z->maxcode[s])
2022 |          break;
2023 |    if (s == 16) return -1; // invalid code!
2024 |    // code size is s, so:
2025 |    b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
2026 |    assert(z->size[b] == s);
2027 |    a->code_buffer >>= s;
2028 |    a->num_bits -= s;
2029 |    return z->value[b];
2030 | }
2031 | 
2032 | static int expand(zbuf *z, int n)  // need to make room for n bytes
2033 | {
2034 |    char *q;
2035 |    int cur, limit;
2036 |    if (!z->z_expandable) return e("output buffer limit","Corrupt PNG");
2037 |    cur   = (int) (z->zout     - z->zout_start);
2038 |    limit = (int) (z->zout_end - z->zout_start);
2039 |    while (cur + n > limit)
2040 |       limit *= 2;
2041 |    q = (char *) realloc(z->zout_start, limit);
2042 |    if (q == NULL) return e("outofmem", "Out of memory");
2043 |    z->zout_start = q;
2044 |    z->zout       = q + cur;
2045 |    z->zout_end   = q + limit;
2046 |    return 1;
2047 | }
2048 | 
// Lookup tables used by parse_huffman_block.  The length tables are indexed
// by (length symbol - 257) and the distance tables by the distance symbol:
// the decoded value is base[i] plus the next extra[i] bits of the stream.
static int length_base[31] = {
   3,4,5,6,7,8,9,10,11,13,
   15,17,19,23,27,31,35,43,51,59,
   67,83,99,115,131,163,195,227,258,0,0 };

static int length_extra[31]=
{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };

static int dist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};

// only 30 initializers are given; the last two entries are implicitly 0
static int dist_extra[32] =
{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
2062 | 
2063 | static int parse_huffman_block(zbuf *a)
2064 | {
2065 |    for(;;) {
2066 |       int z = zhuffman_decode(a, &a->z_length);
2067 |       if (z < 256) {
2068 |          if (z < 0) return e("bad huffman code","Corrupt PNG"); // error in huffman codes
2069 |          if (a->zout >= a->zout_end) if (!expand(a, 1)) return 0;
2070 |          *a->zout++ = (char) z;
2071 |       } else {
2072 |          uint8 *p;
2073 |          int len,dist;
2074 |          if (z == 256) return 1;
2075 |          z -= 257;
2076 |          len = length_base[z];
2077 |          if (length_extra[z]) len += zreceive(a, length_extra[z]);
2078 |          z = zhuffman_decode(a, &a->z_distance);
2079 |          if (z < 0) return e("bad huffman code","Corrupt PNG");
2080 |          dist = dist_base[z];
2081 |          if (dist_extra[z]) dist += zreceive(a, dist_extra[z]);
2082 |          if (a->zout - a->zout_start < dist) return e("bad dist","Corrupt PNG");
2083 |          if (a->zout + len > a->zout_end) if (!expand(a, len)) return 0;
2084 |          p = (uint8 *) (a->zout - dist);
2085 |          while (len--)
2086 |             *a->zout++ = *p++;
2087 |       }
2088 |    }
2089 | }
2090 | 
2091 | static int compute_huffman_codes(zbuf *a)
2092 | {
2093 |    static uint8 length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
2094 |    zhuffman z_codelength;
2095 |    uint8 lencodes[286+32+137];//padding for maximum single op
2096 |    uint8 codelength_sizes[19];
2097 |    int i,n;
2098 | 
2099 |    int hlit  = zreceive(a,5) + 257;
2100 |    int hdist = zreceive(a,5) + 1;
2101 |    int hclen = zreceive(a,4) + 4;
2102 | 
2103 |    memset(codelength_sizes, 0, sizeof(codelength_sizes));
2104 |    for (i=0; i < hclen; ++i) {
2105 |       int s = zreceive(a,3);
2106 |       codelength_sizes[length_dezigzag[i]] = (uint8) s;
2107 |    }
2108 |    if (!zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
2109 | 
2110 |    n = 0;
2111 |    while (n < hlit + hdist) {
2112 |       int c = zhuffman_decode(a, &z_codelength);
2113 |       assert(c >= 0 && c < 19);
2114 |       if (c < 16)
2115 |          lencodes[n++] = (uint8) c;
2116 |       else if (c == 16) {
2117 |          c = zreceive(a,2)+3;
2118 |          memset(lencodes+n, lencodes[n-1], c);
2119 |          n += c;
2120 |       } else if (c == 17) {
2121 |          c = zreceive(a,3)+3;
2122 |          memset(lencodes+n, 0, c);
2123 |          n += c;
2124 |       } else {
2125 |          assert(c == 18);
2126 |          c = zreceive(a,7)+11;
2127 |          memset(lencodes+n, 0, c);
2128 |          n += c;
2129 |       }
2130 |    }
2131 |    if (n != hlit+hdist) return e("bad codelengths","Corrupt PNG");
2132 |    if (!zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
2133 |    if (!zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
2134 |    return 1;
2135 | }
2136 | 
2137 | static int parse_uncompressed_block(zbuf *a)
2138 | {
2139 |    uint8 header[4];
2140 |    int len,nlen,k;
2141 |    if (a->num_bits & 7)
2142 |       zreceive(a, a->num_bits & 7); // discard
2143 |    // drain the bit-packed data into header
2144 |    k = 0;
2145 |    while (a->num_bits > 0) {
2146 |       header[k++] = (uint8) (a->code_buffer & 255); // wtf this warns?
2147 |       a->code_buffer >>= 8;
2148 |       a->num_bits -= 8;
2149 |    }
2150 |    assert(a->num_bits == 0);
2151 |    // now fill header the normal way
2152 |    while (k < 4)
2153 |       header[k++] = (uint8) zget8(a);
2154 |    len  = header[1] * 256 + header[0];
2155 |    nlen = header[3] * 256 + header[2];
2156 |    if (nlen != (len ^ 0xffff)) return e("zlib corrupt","Corrupt PNG");
2157 |    if (a->zbuffer + len > a->zbuffer_end) return e("read past buffer","Corrupt PNG");
2158 |    if (a->zout + len > a->zout_end)
2159 |       if (!expand(a, len)) return 0;
2160 |    memcpy(a->zout, a->zbuffer, len);
2161 |    a->zbuffer += len;
2162 |    a->zout += len;
2163 |    return 1;
2164 | }
2165 | 
2166 | static int parse_zlib_header(zbuf *a)
2167 | {
2168 |    int cmf   = zget8(a);
2169 |    int cm    = cmf & 15;
2170 |    /* int cinfo = cmf >> 4; */
2171 |    int flg   = zget8(a);
2172 |    if ((cmf*256+flg) % 31 != 0) return e("bad zlib header","Corrupt PNG"); // zlib spec
2173 |    if (flg & 32) return e("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
2174 |    if (cm != 8) return e("bad compression","Corrupt PNG"); // DEFLATE required for png
2175 |    // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
2176 |    return 1;
2177 | }
2178 | 
// @TODO: should statically initialize these for optimal thread safety
static uint8 default_length[288], default_distance[32];

// Fills in the code lengths of the fixed huffman tables: literal/length
// symbols 0-143 get 8 bits, 144-255 get 9, 256-279 get 7 and 280-287 get 8;
// all 32 distance symbols get 5 bits.  default_distance[31] is written
// last; parse_zlib tests that entry to decide whether to call this.
static void init_defaults(void)
{
   int sym;

   for (sym=0; sym < 288; ++sym) {
      if      (sym <= 143) default_length[sym] = 8;
      else if (sym <= 255) default_length[sym] = 9;
      else if (sym <= 279) default_length[sym] = 7;
      else                 default_length[sym] = 8;
   }

   for (sym=0; sym < 32; ++sym)
      default_distance[sym] = 5;
}
2191 | 
2192 | int stbi_png_partial; // a quick hack to only allow decoding some of a PNG... I should implement real streaming support instead
2193 | static int parse_zlib(zbuf *a, int parse_header)
2194 | {
2195 |    int final, type;
2196 |    if (parse_header)
2197 |       if (!parse_zlib_header(a)) return 0;
2198 |    a->num_bits = 0;
2199 |    a->code_buffer = 0;
2200 |    do {
2201 |       final = zreceive(a,1);
2202 |       type = zreceive(a,2);
2203 |       if (type == 0) {
2204 |          if (!parse_uncompressed_block(a)) return 0;
2205 |       } else if (type == 3) {
2206 |          return 0;
2207 |       } else {
2208 |          if (type == 1) {
2209 |             // use fixed code lengths
2210 |             if (!default_distance[31]) init_defaults();
2211 |             if (!zbuild_huffman(&a->z_length  , default_length  , 288)) return 0;
2212 |             if (!zbuild_huffman(&a->z_distance, default_distance,  32)) return 0;
2213 |          } else {
2214 |             if (!compute_huffman_codes(a)) return 0;
2215 |          }
2216 |          if (!parse_huffman_block(a)) return 0;
2217 |       }
2218 |       if (stbi_png_partial && a->zout - a->zout_start > 65536)
2219 |          break;
2220 |    } while (!final);
2221 |    return 1;
2222 | }
2223 | 
2224 | static int do_zlib(zbuf *a, char *obuf, int olen, int exp, int parse_header)
2225 | {
2226 |    a->zout_start = obuf;
2227 |    a->zout       = obuf;
2228 |    a->zout_end   = obuf + olen;
2229 |    a->z_expandable = exp;
2230 | 
2231 |    return parse_zlib(a, parse_header);
2232 | }
2233 | 
2234 | char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
2235 | {
2236 |    zbuf a;
2237 |    char *p = (char *) malloc(initial_size);
2238 |    if (p == NULL) return NULL;
2239 |    a.zbuffer = (uint8 *) buffer;
2240 |    a.zbuffer_end = (uint8 *) buffer + len;
2241 |    if (do_zlib(&a, p, initial_size, 1, 1)) {
2242 |       if (outlen) *outlen = (int) (a.zout - a.zout_start);
2243 |       return a.zout_start;
2244 |    } else {
2245 |       free(a.zout_start);
2246 |       return NULL;
2247 |    }
2248 | }
2249 | 
// Convenience form of stbi_zlib_decode_malloc_guesssize with a 16384-byte
// initial output buffer.
char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
{
   const int initial_guess = 16384;
   return stbi_zlib_decode_malloc_guesssize(buffer, len, initial_guess, outlen);
}
2254 | 
2255 | int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
2256 | {
2257 |    zbuf a;
2258 |    a.zbuffer = (uint8 *) ibuffer;
2259 |    a.zbuffer_end = (uint8 *) ibuffer + ilen;
2260 |    if (do_zlib(&a, obuffer, olen, 0, 1))
2261 |       return (int) (a.zout - a.zout_start);
2262 |    else
2263 |       return -1;
2264 | }
2265 | 
2266 | char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
2267 | {
2268 |    zbuf a;
2269 |    char *p = (char *) malloc(16384);
2270 |    if (p == NULL) return NULL;
2271 |    a.zbuffer = (uint8 *) buffer;
2272 |    a.zbuffer_end = (uint8 *) buffer+len;
2273 |    if (do_zlib(&a, p, 16384, 1, 0)) {
2274 |       if (outlen) *outlen = (int) (a.zout - a.zout_start);
2275 |       return a.zout_start;
2276 |    } else {
2277 |       free(a.zout_start);
2278 |       return NULL;
2279 |    }
2280 | }
2281 | 
2282 | int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
2283 | {
2284 |    zbuf a;
2285 |    a.zbuffer = (uint8 *) ibuffer;
2286 |    a.zbuffer_end = (uint8 *) ibuffer + ilen;
2287 |    if (do_zlib(&a, obuffer, olen, 0, 0))
2288 |       return (int) (a.zout - a.zout_start);
2289 |    else
2290 |       return -1;
2291 | }
2292 | 
2293 | // public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
2294 | //    simple implementation
2295 | //      - only 8-bit samples
2296 | //      - no CRC checking
2297 | //      - allocates lots of intermediate memory
2298 | //        - avoids problem of streaming data between subsystems
2299 | //        - avoids explicit window management
2300 | //    performance
2301 | //      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
2302 | 
2303 | 
// Header of one PNG chunk as read by get_chunk_header: two consecutive
// 32-bit values, length first and type second.
typedef struct
{
   // payload size; the parser uses it as a byte count when reading or skipping the chunk
   uint32 length;
   // chunk type code; compared against PNG_TYPE(...) constants
   uint32 type;
} chunk;

// Pack four characters into a single integer chunk-type code, with the
// first character in the most significant byte.
#define PNG_TYPE(a,b,c,d)  (((a) << 24) + ((b) << 16) + ((c) << 8) + (d))
2311 | 
2312 | static chunk get_chunk_header(stbi *s)
2313 | {
2314 |    chunk c;
2315 |    c.length = get32(s);
2316 |    c.type   = get32(s);
2317 |    return c;
2318 | }
2319 | 
2320 | static int check_png_header(stbi *s)
2321 | {
2322 |    static uint8 png_sig[8] = { 137,80,78,71,13,10,26,10 };
2323 |    int i;
2324 |    for (i=0; i < 8; ++i)
2325 |       if (get8(s) != png_sig[i]) return e("bad png sig","Not a PNG");
2326 |    return 1;
2327 | }
2328 | 
// State for decoding one PNG.
//   s        - the input stream plus image dimensions / component counts
//   idata    - accumulated IDAT payload bytes (input to the zlib decoder)
//   expanded - output of the zlib decoder (filtered scanlines)
//   out      - decoded pixel data
// do_png sets the three pointers to NULL before parsing and frees whatever is
// still set in them afterwards.
typedef struct
{
   stbi s;
   uint8 *idata, *expanded, *out;
} png;
2334 | 
2335 | 
// Scanline filter codes. Values 0..4 are the filter bytes accepted from the
// data stream (anything above 4 is rejected by create_png_image_raw); the
// two *_first codes are internal variants used only for the first row.
enum {
   F_none=0, F_sub=1, F_up=2, F_avg=3, F_paeth=4,
   F_avg_first, F_paeth_first,
};

// Maps a stream filter code to the code used on the first row, so that the
// first row never samples a previous row: "up" becomes "none", while "avg"
// and "paeth" switch to their *_first variants.
static uint8 first_row_filter[5] =
{
   F_none, F_sub, F_none, F_avg_first, F_paeth_first
};
2345 | 
// Paeth predictor: of a, b and c, return the one closest to a + b - c.
// Ties are resolved in the order a, then b, then c.
static int paeth(int a, int b, int c)
{
   int estimate = a + b - c;
   int dist_a = abs(estimate - a);
   int dist_b = abs(estimate - b);
   int dist_c = abs(estimate - c);

   if (dist_a > dist_b || dist_a > dist_c)
      return (dist_b <= dist_c) ? b : c;
   return a;
}
2356 | 
2357 | // create the png data from post-deflated data
2358 | static int create_png_image_raw(png *a, uint8 *raw, uint32 raw_len, int out_n, uint32 x, uint32 y)
2359 | {
2360 |    stbi *s = &a->s;
2361 |    uint32 i,j,stride = x*out_n;
2362 |    int k;
2363 |    int img_n = s->img_n; // copy it into a local for later
2364 |    assert(out_n == s->img_n || out_n == s->img_n+1);
2365 |    if (stbi_png_partial) y = 1;
2366 |    a->out = (uint8 *) malloc(x * y * out_n);
2367 |    if (!a->out) return e("outofmem", "Out of memory");
2368 |    if (!stbi_png_partial) {
2369 |       if (s->img_x == x && s->img_y == y)
2370 |          if (raw_len != (img_n * x + 1) * y) return e("not enough pixels","Corrupt PNG");
2371 |       else // interlaced:
2372 |          if (raw_len < (img_n * x + 1) * y) return e("not enough pixels","Corrupt PNG");
2373 |    }
2374 |    for (j=0; j < y; ++j) {
2375 |       uint8 *cur = a->out + stride*j;
2376 |       uint8 *prior = cur - stride;
2377 |       int filter = *raw++;
2378 |       if (filter > 4) return e("invalid filter","Corrupt PNG");
2379 |       // if first row, use special filter that doesn't sample previous row
2380 |       if (j == 0) filter = first_row_filter[filter];
2381 |       // handle first pixel explicitly
2382 |       for (k=0; k < img_n; ++k) {
2383 |          switch(filter) {
2384 |             case F_none       : cur[k] = raw[k]; break;
2385 |             case F_sub        : cur[k] = raw[k]; break;
2386 |             case F_up         : cur[k] = raw[k] + prior[k]; break;
2387 |             case F_avg        : cur[k] = raw[k] + (prior[k]>>1); break;
2388 |             case F_paeth      : cur[k] = (uint8) (raw[k] + paeth(0,prior[k],0)); break;
2389 |             case F_avg_first  : cur[k] = raw[k]; break;
2390 |             case F_paeth_first: cur[k] = raw[k]; break;
2391 |          }
2392 |       }
2393 |       if (img_n != out_n) cur[img_n] = 255;
2394 |       raw += img_n;
2395 |       cur += out_n;
2396 |       prior += out_n;
2397 |       // this is a little gross, so that we don't switch per-pixel or per-component
2398 |       if (img_n == out_n) {
2399 |          #define CASE(f) \
2400 |              case f:     \
2401 |                 for (i=x-1; i >= 1; --i, raw+=img_n,cur+=img_n,prior+=img_n) \
2402 |                    for (k=0; k < img_n; ++k)
2403 |          switch(filter) {
2404 |             CASE(F_none)  cur[k] = raw[k]; break;
2405 |             CASE(F_sub)   cur[k] = raw[k] + cur[k-img_n]; break;
2406 |             CASE(F_up)    cur[k] = raw[k] + prior[k]; break;
2407 |             CASE(F_avg)   cur[k] = raw[k] + ((prior[k] + cur[k-img_n])>>1); break;
2408 |             CASE(F_paeth)  cur[k] = (uint8) (raw[k] + paeth(cur[k-img_n],prior[k],prior[k-img_n])); break;
2409 |             CASE(F_avg_first)    cur[k] = raw[k] + (cur[k-img_n] >> 1); break;
2410 |             CASE(F_paeth_first)  cur[k] = (uint8) (raw[k] + paeth(cur[k-img_n],0,0)); break;
2411 |          }
2412 |          #undef CASE
2413 |       } else {
2414 |          assert(img_n+1 == out_n);
2415 |          #define CASE(f) \
2416 |              case f:     \
2417 |                 for (i=x-1; i >= 1; --i, cur[img_n]=255,raw+=img_n,cur+=out_n,prior+=out_n) \
2418 |                    for (k=0; k < img_n; ++k)
2419 |          switch(filter) {
2420 |             CASE(F_none)  cur[k] = raw[k]; break;
2421 |             CASE(F_sub)   cur[k] = raw[k] + cur[k-out_n]; break;
2422 |             CASE(F_up)    cur[k] = raw[k] + prior[k]; break;
2423 |             CASE(F_avg)   cur[k] = raw[k] + ((prior[k] + cur[k-out_n])>>1); break;
2424 |             CASE(F_paeth)  cur[k] = (uint8) (raw[k] + paeth(cur[k-out_n],prior[k],prior[k-out_n])); break;
2425 |             CASE(F_avg_first)    cur[k] = raw[k] + (cur[k-out_n] >> 1); break;
2426 |             CASE(F_paeth_first)  cur[k] = (uint8) (raw[k] + paeth(cur[k-out_n],0,0)); break;
2427 |          }
2428 |          #undef CASE
2429 |       }
2430 |    }
2431 |    return 1;
2432 | }
2433 | 
2434 | static int create_png_image(png *a, uint8 *raw, uint32 raw_len, int out_n, int interlaced)
2435 | {
2436 |    uint8 *final;
2437 |    int p;
2438 |    int save;
2439 |    if (!interlaced)
2440 |       return create_png_image_raw(a, raw, raw_len, out_n, a->s.img_x, a->s.img_y);
2441 |    save = stbi_png_partial;
2442 |    stbi_png_partial = 0;
2443 | 
2444 |    // de-interlacing
2445 |    final = (uint8 *) malloc(a->s.img_x * a->s.img_y * out_n);
2446 |    for (p=0; p < 7; ++p) {
2447 |       int xorig[] = { 0,4,0,2,0,1,0 };
2448 |       int yorig[] = { 0,0,4,0,2,0,1 };
2449 |       int xspc[]  = { 8,8,4,4,2,2,1 };
2450 |       int yspc[]  = { 8,8,8,4,4,2,2 };
2451 |       int i,j,x,y;
2452 |       // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
2453 |       x = (a->s.img_x - xorig[p] + xspc[p]-1) / xspc[p];
2454 |       y = (a->s.img_y - yorig[p] + yspc[p]-1) / yspc[p];
2455 |       if (x && y) {
2456 |          if (!create_png_image_raw(a, raw, raw_len, out_n, x, y)) {
2457 |             free(final);
2458 |             return 0;
2459 |          }
2460 |          for (j=0; j < y; ++j)
2461 |             for (i=0; i < x; ++i)
2462 |                memcpy(final + (j*yspc[p]+yorig[p])*a->s.img_x*out_n + (i*xspc[p]+xorig[p])*out_n,
2463 |                       a->out + (j*x+i)*out_n, out_n);
2464 |          free(a->out);
2465 |          raw += (x*out_n+1)*y;
2466 |          raw_len -= (x*out_n+1)*y;
2467 |       }
2468 |    }
2469 |    a->out = final;
2470 | 
2471 |    stbi_png_partial = save;
2472 |    return 1;
2473 | }
2474 | 
2475 | static int compute_transparency(png *z, uint8 tc[3], int out_n)
2476 | {
2477 |    stbi *s = &z->s;
2478 |    uint32 i, pixel_count = s->img_x * s->img_y;
2479 |    uint8 *p = z->out;
2480 | 
2481 |    // compute color-based transparency, assuming we've
2482 |    // already got 255 as the alpha value in the output
2483 |    assert(out_n == 2 || out_n == 4);
2484 | 
2485 |    if (out_n == 2) {
2486 |       for (i=0; i < pixel_count; ++i) {
2487 |          p[1] = (p[0] == tc[0] ? 0 : 255);
2488 |          p += 2;
2489 |       }
2490 |    } else {
2491 |       for (i=0; i < pixel_count; ++i) {
2492 |          if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
2493 |             p[3] = 0;
2494 |          p += 4;
2495 |       }
2496 |    }
2497 |    return 1;
2498 | }
2499 | 
2500 | static int expand_palette(png *a, uint8 *palette, int len, int pal_img_n)
2501 | {
2502 |    uint32 i, pixel_count = a->s.img_x * a->s.img_y;
2503 |    uint8 *p, *temp_out, *orig = a->out;
2504 | 
2505 |    p = (uint8 *) malloc(pixel_count * pal_img_n);
2506 |    if (p == NULL) return e("outofmem", "Out of memory");
2507 | 
2508 |    // between here and free(out) below, exitting would leak
2509 |    temp_out = p;
2510 | 
2511 |    if (pal_img_n == 3) {
2512 |       for (i=0; i < pixel_count; ++i) {
2513 |          int n = orig[i]*4;
2514 |          p[0] = palette[n  ];
2515 |          p[1] = palette[n+1];
2516 |          p[2] = palette[n+2];
2517 |          p += 3;
2518 |       }
2519 |    } else {
2520 |       for (i=0; i < pixel_count; ++i) {
2521 |          int n = orig[i]*4;
2522 |          p[0] = palette[n  ];
2523 |          p[1] = palette[n+1];
2524 |          p[2] = palette[n+2];
2525 |          p[3] = palette[n+3];
2526 |          p += 4;
2527 |       }
2528 |    }
2529 |    free(a->out);
2530 |    a->out = temp_out;
2531 |    return 1;
2532 | }
2533 | 
2534 | static int parse_png_file(png *z, int scan, int req_comp)
2535 | {
2536 |    uint8 palette[1024], pal_img_n=0;
2537 |    uint8 has_trans=0, tc[3];
2538 |    uint32 ioff=0, idata_limit=0, i, pal_len=0;
2539 |    int first=1,k,interlace=0;
2540 |    stbi *s = &z->s;
2541 | 
2542 |    if (!check_png_header(s)) return 0;
2543 | 
2544 |    if (scan == SCAN_type) return 1;
2545 | 
2546 |    for(;;first=0) {
2547 |       chunk c = get_chunk_header(s);
2548 |       if (first && c.type != PNG_TYPE('I','H','D','R'))
2549 |          return e("first not IHDR","Corrupt PNG");
2550 |       switch (c.type) {
2551 |          case PNG_TYPE('I','H','D','R'): {
2552 |             int depth,color,comp,filter;
2553 |             if (!first) return e("multiple IHDR","Corrupt PNG");
2554 |             if (c.length != 13) return e("bad IHDR len","Corrupt PNG");
2555 |             s->img_x = get32(s); if (s->img_x > (1 << 24)) return e("too large","Very large image (corrupt?)");
2556 |             s->img_y = get32(s); if (s->img_y > (1 << 24)) return e("too large","Very large image (corrupt?)");
2557 |             depth = get8(s);  if (depth != 8)        return e("8bit only","PNG not supported: 8-bit only");
2558 |             color = get8(s);  if (color > 6)         return e("bad ctype","Corrupt PNG");
2559 |             if (color == 3) pal_img_n = 3; else if (color & 1) return e("bad ctype","Corrupt PNG");
2560 |             comp  = get8(s);  if (comp) return e("bad comp method","Corrupt PNG");
2561 |             filter= get8(s);  if (filter) return e("bad filter method","Corrupt PNG");
2562 |             interlace = get8(s); if (interlace>1) return e("bad interlace method","Corrupt PNG");
2563 |             if (!s->img_x || !s->img_y) return e("0-pixel image","Corrupt PNG");
2564 |             if (!pal_img_n) {
2565 |                s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
2566 |                if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode");
2567 |                if (scan == SCAN_header) return 1;
2568 |             } else {
2569 |                // if paletted, then pal_n is our final components, and
2570 |                // img_n is # components to decompress/filter.
2571 |                s->img_n = 1;
2572 |                if ((1 << 30) / s->img_x / 4 < s->img_y) return e("too large","Corrupt PNG");
2573 |                // if SCAN_header, have to scan to see if we have a tRNS
2574 |             }
2575 |             break;
2576 |          }
2577 | 
2578 |          case PNG_TYPE('P','L','T','E'):  {
2579 |             if (c.length > 256*3) return e("invalid PLTE","Corrupt PNG");
2580 |             pal_len = c.length / 3;
2581 |             if (pal_len * 3 != c.length) return e("invalid PLTE","Corrupt PNG");
2582 |             for (i=0; i < pal_len; ++i) {
2583 |                palette[i*4+0] = get8u(s);
2584 |                palette[i*4+1] = get8u(s);
2585 |                palette[i*4+2] = get8u(s);
2586 |                palette[i*4+3] = 255;
2587 |             }
2588 |             break;
2589 |          }
2590 | 
2591 |          case PNG_TYPE('t','R','N','S'): {
2592 |             if (z->idata) return e("tRNS after IDAT","Corrupt PNG");
2593 |             if (pal_img_n) {
2594 |                if (scan == SCAN_header) { s->img_n = 4; return 1; }
2595 |                if (pal_len == 0) return e("tRNS before PLTE","Corrupt PNG");
2596 |                if (c.length > pal_len) return e("bad tRNS len","Corrupt PNG");
2597 |                pal_img_n = 4;
2598 |                for (i=0; i < c.length; ++i)
2599 |                   palette[i*4+3] = get8u(s);
2600 |             } else {
2601 |                if (!(s->img_n & 1)) return e("tRNS with alpha","Corrupt PNG");
2602 |                if (c.length != (uint32) s->img_n*2) return e("bad tRNS len","Corrupt PNG");
2603 |                has_trans = 1;
2604 |                for (k=0; k < s->img_n; ++k)
2605 |                   tc[k] = (uint8) get16(s); // non 8-bit images will be larger
2606 |             }
2607 |             break;
2608 |          }
2609 | 
2610 |          case PNG_TYPE('I','D','A','T'): {
2611 |             if (pal_img_n && !pal_len) return e("no PLTE","Corrupt PNG");
2612 |             if (scan == SCAN_header) { s->img_n = pal_img_n; return 1; }
2613 |             if (ioff + c.length > idata_limit) {
2614 |                uint8 *p;
2615 |                if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
2616 |                while (ioff + c.length > idata_limit)
2617 |                   idata_limit *= 2;
2618 |                p = (uint8 *) realloc(z->idata, idata_limit); if (p == NULL) return e("outofmem", "Out of memory");
2619 |                z->idata = p;
2620 |             }
2621 |             #ifndef STBI_NO_STDIO
2622 |             if (s->img_file)
2623 |             {
2624 |                if (fread(z->idata+ioff,1,c.length,s->img_file) != c.length) return e("outofdata","Corrupt PNG");
2625 |             }
2626 |             else
2627 |             #endif
2628 |             {
2629 |                memcpy(z->idata+ioff, s->img_buffer, c.length);
2630 |                s->img_buffer += c.length;
2631 |             }
2632 |             ioff += c.length;
2633 |             break;
2634 |          }
2635 | 
2636 |          case PNG_TYPE('I','E','N','D'): {
2637 |             uint32 raw_len;
2638 |             if (scan != SCAN_load) return 1;
2639 |             if (z->idata == NULL) return e("no IDAT","Corrupt PNG");
2640 |             z->expanded = (uint8 *) stbi_zlib_decode_malloc((char *) z->idata, ioff, (int *) &raw_len);
2641 |             if (z->expanded == NULL) return 0; // zlib should set error
2642 |             free(z->idata); z->idata = NULL;
2643 |             if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
2644 |                s->img_out_n = s->img_n+1;
2645 |             else
2646 |                s->img_out_n = s->img_n;
2647 |             if (!create_png_image(z, z->expanded, raw_len, s->img_out_n, interlace)) return 0;
2648 |             if (has_trans)
2649 |                if (!compute_transparency(z, tc, s->img_out_n)) return 0;
2650 |             if (pal_img_n) {
2651 |                // pal_img_n == 3 or 4
2652 |                s->img_n = pal_img_n; // record the actual colors we had
2653 |                s->img_out_n = pal_img_n;
2654 |                if (req_comp >= 3) s->img_out_n = req_comp;
2655 |                if (!expand_palette(z, palette, pal_len, s->img_out_n))
2656 |                   return 0;
2657 |             }
2658 |             free(z->expanded); z->expanded = NULL;
2659 |             return 1;
2660 |          }
2661 | 
2662 |          default:
2663 |             // if critical, fail
2664 |             if ((c.type & (1 << 29)) == 0) {
2665 |                #ifndef STBI_NO_FAILURE_STRINGS
2666 |                // not threadsafe
2667 |                static char invalid_chunk[] = "XXXX chunk not known";
2668 |                invalid_chunk[0] = (uint8) (c.type >> 24);
2669 |                invalid_chunk[1] = (uint8) (c.type >> 16);
2670 |                invalid_chunk[2] = (uint8) (c.type >>  8);
2671 |                invalid_chunk[3] = (uint8) (c.type >>  0);
2672 |                #endif
2673 |                return e(invalid_chunk, "PNG not supported: unknown chunk type");
2674 |             }
2675 |             skip(s, c.length);
2676 |             break;
2677 |       }
2678 |       // end of chunk, read and skip CRC
2679 |       get32(s);
2680 |    }
2681 | }
2682 | 
2683 | static unsigned char *do_png(png *p, int *x, int *y, int *n, int req_comp)
2684 | {
2685 |    unsigned char *result=NULL;
2686 |    p->expanded = NULL;
2687 |    p->idata = NULL;
2688 |    p->out = NULL;
2689 |    if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error");
2690 |    if (parse_png_file(p, SCAN_load, req_comp)) {
2691 |       result = p->out;
2692 |       p->out = NULL;
2693 |       if (req_comp && req_comp != p->s.img_out_n) {
2694 |          result = convert_format(result, p->s.img_out_n, req_comp, p->s.img_x, p->s.img_y);
2695 |          p->s.img_out_n = req_comp;
2696 |          if (result == NULL) return result;
2697 |       }
2698 |       *x = p->s.img_x;
2699 |       *y = p->s.img_y;
2700 |       if (n) *n = p->s.img_n;
2701 |    }
2702 |    free(p->out);      p->out      = NULL;
2703 |    free(p->expanded); p->expanded = NULL;
2704 |    free(p->idata);    p->idata    = NULL;
2705 | 
2706 |    return result;
2707 | }
2708 | 
2709 | #ifndef STBI_NO_STDIO
2710 | unsigned char *stbi_png_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
2711 | {
2712 |    png p;
2713 |    start_file(&p.s, f);
2714 |    return do_png(&p, x,y,comp,req_comp);
2715 | }
2716 | 
// Open `filename` in binary mode and decode it as a PNG.
// Returns the pixel buffer, or NULL if the file cannot be opened or decoded.
unsigned char *stbi_png_load(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *pixels = NULL;
   FILE *fp = fopen(filename, "rb");

   if (fp != NULL) {
      pixels = stbi_png_load_from_file(fp, x, y, comp, req_comp);
      fclose(fp);
   }
   return pixels;
}
2726 | #endif
2727 | 
2728 | unsigned char *stbi_png_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
2729 | {
2730 |    png p;
2731 |    start_mem(&p.s, buffer,len);
2732 |    return do_png(&p, x,y,comp,req_comp);
2733 | }
2734 | 
2735 | #ifndef STBI_NO_STDIO
2736 | int stbi_png_test_file(FILE *f)
2737 | {
2738 |    png p;
2739 |    int n,r;
2740 |    n = ftell(f);
2741 |    start_file(&p.s, f);
2742 |    r = parse_png_file(&p, SCAN_type,STBI_default);
2743 |    fseek(f,n,SEEK_SET);
2744 |    return r;
2745 | }
2746 | #endif
2747 | 
2748 | int stbi_png_test_memory(stbi_uc const *buffer, int len)
2749 | {
2750 |    png p;
2751 |    start_mem(&p.s, buffer, len);
2752 |    return parse_png_file(&p, SCAN_type,STBI_default);
2753 | }
2754 | 
2755 | // TODO: load header from png
2756 | #ifndef STBI_NO_STDIO
2757 | int      stbi_png_info             (char const *filename,           int *x, int *y, int *comp)
2758 | {
2759 |    png p;
2760 |    FILE *f = fopen(filename, "rb");
2761 |    if (!f) return 0;
2762 |    start_file(&p.s, f);
2763 |    if (parse_png_file(&p, SCAN_header, 0)) {
2764 |       if(x) *x = p.s.img_x;
2765 |       if(y) *y = p.s.img_y;
2766 |       if (comp) *comp = p.s.img_n;
2767 |       fclose(f);
2768 |       return 1;
2769 |    }
2770 |    fclose(f);
2771 |    return 0;
2772 | }
2773 | 
2774 | extern int      stbi_png_info_from_file   (FILE *f,                  int *x, int *y, int *comp);
2775 | #endif
2776 | extern int      stbi_png_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp);
2777 | 
2778 | // Microsoft/Windows BMP image
2779 | 
2780 | static int bmp_test(stbi *s)
2781 | {
2782 |    int sz;
2783 |    if (get8(s) != 'B') return 0;
2784 |    if (get8(s) != 'M') return 0;
2785 |    get32le(s); // discard filesize
2786 |    get16le(s); // discard reserved
2787 |    get16le(s); // discard reserved
2788 |    get32le(s); // discard data offset
2789 |    sz = get32le(s);
2790 |    if (sz == 12 || sz == 40 || sz == 56 || sz == 108) return 1;
2791 |    return 0;
2792 | }
2793 | 
2794 | #ifndef STBI_NO_STDIO
2795 | int      stbi_bmp_test_file        (FILE *f)
2796 | {
2797 |    stbi s;
2798 |    int r,n = ftell(f);
2799 |    start_file(&s,f);
2800 |    r = bmp_test(&s);
2801 |    fseek(f,n,SEEK_SET);
2802 |    return r;
2803 | }
2804 | #endif
2805 | 
2806 | int      stbi_bmp_test_memory      (stbi_uc const *buffer, int len)
2807 | {
2808 |    stbi s;
2809 |    start_mem(&s, buffer, len);
2810 |    return bmp_test(&s);
2811 | }
2812 | 
// returns 0..31 for the highest set bit, or -1 when no bit is set
static int high_bit(unsigned int z)
{
   int pos = 0;
   int step;

   if (z == 0)
      return -1;

   // binary search: test the upper half of the remaining range each time
   for (step = 16; step > 0; step >>= 1) {
      if ((z >> step) != 0) {
         pos += step;
         z >>= step;
      }
   }
   return pos;
}
2825 | 
// returns the number of set bits in a
static int bitcount(unsigned int a)
{
   int count = 0;

   // clearing the lowest set bit once per iteration visits each set bit exactly once
   while (a != 0) {
      a &= a - 1;
      ++count;
   }
   return count;
}
2835 | 
// Move a masked colour field into the low 8 bits and widen it to 8 bits.
//   v     - the field's bits, still at their original position in the pixel
//   shift - right-shift that brings the field's top bit to bit 7
//           (negative means shift left by -shift)
//   bits  - width of the field; fields narrower than 8 bits are replicated
//           downwards (e.g. a 5-bit 11111 becomes 11111111)
// The arithmetic is done on the unsigned bit pattern of v: a field that
// includes bit 31 makes v negative, and a signed right shift would smear the
// sign bit into the replicated copies. bits <= 0 returns the shifted value
// unchanged instead of looping forever.
static int shiftsigned(int v, int shift, int bits)
{
   unsigned int u = (unsigned int) v;
   unsigned int result;
   int z;

   if (shift < 0) u <<= -shift;
   else u >>= shift;
   result = u;

   if (bits <= 0)
      return (int) result;

   // add progressively lower copies of the field until 8 bits are covered
   for (z = bits; z < 8; z += bits)
      result += u >> z;

   return (int) result;
}
2852 | 
2853 | static stbi_uc *bmp_load(stbi *s, int *x, int *y, int *comp, int req_comp)
2854 | {
2855 |    uint8 *out;
2856 |    unsigned int mr=0,mg=0,mb=0,ma=0, fake_a=0;
2857 |    stbi_uc pal[256][4];
2858 |    int psize=0,i,j,compress=0,width;
2859 |    int bpp, flip_vertically, pad, target, offset, hsz;
2860 |    if (get8(s) != 'B' || get8(s) != 'M') return epuc("not BMP", "Corrupt BMP");
2861 |    get32le(s); // discard filesize
2862 |    get16le(s); // discard reserved
2863 |    get16le(s); // discard reserved
2864 |    offset = get32le(s);
2865 |    hsz = get32le(s);
2866 |    if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108) return epuc("unknown BMP", "BMP type not supported: unknown");
2867 |    failure_reason = "bad BMP";
2868 |    if (hsz == 12) {
2869 |       s->img_x = get16le(s);
2870 |       s->img_y = get16le(s);
2871 |    } else {
2872 |       s->img_x = get32le(s);
2873 |       s->img_y = get32le(s);
2874 |    }
2875 |    if (get16le(s) != 1) return 0;
2876 |    bpp = get16le(s);
2877 |    if (bpp == 1) return epuc("monochrome", "BMP type not supported: 1-bit");
2878 |    flip_vertically = ((int) s->img_y) > 0;
2879 |    s->img_y = abs((int) s->img_y);
2880 |    if (hsz == 12) {
2881 |       if (bpp < 24)
2882 |          psize = (offset - 14 - 24) / 3;
2883 |    } else {
2884 |       compress = get32le(s);
2885 |       if (compress == 1 || compress == 2) return epuc("BMP RLE", "BMP type not supported: RLE");
2886 |       get32le(s); // discard sizeof
2887 |       get32le(s); // discard hres
2888 |       get32le(s); // discard vres
2889 |       get32le(s); // discard colorsused
2890 |       get32le(s); // discard max important
2891 |       if (hsz == 40 || hsz == 56) {
2892 |          if (hsz == 56) {
2893 |             get32le(s);
2894 |             get32le(s);
2895 |             get32le(s);
2896 |             get32le(s);
2897 |          }
2898 |          if (bpp == 16 || bpp == 32) {
2899 |             mr = mg = mb = 0;
2900 |             if (compress == 0) {
2901 |                if (bpp == 32) {
2902 |                   mr = 0xff << 16;
2903 |                   mg = 0xff <<  8;
2904 |                   mb = 0xff <<  0;
2905 |                   ma = 0xff << 24;
2906 |                   fake_a = 1; // @TODO: check for cases like alpha value is all 0 and switch it to 255
2907 |                } else {
2908 |                   mr = 31 << 10;
2909 |                   mg = 31 <<  5;
2910 |                   mb = 31 <<  0;
2911 |                }
2912 |             } else if (compress == 3) {
2913 |                mr = get32le(s);
2914 |                mg = get32le(s);
2915 |                mb = get32le(s);
2916 |                // not documented, but generated by photoshop and handled by mspaint
2917 |                if (mr == mg && mg == mb) {
2918 |                   // ?!?!?
2919 |                   return NULL;
2920 |                }
2921 |             } else
2922 |                return NULL;
2923 |          }
2924 |       } else {
2925 |          assert(hsz == 108);
2926 |          mr = get32le(s);
2927 |          mg = get32le(s);
2928 |          mb = get32le(s);
2929 |          ma = get32le(s);
2930 |          get32le(s); // discard color space
2931 |          for (i=0; i < 12; ++i)
2932 |             get32le(s); // discard color space parameters
2933 |       }
2934 |       if (bpp < 16)
2935 |          psize = (offset - 14 - hsz) >> 2;
2936 |    }
2937 |    s->img_n = ma ? 4 : 3;
2938 |    if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
2939 |       target = req_comp;
2940 |    else
2941 |       target = s->img_n; // if they want monochrome, we'll post-convert
2942 |    out = (stbi_uc *) malloc(target * s->img_x * s->img_y);
2943 |    if (!out) return epuc("outofmem", "Out of memory");
2944 |    if (bpp < 16) {
2945 |       int z=0;
2946 |       if (psize == 0 || psize > 256) { free(out); return epuc("invalid", "Corrupt BMP"); }
2947 |       for (i=0; i < psize; ++i) {
2948 |          pal[i][2] = get8(s);
2949 |          pal[i][1] = get8(s);
2950 |          pal[i][0] = get8(s);
2951 |          if (hsz != 12) get8(s);
2952 |          pal[i][3] = 255;
2953 |       }
2954 |       skip(s, offset - 14 - hsz - psize * (hsz == 12 ? 3 : 4));
2955 |       if (bpp == 4) width = (s->img_x + 1) >> 1;
2956 |       else if (bpp == 8) width = s->img_x;
2957 |       else { free(out); return epuc("bad bpp", "Corrupt BMP"); }
2958 |       pad = (-width)&3;
2959 |       for (j=0; j < (int) s->img_y; ++j) {
2960 |          for (i=0; i < (int) s->img_x; i += 2) {
2961 |             int v=get8(s),v2=0;
2962 |             if (bpp == 4) {
2963 |                v2 = v & 15;
2964 |                v >>= 4;
2965 |             }
2966 |             out[z++] = pal[v][0];
2967 |             out[z++] = pal[v][1];
2968 |             out[z++] = pal[v][2];
2969 |             if (target == 4) out[z++] = 255;
2970 |             if (i+1 == (int) s->img_x) break;
2971 |             v = (bpp == 8) ? get8(s) : v2;
2972 |             out[z++] = pal[v][0];
2973 |             out[z++] = pal[v][1];
2974 |             out[z++] = pal[v][2];
2975 |             if (target == 4) out[z++] = 255;
2976 |          }
2977 |          skip(s, pad);
2978 |       }
2979 |    } else {
2980 |       int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
2981 |       int z = 0;
2982 |       int easy=0;
2983 |       skip(s, offset - 14 - hsz);
2984 |       if (bpp == 24) width = 3 * s->img_x;
2985 |       else if (bpp == 16) width = 2*s->img_x;
2986 |       else /* bpp = 32 and pad = 0 */ width=0;
2987 |       pad = (-width) & 3;
2988 |       if (bpp == 24) {
2989 |          easy = 1;
2990 |       } else if (bpp == 32) {
2991 |          if (mb == 0xff && mg == 0xff00 && mr == 0xff000000 && ma == 0xff000000)
2992 |             easy = 2;
2993 |       }
2994 |       if (!easy) {
2995 |          if (!mr || !mg || !mb) return epuc("bad masks", "Corrupt BMP");
2996 |          // right shift amt to put high bit in position #7
2997 |          rshift = high_bit(mr)-7; rcount = bitcount(mr);
2998 |          gshift = high_bit(mg)-7; gcount = bitcount(mr);
2999 |          bshift = high_bit(mb)-7; bcount = bitcount(mr);
3000 |          ashift = high_bit(ma)-7; acount = bitcount(mr);
3001 |       }
3002 |       for (j=0; j < (int) s->img_y; ++j) {
3003 |          if (easy) {
3004 |             for (i=0; i < (int) s->img_x; ++i) {
3005 |                int a;
3006 |                out[z+2] = get8(s);
3007 |                out[z+1] = get8(s);
3008 |                out[z+0] = get8(s);
3009 |                z += 3;
3010 |                a = (easy == 2 ? get8(s) : 255);
3011 |                if (target == 4) out[z++] = a;
3012 |             }
3013 |          } else {
3014 |             for (i=0; i < (int) s->img_x; ++i) {
3015 |                uint32 v = (bpp == 16 ? get16le(s) : get32le(s));
3016 |                int a;
3017 |                out[z++] = shiftsigned(v & mr, rshift, rcount);
3018 |                out[z++] = shiftsigned(v & mg, gshift, gcount);
3019 |                out[z++] = shiftsigned(v & mb, bshift, bcount);
3020 |                a = (ma ? shiftsigned(v & ma, ashift, acount) : 255);
3021 |                if (target == 4) out[z++] = a;
3022 |             }
3023 |          }
3024 |          skip(s, pad);
3025 |       }
3026 |    }
3027 |    if (flip_vertically) {
3028 |       stbi_uc t;
3029 |       for (j=0; j < (int) s->img_y>>1; ++j) {
3030 |          stbi_uc *p1 = out +      j     *s->img_x*target;
3031 |          stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
3032 |          for (i=0; i < (int) s->img_x*target; ++i) {
3033 |             t = p1[i], p1[i] = p2[i], p2[i] = t;
3034 |          }
3035 |       }
3036 |    }
3037 | 
3038 |    if (req_comp && req_comp != target) {
3039 |       out = convert_format(out, target, req_comp, s->img_x, s->img_y);
3040 |       if (out == NULL) return out; // convert_format frees input on failure
3041 |    }
3042 | 
3043 |    *x = s->img_x;
3044 |    *y = s->img_y;
3045 |    if (comp) *comp = target;
3046 |    return out;
3047 | }
3048 | 
3049 | #ifndef STBI_NO_STDIO
3050 | stbi_uc *stbi_bmp_load             (char const *filename,           int *x, int *y, int *comp, int req_comp)
3051 | {
3052 |    stbi_uc *data;
3053 |    FILE *f = fopen(filename, "rb");
3054 |    if (!f) return NULL;
3055 |    data = stbi_bmp_load_from_file(f, x,y,comp,req_comp);
3056 |    fclose(f);
3057 |    return data;
3058 | }
3059 | 
3060 | stbi_uc *stbi_bmp_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp)
3061 | {
3062 |    stbi s;
3063 |    start_file(&s, f);
3064 |    return bmp_load(&s, x,y,comp,req_comp);
3065 | }
3066 | #endif
3067 | 
3068 | stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
3069 | {
3070 |    stbi s;
3071 |    start_mem(&s, buffer, len);
3072 |    return bmp_load(&s, x,y,comp,req_comp);
3073 | }
3074 | 
3075 | // Targa Truevision - TGA
3076 | // by Jonathan Dummer
3077 | 
3078 | static int tga_test(stbi *s)
3079 | {
3080 | 	int sz;
3081 | 	get8u(s);		//	discard Offset
3082 | 	sz = get8u(s);	//	color type
3083 | 	if( sz > 1 ) return 0;	//	only RGB or indexed allowed
3084 | 	sz = get8u(s);	//	image type
3085 | 	if( (sz != 1) && (sz != 2) && (sz != 3) && (sz != 9) && (sz != 10) && (sz != 11) ) return 0;	//	only RGB or grey allowed, +/- RLE
3086 | 	get16(s);		//	discard palette start
3087 | 	get16(s);		//	discard palette length
3088 | 	get8(s);			//	discard bits per palette color entry
3089 | 	get16(s);		//	discard x origin
3090 | 	get16(s);		//	discard y origin
3091 | 	if( get16(s) < 1 ) return 0;		//	test width
3092 | 	if( get16(s) < 1 ) return 0;		//	test height
3093 | 	sz = get8(s);	//	bits per pixel
3094 | 	if( (sz != 8) && (sz != 16) && (sz != 24) && (sz != 32) ) return 0;	//	only RGB or RGBA or grey allowed
3095 | 	return 1;		//	seems to have passed everything
3096 | }
3097 | 
3098 | #ifndef STBI_NO_STDIO
3099 | int      stbi_tga_test_file        (FILE *f)
3100 | {
3101 |    stbi s;
3102 |    int r,n = ftell(f);
3103 |    start_file(&s, f);
3104 |    r = tga_test(&s);
3105 |    fseek(f,n,SEEK_SET);
3106 |    return r;
3107 | }
3108 | #endif
3109 | 
3110 | int      stbi_tga_test_memory      (stbi_uc const *buffer, int len)
3111 | {
3112 |    stbi s;
3113 |    start_mem(&s, buffer, len);
3114 |    return tga_test(&s);
3115 | }
3116 | 
3117 | static stbi_uc *tga_load(stbi *s, int *x, int *y, int *comp, int req_comp)
3118 | {
3119 | 	//	read in the TGA header stuff
3120 | 	int tga_offset = get8u(s);
3121 | 	int tga_indexed = get8u(s);
3122 | 	int tga_image_type = get8u(s);
3123 | 	int tga_is_RLE = 0;
3124 | 	int tga_palette_start = get16le(s);
3125 | 	int tga_palette_len = get16le(s);
3126 | 	int tga_palette_bits = get8u(s);
3127 | 	int tga_x_origin = get16le(s);
3128 | 	int tga_y_origin = get16le(s);
3129 | 	int tga_width = get16le(s);
3130 | 	int tga_height = get16le(s);
3131 | 	int tga_bits_per_pixel = get8u(s);
3132 | 	int tga_inverted = get8u(s);
3133 | 	//	image data
3134 | 	unsigned char *tga_data;
3135 | 	unsigned char *tga_palette = NULL;
3136 | 	int i, j;
3137 | 	unsigned char raw_data[4];
3138 | 	unsigned char trans_data[4];
3139 | 	int RLE_count = 0;
3140 | 	int RLE_repeating = 0;
3141 | 	int read_next_pixel = 1;
3142 | 	//	do a tiny bit of precessing
3143 | 	if( tga_image_type >= 8 )
3144 | 	{
3145 | 		tga_image_type -= 8;
3146 | 		tga_is_RLE = 1;
3147 | 	}
3148 | 	/* int tga_alpha_bits = tga_inverted & 15; */
3149 | 	tga_inverted = 1 - ((tga_inverted >> 5) & 1);
3150 | 
3151 | 	//	error check
3152 | 	if( //(tga_indexed) ||
3153 | 		(tga_width < 1) || (tga_height < 1) ||
3154 | 		(tga_image_type < 1) || (tga_image_type > 3) ||
3155 | 		((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16) &&
3156 | 		(tga_bits_per_pixel != 24) && (tga_bits_per_pixel != 32))
3157 | 		)
3158 | 	{
3159 | 		return NULL;
3160 | 	}
3161 | 
3162 | 	//	If I'm paletted, then I'll use the number of bits from the palette
3163 | 	if( tga_indexed )
3164 | 	{
3165 | 		tga_bits_per_pixel = tga_palette_bits;
3166 | 	}
3167 | 
3168 | 	//	tga info
3169 | 	*x = tga_width;
3170 | 	*y = tga_height;
3171 | 	if( (req_comp < 1) || (req_comp > 4) )
3172 | 	{
3173 | 		//	just use whatever the file was
3174 | 		req_comp = tga_bits_per_pixel / 8;
3175 | 		*comp = req_comp;
3176 | 	} else
3177 | 	{
3178 | 		//	force a new number of components
3179 | 		*comp = tga_bits_per_pixel/8;
3180 | 	}
3181 | 	tga_data = (unsigned char*)malloc( tga_width * tga_height * req_comp );
3182 | 
3183 | 	//	skip to the data's starting position (offset usually = 0)
3184 | 	skip(s, tga_offset );
3185 | 	//	do I need to load a palette?
3186 | 	if( tga_indexed )
3187 | 	{
3188 | 		//	any data to skip? (offset usually = 0)
3189 | 		skip(s, tga_palette_start );
3190 | 		//	load the palette
3191 | 		tga_palette = (unsigned char*)malloc( tga_palette_len * tga_palette_bits / 8 );
3192 | 		getn(s, tga_palette, tga_palette_len * tga_palette_bits / 8 );
3193 | 	}
3194 | 	//	load the data
3195 | 	for( i = 0; i < tga_width * tga_height; ++i )
3196 | 	{
3197 | 		//	if I'm in RLE mode, do I need to get a RLE chunk?
3198 | 		if( tga_is_RLE )
3199 | 		{
3200 | 			if( RLE_count == 0 )
3201 | 			{
3202 | 				//	yep, get the next byte as a RLE command
3203 | 				int RLE_cmd = get8u(s);
3204 | 				RLE_count = 1 + (RLE_cmd & 127);
3205 | 				RLE_repeating = RLE_cmd >> 7;
3206 | 				read_next_pixel = 1;
3207 | 			} else if( !RLE_repeating )
3208 | 			{
3209 | 				read_next_pixel = 1;
3210 | 			}
3211 | 		} else
3212 | 		{
3213 | 			read_next_pixel = 1;
3214 | 		}
3215 | 		//	OK, if I need to read a pixel, do it now
3216 | 		if( read_next_pixel )
3217 | 		{
3218 | 			//	load however much data we did have
3219 | 			if( tga_indexed )
3220 | 			{
3221 | 				//	read in 1 byte, then perform the lookup
3222 | 				int pal_idx = get8u(s);
3223 | 				if( pal_idx >= tga_palette_len )
3224 | 				{
3225 | 					//	invalid index
3226 | 					pal_idx = 0;
3227 | 				}
3228 | 				pal_idx *= tga_bits_per_pixel / 8;
3229 | 				for( j = 0; j*8 < tga_bits_per_pixel; ++j )
3230 | 				{
3231 | 					raw_data[j] = tga_palette[pal_idx+j];
3232 | 				}
3233 | 			} else
3234 | 			{
3235 | 				//	read in the data raw
3236 | 				for( j = 0; j*8 < tga_bits_per_pixel; ++j )
3237 | 				{
3238 | 					raw_data[j] = get8u(s);
3239 | 				}
3240 | 			}
3241 | 			//	convert raw to the intermediate format
3242 | 			switch( tga_bits_per_pixel )
3243 | 			{
3244 | 			case 8:
3245 | 				//	Luminous => RGBA
3246 | 				trans_data[0] = raw_data[0];
3247 | 				trans_data[1] = raw_data[0];
3248 | 				trans_data[2] = raw_data[0];
3249 | 				trans_data[3] = 255;
3250 | 				break;
3251 | 			case 16:
3252 | 				//	Luminous,Alpha => RGBA
3253 | 				trans_data[0] = raw_data[0];
3254 | 				trans_data[1] = raw_data[0];
3255 | 				trans_data[2] = raw_data[0];
3256 | 				trans_data[3] = raw_data[1];
3257 | 				break;
3258 | 			case 24:
3259 | 				//	BGR => RGBA
3260 | 				trans_data[0] = raw_data[2];
3261 | 				trans_data[1] = raw_data[1];
3262 | 				trans_data[2] = raw_data[0];
3263 | 				trans_data[3] = 255;
3264 | 				break;
3265 | 			case 32:
3266 | 				//	BGRA => RGBA
3267 | 				trans_data[0] = raw_data[2];
3268 | 				trans_data[1] = raw_data[1];
3269 | 				trans_data[2] = raw_data[0];
3270 | 				trans_data[3] = raw_data[3];
3271 | 				break;
3272 | 			}
3273 | 			//	clear the reading flag for the next pixel
3274 | 			read_next_pixel = 0;
3275 | 		} // end of reading a pixel
3276 | 		//	convert to final format
3277 | 		switch( req_comp )
3278 | 		{
3279 | 		case 1:
3280 | 			//	RGBA => Luminance
3281 | 			tga_data[i*req_comp+0] = compute_y(trans_data[0],trans_data[1],trans_data[2]);
3282 | 			break;
3283 | 		case 2:
3284 | 			//	RGBA => Luminance,Alpha
3285 | 			tga_data[i*req_comp+0] = compute_y(trans_data[0],trans_data[1],trans_data[2]);
3286 | 			tga_data[i*req_comp+1] = trans_data[3];
3287 | 			break;
3288 | 		case 3:
3289 | 			//	RGBA => RGB
3290 | 			tga_data[i*req_comp+0] = trans_data[0];
3291 | 			tga_data[i*req_comp+1] = trans_data[1];
3292 | 			tga_data[i*req_comp+2] = trans_data[2];
3293 | 			break;
3294 | 		case 4:
3295 | 			//	RGBA => RGBA
3296 | 			tga_data[i*req_comp+0] = trans_data[0];
3297 | 			tga_data[i*req_comp+1] = trans_data[1];
3298 | 			tga_data[i*req_comp+2] = trans_data[2];
3299 | 			tga_data[i*req_comp+3] = trans_data[3];
3300 | 			break;
3301 | 		}
3302 | 		//	in case we're in RLE mode, keep counting down
3303 | 		--RLE_count;
3304 | 	}
3305 | 	//	do I need to invert the image?
3306 | 	if( tga_inverted )
3307 | 	{
3308 | 		for( j = 0; j*2 < tga_height; ++j )
3309 | 		{
3310 | 			int index1 = j * tga_width * req_comp;
3311 | 			int index2 = (tga_height - 1 - j) * tga_width * req_comp;
3312 | 			for( i = tga_width * req_comp; i > 0; --i )
3313 | 			{
3314 | 				unsigned char temp = tga_data[index1];
3315 | 				tga_data[index1] = tga_data[index2];
3316 | 				tga_data[index2] = temp;
3317 | 				++index1;
3318 | 				++index2;
3319 | 			}
3320 | 		}
3321 | 	}
3322 | 	//	clear my palette, if I had one
3323 | 	if( tga_palette != NULL )
3324 | 	{
3325 | 		free( tga_palette );
3326 | 	}
3327 | 	//	the things I do to get rid of an error message, and yet keep
3328 | 	//	Microsoft's C compilers happy... [8^(
3329 | 	tga_palette_start = tga_palette_len = tga_palette_bits =
3330 | 			tga_x_origin = tga_y_origin = 0;
3331 | 	//	OK, done
3332 | 	return tga_data;
3333 | }
3334 | 
3335 | #ifndef STBI_NO_STDIO
3336 | stbi_uc *stbi_tga_load             (char const *filename,           int *x, int *y, int *comp, int req_comp)
3337 | {
3338 |    stbi_uc *data;
3339 |    FILE *f = fopen(filename, "rb");
3340 |    if (!f) return NULL;
3341 |    data = stbi_tga_load_from_file(f, x,y,comp,req_comp);
3342 |    fclose(f);
3343 |    return data;
3344 | }
3345 | 
3346 | stbi_uc *stbi_tga_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp)
3347 | {
3348 |    stbi s;
3349 |    start_file(&s, f);
3350 |    return tga_load(&s, x,y,comp,req_comp);
3351 | }
3352 | #endif
3353 | 
3354 | stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
3355 | {
3356 |    stbi s;
3357 |    start_mem(&s, buffer, len);
3358 |    return tga_load(&s, x,y,comp,req_comp);
3359 | }
3360 | 
3361 | 
3362 | // *************************************************************************************************
3363 | // Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicholas Schulz, tweaked by STB
3364 | 
3365 | static int psd_test(stbi *s)
3366 | {
3367 | 	if (get32(s) != 0x38425053) return 0;	// "8BPS"
3368 | 	else return 1;
3369 | }
3370 | 
3371 | #ifndef STBI_NO_STDIO
3372 | int stbi_psd_test_file(FILE *f)
3373 | {
3374 |    stbi s;
3375 |    int r,n = ftell(f);
3376 |    start_file(&s, f);
3377 |    r = psd_test(&s);
3378 |    fseek(f,n,SEEK_SET);
3379 |    return r;
3380 | }
3381 | #endif
3382 | 
3383 | int stbi_psd_test_memory(stbi_uc const *buffer, int len)
3384 | {
3385 |    stbi s;
3386 |    start_mem(&s, buffer, len);
3387 |    return psd_test(&s);
3388 | }
3389 | 
3390 | static stbi_uc *psd_load(stbi *s, int *x, int *y, int *comp, int req_comp)
3391 | {
3392 | 	int	pixelCount;
3393 | 	int channelCount, compression;
3394 | 	int channel, i, count, len;
3395 |    int w,h;
3396 |    uint8 *out;
3397 | 
3398 | 	// Check identifier
3399 | 	if (get32(s) != 0x38425053)	// "8BPS"
3400 | 		return epuc("not PSD", "Corrupt PSD image");
3401 | 
3402 | 	// Check file type version.
3403 | 	if (get16(s) != 1)
3404 | 		return epuc("wrong version", "Unsupported version of PSD image");
3405 | 
3406 | 	// Skip 6 reserved bytes.
3407 | 	skip(s, 6 );
3408 | 
3409 | 	// Read the number of channels (R, G, B, A, etc).
3410 | 	channelCount = get16(s);
3411 | 	if (channelCount < 0 || channelCount > 16)
3412 | 		return epuc("wrong channel count", "Unsupported number of channels in PSD image");
3413 | 
3414 | 	// Read the rows and columns of the image.
3415 |    h = get32(s);
3416 |    w = get32(s);
3417 | 
3418 | 	// Make sure the depth is 8 bits.
3419 | 	if (get16(s) != 8)
3420 | 		return epuc("unsupported bit depth", "PSD bit depth is not 8 bit");
3421 | 
3422 | 	// Make sure the color mode is RGB.
3423 | 	// Valid options are:
3424 | 	//   0: Bitmap
3425 | 	//   1: Grayscale
3426 | 	//   2: Indexed color
3427 | 	//   3: RGB color
3428 | 	//   4: CMYK color
3429 | 	//   7: Multichannel
3430 | 	//   8: Duotone
3431 | 	//   9: Lab color
3432 | 	if (get16(s) != 3)
3433 | 		return epuc("wrong color format", "PSD is not in RGB color format");
3434 | 
3435 | 	// Skip the Mode Data.  (It's the palette for indexed color; other info for other modes.)
3436 | 	skip(s,get32(s) );
3437 | 
3438 | 	// Skip the image resources.  (resolution, pen tool paths, etc)
3439 | 	skip(s, get32(s) );
3440 | 
3441 | 	// Skip the reserved data.
3442 | 	skip(s, get32(s) );
3443 | 
3444 | 	// Find out if the data is compressed.
3445 | 	// Known values:
3446 | 	//   0: no compression
3447 | 	//   1: RLE compressed
3448 | 	compression = get16(s);
3449 | 	if (compression > 1)
3450 | 		return epuc("bad compression", "PSD has an unknown compression format");
3451 | 
3452 | 	// Create the destination image.
3453 | 	out = (stbi_uc *) malloc(4 * w*h);
3454 | 	if (!out) return epuc("outofmem", "Out of memory");
3455 |    pixelCount = w*h;
3456 | 
3457 | 	// Initialize the data to zero.
3458 | 	//memset( out, 0, pixelCount * 4 );
3459 | 
3460 | 	// Finally, the image data.
3461 | 	if (compression) {
3462 | 		// RLE as used by .PSD and .TIFF
3463 | 		// Loop until you get the number of unpacked bytes you are expecting:
3464 | 		//     Read the next source byte into n.
3465 | 		//     If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
3466 | 		//     Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
3467 | 		//     Else if n is 128, noop.
3468 | 		// Endloop
3469 | 
3470 | 		// The RLE-compressed data is preceeded by a 2-byte data count for each row in the data,
3471 | 		// which we're going to just skip.
3472 | 		skip(s, h * channelCount * 2 );
3473 | 
3474 | 		// Read the RLE data by channel.
3475 | 		for (channel = 0; channel < 4; channel++) {
3476 | 			uint8 *p;
3477 | 
3478 |          p = out+channel;
3479 | 			if (channel >= channelCount) {
3480 | 				// Fill this channel with default data.
3481 | 				for (i = 0; i < pixelCount; i++) *p = (channel == 3 ? 255 : 0), p += 4;
3482 | 			} else {
3483 | 				// Read the RLE data.
3484 | 				count = 0;
3485 | 				while (count < pixelCount) {
3486 | 					len = get8(s);
3487 | 					if (len == 128) {
3488 | 						// No-op.
3489 | 					} else if (len < 128) {
3490 | 						// Copy next len+1 bytes literally.
3491 | 						len++;
3492 | 						count += len;
3493 | 						while (len) {
3494 | 							*p = get8(s);
3495 |                      p += 4;
3496 | 							len--;
3497 | 						}
3498 | 					} else if (len > 128) {
3499 | 						uint32	val;
3500 | 						// Next -len+1 bytes in the dest are replicated from next source byte.
3501 | 						// (Interpret len as a negative 8-bit int.)
3502 | 						len ^= 0x0FF;
3503 | 						len += 2;
3504 |                   val = get8(s);
3505 | 						count += len;
3506 | 						while (len) {
3507 | 							*p = val;
3508 |                      p += 4;
3509 | 							len--;
3510 | 						}
3511 | 					}
3512 | 				}
3513 | 			}
3514 | 		}
3515 | 
3516 | 	} else {
3517 | 		// We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...)
3518 | 		// where each channel consists of an 8-bit value for each pixel in the image.
3519 | 
3520 | 		// Read the data by channel.
3521 | 		for (channel = 0; channel < 4; channel++) {
3522 | 			uint8 *p;
3523 | 
3524 |          p = out + channel;
3525 | 			if (channel > channelCount) {
3526 | 				// Fill this channel with default data.
3527 | 				for (i = 0; i < pixelCount; i++) *p = channel == 3 ? 255 : 0, p += 4;
3528 | 			} else {
3529 | 				// Read the data.
3530 | 				count = 0;
3531 | 				for (i = 0; i < pixelCount; i++)
3532 | 					*p = get8(s), p += 4;
3533 | 			}
3534 | 		}
3535 | 	}
3536 | 
3537 | 	if (req_comp && req_comp != 4) {
3538 | 		out = convert_format(out, 4, req_comp, w, h);
3539 | 		if (out == NULL) return out; // convert_format frees input on failure
3540 | 	}
3541 | 
3542 | 	if (comp) *comp = channelCount;
3543 | 	*y = h;
3544 | 	*x = w;
3545 | 
3546 | 	return out;
3547 | }
3548 | 
3549 | #ifndef STBI_NO_STDIO
3550 | stbi_uc *stbi_psd_load(char const *filename, int *x, int *y, int *comp, int req_comp)
3551 | {
3552 |    stbi_uc *data;
3553 |    FILE *f = fopen(filename, "rb");
3554 |    if (!f) return NULL;
3555 |    data = stbi_psd_load_from_file(f, x,y,comp,req_comp);
3556 |    fclose(f);
3557 |    return data;
3558 | }
3559 | 
3560 | stbi_uc *stbi_psd_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
3561 | {
3562 |    stbi s;
3563 |    start_file(&s, f);
3564 |    return psd_load(&s, x,y,comp,req_comp);
3565 | }
3566 | #endif
3567 | 
3568 | stbi_uc *stbi_psd_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
3569 | {
3570 |    stbi s;
3571 |    start_mem(&s, buffer, len);
3572 |    return psd_load(&s, x,y,comp,req_comp);
3573 | }
3574 | 
3575 | 
3576 | // *************************************************************************************************
3577 | // Radiance RGBE HDR loader
3578 | // originally by Nicolas Schulz
3579 | #ifndef STBI_NO_HDR
3580 | static int hdr_test(stbi *s)
3581 | {
3582 |    char *signature = "#?RADIANCE\n";
3583 |    int i;
3584 |    for (i=0; signature[i]; ++i)
3585 |       if (get8(s) != signature[i])
3586 |          return 0;
3587 | 	return 1;
3588 | }
3589 | 
3590 | int stbi_hdr_test_memory(stbi_uc const *buffer, int len)
3591 | {
3592 |    stbi s;
3593 | 	start_mem(&s, buffer, len);
3594 | 	return hdr_test(&s);
3595 | }
3596 | 
3597 | #ifndef STBI_NO_STDIO
3598 | int stbi_hdr_test_file(FILE *f)
3599 | {
3600 |    stbi s;
3601 |    int r,n = ftell(f);
3602 |    start_file(&s, f);
3603 |    r = hdr_test(&s);
3604 |    fseek(f,n,SEEK_SET);
3605 |    return r;
3606 | }
3607 | #endif
3608 | 
3609 | #define HDR_BUFLEN  1024
3610 | static char *hdr_gettoken(stbi *z, char *buffer)
3611 | {
3612 |    int len=0;
3613 | 	char *s = buffer, c = '\0';
3614 | 
3615 |    c = get8(z);
3616 | 
3617 | 	while (!at_eof(z) && c != '\n') {
3618 | 		buffer[len++] = c;
3619 |       if (len == HDR_BUFLEN-1) {
3620 |          // flush to end of line
3621 |          while (!at_eof(z) && get8(z) != '\n')
3622 |             ;
3623 |          break;
3624 |       }
3625 |       c = get8(z);
3626 | 	}
3627 | 
3628 |    buffer[len] = 0;
3629 | 	return buffer;
3630 | }
3631 | 
3632 | static void hdr_convert(float *output, stbi_uc *input, int req_comp)
3633 | {
3634 | 	if( input[3] != 0 ) {
3635 |       float f1;
3636 | 		// Exponent
3637 | 		f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8));
3638 |       if (req_comp <= 2)
3639 |          output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
3640 |       else {
3641 |          output[0] = input[0] * f1;
3642 |          output[1] = input[1] * f1;
3643 |          output[2] = input[2] * f1;
3644 |       }
3645 |       if (req_comp == 2) output[1] = 1;
3646 |       if (req_comp == 4) output[3] = 1;
3647 | 	} else {
3648 |       switch (req_comp) {
3649 |          case 4: output[3] = 1; /* fallthrough */
3650 |          case 3: output[0] = output[1] = output[2] = 0;
3651 |                  break;
3652 |          case 2: output[1] = 1; /* fallthrough */
3653 |          case 1: output[0] = 0;
3654 |                  break;
3655 |       }
3656 | 	}
3657 | }
3658 | 
3659 | 
3660 | static float *hdr_load(stbi *s, int *x, int *y, int *comp, int req_comp)
3661 | {
3662 |    char buffer[HDR_BUFLEN];
3663 | 	char *token;
3664 | 	int valid = 0;
3665 | 	int width, height;
3666 |    stbi_uc *scanline;
3667 | 	float *hdr_data;
3668 | 	int len;
3669 | 	unsigned char count, value;
3670 | 	int i, j, k, c1,c2, z;
3671 | 
3672 | 
3673 | 	// Check identifier
3674 | 	if (strcmp(hdr_gettoken(s,buffer), "#?RADIANCE") != 0)
3675 | 		return epf("not HDR", "Corrupt HDR image");
3676 | 
3677 | 	// Parse header
3678 | 	while(1) {
3679 | 		token = hdr_gettoken(s,buffer);
3680 |       if (token[0] == 0) break;
3681 | 		if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
3682 |    }
3683 | 
3684 | 	if (!valid)    return epf("unsupported format", "Unsupported HDR format");
3685 | 
3686 |    // Parse width and height
3687 |    // can't use sscanf() if we're not using stdio!
3688 |    token = hdr_gettoken(s,buffer);
3689 |    if (strncmp(token, "-Y ", 3))  return epf("unsupported data layout", "Unsupported HDR format");
3690 |    token += 3;
3691 |    height = strtol(token, &token, 10);
3692 |    while (*token == ' ') ++token;
3693 |    if (strncmp(token, "+X ", 3))  return epf("unsupported data layout", "Unsupported HDR format");
3694 |    token += 3;
3695 |    width = strtol(token, NULL, 10);
3696 | 
3697 | 	*x = width;
3698 | 	*y = height;
3699 | 
3700 |    *comp = 3;
3701 | 	if (req_comp == 0) req_comp = 3;
3702 | 
3703 | 	// Read data
3704 | 	hdr_data = (float *) malloc(height * width * req_comp * sizeof(float));
3705 | 
3706 | 	// Load image data
3707 |    // image data is stored as some number of sca
3708 | 	if( width < 8 || width >= 32768) {
3709 | 		// Read flat data
3710 |       for (j=0; j < height; ++j) {
3711 |          for (i=0; i < width; ++i) {
3712 |             stbi_uc rgbe[4];
3713 |            main_decode_loop:
3714 |             getn(s, rgbe, 4);
3715 |             hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
3716 |          }
3717 |       }
3718 | 	} else {
3719 | 		// Read RLE-encoded data
3720 | 		scanline = NULL;
3721 | 
3722 | 		for (j = 0; j < height; ++j) {
3723 |          c1 = get8(s);
3724 |          c2 = get8(s);
3725 |          len = get8(s);
3726 |          if (c1 != 2 || c2 != 2 || (len & 0x80)) {
3727 |             // not run-length encoded, so we have to actually use THIS data as a decoded
3728 |             // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
3729 |             stbi_uc rgbe[4] = { c1,c2,len, get8(s) };
3730 |             hdr_convert(hdr_data, rgbe, req_comp);
3731 |             i = 1;
3732 |             j = 0;
3733 |             free(scanline);
3734 |             goto main_decode_loop; // yes, this is fucking insane; blame the fucking insane format
3735 |          }
3736 |          len <<= 8;
3737 |          len |= get8(s);
3738 |          if (len != width) { free(hdr_data); free(scanline); return epf("invalid decoded scanline length", "corrupt HDR"); }
3739 |          if (scanline == NULL) scanline = (stbi_uc *) malloc(width * 4);
3740 | 
3741 | 			for (k = 0; k < 4; ++k) {
3742 | 				i = 0;
3743 | 				while (i < width) {
3744 | 					count = get8(s);
3745 | 					if (count > 128) {
3746 | 						// Run
3747 | 						value = get8(s);
3748 |                   count -= 128;
3749 | 						for (z = 0; z < count; ++z)
3750 | 							scanline[i++ * 4 + k] = value;
3751 | 					} else {
3752 | 						// Dump
3753 | 						for (z = 0; z < count; ++z)
3754 | 							scanline[i++ * 4 + k] = get8(s);
3755 | 					}
3756 | 				}
3757 | 			}
3758 |          for (i=0; i < width; ++i)
3759 |             hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
3760 | 		}
3761 |       free(scanline);
3762 | 	}
3763 | 
3764 |    return hdr_data;
3765 | }
3766 | 
3767 | #ifndef STBI_NO_STDIO
3768 | float *stbi_hdr_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
3769 | {
3770 |    stbi s;
3771 |    start_file(&s,f);
3772 |    return hdr_load(&s,x,y,comp,req_comp);
3773 | }
3774 | #endif
3775 | 
3776 | float *stbi_hdr_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
3777 | {
3778 |    stbi s;
3779 |    start_mem(&s,buffer, len);
3780 |    return hdr_load(&s,x,y,comp,req_comp);
3781 | }
3782 | 
3783 | #endif // STBI_NO_HDR
3784 | 
3785 | /////////////////////// write image ///////////////////////
3786 | 
3787 | #ifndef STBI_NO_WRITE
3788 | 
3789 | static void write8(FILE *f, int x) { uint8 z = (uint8) x; fwrite(&z,1,1,f); }
3790 | 
3791 | static void writefv(FILE *f, char *fmt, va_list v)
3792 | {
3793 |    while (*fmt) {
3794 |       switch (*fmt++) {
3795 |          case ' ': break;
3796 |          case '1': { uint8 x = va_arg(v, int); write8(f,x); break; }
3797 |          case '2': { int16 x = va_arg(v, int); write8(f,x); write8(f,x>>8); break; }
3798 |          case '4': { int32 x = va_arg(v, int); write8(f,x); write8(f,x>>8); write8(f,x>>16); write8(f,x>>24); break; }
3799 |          default:
3800 |             assert(0);
3801 |             va_end(v);
3802 |             return;
3803 |       }
3804 |    }
3805 | }
3806 | 
// Variadic front end for writefv(): collects the arguments following `fmt`
// into a va_list and writes them to `f` according to `fmt`.
static void writef(FILE *f, char *fmt, ...)
{
   va_list args;

   va_start(args, fmt);
   writefv(f, fmt, args);
   va_end(args);
}
3814 | 
3815 | static void write_pixels(FILE *f, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad)
3816 | {
3817 |    uint8 bg[3] = { 255, 0, 255}, px[3];
3818 |    uint32 zero = 0;
3819 |    int i,j,k, j_end;
3820 | 
3821 |    if (vdir < 0)
3822 |       j_end = -1, j = y-1;
3823 |    else
3824 |       j_end =  y, j = 0;
3825 | 
3826 |    for (; j != j_end; j += vdir) {
3827 |       for (i=0; i < x; ++i) {
3828 |          uint8 *d = (uint8 *) data + (j*x+i)*comp;
3829 |          if (write_alpha < 0)
3830 |             fwrite(&d[comp-1], 1, 1, f);
3831 |          switch (comp) {
3832 |             case 1:
3833 |             case 2: writef(f, "111", d[0],d[0],d[0]);
3834 |                     break;
3835 |             case 4:
3836 |                if (!write_alpha) {
3837 |                   for (k=0; k < 3; ++k)
3838 |                      px[k] = bg[k] + ((d[k] - bg[k]) * d[3])/255;
3839 |                   writef(f, "111", px[1-rgb_dir],px[1],px[1+rgb_dir]);
3840 |                   break;
3841 |                }
3842 |                /* FALLTHROUGH */
3843 |             case 3:
3844 |                writef(f, "111", d[1-rgb_dir],d[1],d[1+rgb_dir]);
3845 |                break;
3846 |          }
3847 |          if (write_alpha > 0)
3848 |             fwrite(&d[comp-1], 1, 1, f);
3849 |       }
3850 |       fwrite(&zero,scanline_pad,1,f);
3851 |    }
3852 | }
3853 | 
// Create `filename`, write a header described by `fmt` and the trailing
// arguments (see writefv), then write the pixel data via write_pixels().
// Returns 1 if the file could be opened for writing, 0 otherwise.
static int outfile(char const *filename, int rgb_dir, int vdir, int x, int y, int comp, void *data, int alpha, int pad, char *fmt, ...)
{
   va_list header_args;
   FILE *fp = fopen(filename, "wb");

   if (fp == NULL)
      return 0;

   va_start(header_args, fmt);
   writefv(fp, fmt, header_args);
   va_end(header_args);

   write_pixels(fp, rgb_dir, vdir, x, y, comp, data, alpha, pad);
   fclose(fp);
   return 1;
}
3867 | 
// Write an x-by-y image with `comp` components per pixel to `filename` as a
// 24-bit BMP (14-byte file header + 40-byte bitmap header). Rows are padded
// to a multiple of 4 bytes. Returns the result of outfile().
int stbi_write_bmp(char const *filename, int x, int y, int comp, void *data)
{
   int pad = (-x*3) & 3;                 // bytes needed to round a row up to 4
   int header_size = 14+40;
   int file_size = 14+40+(x*3+pad)*y;

   return outfile(filename,-1,-1,x,y,comp,data,0,pad,
           "11 4 22 4" "4 44 22 444444",
           'B', 'M', file_size, 0,0, header_size,  // file header
            40, x,y, 1,24, 0,0,0,0,0,0);           // bitmap header
}
3876 | 
// Write an x-by-y image with `comp` components per pixel to `filename` as a
// TGA file with image type 2. Images with an even component count are written
// with an alpha byte (32 bits per pixel), others without (24 bits per pixel).
// Returns the result of outfile().
int stbi_write_tga(char const *filename, int x, int y, int comp, void *data)
{
   int has_alpha = ((comp & 1) == 0);
   int bits_per_pixel = 24+8*has_alpha;
   int descriptor = 8*has_alpha;

   return outfile(filename, -1,-1, x, y, comp, data, has_alpha, 0,
                  "111 221 2222 11", 0,0,2, 0,0,0, 0,0,x,y, bits_per_pixel, descriptor);
}
3883 | 
3884 | // any other image formats that do interleaved rgb data?
3885 | //    PNG: requires adler32,crc32 -- significant amount of code
3886 | //    PSD: no, channels output separately
3887 | //    TIFF: no, stripwise-interleaved... i think
3888 | 
3889 | #endif // STBI_NO_WRITE
3890 | 
3891 | #endif // STBI_HEADER_FILE_ONLY
3892 | 
3893 | 


--------------------------------------------------------------------------------